Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 70 additions & 2 deletions arc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ def __init__(self,
self.orbitals_level = orbitals_level
self.unique_species_labels = list()
self.save_restart = False
if self.restart_dict is not None:
self._sanitize_restart_output()

if len(self.rxn_list):
rxn_info_path = self.make_reaction_labels_info_file()
Expand Down Expand Up @@ -2619,7 +2621,6 @@ def switch_ts(self, label: str):
logger.info(f'Switching a TS guess for {label}...')
self.determine_most_likely_ts_conformer(label=label) # Look for a different TS guess.
self.delete_all_species_jobs(label=label) # Delete other currently running jobs for this TS.
self.output[label]['geo'] = self.output[label]['freq'] = self.output[label]['sp'] = self.output[label]['composite'] = ''
freq_path = os.path.join(self.project_directory, 'output', 'rxns', label, 'geometry', 'freq.out')
if os.path.isfile(freq_path):
os.remove(freq_path)
Expand Down Expand Up @@ -3044,6 +3045,9 @@ def check_all_done(self, label: str):
logger.debug(f'Species {label} did not converge.')
all_converged = False
break
if all_converged and self._missing_required_paths(label):
logger.debug(f'Species {label} did not converge due to missing output paths.')
all_converged = False
if label in self.output and all_converged:
self.output[label]['convergence'] = True
if self.species_dict[label].is_ts:
Expand Down Expand Up @@ -3084,6 +3088,64 @@ def check_all_done(self, label: str):
# Update restart dictionary and save the yaml restart file:
self.save_restart_dict()

def _missing_required_paths(self, label: str) -> bool:
"""
Check whether required output paths are missing for a species/TS.

Args:
label (str): The species label.

Returns:
bool: Whether required output paths are missing.
"""
return bool(self._get_missing_required_paths(label))

def _get_missing_required_paths(self, label: str) -> set:
"""
Get missing required output path job types for a species/TS.

Args:
label (str): The species label.

Returns:
set: Job types with missing required output paths.
"""
if label not in self.output or 'paths' not in self.output[label]:
return set()
path_map = {
'opt': 'geo',
'freq': 'freq',
'sp': 'sp',
'composite': 'composite',
}
missing = set()
for job_type, path_key in path_map.items():
if job_type == 'composite':
required = self.composite_method is not None
else:
required = self.job_types.get(job_type, False)
if not required:
continue
if self.species_dict[label].number_of_atoms == 1 and job_type in ['opt', 'freq']:
continue
if self.output[label]['job_types'].get(job_type, False) and not self.output[label]['paths'].get(path_key, ''):
missing.add(job_type)
return missing

def _sanitize_restart_output(self) -> None:
"""
Ensure restart output state is internally consistent (e.g., convergence without paths).
"""
for label in list(self.output.keys()):
if label not in self.species_dict:
continue
missing_job_types = self._get_missing_required_paths(label)
if self.output[label].get('convergence') and missing_job_types:
self.output[label]['convergence'] = False
if 'job_types' in self.output[label]:
for job_type in missing_job_types:
self.output[label]['job_types'][job_type] = False
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the _sanitize_restart_output method, there's no check for the existence of 'job_types' key before accessing it at line 3155. While _missing_required_paths at line 3126 uses .get() with a default value to safely access job_types, this line directly accesses self.output[label]['job_types'][job_type]. If 'job_types' doesn't exist in self.output[label], this will raise a KeyError. Consider using safe dictionary access or add a check for the existence of 'job_types' key.

Suggested change
self.output[label]['job_types'][job_type] = False
if 'job_types' in self.output[label]:
self.output[label]['job_types'][job_type] = False

Copilot uses AI. Check for mistakes.

def get_server_job_ids(self, specific_server: Optional[str] = None):
"""
Check job status on a specific server or on all active servers, get a list of relevant running job IDs.
Expand Down Expand Up @@ -3547,7 +3609,13 @@ def delete_all_species_jobs(self, label: str):
logger.info(f'Deleted job {job_name}')
job.delete()
self.running_jobs[label] = list()
self.output[label]['paths'] = {key: '' if key != 'irc' else list() for key in self.output[label]['paths'].keys()}
if label in self.output:
self.output[label]['convergence'] = False
for key in ['opt', 'freq', 'sp', 'composite', 'fine']:
if key in self.output[label]['job_types']:
self.output[label]['job_types'][key] = False
self.output[label]['paths'] = {key: '' if key != 'irc' else list()
for key in self.output[label]['paths'].keys()}

def restore_running_jobs(self):
"""
Expand Down
119 changes: 119 additions & 0 deletions arc/scheduler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import unittest
import os
import shutil
from unittest import mock

import arc.parser.parser as parser
from arc.checks.ts import check_ts
Expand Down Expand Up @@ -726,6 +727,124 @@ def test_save_e_elect(self):
self.assertEqual(content, {'formaldehyde': -300621.95378630824, 'mehylamine': -251360.00924747565})
shutil.rmtree(project_directory, ignore_errors=True)

def test_check_all_done_requires_paths(self):
    """Test that convergence isn't set when required output paths are missing."""
    # All required job types report done, yet every output path is empty.
    job_type_status = {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True,
                       'rotors': False, 'irc': False, 'fine': False, 'composite': False}
    restart_output = {'formaldehyde': {'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                                       'restart': '',
                                       'convergence': False,
                                       'job_types': job_type_status,
                                       }}
    scheduler = Scheduler(project='test_check_all_done_requires_paths',
                          ess_settings=self.ess_settings,
                          species_list=[ARCSpecies(label='formaldehyde', smiles='C=O')],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': restart_output},
                          )
    scheduler.check_all_done(label='formaldehyde')
    self.assertFalse(scheduler.output['formaldehyde']['convergence'])

def test_restart_sanitizes_convergence_for_ts(self):
    """Test that restart sanitization resets a TS entry claiming convergence without paths."""
    # The restart dict claims convergence although no output path exists.
    restart_output = {'TS0': {'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                              'restart': '',
                              'convergence': True,
                              'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True,
                                            'sp': True, 'rotors': False, 'irc': False, 'fine': False,
                                            'composite': False},
                              }}
    scheduler = Scheduler(project='test_restart_sanitizes_convergence_for_ts',
                          ess_settings=self.ess_settings,
                          species_list=[ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0)],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': restart_output, 'running_jobs': {'TS0': []}},
                          )
    self.assertFalse(scheduler.output['TS0']['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite'):
        self.assertFalse(scheduler.output['TS0']['job_types'][job_type])

def test_delete_all_species_jobs_resets_output(self):
    """Test that deleting all jobs of a species clears its convergence and job status."""
    restart_output = {'formaldehyde': {'paths': {'geo': 'geo.out', 'freq': 'freq.out', 'sp': 'sp.out',
                                                 'composite': ''},
                                       'restart': '',
                                       'convergence': True,
                                       'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True,
                                                     'freq': True, 'sp': True, 'rotors': False,
                                                     'irc': False, 'fine': True, 'composite': False},
                                       }}
    scheduler = Scheduler(project='test_delete_all_species_jobs_resets_output',
                          ess_settings=self.ess_settings,
                          species_list=[ARCSpecies(label='formaldehyde', smiles='C=O')],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': restart_output},
                          )
    scheduler.job_dict['formaldehyde'] = {'opt': {}, 'freq': {}, 'sp': {}}
    scheduler.running_jobs['formaldehyde'] = []
    scheduler.delete_all_species_jobs(label='formaldehyde')
    self.assertFalse(scheduler.output['formaldehyde']['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite', 'fine'):
        self.assertFalse(scheduler.output['formaldehyde']['job_types'][job_type])
    self.assertEqual(scheduler.output['formaldehyde']['paths'],
                     {'geo': '', 'freq': '', 'sp': '', 'composite': ''})

def test_switch_ts_resets_output(self):
    """Test that switching TS guesses resets convergence, job status, and paths."""
    restart_output = {'TS0': {'paths': {'geo': 'geo.out', 'freq': 'freq.out', 'sp': 'sp.out',
                                        'composite': ''},
                              'restart': '',
                              'convergence': True,
                              'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True,
                                            'freq': True, 'sp': True, 'rotors': False, 'irc': False,
                                            'fine': True, 'composite': False},
                              }}
    scheduler = Scheduler(project='test_switch_ts_resets_output',
                          ess_settings=self.ess_settings,
                          species_list=[ARCSpecies(label='TS0', is_ts=True, multiplicity=2, charge=0)],
                          opt_level=Level(repr=default_levels_of_theory['opt']),
                          freq_level=Level(repr=default_levels_of_theory['freq']),
                          sp_level=Level(repr=default_levels_of_theory['sp']),
                          project_directory=self.project_directory,
                          testing=True,
                          job_types=initialize_job_types(),
                          restart_dict={'output': restart_output},
                          )
    scheduler.species_dict['TS0'].ts_guesses_exhausted = True
    scheduler.species_dict['TS0'].chosen_ts = None
    # Avoid the expensive conformer determination; only the reset logic is under test.
    with mock.patch.object(Scheduler, 'determine_most_likely_ts_conformer', return_value=None):
        scheduler.switch_ts(label='TS0')
    self.assertFalse(scheduler.output['TS0']['convergence'])
    for job_type in ('opt', 'freq', 'sp', 'composite', 'fine'):
        self.assertFalse(scheduler.output['TS0']['job_types'][job_type])
    self.assertEqual(scheduler.output['TS0']['paths'],
                     {'geo': '', 'freq': '', 'sp': '', 'composite': ''})

def test_species_has_geo_sp_freq(self):
"""Test the species_has_geo() / species_has_sp() / species_has_freq() functions."""
for property_, species_has_property in zip(['geo', 'sp', 'freq'], [species_has_geo, species_has_sp, species_has_freq]):
Expand Down
8 changes: 4 additions & 4 deletions arc/species/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -1536,12 +1536,12 @@ def make_ts_report(self):
self.ts_report += ':\n'
if self.successful_methods:
self.ts_report += 'Methods that successfully generated a TS guess:\n'
for successful_method in self.successful_methods:
self.ts_report += successful_method + ','
unique_successful_methods = list(dict.fromkeys(self.successful_methods))
self.ts_report += ','.join(unique_successful_methods)
if self.unsuccessful_methods:
self.ts_report += '\nMethods that were unsuccessfully in generating a TS guess:\n'
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Grammatical error: "were unsuccessfully in generating" should be "were unsuccessful in generating" (remove the 'ly').

Suggested change
self.ts_report += '\nMethods that were unsuccessfully in generating a TS guess:\n'
self.ts_report += '\nMethods that were unsuccessful in generating a TS guess:\n'

Copilot uses AI. Check for mistakes.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see this minor comment

for unsuccessful_method in self.unsuccessful_methods:
self.ts_report += unsuccessful_method + ','
unique_unsuccessful_methods = list(dict.fromkeys(self.unsuccessful_methods))
self.ts_report += ','.join(unique_unsuccessful_methods)
Comment on lines +1539 to +1544
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The deduplication of TS methods in the report will cause an existing test to fail. The test in arc/species/species_test.py at line 1202-1206 expects the ts_report to contain duplicates (e.g., 'autotst,autotst,autotst,autotst,gcn,gcn,...'), but after this change, the report will show 'autotst,gcn,kinbot' instead. The test should be updated to match the new expected behavior.

Copilot uses AI. Check for mistakes.
if not self.ts_guesses_exhausted:
self.ts_report += f'\nThe method that generated the best TS guess and its output used for the ' \
f'optimization: {self.chosen_ts_method}\n'
Expand Down
54 changes: 48 additions & 6 deletions functional/restart_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@
import shutil
import unittest
import warnings
from unittest import mock

from arc.molecule.molecule import Molecule

from arc.common import ARC_PATH, read_yaml_file
from arc.common import ARC_PATH, read_yaml_file, save_yaml_file
from arc.main import ARC


class TestRestart(unittest.TestCase):
"""
Contains unit tests for restarting ARC.
"""
created_projects = set()

@classmethod
def setUpClass(cls):
Expand All @@ -37,6 +39,7 @@ def test_restart_thermo(self):
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'arc_project_for_testing_delete_after_usage_restart_thermo'
self.created_projects.add(project)
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True)
Expand Down Expand Up @@ -134,6 +137,7 @@ def test_restart_rate_1(self):
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'arc_project_for_testing_delete_after_usage_restart_rate_1'
self.created_projects.add(project)
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True)
Expand All @@ -155,6 +159,7 @@ def test_restart_rate_1(self):
def test_restart_rate_2(self):
"""Test restarting ARC and attaining a reaction rate coefficient"""
project = 'arc_project_for_testing_delete_after_usage_restart_rate_2'
self.created_projects.add(project)
project_directory = os.path.join(ARC_PATH, 'Projects', project)
base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1')
restart_path = os.path.join(base_path, 'restart.yml')
Expand Down Expand Up @@ -184,6 +189,7 @@ def test_restart_bde (self):
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'test_restart_bde'
self.created_projects.add(project)
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True)
Expand Down Expand Up @@ -212,17 +218,53 @@ def test_globalize_paths(self):
content['output']['spc']['paths']['freq'])
self.assertNotIn('gpfs/workspace/users/user', content['output']['spc']['paths']['freq'])

def test_restart_sanitizes_ts_output(self):
    """Test sanitizing an inconsistent TS output entry when restarting ARC."""
    project = 'arc_project_for_testing_delete_after_usage_restart_sanitize_ts'
    self.created_projects.add(project)
    project_directory = os.path.join(ARC_PATH, 'Projects', project)
    os.makedirs(project_directory, exist_ok=True)
    restart_path = os.path.join(project_directory, 'restart.yml')
    # The restart content claims TS0 converged although all output paths are empty.
    restart_dict = {
        'project': project,
        'project_directory': project_directory,
        'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True, 'sp': True,
                      'rotors': False, 'irc': False, 'fine': False},
        'species': [{'label': 'TS0', 'is_ts': True, 'multiplicity': 1, 'charge': 0}],
        'output': {'TS0': {'paths': {'geo': '', 'freq': '', 'sp': '', 'composite': ''},
                           'restart': '',
                           'convergence': True,
                           'job_types': {'conf_opt': False, 'conf_sp': False, 'opt': True, 'freq': True,
                                         'sp': True, 'rotors': False, 'irc': False, 'fine': False,
                                         'composite': False},
                           }},
        'running_jobs': {'TS0': []},
    }
    save_yaml_file(path=restart_path, content=restart_dict)
    input_dict = read_yaml_file(path=restart_path, project_directory=project_directory)
    input_dict['project'], input_dict['project_directory'] = project, project_directory
    # Patch out job spawning and post-processing; only the sanitization on restart is under test.
    with mock.patch('arc.scheduler.Scheduler.schedule_jobs', return_value=None), \
            mock.patch('arc.scheduler.Scheduler.run_opt_job', return_value=None), \
            mock.patch('arc.main.process_arc_project', return_value=None):
        arc1 = ARC(**input_dict)
        arc1.execute()
    self.assertFalse(arc1.scheduler.output['TS0']['convergence'])

@classmethod
def tearDownClass(cls):
    """
    A function that is run ONCE after all unit tests in this class.
    Delete all project directories created during these unit tests.
    """
    # Fall back to the known project names in case no test registered any.
    default_projects = {'arc_project_for_testing_delete_after_usage_restart_thermo',
                        'arc_project_for_testing_delete_after_usage_restart_rate_1',
                        'arc_project_for_testing_delete_after_usage_restart_rate_2',
                        'test_restart_bde',
                        'arc_project_for_testing_delete_after_usage_restart_sanitize_ts',
                        }
    for project in (cls.created_projects or default_projects):
        shutil.rmtree(os.path.join(ARC_PATH, 'Projects', project), ignore_errors=True)
Expand Down
Loading