Skip to content

Commit

Permalink
Multi species (#724)
Browse files Browse the repository at this point in the history
This branch allows us to create a cluster of species, including
multi-individual species. To be more specific, ARC currently creates
separate Gaussian input files when it receives multiple species. With
this new feature, we can combine all the species into one Gaussian input
file, followed by the relevant post-processing steps.
  • Loading branch information
alongd authored Jan 16, 2024
2 parents 3b46df6 + 4aa5282 commit 061cdc8
Show file tree
Hide file tree
Showing 26 changed files with 5,886 additions and 118 deletions.
10 changes: 7 additions & 3 deletions arc/job/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def determine_job_array_parameters(self):
ARC will allocate, e.g., 8 workers, to simultaneously get processes (one by one) from the HDF5 bank
and execute them. On average, each worker in this example executes 125 jobs.
"""
if self.execution_type == 'incore':
if self.execution_type == 'incore' or self.run_multi_species:
return None
if len(self.job_types) > 1:
self.iterate_by.append('job_types')
Expand Down Expand Up @@ -533,7 +533,10 @@ def set_file_paths(self):
Set local and remote job file paths.
"""
folder_name = 'TS_guesses' if self.reactions is not None else 'TSs' if self.species[0].is_ts else 'Species'
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
if self.run_multi_species == False:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
else:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species[0].multi_species, self.job_name)
self.local_path_to_output_file = os.path.join(self.local_path, 'output.out')
self.local_path_to_orbitals_file = os.path.join(self.local_path, 'orbitals.fchk')
self.local_path_to_check_file = os.path.join(self.local_path, 'check.chk')
Expand All @@ -545,7 +548,8 @@ def set_file_paths(self):

if self.server is not None:
# Parentheses don't play well in folder names:
species_name_remote = self.species_label.replace('(', '_').replace(')', '_')
species_name_remote = self.species_label if isinstance(self.species_label, str) else self.species[0].multi_species
species_name_remote = species_name_remote.replace('(', '_').replace(')', '_')
path = servers[self.server].get('path', '').lower()
path = os.path.join(path, servers[self.server]['un']) if path else ''
self.remote_path = os.path.join(path, 'runs', 'ARC_Projects', self.project,
Expand Down
3 changes: 3 additions & 0 deletions arc/job/adapters/cfour.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class CFourAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand Down Expand Up @@ -122,6 +123,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand Down Expand Up @@ -170,6 +172,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down
72 changes: 58 additions & 14 deletions arc/job/adapters/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def _initialize_adapter(obj: 'JobAdapter',
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -114,7 +115,7 @@ def _initialize_adapter(obj: 'JobAdapter',
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):
"""
A common Job adapter initializer function.
Expand Down Expand Up @@ -164,6 +165,7 @@ def _initialize_adapter(obj: 'JobAdapter',
# When restarting ARC and re-setting the jobs, ``level`` is a string, convert it to a Level object instance
obj.level = Level(repr=level) if not isinstance(level, Level) and level is not None else level
obj.max_job_time = max_job_time or default_job_settings.get('job_time_limit_hrs', 120)
obj.run_multi_species = run_multi_species
obj.number_of_processes = 0
obj.reactions = [reactions] if reactions is not None and not isinstance(reactions, list) else reactions
obj.remote_path = None
Expand All @@ -180,18 +182,34 @@ def _initialize_adapter(obj: 'JobAdapter',
obj.pivots = [[tor[1] + 1, tor[2] + 1] for tor in obj.torsions] if obj.torsions is not None else None
obj.tsg = tsg
obj.workers = None
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
if not obj.run_multi_species:
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
else:
obj.xyz = list()
if obj.species is not None:
for spc in obj.species:
obj.xyz.append(spc.get_xyz() if xyz is None else xyz)

if obj.job_num is None or obj.job_name is None or obj.job_server_name:
obj._set_job_number()

if obj.species is not None:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
if not obj.run_multi_species:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
else:
obj.charge = list()
obj.multiplicity = list()
obj.is_ts = obj.species[0].is_ts
obj.species_label = list()
for spc in obj.species:
obj.charge.append(spc.charge)
obj.multiplicity.append(spc.multiplicity)
obj.species_label.append(spc.label)
elif obj.reactions is not None:
obj.charge = obj.reactions[0].charge
obj.multiplicity = obj.reactions[0].multiplicity
Expand Down Expand Up @@ -233,25 +251,51 @@ def _initialize_adapter(obj: 'JobAdapter',
check_argument_consistency(obj)


def is_restricted(obj) -> bool:
def is_restricted(obj: 'JobAdapter') -> Union[bool, List[bool]]:
"""
Check whether a Job Adapter should be executed as restricted or unrestricted.
If the job adapter contains a list of species, return True or False per species.
Args:
obj: The job adapter object.
Returns:
Union[bool, List[bool]]: Whether to run as restricted (``True``) or not (``False``).
"""
if not obj.run_multi_species:
return is_species_restricted(obj)
else:
return [is_species_restricted(obj, species) for species in obj.species]


def is_species_restricted(obj: 'JobAdapter',
species: Optional['ARCSpecies'] = None,
) -> bool:
"""
Check whether a species should be executed as restricted or unrestricted.
Args:
obj: The job adapter object.
species (ARCSpecies, optional): The species to check.
Returns:
bool: Whether to run as restricted (``True``) or not (``False``).
"""
if (obj.multiplicity > 1 and obj.level.method_type != 'composite') \
or (obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1):

if obj.level.method_type in ['force_field','composite','semiempirical']:
return True

multiplicity = obj.multiplicity if species is None else species.multiplicity
number_of_radicals = obj.species[0].number_of_radicals if species is None else species.number_of_radicals
species_label = obj.species[0].label if species is None else species.label
if multiplicity > 1 or (number_of_radicals is not None and number_of_radicals > 1):
# run an unrestricted electronic structure calculation if the spin multiplicity is greater than one,
# or if it is one but the number of radicals is greater than one (e.g., bi-rad singlet)
# don't run unrestricted for composite methods such as CBS-QB3, it'll be done automatically if the
# multiplicity is greater than one, but do specify uCBS-QB3 for example for bi-rad singlets.
if obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {obj.species_label} which has '
f'{obj.species[0].number_of_radicals} radicals and multiplicity {obj.multiplicity}.')
if number_of_radicals is not None and number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {species_label} which has '
f'{number_of_radicals} radicals and multiplicity {multiplicity}.')
return False
return True

Expand Down
14 changes: 14 additions & 0 deletions arc/job/adapters/common_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,26 @@ def setUpClass(cls):
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
)
cls.job_multi = GaussianAdapter(execution_type='incore',
job_type='scan',
torsions=[[1, 2, 3, 4]],
level=Level(method='wb97xd', basis='def2tzvp'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_GaussianAdapter'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=2, multi_species='mltspc1'),
ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=1, multi_species='mltspc1')],
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
run_multi_species = True,
)

def test_is_restricted(self):
"""Test the is_restricted() function"""
self.assertTrue(common.is_restricted(self.job_1))
self.assertFalse(common.is_restricted(self.job_2))
self.assertFalse(common.is_restricted(self.job_3))
benchmark_list = [False, True]
self.assertEqual(common.is_restricted(self.job_multi),benchmark_list)

def test_check_argument_consistency(self):
"""Test the check_argument_consistency() function"""
Expand Down
40 changes: 31 additions & 9 deletions arc/job/adapters/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class GaussianAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand All @@ -106,7 +107,7 @@ class GaussianAdapter(JobAdapter):
times_rerun (int, optional): Number of times this job was re-run with the same arguments (no trsh methods).
torsions (List[List[int]], optional): The 0-indexed atom indices of the torsion(s).
tsg (int, optional): TSGuess number if optimizing TS guesses.
xyz (dict, optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
xyz (Union[dict,List[dict]], optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
"""

def __init__(self,
Expand Down Expand Up @@ -136,6 +137,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -145,7 +147,7 @@ def __init__(self,
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):

self.incore_capacity = 1
Expand Down Expand Up @@ -184,6 +186,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down Expand Up @@ -232,10 +235,10 @@ def write_input_file(self) -> None:
input_dict['memory'] = self.input_file_memory
input_dict['method'] = self.level.method
input_dict['multiplicity'] = self.multiplicity
input_dict['xyz'] = xyz_to_str(self.xyz)
input_dict['scan_trsh'] = self.args['keyword']['scan_trsh'] if 'scan_trsh' in self.args['keyword'] else ''
integral_algorithm = 'Acc2E=14' if 'Acc2E=14' in input_dict['trsh'] else 'Acc2E=12'
input_dict['trsh'] = input_dict['trsh'].replace('int=(Acc2E=14)', '') if 'Acc2E=14' in input_dict['trsh'] else input_dict['trsh']
input_dict['xyz'] = [xyz_to_str(xyz) for xyz in self.xyz] if self.run_multi_species else xyz_to_str(self.xyz)

if self.level.basis is not None:
input_dict['slash_1'] = '/'
Expand All @@ -245,9 +248,6 @@ def write_input_file(self) -> None:
if self.level.method_type in ['semiempirical', 'force_field']:
self.checkfile = None

if not is_restricted(self):
input_dict['restricted'] = 'u'

if self.level.dispersion is not None:
input_dict['dispersion'] = self.level.dispersion

Expand All @@ -263,8 +263,9 @@ def write_input_file(self) -> None:
self.level.method = 'cbs-qb3'

# Job type specific options
max_c = self.args['trsh'].split()[1] if 'max_cycles' in self.args['trsh'] else 100
if self.job_type in ['opt', 'conformers', 'optfreq', 'composite']:
keywords = ['ts', 'calcfc', 'noeigentest', 'maxcycles=100'] if self.is_ts else ['calcfc']
keywords = ['ts', 'calcfc', 'noeigentest', f'maxcycles={max_c}'] if self.is_ts else ['calcfc']
if self.level.method in ['rocbs-qb3']:
# There are no analytical 2nd derivatives (FC) for this method.
keywords = ['ts', 'noeigentest', 'maxcycles=100'] if self.is_ts else []
Expand All @@ -291,7 +292,8 @@ def write_input_file(self) -> None:
keywords.extend(['tight', 'maxstep=5'])
else:
keywords.extend(['tight', 'maxstep=5'])
input_dict['job_type_1'] = f"opt=({', '.join(key for key in keywords)})"
input_dict['job_type_1'] = "opt" if self.level.method_type not in ['dft', 'composite', 'wavefunction']\
else f"opt=({', '.join(key for key in keywords)})"

elif self.job_type == 'freq':
input_dict['job_type_2'] = f'freq IOp(7/33=1) scf=(tight, direct) integral=(grid=ultrafine, {integral_algorithm})'
Expand Down Expand Up @@ -391,9 +393,29 @@ def write_input_file(self) -> None:

# Remove double spaces
input_dict['job_type_1'] = input_dict['job_type_1'].replace(' ', ' ')

input_file = ''
input_dict_origin = input_dict.copy()

restricted_list_bool = is_restricted(self)
restricted_list = ["" if flag else 'u' for flag in ([restricted_list_bool]
if isinstance(restricted_list_bool, bool) else restricted_list_bool)]

with open(os.path.join(self.local_path, input_filenames[self.job_adapter]), 'w') as f:
f.write(Template(input_template).render(**input_dict))
if not self.run_multi_species:
input_dict['restricted'] = restricted_list[0]
f.write(Template(input_template).render(**input_dict))
else:
for index, spc in enumerate(self.species):
input_dict['charge'] = input_dict_origin['charge'][index]
input_dict['label'] = input_dict_origin['label'][index]
input_dict['multiplicity'] = input_dict_origin['multiplicity'][index]
input_dict['xyz'] = input_dict_origin['xyz'][index]
input_dict['restricted'] = restricted_list[index]
input_file += Template(input_template).render(**input_dict)
if index < len(self.species) - 1:
input_file += '\n--link1--\n'
f.write(input_file)

def set_files(self) -> None:
"""
Expand Down
Loading

0 comments on commit 061cdc8

Please sign in to comment.