Skip to content

Commit

Permalink
Multi species (#724)
Browse files Browse the repository at this point in the history
This branch allows us to create a cluster of species, including
multi-individual species. To be more specific, ARC currently creates
separate Gaussian input files when it receives multiple species. With
this new feature, we can combine all the species into one Gaussian input
file, followed by the relevant post-processing steps.
  • Loading branch information
alongd authored Jan 16, 2024
2 parents 3b46df6 + 4aa5282 commit 061cdc8
Show file tree
Hide file tree
Showing 26 changed files with 5,886 additions and 118 deletions.
10 changes: 7 additions & 3 deletions arc/job/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def determine_job_array_parameters(self):
ARC will allocate, e.g., 8 workers, to simultaneously get processes (one by one) from the HDF5 bank
and execute them. On average, each worker in this example executes 125 jobs.
"""
if self.execution_type == 'incore':
if self.execution_type == 'incore' or self.run_multi_species:
return None
if len(self.job_types) > 1:
self.iterate_by.append('job_types')
Expand Down Expand Up @@ -533,7 +533,10 @@ def set_file_paths(self):
Set local and remote job file paths.
"""
folder_name = 'TS_guesses' if self.reactions is not None else 'TSs' if self.species[0].is_ts else 'Species'
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
if self.run_multi_species == False:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
else:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species[0].multi_species, self.job_name)
self.local_path_to_output_file = os.path.join(self.local_path, 'output.out')
self.local_path_to_orbitals_file = os.path.join(self.local_path, 'orbitals.fchk')
self.local_path_to_check_file = os.path.join(self.local_path, 'check.chk')
Expand All @@ -545,7 +548,8 @@ def set_file_paths(self):

if self.server is not None:
# Parentheses don't play well in folder names:
species_name_remote = self.species_label.replace('(', '_').replace(')', '_')
species_name_remote = self.species_label if isinstance(self.species_label, str) else self.species[0].multi_species
species_name_remote = species_name_remote.replace('(', '_').replace(')', '_')
path = servers[self.server].get('path', '').lower()
path = os.path.join(path, servers[self.server]['un']) if path else ''
self.remote_path = os.path.join(path, 'runs', 'ARC_Projects', self.project,
Expand Down
3 changes: 3 additions & 0 deletions arc/job/adapters/cfour.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class CFourAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand Down Expand Up @@ -122,6 +123,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand Down Expand Up @@ -170,6 +172,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down
72 changes: 58 additions & 14 deletions arc/job/adapters/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def _initialize_adapter(obj: 'JobAdapter',
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -114,7 +115,7 @@ def _initialize_adapter(obj: 'JobAdapter',
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):
"""
A common Job adapter initializer function.
Expand Down Expand Up @@ -164,6 +165,7 @@ def _initialize_adapter(obj: 'JobAdapter',
# When restarting ARC and re-setting the jobs, ``level`` is a string, convert it to a Level object instance
obj.level = Level(repr=level) if not isinstance(level, Level) and level is not None else level
obj.max_job_time = max_job_time or default_job_settings.get('job_time_limit_hrs', 120)
obj.run_multi_species = run_multi_species
obj.number_of_processes = 0
obj.reactions = [reactions] if reactions is not None and not isinstance(reactions, list) else reactions
obj.remote_path = None
Expand All @@ -180,18 +182,34 @@ def _initialize_adapter(obj: 'JobAdapter',
obj.pivots = [[tor[1] + 1, tor[2] + 1] for tor in obj.torsions] if obj.torsions is not None else None
obj.tsg = tsg
obj.workers = None
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
if not obj.run_multi_species:
obj.xyz = obj.species[0].get_xyz() if obj.species is not None and xyz is None else xyz
else:
obj.xyz = list()
if obj.species is not None:
for spc in obj.species:
obj.xyz.append(spc.get_xyz() if xyz is None else xyz)

if obj.job_num is None or obj.job_name is None or obj.job_server_name:
obj._set_job_number()

if obj.species is not None:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
if not obj.run_multi_species:
obj.charge = obj.species[0].charge
obj.multiplicity = obj.species[0].multiplicity
obj.is_ts = obj.species[0].is_ts
obj.species_label = obj.species[0].label
if len(obj.species) > 1:
obj.species_label += f'_and_{len(obj.species) - 1}_others'
else:
obj.charge = list()
obj.multiplicity = list()
obj.is_ts = obj.species[0].is_ts
obj.species_label = list()
for spc in obj.species:
obj.charge.append(spc.charge)
obj.multiplicity.append(spc.multiplicity)
obj.species_label.append(spc.label)
elif obj.reactions is not None:
obj.charge = obj.reactions[0].charge
obj.multiplicity = obj.reactions[0].multiplicity
Expand Down Expand Up @@ -233,25 +251,51 @@ def _initialize_adapter(obj: 'JobAdapter',
check_argument_consistency(obj)


def is_restricted(obj) -> bool:
def is_restricted(obj: 'JobAdapter') -> Union[bool, List[bool]]:
"""
Check whether a Job Adapter should be executed as restricted or unrestricted.
If the job adapter contains a list of species, return True or False per species.
Args:
obj: The job adapter object.
Returns:
Union[bool, List[bool]]: Whether to run as restricted (``True``) or not (``False``).
"""
if not obj.run_multi_species:
return is_species_restricted(obj)
else:
return [is_species_restricted(obj, species) for species in obj.species]


def is_species_restricted(obj: 'JobAdapter',
species: Optional['ARCSpecies'] = None,
) -> bool:
"""
Check whether a species should be executed as restricted or unrestricted.
Args:
obj: The job adapter object.
species (ARCSpecies, optional): The species to check.
Returns:
bool: Whether to run as restricted (``True``) or not (``False``).
"""
if (obj.multiplicity > 1 and obj.level.method_type != 'composite') \
or (obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1):

if obj.level.method_type in ['force_field','composite','semiempirical']:
return True

multiplicity = obj.multiplicity if species is None else species.multiplicity
number_of_radicals = obj.species[0].number_of_radicals if species is None else species.number_of_radicals
species_label = obj.species[0].label if species is None else species.label
if multiplicity > 1 or (number_of_radicals is not None and number_of_radicals > 1):
# run an unrestricted electronic structure calculation if the spin multiplicity is greater than one,
# or if it is one but the number of radicals is greater than one (e.g., bi-rad singlet)
# don't run unrestricted for composite methods such as CBS-QB3, it'll be done automatically if the
# multiplicity is greater than one, but do specify uCBS-QB3 for example for bi-rad singlets.
if obj.species[0].number_of_radicals is not None and obj.species[0].number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {obj.species_label} which has '
f'{obj.species[0].number_of_radicals} radicals and multiplicity {obj.multiplicity}.')
if number_of_radicals is not None and number_of_radicals > 1:
logger.info(f'Using an unrestricted method for species {species_label} which has '
f'{number_of_radicals} radicals and multiplicity {multiplicity}.')
return False
return True

Expand Down
14 changes: 14 additions & 0 deletions arc/job/adapters/common_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,26 @@ def setUpClass(cls):
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
)
cls.job_multi = GaussianAdapter(execution_type='incore',
job_type='scan',
torsions=[[1, 2, 3, 4]],
level=Level(method='wb97xd', basis='def2tzvp'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_GaussianAdapter'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=2, multi_species='mltspc1'),
ARCSpecies(label='spc1', xyz=['O 0 0 1'], multiplicity=1, number_of_radicals=1, multi_species='mltspc1')],
testing=True,
args={'keyword': {'general': 'IOp(1/12=5,3/44=0)'}},
run_multi_species = True,
)

def test_is_restricted(self):
"""Test the is_restricted() function"""
self.assertTrue(common.is_restricted(self.job_1))
self.assertFalse(common.is_restricted(self.job_2))
self.assertFalse(common.is_restricted(self.job_3))
benchmark_list = [False, True]
self.assertEqual(common.is_restricted(self.job_multi),benchmark_list)

def test_check_argument_consistency(self):
"""Test the check_argument_consistency() function"""
Expand Down
40 changes: 31 additions & 9 deletions arc/job/adapters/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class GaussianAdapter(JobAdapter):
job_status (list, optional): The job's server and ESS statuses.
level (Level, optional): The level of theory to use.
max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
server (str): The server to run on.
Expand All @@ -106,7 +107,7 @@ class GaussianAdapter(JobAdapter):
times_rerun (int, optional): Number of times this job was re-run with the same arguments (no trsh methods).
torsions (List[List[int]], optional): The 0-indexed atom indices of the torsion(s).
tsg (int, optional): TSGuess number if optimizing TS guesses.
xyz (dict, optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
xyz (Union[dict,List[dict]], optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
"""

def __init__(self,
Expand Down Expand Up @@ -136,6 +137,7 @@ def __init__(self,
job_status: Optional[List[Union[dict, str]]] = None,
level: Optional[Level] = None,
max_job_time: Optional[float] = None,
run_multi_species: bool = False,
reactions: Optional[List['ARCReaction']] = None,
rotor_index: Optional[int] = None,
server: Optional[str] = None,
Expand All @@ -145,7 +147,7 @@ def __init__(self,
times_rerun: int = 0,
torsions: Optional[List[List[int]]] = None,
tsg: Optional[int] = None,
xyz: Optional[dict] = None,
xyz: Optional[Union[dict,List[dict]]] = None,
):

self.incore_capacity = 1
Expand Down Expand Up @@ -184,6 +186,7 @@ def __init__(self,
job_status=job_status,
level=level,
max_job_time=max_job_time,
run_multi_species=run_multi_species,
reactions=reactions,
rotor_index=rotor_index,
server=server,
Expand Down Expand Up @@ -232,10 +235,10 @@ def write_input_file(self) -> None:
input_dict['memory'] = self.input_file_memory
input_dict['method'] = self.level.method
input_dict['multiplicity'] = self.multiplicity
input_dict['xyz'] = xyz_to_str(self.xyz)
input_dict['scan_trsh'] = self.args['keyword']['scan_trsh'] if 'scan_trsh' in self.args['keyword'] else ''
integral_algorithm = 'Acc2E=14' if 'Acc2E=14' in input_dict['trsh'] else 'Acc2E=12'
input_dict['trsh'] = input_dict['trsh'].replace('int=(Acc2E=14)', '') if 'Acc2E=14' in input_dict['trsh'] else input_dict['trsh']
input_dict['xyz'] = [xyz_to_str(xyz) for xyz in self.xyz] if self.run_multi_species else xyz_to_str(self.xyz)

if self.level.basis is not None:
input_dict['slash_1'] = '/'
Expand All @@ -245,9 +248,6 @@ def write_input_file(self) -> None:
if self.level.method_type in ['semiempirical', 'force_field']:
self.checkfile = None

if not is_restricted(self):
input_dict['restricted'] = 'u'

if self.level.dispersion is not None:
input_dict['dispersion'] = self.level.dispersion

Expand All @@ -263,8 +263,9 @@ def write_input_file(self) -> None:
self.level.method = 'cbs-qb3'

# Job type specific options
max_c = self.args['trsh'].split()[1] if 'max_cycles' in self.args['trsh'] else 100
if self.job_type in ['opt', 'conformers', 'optfreq', 'composite']:
keywords = ['ts', 'calcfc', 'noeigentest', 'maxcycles=100'] if self.is_ts else ['calcfc']
keywords = ['ts', 'calcfc', 'noeigentest', f'maxcycles={max_c}'] if self.is_ts else ['calcfc']
if self.level.method in ['rocbs-qb3']:
# There are no analytical 2nd derivatives (FC) for this method.
keywords = ['ts', 'noeigentest', 'maxcycles=100'] if self.is_ts else []
Expand All @@ -291,7 +292,8 @@ def write_input_file(self) -> None:
keywords.extend(['tight', 'maxstep=5'])
else:
keywords.extend(['tight', 'maxstep=5'])
input_dict['job_type_1'] = f"opt=({', '.join(key for key in keywords)})"
input_dict['job_type_1'] = "opt" if self.level.method_type not in ['dft', 'composite', 'wavefunction']\
else f"opt=({', '.join(key for key in keywords)})"

elif self.job_type == 'freq':
input_dict['job_type_2'] = f'freq IOp(7/33=1) scf=(tight, direct) integral=(grid=ultrafine, {integral_algorithm})'
Expand Down Expand Up @@ -391,9 +393,29 @@ def write_input_file(self) -> None:

# Remove double spaces
input_dict['job_type_1'] = input_dict['job_type_1'].replace(' ', ' ')

input_file = ''
input_dict_origin = input_dict.copy()

restricted_list_bool = is_restricted(self)
restricted_list = ["" if flag else 'u' for flag in ([restricted_list_bool]
if isinstance(restricted_list_bool, bool) else restricted_list_bool)]

with open(os.path.join(self.local_path, input_filenames[self.job_adapter]), 'w') as f:
f.write(Template(input_template).render(**input_dict))
if not self.run_multi_species:
input_dict['restricted'] = restricted_list[0]
f.write(Template(input_template).render(**input_dict))
else:
for index, spc in enumerate(self.species):
input_dict['charge'] = input_dict_origin['charge'][index]
input_dict['label'] = input_dict_origin['label'][index]
input_dict['multiplicity'] = input_dict_origin['multiplicity'][index]
input_dict['xyz'] = input_dict_origin['xyz'][index]
input_dict['restricted'] = restricted_list[index]
input_file += Template(input_template).render(**input_dict)
if index < len(self.species) - 1:
input_file += '\n--link1--\n'
f.write(input_file)

def set_files(self) -> None:
"""
Expand Down
Loading

0 comments on commit 061cdc8

Please sign in to comment.