Skip to content

Commit

Permalink
Added Mockter - a dummy job adapter for testing and debugging (#759)
Browse files Browse the repository at this point in the history
It uses a YAML input/output file, returns the same `xyz` as in the
input, returns a higher TS energy than wells, and returns 3N-6 freqs
(with one imaginary freq for TSs only)
Tests added
  • Loading branch information
calvinp0 authored Aug 11, 2024
2 parents a3f79be + 5841e39 commit 0bb9d9a
Show file tree
Hide file tree
Showing 10 changed files with 458 additions and 8 deletions.
4 changes: 3 additions & 1 deletion arc/job/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class JobEnum(str, Enum):
# ESS
cfour = 'cfour'
gaussian = 'gaussian'
mockter = 'mockter'
molpro = 'molpro'
orca = 'orca'
psi4 = 'psi4'
Expand Down Expand Up @@ -553,7 +554,8 @@ def set_file_paths(self):
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species_label, self.job_name)
else:
self.local_path = os.path.join(self.project_directory, 'calcs', folder_name, self.species[0].multi_species, self.job_name)
self.local_path_to_output_file = os.path.join(self.local_path, 'output.out')
self.local_path_to_output_file = os.path.join(self.local_path, settings['output_filenames'][self.job_adapter]) \
if self.job_adapter in settings['output_filenames'] else 'output.out'
self.local_path_to_orbitals_file = os.path.join(self.local_path, 'orbitals.fchk')
self.local_path_to_check_file = os.path.join(self.local_path, 'check.chk')
self.local_path_to_hess_file = os.path.join(self.local_path, 'input.hess')
Expand Down
1 change: 1 addition & 0 deletions arc/job/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import arc.job.adapters.common
import arc.job.adapters.gaussian
import arc.job.adapters.mockter
import arc.job.adapters.molpro
import arc.job.adapters.orca
import arc.job.adapters.obabel
Expand Down
267 changes: 267 additions & 0 deletions arc/job/adapters/mockter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
"""
An adapter for dummy jobs, meant for testing and debugging only.
"""

import datetime
import os
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

from arc.common import get_logger, read_yaml_file, save_yaml_file
from arc.imports import settings
from arc.job.adapter import JobAdapter
from arc.job.adapters.common import _initialize_adapter, update_input_dict_with_args
from arc.job.factory import register_job_adapter
from arc.level import Level
from arc.species.converter import xyz_to_str, str_to_xyz

if TYPE_CHECKING:
from arc.reaction import ARCReaction
from arc.species import ARCSpecies

logger = get_logger()

# Unpack the adapter-relevant entries from ARC's global settings dictionary once at import
# time: job defaults, the available ESS per server, the conventional input/output file names
# keyed by job adapter, server definitions, and submit-script names keyed by cluster software.
default_job_settings, global_ess_settings, input_filenames, output_filenames, servers, submit_filenames = \
    settings['default_job_settings'], settings['global_ess_settings'], settings['input_filenames'], \
    settings['output_filenames'], settings['servers'], settings['submit_filenames']


class MockAdapter(JobAdapter):
    """
    A class for executing mock jobs, used for testing and debugging only.

    The adapter reads a YAML input file, echoes the input ``xyz`` back, reports a higher
    single-point energy for TS species than for wells, and fabricates 3N-6 harmonic
    frequencies (the first one made imaginary for TS species).

    Args:
        project (str): The project's name. Used for setting the remote path.
        project_directory (str): The path to the local project directory.
        job_type (list, str): The job's type, validated against ``JobTypeEnum``. If it's a list, pipe.py will be called.
        args (dict, optional): Methods (including troubleshooting) to be used in input files.
                               Keys are either 'keyword', 'block', or 'trsh', values are dictionaries with values
                               to be used either as keywords or as blocks in the respective software input file.
                               If 'trsh' is specified, an action might be taken instead of appending a keyword or a
                               block to the input file (e.g., change server or change scan resolution).
        bath_gas (str, optional): A bath gas. Currently only used in OneDMin to calculate L-J parameters.
        checkfile (str, optional): The path to a previous Gaussian checkfile to be used in the current job.
        conformer (int, optional): Conformer number if optimizing conformers.
        constraints (list, optional): A list of constraints to use during an optimization or scan.
        cpu_cores (int, optional): The total number of cpu cores requested for a job.
        dihedral_increment (float, optional): The degrees increment to use when scanning dihedrals of TS guesses.
        dihedrals (List[float], optional): The dihedral angels corresponding to self.torsions.
        directed_scan_type (str, optional): The type of the directed scan.
        ess_settings (dict, optional): A dictionary of available ESS and a corresponding server list.
        ess_trsh_methods (List[str], optional): A list of troubleshooting methods already tried out.
        execution_type (str, optional): The execution type, 'incore', 'queue', or 'pipe'.
        fine (bool, optional): Whether to use fine geometry optimization parameters. Default: ``False``.
        initial_time (datetime.datetime or str, optional): The time at which this job was initiated.
        irc_direction (str, optional): The direction of the IRC job (`forward` or `reverse`).
        job_id (int, optional): The job's ID determined by the server.
        job_memory_gb (int, optional): The total job allocated memory in GB (14 by default).
        job_name (str, optional): The job's name (e.g., 'opt_a103').
        job_num (int, optional): Used as the entry number in the database, as well as in ``job_name``.
        job_server_name (str, optional): Job's name on the server (e.g., 'a103').
        job_status (list, optional): The job's server and ESS statuses.
        level (Level, optional): The level of theory to use.
        max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional).
        run_multi_species (bool, optional): Whether to run a job for multiple species in the same input file.
        reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods.
        rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary.
        server (str): The server to run on.
        server_nodes (list, optional): The nodes this job was previously submitted to.
        queue (str, optional): The queue (partition) to submit the job to.
        attempted_queues (List[str], optional): Queues this job was already submitted to.
        species (List[ARCSpecies], optional): Entries are ARCSpecies instances.
                                              Either ``reactions`` or ``species`` must be given.
        testing (bool, optional): Whether the object is generated for testing purposes, ``True`` if it is.
        times_rerun (int, optional): Number of times this job was re-run with the same arguments (no trsh methods).
        torsions (List[List[int]], optional): The 0-indexed atom indices of the torsion(s).
        tsg (int, optional): TSGuess number if optimizing TS guesses.
        xyz (dict, optional): The 3D coordinates to use. If not give, species.get_xyz() will be used.
    """

    def __init__(self,
                 project: str,
                 project_directory: str,
                 job_type: Union[List[str], str],
                 args: Optional[dict] = None,
                 bath_gas: Optional[str] = None,
                 checkfile: Optional[str] = None,
                 conformer: Optional[int] = None,
                 constraints: Optional[List[Tuple[List[int], float]]] = None,
                 cpu_cores: Optional[str] = None,
                 dihedral_increment: Optional[float] = None,
                 dihedrals: Optional[List[float]] = None,
                 directed_scan_type: Optional[str] = None,
                 ess_settings: Optional[dict] = None,
                 ess_trsh_methods: Optional[List[str]] = None,
                 execution_type: Optional[str] = None,
                 fine: bool = False,
                 initial_time: Optional[Union['datetime.datetime', str]] = None,
                 irc_direction: Optional[str] = None,
                 job_id: Optional[int] = None,
                 job_memory_gb: float = 14.0,
                 job_name: Optional[str] = None,
                 job_num: Optional[int] = None,
                 job_server_name: Optional[str] = None,
                 job_status: Optional[List[Union[dict, str]]] = None,
                 level: Optional[Level] = None,
                 max_job_time: Optional[float] = None,
                 run_multi_species: bool = False,
                 reactions: Optional[List['ARCReaction']] = None,
                 rotor_index: Optional[int] = None,
                 server: Optional[str] = None,
                 server_nodes: Optional[list] = None,
                 queue: Optional[str] = None,
                 attempted_queues: Optional[List[str]] = None,
                 species: Optional[List['ARCSpecies']] = None,
                 testing: bool = False,
                 times_rerun: int = 0,
                 torsions: Optional[List[List[int]]] = None,
                 tsg: Optional[int] = None,
                 xyz: Optional[dict] = None,
                 ):

        self.incore_capacity = 1
        self.job_adapter = 'mockter'
        self.execution_type = 'incore'
        self.command = 'mockter'
        self.url = ''

        # Mockter fabricates results from the species geometry, so a species is mandatory.
        if species is None:
            raise ValueError('Cannot execute Mockter without an ARCSpecies object.')

        _initialize_adapter(obj=self,
                            is_ts=False,
                            project=project,
                            project_directory=project_directory,
                            job_type=job_type,
                            args=args,
                            bath_gas=bath_gas,
                            checkfile=checkfile,
                            conformer=conformer,
                            constraints=constraints,
                            cpu_cores=cpu_cores,
                            dihedral_increment=dihedral_increment,
                            dihedrals=dihedrals,
                            directed_scan_type=directed_scan_type,
                            ess_settings=ess_settings,
                            ess_trsh_methods=ess_trsh_methods,
                            fine=fine,
                            initial_time=initial_time,
                            irc_direction=irc_direction,
                            job_id=job_id,
                            job_memory_gb=job_memory_gb,
                            job_name=job_name,
                            job_num=job_num,
                            job_server_name=job_server_name,
                            job_status=job_status,
                            level=level,
                            max_job_time=max_job_time,
                            run_multi_species=run_multi_species,
                            reactions=reactions,
                            rotor_index=rotor_index,
                            server=server,
                            server_nodes=server_nodes,
                            queue=queue,
                            attempted_queues=attempted_queues,
                            species=species,
                            testing=testing,
                            times_rerun=times_rerun,
                            torsions=torsions,
                            tsg=tsg,
                            xyz=xyz,
                            )

    def write_input_file(self) -> None:
        """
        Write the input file to execute the job on the server.

        The input is a YAML file containing the level of theory, species identity,
        geometry, and job metadata.
        """
        input_dict = dict()
        input_dict['basis'] = self.level.basis or ''
        input_dict['charge'] = self.charge
        input_dict['label'] = self.species_label
        input_dict['memory'] = self.input_file_memory
        input_dict['method'] = self.level.method
        input_dict['multiplicity'] = self.multiplicity
        input_dict['xyz'] = xyz_to_str(self.xyz)
        input_dict['job_type'] = self.job_type
        # Note: a duplicate 'memory' assignment was removed here (it was set above).

        input_dict = update_input_dict_with_args(args=self.args, input_dict=input_dict)
        save_yaml_file(path=os.path.join(self.local_path, input_filenames[self.job_adapter]), content=input_dict)

    def set_files(self) -> None:
        """
        Set files to be uploaded and downloaded. Writes the files if needed.
        Modifies the self.files_to_upload and self.files_to_download attributes.

        self.files_to_download is a list of remote paths.

        self.files_to_upload is a list of dictionaries, each with the following keys:
        ``'name'``, ``'source'``, ``'make_x'``, ``'local'``, and ``'remote'``.
        If ``'source'`` = ``'path'``, then the value in ``'local'`` is treated as a file path.
        Else if ``'source'`` = ``'input_files'``, then the value in ``'local'`` will be taken
        from the respective entry in inputs.py
        If ``'make_x'`` is ``True``, the file will be made executable.
        """
        # 1. ** Upload **
        # 1.1. submit file
        if self.execution_type != 'incore':
            # we need a submit file for single or array jobs (either submitted to local or via SSH)
            self.write_submit_script()
            self.files_to_upload.append(self.get_file_property_dictionary(
                file_name=submit_filenames[servers[self.server]['cluster_soft']]))
        # 1.2. input file
        if not self.iterate_by:
            # if this is not a job array, we need the ESS input file
            self.write_input_file()
            self.files_to_upload.append(self.get_file_property_dictionary(file_name=input_filenames[self.job_adapter]))
        # 1.3. HDF5 file
        if self.iterate_by and os.path.isfile(os.path.join(self.local_path, 'data.hdf5')):
            self.files_to_upload.append(self.get_file_property_dictionary(file_name='data.hdf5'))
        # 1.4 job.sh
        job_sh_dict = self.set_job_shell_file_to_upload()  # Set optional job.sh files if relevant.
        if job_sh_dict is not None:
            self.files_to_upload.append(job_sh_dict)
        # 2. ** Download **
        # 2.1. HDF5 file
        if self.iterate_by and os.path.isfile(os.path.join(self.local_path, 'data.hdf5')):
            self.files_to_download.append(self.get_file_property_dictionary(file_name='data.hdf5'))
        else:
            # 2.2. output file
            self.files_to_download.append(self.get_file_property_dictionary(file_name=output_filenames[self.job_adapter]))

    def set_additional_file_paths(self) -> None:
        """
        Set additional file paths specific for the adapter.
        Called from set_file_paths() and extends it.

        Mockter needs no additional files.
        """
        pass

    def set_input_file_memory(self) -> None:
        """
        Set the input_file_memory attribute.

        Mockter does not run a real ESS, so the memory is passed through in GB as is.
        """
        self.input_file_memory = self.job_memory_gb

    def execute_incore(self):
        """
        Execute a job incore.

        Reads the YAML input file, echoes the geometry back, and writes a YAML output
        file with a mock single-point energy (TSs get a higher energy than wells),
        a mock T1 diagnostic, and 3N-6 mock frequencies (the first one imaginary for TSs).
        """
        input_content = read_yaml_file(os.path.join(self.local_path, input_filenames[self.job_adapter]))
        xyz = str_to_xyz(input_content['xyz'])
        e_elect = 0.0 if not self.species[0].is_ts else 50
        freqs = [500 + 20 * i for i in range(3 * len(xyz['symbols']) - 6)]
        # Guard against species with fewer than 3 atoms, for which 3N-6 <= 0 and
        # the frequency list is empty (indexing it would raise an IndexError).
        if self.species[0].is_ts and freqs:
            freqs[0] = -500
        output = {'adapter': 'mockter',
                  'xyz': input_content['xyz'],
                  'sp': e_elect,
                  'T1': 0.0002,
                  'freqs': freqs,
                  }
        save_yaml_file(path=os.path.join(self.local_path, output_filenames[self.job_adapter]), content=output)

    def execute_queue(self):
        """
        Execute a job to the server's queue.
        """
        self.legacy_queue_execution()


# Make this adapter discoverable through the job adapter factory under the 'mockter' key.
register_job_adapter('mockter', MockAdapter)
Loading

0 comments on commit 0bb9d9a

Please sign in to comment.