Azure server #689
base: main
Changes from all commits
Diff: .gitignore
@@ -36,6 +36,7 @@ scratch/

 # csv database
 *.csv
+!basis_sets.csv

Review comment: Can you explain
Reply: So that whenever someone …

 # iPython files
 *.ipynb_*

@@ -52,6 +53,10 @@ timer.dat

 # .vscode
 .vscode

+# files created via testing
+nul
+run.out

 # .trunk folder
 .trunk
Diff: job adapter module (set_cpu_and_mem, _get_additional_job_info, troubleshoot_server)
@@ -772,10 +772,13 @@ def set_cpu_and_mem(self):

                     f'exceeds {100 * job_max_server_node_memory_allocation}% of the the maximum node memory on '
                     f'{self.server}. Setting it to {job_max_server_node_memory_allocation * max_mem:.2f} GB.')
             self.job_memory_gb = job_max_server_node_memory_allocation * max_mem
-            total_submit_script_memory = self.job_memory_gb * 1024 * 1.05  # MB
+            total_submit_script_memory = self.job_memory_gb * 1024 * 1.05 if (self.job_memory_gb * 1024 * 1.05) <= (max_mem * 1024) else max_mem * 1024  # MB

Review comment: Can you squash? I commented about this on the original commit

             self.job_status[1]['keywords'].append('max_total_job_memory')  # Useful info when troubleshooting.
         else:
-            total_submit_script_memory = self.job_memory_gb * 1024 * 1.1  # MB
+            if max_mem is None:
+                total_submit_script_memory = self.job_memory_gb * 1024 * 1.1
+            else:
+                total_submit_script_memory = self.job_memory_gb * 1024 * 1.1 if (self.job_memory_gb * 1024 * 1.1) <= (max_mem * 1024) else max_mem * 1024  # MB
         # Determine amount of memory in submit script based on cluster job scheduling system.
         cluster_software = servers[self.server].get('cluster_soft').lower() if self.server is not None else None
         if cluster_software in ['oge', 'sge', 'htcondor']:
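A side note on the capping logic above: the "request times a safety factor, capped at the node's physical memory" expression now appears three times with slightly different factors. Below is a minimal sketch of how it could be centralized; the helper name and signature are hypothetical and not part of this PR.

```python
import math
from typing import Optional


def capped_submit_memory_mb(job_memory_gb: float,
                            safety_factor: float,
                            max_node_memory_gb: Optional[float] = None) -> float:
    """Hypothetical helper: convert a job memory request (GB) to the submit-script
    value (MB), apply a safety factor, and cap it at the node's physical memory
    when that limit is known."""
    requested_mb = job_memory_gb * 1024 * safety_factor
    if max_node_memory_gb is None:
        return requested_mb
    return min(requested_mb, max_node_memory_gb * 1024)


# Example: a 50 GB request with a 5% overhead on a 48 GB node is capped at the node total.
print(math.ceil(capped_submit_memory_mb(50, 1.05, 48)))  # 49152 MB
```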
@@ -785,8 +788,8 @@ def set_cpu_and_mem(self):

             # In PBS, "#PBS -l select=1:ncpus=8:mem=12000000" specifies the memory for all cores to be 12 MB.
             self.submit_script_memory = math.ceil(total_submit_script_memory) * 1E6  # in Bytes
         elif cluster_software in ['slurm']:
-            # In Slurm, "#SBATCH --mem-per-cpu=2000" specifies the memory **per cpu/thread** to be 2000 MB.
-            self.submit_script_memory = math.ceil(total_submit_script_memory / self.cpu_cores)  # in MB
+            # In Slurm, "#SBATCH --mem=2000" specifies the memory to be 2000 MB.

Review comment: Please make the comment more explicit by specifying something like … If you change this behavior, do we need to make any changes to our submit scripts?

+            self.submit_script_memory = math.ceil(total_submit_script_memory)  # in MB
         self.set_input_file_memory()

     def as_dict(self) -> dict:
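To make the reviewer's question concrete: storing the job total instead of a per-core share presumably only stays consistent if the submit-script template also switches from --mem-per-cpu to --mem. The sketch below merely illustrates the two Slurm conventions quoted in the diff comments; the function is hypothetical, not part of this PR.

```python
import math


def slurm_memory_directive(total_memory_mb: float, cpu_cores: int, per_cpu: bool) -> str:
    """Render the same total memory request under the two Slurm conventions:
    --mem-per-cpu expects the per-core share, --mem expects the job total (both in MB)."""
    if per_cpu:
        return f'#SBATCH --mem-per-cpu={math.ceil(total_memory_mb / cpu_cores)}'
    return f'#SBATCH --mem={math.ceil(total_memory_mb)}'


# The same 14336 MB request on 8 cores, rendered both ways:
print(slurm_memory_directive(14336, 8, per_cpu=True))   # #SBATCH --mem-per-cpu=1792
print(slurm_memory_directive(14336, 8, per_cpu=False))  # #SBATCH --mem=14336
```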
@@ -942,18 +945,25 @@ def _get_additional_job_info(self):

         if cluster_soft in ['oge', 'sge', 'slurm', 'pbs', 'htcondor']:
             local_file_path_1 = os.path.join(self.local_path, 'out.txt')
             local_file_path_2 = os.path.join(self.local_path, 'err.txt')
-            local_file_path_3 = os.path.join(self.local_path, 'job.log')
-            if self.server != 'local' and self.remote_path is not None and not self.testing:
+            local_file_path_3 = None
+            for files in self.files_to_upload:
+                if 'job.sh' in files.values():
+                    local_file_path_3 = os.path.join(self.local_path, 'job.log')
+            if self.server != 'local' and self.remote_path is not None:
                 remote_file_path_1 = os.path.join(self.remote_path, 'out.txt')
                 remote_file_path_2 = os.path.join(self.remote_path, 'err.txt')
-                remote_file_path_3 = os.path.join(self.remote_path, 'job.log')
+                remote_file_path_3 = None
+                for files in self.files_to_upload:
+                    if 'job.sh' in files.values():
+                        remote_file_path_3 = os.path.join(self.remote_path, 'job.log')
                 with SSHClient(self.server) as ssh:
-                    for local_file_path, remote_file_path in zip([local_file_path_1,
-                                                                  local_file_path_2,
-                                                                  local_file_path_3],
-                                                                 [remote_file_path_1,
-                                                                  remote_file_path_2,
-                                                                  remote_file_path_3]):
+                    local_files_to_zip = [local_file_path_1, local_file_path_2]
+                    remote_files_to_zip = [remote_file_path_1, remote_file_path_2]
+                    if local_file_path_3 and remote_file_path_3:
+                        local_files_to_zip.append(local_file_path_3)
+                        remote_files_to_zip.append(remote_file_path_3)
+                    for local_file_path, remote_file_path in zip(local_files_to_zip, remote_files_to_zip):
                         try:
                             ssh.download_file(remote_file_path=remote_file_path,
                                               local_file_path=local_file_path)
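The optional job.log lookup above is now duplicated for the local and the remote directory. A possible refactor is sketched below; the helper name is hypothetical, and the list-of-dicts shape of files_to_upload is an assumption inferred from the files.values() check in the diff.

```python
import os
from typing import List, Optional


def optional_job_log_path(directory: str, files_to_upload: List[dict]) -> Optional[str]:
    """Return a job.log path under the given directory only if a 'job.sh' entry
    appears among the files to upload; otherwise return None."""
    for file_dict in files_to_upload:
        if 'job.sh' in file_dict.values():
            return os.path.join(directory, 'job.log')
    return None


# Example with a made-up upload list:
uploads = [{'file_name': 'submit.sub'}, {'file_name': 'job.sh'}]
print(optional_job_log_path('/tmp/run_1', uploads))  # /tmp/run_1/job.log
```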
@@ -963,10 +973,21 @@ def _get_additional_job_info(self):

                                            f'flags with stdout and stderr of out.txt and err.txt, respectively '
                                            f'(e.g., "#SBATCH -o out.txt"). Error message:')
                             logger.warning(e)
-            for local_file_path in [local_file_path_1, local_file_path_2, local_file_path_3]:
+            for local_file_path in [path for path in [local_file_path_1, local_file_path_2, local_file_path_3] if path]:

Review comment: Could be simpler to keep the above version and add to the condition below

                 if os.path.isfile(local_file_path):
-                    with open(local_file_path, 'r') as f:
-                        lines = f.readlines()
+                    with open(local_file_path, 'rb') as f:

Review comment: do you mind outsourcing this

+                        # Read the file
+                        first_bytes = f.read()
+                        # Check if the bytes contain a null byte
+                        has_null_byte = b'\x00' in first_bytes
+                        # Use the appropriate mode based on whether the file is binary or not
+                        mode = 'rb' if has_null_byte else 'r'
+                        # Read the file contents using the determined mode
+                        lines = first_bytes.decode('utf-8')
+                    if mode == 'r':
+                        with open(local_file_path, 'r') as f:
+                            lines = f.readlines()
                     content += ''.join([line for line in lines])
                     content += '\n'
                 else:
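Regarding the "outsourcing" request above, a self-contained sketch of what such a helper could look like. The name read_lines_tolerantly is hypothetical; unlike the diff, it uses a lossy decode so a genuinely binary file cannot raise UnicodeDecodeError. Treat it as one possible approach, not the PR's implementation.

```python
from typing import List


def read_lines_tolerantly(path: str) -> List[str]:
    """Read a possibly binary output file and return its lines; fall back to a
    lossy decode when a null byte suggests binary content."""
    with open(path, 'rb') as f:
        raw = f.read()
    if b'\x00' in raw:
        # Binary-looking content: decode what we can instead of failing.
        return raw.decode('utf-8', errors='ignore').splitlines(keepends=True)
    return raw.decode('utf-8').splitlines(keepends=True)


# In the loop above, the whole read-and-detect block would then collapse to:
# lines = read_lines_tolerantly(local_file_path)
```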
@@ -1346,6 +1367,14 @@ def troubleshoot_server(self):

         if run_job:
             # resubmit job
             self.execute()

+    def remove_remote_files(self):

Review comment: Where is this being called?
Reply: Here

+        """
+        Remove the remote files.
+        """
+        if (self.server != 'local' and self.server is not None):

Review comment: no need for the parenthesis

+            with SSHClient(self.server) as ssh:
+                ssh.remove_dir(self.remote_path)

     def troubleshoot_queue(self) -> bool:
         """Troubleshoot queue errors.
Diff: Molpro adapter (imports, attributes, set_input_file_memory)
@@ -9,6 +9,7 @@

 import os
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 import socket
+import re

Code scanning notice (CodeQL, Unused import): Import of 're' is not used.

 from mako.template import Template
@@ -156,6 +157,7 @@

         self.execution_type = execution_type or 'queue'
         self.command = 'molpro'
         self.url = 'https://www.molpro.net/'
+        self.core_change = None

         if species is None:
             raise ValueError('Cannot execute Molpro without an ARCSpecies object.')
@@ -326,7 +328,7 @@

         Set the input_file_memory attribute.
         """
         # Molpro's memory is per cpu core and in MW (mega word; 1000 MW = 7.45 GB on a 64-bit machine)
-        # The conversion from mW to GB was done using this (https://deviceanalytics.com/words-to-bytes-converter/)
+        # The conversion from mW to GB was done using this (c)

Review comment: ?

         # specifying a 64-bit architecture.
         #
         # See also:
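For reference, a worked example of the MW-per-core computation performed in the hunk below, using the 1000 MW ≈ 7.45 GB conversion stated above. The numbers are illustrative only.

```python
import math

# Illustrative numbers only: a 14 GB job spread over 8 cores.
job_memory_gb = 14
cpu_cores = 8
gb_per_megaword = 7.45e-3  # 1000 MW ~ 7.45 GB on a 64-bit machine, per the comment above

# Per-core memory in MW, matching the non-Zeus branch of the expression below.
input_file_memory = math.ceil(job_memory_gb / (gb_per_megaword * cpu_cores))
print(input_file_memory)  # 235 MW per core
```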
@@ -335,8 +337,37 @@

         # 800,000,000 bytes (800 mb).
         # Formula - (100,000,000 [Words]/( 800,000,000 [Bytes] / (job mem in gb * 1000,000,000 [Bytes])))/ 1000,000 [Words -> MegaWords]
         # The division by 1E6 is for converting into MWords
-        # Due to Zeus's configuration, there is only 1 nproc so the memory should not be divided by cpu_cores.
+        # Due to Zeus's configuration, there is only 1 nproc so the memory should not be divided by cpu_cores.
         self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores)) if 'zeus' not in socket.gethostname() else math.ceil(self.job_memory_gb / (7.45e-3))
+        # We need to check if ess_trsh_methods=['cpu'] and ess_trsh_methods=['molpro_memory:] exists
+        # If it does, we need to reduce the cpu_cores
+        if self.ess_trsh_methods is not None:
+            if 'cpu' in self.ess_trsh_methods and any('molpro_memory:' in method for method in self.ess_trsh_methods):
+                current_cpu_cores = self.cpu_cores
+                max_memory = self.job_memory_gb
+                memory_values = []
+                for item in self.ess_trsh_methods:
+                    if 'molpro_memory:' in item:
+                        memory_value = item.split('molpro_memory:')[1]
+                        memory_values.append(float(memory_value))
+
+                if memory_values:
+                    min_memory_value = min(memory_values)
+                    required_cores = math.floor(max_memory / (min_memory_value * 7.45e-3))
+                    if self.core_change is None:
+                        self.core_change = required_cores
+                    elif self.core_change == required_cores:
+                        # We have already done this
+                        # Reduce the cores by 1
+                        required_cores -= 1
+                    if required_cores < current_cpu_cores:
+                        self.cpu_cores = required_cores
+                        logger.info(f'Changing the number of cpu_cores from {current_cpu_cores} to {self.cpu_cores}')
+                        self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores)) if 'zeus' not in socket.gethostname() else math.ceil(self.job_memory_gb / (7.45e-3))
+
+
+

Review comment: please keep just one line break between methods

     def execute_incore(self):
         """
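A worked example of the new core-reduction logic in the hunk above, using made-up troubleshooting entries; it mirrors the arithmetic only and is not a drop-in replacement.

```python
import math

# Made-up troubleshooting history: Molpro previously reported it needs at least
# 280.5 MW per core, and the 'cpu' troubleshooting method was already attempted.
ess_trsh_methods = ['cpu', 'molpro_memory:280.5']
job_memory_gb = 14
cpu_cores = 8

memory_values = [float(m.split('molpro_memory:')[1])
                 for m in ess_trsh_methods if 'molpro_memory:' in m]
min_memory_value = min(memory_values)  # 280.5 MW per core, i.e. ~2.09 GB per core
required_cores = math.floor(job_memory_gb / (min_memory_value * 7.45e-3))
print(required_cores)  # 6: with 14 GB total, at most 6 cores can each get ~280.5 MW
```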
Review comment: can this commit be removed once it did its trick once?