From 25152dfeea4200c4a527c49f9cc5487dff86b0bd Mon Sep 17 00:00:00 2001 From: Calvin Date: Sat, 21 Dec 2024 23:02:12 +0200 Subject: [PATCH] Server Running CREST --- arc/job/adapters/ts/heuristics.py | 357 +++++++++++++++++++----------- arc/settings/submit.py | 48 ++++ arc/species/converter.py | 2 + 3 files changed, 275 insertions(+), 132 deletions(-) diff --git a/arc/job/adapters/ts/heuristics.py b/arc/job/adapters/ts/heuristics.py index 6d2118cc42..69cf3da7c0 100644 --- a/arc/job/adapters/ts/heuristics.py +++ b/arc/job/adapters/ts/heuristics.py @@ -16,6 +16,7 @@ """ import datetime +import time import itertools import math import subprocess @@ -33,17 +34,17 @@ from arkane.statmech import is_linear from arc.common import almost_equal_coords, get_logger, is_angle_linear, key_by_val -from arc.imports import settings +from arc.imports import settings, submit_scripts from arc.job.adapter import JobAdapter from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family from arc.job.factory import register_job_adapter +from arc.job.local import submit_job, check_job_status from arc.plotter import save_geo from arc.species.converter import (compare_zmats, relocate_zmat_dummy_atoms_to_the_end, zmat_from_xyz, zmat_to_xyz, str_to_xyz, - xyz_to_str, str_to_str, xyz_to_str, xyz_to_dmat) @@ -60,7 +61,7 @@ CREST_PATH = settings['CREST_PATH'] CREST_ENV_PATH = settings['CREST_ENV_PATH'] HAS_CREST = True - + SERVERS = settings['servers'] except KeyError: HAS_CREST = False @@ -999,7 +1000,7 @@ def h_abstraction(arc_reaction: 'ARCReaction', # Don't modify dihedrals for an attacking H (or other linear radical) at a linear angle, C ~ A -- H1 - H2 -- H. dihedral_increment = 360 - for rmg_reaction in rmg_reactions: + for i, rmg_reaction in enumerate(rmg_reactions): rmg_reactant_mol = rmg_reaction.reactants[int(reactants_reversed)].molecule[0] rmg_product_mol = rmg_reaction.products[int(not products_reversed)].molecule[0] h1 = rmg_reactant_mol.atoms.index([atom for atom in rmg_reactant_mol.atoms if atom.label == '*2'][0]) @@ -1024,7 +1025,7 @@ def h_abstraction(arc_reaction: 'ARCReaction', d2_d3_product = [(None, d3) for d3 in d3_values] else: d2_d3_product = [(None, None)] - + rmg_xyz_guesses = list() zmats = list() for d2, d3 in d2_d3_product: xyz_guess = None @@ -1058,10 +1059,10 @@ def h_abstraction(arc_reaction: 'ARCReaction', # This TS is unique, and has no atom collisions. zmats.append(zmat_guess) xyz_guesses.append(xyz_guess) - - if xyz_guesses: - for i, xyz_guess_crest in enumerate(xyz_guesses): - # 1. Check if dict + rmg_xyz_guesses.append(xyz_guess) + crest_paths = list() + if rmg_xyz_guesses and HAS_CREST: + xyz_guess_crest = rmg_xyz_guesses[0] if isinstance(xyz_guess_crest, dict): df_dmat = convert_xyz_to_df(xyz_guess_crest) elif isinstance(xyz_guess_crest, str): @@ -1077,13 +1078,18 @@ def h_abstraction(arc_reaction: 'ARCReaction', a = extract_digits(h_abs_atoms_dict['A']) h = extract_digits(h_abs_atoms_dict['H']) b = extract_digits(h_abs_atoms_dict['B']) - xyz_guess = crest_ts_conformer_search(xyz_guess_crest, a, h, b, path=path, xyz_crest_int=i) - if xyz_guess is not None: - logger.info('Successfully completed crest conformer search:' - f' {xyz_to_str(xyz_guess)}') - xyz_guesses.append(xyz_guess) - except (ValueError or KeyError) as e: - logger.error(f'Could not determine the H abstraction atoms, got:\n{e}') + crest_path = crest_ts_conformer_search(xyz_guess_crest, a, h, b, path=path) + crest_paths.append(crest_path) + except (ValueError, KeyError) as e: + logger.error(f"Could not determine the H abstraction atoms, got:\n{e}") + + if crest_paths: + crest_jobs = submit_crest_jobs(crest_paths) + monitor_crest_jobs(crest_jobs) # Keep checking job statuses until complete + process_completed_jobs(crest_jobs, xyz_guesses) + else: + logger.error("No CREST paths found") + return xyz_guesses @@ -1099,33 +1105,23 @@ def crest_ts_conformer_search(xyz_guess: dict, a_atom: int, h_atom: int, b_atom: # TODO: Pass the atom A - H - B for constraints """ - path = os.path.join(path, 'crest') + path = os.path.join(path, f'crest_{xyz_crest_int}') if not os.path.exists(path): os.makedirs(path) # Write coords to coords.ref file symbols = xyz_guess['symbols'] - coords = xyz_guess['coords'] - angstrom_to_bohr = 1.8897259886 - - - with open(os.path.join(path, 'coords.ref'), 'w') as f: - # Format - XYZ in Bohr: - # - # $coord - # x y z atom1 - # x y z atom2 - # ... - # $end - - f.write('$coord\n') - for i, (x, y, z) in enumerate(coords): - # Convert to Bohr from Angstrom - x_bohr = x * angstrom_to_bohr - y_bohr = y * angstrom_to_bohr - z_bohr = z * angstrom_to_bohr - f.write(f' {x_bohr:>18.14f} {y_bohr:>18.14f} {z_bohr:>18.14f} {symbols[i]}\n') - f.write('$end\n') + + converted_coords = str_to_str(xyz_str=xyz_guess['coords'], reverse_atoms=True, convert_to="bohr") + coords_ref_content = f"$coord\n{converted_coords}\n$end\n" + coords_ref_path = os.path.join(path, 'coords.ref') + + try: + with open(coords_ref_path, 'w') as f: + f.write(coords_ref_content) + except IOError as e: + print(f"Failed to write to {coords_ref_path}: {e}") + return None # Get count of atoms - remember to add 1 to all atom numbers num_atoms = len(symbols) @@ -1150,36 +1146,133 @@ def crest_ts_conformer_search(xyz_guess: dict, a_atom: int, h_atom: int, b_atom: f.write('$metadyn\n') f.write(f' atoms: {", ".join(map(str, list_of_atoms_numbers_not_participating_in_reaction))}\n') f.write('$end\n') - # Run CREST - # Prepare the command + commands = [ f'{CREST_PATH}', + f' -T {SERVERS["local"].get("cpus", 8)}', f'{path}/coords.ref', f'--cinp {path}/constraints.inp', '--noreftopo' ] command = ' '.join(commands) command = f"{CREST_ENV_PATH} && {command}" if CREST_ENV_PATH else command - # Run the command using Popen - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=path, executable='/bin/bash') - stdout, stderr = process.communicate() - - if process.returncode == 0: - print("Command completed successfully.") - with open(os.path.join(path, 'crest_best.xyz'), 'r') as f: - content = f.read() - - xyz_guess = str_to_xyz(content) - # Delete unnecessary files - for file in os.listdir(path): - if file not in {'coords.ref', 'constraints.inp', 'crest_best.xyz'}: - os.remove(os.path.join(path, file)) - return xyz_guess - else: - print(f"Command failed with return code {process.returncode}") - print(f"Standard Output: {stdout.decode()}") - print(f"Standard Error: {stderr.decode()}") - return None + + if SERVERS.get('local') is not None: + if SERVERS['local']['cluster_soft'].lower() in ['condor', 'htcondor']: + # Write job submission scripts + + sub_job = submit_scripts['local']['crest'] + format_params = { + "name": f"crest_{xyz_crest_int}", + "cpus": SERVERS['local'].get('cpus', 8), + "memory": SERVERS['local'].get('memory', 32.0) * 1024, + } + sub_job = sub_job.format(**format_params) + + with open(os.path.join(path, settings['submit_filenames']['HTCondor']), 'w') as f: + f.write(sub_job) + + # Write the crest job + crest_job = submit_scripts['local']['crest_job'] + format_params = { + "commands": command, + } + crest_job = crest_job.format(**format_params) + + with open(os.path.join(path, 'job.sh'), 'w') as f: + f.write(crest_job) + elif SERVERS['local']['cluster_soft'].lower() == 'pbs': + # Write job submission scripts + sub_job = submit_scripts['local']['crest'] + format_params = { + "queue": "alon_q", + "name": f"crest_{xyz_crest_int}", + "cpus": SERVERS['local'].get('cpus', 8), + "memory": SERVERS['local'].get('memory', 32.0) * 1024, + "commands": command, + } + sub_job = sub_job.format(**format_params) + + with open(os.path.join(path, settings['submit_filenames']['PBS']), 'w') as f: + f.write(sub_job) + + return path + +def submit_crest_jobs(crest_paths: List[str]) -> None: + """ + Submit CREST jobs to the server. + + Args: + crest_paths (List[str]): List of paths to the CREST directories. + + Returns: + dict: A dictionary containing job IDs as keys and their statuses as values. + """ + crest_jobs = {} + for crest_path in crest_paths: + job_status, job_id = submit_job(path=crest_path) + crest_jobs[job_id] = {"path": crest_path, "status": job_status} + return crest_jobs + + +def monitor_crest_jobs(crest_jobs: dict, check_interval: int = 300) -> None: + """ + Monitor CREST jobs until they are complete. + + Args: + crest_jobs (dict): Dictionary containing job information (job ID, path, and status). + check_interval (int): Time interval (in seconds) to wait between status checks. + """ + while True: + all_done = True + for job_id, job_info in crest_jobs.items(): + if job_info["status"] not in ["done", "failed"]: + try: + job_info["status"] = check_job_status(job_id) # Update job status + except Exception as e: + logger.error(f"Error checking job status for job {job_id}: {e}") + job_info["status"] = "failed" + if job_info["status"] not in ["done", "failed"]: + all_done = False + if all_done: + break + time.sleep(min(check_interval, 100)) + +def process_completed_jobs(crest_jobs: dict, xyz_guesses: list) -> None: + """ + Process the completed CREST jobs and update XYZ guesses. + + Args: + crest_jobs (dict): Dictionary containing job information. + xyz_guesses (list): List to store the resulting XYZ guesses. + """ + for job_id, job_info in crest_jobs.items(): + crest_path = job_info["path"] + if job_info["status"] == "done": + crest_best_path = os.path.join(crest_path, "crest_best.xyz") + if os.path.exists(crest_best_path): + with open(crest_best_path, "r") as f: + content = f.read() + xyz_guess = str_to_xyz(content) + xyz_guesses.append(xyz_guess) + else: + logger.error(f"crest_best.xyz not found in {crest_path}") + elif job_info["status"] == "failed": + logger.error(f"CREST job failed for {crest_path}") + +def extract_digits(s: str) -> int: + """ + Extract the first integer from a string + + Args: + s (str): The string to extract the integer from + + Returns: + int: The first integer in the string + + """ + return int(re.sub(r"[^\d]", "", s)) + def convert_xyz_to_df(xyz: dict) -> pd.DataFrame: """ @@ -1208,85 +1301,85 @@ def get_h_abs_atoms(dataframe: pd.DataFrame) -> dict: Returns: dict: The hydrogen atom and the two heavy atoms. The keys are 'H', 'A', 'B' - """ - # Ensure there are at least 3 atoms in the TS - if len(dataframe) < 3: - raise ValueError("TS must contain at least 3 atoms.") - if len(dataframe) == 3 and dataframe.index.str.startswith("H").sum() == 2: - # Identify the heavy atom - heavy_atom = dataframe.index[~dataframe.index.str.startswith("H")][0] # Should be the only heavy atom - hydrogen_atoms = dataframe.index[dataframe.index.str.startswith("H")] # List of hydrogen atoms - - # Get distances from the heavy atom to the two hydrogens - distances_to_hydrogens = dataframe.loc[heavy_atom, hydrogen_atoms] - - # Select the hydrogen with the smallest distance to the heavy atom as `H` - hydrogen_with_min_distance = distances_to_hydrogens.idxmin() - - # The other hydrogen becomes `B` - other_hydrogen = hydrogen_atoms[hydrogen_atoms != hydrogen_with_min_distance][0] - - return {"H": hydrogen_with_min_distance, "A": heavy_atom, "B": other_hydrogen} - - elif len(dataframe) == 4 and dataframe.index.str.startswith("H").sum() == 3: - # Identify the heavy atom - heavy_atom = dataframe.index[~dataframe.index.str.startswith("H")][0] # Should be the only heavy atom - hydrogen_atoms = dataframe.index[dataframe.index.str.startswith("H")] # List of hydrogen atoms - - # Remove hydrogens from columns and the heavy atom from rows - filtered_df = dataframe.loc[hydrogen_atoms, [heavy_atom]] - - # Sort the distances from the heavy atom to all hydrogens - sorted_distances = filtered_df[heavy_atom].sort_values() - - # Select the hydrogen with the second furthest distance - hydrogen_with_max_distance = sorted_distances.index[-2] - - # Reset the DataFrame back to the original to find the other hydrogen (`B`) - remaining_hydrogens = hydrogen_atoms[hydrogen_atoms != hydrogen_with_max_distance] - filtered_hydrogens_df = dataframe.loc[[hydrogen_with_max_distance], remaining_hydrogens] - - # Find the hydrogen closest to the selected hydrogen (`H`) - closest_hydrogen = filtered_hydrogens_df.idxmin(axis=1).iloc[0] - - return {"H": hydrogen_with_max_distance, "A": heavy_atom, "B": closest_hydrogen} + closest_atoms = {} + for index, row in dataframe.iterrows(): + + row[index] = np.inf + closest = row.nsmallest(2).index.tolist() + closest_atoms[index] = closest + + hydrogen_keys = [key for key in dataframe.index if key.startswith("H")] + condition_occurrences = [] + + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + is_heavy_present = any( + atom for atom in closest_atoms if not atom.startswith("H") + ) + if_hydrogen_present = any( + atom + for atom in closest_atoms + if atom.startswith("H") and atom != hydrogen_key + ) + + if is_heavy_present and if_hydrogen_present: + # Store the details of this occurrence + condition_occurrences.append( + {"H": hydrogen_key, "A": atom_neighbours[0], "B": atom_neighbours[1]} + ) + + # Check if the condition was met + if condition_occurrences: + if len(condition_occurrences) > 1: + # Store distances to decide which occurrence to use + occurrence_distances = [] + for occurrence in condition_occurrences: + # Calculate the sum of distances to the two heavy atoms + hydrogen_key = f"{occurrence['H']}" + heavy_atoms = [f"{occurrence['A']}", f"{occurrence['B']}"] + try: + distances = dataframe.loc[hydrogen_key, heavy_atoms].sum() + occurrence_distances.append((occurrence, distances)) + except KeyError as e: + print(f"Error accessing distances for occurrence {occurrence}: {e}") + + # Select the occurrence with the smallest distance + best_occurrence = min(occurrence_distances, key=lambda x: x[1])[0] + return { + "H": extract_digits(best_occurrence["H"]), + "A": extract_digits(best_occurrence["A"]), + "B": extract_digits(best_occurrence["B"]), + } else: - # Filter the DataFrame for hydrogen rows and non-hydrogen columns - hydrogen_rows = dataframe.index[dataframe.index.str.startswith("H")] - heavy_atom_columns = dataframe.columns[~dataframe.columns.str.startswith("H")] - - filtered_df = dataframe.loc[hydrogen_rows, heavy_atom_columns] + # Check the all the hydrogen atoms, and see the closest two heavy atoms and aggregate their distances to determine which Hyodrogen atom has the lowest distance aggregate + min_distance = np.inf + selected_hydrogen = None + selected_heavy_atoms = None - # Find the hydrogen atom with the smallest bond distance to a heavy atom - min_distances = filtered_df.min(axis=1) - min_distances = min_distances[min_distances <= 2.0] - hydrogen_with_min_distance = min_distances.idxmax() - min_distance_column = filtered_df.loc[hydrogen_with_min_distance].idxmin() + for hydrogen_key in hydrogen_keys: + atom_neighbours = closest_atoms[hydrogen_key] + heavy_atoms = [atom for atom in atom_neighbours if not atom.startswith("H")] - # Handle cases with multiple heavy atoms - remaining_columns = dataframe.columns[ - ~dataframe.columns.isin([hydrogen_with_min_distance, min_distance_column]) - ] - remaining_df = dataframe.loc[[hydrogen_with_min_distance], remaining_columns] - second_closest_atom = remaining_df.idxmin(axis=1).iloc[0] - - return {"H": hydrogen_with_min_distance, "A": min_distance_column, "B": second_closest_atom} - -def extract_digits(s: str) -> int: - """ - Extract the first integer from a string - - Args: - s (str): The string to extract the integer from - - Returns: - int: The first integer in the string + if len(heavy_atoms) < 2: + continue - """ - return int(re.sub(r'[^\d]', '', s)) + distances = dataframe.loc[hydrogen_key, heavy_atoms[:2]].sum() + if distances < min_distance: + min_distance = distances + selected_hydrogen = hydrogen_key + selected_heavy_atoms = heavy_atoms + + if selected_hydrogen: + return { + "H": extract_digits(selected_hydrogen), + "A": extract_digits(selected_heavy_atoms[0]), + "B": extract_digits(selected_heavy_atoms[1]), + } + else: + raise ValueError("No valid hydrogen atom found.") register_job_adapter('heuristics', HeuristicsAdapter) diff --git a/arc/settings/submit.py b/arc/settings/submit.py index 58bf1577a5..fb0647f808 100644 --- a/arc/settings/submit.py +++ b/arc/settings/submit.py @@ -289,6 +289,38 @@ }, 'atlas': { + 'crest': """Universe = vanilla + ++JobName = "{name}" + +log = job.log +output = out.txt +error = err.txt + +getenv = True + +should_transfer_files = no + +executable = job.sh + +request_cpus = {cpus} +request_memory = {memory}MB + +queue + +""", + 'crest_job': """#!/bin/bash + +touch initial_time + +{commands} + +touch final_time + +# Remove all files except for crest_best.xyz, coords.ref constraints.inp +rm -vfr !(crest_best.xyz|coords.ref|constraints.inp) + +""", # Atlas uses HTCondor, see docs here: https://htcondor.readthedocs.io/en/latest/ # Gaussian 09 'gaussian': """Universe = vanilla @@ -854,6 +886,22 @@ touch final_time """, + 'crest': """#!/bin/bash -l +#PBS -q {queue} +#PBS -N {name} +#PBS -l select=1:ncpus={cpus}:mem={memory}:mpiprocs={cpus} +#PBS -o out.txt +#PBS -e err.txt + +touch initial_time + +{commands} + +touch final_time + +rm -vrf !(crest_best.xyz|coords.ref|constraints.inp) + + """, }, 'server3': { 'gaussian': """#!/bin/bash -l diff --git a/arc/species/converter.py b/arc/species/converter.py index e43fcc4bf4..c7345bfda6 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -69,6 +69,8 @@ def str_to_str(xyz_str: str, Returns: str The converted string xyz format. """ + if isinstance(xyz_str, list): + xyz_str = '\n'.join(xyz_str) if not isinstance(xyz_str, str): raise ConverterError(f'Expected a string input, got {type(xyz_str)}') if project_directory is not None and os.path.isfile(os.path.join(project_directory, xyz_str)):