MolSSI · ahurta92 · Jan 30, 2020 · Jan 30, 2020 · Mar 19, 2020 · Apr 21, 2020
diff --git a/qcengine/programs/base.py b/qcengine/programs/base.py
@@ -13,6 +13,7 @@
 from .mopac import MopacHarness
 from .mp2d import MP2DHarness
 from .nwchem import NWChemHarness
+from .madness import MadnessHarness
 from .openmm import OpenMMHarness
 from .psi4 import Psi4Harness
 from .qchem import QChemHarness
@@ -101,6 +102,7 @@ def list_available_programs() -> Set[str]:
 register_program(GAMESSHarness())
 register_program(MolproHarness())
 register_program(NWChemHarness())
+register_program(MadnessHarness())
 register_program(Psi4Harness())
 register_program(QChemHarness())
 register_program(TeraChemHarness())

diff --git a/qcengine/programs/madness/__init__.py b/qcengine/programs/madness/__init__.py
@@ -0,0 +1 @@
+from .runner import MadnessHarness
diff --git a/qcengine/programs/madness/germinate.py b/qcengine/programs/madness/germinate.py
@@ -0,0 +1,131 @@
+from typing import Any, Dict, Tuple
+
+from qcengine.exceptions import InputError
+
+# List of XC functionals known to NWChem
+_xc_functionals = [
+    "hf",
+    "acm",
+    "b3lyp",
+    "beckehandh",
+    "pbe0",
+    "becke97",
+    "becke97-1",
+    "becke97-2",
+    "becke97-3",
+    "becke97-d",
+    "becke98",
+    "hcth",
+    "hcth120",
+    "hcth147",
+    "hcth407",
+    "becke97gga1",
+    "hcth407p",
+    "mpw91",
+    "mpw1k",
+    "xft97",
+    "cft97",
+    "ft97",
+    "op",
+    "bop",
+    "pbeop",
+    "xpkzb99",
+    "cpkzb99",
+    "xtpss03",
+    "ctpss03",
+    "xctpssh",
+    "b1b95",
+    "bb1k",
+    "mpw1b95",
+    "mpwb1k",
+    "pw6b95",
+    "pwb6k",
+    "m05",
+    "m05-2x",
+    "vs98",
+    "m06",
+    "m06-hf",
+    "m06-L",
+    "m06-2x",
+    "HFexch",
+    "becke88",
+    "xperdew91",
+    "xpbe96",
+    "gill96",
+    "lyp",
+    "perdew81",
+    "perdew86",
+    "perdew91",
+    "cpbe96",
+    "pw91lda",
+    "slater",
+    "vwn_1",
+    "vwn_2",
+    "vwn_3",
+    "vwn_4",
+    "vwn_5",
+    "vwn_1_rpa",
+    "xtpss03",
+    "ctpss03",
+    "bc95",
+    "xpw6b95",
+    "xpwb6k",
+    "xm05",
+    "xm05-2x",
+    "cpw6b95",
+    "cpwb6k",
+    "cm05",
+    "cm05-2x",
+    "xvs98",
+    "cvs98",
+    "xm06-L",
+    "xm06-hf",
+    "xm06",
+    "xm06-2x",
+    "cm06-L",
+    "cm06-hf",
+    "cm06",
+    "cm06-2x",
+]
+
+
+def muster_modelchem(method: str, derint: int, use_tce: bool) -> Tuple[str, Dict[str, Any]]:
+    """Converts the QC method into NWChem keywords
+
+     Args:
+        method (str): Name of the QC method to use
+        derint (str): Index of the run type
+        use_tce (bool): Whether to use the Tensor Contraction Engine
+     Returns:
+        (str): Task command for NWChem
+        (dict): Any options for NWChem
+     """
+
+    # Standardize the method name
+    method = method.lower()
+    opts = {}
+
+    # Map the run type to
+    # runtyp = {"energy": "energy", "gradient": "gradient", "hessian": "hessian", "properties": "property"}[derint]
+    # runtyp = {"energy": "energy", "optimization": "gopt", "hessian": "hessian", "properties": "property"}[derint]
+
+    # Write out the theory directive
+
+    mdccmd = f""  ## we don't need this right now
+    ## in the future when we link other exec this will change
+    ## all we have to do is add options to the dft block in order to change the run type
+    ## default in energy
+    # do nothing
+    if method == "optimization":
+        opts["dft__gopt"] = True
+    elif method == "response":
+        opts["dft__response"] = True
+    elif method.split()[0] in _xc_functionals:
+        opts["dft__xc"] = method
+    else:
+        raise InputError(f"Method not recognized: {method}")
+
+    return mdccmd, opts
+
+
+# # # #
diff --git a/qcengine/programs/madness/harvester.py b/qcengine/programs/madness/harvester.py
@@ -0,0 +1,222 @@
+import re
+import json
+import logging
+from decimal import Decimal
+from typing import Tuple
+
+import numpy as np
+import qcelemental as qcel
+from qcelemental.models import Molecule
+from qcelemental.models.results import AtomicResultProperties
+from qcelemental.molparse import regex
+
+from ..util import PreservingDict
+
+logger = logging.getLogger(__name__)
+
+
+def harvest_output(outtext: str) -> Tuple[PreservingDict, Molecule, list, str, str]:
+    """Function to read an entire MADNESS output file.
+
+    Reads all of the different "line search" segments of a file and returns
+    values from the last segment for which a geometry was written.
+
+    Args:
+        outtext (str): Output written to stdout
+    Returns:
+        - (PreservingDict) Variables extracted from the output file in the last complete step
+        - (Molecule): Molecule from the last complete step
+        - (list): Gradient from the last complete step
+        - (str): Version string
+        - (str): Error message, if any
+    """
+
+    # Loop over all steps
+    # TODO (wardlt): Is it only necessary to read the last two steps?
+    pass_psivar = []
+    pass_coord = []
+    pass_grad = []
+    for outpass in re.split(r"Converged!", outtext, re.MULTILINE):
+        psivar, madcoord, madgrad, version, error = harvest_outfile_pass(outpass)
+        pass_psivar.append(psivar)## all the variables extracted
+        pass_coord.append(madcoord)
+        pass_grad.append(madgrad)
+
+    # Determine which segment contained the last geometry
+    retindx = -1 #if pass_coord[-1] else -2
+    return pass_psivar[retindx], pass_coord[retindx], pass_grad[retindx], version, error
+
+
+def harvest_outfile_pass(outtext):
+    """Function to read Madness output file *outtext* and parse important
+    quantum chemical information from it in
+
+    """
+    psivar = PreservingDict()
+    psivar_coord = None
+    psivar_grad = None
+    version = ""
+    error = ""  # TODO (wardlt): The error string is never used.
+
+    NUMBER = r"(?x:" + regex.NUMBER + ")"
+    # fmt: off
+
+    # Process version
+    mobj = re.search(
+        r'^\s+' + r'MADNESS' + r'\s+' + r'(\d+.\d\d+.\d)' +r'\s'+ r'multiresolution suite'+r'\s*$',
+        outtext, re.MULTILINE)
+    if mobj:
+        logger.debug('matched version')
+        version = mobj.group(1)
+
+    # Process SCF
+    # 1)Fail to converge (TODO Robert ask for failed convergence)
+    mobj = re.search(r'^\s+' + r'(?:Calculation failed to converge)' + r'\s*$', outtext, re.MULTILINE)
+    if mobj:
+        logger.debug('failed to converge')
+
+    # 2)Calculation converged
+    else:
+        OPTIONS=[r'kinetic',r'nonlocal psp',r'nuclear attraction',r'coulomb',r'PCM',r'exchange-correlation',r'nuclear-repulsion',r'total']
+        PSIVAR=['KINETIC ENERGY','NONLOCAL PSP','NUCLEAR ATTRACTION ENERGY','COULOMB','PCM','EXCHANGE-CORRELATION','NUCLEAR REPULSION ENERGY','TOTAL SCF ENERGY']
+        optDict=dict(zip(OPTIONS,PSIVAR)) 
+
+        for var,VAR in optDict.items():
+            mobj = re.search(r'^\s+' + var + r'\s*' + NUMBER + r's*$', outtext, re.MULTILINE)
+            if mobj:
+                logger.debug('matched SCF')## not sure what this means
+                psivar[VAR] = mobj.group(1)
+    # Other options
+
+
+    # Process CURRENT energies (TODO: needs better way)
+    if "TOTAL SCF ENERGY" in psivar:
+        psivar["CURRENT REFERENCE ENERGY"] = psivar["TOTAL SCF ENERGY"]
+        psivar["CURRENT ENERGY"] = psivar["TOTAL SCF ENERGY"]
+
+
+    return psivar, psivar_coord, psivar_grad, version, error
+
+
+def harvest_hessian(hess: str) -> np.ndarray: pass 
+#    """Parses the contents of the NWChem hess file into a hessian array.
+
+#     Args:
+#         hess (str): Contents of the hess file
+#     Returns:
+#         (np.ndarray) Hessian matrix as a 2D array
+#     """
+
+    # Change the "D[+-]" notation of Fortran output to "E[+-]" used by Python
+#     hess_conv = hess.replace("D", "E")
+
+#     # Parse all of the float values
+#     hess_tri = [float(x) for x in hess_conv.strip().splitlines()]
+
+#     # The value in the Hessian matrix is the lower triangle printed row-wise (e.g., 0,0 -> 1,0 -> 1,1 -> ...)
+#     n = int(np.sqrt(8 * len(hess_tri) + 1) - 1) // 2  # Size of the 2D matrix
+
+#     # Add the lower diagonal
+#     hess_arr = np.zeros((n, n))
+#     hess_arr[np.tril_indices(n)] = hess_tri
+
+#     # Transpose and then set the lower diagonal again
+#     hess_arr = np.transpose(hess_arr)  # Numpy implementations might only change the ordering to column-major
+#     hess_arr[np.tril_indices(n)] = hess_tri
+
+#     return hess_arr.T  # So that the array is listed in C-order, needed by some alignment routines
+
+
+def extract_formatted_properties(psivars: PreservingDict) -> AtomicResultProperties:
+    """Get named properties out of the general variables extracted out of the result file
+
+    Args:
+        psivars (PreservingDict): Dictionary of the output results
+    Returns:
+        (AtomicResultProperties) Properties in a standard format
+    """
+    # TODO (wardlt): Get more of the named variables out of the NWChem file
+
+    # Initialize the output
+    output = dict()
+
+    # Extract the Calc Info
+    output.update(
+        {
+            "calcinfo_nbasis": psivars.get("N BASIS", None), ## Not a thing in madness
+            "calcinfo_nmo": psivars.get("N MO", None), ## Number of Mo orbitals
+            "calcinfo_natom": psivars.get("N ATOMS", None), ## Get madness to print this out
+            "calcinfo_nalpha": psivars.get("N ALPHA ELECTRONS", None), ## TODO (figure out how to read)
+            "calcinfo_nbeta": psivars.get("N BETA ELECTRONS", None),
+        }
+    )
+
+    # Get the "canonical" properties
+    output["return_energy"] = psivars["CURRENT ENERGY"]
+    output["nuclear_repulsion_energy"] = psivars["NUCLEAR REPULSION ENERGY"]
+
+    # Get the SCF properties
+    output["scf_total_energy"] = psivars.get("TOTAL SCF ENERGY", None)
+    #output["scf_one_electron_energy"] = psivars.get("ONE-ELECTRON ENERGY", None)
+    #output["scf_two_electron_energy"] = psivars.get("TWO-ELECTRON ENERGY", None)
+    #output["scf_dispersion_correction_energy"] = psivars.get("DFT DISPERSION ENERGY", None)
+
+    return AtomicResultProperties(**output)
+
+
+def harvest(in_mol: Molecule, madout: str, **outfiles) -> Tuple[PreservingDict, None, None, Molecule, str, str]:
+    """Parses all the pieces of output from NWChem: the stdout in
+    *nwout* Scratch files are not yet considered at this moment.
+
+    Args:
+        in_mol (Molecule): Input molecule
+        madout (str): Madness output molecule
+        outfiles (dict): Dictionary of outfile files and their contents
+    Returns:
+        - (PreservingDict) Variables extracted from the output file in the last complete step
+        - (None): Hessian from the last complete step (Not yet implemented)
+        - (None): Gradient from the last complete step (Not yet implemented)
+        - (Molecule): Molecule from the last complete step
+        - (str): Version string
+        - (str): Error message, if any
+    """
+
+    # Parse the Madness output
+    out_psivar, out_mol, out_grad, version, error = harvest_output(madout)
+
+
+    # If available, read higher-accuracy gradients
+    #  These were output using a Python Task in NWChem to read them out of the database
+    if outfiles.get("mad.grad") is not None:
+        logger.debug("Reading higher-accuracy gradients")
+        out_grad = json.loads(outfiles.get("mad.grad"))
+
+    # If available, read the hessian
+    out_hess = None
+    if outfiles.get("mad.hess") is not None:
+        out_hess = harvest_hessian(outfiles.get("mad.hess"))
+
+    # Make sure the input and output molecules are the same
+    if out_mol:
+        if in_mol:
+            if abs(out_mol.nuclear_repulsion_energy() - in_mol.nuclear_repulsion_energy()) > 1.0e-3:
+                raise ValueError(
+                    """Madness outfile (NRE: %f) inconsistent with Psi4 input (NRE: %f)."""
+                    % (out_mol.nuclear_repulsion_energy(), in_mol.nuclear_repulsion_energy())
+                )
+    #else:
+    #    raise ValueError("""No coordinate information extracted from Madness output.""")
+
+    # If present, align the gradients and hessian with the original molecular coordinates
+    #  Madness rotates the coordinates of the input molecule. `out_mol` contains the coordinates for the
+    #  rotated molecule, which we can use to determine how to rotate the gradients/hessian
+    #amol, data = out_mol.align(in_mol, atoms_map=False, mols_align=True, verbose=0)
+
+   # mill = data["mill"]  # Retrieve tool with alignment routines
+
+    if out_grad is not None:
+        out_grad = mill.align_gradient(np.array(out_grad).reshape(-1, 3))
+    if out_hess is not None:
+        out_hess = mill.align_hessian(np.array(out_hess))
+
+    return out_psivar, out_hess, out_grad, out_mol, version, error