diff --git a/README.md b/README.md index dc4a834..f1e6488 100644 --- a/README.md +++ b/README.md @@ -1,105 +1,109 @@ -# NOMAD's parser example plugin +# NOMAD's H5MD-NOMAD Parser Plugin +This is a plugin for [NOMAD](https://nomad-lab.eu) which contains a parser for the H5MD-NOMAD HDF5 file format. ## Getting started -### Fork the project +### Install the dependencies + +Clone the project and in the workspace folder, create a virtual environment (note this project uses Python 3.9): + +```sh +git clone https://github.com/nomad-coe/nomad-parser-h5md.git +cd nomad-parser-h5md +python3.9 -m venv .pyenv +. .pyenv/bin/activate +``` -Go to the github project page https://github.com/nomad-coe/nomad-parser-plugin-example, hit -fork (and leave a star, thanks!). Maybe you want to rename the project while forking! +There are 2 options for installation, while linking to the nomad-lab package: -### Clone your fork +1. Install with the current settings, which link to the develop branch of nomad-lab. In this case, +leave the `pyproject.toml` settings as is. -Follow the github instructions. The URL and directory depends on your user name or organization and the -project name you choose. But, it should look somewhat like this: +2. Install by linking to your local development version of nomad-lab. In this case, go into `pyproject.toml` +and replace: ``` -git clone git@github.com:markus1978/my-nomad-schema.git -cd my-nomad-schema +"nomad-lab@git+https://github.com/nomad-coe/nomad.git@develop", ``` -### Install the dependencies +under the dependencies variable to: +``` +"nomad-lab@file://", +``` -You should create a virtual environment. You will need the `nomad-lab` package (and `pytest`). -You need at least Python 3.9. 
+Now, install the plugin in development mode: ```sh -python3 -m venv .pyenv -source .pyenv/bin/activate pip install --upgrade pip -pip install '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple +pip install -e '.[dev]' ``` -**Note!** -Until we have an official pypi NOMAD release with the plugins functionality. Make -sure to include NOMAD's internal package registry (e.g. via `--index-url`). Follow the instructions -in `requirements.txt`. -### Run the tests - -Make sure the current directory is in your path: -```sh -export PYTHONPATH=. -``` - -You can run automated tests with `pytest`: - -```sh -pytest -svx tests -``` +### Run the tests -### Run linting +You can run local tests using the `pytest` package: ```sh -ruff check . +python -m pytest -sv ``` -### Run auto-formatting +where the `-s` and `-v` options toggle the output verbosity. -This is entirely optional. To add this as a check in github actions pipeline, uncomment the `ruff-formatting` step in `./github/workflows/actions.yaml`. +Our CI/CD pipeline produces a more comprehensive test report using `coverage` and `coveralls` packages. +To emulate this locally, perform: ```sh -ruff format . +pip install coverage coveralls +python -m coverage run -m pytest -sv ``` -You can parse an example archive that uses the schema with `nomad` -(installed via `nomad-lab` Python package): +This setup should allow you to run and test the plugin as a "standalone" package, i.e., without explicitly adding it to the NOMAD package. +However, if there is some issue in NOMAD recognizing the package, you may also need to add the package folder to the `PYTHONPATH` of the Python environment of your local NOMAD installation: ```sh -nomad parse tests/data/test.example-format.txt --show-archive +export PYTHONPATH="$PYTHONPATH:/src" ``` -## Developing your schema - -You can now start to develop you schema. Here are a few things that you might want to change: - -- The metadata in `nomad_plugin.yaml`. 
-- The name of the Python package `nomadparserexample`. If you want to define multiple plugins, you can nest packages. -- The name of the example parser `ExampleParser` and the schema definitions it might define. -- When you change module and class names, make sure to update the `nomad_plugin.yaml` accordingly. - - -## Build the python package -The `pyproject.toml` file contains everything that is necessary to turn the project -into a pip installable python package. Run the python build tool to create a package distribution: +### Run linting and auto-formatting +```sh +ruff check . +ruff format . --check ``` -pip install build -python -m build --sdist -``` - -You can install the package with pip: +Ruff auto-formatting is also a part of the GitHub workflow actions. Make sure that before you make a Pull Request, `ruff format . --check` runs in your local without any errors otherwise the workflow action will fail. + +### Debugging + +For interactive debugging of the tests, use `pytest` with the `--pdb` flag. +We recommend using an IDE for debugging, e.g., _VSCode_. +If using VSCode, you can add the following snippet to your `.vscode/launch.json`: + +```json +{ + "configurations": [ + { + "name": "", + "type": "debugpy", + "request": "launch", + "cwd": "${workspaceFolder}", + "program": "${workspaceFolder}/.pyenv/bin/pytest", + "justMyCode": true, + "env": { + "_PYTEST_RAISE": "1" + }, + "args": [ + "-sv", + "--pdb", + "", + ] + } + ] +} ``` -pip install dist/nomad-schema-plugin-example-1.0.tar.gz -``` - -Read more about python packages, `pyproject.toml`, and how to upload packages to PyPI -on the [PyPI documentation](https://packaging.python.org/en/latest/tutorials/packaging-projects/). - -## Next steps +where `${workspaceFolder}` refers to the NOMAD root. -To learn more about plugins, how to add them to an Oasis, how to publish them, read our -documentation on plugins: https://nomad-lab.eu/docs/plugins/plugins.html. 
+The settings configuration file `.vscode/settings.json` automatically applies the linting upon saving the file. diff --git a/pyproject.toml b/pyproject.toml index 0087e7e..04e4026 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" authors = [{ name = "The NOMAD Authors" }] license = { text = "Apache-2.0" } dependencies = [ - "nomad-lab@file:///home/jfrudzinski/work/soft/nomad/", + "nomad-lab@git+https://github.com/nomad-coe/nomad.git@develop", "nomad-schema-plugin-simulation-workflow@git+https://github.com/nomad-coe/nomad-schema-plugin-simulation-workflow.git@develop", "nomad-schema-plugin-run@git+https://github.com/nomad-coe/nomad-schema-plugin-run.git@develop", "nomad-schema-plugin-simulation-data@git+https://github.com/nomad-coe/nomad-schema-plugin-simulation-data.git@develop", diff --git a/src/nomad_parser_h5md/__main__.py b/src/nomad_parser_h5md/__main__.py index 63914fe..459af58 100644 --- a/src/nomad_parser_h5md/__main__.py +++ b/src/nomad_parser_h5md/__main__.py @@ -16,15 +16,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import sys import json import logging +import sys -from nomad.utils import configure_logging from nomad.datamodel import EntryArchive +from nomad.utils import configure_logging from nomadparserh5md.parser import H5MDParser -if __name__ == "__main__": +if __name__ == '__main__': configure_logging(console_log_level=logging.DEBUG) archive = EntryArchive() H5MDParser().parse(sys.argv[1], archive, logging) diff --git a/src/nomad_parser_h5md/parser.py b/src/nomad_parser_h5md/parser.py index 84f366d..d6f75e1 100644 --- a/src/nomad_parser_h5md/parser.py +++ b/src/nomad_parser_h5md/parser.py @@ -16,44 +16,68 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import os -import numpy as np import logging -import h5py +import os +import inspect +from typing import Any, Dict, List, Union -from typing import List, Dict, Union, Any +import h5py +import numpy as np +from ase.symbols import symbols2numbers +from atomisticparsers.utils import MOL, MDParser from h5py import Group - from nomad.datamodel import EntryArchive from nomad.metainfo.util import MEnum from nomad.parsing.file_parser import FileParser -from runschema.run import Run, Program, MSection -from runschema.method import ( - Method, - ForceField, - Model, - AtomParameters, - ForceCalculations, - NeighborSearching, +from nomad.units import ureg +from nomad_simulations import Program as BaseProgram + +# New schema +from nomad_simulations import Simulation +from nomad_simulations.atoms_state import ( + AtomsState, + CoreHole, + HubbardInteractions, + OrbitalsState, ) -from runschema.system import System, AtomsGroup +from nomad_simulations.model_system import AtomicCell +import nomad_simulations.properties.energies as energy_module +import nomad_simulations.properties.forces as force_module from runschema.calculation import ( + BaseCalculation, Calculation, Energy, EnergyEntry, - BaseCalculation, ) +from runschema.method import ( + AtomParameters, + ForceCalculations, + ForceField, + Method, + Model, + NeighborSearching, +) +from runschema.run import MSection, Program, Run +from runschema.system import AtomsGroup, System from simulationworkflowschema.molecular_dynamics import ( - EnsembleProperty, - EnsemblePropertyValues, CorrelationFunction, CorrelationFunctionValues, + EnsembleProperty, + EnsemblePropertyValues, ) -from atomisticparsers.utils import MDParser, MOL -from .schema import ParamEntry, CalcEntry, Author -from nomad.units import ureg -from ase.symbols import symbols2numbers +from .schema import Author, CalcEntry, ParamEntry +from .schema2 import ( + Author as Author2, + ModelSystem, + TrajectoryOutputs, + OutputsEntry, + TotalEnergy, + TotalForce, + 
ParamEntry as ParamEntry2, + ForceEntry, + EnergyEntry as EnergyEntry2, +) class HDF5Parser(FileParser): def __init__(self): @@ -63,9 +87,9 @@ def __init__(self): def filehdf5(self): if self._file_handler is None: try: - self._file_handler = h5py.File(self.mainfile, "r") + self._file_handler = h5py.File(self.mainfile, 'r') except Exception: - self.logger.error("Error reading hdf5 file.") + self.logger.error('Error reading hdf5 file.') return self._file_handler @@ -86,7 +110,7 @@ def decode_bytes(self, dataset): if dataset.size == 0: return None dataset = ( - [val.decode("utf-8") for val in dataset] + [val.decode('utf-8') for val in dataset] if isinstance(dataset[0], bytes) else dataset ) @@ -96,11 +120,11 @@ def decode_bytes(self, dataset): else dataset ) elif ( - type(dataset).__name__ == "bool_" + type(dataset).__name__ == 'bool_' ): # TODO fix error when using isinstance() here dataset = dataset.__bool__() else: - dataset = dataset.decode("utf-8") if isinstance(dataset, bytes) else dataset + dataset = dataset.decode('utf-8') if isinstance(dataset, bytes) else dataset return dataset def get_attribute( @@ -110,7 +134,7 @@ def get_attribute( Extracts attribute from group object based on path, and returns default if not defined. """ if path: - section_segments = path.split(".") + section_segments = path.split('.') for section in section_segments: try: value = group.get(section) @@ -126,13 +150,13 @@ def get_value(self, group, path: str, default=None): """ Extracts group or dataset from group object based on path, and returns default if not defined. 
""" - section_segments = path.split(".") + section_segments = path.split('.') for section in section_segments: try: value = group.get(section) - unit = self.get_attribute(group, "unit", path=section) + unit = self.get_attribute(group, 'unit', path=section) unit_factor = self.get_attribute( - group, "unit_factor", path=section, default=1.0 + group, 'unit_factor', path=section, default=1.0 ) group = value except Exception: @@ -148,11 +172,11 @@ def get_value(self, group, path: str, default=None): return value if value is not None else default def parse(self, path: str = None, **kwargs): - source = kwargs.get("source", self.filehdf5) - isattr = kwargs.get("isattr", False) + source = kwargs.get('source', self.filehdf5) + isattr = kwargs.get('isattr', False) value = None if isattr: - attr_path, attribute = path.rsplit(".", 1) + attr_path, attribute = path.rsplit('.', 1) value = self.get_attribute(source, attribute, path=attr_path) else: value = self.get_value(source, path) @@ -167,27 +191,51 @@ def __init__(self): self._n_atoms = None self._atom_parameters = None self._system_info = None + self._system_info2 = None self._observable_info = None self._parameter_info = None self._time_unit = ureg.picosecond - self._path_group_particles_all = "particles.all" - self._path_group_positions_all = "particles.all.position" - self._path_value_positions_all = "particles.all.position.value" + self._path_group_particles_all = 'particles.all' + self._path_group_positions_all = 'particles.all.position' + self._path_value_positions_all = 'particles.all.position.value' + + self.energy_classes = {m[1].__name__.replace('Energy', '').lower(): m[1] + for m in inspect.getmembers(energy_module, inspect.isclass) + if 'Energy' in m[1].__name__} + self.force_classes = {m[1].__name__.replace('Force', '').lower(): m[1] + for m in inspect.getmembers(force_module, inspect.isclass) + if 'Force' in m[1].__name__} self._nomad_to_particles_group_map = { - "positions": "position", - "velocities": 
"velocity", - "forces": "force", - "labels": "species_label", - "label": "force_field_label", - "mass": "mass", - "charge": "charge", + 'positions': 'position', + 'velocities': 'velocity', + 'forces': 'force', + 'labels': 'species_label', + 'label': 'force_field_label', + 'mass': 'mass', + 'charge': 'charge', + } + + self._nomad_to_particles_group_map2 = { + 'positions': 'position', + 'velocities': 'velocity', + 'forces': 'force', + 'labels': 'species_label', + 'label': 'force_field_label', + 'mass': 'mass', + 'charge': 'charge', } self._nomad_to_box_group_map = { - "lattice_vectors": "edges", - "periodic": "boundary", - "dimension": "dimension", + 'lattice_vectors': 'edges', + 'periodic': 'boundary', + 'dimension': 'dimension', + } + + self._nomad_to_box_group_map2 = { + 'lattice_vectors': 'edges', + 'periodic_boundary_conditions': 'boundary', + 'dimensionality': 'dimension', } def parse_atom_parameters(self): @@ -196,10 +244,10 @@ def parse_atom_parameters(self): self._atom_parameters = {} n_atoms = self._n_atoms[0] # TODO Extend to non-static n_atoms - atom_parameter_keys = ["label", "mass", "charge"] + atom_parameter_keys = ['label', 'mass', 'charge'] for key in atom_parameter_keys: value = self._data_parser.get( - f"{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}" + f'{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}' ) if value is not None: self._atom_parameters[key] = value @@ -207,19 +255,19 @@ def parse_atom_parameters(self): continue if isinstance(self._atom_parameters[key], h5py.Group): self.logger.warning( - "Time-dependent atom parameters currently not supported." - " Atom parameter values will not be stored." + 'Time-dependent atom parameters currently not supported.' + ' Atom parameter values will not be stored.' ) continue elif len(self._atom_parameters[key]) != n_atoms: self.logger.warning( - "Inconsistent length of some atom parameters." - " Atom parameter values will not be stored." 
+ 'Inconsistent length of some atom parameters.' + ' Atom parameter values will not be stored.' ) continue def parse_system_info(self): - self._system_info = {"system": {}, "calculation": {}} + self._system_info = {'system': {}, 'calculation': {}} particles_group = self._data_parser.get(self._path_group_particles_all) positions = self._data_parser.get(self._path_value_positions_all) n_frames = self._n_frames @@ -227,28 +275,28 @@ def parse_system_info(self): particles_group is None or positions is None or positions is None ): # For now we require that positions are present in the H5MD file to store other particle attributes self.logger.warning( - "No positions available in H5MD file." - " Other particle attributes will not be stored" + 'No positions available in H5MD file.' + ' Other particle attributes will not be stored' ) return self._system_info - def get_value(value, steps, path=""): + def get_value(value, steps, path=''): if value is None: return value if isinstance(value, h5py.Group): - value = self._data_parser.get(f"{path}.value" if path else "value") - path_step = f"{path}.step" if path else "step" + value = self._data_parser.get(f'{path}.value' if path else 'value') + path_step = f'{path}.step' if path else 'step' attr_steps = self._data_parser.get(path_step) if value is None or attr_steps is None: self.logger.warning( - "Missing values or steps in particle attributes." - " These attributes will not be stored." + 'Missing values or steps in particle attributes.' + ' These attributes will not be stored.' ) return None elif sorted(attr_steps) != sorted(steps): self.logger.warning( - "Distinct trajectory lengths of particle attributes not supported." - " These attributes will not be stored." + 'Distinct trajectory lengths of particle attributes not supported.' + ' These attributes will not be stored.' 
) return None else: @@ -257,131 +305,226 @@ def get_value(value, steps, path=""): return [value] * n_frames # get the steps based on the positions - steps = self._data_parser.get(f"{self._path_group_positions_all}.step") + steps = self._data_parser.get(f'{self._path_group_positions_all}.step') if steps is None: self.logger.warning( - "No step information available in H5MD file." - " System information cannot be parsed." + 'No step information available in H5MD file.' + ' System information cannot be parsed.' ) return self._system_info self.trajectory_steps = steps # get the rest of the particle quantities - values_dict = {"system": {}, "calculation": {}} - times = self._data_parser.get(f"{self._path_group_positions_all}.time") - values_dict["system"]["time"] = times - values_dict["calculation"]["time"] = times - values_dict["system"]["positions"] = positions - values_dict["system"]["n_atoms"] = self._n_atoms + values_dict = {'system': {}, 'calculation': {}} + times = self._data_parser.get(f'{self._path_group_positions_all}.time') + values_dict['system']['time'] = times + values_dict['calculation']['time'] = times + values_dict['system']['positions'] = positions + values_dict['system']['n_atoms'] = self._n_atoms system_keys = { - "labels": "system", - "velocities": "system", - "forces": "calculation", + 'labels': 'system', + 'velocities': 'system', + 'forces': 'calculation', } for key, sec_key in system_keys.items(): - path = f"{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}" + path = f'{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}' value = self._data_parser.get(path) values_dict[sec_key][key] = get_value(value, steps, path=path) # get the box quantities - box = self._data_parser.get(f"{self._path_group_particles_all}.box") + box = self._data_parser.get(f'{self._path_group_particles_all}.box') if box is not None: - box_attributes = ["dimension", "periodic"] + box_attributes = ['dimension', 'periodic'] for 
box_key in box_attributes: value = self._data_parser.get( - f"{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}", + f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}', isattr=True, ) - values_dict["system"][box_key] = ( + values_dict['system'][box_key] = ( [value] * n_frames if value is not None else None ) - box_key = "lattice_vectors" - path = f"{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}" + box_key = 'lattice_vectors' + path = f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}' value = self._data_parser.get(path) - values_dict["system"][box_key] = get_value(value, steps, path=path) + values_dict['system'][box_key] = get_value(value, steps, path=path) # populate the dictionary for i_step, step in enumerate(steps): - self._system_info["system"][step] = { + self._system_info['system'][step] = { key: val[i_step] - for key, val in values_dict["system"].items() + for key, val in values_dict['system'].items() if val is not None } - self._system_info["calculation"][step] = { + self._system_info['calculation'][step] = { key: val[i_step] - for key, val in values_dict["calculation"].items() + for key, val in values_dict['calculation'].items() + if val is not None + } + + def parse_system_info2(self): + self._system_info2 = {'system': {}, 'calculation': {}} + particles_group = self._data_parser.get(self._path_group_particles_all) + positions = self._data_parser.get(self._path_value_positions_all) + n_frames = self._n_frames + if ( + particles_group is None or positions is None or positions is None + ): # For now we require that positions are present in the H5MD file to store other particle attributes + self.logger.warning( + 'No positions available in H5MD file.' 
+ ' Other particle attributes will not be stored' + ) + return self._system_info2 + + def get_value(value, steps, path=''): + if value is None: + return value + if isinstance(value, h5py.Group): + value = self._data_parser.get(f'{path}.value' if path else 'value') + path_step = f'{path}.step' if path else 'step' + attr_steps = self._data_parser.get(path_step) + if value is None or attr_steps is None: + self.logger.warning( + 'Missing values or steps in particle attributes.' + ' These attributes will not be stored.' + ) + return None + elif sorted(attr_steps) != sorted(steps): + self.logger.warning( + 'Distinct trajectory lengths of particle attributes not supported.' + ' These attributes will not be stored.' + ) + return None + else: + return value + else: + return [value] * n_frames + + # get the steps based on the positions + steps = self._data_parser.get(f'{self._path_group_positions_all}.step') + if steps is None: + self.logger.warning( + 'No step information available in H5MD file.' + ' System information cannot be parsed.' 
+ ) + return self._system_info2 + self.trajectory_steps = steps + + # get the rest of the particle quantities + values_dict = {'system': {}, 'calculation': {}} + times = self._data_parser.get(f'{self._path_group_positions_all}.time') + values_dict['system']['time'] = times + values_dict['calculation']['time'] = times + values_dict['system']['positions'] = positions + values_dict['system']['n_atoms'] = self._n_atoms + system_keys = { + 'labels': 'system', + 'velocities': 'system', + 'forces': 'calculation', + } + for key, sec_key in system_keys.items(): + path = f'{self._path_group_particles_all}.{self._nomad_to_particles_group_map2[key]}' + value = self._data_parser.get(path) + values_dict[sec_key][key] = get_value(value, steps, path=path) + + # get the box quantities + box = self._data_parser.get(f'{self._path_group_particles_all}.box') + if box is not None: + box_attributes = ['dimensionality', 'periodic_boundary_conditions'] + for box_key in box_attributes: + value = self._data_parser.get( + f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map2[box_key]}', + isattr=True, + ) + values_dict['system'][box_key] = ( + [value] * n_frames if value is not None else None + ) + + box_key = 'lattice_vectors' + path = f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map2[box_key]}' + value = self._data_parser.get(path) + values_dict['system'][box_key] = get_value(value, steps, path=path) + # populate the dictionary + for i_step, step in enumerate(steps): + self._system_info2['system'][step] = { + key: val[i_step] + for key, val in values_dict['system'].items() + if val is not None + } + self._system_info2['calculation'][step] = { + key: val[i_step] + for key, val in values_dict['calculation'].items() if val is not None } def parse_observable_info(self): self._observable_info = { - "configurational": {}, - "ensemble_average": {}, - "correlation_function": {}, + 'configurational': {}, + 'ensemble_average': {}, + 'correlation_function': {}, } 
thermodynamics_steps = [] - observables_group = self._data_parser.get("observables") + observables_group = self._data_parser.get('observables') if observables_group is None: return self._observable_info def get_observable_paths(observable_group: Dict, current_path: str) -> List: paths = [] for obs_key in observable_group.keys(): - path = f"{obs_key}." + path = f'{obs_key}.' observable = self._data_parser.get_value(observable_group, obs_key) observable_type = self._data_parser.get_value( observable_group, obs_key - ).attrs.get("type") + ).attrs.get('type') if not observable_type: paths.extend( - get_observable_paths(observable, f"{current_path}{path}") + get_observable_paths(observable, f'{current_path}{path}') ) else: paths.append(current_path + path[:-1]) return paths - observable_paths = get_observable_paths(observables_group, current_path="") + observable_paths = get_observable_paths(observables_group, current_path='') for path in observable_paths: - full_path = f"observables.{path}" + full_path = f'observables.{path}' observable = self._data_parser.get_value(observables_group, path) observable_type = self._data_parser.get_value( observables_group, path - ).attrs.get("type") + ).attrs.get('type') observable_name, observable_label = ( - path.split(".", 1) if len(path.split(".")) > 1 else [path, ""] + path.split('.', 1) if len(path.split('.')) > 1 else [path, ''] ) - if observable_type == "configurational": - steps = self._data_parser.get(f"{full_path}.step") + if observable_type == 'configurational': + steps = self._data_parser.get(f'{full_path}.step') if steps is None: self.logger.warning( - "Missing step information in some observables." - " These will not be stored." + 'Missing step information in some observables.' + ' These will not be stored.' 
) continue thermodynamics_steps = set(list(steps) + list(thermodynamics_steps)) - times = self._data_parser.get(f"{full_path}.time") - values = self._data_parser.get(f"{full_path}.value") + times = self._data_parser.get(f'{full_path}.time') + values = self._data_parser.get(f'{full_path}.value') if isinstance(values, h5py.Group): self.logger.warning( - "Group structures within individual observables not supported." - " These will not be stored." + 'Group structures within individual observables not supported.' + ' These will not be stored.' ) continue for i_step, step in enumerate(steps): if not self._observable_info[observable_type].get(step): self._observable_info[observable_type][step] = {} - self._observable_info[observable_type][step]["time"] = times[ + self._observable_info[observable_type][step]['time'] = times[ i_step ] observable_key = ( - f"{observable_name}-{observable_label}" + f'{observable_name}-{observable_label}' if observable_label - else f"{observable_name}" + else f'{observable_name}' + ) + self._observable_info[observable_type][step][observable_key] = ( + values[i_step] ) - self._observable_info[observable_type][step][ - observable_key - ] = values[i_step] else: if observable_name not in self._observable_info[observable_type].keys(): self._observable_info[observable_type][observable_name] = {} @@ -389,11 +532,11 @@ def get_observable_paths(observable_group: Dict, current_path: str) -> List: observable_label ] = {} for key in observable.keys(): - observable_attribute = self._data_parser.get(f"{full_path}.{key}") + observable_attribute = self._data_parser.get(f'{full_path}.{key}') if isinstance(observable_attribute, h5py.Group): self.logger.warning( - "Group structures within individual observables not supported." - " These will not be stored." + 'Group structures within individual observables not supported.' + ' These will not be stored.' 
) continue self._observable_info[observable_type][observable_name][ @@ -409,31 +552,31 @@ def parse_atomsgroup( path_particlesgroup: str, ): for i_key, key in enumerate(h5md_sec_particlesgroup.keys()): - path_particlesgroup_key = f"{path_particlesgroup}.{key}" + path_particlesgroup_key = f'{path_particlesgroup}.{key}' particles_group = { group_key: self._data_parser.get( - f"{path_particlesgroup_key}.{group_key}" + f'{path_particlesgroup_key}.{group_key}' ) for group_key in h5md_sec_particlesgroup[key].keys() } sec_atomsgroup = AtomsGroup() nomad_sec.atoms_group.append(sec_atomsgroup) - sec_atomsgroup.type = particles_group.pop("type", None) + sec_atomsgroup.type = particles_group.pop('type', None) sec_atomsgroup.index = i_key - sec_atomsgroup.atom_indices = particles_group.pop("indices", None) + sec_atomsgroup.atom_indices = particles_group.pop('indices', None) sec_atomsgroup.n_atoms = ( len(sec_atomsgroup.atom_indices) if sec_atomsgroup.atom_indices is not None else None ) - sec_atomsgroup.is_molecule = particles_group.pop("is_molecule", None) - sec_atomsgroup.label = particles_group.pop("label", None) - sec_atomsgroup.composition_formula = particles_group.pop("formula", None) - particles_subgroup = particles_group.pop("particles_group", None) + sec_atomsgroup.is_molecule = particles_group.pop('is_molecule', None) + sec_atomsgroup.label = particles_group.pop('label', None) + sec_atomsgroup.composition_formula = particles_group.pop('formula', None) + particles_subgroup = particles_group.pop('particles_group', None) # set the remaining attributes for particles_group_key in particles_group.keys(): val = particles_group.get(particles_group_key) - units = val.units if hasattr(val, "units") else None + units = val.units if hasattr(val, 'units') else None val = val.magnitude if units is not None else val sec_atomsgroup.x_h5md_parameters.append( ParamEntry(kind=particles_group_key, value=val, unit=units) @@ -443,24 +586,24 @@ def parse_atomsgroup( self.parse_atomsgroup( 
sec_atomsgroup, particles_subgroup, - f"{path_particlesgroup_key}.particles_group", + f'{path_particlesgroup_key}.particles_group', ) def is_valid_key_val(self, metainfo_class: MSection, key: str, val) -> bool: if hasattr(metainfo_class, key): - quant_type = getattr(metainfo_class, key).get("type") + quant_type = getattr(metainfo_class, key).get('type') is_menum = isinstance(quant_type, MEnum) if quant_type else False return False if is_menum and val not in quant_type._list else True else: return False def parse_parameter_info(self): - self._parameter_info = {"force_calculations": {}, "workflow": {}} + self._parameter_info = {'force_calculations': {}, 'workflow': {}} def get_parameters(parameter_group: Group, path: str) -> Dict: param_dict: Dict[Any, Any] = {} for key, val in parameter_group.items(): - path_key = f"{path}.{key}" + path_key = f'{path}.{key}' if isinstance(val, h5py.Group): param_dict[key] = get_parameters(val, path_key) else: @@ -468,7 +611,7 @@ def get_parameters(parameter_group: Group, path: str) -> Dict: if isinstance(param_dict[key], str): param_dict[key] = ( param_dict[key].upper() - if key == "thermodynamic_ensemble" + if key == 'thermodynamic_ensemble' else param_dict[key].lower() ) elif isinstance(param_dict[key], (int, np.int32, np.int64)): @@ -477,82 +620,90 @@ def get_parameters(parameter_group: Group, path: str) -> Dict: return param_dict force_calculations_group = self._data_parser.get( - "parameters.force_calculations" + 'parameters.force_calculations' ) if force_calculations_group is not None: - self._parameter_info["force_calculations"] = get_parameters( - force_calculations_group, "parameters.force_calculations" + self._parameter_info['force_calculations'] = get_parameters( + force_calculations_group, 'parameters.force_calculations' ) - workflow_group = self._data_parser.get("parameters.workflow") + workflow_group = self._data_parser.get('parameters.workflow') if workflow_group is not None: - self._parameter_info["workflow"] = 
get_parameters( - workflow_group, "parameters.workflow" + self._parameter_info['workflow'] = get_parameters( + workflow_group, 'parameters.workflow' ) def parse_calculation(self): sec_run = self.archive.run[-1] - calculation_info = self._observable_info.get("configurational") + calculation_info = self._observable_info.get('configurational') if ( not calculation_info ): # TODO should still create entries for system time link in this case return system_info = self._system_info.get( - "calculation" + 'calculation' ) # note: it is currently ensured in parse_system() that these have the same length as the system_map for step in self.steps: data = { - "method_ref": sec_run.method[-1] if sec_run.method else None, - "step": step, - "energy": {}, + 'method_ref': sec_run.method[-1] if sec_run.method else None, + 'step': step, + 'energy': {}, } data_h5md = { - "x_h5md_custom_calculations": [], - "x_h5md_energy_contributions": [], + 'x_h5md_custom_calculations': [], + 'x_h5md_energy_contributions': [], } - data["time"] = calculation_info.get("step", {}).get("time") - if not data["time"]: - data["time"] = system_info.get("step", {}).get("time") + #! Apparently this is not being set here, but where?! 
+ # data['time'] = calculation_info.get(step, {}).get('time') + # if not data['time']: + # data['time'] = system_info.get(step, {}).get('time') for key, val in system_info.get(step, {}).items(): - if key == "forces": + if key == 'forces': data[key] = dict(total=dict(value=val)) + elif hasattr(BaseCalculation, key): + data[key] = val else: - if hasattr(BaseCalculation, key): - data[key] = val - else: - unit = None - if hasattr(val, "units"): - unit = val.units - val = val.magnitude - data_h5md["x_h5md_custom_calculations"].append( - CalcEntry(kind=key, value=val, unit=unit) - ) + unit = None + if hasattr(val, 'units'): + unit = val.units + val = val.magnitude + data_h5md['x_h5md_custom_calculations'].append( + CalcEntry(kind=key, value=val, unit=unit) + ) for key, val in calculation_info.get(step).items(): - key_split = key.split("-") + key_split = key.split('-') observable_name = key_split[0] observable_label = key_split[1] if len(key_split) > 1 else key_split[0] if ( - "energ" in observable_name + 'energ' in observable_name ): # TODO check for energies or energy when matching name - if hasattr(Energy, observable_label): - data["energy"][observable_label] = dict(value=val) + # check for usage of energy/mole and convert to energy + if val.check("[energy]/[substance]") and "mole" in str(val.units): + val = val * MOL * ureg.mole + + if val.check("[energy]"): + if hasattr(Energy, observable_label): + data["energy"][observable_label] = dict(value=val) + else: + data_h5md["x_h5md_energy_contributions"].append( + EnergyEntry(kind=key, value=val) + ) else: - data_h5md["x_h5md_energy_contributions"].append( - EnergyEntry(kind=key, value=val) + self.logger.warning( + "Energy value not in energy units. Skipping entry." 
) + elif hasattr(BaseCalculation, observable_label): + data[observable_label] = val else: - if hasattr(BaseCalculation, observable_label): - data[observable_label] = val - else: - unit = None - if hasattr(val, "units"): - unit = val.units - val = val.magnitude - data_h5md["x_h5md_custom_calculations"].append( - CalcEntry(kind=key, value=val, unit=unit) - ) + unit = None + if hasattr(val, 'units'): + unit = val.units + val = val.magnitude + data_h5md['x_h5md_custom_calculations'].append( + CalcEntry(kind=key, value=val, unit=unit) + ) self.parse_thermodynamics_step(data) sec_calc = sec_run.calculation[-1] @@ -561,43 +712,43 @@ def parse_calculation(self): sec_calc = Calculation() sec_run.calculation.append(sec_calc) sec_calc.step = int(step) - sec_calc.time = data["time"] - for calc_entry in data_h5md["x_h5md_custom_calculations"]: + sec_calc.time = data['time'] + for calc_entry in data_h5md['x_h5md_custom_calculations']: sec_calc.x_h5md_custom_calculations.append(calc_entry) sec_energy = sec_calc.energy if not sec_energy: sec_energy = Energy() sec_calc.append(sec_energy) - for energy_entry in data_h5md["x_h5md_energy_contributions"]: + for energy_entry in data_h5md['x_h5md_energy_contributions']: sec_energy.x_h5md_energy_contributions.append(energy_entry) def parse_system(self): sec_run = self.archive.run[-1] - system_info = self._system_info.get("system") + system_info = self._system_info.get('system') if not system_info: - self.logger.error("No particle information found in H5MD file.") + self.logger.error('No particle information found in H5MD file.') return self._system_time_map = {} for i_step, step in enumerate(self.trajectory_steps): - time = system_info[step].pop("time") + # time = system_info[step].pop("time") # unused! 
atoms_dict = system_info[step] - atom_labels = atoms_dict.get("labels") + atom_labels = atoms_dict.get('labels') if atom_labels is not None: try: symbols2numbers(atom_labels) except KeyError: # TODO this check should be moved to the system normalizer in the new schema - atoms_dict["labels"] = ["X"] * len(atom_labels) + atoms_dict['labels'] = ['X'] * len(atom_labels) topology = None if i_step == 0: # TODO extend to time-dependent bond lists and topologies - atoms_dict["bond_list"] = self._data_parser.get("connectivity.bonds") - path_topology = "connectivity.particles_group" + atoms_dict['bond_list'] = self._data_parser.get('connectivity.bonds') + path_topology = 'connectivity.particles_group' topology = self._data_parser.get(path_topology) - self.parse_trajectory_step({"atoms": atoms_dict}) + self.parse_trajectory_step({'atoms': atoms_dict}) if i_step == 0 and topology: # TODO extend to time-dependent topologies self.parse_atomsgroup(sec_run.system[i_step], topology, path_topology) @@ -625,30 +776,30 @@ def parse_method(self): ) # Get the interactions - connectivity_group = self._data_parser.get("connectivity") + connectivity_group = self._data_parser.get('connectivity') if connectivity_group: - atom_labels = self._atom_parameters.get("label") - interaction_keys = ["bonds", "angles", "dihedrals", "impropers"] + atom_labels = self._atom_parameters.get('label') + interaction_keys = ['bonds', 'angles', 'dihedrals', 'impropers'] interactions_by_type = [] for interaction_key in interaction_keys: interaction_list = self._data_parser.get( - f"connectivity.{interaction_key}" + f'connectivity.{interaction_key}' ) if interaction_list is None: continue elif isinstance(interaction_list, h5py.Group): self.logger.warning( - "Time-dependent interactions currently not supported." - " These values will not be stored" + 'Time-dependent interactions currently not supported.' 
+ ' These values will not be stored' ) continue interaction_type_dict = { - "type": interaction_key, - "n_interactions": len(interaction_list), - "n_atoms": len(interaction_list[0]), - "atom_indices": interaction_list, - "atom_labels": np.array(atom_labels)[interaction_list] + 'type': interaction_key, + 'n_interactions': len(interaction_list), + 'n_atoms': len(interaction_list[0]), + 'atom_indices': interaction_list, + 'atom_labels': np.array(atom_labels)[interaction_list] if atom_labels is not None else None, } @@ -656,7 +807,7 @@ def parse_method(self): self.parse_interactions_by_type(interactions_by_type, sec_model) # Get the force calculation parameters - force_calculation_parameters = self._parameter_info.get("force_calculations") + force_calculation_parameters = self._parameter_info.get('force_calculations') if force_calculation_parameters: sec_force_calculations = ForceCalculations() sec_force_field.force_calculations = sec_force_calculations @@ -670,12 +821,12 @@ def parse_method(self): sec_force_calculations.m_get_quantity_definition(key), val ) else: - units = val.units if hasattr(val, "units") else None + units = val.units if hasattr(val, 'units') else None val = val.magnitude if units is not None else val sec_force_calculations.x_h5md_parameters.append( ParamEntry(kind=key, value=val, unit=units) ) - elif key == "neighbor_searching": + elif key == 'neighbor_searching': for neigh_key, neigh_val in val.items(): if self.is_valid_key_val( NeighborSearching, neigh_key, neigh_val @@ -687,15 +838,15 @@ def parse_method(self): neigh_val, ) else: - units = val.units if hasattr(val, "units") else None + units = val.units if hasattr(val, 'units') else None val = val.magnitude if units is not None else val sec_neighbor_searching.x_h5md_parameters.append( ParamEntry(kind=key, value=val, unit=units) ) else: self.logger.warning( - "Unknown parameters in force calculations section." - " These will not be stored." + 'Unknown parameters in force calculations section.' 
+ ' These will not be stored.' ) def get_workflow_properties_dict( @@ -712,17 +863,17 @@ def populate_property_dict( ): if val is None: return - value_unit = val.units if hasattr(val, "units") else None - value_magnitude = val.magnitude if hasattr(val, "units") else val + value_unit = val.units if hasattr(val, 'units') else None + value_magnitude = val.magnitude if hasattr(val, 'units') else val if flag_known_property: property_dict[val_name] = ( value_magnitude * value_unit if value_unit else value_magnitude ) else: - property_dict[f"{val_name}_unit"] = ( + property_dict[f'{val_name}_unit'] = ( str(value_unit) if value_unit else None ) - property_dict[f"{val_name}_magnitude"] = value_magnitude + property_dict[f'{val_name}_magnitude'] = value_magnitude workflow_properties_dict: Dict[Any, Any] = {} for observable_type, observable_dict in observables.items(): @@ -732,13 +883,13 @@ def populate_property_dict( property_type_value_key = properties_known[observable_type] flag_known_property = True property_dict: Dict[Any, Any] = {property_type_value_key: []} - property_dict["label"] = observable_type + property_dict['label'] = observable_type for key, observable in observable_dict.items(): - property_values_dict = {"label": key} + property_values_dict = {'label': key} for quant_name, val in observable.items(): - if quant_name == "val": + if quant_name == 'val': continue - if quant_name == "bins": + if quant_name == 'bins': continue if quant_name in property_keys_list: property_dict[quant_name] = val @@ -746,17 +897,17 @@ def populate_property_dict( property_values_dict[quant_name] = val # TODO Still need to add custom values here. 
- val = observable.get("value") + val = observable.get('value') populate_property_dict( property_values_dict, - "value", + 'value', val, flag_known_property=flag_known_property, ) - bins = observable.get("bins") + bins = observable.get('bins') populate_property_dict( property_values_dict, - "bins", + 'bins', bins, flag_known_property=flag_known_property, ) @@ -770,22 +921,22 @@ def populate_property_dict( return workflow_properties_dict def parse_workflow(self): - workflow_parameters = self._parameter_info.get("workflow").get( - "molecular_dynamics" + workflow_parameters = self._parameter_info.get('workflow').get( + 'molecular_dynamics' ) if workflow_parameters is None: return workflow_results = {} - ensemble_average_observables = self._observable_info.get("ensemble_average") + ensemble_average_observables = self._observable_info.get('ensemble_average') ensemble_property_dict = { - "property_type_key": "ensemble_properties", - "property_type_value_key": "ensemble_property_values", - "properties_known": { - "radial_distribution_functions": "radial_distribution_function_values" + 'property_type_key': 'ensemble_properties', + 'property_type_value_key': 'ensemble_property_values', + 'properties_known': { + 'radial_distribution_functions': 'radial_distribution_function_values' }, - "property_keys_list": EnsembleProperty.m_def.all_quantities.keys(), - "property_value_keys_list": EnsemblePropertyValues.m_def.all_quantities.keys(), + 'property_keys_list': EnsembleProperty.m_def.all_quantities.keys(), + 'property_value_keys_list': EnsemblePropertyValues.m_def.all_quantities.keys(), } workflow_results.update( self.get_workflow_properties_dict( @@ -793,16 +944,16 @@ def parse_workflow(self): ) ) correlation_function_observables = self._observable_info.get( - "correlation_function" + 'correlation_function' ) correlation_function_dict = { - "property_type_key": "correlation_functions", - "property_type_value_key": "correlation_function_values", - "properties_known": { - 
"mean_squared_displacements": "mean_squared_displacement_values" + 'property_type_key': 'correlation_functions', + 'property_type_value_key': 'correlation_function_values', + 'properties_known': { + 'mean_squared_displacements': 'mean_squared_displacement_values' }, - "property_keys_list": CorrelationFunction.m_def.all_quantities.keys(), - "property_value_keys_list": CorrelationFunctionValues.m_def.all_quantities.keys(), + 'property_keys_list': CorrelationFunction.m_def.all_quantities.keys(), + 'property_value_keys_list': CorrelationFunctionValues.m_def.all_quantities.keys(), } workflow_results.update( self.get_workflow_properties_dict( @@ -813,13 +964,305 @@ def parse_workflow(self): dict(method=workflow_parameters, results=workflow_results) ) + def parse_h5md_group(self) -> dict: + group_h5md = self._data_parser.get('h5md') + group_h5md_dict = {} + if group_h5md: + group_h5md_dict['program_name'] = self._data_parser.get( + 'h5md.program.name', isattr=True + ) + group_h5md_dict['program_version'] = self._data_parser.get( + 'h5md.program.version', isattr=True + ) + group_h5md_dict['h5md_version'] = self._data_parser.get( + 'h5md.version', isattr=True + ) + group_h5md_dict['h5md_author_name'] = self._data_parser.get( + 'h5md.author.name', isattr=True + ) + group_h5md_dict['h5md_author_email'] = self._data_parser.get( + 'h5md.author.email', isattr=True + ) + group_h5md_dict['h5md_creator_name'] = self._data_parser.get( + 'h5md.creator.name', isattr=True + ) + group_h5md_dict['h5md_creator_version'] = self._data_parser.get( + 'h5md.creator.version', isattr=True + ) + else: + self.logger.warning( + '"h5md" group missing in (H5MD)hdf5 file.' + ' Program and author metadata will be missing!' 
+ ) + + return group_h5md_dict + + def parse_system_hierarchy( + self, + nomad_sec: ModelSystem, + h5md_sec_particlesgroup: Group, + path_particlesgroup: str, + ): + data = {} + for key in h5md_sec_particlesgroup.keys(): + path_particlesgroup_key = f'{path_particlesgroup}.{key}' + particles_group = { + group_key: self._data_parser.get( + f'{path_particlesgroup_key}.{group_key}' + ) + for group_key in h5md_sec_particlesgroup[key].keys() + } + sec_model_system = ModelSystem() + nomad_sec.model_system.append(sec_model_system) + data['branch_label'] = particles_group.pop('label', None) + data['atom_indices'] = particles_group.pop('indices', None) + # TODO remove the deprecated below from the test file + # sec_atomsgroup.type = particles_group.pop("type", None) #? deprecate? + particles_group.pop('type', None) + # sec_atomsgroup.is_molecule = particles_group.pop("is_molecule", None) #? deprecate? + particles_group.pop('is_molecule', None) + particles_group.pop('formula', None) # covered in normalization now + # write all the standard quantities to the archive + self.parse_section(data, sec_model_system) + particles_subgroup = particles_group.pop('particles_group', None) + + # set the remaining attributes + for particles_group_key in particles_group.keys(): + val = particles_group.get(particles_group_key) + units = val.units if hasattr(val, 'units') else None + val = val.magnitude if units is not None else val + sec_model_system.custom_system_attributes.append( + ParamEntry2(name=particles_group_key, value=val, unit=units) + ) + + # get the next branch level + if particles_subgroup: + self.parse_system_hierarchy( + sec_model_system, + particles_subgroup, + f'{path_particlesgroup_key}.particles_group', + ) + + # TODO move this function to the MDParser class + def parse_trajectory_step2( + self, data: Dict[str, Any], simulation: Simulation + ) -> None: + """ + Create a system section and write the provided data. 
+ """ + if self.archive is None: + return + + if (step := data.get('step')) is not None and step not in self.trajectory_steps: + return + + model_system = ModelSystem() + atomic_cell = AtomicCell() + atomic_cell_dict = data.pop('atomic_cell') + atom_labels = atomic_cell_dict.pop('labels') + for label in atom_labels: + atoms_state = AtomsState(chemical_symbol=label) + atomic_cell.atoms_state.append(atoms_state) + self.parse_section(atomic_cell_dict, atomic_cell) + model_system.cell.append(atomic_cell) + self.parse_section(data, model_system) + simulation.model_system.append(model_system) + + return model_system + + def parse_system2(self, simulation): + system_info = self._system_info2.get('system') + if not system_info: + self.logger.error('No particle information found in H5MD file.') + return + + self._system_time_map = {} + for i_step, step in enumerate(self.trajectory_steps): + atoms_dict = system_info[step] + atoms_dict['is_representative'] = False + + atom_labels = atoms_dict.get('labels') + if atom_labels is not None: + try: + symbols2numbers(atom_labels) + atoms_dict['labels'] = atom_labels + except KeyError: # TODO this check should be moved to the system normalizer in the new schema + atoms_dict['labels'] = ['X'] * len(atom_labels) + + topology = None + if i_step == 0: # TODO extend to time-dependent bond lists and topologies + atoms_dict['is_representative'] = True + atoms_dict['bond_list'] = self._data_parser.get('connectivity.bonds') + path_topology = 'connectivity.particles_group' + topology = self._data_parser.get(path_topology) + + # REMAP some of the data for the schema + atoms_dict['branch_label'] = ( + 'Total System' # ? Do we or should we have a default name for the entire system? 
+ ) + atoms_dict['time_step'] = atoms_dict.pop( + 'time' + ).magnitude # TODO change in system_info + atomic_cell_keys = [ + 'n_atoms', + 'lattice_vectors', + 'periodic_boundary_conditions', + 'positions', + 'velocities', + 'labels', + ] + atoms_dict['atomic_cell'] = {} + for key in atomic_cell_keys: + atoms_dict['atomic_cell'][key] = atoms_dict.pop(key) + + self.parse_trajectory_step2(atoms_dict, simulation) + + if i_step == 0 and topology: # TODO extend to time-dependent topologies + self.parse_system_hierarchy( + simulation.model_system[-1], topology, path_topology + ) + + # TODO move this function to the MDParser class and rename! + def parse_output_step(self, data: Dict[str, Any], simulation: Simulation) -> bool: + if self.archive is None: + return False + + if ( + step := data.get("step") + ) is not None and step not in self.thermodynamics_steps: + return False + + output = TrajectoryOutputs() # Outputs(model_system_ref=simulation.model_system[-1]) + simulation.outputs.append(output) + + energy_contributions = data.get('total_energy', {}).pop('contributions', {}) + force_contributions = data.get('total_force', {}).pop('contributions', {}) + self.parse_section(data, output) + try: + system_ref_index = self.trajectory_steps.index(output.step) + output.model_system_ref = simulation.model_system[system_ref_index] + except Exception: + self.logger.warning('Could not set system reference in parsing of outputs.') + + if energy_contributions: + if len(output.total_energy) == 0: + output.total_energy.append(TotalEnergy()) + + for energy_label, energy_dict in energy_contributions.items(): + energy = self.energy_classes[energy_label]() + output.total_energy[-1].contributions.append(energy) + self.parse_section(energy_dict, energy) + + if force_contributions: + if len(output.total_force) == 0: + output.total_force.append(TotalForce()) + + for force_label, force_dict in force_contributions.items(): + force = self.force_classes[force_label]() + 
output.total_force[-1].contributions.append(force) + self.parse_section(force_dict, force) + + return True + + def parse_outputs(self, simulation: Simulation): + + outputs_info = self._observable_info.get('configurational') + if ( + not outputs_info + ): # TODO should still create entries for system time link in this case + return + + system_info = self._system_info2.get( + 'calculation' + ) # note: it is currently ensured in parse_system() that these have the same length as the system_map + + for step in self.steps: + + if step is not None and step not in self.thermodynamics_steps: + continue + + data_outputs = { + # 'method_ref': simulation.method[-1] if simulation.method else None, + 'step': step, + 'total_energy': {'contributions': {}}, + 'total_force': {'contributions': {}}, + } # nb - only allowing 1 contribution to total energy and total force for now + data_h5md = { + 'x_h5md_custom_calculations': [], + 'x_h5md_energy_contributions': [], + 'x_h5md_force_contributions': [], + } + data_outputs['time'] = outputs_info.get(step, {}).pop('time') + if not data_outputs['time']: + data_outputs['time'] = system_info.get(step, {}).pop('time') + + # TODO decide if forces will stay in system_info and will be placed still in outputs + forces = system_info.get(step, {}).get('forces') + if forces is not None: + data_outputs['total_force']['value'] = forces + + for key, val in outputs_info.get(step).items(): + key_split = key.split('-') + observable_name = key_split[0] + observable_label = key_split[1] if len(key_split) > 1 else key_split[0] + if observable_label == 'total': + data_outputs['total_energy']['value'] = val + elif ( + 'energ' in key + ): # TODO check for energies or energy when matching name + # TODO add support for energy/mole as in parse_calculation + if observable_label in self.energy_classes.keys(): + data_outputs['total_energy']['contributions'][observable_label] = {'value': val} + else: + data_h5md['x_h5md_energy_contributions'].append( + 
EnergyEntry2(name=key, value=val) + ) + elif ( + 'force' in key + ): + if observable_label in self.force_classes.keys(): + data_outputs['total_force']['contributions'][observable_label] = {'value': val} + else: + data_h5md['x_h5md_force_contributions'].append( + ForceEntry(name=key, value=val) + ) + elif hasattr(TrajectoryOutputs, observable_label): + data_outputs[observable_label] = {'value': val} + else: + unit = None + if hasattr(val, 'units'): + unit = val.units + val = val.magnitude + data_h5md['x_h5md_custom_calculations'].append( + OutputsEntry(name=key, value=val, unit=unit) + ) + + flag_parsed = self.parse_output_step(data_outputs, simulation) + # TODO use parse_section for the items below + if flag_parsed: + output = simulation.outputs[-1] + for output_entry in data_h5md['x_h5md_custom_calculations']: + output.x_h5md_custom_outputs.append(output_entry) + if len(output.total_energy) == 0 and data_h5md['x_h5md_energy_contributions']: + total_energy = TotalEnergy() + output.total_energy.append(total_energy) + sec_total_energy = output.total_energy[0] + for energy_entry in data_h5md['x_h5md_energy_contributions']: + sec_total_energy.x_h5md_contributions.append(energy_entry) + if len(output.total_force) == 0 and data_h5md['x_h5md_force_contributions']: + total_force = TotalForce() + output.total_force.append(total_force) + sec_total_force = output.total_force[0] + for force_entry in data_h5md['x_h5md_force_contributions']: + sec_total_force.x_h5md_contributions.append(force_entry) + def write_to_archive(self) -> None: self._maindir = os.path.dirname(self.mainfile) self._h5md_files = os.listdir(self._maindir) - self._basename = os.path.basename(self.mainfile).rsplit(".", 1)[0] + self._basename = os.path.basename(self.mainfile).rsplit('.', 1)[0] self._data_parser.mainfile = self.mainfile if self._data_parser.filehdf5 is None: - self.logger.warning("hdf5 file missing in H5MD Parser.") + self.logger.warning('hdf5 file missing in H5MD Parser.') return positions = 
self._data_parser.get(self._path_value_positions_all) @@ -828,47 +1271,66 @@ def write_to_archive(self) -> None: self._n_atoms = ( [len(pos) for pos in positions] if positions is not None else None ) - # Parse the hdf5 groups - sec_run = Run() - print('we are live') - self.archive.run.append(sec_run) - - group_h5md = self._data_parser.get("h5md") - if group_h5md: - program_name = self._data_parser.get("h5md.program.name", isattr=True) - program_version = self._data_parser.get("h5md.program.version", isattr=True) - sec_run.program = Program(name=program_name, version=program_version) - h5md_version = self._data_parser.get("h5md.version", isattr=True) - sec_run.x_h5md_version = h5md_version - h5md_author_name = self._data_parser.get("h5md.author.name", isattr=True) - h5md_author_email = self._data_parser.get("h5md.author.email", isattr=True) - sec_run.x_h5md_author = Author( - name=h5md_author_name, email=h5md_author_email - ) - h5md_creator_name = self._data_parser.get("h5md.creator.name", isattr=True) - h5md_creator_version = self._data_parser.get( - "h5md.creator.version", isattr=True - ) - sec_run.x_h5md_creator = Program( - name=h5md_creator_name, version=h5md_creator_version - ) - else: - self.logger.warning( - '"h5md" group missing in (H5MD)hdf5 file.' - " Program and author metadata will be missing!" 
- ) + group_h5md_dict = self.parse_h5md_group() self.parse_atom_parameters() self.parse_system_info() + self.parse_system_info2() self.parse_observable_info() - self.parse_parameter_info() + # self.parse_parameter_info() - # Populate the archive - self.parse_method() + ########################### + # Populate the OLD SCHEMA # + ########################### + sec_run = Run() + self.archive.run.append(sec_run) + sec_run.program = Program( + name=group_h5md_dict.get('program_name'), + version=group_h5md_dict.get('program_version'), + ) + sec_run.x_h5md_version = group_h5md_dict.get('h5md_version') + sec_run.x_h5md_author = Author( + name=group_h5md_dict.get('h5md_author_name'), + email=group_h5md_dict.get('h5md_author_email'), + ) + sec_run.x_h5md_creator = Program( + name=group_h5md_dict.get('h5md_creator_name'), + version=group_h5md_dict.get('h5md_creator_version'), + ) + + # self.parse_method() self.parse_system() self.parse_calculation() - self.parse_workflow() + # self.parse_workflow() + + ########################### + # Populate the NEW SCHEMA # + ########################### + simulation = Simulation() + simulation.program = BaseProgram( + name=group_h5md_dict.get('program_name'), + version=group_h5md_dict.get('program_version'), + ) + simulation.x_h5md_version = group_h5md_dict.get('h5md_version') + simulation.x_h5md_author = Author2( + name=group_h5md_dict.get('h5md_author_name'), + email=group_h5md_dict.get('h5md_author_email'), + ) + simulation.x_h5md_creator = BaseProgram( + name=group_h5md_dict.get('h5md_creator_name'), + version=group_h5md_dict.get('h5md_creator_version'), + ) + + self.parse_system2(simulation) + + # # self.parse_method(simulation) + + self.parse_outputs(simulation) + + # self.parse_workflow(simulation) + + self.archive.m_add_sub_section(EntryArchive.data, simulation) diff --git a/src/nomad_parser_h5md/schema.py b/src/nomad_parser_h5md/schema.py index 54d40cb..e0c6621 100644 --- a/src/nomad_parser_h5md/schema.py +++ 
b/src/nomad_parser_h5md/schema.py @@ -16,27 +16,29 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import numpy as np # pylint: disable=unused-import import typing # pylint: disable=unused-import + +import numpy as np # pylint: disable=unused-import +import runschema.calculation # pylint: disable=unused-import +import runschema.method # pylint: disable=unused-import +import runschema.run # pylint: disable=unused-import +import runschema.system # pylint: disable=unused-import from nomad.metainfo import ( # pylint: disable=unused-import - MSection, - MCategory, Category, + MCategory, + MSection, Package, Quantity, + Reference, Section, - SubSection, SectionProxy, - Reference, + SubSection, ) -import runschema.run # pylint: disable=unused-import -import runschema.calculation # pylint: disable=unused-import -import runschema.method # pylint: disable=unused-import -import runschema.system # pylint: disable=unused-import - m_package = Package() +# TODO update this entire schema! + class ParamEntry(MSection): """ @@ -230,4 +232,3 @@ class Run(runschema.run.Run): x_h5md_author = SubSection(sub_section=Author.m_def) x_h5md_creator = SubSection(sub_section=runschema.run.Program.m_def) - diff --git a/src/nomad_parser_h5md/schema2.py b/src/nomad_parser_h5md/schema2.py new file mode 100644 index 0000000..677217f --- /dev/null +++ b/src/nomad_parser_h5md/schema2.py @@ -0,0 +1,268 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import nomad_simulations +import numpy as np +from nomad.datamodel.data import ArchiveSection +from nomad.metainfo import Context, MEnum, Quantity, Section, SectionProxy, SubSection + + +class ParamEntry(ArchiveSection): + """ + Generic section defining a parameter name and value + """ + + name = Quantity( + type=str, + shape=[], + description=""" + Name of the parameter. + """, + ) + + value = Quantity( + type=str, + shape=[], + description=""" + Value of the parameter as a string. + """, + ) + + unit = Quantity( + type=str, + shape=[], + description=""" + Unit of the parameter as a string. + """, + ) + + description = Quantity( + type=str, + shape=[], + description=""" + Further description of the attribute. + """, + ) + + +class OutputsEntry(ArchiveSection): + """ + Section describing a general type of calculation. + """ + + name = Quantity( + type=str, + shape=[], + description=""" + Name of the quantity. + """, + ) + + value = Quantity( + type=np.dtype(np.float64), + shape=[], + description=""" + Value of this contribution. + """, + ) + + unit = Quantity( + type=str, + shape=[], + description=""" + Unit of the parameter as a string. + """, + ) + + description = Quantity( + type=str, + shape=[], + description=""" + Further description of the output. + """, + ) + +class EnergyEntry(ArchiveSection): + """ + Section describing a general type of energy contribution. + """ + + name = Quantity( + type=str, + shape=[], + description=""" + Name of the energy contribution. 
+ """, + ) + + value = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='joule', + description=""" + Value of the energy contribution. + """, + ) + +class ForceEntry(ArchiveSection): + """ + Section describing a general type of force contribution. + """ + + name = Quantity( + type=str, + shape=[], + description=""" + Name of the force contribution. + """, + ) + + value = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='newton', + description=""" + Value of the force contribution. + """, + ) + +# class ForceCalculations(runschema.method.ForceCalculations): +# m_def = Section( +# validate=False, +# extends_base_section=True, +# ) + +# x_h5md_parameters = SubSection( +# sub_section=ParamEntry.m_def, +# description=""" +# Contains non-normalized force calculation parameters. +# """, +# repeats=True, +# ) + + +# class NeighborSearching(runschema.method.NeighborSearching): +# m_def = Section( +# validate=False, +# extends_base_section=True, +# ) + +# x_h5md_parameters = SubSection( +# sub_section=ParamEntry.m_def, +# description=""" +# Contains non-normalized neighbor searching parameters. +# """, +# repeats=True, +# ) + + +class ModelSystem(nomad_simulations.model_system.ModelSystem): + """ + Model system used as an input for simulating the material. + """ + + custom_system_attributes = ( + SubSection( # TODO should this be called parameters or attributes or what? + sub_section=ParamEntry.m_def, + description=""" + Contains additional information about the (sub)system . + """, + repeats=True, + ) + ) + +class TotalEnergy(nomad_simulations.properties.TotalEnergy): + + x_h5md_contributions = SubSection( + sub_section=EnergyEntry.m_def, + description=""" + Contains other custom energy contributions that are not already defined. 
+ """, + repeats=True, + ) + +class TotalForce(nomad_simulations.properties.TotalForce): + + x_h5md_contributions = SubSection( + sub_section=ForceEntry.m_def, + description=""" + Contains other custom force contributions that are not already defined. + """, + repeats=True, + ) + + +class TrajectoryOutputs(nomad_simulations.outputs.TrajectoryOutputs): + m_def = Section( + validate=False, + extends_base_section=True, + ) + + x_h5md_custom_outputs = SubSection( + sub_section=OutputsEntry.m_def, + description=""" + Contains other generic custom outputs that are not already defined. + """, + repeats=True, + ) + + total_energy = SubSection(sub_section=TotalEnergy.m_def, repeats=True) + + total_force = SubSection(sub_section=TotalForce.m_def, repeats=True) + +class Author(ArchiveSection): + """ + Contains the specifications of the program. + """ + + name = Quantity( + type=str, + shape=[], + description=""" + Specifies the name of the author who generated the h5md file. + """, + ) + + email = Quantity( + type=str, + shape=[], + description=""" + Author's email. + """, + ) + + +class Simulation(nomad_simulations.Simulation): + m_def = Section( + validate=False, + extends_base_section=True, + ) + + # TODO Not sure how we are dealing with versioning with H5MD-NOMAD + x_h5md_version = Quantity( + type=np.dtype(np.int32), + shape=[2], + description=""" + Specifies the version of the h5md schema being followed. 
+ """, + ) + + x_h5md_author = SubSection(sub_section=Author.m_def) + + x_h5md_creator = SubSection(sub_section=nomad_simulations.Program.m_def) diff --git a/tests/data/openmm/test_traj_openmm_5frames.h5 b/tests/data/openmm/test_traj_openmm_5frames.h5 index c686fd1..22bc400 100644 Binary files a/tests/data/openmm/test_traj_openmm_5frames.h5 and b/tests/data/openmm/test_traj_openmm_5frames.h5 differ diff --git a/tests/test_h5md.py b/tests/test_h5md.py index 564a56a..f1d2c04 100644 --- a/tests/test_h5md.py +++ b/tests/test_h5md.py @@ -16,9 +16,8 @@ # limitations under the License. # -import pytest import numpy as np - +import pytest from nomad.datamodel import EntryArchive from nomad_parser_h5md.parser import H5MDParser @@ -36,16 +35,22 @@ def test_md(parser): archive = EntryArchive() parser.parse('tests/data/openmm/test_traj_openmm_5frames.h5', archive, None) - # sec_run = archive.run[0] - # assert sec_run.program.name == 'OpenMM' - # assert sec_run.program.version == '-1.-1.-1' - # assert len(sec_run.x_h5md_version) == 2 - # assert sec_run.x_h5md_version[1] == 0 - # assert sec_run.x_h5md_author.name == 'Joseph F. Rudzinski' - # assert sec_run.x_h5md_author.email == 'joseph.rudzinski@physik.hu-berlin.de' - # assert sec_run.x_h5md_creator.name == 'h5py' - # assert sec_run.x_h5md_creator.version == '3.6.0' + ####################### + # Test the OLD SCHEMA # + ####################### + + ## H5MD + sec_run = archive.run[0] + assert sec_run.program.name == 'OpenMM' + assert sec_run.program.version == '-1.-1.-1' + assert len(sec_run.x_h5md_version) == 2 + assert sec_run.x_h5md_version[1] == 0 + assert sec_run.x_h5md_author.name == 'Joseph F. 
Rudzinski' + assert sec_run.x_h5md_author.email == 'joseph.rudzinski@physik.hu-berlin.de' + assert sec_run.x_h5md_creator.name == 'h5py' + assert sec_run.x_h5md_creator.version == '3.6.0' + ## METHOD # sec_method = sec_run.method # sec_atom_params = sec_method[0].atom_parameters # assert len(sec_atom_params) == 31583 @@ -66,69 +71,88 @@ def test_md(parser): # assert sec_method[0].force_field.force_calculations.neighbor_searching.neighbor_update_frequency == 1 # assert sec_method[0].force_field.force_calculations.neighbor_searching.neighbor_update_cutoff.to('nm').magnitude == approx(1.2) - # sec_systems = sec_run.system - # assert len(sec_systems) == 5 - # assert np.shape(sec_systems[0].atoms.positions) == (31583, 3) - # assert np.shape(sec_systems[0].atoms.velocities) == (31583, 3) - # assert sec_systems[0].atoms.n_atoms == 31583 - # assert sec_systems[0].atoms.labels[100] == 'H' - - # assert sec_systems[2].atoms.positions[800][1].to('angstrom').magnitude == approx(26.860575) - # assert sec_systems[2].atoms.velocities[1200][2].to('angstrom/ps').magnitude == approx(400.0) - # assert sec_systems[3].atoms.lattice_vectors[2][2].to('angstrom').magnitude == approx(68.22318) - # assert sec_systems[0].atoms.bond_list[200][0] == 198 - - # sec_atoms_group = sec_systems[0].atoms_group - # assert len(sec_atoms_group) == 4 - # assert sec_atoms_group[0].label == 'group_1ZNF' - # assert sec_atoms_group[0].type == 'molecule_group' - # assert sec_atoms_group[0].composition_formula == '1ZNF(1)' - # assert sec_atoms_group[0].n_atoms == 423 - # assert sec_atoms_group[0].atom_indices[159] == 159 - # assert sec_atoms_group[0].is_molecule is False - # sec_proteins = sec_atoms_group[0].atoms_group - # assert len(sec_proteins) == 1 - # assert sec_proteins[0].label == '1ZNF' - # assert sec_proteins[0].type == 'molecule' - # assert sec_proteins[0].composition_formula == 
'ACE(1)TYR(1)LYS(1)CYS(1)GLY(1)LEU(1)CYS(1)GLU(1)ARG(1)SER(1)PHE(1)VAL(1)GLU(1)LYS(1)SER(1)ALA(1)LEU(1)SER(1)ARG(1)HIS(1)GLN(1)ARG(1)VAL(1)HIS(1)LYS(1)ASN(1)NH2(1)' - # assert sec_proteins[0].n_atoms == 423 - # assert sec_proteins[0].atom_indices[400] == 400 - # assert sec_proteins[0].is_molecule is True - # sec_res_group = sec_proteins[0].atoms_group - # assert len(sec_res_group) == 27 - # assert sec_res_group[14].label == 'group_ARG' - # assert sec_res_group[14].type == 'monomer_group' - # assert sec_res_group[14].composition_formula == 'ARG(1)' - # assert sec_res_group[14].n_atoms == 24 - # assert sec_res_group[14].atom_indices[2] == 329 - # assert sec_res_group[14].is_molecule is False - # sec_res = sec_res_group[14].atoms_group - # assert len(sec_res) == 1 - # assert sec_res[0].label == 'ARG' - # assert sec_res[0].type == 'monomer' - # assert sec_res[0].composition_formula == 'C(1)CA(1)CB(1)CD(1)CG(1)CZ(1)H(1)HA(1)HB2(1)HB3(1)HD2(1)HD3(1)HE(1)HG2(1)HG3(1)HH11(1)HH12(1)HH21(1)HH22(1)N(1)NE(1)NH1(1)NH2(1)O(1)' - # assert sec_res[0].n_atoms == 24 - # assert sec_res[0].atom_indices[10] == 337 - # assert sec_res[0].is_molecule is False - # assert sec_res[0].x_h5md_parameters[0].kind == 'hydrophobicity' - # assert sec_res[0].x_h5md_parameters[0].value == '0.81' - # assert sec_res[0].x_h5md_parameters[0].unit is None - - # sec_calc = sec_run.calculation - # assert len(sec_calc) == 5 - # assert np.shape(sec_calc[1].forces.total.value) == (31583, 3) - # assert sec_calc[1].forces.total.value[2100][2].to('newton').magnitude == approx(500.0) - # assert sec_calc[2].temperature.to('kelvin').magnitude == approx(300.0) - # assert len(sec_calc[1].x_h5md_custom_calculations) == 1 - # assert sec_calc[1].x_h5md_custom_calculations[0].kind == 'custom_thermo' - # assert sec_calc[1].x_h5md_custom_calculations[0].value == approx(100.0) - # assert sec_calc[1].x_h5md_custom_calculations[0].unit == 'newton / angstrom ** 2' - # assert sec_calc[2].time.to('ps').magnitude == approx(2.0) - 
# assert sec_calc[2].energy.kinetic.value.to('kilojoule').magnitude == approx(2.0) - # assert sec_calc[2].energy.potential.value.to('kilojoule').magnitude == approx(1.0) - # assert sec_calc[1].energy.x_h5md_energy_contributions[0].kind == 'energy-custom' - # assert sec_calc[1].energy.x_h5md_energy_contributions[0].value.magnitude == approx(3000.0) + ## SYSTEM + sec_systems = sec_run.system + assert len(sec_systems) == 5 + assert np.shape(sec_systems[0].atoms.positions) == (31583, 3) + assert np.shape(sec_systems[0].atoms.velocities) == (31583, 3) + assert sec_systems[0].atoms.n_atoms == 31583 + assert sec_systems[0].atoms.labels[100] == 'H' + + assert sec_systems[2].atoms.positions[800][1].to('angstrom').magnitude == approx( + 26.860575 + ) + assert sec_systems[2].atoms.velocities[1200][2].to( + 'angstrom/ps' + ).magnitude == approx(400.0) + assert sec_systems[3].atoms.lattice_vectors[2][2].to( + 'angstrom' + ).magnitude == approx(68.22318) + assert sec_systems[0].atoms.bond_list[200][0] == 198 + + sec_atoms_group = sec_systems[0].atoms_group + assert len(sec_atoms_group) == 4 + assert sec_atoms_group[0].label == 'group_1ZNF' + assert sec_atoms_group[0].type == 'molecule_group' + assert sec_atoms_group[0].composition_formula == '1ZNF(1)' + assert sec_atoms_group[0].n_atoms == 423 + assert sec_atoms_group[0].atom_indices[159] == 159 + assert sec_atoms_group[0].is_molecule is False + sec_proteins = sec_atoms_group[0].atoms_group + assert len(sec_proteins) == 1 + assert sec_proteins[0].label == '1ZNF' + assert sec_proteins[0].type == 'molecule' + assert ( + sec_proteins[0].composition_formula + == 'ACE(1)TYR(1)LYS(3)CYS(2)GLY(1)LEU(2)GLU(2)ARG(3)SER(3)PHE(1)VAL(2)ALA(1)HIS(2)GLN(1)ASN(1)NH2(1)' + ) + assert sec_proteins[0].n_atoms == 423 + assert sec_proteins[0].atom_indices[400] == 400 + assert sec_proteins[0].is_molecule is True + sec_res_group = sec_proteins[0].atoms_group + assert len(sec_res_group) == 16 + assert sec_res_group[14].label == 'group_SER' + assert 
sec_res_group[14].type == 'monomer_group' + assert sec_res_group[14].composition_formula == 'SER(3)' + assert sec_res_group[14].n_atoms == 33 + assert sec_res_group[14].atom_indices[2] == 136 + assert sec_res_group[14].is_molecule is False + sec_res = sec_res_group[14].atoms_group + assert len(sec_res) == 3 + assert sec_res[0].label == 'SER' + assert sec_res[0].type == 'monomer' + assert ( + sec_res[0].composition_formula + == 'C(1)CA(1)CB(1)H(1)HA(1)HB2(1)HB3(1)HG(1)N(1)O(1)OG(1)' + ) + assert sec_res[0].n_atoms == 11 + assert sec_res[0].atom_indices[10] == 144 + assert sec_res[0].is_molecule is False + assert sec_res[0].x_h5md_parameters[0].kind == 'hydrophobicity' + assert sec_res[0].x_h5md_parameters[0].value == '0.13' + assert sec_res[0].x_h5md_parameters[0].unit is None + + ## CALCULATION + sec_calc = sec_run.calculation + assert len(sec_calc) == 5 + assert np.shape(sec_calc[1].forces.total.value) == (31583, 3) + assert sec_calc[1].forces.total.value[2100][2].to('newton').magnitude == approx( + 500.0 + ) + assert sec_calc[2].temperature.to('kelvin').magnitude == approx(300.0) + assert len(sec_calc[1].x_h5md_custom_calculations) == 1 + assert sec_calc[1].x_h5md_custom_calculations[0].kind == 'custom_thermo' + assert sec_calc[1].x_h5md_custom_calculations[0].value == approx(100.0) + assert sec_calc[1].x_h5md_custom_calculations[0].unit == 'newton / angstrom ** 2' + assert sec_calc[2].time.to('ps').magnitude == approx(2.0) + assert sec_calc[2].energy.kinetic.value.to('kilojoule').magnitude == approx(2.0) + assert sec_calc[2].energy.potential.value.to('kilojoule').magnitude == approx(1.0) + assert sec_calc[1].energy.x_h5md_energy_contributions[0].kind == 'energy-custom' + assert sec_calc[1].energy.x_h5md_energy_contributions[0].value.magnitude == approx( + 3000.0 + ) + ## WORKFLOW # sec_workflow = archive.workflow2 # assert sec_workflow.m_def.name == 'MolecularDynamics' # sec_method = sec_workflow.method @@ -177,3 +201,84 @@ def test_md(parser): # assert 
correlation_function_0.mean_squared_displacement_values[0].times[10].to('ps').magnitude == approx(20.0) # assert correlation_function_0.mean_squared_displacement_values[0].value[10].to('nm**2').magnitude == approx(0.679723) # assert correlation_function_0.mean_squared_displacement_values[0].errors[10] == approx(0.0) + + ####################### + # Test the NEW SCHEMA # + ####################### + + ## H5MD + # TODO convert towards unit testing + sec_simulation = archive.data + assert sec_simulation.program.name == 'OpenMM' + assert sec_simulation.program.version == '-1.-1.-1' + assert len(sec_simulation.x_h5md_version) == 2 + assert sec_simulation.x_h5md_version[1] == 0 + assert sec_simulation.x_h5md_author.name == 'Joseph F. Rudzinski' + assert sec_simulation.x_h5md_author.email == 'joseph.rudzinski@physik.hu-berlin.de' + assert sec_simulation.x_h5md_creator.name == 'h5py' + assert sec_simulation.x_h5md_creator.version == '3.6.0' + + ## SYSTEM + sec_systems = sec_simulation.model_system + assert len(sec_systems) == 5 + assert np.shape(sec_systems[0].cell[0].positions) == (31583, 3) + assert np.shape(sec_systems[0].cell[0].velocities) == (31583, 3) + assert sec_systems[0].cell[0].n_atoms == 31583 + assert sec_systems[0].cell[0].atoms_state[100].chemical_symbol == 'H' + + assert sec_systems[2].cell[0].positions[800][1].to('angstrom').magnitude == approx( + 26.860575 + ) + assert sec_systems[2].cell[0].velocities[1200][2].to( + 'angstrom/ps' + ).magnitude == approx(400.0) + assert sec_systems[3].cell[0].lattice_vectors[2][2].to( + 'angstrom' + ).magnitude == approx(68.22318) + assert sec_systems[0].bond_list[200][0] == 198 + assert sec_systems[0].dimensionality == 3 + + sec_atoms_group = sec_systems[0].model_system + assert len(sec_atoms_group) == 4 + assert sec_atoms_group[0].branch_label == 'group_1ZNF' + assert sec_atoms_group[0].atom_indices[159] == 159 + sec_proteins = sec_atoms_group[0].model_system + assert len(sec_proteins) == 1 + assert 
sec_proteins[0].branch_label == '1ZNF' + assert sec_proteins[0].atom_indices[400] == 400 + sec_res_group = sec_proteins[0].model_system + assert len(sec_res_group) == 16 + assert sec_res_group[14].branch_label == 'group_SER' + assert sec_res_group[14].atom_indices[2] == 136 + sec_res = sec_res_group[14].model_system + assert len(sec_res) == 3 + assert sec_res[0].branch_label == 'SER' + assert sec_res[0].atom_indices[10] == 144 + assert sec_res[0].custom_system_attributes[0].name == 'hydrophobicity' + assert sec_res[0].custom_system_attributes[0].value == '0.13' + assert sec_res[0].custom_system_attributes[0].unit is None + + ## OUTPUTS + ## CALCULATION + sec_outputs = sec_simulation.outputs + assert len(sec_outputs) == 5 + assert np.shape(sec_outputs[1].total_force[0].value) == (31583, 3) + assert sec_outputs[1].total_force[0].value[2100][2].to('newton').magnitude == approx( + 500.0 + ) + assert sec_outputs[2].temperature[0].value.to('kelvin').magnitude == approx(300.0) + assert len(sec_outputs[1].x_h5md_custom_outputs) == 1 + assert sec_outputs[1].x_h5md_custom_outputs[0].name == 'custom_thermo' + assert sec_outputs[1].x_h5md_custom_outputs[0].value == approx(100.0) + assert sec_outputs[1].x_h5md_custom_outputs[0].unit == 'newton / angstrom ** 2' + assert sec_outputs[2].time.to('ps').magnitude == approx(2.0) + # TODO add total energy value + assert sec_outputs[2].total_energy[0].contributions[0].m_def.name == 'KineticEnergy' + assert sec_outputs[2].total_energy[0].contributions[0].value.to('kilojoule').magnitude == approx(2.0) + assert sec_outputs[2].total_energy[0].contributions[1].m_def.name == 'PotentialEnergy' + assert sec_outputs[2].total_energy[0].contributions[1].value.to('kilojoule').magnitude == approx(1.0) + assert sec_outputs[1].total_energy[0].x_h5md_contributions[0].name == 'energy-custom' + assert sec_outputs[1].total_energy[0].x_h5md_contributions[0].value.magnitude == approx( + 3000.0 + ) + # TODO add custom force contributions \ No newline at end 
of file