diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 82e96d3fa..754576d38 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,11 +39,11 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }} @@ -69,10 +69,11 @@ jobs: - name: Run spec_test coverage run: HED_GITHUB_TOKEN=${{ secrets.HED_GITHUB_TOKEN }} coverage run --append -m unittest spec_tests/test_errors.py + continue-on-error: true - name: Archive code coverage results if: ${{matrix.python-version == '3.9'}} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: code-coverage-report path: .coverage @@ -102,11 +103,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.9 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - - uses: actions/cache@v3 + - uses: actions/cache@v4 with: path: ${{ env.pythonLocation }} key: ${{ env.pythonLocation }}-${{ hashFiles('setup.py') }}-${{ hashFiles('docs/requirements.txt') }} @@ -120,7 +121,7 @@ jobs: pip install -r docs/requirements.txt - name: Download a single artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: code-coverage-report diff --git a/.github/workflows/spec_tests.yaml b/.github/workflows/spec_tests.yaml index 607b650c6..c930c04a7 100644 --- a/.github/workflows/spec_tests.yaml +++ b/.github/workflows/spec_tests.yaml @@ -22,7 +22,7 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -31,13 +31,30 @@ jobs: python -m pip install --upgrade --upgrade-strategy eager pip pip install -r requirements.txt - - name: Test with unittest + - name: Spec Error Tests + id: spec_error_tests + continue-on-error: true run: | - python -m unittest spec_tests/test_errors.py > test_results.txt + python -m unittest spec_tests/test_errors.py > error_results.txt - - name: Upload spec test results - uses: actions/upload-artifact@v3 + - name: Bids Validation Test + id: bids_validation_test + continue-on-error: true + run: | + python -m unittest spec_tests/validate_bids.py > validate_bids_results.txt + + - name: Upload error test results + uses: actions/upload-artifact@v4 + with: + name: error-test-results + path: error_results.txt + + - name: Upload bids test results + uses: actions/upload-artifact@v4 with: - name: spec-test-results - path: test_results.txt + name: bids-test-results + path: validate_bids_results.txt + - name: Fail if Tests Failed + if: steps.spec_error_tests.outcome == 'failure' || steps.bids_validation_test.outcome == 'failure' + run: exit 1 diff --git a/.github/workflows/test_installer.yaml b/.github/workflows/test_installer.yaml new file mode 100644 index 000000000..d9e6dc24c --- /dev/null +++ b/.github/workflows/test_installer.yaml @@ -0,0 +1,31 @@ +on: + push: + branches: [develop] + pull_request: + branches: [develop] + +jobs: + build: + + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.7", "3.10"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + 
python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel + - name: Install package + run: | + pip install . + - name: Run post-installation test + run: | + python -c "import hed" \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index b4a35641d..d490a2944 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,8 @@ path = spec_tests/hed-specification url = https://github.com/hed-standard/hed-specification/ branch = develop + +[submodule "spec_tests/hed-examples"] + path = spec_tests/hed-examples + url = https://github.com/hed-standard/hed-examples/ + branch = develop \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index 59c144b44..db57d26ca 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,6 @@ defusedxml>=0.7.1 inflect>=6.0.5 +jsonschema>=4.17.3 numpy>=1.21.6 openpyxl>=3.1.0 pandas>=1.3.5 @@ -8,4 +9,5 @@ semantic_version>=2.10.0 myst-parser>=1.0.0 Sphinx>=5.2.2 sphinx_rtd_theme>=1.0.0 -wordcloud==1.9.2 +wordcloud==1.9.3 +rdflib>=6 diff --git a/docs/source/_static/customize_icon.css b/docs/source/_static/customize_icon.css new file mode 100644 index 000000000..6c2036e7e --- /dev/null +++ b/docs/source/_static/customize_icon.css @@ -0,0 +1,13 @@ +.icon-home:before { + content: ''; + /* Other styles to adjust size, positioning, etc. */ + display: inline-block; + width: 24px; + height: 24px; + background: url('images/croppedWideLogo.png') no-repeat center center; + background-size: contain; +} + +.icon-home { + font-family: inherit; +} \ No newline at end of file diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst index cf03326d1..1039bc702 100644 --- a/docs/source/_templates/custom-class-template.rst +++ b/docs/source/_templates/custom-class-template.rst @@ -8,34 +8,24 @@ .. rubric:: {{ _('Methods') }} .. autosummary:: -{% for item in methods %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in methods %} + {{ objname }}.{{ item }} {%- endfor %} .. rubric:: {{ _('Attributes') }} .. autosummary:: -{% for item in attributes %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in attributes %} + {{ objname }}.{{ item }} {%- endfor %} -.. toctree:: - :hidden: - -{% for item in methods %} - {{ fullname }}#method-{{ item }} -{%- endfor %} -{% for item in attributes %} - {{ fullname }}#attribute-{{ item }} -{%- endfor %} - -{% for item in methods %} +{%- for item in methods %} .. _method-{{ item }}: .. automethod:: {{ module }}.{{ objname }}.{{ item }} {%- endfor %} -{% for item in attributes %} +{%- for item in attributes %} .. _attribute-{{ item }}: .. autoattribute:: {{ module }}.{{ objname }}.{{ item }} diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst index 9e9c8a771..6062649de 100644 --- a/docs/source/_templates/custom-module-template.rst +++ b/docs/source/_templates/custom-module-template.rst @@ -38,7 +38,7 @@ .. rubric:: {{ _('Classes') }} .. 
autosummary:: - :toctree: + :toctree: _generated_classes :template: custom-class-template.rst {% for item in classes %} {{ item }} diff --git a/docs/source/conf.py b/docs/source/conf.py index 5887981ec..6d69ae7c4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,7 @@ "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.autosummary", - "sphinx.ext.autosectionlabel", + # "sphinx.ext.autosectionlabel", "sphinx.ext.intersphinx", "sphinx.ext.coverage", "sphinx.ext.mathjax", @@ -97,3 +97,7 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] + +html_css_files = [ + 'customize_icon.css', +] diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index fbb72f414..3ff7f3c9c 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -42,10 +42,6 @@ Finding help The `HED online tools `_ provide an easy-to-use interface that requires no programming. -:Mailing lists and forums: - - * Don't hesitate to ask questions about the python hedtools on `NeuroStars - `_. :Issues and problems: * If you notice a bug in the python hedtools code or encounter other problems using the tools, please `open an issue`_ in the diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index 77c00d052..657aefbb4 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -25,6 +25,12 @@ def val_error_empty_group(tag): return f"HED tags cannot be empty. Extra delimiters found: '{tag}'" +@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR) +def val_error_hed_onset_with_no_column(tag): + return f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'" + + + @hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING) def val_error_tag_extended(tag, problem_tag): return f"Hed tag is extended. '{problem_tag}' in {tag}" @@ -39,7 +45,7 @@ def val_error_invalid_char(source_string, char_index): @hed_tag_error(ValidationErrors.INVALID_TAG_CHARACTER, has_sub_tag=True, actual_code=ValidationErrors.CHARACTER_INVALID) def val_error_invalid_tag_character(tag, problem_tag): - return f"Invalid character '{problem_tag}' in {tag}" + return f"Invalid character '{problem_tag}' in tag '{tag}'" @hed_error(ValidationErrors.TILDES_UNSUPPORTED) @@ -48,6 +54,12 @@ def val_error_tildes_not_supported(source_string, char_index): return f"Tildes not supported. Replace (a ~ b ~ c) with (a, (b, c)). '{character}' at index {char_index}'" +@hed_tag_error(ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE, has_sub_tag=True, + actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) +def val_error_CURLY_BRACE_UNSUPPORTED_HERE(tag, problem_tag): + return (f"Curly braces are only permitted in sidecars, fully wrapping text in place of a tag. " + f"Invalid character '{problem_tag}' in tag '{tag}'") + @hed_error(ValidationErrors.COMMA_MISSING) def val_error_comma_missing(tag): return f"Comma missing after - '{tag}'" @@ -227,7 +239,6 @@ def val_warning_capitalization(tag): @hed_tag_error(ValidationErrors.UNITS_MISSING, default_severity=ErrorSeverity.WARNING) def val_warning_default_units_used(tag, default_unit): - # todo: add a test case for no default unit. if default_unit is None: return f"No unit specified on - '{tag}'. No default unit is specified for type." return f"No unit specified. 
Using '{default_unit}' as the default - '{tag}'" @@ -383,7 +394,7 @@ def onset_wrong_placeholder(tag, has_placeholder): @hed_error(ColumnErrors.INVALID_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) def invalid_column_ref(bad_ref): - return f"The column '{bad_ref}' is unknown.'" + return f"The column '{bad_ref}' is unknown or does not have HED annotations.'" @hed_error(ColumnErrors.SELF_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID) diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 409656235..e67c40bc6 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -298,10 +298,9 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non if not error_func: error_object = ErrorHandler.val_error_unknown(*args, **kwargs) error_object['code'] = error_type - ErrorHandler._add_context_to_errors(error_object, error_context) - return [error_object] + else: + error_object = error_func(*args, **kwargs) - error_object = error_func(*args, **kwargs) if actual_error: error_object['code'] = actual_error @@ -321,8 +320,6 @@ def _add_context_to_errors(error_object, error_context_to_add): list: A list of dict with needed context strings added at the beginning of the list. """ - if error_object is None: - error_object = {} for (context_type, context) in error_context_to_add: error_object[context_type] = context @@ -345,11 +342,7 @@ def _get_tag_span_to_error_object(error_object): if ErrorContext.HED_STRING not in error_object: return None, None - if 'char_index' in error_object: - char_index = error_object['char_index'] - char_index_end = error_object.get('char_index_end', char_index + 1) - return char_index, char_index_end - elif 'source_tag' in error_object: + if 'source_tag' in error_object: source_tag = error_object['source_tag'] if isinstance(source_tag, int): return None, None @@ -364,7 +357,9 @@ def _get_tag_span_to_error_object(error_object): def _update_error_with_char_pos(error_object): # This part is optional as you can always generate these as needed. start, end = ErrorHandler._get_tag_span_to_error_object(error_object) - if start is not None and end is not None: + if start is not None: + # silence warning in pycharm + start = int(start) source_tag = error_object.get('source_tag', None) # Todo: Move this functionality somewhere more centralized. # If the tag has been modified from the original, don't try to use sub indexing. diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index a866ec326..5dc32737f 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -90,6 +90,8 @@ class ValidationErrors: INVALID_TAG_CHARACTER = 'invalidTagCharacter' + CURLY_BRACE_UNSUPPORTED_HERE = "CURLY_BRACE_UNSUPPORTED_HERE" + class SidecarErrors: @@ -164,7 +166,7 @@ class OnsetErrors: ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP" INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET" ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW" - + HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN' class ColumnErrors: INVALID_COLUMN_REF = "INVALID_COLUMN_REF" diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index 63e676c59..d222a1922 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -4,6 +4,7 @@ class HedExceptions: GENERIC_ERROR = 'GENERIC_ERROR' # A list of all exceptions that can be generated by the hedtools. 
+ URL_ERROR = "URL_ERROR" FILE_NOT_FOUND = 'fileNotFound' BAD_PARAMETERS = 'badParameters' CANNOT_PARSE_XML = 'cannotParseXML' @@ -40,8 +41,13 @@ class HedExceptions: HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID' SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix' + SCHEMA_DUPLICATE_LIBRARY = "SCHEMA_LIBRARY_INVALID" BAD_COLUMN_NAMES = 'BAD_COLUMN_NAMES' + SCHEMA_DUPLICATE_NAMES = "SCHEMA_DUPLICATE_NAMES" + + CANNOT_PARSE_RDF = "CANNOT_PARSE_RDF" + class HedFileError(Exception): """Exception raised when a file cannot be parsed due to being malformed, file IO, etc.""" @@ -51,8 +57,4 @@ def __init__(self, code, message, filename, issues=None): self.filename = filename self.issues = issues if self.issues is None: - self.issues = [ - {'message': message, - ErrorContext.FILE_NAME: filename, - 'code': code} - ] + self.issues = [] diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 0d689510c..49edf9e86 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -30,12 +30,14 @@ def __init__(self, def_dicts=None, hed_schema=None): self.add_definitions(def_dicts, hed_schema) def add_definitions(self, def_dicts, hed_schema=None): - """ Add definitions from dict(s) to this dict. + """ Add definitions from dict(s) or strings(s) to this dict. Parameters: - def_dicts (list, DefinitionDict, or dict): DefinitionDict or list of DefinitionDicts/strings/dicts whose - definitions should be added. - Note dict form expects DefinitionEntries in the same form as a DefinitionDict + def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts whose + definitions should be added. + Note - dict form expects DefinitionEntries in the same form as a DefinitionDict + Note - str or list of strings will parse the strings using the hed_schema. + Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 736ff562e..83e91adc8 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -332,7 +332,7 @@ def __init__(self, expression_string): '"Event"' - Finds the Event tag, but not any descendent tags - 'Def/DefName/*' - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 7f4b5321a..db6dd7e80 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -316,9 +316,12 @@ def _calculate_to_canonical_forms(self, hed_schema): return tag_issues - def get_stripped_unit_value(self): + def get_stripped_unit_value(self, extension_text): """ Return the extension divided into value and units, if the units are valid. + Parameters: + extension_text (str): The text to split, in case it's a portion of a tag. + Returns: stripped_unit_value (str): The extension portion with the units removed. unit (str or None): None if no valid unit found. 
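Reviewer note on the `HedTag` changes below: `get_stripped_unit_value` now takes the text to split as an explicit `extension_text` argument, and `_get_tag_units_portion` becomes a static helper, so unit parsing can be applied to a portion of a tag rather than always to `self.extension`. A minimal sketch of the new call pattern — the `HedTag` constructor usage and the schema version are assumptions for illustration, not part of this diff:

```python
from hed import load_schema_version
from hed.models import HedTag

schema = load_schema_version("8.2.0")  # assumed: any cached standard schema works
tag = HedTag("Duration/35 ms", hed_schema=schema)

# The caller now chooses what text to split; here we pass the full extension.
value, unit = tag.get_stripped_unit_value(tag.extension)
print(value, unit)  # expected: ('35', 'ms')
```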
@@ -328,7 +331,7 @@ def get_stripped_unit_value(self):
         """
         tag_unit_classes = self.unit_classes
-        stripped_value, unit, _ = self._get_tag_units_portion(tag_unit_classes)
+        stripped_value, unit, _ = HedTag._get_tag_units_portion(extension_text, tag_unit_classes)

         if stripped_value:
             return stripped_value, unit
@@ -354,7 +357,7 @@ def value_as_default_unit(self):
             unit_entry = self.default_unit
             unit = unit_entry.name
         else:
-            stripped_value, unit, unit_entry = self._get_tag_units_portion(tag_unit_classes)
+            stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes)

         if stripped_value:
             if unit_entry.get_conversion_factor(unit) is not None:
@@ -496,6 +499,7 @@ def default_unit(self):
         """ Get the default unit class unit for this tag.

             Only a tag with a single unit class can have default units.
+
         Returns:
             unit(UnitEntry or None): the default unit entry for this tag, or None
         """
@@ -544,7 +548,8 @@ def _get_schema_namespace(org_tag):
             return org_tag[:first_colon + 1]
         return ""

-    def _get_tag_units_portion(self, tag_unit_classes):
+    @staticmethod
+    def _get_tag_units_portion(extension_text, tag_unit_classes):
         """ Check that this string has valid units and remove them.

         Parameters:
@@ -555,19 +560,19 @@
                 This is filled in if there are no units as well.
             unit (UnitEntry or None): The matching unit entry if one is found
         """
-        value, _, units = self.extension.rpartition(" ")
+        value, _, units = extension_text.rpartition(" ")
         if not units:
             return None, None, None

         for unit_class_entry in tag_unit_classes.values():
             all_valid_unit_permutations = unit_class_entry.derivative_units

-            possible_match = self._find_modifier_unit_entry(units, all_valid_unit_permutations)
+            possible_match = HedTag._find_modifier_unit_entry(units, all_valid_unit_permutations)
             if possible_match and not possible_match.has_attribute(HedKey.UnitPrefix):
                 return value, units, possible_match

             # Repeat the above, but as a prefix
-            possible_match = self._find_modifier_unit_entry(value, all_valid_unit_permutations)
+            possible_match = HedTag._find_modifier_unit_entry(value, all_valid_unit_permutations)
             if possible_match and possible_match.has_attribute(HedKey.UnitPrefix):
                 return units, value, possible_match

diff --git a/hed/models/spreadsheet_input.py b/hed/models/spreadsheet_input.py
index d2bcbc1bd..6022c594e 100644
--- a/hed/models/spreadsheet_input.py
+++ b/hed/models/spreadsheet_input.py
@@ -12,20 +12,21 @@ def __init__(self, file=None, file_type=None, worksheet_name=None, tag_columns=N

         Parameters:
             file (str or file like): An xlsx/tsv file to open or a File object.
-            file_type (str or None): ".xlsx" for excel, ".tsv" or ".txt" for tsv. data. If file is a string, the
+            file_type (str or None): ".xlsx" for Excel, ".tsv" or ".txt" for tab-separated data.
             worksheet_name (str or None): The name of the Excel workbook worksheet that contains the HED tags.
                 Not applicable to tsv files. If omitted for Excel, the first worksheet is assumed.
             tag_columns (list): A list of ints containing the columns that contain the HED tags.
                 The default value is [1] indicating only the second column has tags.
-            has_column_names (bool): True if file has column names. Validation will skip over the
-                first line of the file if the spreadsheet as column names.
+            has_column_names (bool): True if file has column names. Validation will skip over the first row.
- column_prefix_dictionary (dict): Dictionary with keys that are column numbers/names and values are HED tag + column_prefix_dictionary (dict or None): Dictionary with keys that are column numbers/names and values are HED tag prefixes to prepend to the tags in that column before processing. Notes: + - If file is a string, file_type is derived from file and this parameter is ignored. - column_prefix_dictionary may be deprecated/renamed. These are no longer prefixes, but rather converted to value columns. - eg. {"key": "Description", 1: "Label/"} will turn into value columns as + e.g. {"key": "Description", 1: "Label/"} will turn into value columns as {"key": "Description/#", 1: "Label/#"} It will be a validation issue if column 1 is called "key" in the above example. This means it no longer accepts anything but the value portion only in the columns. diff --git a/hed/schema/__init__.py b/hed/schema/__init__.py index 23902f0eb..54f4b07a3 100644 --- a/hed/schema/__init__.py +++ b/hed/schema/__init__.py @@ -6,4 +6,4 @@ from .hed_schema_io import load_schema, load_schema_version, from_string, get_hed_xml_version from .hed_schema_constants import HedKey, HedSectionKey from .hed_cache import cache_xml_versions, get_hed_versions, \ - get_path_from_hed_version, set_cache_directory, get_cache_directory + set_cache_directory, get_cache_directory diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py index 299af6f66..0db0f145e 100644 --- a/hed/schema/hed_cache.py +++ b/hed/schema/hed_cache.py @@ -6,13 +6,14 @@ import json from hashlib import sha1 from shutil import copyfile -import urllib +from hed.errors.exceptions import HedFileError, HedExceptions import re from semantic_version import Version import portalocker import time from hed.schema.schema_io.schema_util import url_to_file, make_url_request from pathlib import Path +import urllib # From https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string HED_VERSION_P1 = r"(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)" @@ -29,9 +30,9 @@ DEFAULT_HED_LIST_VERSIONS_URL = "https://api.github.com/repos/hed-standard/hed-schemas/contents/standard_schema/hedxml" LIBRARY_HED_URL = "https://api.github.com/repos/hed-standard/hed-schemas/contents/library_schemas" -DEFAULT_URL_LIST = (DEFAULT_HED_LIST_VERSIONS_URL, LIBRARY_HED_URL, ) +DEFAULT_URL_LIST = (DEFAULT_HED_LIST_VERSIONS_URL, LIBRARY_HED_URL,) -DEFAULT_SKIP_FOLDERS = ('deprecated', ) +DEFAULT_SKIP_FOLDERS = ('deprecated',) HED_CACHE_DIRECTORY = os.path.join(Path.home(), '.hedtools/hed_cache/') TIMESTAMP_FILENAME = "last_update.txt" @@ -121,38 +122,29 @@ def cache_specific_url(hed_xml_url, xml_version=None, library_name=None, cache_f if not _check_if_url(hed_xml_url): return None - if _check_if_api_url(hed_xml_url): - return _download_latest_hed_xml_version_from_url(hed_xml_url, - xml_version=xml_version, - library_name=library_name, - cache_folder=cache_folder) - - if not _check_if_specific_xml(hed_xml_url): - return None - - filename = hed_xml_url.split('/')[-1] - cache_filename = os.path.join(cache_folder, filename) - - return _cache_specific_url(hed_xml_url, cache_filename) - - -def _cache_specific_url(hed_xml_url, cache_filename): - cache_folder = cache_filename.rpartition("/")[0] - os.makedirs(cache_folder, exist_ok=True) - temp_hed_xml_file = url_to_file(hed_xml_url) - if temp_hed_xml_file: - cache_filename = _safe_move_tmp_to_folder(temp_hed_xml_file, cache_filename) - os.remove(temp_hed_xml_file) - return cache_filename - return None - + try: + if 
_check_if_api_url(hed_xml_url): + return _download_latest_hed_xml_version_from_url(hed_xml_url, + xml_version=xml_version, + library_name=library_name, + cache_folder=cache_folder) + + if not _check_if_specific_xml(hed_xml_url): + return None + + filename = hed_xml_url.split('/')[-1] + cache_filename = os.path.join(cache_folder, filename) + + return _cache_specific_url(hed_xml_url, cache_filename) + except urllib.error.URLError as e: + raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_xml_url) from e -def get_hed_version_path(xml_version=None, library_name=None, local_hed_directory=None): - """ Get latest HED XML file path in a directory. Only returns filenames that exist. +def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None): + """ Get HED XML file path in a directory. Only returns filenames that exist. Parameters: library_name (str or None): Optional the schema library name. - xml_version (str or None): If not None, return this version or None. + xml_version (str): Returns this version if it exists local_hed_directory (str): Path to local hed directory. Defaults to HED_CACHE_DIRECTORY Returns: @@ -163,46 +155,10 @@ def get_hed_version_path(xml_version=None, library_name=None, local_hed_director local_hed_directory = HED_CACHE_DIRECTORY hed_versions = get_hed_versions(local_hed_directory, library_name) - if not hed_versions: + if not hed_versions or not xml_version: return None - if xml_version: - if xml_version in hed_versions: - latest_hed_version = xml_version - else: - return None - else: - latest_hed_version = _get_latest_semantic_version_in_list(hed_versions) - return _create_xml_filename(latest_hed_version, library_name, local_hed_directory) - - -def get_path_from_hed_version(hed_version, library_name=None, local_hed_directory=None): - """ Return the HED XML file path for a version. - - Parameters: - hed_version (str): The HED version that is in the hed directory. - library_name (str or None): An optional schema library name. - local_hed_directory (str): The local hed path to use. - - Returns: - str: The HED XML file path in the hed directory that corresponds to the hed version specified. - - Notes: - - Note if no local directory is given, it defaults to HED_CACHE_DIRECTORY. 
- - """ - if not local_hed_directory: - local_hed_directory = HED_CACHE_DIRECTORY - return _create_xml_filename(hed_version, library_name, local_hed_directory) - - -def _copy_installed_schemas_to_cache(cache_folder): - installed_files = os.listdir(INSTALLED_CACHE_LOCATION) - for install_name in installed_files: - _, basename = os.path.split(install_name) - cache_name = os.path.join(cache_folder, basename) - install_name = os.path.join(INSTALLED_CACHE_LOCATION, basename) - if not os.path.exists(cache_name): - shutil.copy(install_name, cache_name) + if xml_version in hed_versions: + return _create_xml_filename(xml_version, library_name, local_hed_directory) def cache_local_versions(cache_folder): @@ -269,12 +225,33 @@ def cache_xml_versions(hed_base_urls=DEFAULT_URL_LIST, skip_folders=DEFAULT_SKIP _cache_hed_version(version, library_name, version_info, cache_folder=cache_folder) _write_last_cached_time(current_timestamp, cache_folder) - except portalocker.exceptions.LockException or ValueError: + except portalocker.exceptions.LockException or ValueError or urllib.errors.URLError: return -1 return 0 +def _cache_specific_url(hed_xml_url, cache_filename): + cache_folder = cache_filename.rpartition("/")[0] + os.makedirs(cache_folder, exist_ok=True) + temp_hed_xml_file = url_to_file(hed_xml_url) + if temp_hed_xml_file: + cache_filename = _safe_move_tmp_to_folder(temp_hed_xml_file, cache_filename) + os.remove(temp_hed_xml_file) + return cache_filename + return None + + +def _copy_installed_schemas_to_cache(cache_folder): + installed_files = os.listdir(INSTALLED_CACHE_LOCATION) + for install_name in installed_files: + _, basename = os.path.split(install_name) + cache_name = os.path.join(cache_folder, basename) + install_name = os.path.join(INSTALLED_CACHE_LOCATION, basename) + if not os.path.exists(cache_name): + shutil.copy(install_name, cache_name) + + def _read_last_cached_time(cache_folder): """ Check the given cache folder to see when it was last updated. @@ -377,7 +354,7 @@ def _get_hed_xml_versions_from_url(hed_base_url, library_name=None, sub_folder_versions = \ _get_hed_xml_versions_from_url(hed_base_url + "/" + file_entry['name'] + hedxml_suffix, skip_folders=skip_folders, get_libraries=True) - except urllib.error.HTTPError as e: + except urllib.error.URLError as e: # Silently ignore ones without a hedxml section for now. continue _merge_in_versions(all_hed_versions, sub_folder_versions) @@ -478,39 +455,3 @@ def _cache_hed_version(version, library_name, version_info, cache_folder): return possible_cache_filename return _cache_specific_url(download_url, possible_cache_filename) - - -def _get_latest_semantic_version_in_list(semantic_version_list): - """ Get the latest semantic version in a list. - - Parameters: - semantic_version_list (list): A list containing semantic versions. - - Returns: - str: The latest semantic version in the list. - - """ - if not semantic_version_list: - return '' - latest_semantic_version = semantic_version_list[0] - if len(semantic_version_list) > 1: - for semantic_version in semantic_version_list[1:]: - latest_semantic_version = _compare_semantic_versions(latest_semantic_version, - semantic_version) - return latest_semantic_version - - -def _compare_semantic_versions(first_semantic_version, second_semantic_version): - """ Compare two semantic versions. - - Parameters: - first_semantic_version (str): The first semantic version. - second_semantic_version (str): The second semantic version. - - Returns: - str: The later semantic version. 
-
-    """
-    if Version(first_semantic_version) > Version(second_semantic_version):
-        return first_semantic_version
-    return second_semantic_version
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index d688f7283..9c92788f3 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -1,10 +1,13 @@
 import json
+import os

 from hed.schema.hed_schema_constants import HedKey, HedSectionKey
 from hed.schema import hed_schema_constants as constants
 from hed.schema.schema_io import schema_util
 from hed.schema.schema_io.schema2xml import Schema2XML
 from hed.schema.schema_io.schema2wiki import Schema2Wiki
+from hed.schema.schema_io.schema2owl import Schema2Owl
+from hed.schema.schema_io.owl_constants import ext_to_format
 from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection
 from hed.errors import ErrorHandler
 from hed.errors.error_types import ValidationErrors
@@ -21,7 +24,6 @@ def __init__(self):
         A HedSchema can be used for validation, checking tag attributes, parsing tags, etc.
         """
         super().__init__()
-        self._has_duplicate_tags = False
         self.header_attributes = {}
         self.filename = None
         self.prologue = ""
@@ -31,6 +33,7 @@ def __init__(self):
         self._namespace = ""

         self._sections = self._create_empty_sections()
+        self.source_format = None  # The type of file this was loaded from (mediawiki, xml, or owl - None if mixed)

     # ===============================================
     # Basic schema properties
@@ -59,13 +62,27 @@ def library(self):
         Returns:
             str: Library name if any.
-
         """
         return self.header_attributes.get(constants.LIBRARY_ATTRIBUTE, "")

+    @property
+    def schema_namespace(self):
+        """Returns the schema namespace prefix."""
+        return self._namespace
+
+    def can_save(self):
+        """ Returns whether it is legal to save this schema.
+
+        You cannot save schemas loaded as merged from multiple library schemas.
+
+        Returns:
+            bool: True if this schema can be saved.
+        """
+        return not self.library or "," not in self.library
+
     @property
     def with_standard(self):
-        """ The version of the base schema this is extended from, if it exists..
+        """ The version of the base schema this is extended from, if it exists.

         Returns:
             str: HED version or ""
@@ -102,6 +119,15 @@ def unit_classes(self):
         """
         return self._sections[HedSectionKey.UnitClasses]

+    @property
+    def units(self):
+        """ Return the unit schema section.
+
+        Returns:
+            HedSchemaSection: The unit section.
+        """
+        return self._sections[HedSectionKey.Units]
+
     @property
     def unit_modifiers(self):
         """ Return the modifiers classes schema section
@@ -200,6 +226,11 @@ def valid_prefixes(self):
     # ===============================================
     # Creation and saving functions
     # ===============================================
+
+    # todo: we may want to collapse these 6 functions into one like this
+    # def serialize(self, filename=None, save_merged=False, file_format=whatever is default):
+    #     pass
+
     def get_as_mediawiki_string(self, save_merged=False):
         """ Return the schema to a mediawiki string.

@@ -214,6 +245,26 @@
         output_strings = Schema2Wiki.process_schema(self, save_merged)
         return '\n'.join(output_strings)

+    def get_as_owl_string(self, save_merged=False, file_format="owl"):
+        """ Return the schema as an OWL/RDF string.
+
+        Parameters:
+            save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+                If it is not a "withStandard" schema, this setting has no effect.
+            file_format (str or None): Override the format derived from the filename extension.
+                Accepts any value rdflib accepts (we fully support "turtle", "xml" ("owl" also accepted) and "json-ld";
+                other values should work but are less thoroughly tested).
+        Returns:
+            str: The schema as a string in the requested RDF format.
+
+        :raises rdflib.plugin.PluginException:
+            - Invalid format of file_format. Make sure you use a supported RDF format.
+        """
+        if file_format == "owl":
+            file_format = "xml"
+        rdf_data = Schema2Owl.process_schema(self, save_merged)
+        return rdf_data.serialize(format=file_format)
+
     def get_as_xml_string(self, save_merged=True):
         """ Return the schema to an XML string.

@@ -226,39 +277,69 @@
         """
         xml_tree = Schema2XML.process_schema(self, save_merged)
-        return schema_util._xml_element_2_str(xml_tree)
+        return schema_util.xml_element_2_str(xml_tree)

-    def save_as_mediawiki(self, filename=None, save_merged=False):
+    def save_as_mediawiki(self, filename, save_merged=False):
         """ Save as mediawiki to a file.

         filename: str
-            If present, move the resulting file to this location.
+            Save location.
         save_merged: bool
             If true, this will save the schema as a merged schema if it is a "withStandard" schema.
             If it is not a "withStandard" schema, this setting has no effect.

-        Returns:
-            str:    The newly created schema filename.
+        :raises OSError:
+            - File cannot be saved for some reason
         """
         output_strings = Schema2Wiki.process_schema(self, save_merged)
-        local_wiki_file = schema_util.write_strings_to_file(output_strings, ".mediawiki")
-        return schema_util.move_file(local_wiki_file, filename)
+        with open(filename, mode='w', encoding='utf-8') as opened_file:
+            for string in output_strings:
+                opened_file.write(string)
+                opened_file.write('\n')
+
+    def save_as_owl(self, filename, save_merged=False, file_format=None):
+        """ Save as OWL/RDF to a file.

-    def save_as_xml(self, filename=None, save_merged=True):
+        filename: str
+            Save location.
+        save_merged: bool
+            If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+            If it is not a "withStandard" schema, this setting has no effect.
+        file_format (str or None): Required for owl formatted files other than the following:
+            .ttl: turtle
+            .owl: xml
+            .json-ld: json-ld
+
+        :raises OSError:
+            - File cannot be saved for some reason
+
+        :raises rdflib.plugin.PluginException:
+            - Invalid format of file_format. Make sure you use a supported RDF format.
+        """
+        ext = os.path.splitext(filename.lower())[1]
+        if ext in ext_to_format and file_format is None:
+            file_format = ext_to_format[ext]
+        if file_format == "owl":
+            file_format = "xml"
+        rdf_data = Schema2Owl.process_schema(self, save_merged)
+        rdf_data.serialize(filename, format=file_format)
+
+    def save_as_xml(self, filename, save_merged=True):
         """ Save as XML to a file.

         filename: str
-            If present, move the resulting file to this location.
+            Save location.
         save_merged: bool
             If true, this will save the schema as a merged schema if it is a "withStandard" schema.
             If it is not a "withStandard" schema, this setting has no effect.

-        Returns:
-            str:    The name of the newly created schema file.
+ :raises OSError: + - File cannot be saved for some reason """ xml_tree = Schema2XML.process_schema(self, save_merged) - local_xml_file = schema_util.write_xml_tree_2_xml_file(xml_tree, ".xml") - return schema_util.move_file(local_xml_file, filename) + with open(filename, mode='w', encoding='utf-8') as opened_file: + xml_string = schema_util.xml_element_2_str(xml_tree) + opened_file.write(xml_string) def set_schema_prefix(self, schema_namespace): """ Set library namespace associated for this schema. @@ -296,11 +377,11 @@ def __eq__(self, other): return False if self.get_save_header_attributes() != other.get_save_header_attributes(): return False - if self._has_duplicate_tags != other._has_duplicate_tags: + if self.has_duplicates() != other.has_duplicates(): return False - if self.prologue != other.prologue: + if self.prologue.strip() != other.prologue.strip(): return False - if self.epilogue != other.epilogue: + if self.epilogue.strip() != other.epilogue.strip(): return False if self._sections != other._sections: # This block is useful for debugging when modifying the schema class itself. @@ -518,12 +599,21 @@ def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_sl raise self._TagIdentifyError(error) word_start_index += len(name) + 1 + def has_duplicates(self): + """Returns the first duplicate tag/unit/etc if any section has a duplicate name""" + for section in self._sections.values(): + has_duplicates = bool(section.duplicate_names) + if has_duplicates: + # Return first entry of dict + return next(iter(section.duplicate_names)) + + return False + # =============================================== # Semi-private creation finalizing functions # =============================================== def finalize_dictionaries(self): """ Call to finish loading. """ - self._has_duplicate_tags = bool(self.tags.duplicate_names) self._update_all_entries() def _update_all_entries(self): @@ -736,13 +826,6 @@ def _get_attributes_for_section(self, key_class): # Semi private function used to create a schema in memory(usually from a source file) # =============================================== def _add_tag_to_dict(self, long_tag_name, new_entry, key_class): - # Add the InLibrary attribute to any library schemas as they are loaded - # These are later removed when they are saved out, if saving unmerged - if self.library and (not self.with_standard or (not self.merged and self.with_standard)): - # only add it if not already present - This is a rare case - if not new_entry.has_attribute(HedKey.InLibrary): - new_entry._set_attribute_value(HedKey.InLibrary, self.library) - section = self._sections[key_class] return section._add_to_dict(long_tag_name, new_entry) diff --git a/hed/schema/hed_schema_base.py b/hed/schema/hed_schema_base.py index 6651077e0..b81ea693e 100644 --- a/hed/schema/hed_schema_base.py +++ b/hed/schema/hed_schema_base.py @@ -9,11 +9,24 @@ class HedSchemaBase(ABC): """ Baseclass for schema and schema group. - Overriding the following functions will allow you to use the schema for validation etc. 
+ + Implementing the abstract functions will allow you to use the schema for validation """ def __init__(self): + self._name = "" # User provided identifier for this schema(not used for equality comparison or saved) pass + @property + def name(self): + """User provided name for this schema, defaults to filename or version if no name provided.""" + if not self._name and hasattr(self, "filename"): + return self.filename + return self._name + + @name.setter + def name(self, name): + self._name = name + @abstractmethod def get_schema_versions(self): """ A list of HED version strings including namespace and library name if any of this schema. diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 60a1a9349..d53b8c595 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -43,15 +43,7 @@ class HedKey: Rooted = "rooted" DeprecatedFrom = "deprecatedFrom" ConversionFactor = "conversionFactor" - - # All known properties - BoolProperty = 'boolProperty' - UnitClassProperty = 'unitClassProperty' - UnitProperty = 'unitProperty' - UnitModifierProperty = 'unitModifierProperty' - ValueClassProperty = 'valueClassProperty' - ElementProperty = 'elementProperty' - IsInheritedProperty = 'isInheritedProperty' + Reserved = "reserved" SIUnit = 'SIUnit' UnitSymbol = 'unitSymbol' @@ -68,6 +60,17 @@ class HedKey: # Node attributes InLibrary = "inLibrary" + # All known properties + BoolProperty = 'boolProperty' + UnitClassProperty = 'unitClassProperty' + UnitProperty = 'unitProperty' + UnitModifierProperty = 'unitModifierProperty' + ValueClassProperty = 'valueClassProperty' + ElementProperty = 'elementProperty' + NodeProperty = 'nodeProperty' + IsInheritedProperty = 'isInheritedProperty' + + VERSION_ATTRIBUTE = 'version' LIBRARY_ATTRIBUTE = 'library' diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 936943e85..de066dbc0 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -119,7 +119,8 @@ def __eq__(self, other): # We only want to compare known attributes self_attr = self.get_known_attributes() other_attr = other.get_known_attributes() - if self_attr != other_attr: + # We can no longer be sure on the order of attribute values, since owl formatting has no order + if self_attr != other_attr and not self._compare_attributes_no_order(self_attr, other_attr): return False if self.description != other.description: return False @@ -135,6 +136,13 @@ def get_known_attributes(self): return {key: value for key, value in self.attributes.items() if not self._unknown_attributes or key not in self._unknown_attributes} + @staticmethod + def _compare_attributes_no_order(left, right): + left = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in left.items()} + right = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in right.items()} + + return left == right + class UnitClassEntry(HedSchemaEntry): """ A single unit class entry in the HedSchema. """ diff --git a/hed/schema/hed_schema_group.py b/hed/schema/hed_schema_group.py index ae0ac2b81..7f3d3f21a 100644 --- a/hed/schema/hed_schema_group.py +++ b/hed/schema/hed_schema_group.py @@ -1,7 +1,6 @@ """ """ -# todo: Switch various properties to this cached_property once we require python 3.8 import json from hed.errors.exceptions import HedExceptions, HedFileError @@ -18,7 +17,7 @@ class HedSchemaGroup(HedSchemaBase): - You cannot save/load/etc the combined schema object directly. 
""" - def __init__(self, schema_list): + def __init__(self, schema_list, name=""): """ Combine multiple HedSchema objects from a list. Parameters: @@ -34,13 +33,17 @@ def __init__(self, schema_list): super().__init__() if len(schema_list) == 0: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty list passed to HedSchemaGroup constructor.", - filename="Combined Schema") + filename=self.name) schema_prefixes = [hed_schema._namespace for hed_schema in schema_list] if len(set(schema_prefixes)) != len(schema_prefixes): raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX, "Multiple schema share the same tag name_prefix. This is not allowed.", - filename="Combined Schema") + filename=self.name) self._schemas = {hed_schema._namespace: hed_schema for hed_schema in schema_list} + source_formats = [hed_schema.source_format for hed_schema in schema_list] + # All must be same source format or return None. + self.source_format = source_formats[0] if len(set(source_formats)) == 1 else None + self._name = name def get_schema_versions(self): """ A list of HED version strings including namespace and library name if any of this schema. diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 8f6bf62f3..8b53e6a4d 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -2,26 +2,36 @@ import os import json import functools +import urllib.error + from hed.schema.schema_io.xml2schema import SchemaLoaderXML from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki +from hed.schema.schema_io.owl2schema import SchemaLoaderOWL from hed.schema import hed_cache from hed.errors.exceptions import HedFileError, HedExceptions from hed.schema.schema_io import schema_util from hed.schema.hed_schema_group import HedSchemaGroup from hed.schema.schema_validation_util import validate_version_string +from collections import defaultdict +from hed.schema.schema_io.owl_constants import ext_to_format -MAX_MEMORY_CACHE = 20 +MAX_MEMORY_CACHE = 40 -def from_string(schema_string, schema_format=".xml", schema_namespace=None): +def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None): """ Create a schema from the given string. Parameters: - schema_string (str): An XML or mediawiki file as a single long string. + schema_string (str): An XML, mediawiki or OWL, file as a single long string schema_format (str): The schema format of the source schema string. + Allowed normal values: .mediawiki, .xml + Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports) schema_namespace (str, None): The name_prefix all tags in this schema will accept. + schema(HedSchema or None): A hed schema to merge this new file into + It must be a with-standard schema with the same value. + name(str or None): User supplied identifier for this schema Returns: (HedSchema): The loaded schema. 
@@ -36,27 +46,38 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None): """ if not schema_string: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty string passed to HedSchema.from_string", - filename=schema_string) + filename=name) + + # Replace carriage returns with new lines since this might not be done by the caller + schema_string = schema_string.replace("\r\n", "\n") if schema_format.endswith(".xml"): - hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string) + hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name) elif schema_format.endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string) + hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name) + elif schema_format: + hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, name=name) else: - raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=schema_format) + raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name) if schema_namespace: hed_schema.set_schema_prefix(schema_namespace=schema_namespace) - return hed_schema -def load_schema(hed_path=None, schema_namespace=None): +def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, name=None): """ Load a schema from the given file or URL path. Parameters: - hed_path (str or None): A filepath or url to open a schema from. + hed_path (str): A filepath or url to open a schema from. schema_namespace (str or None): The name_prefix all tags in this schema will accept. + schema(HedSchema or None): A hed schema to merge this new file into + It must be a with-standard schema with the same value. + file_format(str or None): Required for owl formatted files other than the following: + .ttl: turtle + .owl: xml + .json-ld: json-ld + name(str or None): User supplied identifier for this schema Returns: HedSchema: The loaded schema. 
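Reviewer note: `load_schema` now routes RDF files through `SchemaLoaderOWL`, inferring the serialization from `ext_to_format` when it can and otherwise requiring an explicit `file_format`. A sketch of the save/load cycle this enables — the `.ttl`/turtle mapping comes from the docstrings in this diff, while the `"n3"` case is an assumption about what rdflib will accept:

```python
from hed.schema import load_schema, load_schema_version

schema = load_schema_version("8.2.0")  # assumed version

schema.save_as_owl("schema.ttl")       # format inferred from the .ttl extension
turtle_schema = load_schema("schema.ttl")

# For an extension the loader cannot infer, state the format explicitly.
schema.save_as_owl("schema.n3", file_format="n3")
n3_schema = load_schema("schema.n3", file_format="n3")
```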
@@ -71,15 +92,22 @@ def load_schema(hed_path=None, schema_namespace=None): raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file path passed to HedSchema.load_file", filename=hed_path) + ext = os.path.splitext(hed_path.lower())[1] is_url = hed_cache._check_if_url(hed_path) - if is_url: - file_as_string = schema_util.url_to_string(hed_path) - hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1]) + try: + file_as_string = schema_util.url_to_string(hed_path) + except urllib.error.URLError as e: + raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e + hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name) + elif ext in ext_to_format: + hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name) + elif file_format: + hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name) elif hed_path.lower().endswith(".xml"): - hed_schema = SchemaLoaderXML.load(hed_path) + hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name) elif hed_path.lower().endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(hed_path) + hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path) @@ -108,10 +136,14 @@ def get_hed_xml_version(xml_file_path): @functools.lru_cache(maxsize=MAX_MEMORY_CACHE) def _load_schema_version(xml_version=None, xml_folder=None): - """ Return specified version or latest if not specified. + """ Return specified version Parameters: - xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z'. + xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z' + Further versions can be added comma separated after the version number/library name. + e.g. "lib:library_x.y.z,otherlibrary_x.y.z" will load "library" and "otherlibrary" into "lib:" + The schema namespace must be the same and not repeated if loading multiple merged schemas. + xml_folder (str): Path to a folder containing schema. Returns: @@ -120,25 +152,84 @@ def _load_schema_version(xml_version=None, xml_folder=None): :raises HedFileError: - The xml_version is not valid. 
- The specified version cannot be found or loaded + - Multiple schemas are being loaded with the same prefix, and they have duplicate tags - Other fatal errors loading the schema (These are unlikely if you are not editing them locally) - The prefix is invalid """ schema_namespace = "" - library_name = None + name = xml_version if xml_version: if ":" in xml_version: schema_namespace, _, xml_version = xml_version.partition(":") - if "_" in xml_version: - library_name, _, xml_version = xml_version.rpartition("_") - elif validate_version_string(xml_version): - library_name = xml_version - xml_version = None + + if xml_version: + xml_versions = xml_version.split(",") + # Add a blank entry to generate an error if we have no xml version + else: + xml_versions = [""] + + first_schema = _load_schema_version_sub(xml_versions[0], schema_namespace, xml_folder=xml_folder, + name=name) + filenames = [os.path.basename(first_schema.filename)] + for version in xml_versions[1:]: + _load_schema_version_sub(version, schema_namespace, xml_folder=xml_folder, schema=first_schema, + name=name) + + # Detect duplicate errors when merging schemas in the same namespace + current_filename = os.path.basename(first_schema.filename) + duplicate_name = first_schema.has_duplicates() + if duplicate_name: + issues = first_schema.check_compliance(check_for_warnings=False) + filename_string = ",".join(filenames) + msg = (f"A duplicate tag, '{duplicate_name}', was detected in the schema file '{current_filename}'. " + f"Previously loaded schemas include: {filename_string}. " + f"To resolve this, consider prefixing the final schema during load: " + f"custom_prefix:schema_version.") + raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_NAMES, msg, first_schema.filename, issues) + filenames.append(current_filename) + return first_schema + + +def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, schema=None, name=""): + """ Return specified version + + Parameters: + xml_version (str): HED version format string. Expected format: '[schema_namespace:][library_name_]X.Y.Z' + schema_namespace(str): Namespace to add this schema to, default none + xml_folder (str): Path to a folder containing schema. + schema(HedSchema or None): A hed schema to merge this new file into + It must be a with-standard schema with the same value. + + Returns: + HedSchema: The requested HedSchema object. + + :raises HedFileError: + - The xml_version is not valid. 
+ - The specified version cannot be found or loaded + - Other fatal errors loading the schema (These are unlikely if you are not editing them locally) + - The prefix is invalid + """ + library_name = None + + if not xml_version: + out_name = schema_namespace if schema_namespace else "standard" + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {out_name} schema.", + filename=None) + + if "_" in xml_version: + library_name, _, xml_version = xml_version.rpartition("_") + + if validate_version_string(xml_version): + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {xml_version} schema.", + filename=name) try: final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder) if not final_hed_xml_file: hed_cache.cache_local_versions(xml_folder) final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder) - hed_schema = load_schema(final_hed_xml_file) + hed_schema = load_schema(final_hed_xml_file, schema=schema, name=name) except HedFileError as e: if e.code == HedExceptions.FILE_NOT_FOUND: hed_cache.cache_xml_versions(cache_folder=xml_folder) @@ -147,7 +238,7 @@ def _load_schema_version(xml_version=None, xml_folder=None): raise HedFileError(HedExceptions.FILE_NOT_FOUND, f"HED version '{xml_version}' not found in cache: {hed_cache.get_cache_directory()}", filename=xml_folder) - hed_schema = load_schema(final_hed_xml_file) + hed_schema = load_schema(final_hed_xml_file, schema=schema, name=name) else: raise e @@ -158,14 +249,13 @@ def _load_schema_version(xml_version=None, xml_folder=None): def load_schema_version(xml_version=None, xml_folder=None): - """ Return a HedSchema or HedSchemaGroup extracted from xml_version field. + """ Return a HedSchema or HedSchemaGroup extracted from xml_version Parameters: - xml_version (str or list or None): List or str specifying which official HED schemas to use. - An empty string returns the latest version + xml_version (str or list): List or str specifying which official HED schemas to use. A json str format is also supported, based on the output of HedSchema.get_formatted_version - Basic format: '[schema_namespace:][library_name_]X.Y.Z'. + Basic format: `[schema_namespace:][library_name_]X.Y.Z`. xml_folder (str): Path to a folder containing schema. Returns: @@ -185,10 +275,44 @@ def load_schema_version(xml_version=None, xml_folder=None): except json.decoder.JSONDecodeError as e: raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e if xml_version and isinstance(xml_version, list): - schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_version] + xml_versions = parse_version_list(xml_version) + schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_versions.values()] if len(schemas) == 1: return schemas[0] - return HedSchemaGroup(schemas) + name = ",".join([schema.version for schema in schemas]) + return HedSchemaGroup(schemas, name=name) else: return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder) + + +def parse_version_list(xml_version_list): + """Takes a list of xml versions and returns a dictionary split by prefix + + e.g. ["score", "testlib"] will return {"": "score, testlib"} + e.g. 
["score", "testlib", "ol:otherlib"] will return {"": "score, testlib", "ol:": "otherlib"} + + Parameters: + xml_version_list (list): List of str specifying which hed schemas to use + + Returns: + HedSchema or HedSchemaGroup: The schema or schema group extracted. + """ + out_versions = defaultdict(list) + for version in xml_version_list: + schema_namespace = "" + if version and ":" in version: + schema_namespace, _, version = version.partition(":") + + if not isinstance(version, str): + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, + f"Must specify a schema version by number, found no version on {xml_version_list} schema.", + filename=None) + if version in out_versions[schema_namespace]: + raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_LIBRARY, f"Attempting to load the same library '{version}' twice: {out_versions[schema_namespace]}", + filename=None) + out_versions[schema_namespace].append(version) + + out_versions = {key: ",".join(value) if not key else f"{key}:" + ",".join(value) for key, value in out_versions.items()} + + return out_versions \ No newline at end of file diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py index 7a866fc05..708dcf0de 100644 --- a/hed/schema/hed_schema_section.py +++ b/hed/schema/hed_schema_section.py @@ -151,6 +151,7 @@ def _finalize_section(self, hed_schema): class HedSchemaUnitClassSection(HedSchemaSection): def _check_if_duplicate(self, name_key, new_entry): + """Allow adding units to existing unit classes, using a placeholder one with no attributes.""" if name_key in self and len(new_entry.attributes) == 1\ and HedKey.InLibrary in new_entry.attributes: return self.all_names[name_key] diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index f53157bba..942c41670 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -1,13 +1,15 @@ """The built-in functions to validate known attributes. Template for the functions: -attribute_checker_template(hed_schema, tag_entry, attribute_name, possible_values): - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute + +- ``attribute_checker_template(hed_schema, tag_entry, attribute_name)``: + - ``hed_schema (HedSchema)``: The schema to use for validation. + - ``tag_entry (HedSchemaEntry)``: The schema entry for this tag. + - ``attribute_name (str)``: The name of this attribute. + Returns: - bool -""" + - ``bool``: Description of the return value. + """ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors from hed.errors.error_reporter import ErrorHandler @@ -208,7 +210,7 @@ def in_library_check(hed_schema, tag_entry, attribute_name): issues = [] library = tag_entry.attributes.get(attribute_name, "") - if hed_schema.library != library: + if library not in hed_schema.library.split(","): issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, tag_entry.name, library) diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 1cd974c04..f128306d6 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -176,14 +176,14 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section schema1 (HedSchema): The first schema to be compared. schema2 (HedSchema): The second schema to be compared. 
diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py
index 1cd974c04..f128306d6 100644
--- a/hed/schema/schema_compare.py
+++ b/hed/schema/schema_compare.py
@@ -176,14 +176,14 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
         schema1 (HedSchema): The first schema to be compared.
         schema2 (HedSchema): The second schema to be compared.
         attribute_filter (str, optional): The attribute to filter entries by.
-            Entries without this attribute are skipped.
-            The most common use would be HedKey.InLibrary
-            If it evaluates to False, no filtering is performed.
+                                          Entries without this attribute are skipped.
+                                          The most common use would be HedKey.InLibrary.
+                                          If it evaluates to False, no filtering is performed.
         sections(list): the list of sections to compare.  By default, just the tags section.
-            If None, checks all sections including header, prologue, and epilogue.
+                        If None, checks all sections including header, prologue, and epilogue.

     Returns:
-        tuple: A tuple containing four dictionaries:
+        tuple: A tuple containing four dictionaries:
         - matches(dict): Entries present in both schemas and are equal.
         - not_in_schema1(dict): Entries present in schema2 but not in schema1.
         - not_in_schema2(dict): Entries present in schema1 but not in schema2.
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index 1a68baf89..439f5d1df 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -124,4 +124,12 @@ def check_invalid_chars(self):
         for tag_name, desc in self.hed_schema.get_desc_iter():
             issues_list += validate_schema_description(tag_name, desc)
+
+        # todo: Activate this section once we have clearer rules on spaces in unit names
+        # for unit in self.hed_schema.units:
+        #     for i, char in enumerate(unit):
+        #         if char == " ":
+        #             issues_list += ErrorHandler.format_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_TAG,
+        #                                                      unit, char_index=i, problem_char=char)
+
         return issues_list
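For context, a minimal sketch of invoking the compliance checks that a rule like the commented-out unit check above would feed into (assumes `check_compliance()` on a loaded schema; the version is arbitrary):

```python
# Sketch: run the schema compliance checks on a cached standard schema.
from hed.schema import load_schema_version

schema = load_schema_version("8.2.0")
issues = schema.check_compliance()   # returns a list of issue dictionaries
print(f"{len(issues)} compliance issues found")
```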
diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
index e3c4a351e..2f48775c5 100644
--- a/hed/schema/schema_io/base2schema.py
+++ b/hed/schema/schema_io/base2schema.py
@@ -1,8 +1,10 @@
 import copy
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.schema import HedSchema
+from hed.schema.hed_schema_constants import HedKey
 from abc import abstractmethod, ABC
 from hed.schema import schema_validation_util
+from hed.schema import hed_schema_constants


 class SchemaLoader(ABC):
@@ -12,52 +14,84 @@ class SchemaLoader(ABC):
        SchemaLoaderXML(filename) will load just the header_attributes
     """
-    def __init__(self, filename, schema_as_string=None):
+    def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""):
         """Loads the given schema from one of the two parameters.

         Parameters:
             filename(str or None): A valid filepath or None
             schema_as_string(str or None): A full schema as text or None
+            schema(HedSchema or None): A HED schema to merge this new file into.
+                It must be a partnered schema with the same withStandard value.
+            file_format(str or None): The format of this file if needed (currently only used for owl)
+            name(str or None): Optional user-supplied identifier; defaults to filename
         """
         if schema_as_string and filename:
             raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.",
                                filename)
-
+        self.file_format = file_format
         self.filename = filename
+        self.name = name if name else filename
         self.schema_as_string = schema_as_string
-
+        self.appending_to_schema = False
         try:
             self.input_data = self._open_file()
         except OSError as e:
-            raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, filename)
+            raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, self.name)
         except TypeError as e:
-            raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)
+            raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), self.name)
         except ValueError as e:
-            raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), filename)
-
-        self._schema = HedSchema()
-        self._schema.filename = filename
+            raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), self.name)
+
         hed_attributes = self._get_header_attributes(self.input_data)
-        schema_validation_util.validate_attributes(hed_attributes, filename=self.filename)
+        schema_validation_util.validate_attributes(hed_attributes, name=self.name)
+
+        withStandard = hed_attributes.get(hed_schema_constants.WITH_STANDARD_ATTRIBUTE, "")
+        self.library = hed_attributes.get(hed_schema_constants.LIBRARY_ATTRIBUTE, "")
+        version_number = hed_attributes.get(hed_schema_constants.VERSION_ATTRIBUTE, "")
+        if not schema:
+            self._schema = HedSchema()
+        else:
+            self._schema = schema
+            self.appending_to_schema = True
+            if not self._schema.with_standard:
+                raise HedFileError(HedExceptions.SCHEMA_DUPLICATE_PREFIX,
+                                   "Trying to load multiple normal schemas as a merged one with the same namespace.  "
+                                   "Ensure schemas have the withStandard header attribute set",
+                                   self.name)
+            elif withStandard != self._schema.with_standard:
+                raise HedFileError(HedExceptions.BAD_WITH_STANDARD_VERSION,
+                                   "When merging two schemas without a schema namespace, they must have the same withStandard value.",
+                                   self.name)
+            hed_attributes[hed_schema_constants.VERSION_ATTRIBUTE] = self._schema.version_number + f",{version_number}"
+            hed_attributes[hed_schema_constants.LIBRARY_ATTRIBUTE] = self._schema.library + f",{self.library}"
+        if name:
+            self._schema.name = name
+        self._schema.filename = filename
         self._schema.header_attributes = hed_attributes
         self._loading_merged = False

+    @property
     def schema(self):
         """ The partially loaded schema if you are after just header attributes."""
         return self._schema

     @classmethod
-    def load(cls, filename=None, schema_as_string=None):
+    def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""):
         """ Loads and returns the schema, including partnered schema if applicable.

         Parameters:
             filename(str or None): A valid filepath or None
             schema_as_string(str or None): A full schema as text or None
+            schema(HedSchema or None): A HED schema to merge this new file into.
+                It must be a partnered schema with the same withStandard value.
+            file_format(str or None): If this is an owl file being loaded, this is the format.
+                Allowed values include: turtle, json-ld, and owl (xml).
+            name(str or None): Optional user-supplied identifier; defaults to filename

         Returns:
             schema(HedSchema): The new schema
         """
-        loader = cls(filename, schema_as_string)
+        loader = cls(filename, schema_as_string, schema, file_format, name)
         return loader._load()

     def _load(self):
@@ -68,7 +102,7 @@ def _load(self):
         """
         self._loading_merged = True
         # Do a full load of the standard schema if this is a partnered schema
-        if self._schema.with_standard and not self._schema.merged:
+        if not self.appending_to_schema and self._schema.with_standard and not self._schema.merged:
             from hed.schema.hed_schema_io import load_schema_version
             saved_attr = self._schema.header_attributes
             try:
@@ -102,3 +136,14 @@ def _get_header_attributes(self, input_data):
     def _parse_data(self):
         """Puts the input data into the new schema"""
         pass
+
+    def _add_to_dict_base(self, entry, key_class):
+        if not entry.has_attribute(HedKey.InLibrary) and self.appending_to_schema and self._schema.merged:
+            return None
+
+        if self.library and (not self._schema.with_standard or (not self._schema.merged and self._schema.with_standard)):
+            # Only add the InLibrary attribute if it is not already present - this is a rare case
+            if not entry.has_attribute(HedKey.InLibrary):
+                entry._set_attribute_value(HedKey.InLibrary, self.library)
+
+        return self._schema._add_tag_to_dict(entry.name, entry, key_class)
\ No newline at end of file
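A sketch of the merge path this constructor enables, using the new `schema=` parameter on `load_schema` (file names here are hypothetical; both files must be partnered library schemas with matching withStandard values):

```python
# Sketch: load a second library file into an existing partnered schema.
from hed.schema import load_schema

combined = load_schema("HED_score_1.1.0.xml")                  # hypothetical filename
combined = load_schema("HED_lang_1.0.0.xml", schema=combined)  # appends into the same schema
print(combined.library)   # e.g. "score,lang" after the header attributes are merged
```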
diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py
new file mode 100644
index 000000000..561fa8212
--- /dev/null
+++ b/hed/schema/schema_io/owl2schema.py
@@ -0,0 +1,291 @@
+"""
+This module is used to create a HedSchema object from an OWL file or graph.
+"""
+
+from hed.errors.exceptions import HedFileError, HedExceptions
+from hed.schema.hed_schema_constants import HedSectionKey, HedKey
+from hed.schema import schema_validation_util
+from .base2schema import SchemaLoader
+import rdflib
+from rdflib.exceptions import ParserError
+from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD
+from collections import defaultdict
+
+from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM
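The loader below leans on rdflib for all parsing; a tiny self-contained illustration of the parse step it wraps (the triple is made up):

```python
# Sketch: rdflib parses Turtle/OWL text into a Graph that the loader then walks.
import rdflib

graph = rdflib.Graph()
graph.parse(data="""
@prefix hed: <https://purl.org/hed#> .
hed:Prologue hed:elementValue "A tiny prologue." .
""", format="turtle")
print(len(graph))   # 1 triple parsed
```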
+
+
+class SchemaLoaderOWL(SchemaLoader):
+    """ Loads OWL schemas from filenames or strings.
+
+        Expected usage is SchemaLoaderOWL.load(filename).
+
+        SchemaLoaderOWL(filename) will load just the header_attributes.
+    """
+    def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""):
+        if schema_as_string and not file_format:
+            raise HedFileError(HedExceptions.BAD_PARAMETERS,
+                               "Must pass a file_format if loading owl schema as a string.",
+                               name)
+        super().__init__(filename, schema_as_string, schema, file_format, name)
+
+        self._schema.source_format = ".owl"
+        self.graph = None
+        # When loading, this stores rooted tag name -> full root path pairs
+        self._rooted_cache = {}
+
+    def _open_file(self):
+        """Parses a Turtle/OWL/etc. file and returns the RDF graph."""
+
+        graph = rdflib.Graph()
+        try:
+            if self.filename:
+                graph.parse(self.filename, format=self.file_format)
+            else:
+                graph.parse(data=self.schema_as_string, format=self.file_format)
+        except FileNotFoundError as fnf_error:
+            raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(fnf_error), self.name)
+        except ParserError as parse_error:
+            raise HedFileError(HedExceptions.CANNOT_PARSE_RDF, str(parse_error), self.name)
+
+        return graph
+
+    def _read_prologue(self):
+        """Reads the Prologue section from the ontology."""
+        prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False)
+        return str(prologue) if prologue else ""
+
+    def _read_epilogue(self):
+        """Reads the Epilogue section from the ontology."""
+        epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False)
+        return str(epilogue) if epilogue else ""
+
+    def _get_header_attributes(self, graph):
+        """Parses header attributes from an RDF graph into a dictionary."""
+        header_attributes = {}
+        for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)):
+            label = graph.value(s, RDFS.label)
+            if label:
+                header_attribute = graph.value(s, HED.HeaderAttribute)
+                header_attributes[str(label)] = str(header_attribute) if header_attribute else None
+        return header_attributes
+
+    def _parse_data(self):
+        self.graph = self.input_data
+        self.graph.bind("hed", HED)
+        self.graph.bind("hedt", HEDT)
+        self.graph.bind("hedu", HEDU)
+        self.graph.bind("hedum", HEDUM)
+
+        self._schema.epilogue = self._read_epilogue()
+        self._schema.prologue = self._read_prologue()
+        self._get_header_attributes(self.graph)
+        self._read_properties()
+        self._read_attributes()
+        self._read_units()
+        self._read_section(HedSectionKey.ValueClasses, HED.HedValueClass)
+        self._read_section(HedSectionKey.UnitModifiers, HED.HedUnitModifier)
+        self._read_tags()
+
+    def get_local_names_from_uris(self, parent_chain, tag_uri):
+        """ Extracts the local names from URIs using rdflib's n3() method. """
+        full_names = []
+        for uri in parent_chain + [tag_uri]:
+            # Serialize the URI into N3 format and extract the local name
+            name = uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1]
+            full_names.append(name)
+
+        return full_names
+
+    def sort_classes_by_hierarchy(self, classes):
+        """ Sorts all tags based on their assembled full names.
+
+        Returns:
+            list of tuples:
+ Left Tag URI, right side is parent labels(not including self) + """ + parent_chains = [] + full_tag_names = [] + for tag_uri in classes: + parent_chain = self._get_parent_chain(tag_uri) + parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] + # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] + full_tag_names.append("/".join(parent_chain)) + parent_chains.append((tag_uri, parent_chain[:-1])) + + # Sort parent_chains by full_tag_names. + _, parent_chains = zip(*sorted(zip(full_tag_names, parent_chains))) + + return parent_chains + + def _get_parent_chain(self, cls): + """ Recursively builds the parent chain for a given class. """ + parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) + if parent is None: + return [] + return self._get_parent_chain(parent) + [parent] + + def _parse_uri(self, uri, key_class, name=None): + if name: + label = name + else: + label = self.graph.value(subject=uri, predicate=RDFS.label) + if not label: + raise ValueError(f"Empty label value found in owl file in uri {uri}") + label = str(label) + + tag_entry = self._schema._create_tag_entry(label, key_class) + + description = self.graph.value(subject=uri, predicate=RDFS.comment) + if description: + tag_entry.description = str(description) + + section = self._schema._sections[key_class] + valid_attributes = section.valid_attributes + + new_values = defaultdict(list) + for predicate, obj in self.graph.predicate_objects(subject=uri): + # Convert predicate URI to a readable string, assuming it's in a known namespace + attr_name = predicate.n3(self.graph.namespace_manager).split(':')[1] + + if attr_name in valid_attributes: + if isinstance(obj, URIRef): + attr_value = obj.n3(self.graph.namespace_manager).split(':')[1] + else: + attr_value = str(obj) + + new_values[attr_name].append(attr_value) + + for name, value in new_values.items(): + value = ",".join(value) + if value == "true": + value = True + tag_entry._set_attribute_value(name, value) + + return tag_entry + + def _get_classes_with_subproperty(self, subproperty_uri, base_type): + """Iterates over all classes that have a specified rdfs:subPropertyOf.""" + classes = set() + for s in self.graph.subjects(RDF.type, base_type): + if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: + classes.add(s) + return classes + + def _get_all_subclasses(self, base_type): + """ + Recursively finds all subclasses of the given base_type. + """ + subclasses = set() + for subclass in self.graph.subjects(RDFS.subClassOf, base_type): + subclasses.add(subclass) + subclasses.update(self._get_all_subclasses(subclass)) + return subclasses + + def _get_classes(self, base_type): + """ + Retrieves all instances of the given base_type, including instances of its subclasses. 
+ """ + classes = set() + # Add instances of the base type + for s in self.graph.subjects(RDF.type, base_type): + classes.add(s) + # Add instances of all subclasses + for subclass in self._get_all_subclasses(base_type): + for s in self.graph.subjects(RDF.type, subclass): + classes.add(s) + return classes + + def _read_properties(self): + key_class = HedSectionKey.Properties + self._schema._initialize_attributes(key_class) + prop_uris = self._get_classes_with_subproperty(HED.schemaProperty, OWL.AnnotationProperty) + for uri in prop_uris: + new_entry = self._parse_uri(uri, key_class) + self._add_to_dict(new_entry, key_class) + + def _read_attributes(self): + key_class = HedSectionKey.Attributes + self._schema._initialize_attributes(key_class) + prop_uris = self._get_classes_with_subproperty(HED.schemaAttributeDatatypeProperty, OWL.DatatypeProperty) + prop_uris.update(self._get_classes_with_subproperty(HED.schemaAttributeObjectProperty, OWL.ObjectProperty)) + + for uri in prop_uris: + new_entry = self._parse_uri(uri, key_class) + self._add_to_dict(new_entry, key_class) + + def _read_section(self, key_class, node_uri): + self._schema._initialize_attributes(key_class) + classes = self._get_classes(node_uri) + for uri in classes: + new_entry = self._parse_uri(uri, key_class) + self._add_to_dict(new_entry, key_class) + + def _read_units(self): + self._schema._initialize_attributes(HedSectionKey.UnitClasses) + self._schema._initialize_attributes(HedSectionKey.Units) + key_class = HedSectionKey.UnitClasses + classes = self._get_classes(HED.HedUnitClass) + unit_classes = {} + for uri in classes: + new_entry = self._parse_uri(uri, key_class) + self._add_to_dict(new_entry, key_class) + unit_classes[uri] = new_entry + + + + key_class = HedSectionKey.Units + units = self._get_classes(HED.HedUnit) + for uri in units: + new_entry = self._parse_uri(uri, key_class) + self._add_to_dict(new_entry, key_class) + unit_class_uri = self.graph.value(subject=uri, predicate=HED.unitClass) + class_entry = unit_classes.get(unit_class_uri) + class_entry.add_unit(new_entry) + breakHere = 3 + + def _add_tag_internal(self, uri, parent_tags): + tag_name = self.graph.value(uri, RDFS.label) + if not tag_name: + raise ValueError(f"No label for uri {uri}") + tag_name = str(tag_name) + parents_and_child = parent_tags + [tag_name] + if parent_tags and parents_and_child[0] in self._rooted_cache: + full_tag = "/".join([self._rooted_cache[parents_and_child[0]]] + parents_and_child[1:]) + else: + full_tag = "/".join(parents_and_child) + + tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) + + rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged) + if rooted_entry: + loading_from_chain = rooted_entry.name + "/" + tag_entry.short_tag_name + loading_from_chain_short = tag_entry.short_tag_name + self._rooted_cache[tag_entry.short_tag_name] = loading_from_chain + full_tag = full_tag.replace(loading_from_chain_short, loading_from_chain) + tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) + + self._add_to_dict(tag_entry, HedSectionKey.Tags) + + def _read_tags(self): + """Populates a dictionary of dictionaries associated with tags and their attributes.""" + classes = self._get_classes(HED.HedTag) + classes.update(self._get_classes(HED.HedPlaceholder)) + sorted_classes = self.sort_classes_by_hierarchy(classes) + self._schema._initialize_attributes(HedSectionKey.Tags) + for uri, parents in sorted_classes: + self._add_tag_internal(uri, parents) + + def _add_to_dict(self, 
+    def _add_to_dict(self, entry, key_class):
+        if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema:
+            raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED,
+                               "Library tag in unmerged schema has InLibrary attribute",
+                               self.name)
+
+        return self._add_to_dict_base(entry, key_class)
diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py
new file mode 100644
index 000000000..8d450d901
--- /dev/null
+++ b/hed/schema/schema_io/owl_constants.py
@@ -0,0 +1,51 @@
+from rdflib import Namespace
+
+from hed.schema.hed_schema_constants import HedSectionKey
+
+
+# Default file associations (notably .owl maps to XML format, as we already use XML)
+ext_to_format = {
+    ".ttl": "turtle",
+    ".owl": "xml",
+    ".json-ld": "json-ld"
+}
+
+# Core schema structural types in owl
+HED = Namespace("https://purl.org/hed#")
+# Tags
+HEDT = Namespace("https://purl.org/hed/tag#")
+# Unit classes, value classes, and units
+HEDU = Namespace("https://purl.org/hed/aux#")
+# Unit Modifiers
+HEDUM = Namespace("https://purl.org/hed/aux/unit_modifier#")
+
+# Some of this stuff may be commented back in later if needed
+
+# SECTION_ELEMENT_NAME = {
+#     HedSectionKey.Tags: "StartSchemaSection",
+#     HedSectionKey.UnitClasses: "UnitClassSection",
+#     HedSectionKey.Units: "UnitSection",
+#     HedSectionKey.UnitModifiers: "UnitModifiersSection",
+#     HedSectionKey.ValueClasses: "ValueClassesSection",
+#     HedSectionKey.Attributes: "AttributesSection",
+#     HedSectionKey.Properties: "PropertiesSection",
+# }
+#
+# SECTION_ELEMENT_TYPE = {
+#     HedSectionKey.Tags: "HedStartSchemaSection",
+#     HedSectionKey.UnitClasses: "HedUnitClassSection",
+#     HedSectionKey.Units: "HedUnitSection",
+#     HedSectionKey.UnitModifiers: "HedUnitModifiersSection",
+#     HedSectionKey.ValueClasses: "HedValueClassesSection",
+#     HedSectionKey.Attributes: "HedAttributesSection",
+#     HedSectionKey.Properties: "HedPropertiesSection",
+# }
+
+ELEMENT_NAMES = {
+    HedSectionKey.Tags: "HedTag",
+    HedSectionKey.Units: "HedUnit",
+    HedSectionKey.UnitClasses: "HedUnitClass",
+    HedSectionKey.UnitModifiers: "HedUnitModifier",
+    HedSectionKey.ValueClasses: "HedValueClass",
+}
diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py
index d9d082a15..c54e9b977 100644
--- a/hed/schema/schema_io/schema2base.py
+++ b/hed/schema/schema_io/schema2base.py
@@ -1,5 +1,6 @@
 """Baseclass for mediawiki/xml writers"""
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
+from hed.errors.exceptions import HedFileError, HedExceptions


 class Schema2Base:
@@ -29,6 +30,10 @@ def process_schema(cls, hed_schema, save_merged=False):
         Varies based on inherited class
         """
+        if not hed_schema.can_save():
+            raise HedFileError(HedExceptions.SCHEMA_LIBRARY_INVALID,
+                               "Cannot save a schema merged from multiple library schemas",
+                               hed_schema.filename)
         saver = cls()
         saver._save_lib = False
         saver._save_base = False
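A sketch of the new guard in action: saving a schema that was merged from several library files now fails fast (this assumes `save_as_xml` as the save entry point; the exact save call may differ):

```python
# Sketch: attempting to save a multi-library merged schema raises HedFileError.
from hed.errors.exceptions import HedFileError
from hed.schema import load_schema_version

schema = load_schema_version(["score_1.1.0", "testlib_1.0.2"])  # merged schema
try:
    schema.save_as_xml("merged.xml")   # assumed save entry point
except HedFileError as e:
    print(e.code)   # SCHEMA_LIBRARY_INVALID
```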
diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py
new file mode 100644
index 000000000..0b683942e
--- /dev/null
+++ b/hed/schema/schema_io/schema2owl.py
@@ -0,0 +1,314 @@
+"""Allows output of HedSchema objects in owl formats"""
+
+from hed.schema.hed_schema_constants import HedSectionKey, HedKey
+from hed.schema.schema_io import owl_constants
+from hed.schema.schema_io.schema2base import Schema2Base
+from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD
+
+from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM
+
+HED_URIS = {
+    None: HED,
+    HedSectionKey.Tags: HEDT,
+    HedSectionKey.UnitClasses: HEDU,
+    HedSectionKey.Units: HEDU,
+    HedSectionKey.UnitModifiers: HEDUM,
+    HedSectionKey.ValueClasses: HEDU,
+    HedSectionKey.Attributes: HED,
+    HedSectionKey.Properties: HED,
+}
+
+HED_ATTR = {
+    "unitClass": HEDU,
+    "valueClass": HEDU,
+    "unit": HEDU,
+    "unitModifier": HEDUM,
+    "property": HED,
+    "suggestedTag": HEDT,
+    "relatedTag": HEDT,
+    "rooted": HEDT,
+}
+
+float_attributes = {"conversionFactor"}
+
+hed_keys_with_types = {
+    HedKey.ExtensionAllowed: XSD["boolean"],
+    HedKey.Recommended: XSD["boolean"],
+    HedKey.Required: XSD["boolean"],
+    HedKey.RequireChild: XSD["boolean"],
+    HedKey.TagGroup: XSD["boolean"],
+    HedKey.TakesValue: XSD["boolean"],
+    HedKey.TopLevelTagGroup: XSD["boolean"],
+    HedKey.Unique: XSD["boolean"],
+    HedKey.UnitClass: HED["HedUnitClass"],
+    HedKey.ValueClass: HED["HedValueClass"],
+    HedKey.RelatedTag: HED["HedTag"],
+    HedKey.SuggestedTag: HED["HedTag"],
+    HedKey.Rooted: HED["HedTag"],
+    HedKey.DeprecatedFrom: XSD["string"],
+    HedKey.ConversionFactor: XSD["string"],
+    HedKey.Reserved: XSD["boolean"],
+    HedKey.SIUnit: XSD["boolean"],
+    HedKey.UnitSymbol: XSD["boolean"],
+    HedKey.DefaultUnits: HED["HedUnit"],
+    HedKey.UnitPrefix: XSD["boolean"],
+    HedKey.SIUnitModifier: XSD["boolean"],
+    HedKey.SIUnitSymbolModifier: XSD["boolean"],
+    HedKey.AllowedCharacter: XSD["string"],
+    HedKey.InLibrary: XSD["string"]
+}
+
+object_properties = {key for key, value in hed_keys_with_types.items() if value.startswith(HED)}
+
+
+class Schema2Owl(Schema2Base):
+    def __init__(self):
+        super().__init__()
+        self.owl_graph = Graph()
+        self.output = self.owl_graph
+        self.owl_graph.bind("hed", HED)
+        self.owl_graph.bind("hedt", HEDT)
+        self.owl_graph.bind("hedu", HEDU)
+        self.owl_graph.bind("hedum", HEDUM)
+
+    # =========================================
+    # Required baseclass function
+    # =========================================
+    def _output_header(self, attributes, prologue):
+        # Create a dictionary mapping label names to property URIs
+        property_uris = {
+            "library": HED.Library,
+            "unmerged": HED.Unmerged,
+            "version": HED.Version,
+            "withStandard": HED.WithStandard,
+            "xmlns:xsi": HED.XSI,
+            "xsi:noNamespaceSchemaLocation": HED.XSINoNamespaceSchemaLocation
+        }
+
+        for attrib_label, attrib_value in attributes.items():
+            prop_uri = property_uris.get(attrib_label)
+            if prop_uri:
+                self.owl_graph.add((prop_uri, RDF.type, HED.HeaderMember))
+                self.owl_graph.add((prop_uri, RDFS.label, Literal(attrib_label)))
+                self.owl_graph.add((prop_uri, HED.HeaderAttribute, Literal(attrib_value)))
+
+        self.owl_graph.add((HED.Prologue, RDF.type, HED.HedElement))
+        self.owl_graph.add((HED.Prologue, RDFS.label, Literal("prologue")))
+        if prologue:
+            self.owl_graph.add((HED.Prologue, HED["elementValue"], Literal(prologue)))
+
+    def _output_footer(self, epilogue):
+        self.owl_graph.add((HED.Epilogue, RDF.type, HED.HedElement))
+        self.owl_graph.add((HED.Epilogue, RDFS.label, Literal("epilogue")))
+        if epilogue:
+            self.owl_graph.add((HED.Epilogue, HED["elementValue"], Literal(epilogue)))
+
+    def _start_section(self, key_class):
+        return None
+
+    def _end_tag_section(self):
+        pass
+
+    def _write_attributes(self, entry_uri, entry):
+        for attribute, value in entry.attributes.items():
+            is_bool = entry.attribute_has_property(attribute, "boolProperty") \
+                      or entry.section_key == HedSectionKey.Attributes
+
+            if self._attribute_disallowed(attribute):
+                continue
+
+            if is_bool:
+                self.owl_graph.add((entry_uri, HED[attribute], Literal(True)))
+
+            elif attribute
in float_attributes: + # Treat as a string for now + self.owl_graph.add((entry_uri, HED[attribute], Literal(value))) + else: + # Todo: further develop this if needed or merge into base tools + values = value.split(",") + for val2 in values: + clean_value = val2 + if attribute in HED_ATTR: + attribute_uri = HED_ATTR[attribute][clean_value] + else: + attribute_uri = Literal(clean_value) + + self.owl_graph.add((entry_uri, HED[attribute], attribute_uri)) + + def _add_entry(self, base_uri, tag_name, label, comment, parent=None, entry=None, + tag_type=HED.HedTag, unit_class_uri=None): + is_takes_value = entry.has_attribute("takesValue") + if is_takes_value: + tag_type = HED.HedPlaceholder + tag_name = entry.short_tag_name + "-Placeholder" + label = "#" + + tag_name = sanitize_for_turtle(tag_name) + uri = f"{base_uri}{tag_name}" + hed_tag_uri = URIRef(uri) + + self.owl_graph.add((hed_tag_uri, RDF.type, tag_type)) + self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) + if comment: + self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) + # Don't store the parent in unmerged rooted nodes + if parent is not None and (HedKey.Rooted not in entry.attributes or self._save_merged): + parent_uri = HEDT[parent] + self.owl_graph.add((hed_tag_uri, HED.hasHedParent, parent_uri)) + if unit_class_uri is not None: + self.owl_graph.add((hed_tag_uri, HED.unitClass, unit_class_uri)) + self._write_attributes(hed_tag_uri, entry) + return hed_tag_uri + + def _add_property(self, base_uri, name, label, comment, entry, + data_type, sub_type): + name = sanitize_for_turtle(name) + uri = f"{base_uri}{name}" + hed_tag_uri = URIRef(uri) + + self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) + self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) + self.owl_graph.add((hed_tag_uri, RDFS.range, XSD.boolean)) + self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) + self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) + self._write_attributes(hed_tag_uri, entry) + + return hed_tag_uri + + def _get_element_domains(self, entry): + domain_table = {HedKey.ValueClassProperty: "HedValueClass", + HedKey.UnitModifierProperty: "HedUnitModifier", + HedKey.UnitProperty: "HedUnit", + HedKey.ElementProperty: "HedElement", + HedKey.UnitClassProperty: "HedUnitClass", + HedKey.NodeProperty: "HedTag" + } + domains = [] + for attribute in entry.attributes: + if attribute in domain_table: + domains.append(domain_table[attribute]) + + if not domains: + domains.append(domain_table[HedKey.NodeProperty]) + + return domains + + def _add_attribute(self, base_uri, name, label, comment, entry): + domains = self._get_element_domains(entry) + name = sanitize_for_turtle(name) + uri = f"{base_uri}{name}" + hed_tag_uri = URIRef(uri) + data_type = OWL.ObjectProperty + sub_type = HED.schemaAttributeObjectProperty + if name not in object_properties: + data_type = OWL.DatatypeProperty + sub_type = HED.schemaAttributeDatatypeProperty + self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) + for domain in domains: + self.owl_graph.add((hed_tag_uri, RDFS.domain, HED[domain])) + self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) + self.owl_graph.add((hed_tag_uri, RDFS.range, hed_keys_with_types[name])) + self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) + self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) + self._write_attributes(hed_tag_uri, entry) + + return hed_tag_uri + + def _write_tag_entry(self, tag_entry, parent_node=None, level=0): + """ + Creates a tag node and 
adds it to the parent. + + Parameters + ---------- + tag_entry: HedTagEntry + The entry for that tag we want to write out + parent_node: Any + Unused + level: Any + Unused + + Returns + ------- + SubElement + The added node + """ + tag_name = tag_entry.short_tag_name + parent = tag_entry.parent + if parent: + parent = parent.short_tag_name + comment = tag_entry.description + return self._add_entry( + HEDT, + tag_name=tag_name, + label=tag_name, + comment=comment, + parent=parent, + entry=tag_entry + ) + + def _write_entry(self, entry, parent_node=None, include_props=True): + """ + Creates an entry node and adds it to the parent. + + Parameters: + entry(HedSchemaEntry): The entry for that tag we want to write out + parent_node(str): URI for unit class owner, if this is a unit + include_props(bool): Add the description and attributes to new node. + Returns: + str: The added URI + """ + key_class = entry.section_key + prefix = HED_URIS[key_class] + name = entry.name + comment = entry.description + if key_class == HedSectionKey.Attributes: + uri = self._add_attribute( + prefix, + name=name, + label=name, + comment=comment, + entry=entry + ) + elif key_class == HedSectionKey.Properties: + uri = self._add_property( + prefix, + name=name, + label=name, + comment=comment, + entry=entry, + data_type=OWL.AnnotationProperty, + sub_type=HED.schemaProperty + ) + else: + unit_class_uri = None + if key_class == HedSectionKey.Units: + unit_class_uri = parent_node + uri = self._add_entry( + prefix, + tag_name=name, + label=name, + comment=comment, + entry=entry, + tag_type=HED[owl_constants.ELEMENT_NAMES[key_class]], + unit_class_uri=unit_class_uri + ) + return uri + + +import re + + +def sanitize_for_turtle(name): + """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. + + Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` + Replacing them with underscores + + Parameters: + name (str): The string to sanitize. + + Returns: + str: A sanitized string suitable for use as an IRIREF in Turtle. + """ + invalid_chars_pattern = r'[\x00-\x20<>"{}\|^`\\]' + return re.sub(invalid_chars_pattern, '_', name) diff --git a/hed/schema/schema_io/schema_util.py b/hed/schema/schema_io/schema_util.py index d2bf0721a..67a73c1f6 100644 --- a/hed/schema/schema_io/schema_util.py +++ b/hed/schema/schema_io/schema_util.py @@ -1,6 +1,5 @@ """ Utilities for writing content to files and for other file manipulation.""" -import shutil import tempfile import os import urllib.request @@ -72,65 +71,7 @@ def url_to_string(resource_url): return url_data -def write_strings_to_file(output_strings, extension=None): - """ Write output strings to a temporary file. - - Parameters: - output_strings ([str], str): Strings to output one per line. - extension (str): File extension of the temporary file. - - Returns: - file: Opened temporary file. 
- - """ - if isinstance(output_strings, str): - output_strings = [output_strings] - with tempfile.NamedTemporaryFile(suffix=extension, delete=False, mode='w', encoding='utf-8') as opened_file: - for string in output_strings: - opened_file.write(string) - opened_file.write('\n') - return opened_file.name - - -def move_file(input_path, target_path): - """ - If target_path is not empty, move input file to target file - - Parameters: - input_path(str): Path to an existing file - target_path(str or None): Path to move this file to - If None, the function does nothing and returns input_path - - Returns: - filepath(str): the original or moved filepath - """ - if target_path: - directory = os.path.dirname(target_path) - if directory and not os.path.exists(directory): - os.makedirs(directory) - shutil.move(input_path, target_path) - return target_path - return input_path - - -def write_xml_tree_2_xml_file(xml_tree, extension=".xml"): - """ Write an XML element tree object into an XML file. - - Parameters: - xml_tree (Element): An element representing an XML file. - extension (string): The file extension to use for the temporary file. - - Returns: - str: Name of the temporary file. - - """ - with tempfile.NamedTemporaryFile(suffix=extension, mode='w', delete=False, encoding='utf-8') as hed_xml_file: - xml_string = _xml_element_2_str(xml_tree) - hed_xml_file.write(xml_string) - return hed_xml_file.name - - -def _xml_element_2_str(elem): +def xml_element_2_str(elem): """ Convert an XML element to an XML string. Parameters: diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index de18f9d6a..9a7360ec6 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -21,8 +21,6 @@ no_wiki_start_tag = '' no_wiki_end_tag = '' - - required_sections = [ HedWikiSection.Prologue, HedWikiSection.Schema, @@ -44,8 +42,10 @@ class SchemaLoaderWiki(SchemaLoader): SchemaLoaderWiki(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None): - super().__init__(filename, schema_as_string) + + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + super().__init__(filename, schema_as_string, schema, file_format, name) + self._schema.source_format = ".mediawiki" self.fatal_errors = [] def _open_file(self): @@ -66,7 +66,7 @@ def _get_header_attributes(self, file_data): hed_attributes = self._get_header_attributes_internal(line[len(wiki_constants.HEADER_LINE_STRING):]) return hed_attributes msg = f"First line of file should be HED, instead found: {line}" - raise HedFileError(HedExceptions.SCHEMA_HEADER_MISSING, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_MISSING, msg, filename=self.name) def _parse_data(self): wiki_lines_by_section = self._split_lines_into_sections(self.input_data) @@ -88,13 +88,13 @@ def _parse_data(self): if section not in wiki_lines_by_section: error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Required section separator '{SectionNames[section]}' not found in file" - raise HedFileError(error_code, msg, filename=self.filename) + raise HedFileError(error_code, msg, filename=self.name) if self.fatal_errors: self.fatal_errors = error_reporter.sort_issues(self.fatal_errors) raise HedFileError(self.fatal_errors[0]['code'], f"{len(self.fatal_errors)} issues found when parsing schema. 
See the .issues " - f"parameter on this exception for more details.", self.filename, + f"parameter on this exception for more details.", self.name, issues=self.fatal_errors) def _parse_sections(self, wiki_lines_by_section, parse_order): @@ -114,7 +114,7 @@ def _read_header_section(self, lines): for line_number, line in lines: if line.strip(): msg = f"Extra content [{line}] between HED line and other sections" - raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.name) def _read_text_block(self, lines): text = "" @@ -273,7 +273,7 @@ def _get_header_attributes_internal(self, version_line): # todo: May shift this at some point to report all errors raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID, message=f"Header line has a malformed attribute {m}", - filename=self.filename) + filename=self.name) return attributes @staticmethod @@ -317,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line): divider_index = pair.find(':') if divider_index == -1: msg = f"Found poorly matched key:value pair in header: {pair}" - raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.name) key, value = pair[:divider_index], pair[divider_index + 1:] key = key.strip() value = value.strip() @@ -325,13 +325,6 @@ def _get_header_attributes_internal_old(self, version_line): return final_attributes - def _add_to_dict(self, line_number, line, entry, key_class): - if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged: - self._add_fatal_error(line_number, line, - f"Library tag in unmerged schema has InLibrary attribute", - HedExceptions.IN_LIBRARY_IN_UNMERGED) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) - @staticmethod def _get_tag_level(tag_line): """ Get the tag level from a line in a wiki file. @@ -544,24 +537,24 @@ def _check_for_new_section(self, line, strings_for_section, current_section): if key in strings_for_section: msg = f"Found section {SectionNames[key]} twice" raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, - msg, filename=self.filename) + msg, filename=self.name) if current_section < key: new_section = key else: error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Found section {SectionNames[key]} out of order in file" - raise HedFileError(error_code, msg, filename=self.filename) + raise HedFileError(error_code, msg, filename=self.name) break return new_section def _handle_bad_section_sep(self, line, current_section): if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.name) if line.startswith("!#"): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.name) def _split_lines_into_sections(self, wiki_lines): """ Takes a list of lines, and splits it into valid wiki sections. 
@@ -598,3 +591,11 @@ def _split_lines_into_sections(self, wiki_lines): strings_for_section[current_section].append((line_number + 1, line)) return strings_for_section + + def _add_to_dict(self, line_number, line, entry, key_class): + if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: + self._add_fatal_error(line_number, line, + f"Library tag in unmerged schema has InLibrary attribute", + HedExceptions.IN_LIBRARY_IN_UNMERGED) + + return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py index c300439e3..8dbd4590a 100644 --- a/hed/schema/schema_io/xml2schema.py +++ b/hed/schema/schema_io/xml2schema.py @@ -21,22 +21,14 @@ class SchemaLoaderXML(SchemaLoader): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None): - super().__init__(filename, schema_as_string) + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + super().__init__(filename, schema_as_string, schema, file_format, name) self._root_element = None self._parent_map = {} + self._schema.source_format = ".xml" def _open_file(self): - """Parses an XML file and returns the root element. - - Parameters - ---------- - Returns - ------- - RestrictedElement - The root element of the HED XML file. - - """ + """Parses an XML file and returns the root element.""" try: if self.filename: hed_xml_tree = ElementTree.parse(self.filename) @@ -44,18 +36,12 @@ def _open_file(self): else: root = ElementTree.fromstring(self.schema_as_string) except xml.etree.ElementTree.ParseError as e: - raise HedFileError(HedExceptions.CANNOT_PARSE_XML, e.msg, self.schema_as_string) + raise HedFileError(HedExceptions.CANNOT_PARSE_XML, e.msg, self.name) return root def _get_header_attributes(self, root_element): - """ - Gets the schema attributes form the XML root node - - Returns - ------- - attribute_dict: {str: str} - """ + """Gets the schema attributes from the XML root node""" return self._reformat_xsd_attrib(root_element.attrib) def _parse_data(self): @@ -82,7 +68,7 @@ def _parse_sections(self, root_element, parse_order): section_element = section_element[0] if isinstance(section_element, list): raise HedFileError(HedExceptions.INVALID_HED_FORMAT, - "Attempting to load an outdated or invalid XML schema", self.filename) + "Attempting to load an outdated or invalid XML schema", self.name) parse_func = parse_order[section_key] parse_func(section_element) @@ -128,17 +114,7 @@ def _add_tags_recursive(self, new_tags, parent_tags): self._add_tags_recursive(child_tags, parents_and_child) def _populate_tag_dictionaries(self, tag_section): - """Populates a dictionary of dictionaries associated with tags and their attributes. - - Parameters - ---------- - - Returns - ------- - {} - A dictionary of dictionaries that has been populated with dictionaries associated with tag attributes. - - """ + """Populates a dictionary of dictionaries associated with tags and their attributes.""" self._schema._initialize_attributes(HedSectionKey.Tags) root_tags = tag_section.findall("node") @@ -146,18 +122,7 @@ def _populate_tag_dictionaries(self, tag_section): def _populate_unit_class_dictionaries(self, unit_section): """Populates a dictionary of dictionaries associated with all the unit classes, unit class units, and unit - class default units. 
- - Parameters - ---------- - - Returns - ------- - {} - A dictionary of dictionaries associated with all the unit classes, unit class units, and unit class - default units. - - """ + class default units.""" self._schema._initialize_attributes(HedSectionKey.UnitClasses) self._schema._initialize_attributes(HedSectionKey.Units) def_element_name = xml_constants.ELEMENT_NAMES[HedSectionKey.UnitClasses] @@ -166,10 +131,11 @@ class default units. for unit_class_element in unit_class_elements: unit_class_entry = self._parse_node(unit_class_element, HedSectionKey.UnitClasses) unit_class_entry = self._add_to_dict(unit_class_entry, HedSectionKey.UnitClasses) + if unit_class_entry is None: + continue element_units = self._get_elements_by_name(xml_constants.UNIT_CLASS_UNIT_ELEMENT, unit_class_element) - element_unit_names = [self._get_element_tag_value(element) for element in element_units] - for unit, element in zip(element_unit_names, element_units): + for element in element_units: unit_class_unit_entry = self._parse_node(element, HedSectionKey.Units) self._add_to_dict(unit_class_unit_entry, HedSectionKey.Units) unit_class_entry.add_unit(unit_class_unit_entry) @@ -230,7 +196,7 @@ def _get_element_tag_value(self, element, tag_name=xml_constants.NAME_ELEMENT): if element.text is None and tag_name != "units": raise HedFileError(HedExceptions.HED_SCHEMA_NODE_NAME_INVALID, f"A Schema node is empty for tag of element name: '{tag_name}'.", - self._schema.filename) + self.name) return element.text return "" @@ -256,8 +222,9 @@ def _get_elements_by_name(self, element_name='node', parent_element=None): return elements def _add_to_dict(self, entry, key_class): - if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged: + if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, f"Library tag in unmerged schema has InLibrary attribute", - self._schema.filename) - return self._schema._add_tag_to_dict(entry.name, entry, key_class) + self.name) + + return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 25b27ab8c..7bbf10468 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -51,12 +51,12 @@ def validate_version_string(version_string): } -def validate_present_attributes(attrib_dict, filename): +def validate_present_attributes(attrib_dict, name): """ Validate combinations of attributes Parameters: attrib_dict (dict): Dictionary of attributes to be evaluated. - filename (str): File name to use in reporting errors. + name (str): File name to use in reporting errors. Returns: list: List of issues. Each issue is a dictionary. @@ -67,15 +67,15 @@ def validate_present_attributes(attrib_dict, filename): if constants.WITH_STANDARD_ATTRIBUTE in attrib_dict and constants.LIBRARY_ATTRIBUTE not in attrib_dict: raise HedFileError(HedExceptions.BAD_WITH_STANDARD, "withStandard header attribute found, but no library attribute is present", - filename) + name) -def validate_attributes(attrib_dict, filename): +def validate_attributes(attrib_dict, name): """ Validate attributes in the dictionary. Parameters: attrib_dict (dict): Dictionary of attributes to be evaluated. - filename (str): File name to use in reporting errors. + name (str): name to use in reporting errors. Returns: list: List of issues. Each issue is a dictionary. 
@@ -85,21 +85,21 @@ def validate_attributes(attrib_dict, filename): - Version not present - Invalid combinations of attributes in header """ - validate_present_attributes(attrib_dict, filename) + validate_present_attributes(attrib_dict, name) for attribute_name, attribute_value in attrib_dict.items(): if attribute_name in header_attribute_validators: validator, error_code = header_attribute_validators[attribute_name] had_error = validator(attribute_value) if had_error: - raise HedFileError(error_code, had_error, filename) + raise HedFileError(error_code, had_error, name) if attribute_name not in valid_header_attributes: raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, - f"Unknown attribute {attribute_name} found in header line", filename=filename) + f"Unknown attribute {attribute_name} found in header line", filename=name) if constants.VERSION_ATTRIBUTE not in attrib_dict: raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, - "No version attribute found in header", filename=filename) + "No version attribute found in header", filename=name) # Might move this to a baseclass version if one is ever made for wiki2schema/xml2schema @@ -127,28 +127,28 @@ def find_rooted_entry(tag_entry, schema, loading_merged): if not schema.with_standard: raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f"Rooted tag attribute found on '{tag_entry.short_tag_name}' in a standard schema.", - schema.filename) + schema.name) if not isinstance(rooted_tag, str): raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Rooted tag \'{tag_entry.short_tag_name}\' is not a string."', - schema.filename) + schema.name) if tag_entry.parent_name and not loading_merged: raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Found rooted tag \'{tag_entry.short_tag_name}\' as a non root node.', - schema.filename) + schema.name) if not tag_entry.parent_name and loading_merged: raise HedFileError(HedExceptions.ROOTED_TAG_INVALID, f'Found rooted tag \'{tag_entry.short_tag_name}\' as a root node in a merged schema.', - schema.filename) + schema.name) rooted_entry = schema.tags.get(rooted_tag) if not rooted_entry or rooted_entry.has_attribute(constants.HedKey.InLibrary): raise HedFileError(HedExceptions.ROOTED_TAG_DOES_NOT_EXIST, f"Rooted tag '{tag_entry.short_tag_name}' not found in paired standard schema", - schema.filename) + schema.name) if loading_merged: return None diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index 988b4bdae..fba665d78 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -11,7 +11,7 @@ class HedTypeDefs: def_map (dict): keys are definition names, values are dict {type_values, description, tags} Example: A definition 'famous-face-cond' with contents `(Condition-variable/Face-type,Description/A face that should be recognized by the - participants,(Image,(Face,Famous)))` + participants,(Image,(Face,Famous)))` would have type_values ['face_type']. All items are strings not objects. diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index e2f7f535b..09d7f318f 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -59,7 +59,9 @@ def make_template(self, additional_cols=None, show_counts=True): Parameters: additional_cols (list or None): Optional list of additional columns to append to the returned dataframe. 
-            show_counts (bool): If true, number of times each key combination appears is in first column
+            show_counts (bool): If True, the number of times each key combination appears is in the
+                first column and the rows are sorted in descending order by count.
+
         Returns:
             DataFrame:  A dataframe containing the template.
@@ -79,6 +81,7 @@ def make_template(self, additional_cols=None, show_counts=True):
             df[additional_cols] = 'n/a'
         if show_counts:
             df.insert(0, 'key_counts', self._get_counts())
+            df.sort_values(by=['key_counts'], inplace=True, ignore_index=True, ascending=False)
         return df

     def _get_counts(self):
@@ -142,7 +145,7 @@ def resort(self):
         for index, row in self.col_map.iterrows():
             key_hash = get_row_hash(row, self.key_cols)
             self.map_dict[key_hash] = index
-
+
     def update(self, data, allow_missing=True):
         """ Update the existing map with information from data.
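A sketch of the new sorted template (class and column names as in `key_map.py`; the data is made up):

```python
# Sketch: make_template now returns rows sorted by key_counts, descending.
import pandas as pd
from hed.tools.analysis.key_map import KeyMap

events = pd.DataFrame({"event_type": ["go", "stop", "go", "go"]})
key_map = KeyMap(["event_type"])
key_map.update(events)
print(key_map.make_template(show_counts=True))
# 'go' (key_counts=3) now sorts above 'stop' (key_counts=1)
```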
diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py
new file mode 100644
index 000000000..0ecd0fea9
--- /dev/null
+++ b/hed/tools/analysis/sequence_map.py
@@ -0,0 +1,173 @@
+""" A map containing the number of times particular sequences of values appear in a column of an event file. """
+
+import pandas as pd
+from hed.tools.util.data_util import get_key_hash
+
+
+class SequenceMap:
+    """ A map of the unique sequences of column values of a particular length that appear in an event file.
+
+    Attributes:
+        name (str):  An optional name of this remap for identification purposes.
+
+    Notes: This mapping converts all columns in the mapping to strings.
+           The remapping does not support other types of columns.
+
+    """
+    def __init__(self, codes=None, name=''):
+        """ Information for setting up the maps.
+
+        Parameters:
+            codes (list or None): If None use all codes, otherwise only include listed codes in the map.
+            name (str): Name associated with this remap (usually a pathname of the events file).
+
+        """
+        self.codes = codes
+        self.name = name
+        self.node_counts = {}
+        self.edges = {}  # map of keys to n-element sequences
+        self.edge_counts = {}  # keeps a running count of the number of times a key appears in the data
+
+    def __str__(self):
+        node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()]
+        return " ".join(node_counts)
+
+    def dot_str(self, group_spec={}):
+        """ Produce a DOT string representing this sequence map.
+
+        Parameters:
+            group_spec (dict): Optional map of group name to a dict with "nodes" (a list of codes)
+                and "color" (a background color) used to cluster those nodes in the graph.
+
+        Returns:
+            str: A DOT (GraphViz) representation of the map.
+
+        """
+        base = 'digraph g { \n'
+        if self.codes:
+            node_list = [f"{node};" for node in self.codes if node not in self.node_counts]
+            if node_list:
+                base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + "\n".join(node_list) + "\n}\n"
+        if group_spec:
+            for group, spec in group_spec.items():
+                group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]]
+                if group_list:
+                    spec_color = spec["color"]
+                    if spec_color[0] == '#':
+                        spec_color = f'"{spec_color}"'
+                    base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \
+                           '\n'.join(group_list) + '\n}\n'
+        edge_list = self.get_edge_list(sort=True)
+
+        dot_str = base + "\n".join(edge_list) + "}\n"
+        return dot_str
+
+    def edge_to_str(self, key):
+        value = self.edges.get(key, [])
+        if value:
+            return f"{value[0]} -> {value[1]} "
+        else:
+            return ""
+
+    def get_edge_list(self, sort=True):
+        """Produces a DOT format edge list with the option of sorting by edge counts.
+
+        Parameters:
+            sort (bool): If True, the edge list is sorted by edge counts.
+
+        Returns:
+            list: list of DOT strings representing the edges labeled by counts.
+
+        """
+        df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts'])
+        if sort:
+            df = df.sort_values(by='Counts', ascending=False)
+        edge_list = [f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];"
+                     for index, row in df.iterrows()]
+        return edge_list
+
+    def update(self, data):
+        """ Update the existing map with information from data.
+
+        Parameters:
+            data (Series): Column of an events file to tabulate node and edge counts from.
+
+        """
+        filtered = self.prep(data)
+        if self.codes:
+            mask = filtered.isin(self.codes)
+            filtered = filtered[mask]
+        for index, value in filtered.items():
+            if value not in self.node_counts:
+                self.node_counts[value] = 1
+            else:
+                self.node_counts[value] = self.node_counts[value] + 1
+            if index + 1 >= len(filtered):
+                break
+            key_list = filtered[index:index + 2].tolist()
+            key = get_key_hash(key_list)
+            if key in self.edges:
+                self.edge_counts[key] = self.edge_counts[key] + 1
+            else:
+                self.edges[key] = key_list
+                self.edge_counts[key] = 1
+
+    @staticmethod
+    def prep(data):
+        """ Remove quotes from the specified column and convert to string.
+
+        Parameters:
+            data (Series): Column of values to be prepared.
+
+        Returns:
+            Series: A new Series with quotes stripped and all values as strings.
+
+        """
+        filtered = data.astype(str)
+        filtered = filtered.fillna('n/a')
+        filtered = filtered.str.replace('"', '')
+        filtered = filtered.str.replace("'", "")
+        return filtered
\ No newline at end of file
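A short usage sketch for `SequenceMap` as defined above (made-up codes):

```python
# Sketch: count nodes and two-element transitions, then emit a DOT digraph.
import pandas as pd
from hed.tools.analysis.sequence_map import SequenceMap

codes = pd.Series(["A", "B", "A", "B", "C"])
smap = SequenceMap()
smap.update(codes)
print(smap.dot_str())   # digraph with edges such as A -> B labeled by their counts
```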
+ """ + + filtered = data.astype(str) + filtered.fillna('n/a').astype(str) + filtered = filtered.str.replace('"', '') + filtered = filtered.str.replace("'", "") + return filtered \ No newline at end of file diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py new file mode 100644 index 000000000..0415f91ec --- /dev/null +++ b/hed/tools/analysis/sequence_map_new.py @@ -0,0 +1,160 @@ +""" A map of containing the number of times a particular sequence of values in a column of an event file. """ + +import pandas as pd +from hed.tools.util.data_util import get_key_hash + + +class SequenceMapNew: + """ A map of unique sequences of column values of a particular length appear in an event file. + + Attributes: + + name (str): An optional name of this remap for identification purposes. + + Notes: This mapping converts all columns in the mapping to strings. + The remapping does not support other types of columns. + + """ + + def __init__(self, codes=None, name='', seq=[0, -1]): + """ Information for setting up the maps. + + Parameters: + codes (list or None): If None use all codes, otherwise only include listed codes in the map. + name (str): Name associated with this remap (usually a pathname of the events file). + + """ + + self.codes = codes + self.name = name + self.seq = seq + self.nodes = {} # Node keys to node names + self.node_counts = {} # Node values to count + self.sequences = {} # Sequence keys to sequence + self.seq_counts = {} # Sequence keys to counts + self.edges = {} # map of edge keys to 2-element sequence keys + self.edge_counts = {} # edge keys to edge counts + + @property + def __str__(self): + node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] + node_str = (" ").join(node_counts) + return node_str + # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.columns) + # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") + # return "\n".join(temp_list) + + def dot_str(self, group_spec={}): + """ Produce a DOT string representing this sequence map. + + + """ + base = 'digraph g { \n' + if self.codes: + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) + "\n}\n" + if group_spec: + for group, spec in group_spec.items(): + group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] + if group_list: + spec_color = spec["color"] + if spec_color[0] == '#': + spec_color = f'"{spec_color}"' + base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ + '\n'.join(group_list) + '\n}\n' + edge_list = self.get_edge_list(sort=True) + + dot_str = base + ("\n").join(edge_list) + "}\n" + return dot_str + + def edge_to_str(self, key): + value = self.edges.get(key, []) + if value: + x = ("+").join(value[0]) + y = ("+").join(value[1]) + return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " + else: + return "" + + def get_edge_list(self, sort=True): + """Produces a DOT format edge list with the option of sorting by edge counts. + + Parameters: + sort (bool): if true the edge list is sorted by edge counts + + Returns: + list: list of DOT strings representing the edges labeled by counts. 
+
+        """
+
+        df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts'])
+        if sort:
+            df = df.sort_values(by='Counts', ascending=False)
+        edge_list = []
+        for _, row in df.iterrows():
+            edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];")
+        return edge_list
+
+    def filter_edges(self):
+        # TODO: edge filtering is not yet implemented.
+        pass
+
+    def update(self, data):
+        """ Update the node, sequence, and edge counts of this map with information from data.
+
+        Parameters:
+            data (Series): Series of column values from an events file.
+
+        """
+        filtered = self.get_sequence_data(data)
+        last_seq_key = None
+        for index, row in filtered.iterrows():
+            # Update node counts
+            this_node = row['value']
+            self.node_counts[this_node] = self.node_counts.get(this_node, 0) + 1
+            this_seq = row['seq']
+            if not this_seq:
+                last_seq_key = None
+                continue
+            this_seq_key = get_key_hash(this_seq)
+            self.sequences[this_seq_key] = this_seq
+            self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1
+            if last_seq_key:
+                this_edge_key = get_key_hash([last_seq_key, this_seq_key])
+                self.edges[this_edge_key] = [last_seq_key, this_seq_key]
+                self.edge_counts[this_edge_key] = self.edge_counts.get(this_edge_key, 0) + 1
+            last_seq_key = this_seq_key
+
+    def get_sequence_data(self, data):
+        """ Return a DataFrame with a value column and a seq column holding the sequence at each row. """
+        filtered = self.prep(data)
+        empty_lists = [[] for _ in range(len(filtered))]
+
+        # Create a DataFrame with one row per value and an empty sequence placeholder.
+        df = pd.DataFrame({'value': filtered.values, 'seq': empty_lists})
+
+        for index, row in df.iterrows():
+            df.at[index, 'seq'] = self.get_sequence(df, index)
+        return df
+
+    def get_sequence(self, df, index):
+        """ Return the list of values at the seq offsets relative to index, or [] if any offset is out of bounds. """
+        seq_list = []
+        for val in self.seq:
+            df_ind = val + index
+            if df_ind < 0 or df_ind >= len(df):
+                return []
+            seq_list.append(df.iloc[df_ind, 0])
+        return seq_list
+
+    @staticmethod
+    def prep(data):
+        """ Remove quotes from a series of values and convert to string.
+
+        Parameters:
+            data (Series): Series of column values to be cleaned up.
+
+        Returns:
+            Series: A new Series with quotes removed and all values converted to string.
+        """
+
+        filtered = data.fillna('n/a').astype(str)
+        filtered = filtered.str.replace('"', '')
+        filtered = filtered.str.replace("'", "")
+        return filtered
diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py
index d6cd4592c..a5c475107 100644
--- a/hed/tools/bids/bids_dataset.py
+++ b/hed/tools/bids/bids_dataset.py
@@ -21,8 +21,8 @@ class BidsDataset:
 
     """
 
-    def __init__(self, root_path, schema=None, tabular_types=None,
-                 exclude_dirs=['sourcedata', 'derivatives', 'code', 'stimuli']):
+    def __init__(self, root_path, schema=None, tabular_types=['events'],
+                 exclude_dirs=['sourcedata', 'derivatives', 'code', 'stimuli', 'phenotype']):
         """ Constructor for a BIDS dataset.
 
         Parameters:
             root_path (str): Root path of the BIDS dataset.
             schema (HedSchema or HedSchemaGroup): A schema that overrides the one specified in dataset.
             tabular_types (list or None): List of strings specifying types of tabular types to include.
                 If None or empty, then ['events'] is assumed.
- exclude_dirs=['sourcedata', 'derivatives', 'code']: + exclude_dirs=['sourcedata', 'derivatives', 'code', 'phenotype']: """ self.root_path = os.path.realpath(root_path) @@ -42,7 +42,7 @@ def __init__(self, root_path, schema=None, tabular_types=None, self.schema = load_schema_version(self.dataset_description.get("HEDVersion", None)) self.exclude_dirs = exclude_dirs - self.tabular_files = {"participants": BidsFileGroup(root_path, suffix="participants", obj_type="tabular")} + self.tabular_files = {} if not tabular_types: self.tabular_files["events"] = BidsFileGroup(root_path, suffix="events", obj_type="tabular", exclude_dirs=exclude_dirs) diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index 75c6f4f1a..60ecf753c 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -224,7 +224,7 @@ def get_task(task_names, file_path): """ Return the task if the file name contains a task_xxx where xxx is in task_names. Parameters: - task_names (list): List of task names (without the task_ prefix). + task_names (list): List of task names (without the `task_` prefix). file_path (str): Path of the filename to be tested. Returns: diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 32af02eaf..0761eca5d 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -4,8 +4,9 @@ import json import argparse from hed.errors.exceptions import HedFileError -from hed.tools.util.io_util import get_file_list, get_task_from_file +from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict from hed.tools.bids.bids_dataset import BidsDataset +from hed.tools.remodeling.validator import RemodelerValidator from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.backup_manager import BackupManager @@ -109,24 +110,18 @@ def parse_arguments(arg_list=None): print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}") with open(args.model_path, 'r') as fp: operations = json.load(fp) - parsed_operations, errors = Dispatcher.parse_operations(operations) + validator = RemodelerValidator() + errors = validator.validate(operations) if errors: raise ValueError("UnableToFullyParseOperations", - f"Fatal operation error, cannot continue:\n{Dispatcher.errors_to_str(errors)}") + f"Fatal operation error, cannot continue:\n{errors}") return args, operations def parse_tasks(files, task_args): if not task_args: return {"": files} - task_dict = {} - for my_file in files: - task = get_task_from_file(my_file) - if not task: - continue - task_entry = task_dict.get(task, []) - task_entry.append(my_file) - task_dict[task] = task_entry + task_dict = get_task_dict(files) if task_args == "*" or isinstance(task_args, list) and task_args[0] == "*": return task_dict task_dict = {key: task_dict[key] for key in task_args if key in task_dict} diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 2bfb90b3f..039b05f27 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -22,7 +22,7 @@ def __init__(self, operation_list, data_root=None, """ Constructor for the dispatcher. Parameters: - operation_list (list): List of unparsed operations. + operation_list (list): List of valid unparsed operations. data_root (str or None): Root directory for the dataset. If none, then backups are not made. hed_versions (str, list, HedSchema, or HedSchemaGroup): The HED schema. 
@@ -42,11 +42,7 @@ def __init__(self, operation_list, data_root=None,
                 raise HedFileError("BackupDoesNotExist",
                                    f"Remodeler cannot be run with a dataset without first creating the "
                                    f"{self.backup_name} backup for {self.data_root}", "")
-        op_list, errors = self.parse_operations(operation_list)
-        if errors:
-            these_errors = self.errors_to_str(errors, 'Dispatcher failed due to invalid operations')
-            raise ValueError("InvalidOperationList", f"{these_errors}")
-        self.parsed_ops = op_list
+        self.parsed_ops = self.parse_operations(operation_list)
         self.hed_schema = self.get_schema(hed_versions)
         self.summary_dicts = {}
 
@@ -183,31 +179,11 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s
 
     @staticmethod
     def parse_operations(operation_list):
-        errors = []
         operations = []
-        for index, item in enumerate(operation_list):
-            try:
-                if not isinstance(item, dict):
-                    raise TypeError("InvalidOperationFormat",
-                                    f"Each operations must be a dictionary but operation {str(item)} is {type(item)}")
-                if "operation" not in item:
-                    raise KeyError("MissingOperation",
-                                   f"operation {str(item)} does not have a operation key")
-                if "parameters" not in item:
-                    raise KeyError("MissingParameters",
-                                   f"Operation {str(item)} does not have a parameters key")
-                if item["operation"] not in valid_operations:
-                    raise KeyError("OperationNotListedAsValid",
-                                   f"Operation {item['operation']} must be added to operations_list "
-                                   f"before it can be executed.")
-                new_operation = valid_operations[item["operation"]](item["parameters"])
-                operations.append(new_operation)
-            except Exception as ex:
-                errors.append({"index": index, "item": f"{item}", "error_type": type(ex),
-                               "error_code": ex.args[0], "error_msg": ex.args[1]})
-        if errors:
-            return [], errors
-        return operations, []
+        for item in operation_list:
+            new_operation = valid_operations[item["operation"]](item["parameters"])
+            operations.append(new_operation)
+        return operations
 
     @staticmethod
     def prep_data(df):
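A minimal sketch of the new validation path, following the pattern in run_remodel.py above; the remove_columns spec and its column name are hypothetical:

    from hed.tools.remodeling.validator import RemodelerValidator
    from hed.tools.remodeling.dispatcher import Dispatcher

    # Operations are plain dictionaries, as loaded from a remodel JSON file.
    operations = [{"operation": "remove_columns",
                   "parameters": {"column_names": ["response_accuracy"], "ignore_missing": True}}]
    validator = RemodelerValidator()
    errors = validator.validate(operations)
    if errors:
        raise ValueError("InvalidOperationList", f"{errors}")
    dispatcher = Dispatcher(operations)  # The Dispatcher now assumes the operations are already valid.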
""" - self.operation = op_spec.get("operation", "") - if not self.operation: - raise ValueError("OpMustHaveOperation", "Op must have operation is empty") - self.required_params = op_spec.get("required_parameters", {}) - self.optional_params = op_spec.get("optional_parameters", {}) - self.check_parameters(parameters) - - def check_parameters(self, parameters): - """ Verify that the parameters meet the operation specification. - - Parameters: - parameters (dict): Dictionary of parameters for this operation. + self.parameters = parameters - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. + @property + @abstractmethod + def NAME(self): + pass - :raises TypeError: - - If a parameter has the wrong type. - - """ - - required = set(self.required_params.keys()) - required_missing = required.difference(set(parameters.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"{self.operation} requires parameters {list(required_missing)}") - for param_name, param_value in parameters.items(): - if param_name in self.required_params: - param_type = self.required_params[param_name] - elif param_name in self.optional_params: - param_type = self.optional_params[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - if isinstance(param_type, list): - self._check_list_type(param_value, param_type) - elif not isinstance(param_value, param_type): - raise TypeError("BadType", f"{param_value} has type {type(param_value)} not {param_type}") + @property + @abstractmethod + def PARAMS(self): + pass + @abstractmethod def do_op(self, dispatcher, df, name, sidecar=None): """ Base class method to be overridden by each operation. @@ -78,21 +36,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ return df.copy() - + @staticmethod - def _check_list_type(param_value, param_type): - """ Check a parameter value against its specified type. - - Parameters: - param_value (any): The value to be checked. - param_type (any): Class to check the param_value against. - - :raises TypeError: - - If param_value is not an instance of param_type. - - """ - - for this_type in param_type: - if isinstance(param_value, this_type): - return - raise TypeError("BadType", f"{param_value} has type {type(param_value)} which is not in {str(param_type)}") + @abstractmethod + def validate_input_data(parameters): + '''Validates whether operation parameter input data meets specific criteria beyond what can be captured in json schema. + For example, whether two input arrays are the same length. Minimum implementation should return an empty list + to indicate no errors were found. If additional validation is necessary, method should perform the validation and + return a list with user friendly error strings. + ''' + return [] diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index e98a8cce5..3768f9feb 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -1,27 +1,53 @@ """ Convert the type of the specified columns of a tabular file. """ +#TODO finish implementation from hed.tools.remodeling.operations.base_op import BaseOp class ConvertColumnsOp(BaseOp): - """ Convert. + """ Convert data type in column Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. 
diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py
index e98a8cce5..3768f9feb 100644
--- a/hed/tools/remodeling/operations/convert_columns_op.py
+++ b/hed/tools/remodeling/operations/convert_columns_op.py
@@ -1,27 +1,53 @@
 """ Convert the type of the specified columns of a tabular file. """
 
+# TODO: finish implementation
 
 from hed.tools.remodeling.operations.base_op import BaseOp
 
 
 class ConvertColumnsOp(BaseOp):
-    """ Convert.
+    """ Convert the data type of specified columns.
 
     Required remodeling parameters:
         - **column_names** (*list*): The list of columns to convert.
-        - **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
+        - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
+
+    Optional remodeling parameters:
         - **decimal_places** (*int*): Number of decimal places to keep (for fixed only).
-
-
     """
+    NAME = "convert_columns"
+
     PARAMS = {
-        "operation": "convert_columns",
-        "required_parameters": {
-            "column_names": list,
-            "convert_to": str
+        "type": "object",
+        "properties": {
+            "column_names": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            },
+            "convert_to": {
+                "type": "string",
+                "enum": ['str', 'int', 'float', 'fixed'],
+            },
+            "decimal_places": {
+                "type": "integer"
+            }
+        },
+        "required": [
+            "column_names",
+            "convert_to"
+        ],
+        "additionalProperties": False,
+        "if": {
+            "properties": {
+                "convert_to": {"const": "fixed"}
+            }
         },
-        "optional_parameters": {
-            "decimal_places": int
+        "then": {
+            "required": ["decimal_places"]
        }
    }
 
@@ -31,25 +57,11 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Parameter values for required and optional parameters.
 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
-        :raises ValueError:
-            - If convert_to is not one of the allowed values.
-
         """
-        super().__init__(self.PARAMS, parameters)
+        super().__init__(parameters)
         self.column_names = parameters['column_names']
         self.convert_to = parameters['convert_to']
         self.decimal_places = parameters.get('decimal_places', None)
-        self.allowed_types = ['str', 'int', 'float', 'fixed']
-        if self.convert_to not in self.allowed_types:
-            raise ValueError("CannotConvertToSpecifiedType",
-                             f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}")
 
     def do_op(self, dispatcher, df, name, sidecar=None):
         """ Convert the specified column to a specified type.
 
@@ -67,3 +79,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):
 
         df_new = df.copy()
         return df_new
+
+    @staticmethod
+    def validate_input_data(parameters):
+        return []
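The "if"/"then" clause above makes decimal_places required only when convert_to is "fixed". A sample JSON spec that satisfies this schema (the column name is hypothetical):

    {
        "operation": "convert_columns",
        "parameters": {
            "column_names": ["response_time"],
            "convert_to": "fixed",
            "decimal_places": 2
        }
    }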
""" + NAME = "factor_column" PARAMS = { - "operation": "factor_column", - "required_parameters": { - "column_name": str, - "factor_values": list, - "factor_names": list + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "factor_names": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "factor_values": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + } }, - "optional_parameters": {} + "required": [ + "column_name" + ], + "dependentRequired": { + "factor_names": ["factor_values"] + }, + "additionalProperties": False } def __init__(self, parameters): @@ -33,25 +58,11 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - - :raises ValueError: - - If factor_names is not empty and is not the same length as factor_values. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters['column_name'] self.factor_values = parameters['factor_values'] self.factor_names = parameters['factor_names'] - if self.factor_names and len(self.factor_values) != len(self.factor_names): - raise ValueError("FactorNamesLenBad", - f"The factor_names length {len(self.factor_names)} must be empty or equal" + - f"to the factor_values length {len(self.factor_values)} .") def do_op(self, dispatcher, df, name, sidecar=None): """ Create factor columns based on values in a specified column. @@ -71,11 +82,23 @@ def do_op(self, dispatcher, df, name, sidecar=None): factor_names = self.factor_names if len(factor_values) == 0: factor_values = df[self.column_name].unique() - factor_names = [self.column_name + '.' + str(column_value) for column_value in factor_values] + factor_names = [self.column_name + '.' + + str(column_value) for column_value in factor_values] df_new = df.copy() for index, factor_value in enumerate(factor_values): - factor_index = df_new[self.column_name].map(str).isin([str(factor_value)]) + factor_index = df_new[self.column_name].map( + str).isin([str(factor_value)]) column = factor_names[index] df_new[column] = factor_index.astype(int) return df_new + + @staticmethod + def validate_input_data(parameters): + if parameters.get("factor_names", False): + if len(parameters.get("factor_names")) != len(parameters.get("factor_values")): + return ["The list in factor_names, in the factor_column operation, should have the same number of items as factor_values."] + else: + return [] + else: + return [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index c5b2ca08f..d28fa5e83 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -15,27 +15,54 @@ class FactorHedTagsOp(BaseOp): """ Create tabular file factors from tag queries. Required remodeling parameters: - - **queries** (*list*): Queries to be applied successively as filters. - - **query_names** (*list*): Column names for the query factors. - - **remove_types** (*list*): Structural HED tags to be removed. - - **expand_context** (*bool*): Expand the context if True. + - **queries** (*list*): Queries to be applied successively as filters. 
diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py
index c5b2ca08f..d28fa5e83 100644
--- a/hed/tools/remodeling/operations/factor_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py
@@ -15,27 +15,54 @@ class FactorHedTagsOp(BaseOp):
     """ Create tabular file factors from tag queries.
 
     Required remodeling parameters:
-        - **queries** (*list*): Queries to be applied successively as filters.
-        - **query_names** (*list*): Column names for the query factors.
-        - **remove_types** (*list*): Structural HED tags to be removed.
-        - **expand_context** (*bool*): Expand the context if True.
+        - **queries** (*list*): Queries to be applied successively as filters.
+
+    Optional remodeling parameters:
+        - **expand_context** (*bool*): Expand the context if True.
+        - **query_names** (*list*): Column names for the query factors.
+        - **remove_types** (*list*): Structural HED tags to be removed.
 
     Notes:
-        - If factor column names are not provided, *query1*, *query2*, ... are used.
+        - If query names are not provided, *query1*, *query2*, ... are used.
         - When the context is expanded, the effect of events for temporal extent is accounted for.
-        - Context expansion is not implemented in the current version.
 
     """
+    NAME = "factor_hed_tags"
 
     PARAMS = {
-        "operation": "factor_hed_tags",
-        "required_parameters": {
-            "queries": list,
-            "query_names": list,
-            "remove_types": list
+        "type": "object",
+        "properties": {
+            "queries": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            },
+            "expand_context": {
+                "type": "boolean"
+            },
+            "query_names": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            },
+            "remove_types": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            }
        },
-        "optional_parameters": {
-            "expand_context": bool
-        }
+        "required": [
+            "queries"
+        ],
+        "additionalProperties": False
    }
 
     def __init__(self, parameters):
@@ -44,20 +71,8 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Actual values of the parameters for the operation.
 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
-        :raises ValueError:
-            - If the specification is missing a valid operation.
-            - If the length of query names is not empty and not same length as queries.
-            - If there are duplicate query names.
-
         """
-        super().__init__(self.PARAMS, parameters)
-        self.queries = parameters['queries']
-        self.query_names = parameters['query_names']
-        self.remove_types = parameters['remove_types']
+        super().__init__(parameters)
+        self.queries = parameters['queries']
+        self.query_names = parameters.get('query_names', None)
+        self.remove_types = parameters.get('remove_types', [])
 
@@ -93,9 +108,18 @@ def do_op(self, dispatcher, df, name, sidecar=None):
         event_man = EventManager(input_data, dispatcher.hed_schema)
         hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None,
                                        join_columns=True, shrink_defs=False, expand_defs=True)
-        df_factors = search_strings(hed_strings, self.expression_parsers, query_names=self.query_names)
+        df_factors = search_strings(
+            hed_strings, self.expression_parsers, query_names=self.query_names)
         if len(df_factors.columns) > 0:
             df_list.append(df_factors)
         df_new = pd.concat(df_list, axis=1)
         df_new.replace('n/a', np.NaN, inplace=True)
         return df_new
+
+    @staticmethod
+    def validate_input_data(parameters):
+        errors = []
+        if parameters.get("query_names", False):
+            if len(parameters.get("query_names")) != len(parameters.get("queries")):
+                errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.")
+        return errors
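A sample JSON spec for this operation; since query_names is omitted, the factor columns would be named query1, query2 per the docstring (the query strings are hypothetical):

    {
        "operation": "factor_hed_tags",
        "parameters": {
            "queries": ["Sensory-event", "Agent-action"]
        }
    }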
diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py
index df13e3631..5c7f8885e 100644
--- a/hed/tools/remodeling/operations/factor_hed_type_op.py
+++ b/hed/tools/remodeling/operations/factor_hed_type_op.py
@@ -14,18 +14,33 @@ class FactorHedTypeOp(BaseOp):
     """ Create tabular file factors from type variables and append to tabular data.
 
     Required remodeling parameters:
-        - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`).
-        - **type_values** (*list*): Factor values to include. If empty all values of that type_tag are used.
+        - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`).
 
-    """
+    Optional remodeling parameters:
+        - **type_values** (*list*): If provided, specifies which factor values to include.
 
+    """
+    NAME = "factor_hed_type"
 
     PARAMS = {
-        "operation": "factor_hed_type",
-        "required_parameters": {
-            "type_tag": str,
-            "type_values": list
+        "type": "object",
+        "properties": {
+            "type_tag": {
+                "type": "string"
+            },
+            "type_values": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            }
        },
-        "optional_parameters": {}
+        "required": [
+            "type_tag"
+        ],
+        "additionalProperties": False
    }
 
     def __init__(self, parameters):
@@ -34,18 +49,8 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Actual values of the parameters for the operation.
 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
-        :raises ValueError:
-            - If the specification is missing a valid operation.
-
         """
-        super().__init__(self.PARAMS, parameters)
-        self.type_tag = parameters["type_tag"]
-        self.type_values = parameters["type_values"]
+        super().__init__(parameters)
+        self.type_tag = parameters["type_tag"]
+        self.type_values = parameters.get("type_values", [])
 
@@ -68,12 +73,18 @@ def do_op(self, dispatcher, df, name, sidecar=None):
 
         input_data = TabularInput(df, sidecar=sidecar, name=name)
         df_list = [input_data.dataframe.copy()]
-        var_manager = HedTypeManager(EventManager(input_data, dispatcher.hed_schema))
+        var_manager = HedTypeManager(
+            EventManager(input_data, dispatcher.hed_schema))
         var_manager.add_type(self.type_tag.lower())
 
-        df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot")
+        df_factors = var_manager.get_factor_vectors(
+            self.type_tag, self.type_values, factor_encoding="one-hot")
         if len(df_factors.columns) > 0:
             df_list.append(df_factors)
         df_new = pd.concat(df_list, axis=1)
         df_new.replace('n/a', np.NaN, inplace=True)
         return df_new
+
+    @staticmethod
+    def validate_input_data(parameters):
+        return []
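A sample JSON spec using the most common type tag noted in the docstring; with type_values omitted, factors are created for all values of the type tag:

    {
        "operation": "factor_hed_type",
        "parameters": {
            "type_tag": "condition-variable"
        }
    }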
""" + NAME = "merge_consecutive" + PARAMS = { - "operation": "merge_consecutive", - "required_parameters": { - "column_name": str, - "event_code": [str, int, float], - "match_columns": list, - "set_durations": bool, - "ignore_missing": bool + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "event_code": { + "type": [ + "string", + "number" + ] + }, + "match_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "set_durations": { + "type": "boolean" + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_name", + "event_code", + "set_durations", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -33,25 +59,11 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - - :raises ValueError: - - If the specification is missing a valid operation. - - If one of the match column is the merge column. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters["column_name"] self.event_code = parameters["event_code"] self.match_columns = parameters["match_columns"] - if self.column_name in self.match_columns: - raise ValueError("MergeColumnCannotBeMatchColumn", - f"Column {self.column_name} cannot be one of the match columns: {str(self.match_columns)}") self.set_durations = parameters["set_durations"] self.ignore_missing = parameters["ignore_missing"] @@ -90,7 +102,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("MissingMatchColumns", f"{name}: {str(missing)} columns are unmatched by data columns" f"[{str(df.columns)}] and not ignored") - match_columns = list(set(self.match_columns).intersection(set(df.columns))) + match_columns = list( + set(self.match_columns).intersection(set(df.columns))) df_new = df.copy() code_mask = df_new[self.column_name] == self.event_code @@ -140,8 +153,19 @@ def _update_durations(df_new, remove_groups): remove_df = pd.DataFrame(remove_groups, columns=["remove"]) max_groups = max(remove_groups) for index in range(max_groups): - df_group = df_new.loc[remove_df["remove"] == index + 1, ["onset", "duration"]] + df_group = df_new.loc[remove_df["remove"] + == index + 1, ["onset", "duration"]] max_group = df_group.sum(axis=1, skipna=True).max() anchor = df_group.index[0] - 1 - max_anchor = df_new.loc[anchor, ["onset", "duration"]].sum(skipna=True).max() - df_new.loc[anchor, "duration"] = max(max_group, max_anchor) - df_new.loc[anchor, "onset"] + max_anchor = df_new.loc[anchor, [ + "onset", "duration"]].sum(skipna=True).max() + df_new.loc[anchor, "duration"] = max( + max_group, max_anchor) - df_new.loc[anchor, "onset"] + + @staticmethod + def validate_input_data(parameters): + errors = [] + if parameters.get("match_columns", False): + if parameters.get("column_name") in parameters.get("match_columns"): + errors.append("The column_name in the merge_consecutive operation cannot be specified as a match_column.") + return errors diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index d3a5467db..1a2bd1fa3 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -8,47 +8,78 @@ class NumberGroupsOp(BaseOp): 
""" Implementation in progress. """ - + NAME = "number_groups" + PARAMS = { - "operation": "number_groups", - "required_parameters": { - "number_column_name": str, - "source_column": str, - "start": dict, - "stop": dict + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "source_column": { + "type": "string" + }, + "start": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": False + }, + "stop": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": False + }, + "overwrite": { + "type": "boolean" + } }, - "optional_parameters": {"overwrite": bool} + "required": [ + "number_column_name", + "source_column", + "start", + "stop" + ], + "additionalProperties": False } def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.number_column_name = parameters['number_column_name'] self.source_column = parameters['source_column'] self.start = parameters['start'] self.stop = parameters['stop'] self.start_stop_test = {"values": list, "inclusion": str} self.inclusion_test = ["include", "exclude"] - - required = set(self.start_stop_test.keys()) - for param_to_test in [self.start, self.stop]: - required_missing = required.difference(set(param_to_test.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"Specified {param_to_test} for number_rows requires parameters" - f"{list(required_missing)}") - for param_name, param_value in param_to_test.items(): - param_type = str - if param_name in required: - param_type = self.start_stop_test[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - # TODO: This has a syntax error - # if not isinstance(param_value, param_type): - # raise TypeError("BadType" f"{param_value} has type {type(param_value)} not {param_type}") - if (param_name == 'inclusion') & (param_value not in self.inclusion_test): - raise ValueError("BadValue" f" {param_name} must be one of {self.inclusion_test} not {param_value}") - self.overwrite = parameters.get('overwrite', False) def do_op(self, dispatcher, df, name, sidecar=None): @@ -104,3 +135,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): # df_new.loc[group, self.number_column_name] = i + 1 return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index e37b180fb..c2b38a08a 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -7,36 +7,49 @@ class NumberRowsOp(BaseOp): """ Implementation in progress. 
""" + NAME = "number_rows" + PARAMS = { - "operation": "number_rows", - "required_parameters": { - "number_column_name": str + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "overwrite": { + "type": "boolean" + }, + "match_value": { + "type": "object", + "properties": { + "column": { + "type": "string" + }, + "value": { + "type": [ + "string", + "number" + ] + } + }, + "required": [ + "column", + "value" + ], + "additionalProperties": False + } }, - "optional_parameters": {"overwrite": bool, "match_value": dict} + "required": [ + "number_column_name" + ], + "additionalProperties": False } def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.number_column_name = parameters['number_column_name'] self.overwrite = parameters.get('overwrite', False) self.match_value = parameters.get('match_value', False) - if self.match_value: - self.match_value_params = {"column": str, "value": str} - required = set(self.match_value_params.keys()) - required_missing = required.difference(set(self.match_value.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"Specified match_value for number_rows requires parameters {list(required_missing)}") - for param_name, param_value in self.match_value.items(): - if param_name in required: - param_type = self.match_value_params[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - # TODO: this has a syntax error - # if not isinstance(param_value, param_type): - # raise TypeError("BadType" f"{param_value} has type {type(param_value)} not {param_type}") - + def do_op(self, dispatcher, df, name, sidecar=None): """ Add numbers events dataframe. @@ -74,3 +87,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): # df_new[self.number_column_name] = df_new.index + 1 return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index c83315795..176218be7 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -24,18 +24,59 @@ class RemapColumnsOp(BaseOp): TODO: Allow wildcards """ + NAME = "remap_columns" PARAMS = { - "operation": "remap_columns", - "required_parameters": { - "source_columns": list, - "destination_columns": list, - "map_list": list, - "ignore_missing": bool + "type": "object", + "properties": { + "source_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "destination_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "map_list": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + }, + "minItems" : 1 + }, + "minItems": 1, + "uniqueItems": True + }, + "ignore_missing": { + "type": "boolean" + }, + "integer_sources": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + } }, - "optional_parameters": { - "integer_sources": list - } + "required": [ + "source_columns", + "destination_columns", + "map_list", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -44,52 +85,32 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
-        :raises ValueError:
-            - If an integer column is not a key column.
-            - If a column designated as an integer source does not have valid integers.
-            - If no source columns are specified.
-            - If no destination columns are specified.
-            - If a map_list entry has the wrong number of items (source columns + destination columns).
-
         """
-        super().__init__(self.PARAMS, parameters)
+        super().__init__(parameters)
         self.source_columns = parameters['source_columns']
         self.integer_sources = []
         self.string_sources = self.source_columns
         if "integer_sources" in parameters:
-            self.integer_sources = parameters['integer_sources']
-            if not set(self.integer_sources).issubset(set(self.source_columns)):
-                raise ValueError("IntegerSourceColumnsInvalid",
-                                 f"Integer courses {str(self.integer_sources)} must be in {str(self.source_columns)}")
-            self.string_sources = list(set(self.source_columns).difference(set(self.integer_sources)))
+            self.integer_sources = parameters['integer_sources']
+            self.string_sources = list(
+                set(self.source_columns).difference(set(self.integer_sources)))
         self.destination_columns = parameters['destination_columns']
         self.map_list = parameters['map_list']
         self.ignore_missing = parameters['ignore_missing']
-        if len(self.source_columns) < 1:
-            raise ValueError("EmptySourceColumns",
-                             f"The source column list {str(self.source_columns)} must be non-empty")
-
-        if len(self.destination_columns) < 1:
-            raise ValueError("EmptyDestinationColumns",
-                             f"The destination column list {str(self.destination_columns)} must be non-empty")
-        entry_len = len(self.source_columns) + len(self.destination_columns)
-        for index, item in enumerate(self.map_list):
-            if len(item) != entry_len:
-                raise ValueError("BadColumnMapEntry",
-                                 f"Map list entry {index} has {len(item)} elements, but must have {entry_len} elements")
         self.key_map = self._make_key_map()
 
     def _make_key_map(self):
-        key_df = pd.DataFrame(self.map_list, columns=self.source_columns+self.destination_columns)
-        key_map = KeyMap(self.source_columns, target_cols=self.destination_columns, name="remap")
+        """ Create the key map between the source and destination columns.
+
+        :raises ValueError:
+            - If a column designated as an integer source does not have valid integers.
+ + """ + + key_df = pd.DataFrame( + self.map_list, columns=self.source_columns+self.destination_columns) + key_map = KeyMap(self.source_columns, + target_cols=self.destination_columns, name="remap") key_map.update(key_df) return key_map @@ -110,7 +131,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ df1 = df.copy() - df1[self.source_columns] = df1[self.source_columns].replace(np.NaN, 'n/a') + df1[self.source_columns] = df1[self.source_columns].replace( + np.NaN, 'n/a') for column in self.integer_sources: int_mask = df1[column] != 'n/a' df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int) @@ -120,3 +142,16 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("MapSourceValueMissing", f"{name}: Ignore missing is false, but source values [{missing}] are in data but not map") return df_new + + @staticmethod + def validate_input_data(parameters): + errors = [] + if len(set([len(x) for x in parameters.get("map_list")])) != 1: + errors.append("The lists specified in the map_list parameter in the remap_columns operation should all have the same length.") + else: + if (len(parameters.get('source_columns')) + len(parameters.get("destination_columns"))) != len(parameters.get("map_list")[0]): + errors.append("The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.") + if parameters.get("integer_sources", False): + if not all([(x in parameters.get("source_columns")) for x in parameters.get("integer_sources")]): + errors.append("All integer_sources in the remap_columns operation should be source_columns.") + return errors diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 6901b6ce5..a20015d48 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -10,14 +10,28 @@ class RemoveColumnsOp(BaseOp): - **ignore_missing** (*boolean*): If true, names in column_names that are not columns in df should be ignored. """ - + NAME = "remove_columns" + PARAMS = { - "operation": "remove_columns", - "required_parameters": { - "column_names": list, - "ignore_missing": bool + "type": "object", + "properties": { + "column_names": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_names", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -26,15 +40,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_names = parameters['column_names'] ignore_missing = parameters['ignore_missing'] if ignore_missing: @@ -65,3 +72,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise KeyError("MissingColumnCannotBeRemoved", f"{name}: Ignore missing is False but a column in {str(self.column_names)} is " f"not in the data columns [{str(df_new.columns)}]") + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 217fb7934..181f70d15 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -11,14 +11,31 @@ class RemoveRowsOp(BaseOp): - **remove_values** (*list*): The values to test for row removal. """ - + NAME = "remove_rows" + PARAMS = { - "operation": "remove_rows", - "required_parameters": { - "column_name": str, - "remove_values": list + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "remove_values": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + }, + "minItems": 1, + "uniqueItems": True + } }, - "optional_parameters": {} + "required": [ + "column_name", + "remove_values" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -27,15 +44,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters["column_name"] self.remove_values = parameters["remove_values"] @@ -58,3 +68,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): for value in self.remove_values: df_new = df_new.loc[df_new[self.column_name] != value, :] return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 0a3329b0c..160427b81 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -7,18 +7,33 @@ class RenameColumnsOp (BaseOp): """ Rename columns in a tabular file. Required remodeling parameters: - - **column_mapping** (*dict*): The names of the columns to be removed. + - **column_mapping** (*dict*): The names of the columns to be renamed. - **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns and should be ignored. """ - + NAME = "rename_columns" + PARAMS = { - "operation": "rename_columns", - "required_parameters": { - "column_mapping": dict, - "ignore_missing": bool + "type": "object", + "properties": { + "column_mapping": { + "type": "object", + "patternProperties": { + ".*": { + "type": "string" + } + }, + "minProperties": 1 + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_mapping", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -27,15 +42,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters - :raises KeyError: - - If a required parameter is missing. 
- - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_mapping = parameters['column_mapping'] if parameters['ignore_missing']: self.error_handling = 'ignore' @@ -65,3 +73,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise KeyError("MappedColumnsMissingFromData", f"{name}: ignore_missing is False, mapping columns [{self.column_mapping}]" f" but df columns are [{str(df.columns)}") + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 91fcfcc30..becf66e04 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -11,15 +11,32 @@ class ReorderColumnsOp(BaseOp): - keep_others (*bool*): If true, columns not in column_order are placed at end. """ - + NAME = "reorder_columns" + PARAMS = { - "operation": "reorder_columns", - "required_parameters": { - "column_order": list, - "ignore_missing": bool, - "keep_others": bool + "type": "object", + "properties": { + "column_order": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "ignore_missing": { + "type": "boolean" + }, + "keep_others": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_order", + "ignore_missing", + "keep_others" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -28,15 +45,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_order = parameters['column_order'] self.ignore_missing = parameters['ignore_missing'] self.keep_others = parameters['keep_others'] @@ -59,15 +69,21 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ df_new = df.copy() current_columns = list(df_new.columns) - missing_columns = set(self.column_order).difference(set(df_new.columns)) + missing_columns = set(self.column_order).difference( + set(df_new.columns)) ordered = self.column_order if missing_columns and not self.ignore_missing: raise ValueError("MissingReorderedColumns", f"{str(missing_columns)} are not in dataframe columns " f" [{str(df_new.columns)}] and not ignored.") elif missing_columns: - ordered = [elem for elem in self.column_order if elem not in list(missing_columns)] + ordered = [ + elem for elem in self.column_order if elem not in list(missing_columns)] if self.keep_others: ordered += [elem for elem in current_columns if elem not in ordered] df_new = df_new.loc[:, ordered] return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index ea0b5dc13..04dbb65df 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -14,15 +14,68 @@ class SplitRowsOp(BaseOp): - **remove_parent_row** (*bool*): If true, the original row that was split is removed. 
""" + NAME = "split_rows" PARAMS = { - "operation": "split_rows", - "required_parameters": { - "anchor_column": str, - "new_events": dict, - "remove_parent_row": bool + "type": "object", + "properties": { + "anchor_column": { + "type": "string" + }, + "new_events": { + "type": "object", + "patternProperties": { + ".*": { + "type": "object", + "properties": { + "onset_source": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + }, + "minItems": 1 + }, + "duration": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + }, + "minItems": 1 + }, + "copy_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + } + }, + "required": [ + "onset_source", + "duration" + ], + "additionalProperties": False + } + }, + "minProperties": 1 + }, + "remove_parent_row": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "anchor_column", + "new_events", + "remove_parent_row" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -31,15 +84,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.anchor_column = parameters['anchor_column'] self.new_events = parameters['new_events'] self.remove_parent_row = parameters['remove_parent_row'] @@ -85,7 +131,8 @@ def _split_rows(self, df, df_list): """ for event, event_parms in self.new_events.items(): add_events = pd.DataFrame([], columns=df.columns) - add_events['onset'] = self._create_onsets(df, event_parms['onset_source']) + add_events['onset'] = self._create_onsets( + df, event_parms['onset_source']) add_events[self.anchor_column] = event self._add_durations(df, add_events, event_parms['duration']) if len(event_parms['copy_columns']) > 0: @@ -103,7 +150,8 @@ def _add_durations(df, add_events, duration_sources): if isinstance(duration, float) or isinstance(duration, int): add_events['duration'] = add_events['duration'].add(duration) elif isinstance(duration, str) and duration in list(df.columns): - add_events['duration'] = add_events['duration'].add(pd.to_numeric(df[duration], errors='coerce')) + add_events['duration'] = add_events['duration'].add( + pd.to_numeric(df[duration], errors='coerce')) else: raise TypeError("BadDurationInModel", f"Remodeling duration {str(duration)} must either be numeric or a column name", "") @@ -134,3 +182,7 @@ def _create_onsets(df, onset_source): raise TypeError("BadOnsetInModel", f"Remodeling onset {str(onset)} must either be numeric or a column name.", "") return onsets + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 5770a6185..f267eb439 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -9,22 +9,35 @@ class SummarizeColumnNamesOp(BaseOp): """ Summarize the column names in a collection of tabular files. Required remodeling parameters: - - **summary_name** (*str*) The name of the summary. - - **summary_filename** (*str*) Base filename of the summary. + - **summary_name** (*str*): The name of the summary. 
diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py
index 5770a6185..f267eb439 100644
--- a/hed/tools/remodeling/operations/summarize_column_names_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_names_op.py
@@ -9,22 +9,35 @@ class SummarizeColumnNamesOp(BaseOp):
     """ Summarize the column names in a collection of tabular files.
 
     Required remodeling parameters:
-        - **summary_name** (*str*) The name of the summary.
-        - **summary_filename** (*str*) Base filename of the summary.
+        - **summary_name** (*str*): The name of the summary.
+        - **summary_filename** (*str*): Base filename of the summary.
+
+    Optional remodeling parameters:
+        - **append_timecode** (*bool*): If true, append a timecode to the base filename when the summary is saved (default false).
 
     The purpose is to check that all the tabular files have the same columns in same order.
 
     """
+    NAME = "summarize_column_names"
 
     PARAMS = {
-        "operation": "summarize_column_names",
-        "required_parameters": {
-            "summary_name": str,
-            "summary_filename": str
+        "type": "object",
+        "properties": {
+            "summary_name": {
+                "type": "string"
+            },
+            "summary_filename": {
+                "type": "string"
+            },
+            "append_timecode": {
+                "type": "boolean"
+            }
        },
-        "optional_parameters": {
-            "append_timecode": bool
-        }
+        "required": [
+            "summary_name",
+            "summary_filename"
+        ],
+        "additionalProperties": False
    }
 
     SUMMARY_TYPE = "column_names"
@@ -35,15 +48,8 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Dictionary with the parameter values for required and optional parameters.
 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
         """
-        super().__init__(self.PARAMS, parameters)
+        super().__init__(parameters)
         self.summary_name = parameters['summary_name']
         self.summary_filename = parameters['summary_filename']
         self.append_timecode = parameters.get('append_timecode', False)
@@ -69,8 +75,13 @@ def do_op(self, dispatcher, df, name, sidecar=None):
         if not summary:
             summary = ColumnNamesSummary(self)
             dispatcher.summary_dicts[self.summary_name] = summary
-        summary.update_summary({"name": name, "column_names": list(df_new.columns)})
+        summary.update_summary(
+            {"name": name, "column_names": list(df_new.columns)})
         return df_new
+
+    @staticmethod
+    def validate_input_data(parameters):
+        return []
 
 
 class ColumnNamesSummary(BaseSummary):
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py
index 4c4401881..40518c414 100644
--- a/hed/tools/remodeling/operations/summarize_column_values_op.py
+++ b/hed/tools/remodeling/operations/summarize_column_values_op.py
@@ -10,30 +10,60 @@ class SummarizeColumnValuesOp(BaseOp):
 
     Required remodeling parameters:
         - **summary_name** (*str*): The name of the summary.
-        - **summary_filename** (*str*): Base filename of the summary.
-        - **skip_columns** (*list*): Names of columns to skip in the summary.
-        - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns.
+        - **summary_filename** (*str*): Base filename of the summary.
 
-    Optional remodeling parameters:
-        - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column.
+    Optional remodeling parameters:
+        - **append_timecode** (*bool*): If true, append a timecode to the base filename when the summary is saved (default false).
+        - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column.
+        - **skip_columns** (*list*): Names of columns to skip in the summary.
+        - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns.
+        - **values_per_line** (*int*): The number of values output per line in the summary.
 
     The purpose is to produce a summary of the values in a tabular file.
""" - + NAME = "summarize_column_values" + PARAMS = { - "operation": "summarize_column_values", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "skip_columns": list, - "value_columns": list + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + }, + "max_categorical": { + "type": "integer" + }, + "skip_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "value_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "values_per_line": { + "type": "integer" + } }, - "optional_parameters": { - "append_timecode": bool, - "max_categorical": int, - "values_per_line": int - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = 'column_values' @@ -46,23 +76,17 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.skip_columns = parameters['skip_columns'] self.value_columns = parameters['value_columns'] self.append_timecode = parameters.get('append_timecode', False) self.max_categorical = parameters.get('max_categorical', float('inf')) - self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE) + self.values_per_line = parameters.get( + 'values_per_line', self.VALUES_PER_LINE) def do_op(self, dispatcher, df, name, sidecar=None): """ Create a summary of the column values in df. 
@@ -86,9 +110,14 @@ def do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = ColumnValueSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name}) + summary.update_summary( + {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class ColumnValueSummary(BaseSummary): @@ -109,7 +138,8 @@ def update_summary(self, new_info): name = new_info['name'] if name not in self.summary_dict: self.summary_dict[name] = \ - TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name=name) + TabularSummary(value_cols=self.op.value_columns, + skip_cols=self.op.skip_columns, name=name) self.summary_dict[name].update(new_info['df']) def get_details_dict(self, summary): @@ -123,11 +153,14 @@ def get_details_dict(self, summary): """ this_summary = summary.get_summary(as_json=False) - unique_counts = [(key, len(count_dict)) for key, count_dict in this_summary['Categorical columns'].items()] + unique_counts = [(key, len(count_dict)) for key, + count_dict in this_summary['Categorical columns'].items()] this_summary['Categorical counts'] = dict(unique_counts) for key, dict_entry in this_summary['Categorical columns'].items(): - num_disp, sorted_tuples = ColumnValueSummary.sort_dict(dict_entry, reverse=True) - this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)]) + num_disp, sorted_tuples = ColumnValueSummary.sort_dict( + dict_entry, reverse=True) + this_summary['Categorical columns'][key] = dict( + sorted_tuples[:min(num_disp, self.op.max_categorical)]) return {"Name": this_summary['Name'], "Total events": this_summary["Total events"], "Total files": this_summary['Total files'], "Files": list(this_summary['Files'].keys()), @@ -144,7 +177,8 @@ def merge_all_info(self): TabularSummary - the summary object for column values. 
""" - all_sum = TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name='Dataset') + all_sum = TabularSummary( + value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name='Dataset') for counts in self.summary_dict.values(): all_sum.update_summary(counts) return all_sum @@ -190,10 +224,13 @@ def _get_categorical_string(self, result, offset="", indent=" "): if not cat_dict: return "" count_dict = result['Categorical counts'] - sum_list = [f"{offset}{indent}Categorical column values[Events, Files]:"] + sum_list = [ + f"{offset}{indent}Categorical column values[Events, Files]:"] sorted_tuples = sorted(cat_dict.items(), key=lambda x: x[0]) for entry in sorted_tuples: - sum_list = sum_list + self._get_categorical_col(entry, count_dict, offset="", indent=" ") + sum_list = sum_list + \ + self._get_categorical_col( + entry, count_dict, offset="", indent=" ") return "\n".join(sum_list) def _get_detail_list(self, result, indent=BaseSummary.DISPLAY_INDENT): @@ -209,12 +246,14 @@ def _get_detail_list(self, result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [] specifics = result["Specifics"] - cat_string = self._get_categorical_string(specifics, offset="", indent=indent) + cat_string = self._get_categorical_string( + specifics, offset="", indent=indent) if cat_string: sum_list.append(cat_string) val_dict = specifics.get("Value column summaries", {}) if val_dict: - sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent)) + sum_list.append(ColumnValueSummary._get_value_string( + val_dict, offset="", indent=indent)) return sum_list def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): @@ -236,7 +275,8 @@ def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): # Create and partition the list of individual entries value_list = [f"{item[0]}{str(item[1])}" for item in entry[1].items()] value_list = value_list[:num_disp] - part_list = ColumnValueSummary.partition_list(value_list, self.op.values_per_line) + part_list = ColumnValueSummary.partition_list( + value_list, self.op.values_per_line) return col_list + [f"{offset}{indent * 3}{ColumnValueSummary.get_list_str(item)}" for item in part_list] @staticmethod @@ -266,5 +306,6 @@ def _get_value_string(val_dict, offset="", indent=""): @staticmethod def sort_dict(count_dict, reverse=False): - sorted_tuples = sorted(count_dict.items(), key=lambda x: x[1][0], reverse=reverse) + sorted_tuples = sorted( + count_dict.items(), key=lambda x: x[1][0], reverse=reverse) return len(sorted_tuples), sorted_tuples diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 28b0c6e55..99b06582a 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -11,21 +11,34 @@ class SummarizeDefinitionsOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. + - **summary_filename** (*str*): Base filename of the summary. + + Optional remodeling parameters: + - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. The purpose is to produce a summary of the values in a tabular file. 
""" - + NAME = "summarize_definitions" + PARAMS = { - "operation": "summarize_definitions", - "required_parameters": { - "summary_name": str, - "summary_filename": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = 'type_defs' @@ -36,14 +49,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) @@ -71,11 +78,16 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class DefinitionSummary(BaseSummary): def __init__(self, sum_op, hed_schema, known_defs=None): super().__init__(sum_op) - self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) + self.def_gatherer = DefExpandGatherer( + hed_schema, known_defs=known_defs) def update_summary(self, new_info): """ Update the summary for a given tabular input file. @@ -87,8 +99,10 @@ def update_summary(self, new_info): - The summary needs a "name" str, a "schema" and a "Sidecar". """ - data_input = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) - series, def_dict = data_input.series_a, data_input.get_def_dict(new_info['schema']) + data_input = TabularInput( + new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + series, def_dict = data_input.series_a, data_input.get_def_dict( + new_info['schema']) self.def_gatherer.process_def_expands(series, def_dict) @staticmethod @@ -101,8 +115,10 @@ def _build_summary_dict(items_dict, title, process_func, display_description=Fal if "#" in str(value): key = key + "/#" if display_description: - description, value = DefinitionSummary._remove_description(value) - items[key] = {"description": description, "contents": str(value)} + description, value = DefinitionSummary._remove_description( + value) + items[key] = {"description": description, + "contents": str(value)} elif isinstance(value, list): items[key] = [str(x) for x in value] else: @@ -124,7 +140,8 @@ def get_details_dict(self, def_gatherer): display_description=True) ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", def_gatherer.get_ambiguous_group) - errors_summary = self._build_summary_dict(def_gatherer.errors, "Errors", None) + errors_summary = self._build_summary_dict( + def_gatherer.errors, "Errors", None) known_defs_summary.update(ambiguous_defs_summary) known_defs_summary.update(errors_summary) @@ -166,7 +183,8 @@ def _nested_dict_to_string(data, indent, level=1): for key, value in data.items(): if isinstance(value, dict): result.append(f"{indent * level}{key}: {len(value)} items") - result.append(DefinitionSummary._nested_dict_to_string(value, indent, level + 1)) + result.append(DefinitionSummary._nested_dict_to_string( + value, 
indent, level + 1))
         elif isinstance(value, list):
             result.append(f"{indent * level}{key}:")
             for item in value:
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index 6e98abbed..7682f4e88 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -18,27 +18,66 @@ class SummarizeHedTagsOp(BaseOp):
         - **tags** (*dict*): Specifies how to organize the tag output.
 
     Optional remodeling parameters:
-        - **expand_context** (*bool*): If True, include counts from expanded context (not supported).
+        - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when the summary is saved.
+        - **include_context** (*bool*): If True, the context of events is included in the summary.
+        - **remove_types** (*list*): A list of type tags, such as Condition-variable or Task, to be excluded from the summary.
+        - **replace_defs** (*bool*): If True, the Def tag is replaced by the contents of its definition.
 
     The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner.
 
     """
-
+    NAME = "summarize_hed_tags"
+
     PARAMS = {
-        "operation": "summarize_hed_tags",
-        "required_parameters": {
-            "summary_name": str,
-            "summary_filename": str,
-            "tags": dict
+        "type": "object",
+        "properties": {
+            "summary_name": {
+                "type": "string"
+            },
+            "summary_filename": {
+                "type": "string"
+            },
+            "tags": {
+                "type": "object",
+                "patternProperties": {
+                    ".*": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 1,
+                        "uniqueItems": True
+                    }
+                },
+                "minProperties": 1,
+                "additionalProperties": False
+            },
+            "append_timecode": {
+                "type": "boolean"
+            },
+            "include_context": {
+                "type": "boolean"
+            },
+            "remove_types": {
+                "type": "array",
+                "items": {
+                    "type": "string"
+                },
+                "minItems": 1,
+                "uniqueItems": True
+            },
+            "replace_defs": {
+                "type": "boolean"
+            }
         },
-        "optional_parameters": {
-            "append_timecode": bool,
-            "include_context": bool,
-            "replace_defs": bool,
-            "remove_types": list
-        }
+        "required": [
+            "summary_name",
+            "summary_filename",
+            "tags"
+        ],
+        "additionalProperties": False
     }
 
     SUMMARY_TYPE = "hed_tag_summary"
@@ -48,23 +87,17 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Dictionary with the parameter values for required and optional parameters.
-
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
-
+
         """
-        super().__init__(self.PARAMS, parameters)
+        super().__init__(parameters)
         self.summary_name = parameters['summary_name']
         self.summary_filename = parameters['summary_filename']
         self.tags = parameters['tags']
         self.append_timecode = parameters.get('append_timecode', False)
         self.include_context = parameters.get('include_context', True)
         self.replace_defs = parameters.get("replace_defs", True)
-        self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"])
+        self.remove_types = parameters.get(
+            "remove_types", ["Condition-variable", "Task"])
 
     def do_op(self, dispatcher, df, name, sidecar=None):
         """ Summarize the HED tags present in the dataset.
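
As an illustration of the schema above, here is a hypothetical parameter dictionary that it accepts: "tags" maps arbitrary category names to non-empty lists of unique tag strings. The category names and tags below are made up for the example.

    parameters = {
        "summary_name": "my_hed_tag_summary",
        "summary_filename": "my_hed_tag_summary",
        "tags": {
            "Sensory events": ["Sensory-event", "Sensory-presentation"],
            "Agent actions": ["Agent-action", "Move"]
        },
        "replace_defs": True
    }
    op = SummarizeHedTagsOp(parameters)
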
@@ -93,6 +126,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedTagSummary(BaseSummary): @@ -110,11 +147,13 @@ def update_summary(self, new_info): - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". """ - counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) - input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) - tag_man = HedTagManager(EventManager(input_data, new_info['schema']), + counts = HedTagCounts( + new_info['name'], total_events=len(new_info['df'])) + input_data = TabularInput( + new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + tag_man = HedTagManager(EventManager(input_data, new_info['schema']), remove_types=self.sum_op.remove_types) - hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, + hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, replace_defs=self.sum_op.replace_defs) for hed in hed_objs: counts.update_event_counts(hed, new_info['name']) @@ -190,7 +229,8 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={len(result.get('Files', 0))}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) + sum_list = sum_list + \ + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod @@ -206,14 +246,16 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Total events={result.get('Total events', 0)}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) + sum_list = sum_list + \ + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod def _tag_details(tags): tag_list = [] for tag in tags: - tag_list.append(f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") + tag_list.append( + f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") return tag_list @staticmethod @@ -223,10 +265,12 @@ def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): for category, tags in tag_info['Main tags'].items(): sum_list.append(f"{indent}{indent}{category}:") if tags: - sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") + sum_list.append( + f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") if tag_info['Other tags']: sum_list.append(f"{indent}Other tags[events,files]:") - sum_list.append(f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") + sum_list.append( + f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") return sum_list @staticmethod @@ -236,3 +280,7 @@ def _get_details(key_list, template, verbose=False): for tag_cnt in template[item.lower()]: key_details.append(tag_cnt.get_info(verbose=verbose)) return key_details + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 6aaa4c7ea..364c3d91f 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -15,23 +15,40 @@ class SummarizeHedTypeOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. 
- **summary_filename** (*str*): Base filename of the summary. - - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). + - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). + + Optional remodeling parameters: + - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when summary is saved The purpose of this op is to produce a summary of the occurrences of specified tag. This summary is often used with `condition-variable` to produce a summary of the experimental design. """ + NAME = "summarize_hed_type" + PARAMS = { - "operation": "summarize_hed_type", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "type_tag": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "type_tag": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename", + "type_tag" + ], + "additionalProperties": False } SUMMARY_TYPE = 'hed_type_summary' @@ -42,15 +59,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.type_tag = parameters['type_tag'].lower() @@ -81,6 +91,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedTypeSummary(BaseSummary): @@ -102,10 +116,13 @@ def update_summary(self, new_info): sidecar = new_info['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) - input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) - type_values = HedType(EventManager(input_data, new_info['schema']), new_info['name'], type_tag=self.type_tag) + input_data = TabularInput( + new_info['df'], sidecar=sidecar, name=new_info['name']) + type_values = HedType(EventManager( + input_data, new_info['schema']), new_info['name'], type_tag=self.type_tag) counts = HedTypeCounts(new_info['name'], self.type_tag) - counts.update_summary(type_values.get_summary(), type_values.total_events, new_info['name']) + counts.update_summary(type_values.get_summary(), + type_values.total_events, new_info['name']) counts.add_descriptions(type_values.type_defs) self.summary_dict[new_info["name"]] = counts @@ -183,10 +200,13 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): if item['direct_references']: str1 = str1 + f" Direct references:{item['direct_references']}" if item['events_with_multiple_refs']: - str1 = str1 + f" Multiple references:{item['events_with_multiple_refs']})" + str1 = str1 + \ + f" Multiple references:{item['events_with_multiple_refs']})" sum_list.append(f"{indent}{key}: {str1}") if item['level_counts']: - sum_list = sum_list + HedTypeSummary._level_details(item['level_counts'], indent=indent) + sum_list = sum_list + \ + HedTypeSummary._level_details( + item['level_counts'], indent=indent) return "\n".join(sum_list) @staticmethod 
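
As an illustration, a remodeler operation entry that uses this op to summarize an experimental design from condition variables; the summary names are placeholders.

    operation = {
        "operation": "summarize_hed_type",
        "description": "Summarize the condition variables in the dataset.",
        "parameters": {
            "summary_name": "hed_type_summary",
            "summary_filename": "hed_type_summary",
            "type_tag": "condition-variable"
        }
    }
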
@@ -207,12 +227,14 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT):
             f"Total events={result.get('Total events', 0)}"]
 
         for key, item in type_info.items():
-            sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events")
+            sum_list.append(
+                f"{indent*2}{key}: {item['levels']} levels in {item['events']} events")
             str1 = ""
             if item['direct_references']:
                 str1 = str1 + f" Direct references:{item['direct_references']}"
             if item['events_with_multiple_refs']:
-                str1 = str1 + f" (Multiple references:{item['events_with_multiple_refs']})"
+                str1 = str1 + \
+                    f" (Multiple references:{item['events_with_multiple_refs']})"
             if str1:
                 sum_list.append(f"{indent*3}{str1}")
             if item['level_counts']:
@@ -227,7 +249,13 @@ def _level_details(level_counts, offset="", indent=""):
             str1 = f"[{details['events']} events, {details['files']} files]:"
             level_list.append(f"{offset}{indent*2}{key} {str1}")
             if details['tags']:
-                level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}")
+                level_list.append(
+                    f"{offset}{indent*3}Tags: {str(details['tags'])}")
             if details['description']:
-                level_list.append(f"{offset}{indent*3}Description: {details['description']}")
+                level_list.append(
+                    f"{offset}{indent*3}Description: {details['description']}")
         return level_list
+
+    @staticmethod
+    def validate_input_data(parameters):
+        return []
diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
index a2948eb8b..6d43d9cfa 100644
--- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
@@ -16,20 +16,39 @@ class SummarizeHedValidationOp(BaseOp):
         - **summary_filename** (*str*): Base filename of the summary.
         - **check_for_warnings** (*bool*): If true include warnings as well as errors.
 
+    Optional remodeling parameters:
+        - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when the summary is saved.
+
     The purpose of this op is to produce a summary of the HED validation errors in a file.
 
     """
-
+    NAME = "summarize_hed_validation"
+
     PARAMS = {
-        "operation": "summarize_hed_validation",
-        "required_parameters": {
-            "summary_name": str,
-            "summary_filename": str
+        "type": "object",
+        "properties": {
+            "summary_name": {
+                "type": "string"
+            },
+            "summary_filename": {
+                "type": "string"
+            },
+            "append_timecode": {
+                "type": "boolean"
+            },
+            "check_for_warnings": {
+                "type": "boolean"
+            }
         },
-        "optional_parameters": {
-            "append_timecode": bool,
-            "check_for_warnings": bool
-        }
+        "required": [
+            "summary_name",
+            "summary_filename",
+            "check_for_warnings"
+        ],
+        "additionalProperties": False
    }
 
     SUMMARY_TYPE = 'hed_validation'
@@ -40,15 +59,8 @@ def __init__(self, parameters):
 
         Parameters:
             parameters (dict): Dictionary with the parameter values for required and optional parameters.
 
-        :raises KeyError:
-            - If a required parameter is missing.
-            - If an unexpected parameter is provided.
-
-        :raises TypeError:
-            - If a parameter has the wrong type.
- """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) @@ -79,6 +91,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedValidationSummary(BaseSummary): @@ -105,10 +121,16 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): sum_list = [f"{name}: [{len(specifics['sidecar_files'])} sidecar files, " f"{len(specifics['event_files'])} event files]"] if specifics.get('is_merged'): - sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=True, indent=indent) - sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=True, indent=indent) + sum_list = sum_list + \ + self.get_error_list( + specifics['sidecar_issues'], count_only=True, indent=indent) + sum_list = sum_list + \ + self.get_error_list( + specifics['event_issues'], count_only=True, indent=indent) else: - sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], indent=indent*2) + sum_list = sum_list + \ + self.get_error_list( + specifics['sidecar_issues'], indent=indent*2) if specifics['sidecar_had_issues']: sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=False, indent=indent*2) else: @@ -127,8 +149,10 @@ def update_summary(self, new_info): sidecar = new_info.get('sidecar', None) if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(files=new_info['sidecar'], name=os.path.basename(sidecar)) - results = self._get_sidecar_results(sidecar, new_info, self.check_for_warnings) + sidecar = Sidecar( + files=new_info['sidecar'], name=os.path.basename(sidecar)) + results = self._get_sidecar_results( + sidecar, new_info, self.check_for_warnings) if not results['sidecar_had_issues']: input_data = TabularInput(new_info['df'], sidecar=sidecar) issues = input_data.validate(new_info['schema']) @@ -183,7 +207,8 @@ def _update_events_results(results, ind_results): @staticmethod def _update_sidecar_results(results, ind_results): results["total_sidecar_issues"] += ind_results["total_sidecar_issues"] - results["sidecar_files"] = results["sidecar_files"] + ind_results["sidecar_files"] + results["sidecar_files"] = results["sidecar_files"] + \ + ind_results["sidecar_files"] for ikey, errors in ind_results["sidecar_issues"].items(): results["sidecar_issues"][ikey] = errors @@ -213,17 +238,21 @@ def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDE def _format_errors(error_list, name, errors, indent): error_list.append(f"{indent}{name} issues:") for this_item in errors: - error_list.append(f"{indent * 2}{HedValidationSummary._format_error(this_item)}") + error_list.append( + f"{indent * 2}{HedValidationSummary._format_error(this_item)}") @staticmethod def _format_error(error): error_str = error['code'] error_locations = [] - HedValidationSummary.update_error_location(error_locations, "row", "ec_row", error) - HedValidationSummary.update_error_location(error_locations, "column", "ec_column", error) + HedValidationSummary.update_error_location( + error_locations, "row", "ec_row", error) + HedValidationSummary.update_error_location( + error_locations, "column", "ec_column", error) HedValidationSummary.update_error_location(error_locations, "sidecar column", 
"ec_sidecarColumnName", error) - HedValidationSummary.update_error_location(error_locations, "sidecar key", "ec_sidecarKeyName", error) + HedValidationSummary.update_error_location( + error_locations, "sidecar key", "ec_sidecarKeyName", error) location_str = ",".join(error_locations) if location_str: error_str = error_str + f"[{location_str}]" @@ -244,7 +273,8 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results["sidecar_files"].append(sidecar.name) results["sidecar_issues"][sidecar.name] = [] sidecar_issues = sidecar.validate(new_info['schema']) - filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) + filtered_issues = ErrorHandler.filter_issues_by_severity( + sidecar_issues, ErrorSeverity.ERROR) if filtered_issues: results["sidecar_had_issues"] = True if not check_for_warnings: @@ -253,3 +283,7 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results['sidecar_issues'][sidecar.name] = str_issues results['total_sidecar_issues'] = len(sidecar_issues) return results + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index f584ee1d3..e06765325 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -11,25 +11,52 @@ class SummarizeSidecarFromEventsOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. + - **summary_filename** (*str*): Base filename of the summary. + + Optional remodeling parameters: + - **append_timecode** (*bool*): - **skip_columns** (*list*): Names of columns to skip in the summary. - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. The purpose is to produce a JSON sidecar template for annotating a dataset with HED tags. """ - + NAME = "summarize_sidecar_from_events" + PARAMS = { - "operation": "summarize_sidecar_from_events", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "skip_columns": list, - "value_columns": list, + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "skip_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "value_columns": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = "events_to_sidecar" @@ -40,16 +67,9 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.skip_columns = parameters['skip_columns'] @@ -78,9 +98,14 @@ def do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = EventsToSidecarSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name}) + summary.update_summary( + {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class EventsToSidecarSummary(BaseSummary): @@ -100,7 +125,8 @@ def update_summary(self, new_info): """ - tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) + tab_sum = TabularSummary( + value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) tab_sum.update(new_info['df'], new_info['name']) self.summary_dict[new_info["name"]] = tab_sum @@ -195,3 +221,7 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): f"Value columns: {str(specifics.get('Value info', {}).keys())}", f"Sidecar:\n{json.dumps(specifics['Sidecar'], indent=indent)}"] return "\n".join(sum_list) + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py index 8753ed1d6..52cf41162 100644 --- a/hed/tools/remodeling/operations/valid_operations.py +++ b/hed/tools/remodeling/operations/valid_operations.py @@ -1,6 +1,6 @@ """ The valid operations for the remodeling tools. """ -# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp +from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp @@ -27,8 +27,8 @@ 'factor_hed_tags': FactorHedTagsOp, 'factor_hed_type': FactorHedTypeOp, 'merge_consecutive': MergeConsecutiveOp, - 'number_groups_op': NumberGroupsOp, - 'number_rows_op': NumberRowsOp, + 'number_groups': NumberGroupsOp, + 'number_rows': NumberRowsOp, 'remap_columns': RemapColumnsOp, 'remove_columns': RemoveColumnsOp, 'remove_rows': RemoveRowsOp, @@ -38,8 +38,8 @@ 'summarize_column_names': SummarizeColumnNamesOp, 'summarize_column_values': SummarizeColumnValuesOp, 'summarize_definitions': SummarizeDefinitionsOp, - 'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp, - 'summarize_hed_type': SummarizeHedTypeOp, 'summarize_hed_tags': SummarizeHedTagsOp, - 'summarize_hed_validation': SummarizeHedValidationOp + 'summarize_hed_type': SummarizeHedTypeOp, + 'summarize_hed_validation': SummarizeHedValidationOp, + 'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp } diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py new file mode 100644 index 000000000..60ce68bec --- /dev/null +++ b/hed/tools/remodeling/validator.py @@ -0,0 +1,196 @@ +import os +import json +from copy import deepcopy +from jsonschema import Draft202012Validator +from jsonschema.exceptions import ErrorTree +from hed.tools.remodeling.operations.valid_operations import valid_operations + + +class RemodelerValidator(): + """Validator for remodeler input files.""" + + MESSAGE_STRINGS = 
{
+        "0": {
+            "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.",
+            "type": "Operations must be contained in a list or array. This is also true when you run a single operation."
+        },
+        "1": {
+            "type": "Each operation must be defined in a dictionary. {instance} is not a dictionary object.",
+            "required": "Operation dictionary {operation_index} is missing '{missing_value}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.",
+            "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field '{added_property}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters."
+        },
+        "2": {
+            "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.",
+            "enum": "{instance} is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.",
+            "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.",
+            "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'.",
+            "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name} when {dependent_on} is specified."
+        },
+        "more": {
+            "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.",
+            "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).",
+            "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.",
+            "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'.",
+            "enum": "Operation {operation_index}: Operation parameter {parameter_path}, in the {operation_name} operation, contains an unexpected value. Value should be one of {validator_value}.",
+            "uniqueItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should only contain unique items.",
+            "minProperties": "Operation {operation_index}: The dictionary in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} key(s)."
+        }
+    }
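
These templates are plain str.format strings: _parse_message (below) gathers the relevant fields into an error dict and formats the matching template with it. A small sketch with illustrative values:

    template = RemodelerValidator.MESSAGE_STRINGS["2"]["required"]
    error_dict = {"operation_index": 1,
                  "missing_value": "summary_name",
                  "operation_name": "summarize_column_values"}
    print(template.format(**error_dict))
    # Operation 1: The parameter summary_name is missing.
    # summary_name is a required parameter of summarize_column_values.
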
+
+    BASE_ARRAY = {
+        "type": "array",
+        "items": {},
+        "minItems": 1
+    }
+
+    OPERATION_DICT = {
+        "type": "object",
+        "required": [
+            "operation",
+            "description",
+            "parameters"
+        ],
+        "additionalProperties": False,
+        "properties": {
+            "operation": {
+                "type": "string",
+                "enum": [],
+                "default": "convert_columns"
+            },
+            "description": {
+                "type": "string"
+            },
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        },
+        "allOf": []
+    }
+
+    PARAMETER_SPECIFICATION_TEMPLATE = {
+        "if": {
+            "properties": {
+                "operation": {
+                    "const": ""
+                }
+            },
+            "required": [
+                "operation"
+            ]
+        },
+        "then": {
+            "properties": {
+                "parameters": {}
+            }
+        }
+    }
+
+    def __init__(self):
+        """ Constructor for the remodeler validator.
+
+        Attributes:
+        - **schema** (*dict*): The compiled JSON schema against which remodeler files are validated.
+        - **validator** (*Draft202012Validator*): The instantiated JSON schema validator.
+        """
+        self.schema = self._construct_schema()
+        self.validator = Draft202012Validator(self.schema)
+
+    def validate(self, operations):
+        """ Validate the given operations against the JSON schema specification for the remodeler file,
+        run any additional data validation that is necessary, and return a list of user-friendly error messages.
+
+        Parameters:
+            **operations** (*list*): List of input operation dictionaries to run through the remodeler.
+
+        Returns:
+            **list_of_error_strings** (*list*): List with an error message for every error identified by the validator.
+        """
+
+        list_of_error_strings = []
+        for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path):
+            list_of_error_strings.append(
+                self._parse_message(error, operations))
+        if list_of_error_strings:
+            return list_of_error_strings
+
+        operation_by_parameters = [(operation["operation"], operation["parameters"]) for operation in operations]
+
+        for index, operation in enumerate(operation_by_parameters):
+            error_strings = valid_operations[operation[0]].validate_input_data(operation[1])
+            for error_string in error_strings:
+                list_of_error_strings.append("Operation %s: %s" % (index + 1, error_string))
+
+        return list_of_error_strings
+
+    def _parse_message(self, error, operations):
+        ''' Return a user-friendly error message based on the jsonschema validation error.
+
+        Parameters:
+        - **error** (*ValidationError*): A validation error from the jsonschema validator.
+        - **operations** (*list*): The operations that were validated.
+
+        Note:
+        - A jsonschema error does not contain all the information necessary to return a
+          proper error message, so some information is also taken directly from the operations
+          that led to the error.
+        - All necessary information is gathered into an error dict; the message strings are
+          predefined in a dictionary and formatted with this additional information.
+        '''
+        error_dict = vars(error)
+
+        level = len(error_dict["path"])
+        if level > 2:
+            level = "more"
+        # Some information is in the validation error but not directly in a field, so it must be
+        # restructured before it can be parsed in. Fields that are absent are simply not needed
+        # by the corresponding message template.
+        try:
+            error_dict["operation_index"] = error_dict["path"][0] + 1
+            error_dict["operation_field"] = error_dict["path"][1].capitalize()
+            error_dict["operation_name"] = operations[int(
+                error_dict["path"][0])]['operation']
+            # everything except the first two values, reversed
+            parameter_path = [*error_dict['path']][:1:-1]
+            for ind, value in enumerate(parameter_path):
+                if isinstance(value, int):
+                    parameter_path[ind] = f"item {value+1}"
+            error_dict["parameter_path"] = ", ".join(parameter_path)
+        except (IndexError, TypeError, KeyError):
+            pass
+
+        type = str(error_dict["validator"])
+
+        # The missing value for required elements, or the unexpected additional value, is not
+        # known to the validation error object.
+        # This is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119
+        # For now the simplest thing is to extract it from the error message.
+        if type == 'required':
+            error_dict["missing_value"] = error_dict["message"].split("'")[
+                1::2][0]
+        if type == 'additionalProperties':
+            error_dict["added_property"] = error_dict["message"].split("'")[
+                1::2][0]
+
+        # dependentRequired provides both the missing value and the reason it is required in one
+        # dictionary; it is split over two fields for the error message.
+        if type == 'dependentRequired':
+            error_dict["missing_value"] = list(error_dict["validator_value"].keys())[0]
+            error_dict["dependent_on"] = list(error_dict["validator_value"].values())[0]
+
+        return self.MESSAGE_STRINGS[str(level)][type].format(**error_dict)
+
+    def _construct_schema(self):
+
+        schema = deepcopy(self.BASE_ARRAY)
+        schema["items"] = deepcopy(self.OPERATION_DICT)
+
+        for operation in valid_operations.items():
+            schema["items"]["properties"]["operation"]["enum"].append(operation[0])
+
+            parameter_specification = deepcopy(self.PARAMETER_SPECIFICATION_TEMPLATE)
+            parameter_specification["if"]["properties"]["operation"]["const"] = operation[0]
+            parameter_specification["then"]["properties"]["parameters"] = operation[1].PARAMS
+
+            schema["items"]["allOf"].append(deepcopy(parameter_specification))
+
+        return schema
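
A minimal usage sketch for the class above; the operation entry reuses the summarize_column_names schema from earlier in this patch, and the summary names are placeholders:

    from hed.tools.remodeling.validator import RemodelerValidator

    operations = [{
        "operation": "summarize_column_names",
        "description": "Check that all files have the same columns.",
        "parameters": {
            "summary_name": "column_name_summary",
            "summary_filename": "column_name_summary"
        }
    }]

    validator = RemodelerValidator()
    errors = validator.validate(operations)   # [] when the file is valid
    if errors:
        print("\n".join(errors))
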
diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py
index 53fab27a4..1a00b34b8 100644
--- a/hed/tools/util/io_util.py
+++ b/hed/tools/util/io_util.py
@@ -328,3 +328,20 @@ def get_task_from_file(file_path):
         return ""
     splits = re.split(r'[_.]', basename[position+5:])
     return splits[0]
+
+def get_task_dict(files):
+    """ Return a dictionary of the tasks that appear in the file names of a list of files.
+
+    Parameters:
+        files (list): A list of file path names to be examined for task names.
+
+    Returns:
+        dict: A dictionary whose keys are task names and whose values are lists of the
+        files whose names contain that task.
+
+    """
+    task_dict = {}
+    for my_file in files:
+        task = get_task_from_file(my_file)
+        if not task:
+            continue
+        task_entry = task_dict.get(task, [])
+        task_entry.append(my_file)
+        task_dict[task] = task_entry
+    return task_dict
diff --git a/hed/validator/__init__.py b/hed/validator/__init__.py
index 4a8b94209..dbe240432 100644
--- a/hed/validator/__init__.py
+++ b/hed/validator/__init__.py
@@ -1,7 +1,6 @@
 """Validation of HED tags."""
 
 from .hed_validator import HedValidator
-from .tag_validator import TagValidator
 from .sidecar_validator import SidecarValidator
 from .def_validator import DefValidator
 from .onset_validator import OnsetValidator
diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py
index 293c8ad06..13fcfa5f6 100644
--- a/hed/validator/def_validator.py
+++ b/hed/validator/def_validator.py
@@ -12,6 +12,7 @@ class DefValidator(DefinitionDict):
     """ Handles validating Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset
 
     """
+
     def __init__(self, def_dicts=None, hed_schema=None):
         """ Initialize for definitions in hed strings.
 
@@ -21,12 +22,12 @@ def __init__(self, def_dicts=None, hed_schema=None):
 
         """
         super().__init__(def_dicts, hed_schema=hed_schema)
 
-    def validate_def_tags(self, hed_string_obj, tag_validator=None):
+    def validate_def_tags(self, hed_string_obj, hed_validator=None):
         """ Validate Def/Def-Expand tags.
Parameters: hed_string_obj (HedString): The hed string to process. - tag_validator (TagValidator): Used to validate the placeholder replacement. + hed_validator (HedValidator): Used to validate the placeholder replacement. Returns: list: Issues found related to validating defs. Each issue is a dictionary. """ @@ -34,38 +35,12 @@ def validate_def_tags(self, hed_string_obj, tag_validator=None): if self._label_tag_name not in hed_string_lower: return [] + # This is needed primarily to validate the contents of a def-expand matches the default. def_issues = [] # We need to check for labels to expand in ALL groups for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True): - def_issues += self._validate_def_contents(def_tag, def_expand_group, tag_validator) - - return def_issues - - @staticmethod - def _validate_def_units(def_tag, placeholder_tag, tag_validator, is_def_expand_tag): - """Validate units and value classes on def/def-expand tags + def_issues += self._validate_def_contents(def_tag, def_expand_group, hed_validator) - Parameters: - def_tag(HedTag): The source tag - placeholder_tag(HedTag): The placeholder tag this def fills in - tag_validator(TagValidator): Used to validate the units/values - is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not. - - Returns: - issues(list): Issues found from validating placeholders. - """ - def_issues = [] - error_code = ValidationErrors.DEF_INVALID - if is_def_expand_tag: - error_code = ValidationErrors.DEF_EXPAND_INVALID - if placeholder_tag.is_unit_class_tag(): - def_issues += tag_validator.check_tag_unit_class_units_are_valid(placeholder_tag, - report_as=def_tag, - error_code=error_code) - elif placeholder_tag.is_value_class_tag(): - def_issues += tag_validator.check_tag_value_class_valid(placeholder_tag, - report_as=def_tag, - error_code=error_code) return def_issues @staticmethod @@ -92,14 +67,14 @@ def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag): def_issues += ErrorHandler.format_error(error_code, tag=def_tag) return def_issues - def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): + def _validate_def_contents(self, def_tag, def_expand_group, hed_validator): """ Check for issues with expanding a tag from Def to a Def-expand tag group Parameters: def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag. def_expand_group (HedGroup or HedTag): Source group for this def-expand tag. Same as def_tag if this is not a def-expand tag. - tag_validator (TagValidator): Used to validate the placeholder replacement. + hed_validator (HedValidator): Used to validate the placeholder replacement. Returns: issues(list): Issues found from validating placeholders. 
@@ -117,21 +92,52 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator): def_issues += ErrorHandler.format_error(error_code, tag=def_tag) else: def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, - return_copy_of_tag=True) + return_copy_of_tag=True) if def_contents is not None: if is_def_expand_tag and def_expand_group != def_contents: def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID, tag=def_tag, actual_def=def_contents, found_def=def_expand_group) - if def_entry.takes_value and tag_validator: - placeholder_tag = def_contents.get_first_group().find_placeholder_tag() - def_issues += self._validate_def_units(def_tag, placeholder_tag, tag_validator, - is_def_expand_tag) else: def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag) return def_issues + def validate_def_value_units(self, def_tag, hed_validator): + """Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag""" + tag_label, _, placeholder = def_tag.extension.partition('/') + is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY + + def_entry = self.defs.get(tag_label.lower()) + # These errors will be caught as can't match definition + if def_entry is None: + return [] + + error_code = ValidationErrors.DEF_INVALID + if is_def_expand_tag: + error_code = ValidationErrors.DEF_EXPAND_INVALID + + def_issues = [] + + # Validate the def name vs the name class + def_issues += hed_validator.validate_units(def_tag, + tag_label, + error_code=error_code) + + def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True) + if def_contents and def_entry.takes_value and hed_validator: + placeholder_tag = def_contents.get_first_group().find_placeholder_tag() + # Handle the case where they're adding a unit as part of a placeholder. eg Speed/# mph + if placeholder_tag: + placeholder = placeholder_tag.extension + def_issues += hed_validator.validate_units(placeholder_tag, + placeholder, + report_as=def_tag, + error_code=error_code, + index_offset=len(tag_label) + 1) + + return def_issues + def validate_onset_offset(self, hed_string_obj): """ Validate onset/offset diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 41ebb16db..2e509bb16 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -4,14 +4,13 @@ the get_validation_issues() function. 
""" - +import re from hed.errors.error_types import ValidationErrors, DefinitionErrors from hed.errors.error_reporter import ErrorHandler, check_for_any_errors from hed.models.hed_string import HedString -from hed.models import HedTag -from hed.validator.tag_validator import TagValidator from hed.validator.def_validator import DefValidator +from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator class HedValidator: @@ -25,14 +24,20 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): def_dicts(DefinitionDict or list or dict): the def dicts to use for validation definitions_allowed(bool): If False, flag definitions found as errors """ - super().__init__() - self._tag_validator = None + if hed_schema is None: + raise ValueError("HedSchema required for validation") + self._hed_schema = hed_schema - self._tag_validator = TagValidator(hed_schema=self._hed_schema) self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed + self._unit_validator = UnitValueValidator() + self._char_validator = CharValidator() + self._string_validator = StringValidator() + self._tag_validator = TagValidator() + self._group_validator = GroupValidator(hed_schema) + def validate(self, hed_string, allow_placeholders, error_handler=None): """ Validate the string using the schema @@ -57,13 +62,13 @@ def validate(self, hed_string, allow_placeholders, error_handler=None): def run_basic_checks(self, hed_string, allow_placeholders): issues = [] - issues += self._tag_validator.run_hed_string_validators(hed_string, allow_placeholders) + issues += self._run_hed_string_validators(hed_string, allow_placeholders) if check_for_any_errors(issues): return issues - if hed_string == "n/a" or not self._hed_schema: + if hed_string == "n/a": return issues for tag in hed_string.get_all_tags(): - self._tag_validator.run_validate_tag_characters(tag, allow_placeholders=allow_placeholders) + issues += self._run_validate_tag_characters(tag, allow_placeholders=allow_placeholders) issues += hed_string._calculate_to_canonical_forms(self._hed_schema) if check_for_any_errors(issues): return issues @@ -71,82 +76,107 @@ def run_basic_checks(self, hed_string, allow_placeholders): # e.g. checking units when a definition placeholder has units self._def_validator.construct_def_tags(hed_string) issues += self._validate_individual_tags_in_hed_string(hed_string, allow_placeholders=allow_placeholders) - issues += self._def_validator.validate_def_tags(hed_string, self._tag_validator) + issues += self._def_validator.validate_def_tags(hed_string, self) return issues def run_full_string_checks(self, hed_string): issues = [] - issues += self._validate_tags_in_hed_string(hed_string) - issues += self._validate_groups_in_hed_string(hed_string) + issues += self._group_validator.run_all_tags_validators(hed_string) + issues += self._group_validator.run_tag_level_validators(hed_string) issues += self._def_validator.validate_onset_offset(hed_string) return issues - def _validate_groups_in_hed_string(self, hed_string_obj): - """ Report invalid groups at each level. + # Todo: mark semi private/actually private below this + def _run_validate_tag_characters(self, original_tag, allow_placeholders): + """ Basic character validation of tags Parameters: - hed_string_obj (HedString): A HedString object. + original_tag (HedTag): A original tag. + allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value. 
         Returns:
-            list: Issues associated with each level in the HED string. Each issue is a dictionary.
-
-        Notes:
-            - This pertains to the top-level, all groups, and nested groups.
+            list: The validation issues associated with the characters. Each issue is a dictionary.
         """
-        validation_issues = []
-        for original_tag_group, is_top_level in hed_string_obj.get_all_groups(also_return_depth=True):
-            is_group = original_tag_group.is_group
-            if not original_tag_group and is_group:
-                validation_issues += ErrorHandler.format_error(ValidationErrors.HED_GROUP_EMPTY,
-                                                               tag=original_tag_group)
-            validation_issues += self._tag_validator.run_tag_level_validators(original_tag_group.tags(), is_top_level,
-                                                                              is_group)
-
-        validation_issues += self._check_for_duplicate_groups(hed_string_obj)
-        return validation_issues
+        return self._char_validator.check_tag_invalid_chars(original_tag, allow_placeholders)

-    def _check_for_duplicate_groups_recursive(self, sorted_group, validation_issues):
-        prev_child = None
-        for child in sorted_group:
-            if child == prev_child:
-                if isinstance(child, HedTag):
-                    error_code = ValidationErrors.HED_TAG_REPEATED
-                    validation_issues += ErrorHandler.format_error(error_code, child)
-                else:
-                    error_code = ValidationErrors.HED_TAG_REPEATED_GROUP
-                    found_group = child
-                    base_steps_up = 0
-                    while isinstance(found_group, list):
-                        found_group = found_group[0]
-                        base_steps_up += 1
-                    for _ in range(base_steps_up):
-                        found_group = found_group._parent
-                    validation_issues += ErrorHandler.format_error(error_code, found_group)
-            if not isinstance(child, HedTag):
-                self._check_for_duplicate_groups_recursive(child, validation_issues)
-            prev_child = child
-
-    def _check_for_duplicate_groups(self, original_group):
-        sorted_group = original_group._sorted()
+    def _run_hed_string_validators(self, hed_string_obj, allow_placeholders=False):
+        """Basic high-level checks of the HED string for illegal characters.
+
+        Catches fully banned characters, out-of-order parentheses, commas, repeated slashes, etc.
+
+        Parameters:
+            hed_string_obj (HedString): A HED string.
+            allow_placeholders (bool): Allow placeholder and curly brace characters.
+
+        Returns:
+            list: The validation issues associated with a HED string. Each issue is a dictionary.
+        """
         validation_issues = []
-        self._check_for_duplicate_groups_recursive(sorted_group, validation_issues)
+        validation_issues += self._char_validator.check_invalid_character_issues(
+            hed_string_obj.get_original_hed_string(), allow_placeholders)
+        validation_issues += self._string_validator.run_string_validator(hed_string_obj)
+        for original_tag in hed_string_obj.get_all_tags():
+            validation_issues += self.check_tag_formatting(original_tag)
         return validation_issues

-    def _validate_tags_in_hed_string(self, hed_string_obj):
-        """ Report invalid the multi-tag properties in a hed string, e.g. required tags..
+    pattern_doubleslash = re.compile(r"([ \t/]{2,}|^/|/$)")

-        Parameters:
-            hed_string_obj (HedString): A HedString object.
+    def check_tag_formatting(self, original_tag):
+        """ Report repeated or erroneous slashes.

-        Returns:
-            list: The issues associated with the tags in the HED string. Each issue is a dictionary.
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
         """
         validation_issues = []
-        tags = hed_string_obj.get_all_tags()
-        validation_issues += self._tag_validator.run_all_tags_validators(tags)
+        for match in self.pattern_doubleslash.finditer(original_tag.org_tag):
+            validation_issues += ErrorHandler.format_error(ValidationErrors.NODE_NAME_EMPTY,
+                                                           tag=original_tag,
+                                                           index_in_tag=match.start(),
+                                                           index_in_tag_end=match.end())
+        return validation_issues
+
+    def validate_units(self, original_tag, validate_text=None, report_as=None, error_code=None,
+                       index_offset=0):
+        """Validate units and value classes.
+
+        Parameters:
+            original_tag (HedTag): The source tag.
+            validate_text (str): The text to validate, if not the full extension.
+            report_as (HedTag): Report the error tag as coming from a different one.
+                Mostly for definitions that expand.
+            error_code (str): The code to override the error as; mostly for def/def-expand tags.
+            index_offset (int): Offset into the extension where validate_text starts.
+
+        Returns:
+            issues (list): Issues found from units.
+        """
+        if validate_text is None:
+            validate_text = original_tag.extension
+        issues = []
+        if original_tag.is_unit_class_tag():
+            issues += self._unit_validator.check_tag_unit_class_units_are_valid(original_tag,
+                                                                                validate_text,
+                                                                                report_as=report_as,
+                                                                                error_code=error_code,
+                                                                                index_offset=index_offset)
+        elif original_tag.is_value_class_tag():
+            issues += self._unit_validator.check_tag_value_class_valid(original_tag,
+                                                                       validate_text,
+                                                                       report_as=report_as,
+                                                                       error_code=error_code,
+                                                                       index_offset=index_offset)
+        elif original_tag.extension:
+            issues += self._char_validator.check_for_invalid_extension_chars(original_tag,
+                                                                             validate_text,
+                                                                             index_offset=index_offset)
+
+        return issues

     def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placeholders=False):
         """ Validate individual tags in a HED string.

@@ -170,7 +200,7 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol
         # todo: unclear if this should be restored at some point
         #     if hed_tag.expandable and not hed_tag.expanded:
         #         for tag in hed_tag.expandable.get_all_tags():
-        #             validation_issues += self._tag_validator. \
+        #             validation_issues += self._group_validator. \
         #                 run_individual_tag_validators(tag, allow_placeholders=allow_placeholders,
         #                                               is_definition=is_definition)
         #     else:
@@ -178,5 +208,9 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol
                 run_individual_tag_validators(hed_tag, allow_placeholders=allow_placeholders,
                                               is_definition=is_definition)
+            if hed_tag.short_base_tag in (DefTagNames.DEF_ORG_KEY, DefTagNames.DEF_EXPAND_ORG_KEY):
+                validation_issues += self._def_validator.validate_def_value_units(hed_tag, self)
+            else:
+                validation_issues += self.validate_units(hed_tag)
         return validation_issues
diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py
index 94be9d7ef..f18196360 100644
--- a/hed/validator/onset_validator.py
+++ b/hed/validator/onset_validator.py
@@ -63,3 +63,20 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
             del self._onsets[full_def_name.lower()]

         return []
+
+    @staticmethod
+    def check_for_banned_tags(hed_string):
+        """ Return an issue for every tag found in the banned list.
+
+        Parameters:
+            hed_string (HedString): The string to check.
+
+        Returns:
+            list: The validation issues associated with the banned tags. Each issue is a dictionary.
+        """
+        banned_tag_list = DefTagNames.TEMPORAL_KEYS
+        issues = []
+        for tag in hed_string.get_all_tags():
+            if tag in banned_tag_list:
+                issues += ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, tag)
+        return issues
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index 751af9612..aad302837 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -28,8 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         Validate the input data using the schema

         Parameters:
-            data (BaseInput or pd.DataFrame): Input data to be validated.
-                If a dataframe, it is assumed to be assembled already.
+            data (BaseInput): Input data to be validated.
             def_dicts(list of DefDict or DefDict): all definitions to use for validation
             name(str): The name to report errors from this file as
             error_handler (ErrorHandler): Error context to use. Creates a new one if None
@@ -41,22 +40,27 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         if error_handler is None:
             error_handler = ErrorHandler()

+        if not isinstance(data, BaseInput):
+            raise TypeError("Invalid type passed to spreadsheet validator. Can only validate BaseInput objects.")
+
         error_handler.push_error_context(ErrorContext.FILE_NAME, name)
-        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
-        self._onset_validator = OnsetValidator()
-        onset_filtered = None
         # Adjust to account for 1 based
         row_adj = 1
-        if isinstance(data, BaseInput):
-            # Adjust to account for column names
-            if data.has_column_names:
-                row_adj += 1
-            issues += self._validate_column_structure(data, error_handler, row_adj)
-            onset_filtered = data.series_filtered
-            data = data.dataframe_a
+        # Adjust to account for column names
+        if data.has_column_names:
+            row_adj += 1
+        issues += self._validate_column_structure(data, error_handler, row_adj)
+        onset_filtered = data.series_filtered
+        df = data.dataframe_a
+
+        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
+        if data.onsets is not None:
+            self._onset_validator = OnsetValidator()
+        else:
+            self._onset_validator = None

         # Check the rows of the input data
-        issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
+        issues += self._run_checks(df, onset_filtered, error_handler=error_handler, row_adj=row_adj)
         error_handler.pop_error_context()

         issues = sort_issues(issues)
@@ -98,7 +102,10 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
             if row_string:
                 error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                 new_column_issues = self._hed_validator.run_full_string_checks(row_string)
-                new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                if self._onset_validator is not None:
+                    new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                else:
+                    new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
                 error_handler.add_context_and_filter(new_column_issues)
                 error_handler.pop_error_context()
             issues += new_column_issues
diff --git a/hed/validator/tag_util/__init__.py b/hed/validator/tag_util/__init__.py
new file mode 100644
index 000000000..0f108a3ac
--- /dev/null
+++ b/hed/validator/tag_util/__init__.py
@@ -0,0 +1,7 @@
+"""Validation of HED tags."""
+
+from .char_util import CharValidator
+from .string_util import StringValidator
+from .class_util import UnitValueValidator
+from .tag_util import TagValidator
+from .group_util import GroupValidator
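The package interface above is the easiest place to see how the former monolithic TagValidator's duties were split. A minimal construction sketch (an editorial illustration, not part of the patch; the "8.2.0" schema version is just an example):

from hed.schema import load_schema_version
from hed.validator.tag_util import (CharValidator, StringValidator,
                                    UnitValueValidator, TagValidator, GroupValidator)

schema = load_schema_version("8.2.0")  # example schema version
char_util = CharValidator()            # character-level checks on strings and tags
string_util = StringValidator()        # parentheses and comma checks on raw strings
unit_util = UnitValueValidator()       # unit class and value class checks
tag_util = TagValidator()              # individual-tag checks (existence, capitalization, ...)
group_util = GroupValidator(schema)    # required/unique/top-level group checks; needs a schema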
diff --git a/hed/validator/tag_util/char_util.py b/hed/validator/tag_util/char_util.py
new file mode 100644
index 000000000..873b8b100
--- /dev/null
+++ b/hed/validator/tag_util/char_util.py
@@ -0,0 +1,121 @@
+from hed.errors.error_reporter import ErrorHandler
+from hed.errors.error_types import ValidationErrors
+
+
+class CharValidator:
+    """Class responsible for basic character-level validation of a string or tag."""
+
+    # The # sign is allowed by default as it is specifically checked for separately.
+    DEFAULT_ALLOWED_PLACEHOLDER_CHARS = ".+-^ _#"
+    # Placeholder characters are checked elsewhere, but by default allowed
+    TAG_ALLOWED_CHARS = "-_/"
+
+    INVALID_STRING_CHARS = '[]{}~'
+    INVALID_STRING_CHARS_PLACEHOLDERS = '[]~'
+
+    def check_invalid_character_issues(self, hed_string, allow_placeholders):
+        """ Report invalid characters.
+
+        Parameters:
+            hed_string (str): A hed string.
+            allow_placeholders (bool): Allow placeholder and curly brace characters.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+
+        Notes:
+            - Invalid tag characters are defined by self.INVALID_STRING_CHARS or
+              self.INVALID_STRING_CHARS_PLACEHOLDERS.
+        """
+        validation_issues = []
+        invalid_chars = self.INVALID_STRING_CHARS
+        if allow_placeholders:
+            invalid_chars = self.INVALID_STRING_CHARS_PLACEHOLDERS
+        for index, character in enumerate(hed_string):
+            if character in invalid_chars or ord(character) > 127:
+                validation_issues += self._report_invalid_character_error(hed_string, index)

+        return validation_issues
+
+    def check_tag_invalid_chars(self, original_tag, allow_placeholders):
+        """ Report invalid characters in the given tag.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            allow_placeholders (bool): Allow placeholder characters (#) if True.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = self._check_invalid_prefix_issues(original_tag)
+        allowed_chars = self.TAG_ALLOWED_CHARS
+        if allow_placeholders:
+            allowed_chars += "#"
+        validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag)
+        return validation_issues
+
+    def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None,
+                                          index_offset=0):
+        """Report invalid characters in an extension/value.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            validate_text (str): The text to validate, if not the full extension.
+            error_code (str): The code to override the error as; mostly for def/def-expand tags.
+            index_offset (int): Offset into the extension where validate_text starts.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        allowed_chars = self.TAG_ALLOWED_CHARS
+        allowed_chars += self.DEFAULT_ALLOWED_PLACEHOLDER_CHARS
+        allowed_chars += " "
+        return self._check_invalid_chars(validate_text, allowed_chars, original_tag,
+                                         starting_index=len(original_tag.org_base_tag) + 1 + index_offset,
+                                         error_code=error_code)
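Because CharValidator works on plain strings, these checks can be exercised without loading a schema. A small sketch (illustrative, not part of the patch), using the INVALID_STRING_CHARS rules defined above:

from hed.validator.tag_util import CharValidator

validator = CharValidator()
# '~' is in INVALID_STRING_CHARS, so the tilde is reported (as a tilde-specific error).
issues = validator.check_invalid_character_issues("Red,Blue~Green", allow_placeholders=False)
assert issues
# Plain ASCII with no banned characters produces no issues from this check.
assert not validator.check_invalid_character_issues("Red,Blue", allow_placeholders=False)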
+    @staticmethod
+    def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0, error_code=None):
+        validation_issues = []
+        for i, character in enumerate(check_string):
+            if character.isalnum():
+                continue
+            if character in allowed_chars:
+                continue
+            # Todo: Remove this patch when clock times and invalid characters are more properly checked
+            if character == ":":
+                continue
+            validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER,
+                                                           tag=source_tag, index_in_tag=starting_index + i,
+                                                           index_in_tag_end=starting_index + i + 1,
+                                                           actual_error=error_code)
+        return validation_issues
+
+    @staticmethod
+    def _check_invalid_prefix_issues(original_tag):
+        """Check for an invalid schema namespace."""
+        issues = []
+        schema_namespace = original_tag.schema_namespace
+        if schema_namespace and not schema_namespace[:-1].isalpha():
+            issues += ErrorHandler.format_error(ValidationErrors.TAG_NAMESPACE_PREFIX_INVALID,
+                                                tag=original_tag, tag_namespace=schema_namespace)
+        return issues
+
+    @staticmethod
+    def _report_invalid_character_error(hed_string, index):
+        """ Report an invalid character.
+
+        Parameters:
+            hed_string (str): The HED string that caused the error.
+            index (int): The index of the invalid character in the HED string.
+
+        Returns:
+            list: A singleton list with a dictionary representing the error.
+        """
+        error_type = ValidationErrors.CHARACTER_INVALID
+        character = hed_string[index]
+        if character == "~":
+            error_type = ValidationErrors.TILDES_UNSUPPORTED
+        return ErrorHandler.format_error(error_type, char_index=index,
+                                         source_string=hed_string)
diff --git a/hed/validator/tag_util/class_util.py b/hed/validator/tag_util/class_util.py
new file mode 100644
index 000000000..966f6009a
--- /dev/null
+++ b/hed/validator/tag_util/class_util.py
@@ -0,0 +1,256 @@
+""" Utilities to support HED validation. """
+import datetime
+import re
+
+
+from hed.errors.error_reporter import ErrorHandler
+from hed.errors.error_types import ValidationErrors
+
+
+class UnitValueValidator:
+    DATE_TIME_VALUE_CLASS = 'dateTimeClass'
+    NUMERIC_VALUE_CLASS = "numericClass"
+    TEXT_VALUE_CLASS = "textClass"
+    NAME_VALUE_CLASS = "nameClass"
+
+    DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$'
+
+    VALUE_CLASS_ALLOWED_CACHE = 20
+
+    def __init__(self, value_validators=None):
+        """ Validates the unit and value classes on a given tag.
+
+        Parameters:
+            value_validators (dict or None): Override or add value class validators.
+        """
+        self._value_validators = self._get_default_value_class_validators()
+        if value_validators and isinstance(value_validators, dict):
+            self._value_validators.update(value_validators)
+
+    def _get_default_value_class_validators(self):
+        validator_dict = {
+            self.DATE_TIME_VALUE_CLASS: is_date_time,
+            self.NUMERIC_VALUE_CLASS: validate_numeric_value_class,
+            self.TEXT_VALUE_CLASS: validate_text_value_class,
+            self.NAME_VALUE_CLASS: validate_text_value_class
+        }
+
+        return validator_dict
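The value_validators hook above lets a caller override or extend the four default value-class checks. A sketch with a hypothetical "customClass" entry (illustrative, not part of the patch):

from hed.validator.tag_util import UnitValueValidator

def is_even_integer(text):
    # Hypothetical value-class check: accepts even integer strings.
    return text.isdigit() and int(text) % 2 == 0

checker = UnitValueValidator(value_validators={"customClass": is_even_integer})
assert checker.validate_value_class_type("42", ["customClass"])
assert not checker.validate_value_class_type("7", ["customClass"])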
+    def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, report_as=None, error_code=None,
+                                             index_offset=0):
+        """ Report incorrect unit class or units.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            validate_text (str): The text to validate.
+            report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
+            error_code (str): Override the reported error code.
+            index_offset (int): Offset into the extension where validate_text starts.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        if original_tag.is_unit_class_tag():
+            stripped_value, unit = original_tag.get_stripped_unit_value(validate_text)
+            if not unit:
+                # Todo: in theory this should separately validate the number and the units, for units
+                # that are prefixes like $. Right now those are marked as unit invalid AND value_invalid.
+                bad_units = " " in validate_text
+
+                if bad_units:
+                    stripped_value = stripped_value.split(" ")[0]
+
+                validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
+                                                             index_offset)
+                validation_issues += self._check_units(original_tag, bad_units, report_as)
+
+                # We don't want to give this overall error twice
+                if error_code and not any(error_code == issue['code'] for issue in validation_issues):
+                    new_issue = validation_issues[0].copy()
+                    new_issue['code'] = error_code
+                    validation_issues += [new_issue]
+
+        return validation_issues
+
+    def check_tag_value_class_valid(self, original_tag, validate_text, report_as=None, error_code=None,
+                                    index_offset=0):
+        """ Report an invalid value portion.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+            validate_text (str): The text to validate.
+            report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
+            error_code (str): Override the reported error code.
+            index_offset (int): Offset into the extension where validate_text starts.
+
+        Returns:
+            list: Validation issues.
+        """
+        return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset)
+
+    # char_sets = {
+    #     "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
+    #     "blank": set(" "),
+    #     "digits": set("0123456789"),
+    #     "alphanumeric": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
+    # }
+    #
+    # @functools.lru_cache(maxsize=VALUE_CLASS_ALLOWED_CACHE)
+    # def _get_allowed_characters(self, value_classes):
+    #     # This could be pre-computed
+    #     character_set = set()
+    #     for value_class in value_classes:
+    #         allowed_types = value_class.attributes.get(HedKey.AllowedCharacter, "")
+    #         for single_type in allowed_types.split(","):
+    #             if single_type in self.char_sets:
+    #                 character_set.update(self.char_sets[single_type])
+    #             else:
+    #                 character_set.add(single_type)
+    #     return character_set
+
+    def _get_problem_indexes(self, original_tag, stripped_value):
+        found_index = original_tag.extension.find(stripped_value)
+        if found_index == -1:
+            return []
+        # Extra +1 for the slash
+        start_index = found_index + len(original_tag.org_base_tag) + 1
+
+        problem_indexes = [(char, index + start_index) for index, char in enumerate(stripped_value) if char in "{}"]
+        return problem_indexes
+        # Partial implementation of allowedCharacter
+        # allowed_characters = self._get_allowed_characters(original_tag.value_classes.values())
+        # if allowed_characters:
+        #     # Only test the stripped value - otherwise numericClass + unitClass won't validate reasonably.
+ # indexes = [index for index, char in enumerate(stripped_value) if char not in allowed_characters] + # pass + + def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0): + """Returns any issues found if this is a value tag""" + # todo: This function needs to check for allowed characters, not just {} + validation_issues = [] + if original_tag.is_takes_value_tag(): + report_as = report_as if report_as else original_tag + problem_indexes = self._get_problem_indexes(original_tag, stripped_value) + for char, index in problem_indexes: + tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if ( + char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER + + index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag) + index += index_adj + index_offset + validation_issues += ErrorHandler.format_error(tag_code, + tag=report_as, index_in_tag=index, + index_in_tag_end=index + 1) + if not self._validate_value_class_portion(original_tag, stripped_value): + validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, report_as) + if error_code: + validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, + report_as, actual_error=error_code) + return validation_issues + + @staticmethod + def _check_units(original_tag, bad_units, report_as): + """Returns an issue noting this is either bad units, or missing units""" + report_as = report_as if report_as else original_tag + if bad_units: + tag_unit_class_units = original_tag.get_tag_unit_class_units() + validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, + tag=report_as, units=tag_unit_class_units) + else: + default_unit = original_tag.default_unit + validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_MISSING, + tag=report_as, default_unit=default_unit) + return validation_issue + + def _validate_value_class_portion(self, original_tag, portion_to_validate): + if portion_to_validate is None: + return False + + value_class_types = original_tag.value_classes + return self.validate_value_class_type(portion_to_validate, value_class_types) + + def validate_value_class_type(self, unit_or_value_portion, valid_types): + """ Report invalid unit or valid class values. + + Parameters: + unit_or_value_portion (str): The value portion to validate. + valid_types (list): The names of value class or unit class types (e.g. dateTime or dateTimeClass). + + Returns: + type_valid (bool): True if this is one of the valid_types validators. + + """ + for unit_class_type in valid_types: + valid_func = self._value_validators.get(unit_class_type) + if valid_func: + if valid_func(unit_or_value_portion): + return True + return False + + +def is_date_time(date_time_string): + """Check if the specified string is a valid datetime. + + Parameters: + date_time_string (str): A datetime string. + + Returns: + bool: True if the datetime string is valid. False, if otherwise. + + Notes: + - ISO 8601 datetime string. + + """ + try: + date_time_obj = datetime.datetime.fromisoformat(date_time_string) + return not date_time_obj.tzinfo + except ValueError: + return False + + +def validate_numeric_value_class(numeric_string): + """ Checks to see if valid numeric value. + + Parameters: + numeric_string (str): A string that should be only a number with no units. + + Returns: + bool: True if the numeric string is valid. False, if otherwise. 
+
+    """
+    if re.search(UnitValueValidator.DIGIT_OR_POUND_EXPRESSION, numeric_string):
+        return True
+
+    return False
+
+
+def validate_text_value_class(text_string):
+    """ Placeholder for eventual text value class validation.
+
+    Parameters:
+        text_string (str): The text value to validate.
+
+    Returns:
+        bool: Always True.
+    """
+    return True
+
+
+def is_clock_face_time(time_string):
+    """ Check for a valid HH:MM time string.
+
+    Parameters:
+        time_string (str): A time string.
+
+    Returns:
+        bool: True if the time string is valid, False otherwise.
+
+    Notes:
+        - This is deprecated and has no expected use going forward.
+    """
+    try:
+        time_obj = datetime.time.fromisoformat(time_string)
+        return not time_obj.tzinfo and not time_obj.microsecond
+    except ValueError:
+        return False
diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py
new file mode 100644
index 000000000..b01a4f555
--- /dev/null
+++ b/hed/validator/tag_util/group_util.py
@@ -0,0 +1,194 @@
+"""
+This module is used to validate group-level and string-wide properties of HED tags.
+
+"""
+
+from hed.errors.error_reporter import ErrorHandler
+from hed.models.model_constants import DefTagNames
+from hed.schema import HedKey
+from hed.models import HedTag
+from hed.errors.error_types import ValidationErrors
+
+
+class GroupValidator:
+    """ Validation of attributes across groups of HED tags.
+
+    This covers attributes such as Required, Unique, and top-level tags.
+    """
+    def __init__(self, hed_schema):
+        """
+
+        Parameters:
+            hed_schema (HedSchema): A HedSchema object.
+        """
+        if hed_schema is None:
+            raise ValueError("HedSchema required for validation")
+        self._hed_schema = hed_schema
+
+    def run_tag_level_validators(self, hed_string_obj):
+        """ Report invalid groups at each level.
+
+        Parameters:
+            hed_string_obj (HedString): A HedString object.
+
+        Returns:
+            list: Issues associated with each level in the HED string. Each issue is a dictionary.
+
+        Notes:
+            - This pertains to the top-level, all groups, and nested groups.
+        """
+        validation_issues = []
+        for original_tag_group, is_top_level in hed_string_obj.get_all_groups(also_return_depth=True):
+            is_group = original_tag_group.is_group
+            if not original_tag_group and is_group:
+                validation_issues += ErrorHandler.format_error(ValidationErrors.HED_GROUP_EMPTY,
+                                                               tag=original_tag_group)
+            validation_issues += self.check_tag_level_issue(original_tag_group.tags(), is_top_level, is_group)
+
+        validation_issues += self._check_for_duplicate_groups(hed_string_obj)
+        return validation_issues
+
+    def run_all_tags_validators(self, hed_string_obj):
+        """ Report invalid multi-tag properties in a HED string, e.g. required tags.
+
+        Parameters:
+            hed_string_obj (HedString): A HedString object.
+
+        Returns:
+            list: The issues associated with the tags in the HED string. Each issue is a dictionary.
+        """
+        validation_issues = []
+        tags = hed_string_obj.get_all_tags()
+        validation_issues += self._validate_tags_in_hed_string(tags)
+        return validation_issues
+
+    # ==========================================================================
+    # Mostly internal functions to check individual types of errors
+    # =========================================================================+
+
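Unlike the character utilities, GroupValidator needs a loaded schema and an assembled HedString. A sketch (illustrative, not part of the patch; the schema version and tag string are examples) in which the empty group should be flagged as HED_GROUP_EMPTY by run_tag_level_validators:

from hed.models import HedString
from hed.schema import load_schema_version
from hed.validator.tag_util import GroupValidator

schema = load_schema_version("8.2.0")
validator = GroupValidator(schema)
issues = validator.run_tag_level_validators(HedString("Red, ()", schema))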
+    @staticmethod
+    def check_tag_level_issue(original_tag_list, is_top_level, is_group):
+        """ Report tags incorrectly positioned in the hierarchy.
+
+        Top-level groups can contain definitions, Onset, etc. tags.
+
+        Parameters:
+            original_tag_list (list): HedTags containing the original tags.
+            is_top_level (bool): If True, this group is a "top level tag group".
+            is_group (bool): If True, the group should be contained in parentheses.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        top_level_tags = [tag for tag in original_tag_list if
+                          tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)]
+        tag_group_tags = [tag for tag in original_tag_list if
+                          tag.base_tag_has_attribute(HedKey.TagGroup)]
+        for tag_group_tag in tag_group_tags:
+            if not is_group:
+                validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG,
+                                                               tag=tag_group_tag)
+        for top_level_tag in top_level_tags:
+            if not is_top_level:
+                actual_code = None
+                if top_level_tag.short_base_tag == DefTagNames.DEFINITION_ORG_KEY:
+                    actual_code = ValidationErrors.DEFINITION_INVALID
+                elif top_level_tag.short_base_tag in {DefTagNames.ONSET_ORG_KEY, DefTagNames.OFFSET_ORG_KEY}:
+                    actual_code = ValidationErrors.ONSET_OFFSET_INSET_ERROR
+
+                if actual_code:
+                    validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG,
+                                                                   tag=top_level_tag,
+                                                                   actual_error=actual_code)
+                validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG,
+                                                               tag=top_level_tag)
+
+        if is_top_level and len(top_level_tags) > 1:
+            validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS,
+                                                           tag=top_level_tags[0],
+                                                           multiple_tags=top_level_tags[1:])
+
+        return validation_issues
+
+    def check_for_required_tags(self, tags):
+        """ Report missing required tags.
+
+        Parameters:
+            tags (list): HedTags containing the tags.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        required_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Required)
+        for required_prefix in required_prefixes:
+            if not any(tag.long_tag.lower().startswith(required_prefix.lower()) for tag in tags):
+                validation_issues += ErrorHandler.format_error(ValidationErrors.REQUIRED_TAG_MISSING,
+                                                               tag_namespace=required_prefix)
+        return validation_issues
+
+    def check_multiple_unique_tags_exist(self, tags):
+        """ Report if multiple identical unique tags exist.
+
+        A unique Term can only appear once in a given HedString.
+        Unique terms are terms with the 'unique' property in the schema.
+
+        Parameters:
+            tags (list): HedTags containing the tags.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        unique_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Unique)
+        for unique_prefix in unique_prefixes:
+            unique_tag_prefix_bool_mask = [x.long_tag.lower().startswith(unique_prefix.lower()) for x in tags]
+            if sum(unique_tag_prefix_bool_mask) > 1:
+                validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_NOT_UNIQUE,
+                                                               tag_namespace=unique_prefix)
+        return validation_issues
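Both of the preceding checks reduce to case-insensitive prefix matching against tag lists pulled from the schema, so the prefixes being scanned can be inspected directly. A sketch (illustrative, not part of the patch); for recent standard HED 8 schemas these attribute lists may well be empty, making the checks no-ops:

from hed.schema import HedKey, load_schema_version

schema = load_schema_version("8.2.0")
print(schema.get_tags_with_attribute(HedKey.Required))
print(schema.get_tags_with_attribute(HedKey.Unique))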
+    def _validate_tags_in_hed_string(self, tags):
+        """ Validate the multi-tag properties in a HED string.
+
+        Multi-tag properties include required tags, unique tags, etc.
+
+        Parameters:
+            tags (list): A list containing the HedTags in a HED string.
+
+        Returns:
+            list: The validation issues associated with the tags in a HED string. Each issue is a dictionary.
+        """
+        validation_issues = []
+        validation_issues += self.check_for_required_tags(tags)
+        validation_issues += self.check_multiple_unique_tags_exist(tags)
+        return validation_issues
+
+    def _check_for_duplicate_groups_recursive(self, sorted_group, validation_issues):
+        prev_child = None
+        for child in sorted_group:
+            if child == prev_child:
+                if isinstance(child, HedTag):
+                    error_code = ValidationErrors.HED_TAG_REPEATED
+                    validation_issues += ErrorHandler.format_error(error_code, child)
+                else:
+                    error_code = ValidationErrors.HED_TAG_REPEATED_GROUP
+                    found_group = child
+                    base_steps_up = 0
+                    while isinstance(found_group, list):
+                        found_group = found_group[0]
+                        base_steps_up += 1
+                    for _ in range(base_steps_up):
+                        found_group = found_group._parent
+                    validation_issues += ErrorHandler.format_error(error_code, found_group)
+            if not isinstance(child, HedTag):
+                self._check_for_duplicate_groups_recursive(child, validation_issues)
+            prev_child = child
+
+    def _check_for_duplicate_groups(self, original_group):
+        sorted_group = original_group._sorted()
+        validation_issues = []
+        self._check_for_duplicate_groups_recursive(sorted_group, validation_issues)
+        return validation_issues
diff --git a/hed/validator/tag_util/string_util.py b/hed/validator/tag_util/string_util.py
new file mode 100644
index 000000000..bf452fe58
--- /dev/null
+++ b/hed/validator/tag_util/string_util.py
@@ -0,0 +1,115 @@
+import re
+from hed.errors.error_reporter import ErrorHandler
+from hed.errors.error_types import ValidationErrors
+
+
+class StringValidator:
+    """Runs checks on the raw string that depend on multiple characters, e.g. mismatched parentheses."""
+    OPENING_GROUP_CHARACTER = '('
+    CLOSING_GROUP_CHARACTER = ')'
+    COMMA = ','
+
+    def run_string_validator(self, hed_string_obj):
+        validation_issues = []
+        validation_issues += self.check_count_tag_group_parentheses(hed_string_obj.get_original_hed_string())
+        validation_issues += self.check_delimiter_issues_in_hed_string(hed_string_obj.get_original_hed_string())
+        return validation_issues
+
+    @staticmethod
+    def check_count_tag_group_parentheses(hed_string):
+        """ Report unmatched parentheses.
+
+        Parameters:
+            hed_string (str): A hed string.
+
+        Returns:
+            list: A list of validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        number_open_parentheses = hed_string.count('(')
+        number_closed_parentheses = hed_string.count(')')
+        if number_open_parentheses != number_closed_parentheses:
+            validation_issues += ErrorHandler.format_error(ValidationErrors.PARENTHESES_MISMATCH,
+                                                           opening_parentheses_count=number_open_parentheses,
+                                                           closing_parentheses_count=number_closed_parentheses)
+        return validation_issues
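Note that check_count_tag_group_parentheses only compares counts on the raw string, so it catches imbalance but not ordering; balanced-but-misordered parentheses are left to other checks. A sketch (illustrative, not part of the patch):

from hed.validator.tag_util import StringValidator

# Two '(' versus one ')' produces a parentheses-mismatch issue.
issues = StringValidator.check_count_tag_group_parentheses("(Red, (Blue)")
assert issues
# Misordered but balanced counts pass this particular check.
assert not StringValidator.check_count_tag_group_parentheses(")Red(")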
+    def check_delimiter_issues_in_hed_string(self, hed_string):
+        """ Report missing commas or commas in value tags.
+
+        Parameters:
+            hed_string (str): A hed string.
+
+        Returns:
+            list: A list of validation issues. Each issue is a dictionary.
+        """
+        last_non_empty_valid_character = ''
+        last_non_empty_valid_index = 0
+        current_tag = ''
+        issues = []
+
+        for i, current_character in enumerate(hed_string):
+            current_tag += current_character
+            if not current_character.strip():
+                continue
+            if self._character_is_delimiter(current_character):
+                if current_tag.strip() == current_character:
+                    issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string,
+                                                        char_index=i)
+                    current_tag = ''
+                    continue
+                current_tag = ''
+            elif current_character == self.OPENING_GROUP_CHARACTER:
+                if current_tag.strip() == self.OPENING_GROUP_CHARACTER:
+                    current_tag = ''
+                else:
+                    issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag)
+            elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER:
+                issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string,
+                                                    char_index=i)
+            elif self._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character,
+                                                                  current_character):
+                issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag[:-1])
+                break
+            last_non_empty_valid_character = current_character
+            last_non_empty_valid_index = i
+        if self._character_is_delimiter(last_non_empty_valid_character):
+            issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY,
+                                                char_index=last_non_empty_valid_index,
+                                                source_string=hed_string)
+        return issues
+
+    @staticmethod
+    def _comma_is_missing_after_closing_parentheses(last_non_empty_character, current_character):
+        """ Check if a comma is missing after a closing parenthesis.
+
+        Parameters:
+            last_non_empty_character (str): The last non-empty character in the HED string.
+            current_character (str): The current character in the HED string.
+
+        Returns:
+            bool: True if a comma is missing after a closing parenthesis. False otherwise.
+
+        Notes:
+            - This is a helper function for check_delimiter_issues_in_hed_string.
+        """
+        return last_non_empty_character == StringValidator.CLOSING_GROUP_CHARACTER and \
+            not (StringValidator._character_is_delimiter(current_character)
+                 or current_character == StringValidator.CLOSING_GROUP_CHARACTER)
+
+    @staticmethod
+    def _character_is_delimiter(character):
+        """ Check if the character is a delimiter.
+
+        Parameters:
+            character (str): A string character.
+
+        Returns:
+            bool: True if the character is a delimiter. False otherwise.
+
+        Notes:
+            - A delimiter is a comma.
+        """
+        return character == StringValidator.COMMA
diff --git a/hed/validator/tag_util/tag_util.py b/hed/validator/tag_util/tag_util.py
new file mode 100644
index 000000000..64d9df1cb
--- /dev/null
+++ b/hed/validator/tag_util/tag_util.py
@@ -0,0 +1,134 @@
+"""
+This module is used to validate individual HED tags.
+
+"""
+
+import re
+from hed.errors.error_reporter import ErrorHandler
+from hed.schema import HedKey
+from hed.errors.error_types import ValidationErrors
+
+
+class TagValidator:
+    """ Validation for individual HED tags. """
+    CAMEL_CASE_EXPRESSION = r'([A-Z]+\s*[a-z-]*)+'
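CAMEL_CASE_EXPRESSION is what lets check_capitalization (defined below) accept CamelCase terms while still warning on all-lowercase ones. A quick illustration of the regex itself (not part of the patch):

import re

from hed.validator.tag_util import TagValidator

pattern = re.compile(TagValidator.CAMEL_CASE_EXPRESSION)
assert pattern.search("ExperimentalStimulus")   # CamelCase passes
assert pattern.search("Red")                    # Capitalized passes
assert not pattern.search("red")                # all-lowercase would trigger STYLE_WARNING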
+
+    def run_individual_tag_validators(self, original_tag, allow_placeholders=False,
+                                      is_definition=False):
+        """ Run the validators on an individual tag.
+
+        This ignores most illegal characters except in extensions.
+
+        Parameters:
+            original_tag (HedTag): An original tag.
+            allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value.
+            is_definition (bool): This tag is part of a Definition, not a normal line.
+
+        Returns:
+            list: The validation issues associated with the tags. Each issue is a dictionary.
+        """
+        validation_issues = []
+        validation_issues += self.check_tag_exists_in_schema(original_tag)
+        if not allow_placeholders:
+            validation_issues += self.check_for_placeholder(original_tag, is_definition)
+        validation_issues += self.check_tag_requires_child(original_tag)
+        validation_issues += self.check_capitalization(original_tag)
+        return validation_issues
+
+    # ==========================================================================
+    # Mostly internal functions to check individual types of errors
+    # =========================================================================+
+    @staticmethod
+    def check_tag_exists_in_schema(original_tag):
+        """ Report a tag that is invalid in the schema or does not take a value.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        if original_tag.is_basic_tag() or original_tag.is_takes_value_tag():
+            return validation_issues
+
+        is_extension_tag = original_tag.has_attribute(HedKey.ExtensionAllowed)
+        if not is_extension_tag:
+            actual_error = None
+            if "#" in original_tag.extension:
+                actual_error = ValidationErrors.PLACEHOLDER_INVALID
+            validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=original_tag,
+                                                           actual_error=actual_error)
+        else:
+            validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_EXTENDED, tag=original_tag,
+                                                           index_in_tag=len(original_tag.org_base_tag),
+                                                           index_in_tag_end=None)
+        return validation_issues
+
+    @staticmethod
+    def check_tag_requires_child(original_tag):
+        """ Report if the tag is a leaf with the 'requireChild' schema attribute.
+
+        Parameters:
+            original_tag (HedTag): The original tag that is used to report the error.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        if original_tag.has_attribute(HedKey.RequireChild):
+            validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_REQUIRES_CHILD,
+                                                           tag=original_tag)
+        return validation_issues
+
+    def check_capitalization(self, original_tag):
+        """Report a warning if the tag is incorrectly capitalized.
+
+        Parameters:
+            original_tag (HedTag): The original tag used to report the warning.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+        """
+        validation_issues = []
+        tag_names = original_tag.org_base_tag.split("/")
+        for tag_name in tag_names:
+            correct_tag_name = tag_name.capitalize()
+            if tag_name != correct_tag_name and not re.search(self.CAMEL_CASE_EXPRESSION, tag_name):
+                validation_issues += ErrorHandler.format_error(ValidationErrors.STYLE_WARNING,
+                                                               tag=original_tag)
+                break
+        return validation_issues
+
+    # ==========================================================================
+    # Private utility functions
+    # =========================================================================+
+
+    @staticmethod
+    def check_for_placeholder(original_tag, is_definition=False):
+        """ Report invalid placeholder characters.
+
+        Parameters:
+            original_tag (HedTag): The HedTag to be checked.
+            is_definition (bool): If True, placeholders are allowed.
+
+        Returns:
+            list: Validation issues. Each issue is a dictionary.
+
+        Notes:
+            - An invalid placeholder may appear in the extension/value portion of a tag.
+ + """ + validation_issues = [] + if not is_definition: + starting_index = len(original_tag.org_base_tag) + 1 + for i, character in enumerate(original_tag.extension): + if character == "#": + validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER, + tag=original_tag, + index_in_tag=starting_index + i, + index_in_tag_end=starting_index + i + 1, + actual_error=ValidationErrors.PLACEHOLDER_INVALID) + + return validation_issues diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py deleted file mode 100644 index be2c98409..000000000 --- a/hed/validator/tag_validator.py +++ /dev/null @@ -1,653 +0,0 @@ -""" -This module is used to validate the HED tags as strings. - -""" - -import re -from hed.errors.error_reporter import ErrorHandler -from hed.models.model_constants import DefTagNames -from hed.schema import HedKey -from hed.errors.error_types import ValidationErrors -from hed.validator import tag_validator_util - - -class TagValidator: - """ Validation for individual HED tags. """ - - CAMEL_CASE_EXPRESSION = r'([A-Z]+\s*[a-z-]*)+' - INVALID_STRING_CHARS = '[]{}~' - INVALID_STRING_CHARS_PLACEHOLDERS = '[]~' - OPENING_GROUP_CHARACTER = '(' - CLOSING_GROUP_CHARACTER = ')' - COMMA = ',' - - # # sign is allowed by default as it is specifically checked for separately. - DEFAULT_ALLOWED_PLACEHOLDER_CHARS = ".+-^ _#" - # Placeholder characters are checked elsewhere, but by default allowed - TAG_ALLOWED_CHARS = "-_/" - - def __init__(self, hed_schema): - """Constructor for the Tag_Validator class. - - Parameters: - hed_schema (HedSchema): A HedSchema object. - - Returns: - TagValidator: A Tag_Validator object. - - """ - self._hed_schema = hed_schema - - # Dict contains all the value portion validators for value class. e.g. "is this a number?" - self._value_unit_validators = self._register_default_value_validators() - - # ========================================================================== - # Top level validator functions - # =========================================================================+ - def run_hed_string_validators(self, hed_string_obj, allow_placeholders=False): - """Basic high level checks of the hed string - - Parameters: - hed_string_obj (HedString): A HED string. - allow_placeholders: Allow placeholder and curly brace characters - - Returns: - list: The validation issues associated with a HED string. Each issue is a dictionary. - - Notes: - - Used for basic invalid characters or bad delimiters. - - """ - validation_issues = [] - validation_issues += self.check_invalid_character_issues(hed_string_obj.get_original_hed_string(), - allow_placeholders) - validation_issues += self.check_count_tag_group_parentheses(hed_string_obj.get_original_hed_string()) - validation_issues += self.check_delimiter_issues_in_hed_string(hed_string_obj.get_original_hed_string()) - for tag in hed_string_obj.get_all_tags(): - validation_issues += self.check_tag_formatting(tag) - return validation_issues - - def run_validate_tag_characters(self, original_tag, allow_placeholders): - """ Basic character validation of tags - - Parameters: - original_tag (HedTag): A original tag. - allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value. - - Returns: - list: The validation issues associated with the characters. Each issue is dictionary. 
- - """ - return self.check_tag_invalid_chars(original_tag, allow_placeholders) - - def run_individual_tag_validators(self, original_tag, allow_placeholders=False, - is_definition=False): - """ Runs the hed_ops on the individual tags. - - Parameters: - original_tag (HedTag): A original tag. - allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value. - is_definition (bool): This tag is part of a Definition, not a normal line. - - Returns: - list: The validation issues associated with the tags. Each issue is dictionary. - - """ - validation_issues = [] - # validation_issues += self.check_tag_invalid_chars(original_tag, allow_placeholders) - if self._hed_schema: - validation_issues += self.check_tag_exists_in_schema(original_tag) - if original_tag.is_unit_class_tag(): - validation_issues += self.check_tag_unit_class_units_are_valid(original_tag) - elif original_tag.is_value_class_tag(): - validation_issues += self.check_tag_value_class_valid(original_tag) - elif original_tag.extension: - validation_issues += self.check_for_invalid_extension_chars(original_tag) - - if not allow_placeholders: - validation_issues += self.check_for_placeholder(original_tag, is_definition) - validation_issues += self.check_tag_requires_child(original_tag) - validation_issues += self.check_capitalization(original_tag) - return validation_issues - - def run_tag_level_validators(self, original_tag_list, is_top_level, is_group): - """ Run hed_ops at each level in a HED string. - - Parameters: - original_tag_list (list): A list containing the original HedTags. - is_top_level (bool): If True, this group is a "top level tag group". - is_group (bool): If true, group is contained by parenthesis. - - Returns: - list: The validation issues associated with each level in a HED string. - - Notes: - - This is for the top-level, all groups, and nested groups. - - This can contain definitions, Onset, etc tags. - - """ - validation_issues = [] - validation_issues += self.check_tag_level_issue(original_tag_list, is_top_level, is_group) - return validation_issues - - def run_all_tags_validators(self, tags): - """ Validate the multi-tag properties in a hed string. - - Parameters: - tags (list): A list containing the HedTags in a HED string. - - Returns: - list: The validation issues associated with the tags in a HED string. Each issue is a dictionary. - - Notes: - - Multi-tag properties include required tags. - - """ - validation_issues = [] - if self._hed_schema: - validation_issues += self.check_for_required_tags(tags) - validation_issues += self.check_multiple_unique_tags_exist(tags) - return validation_issues - - # ========================================================================== - # Mostly internal functions to check individual types of errors - # =========================================================================+ - def check_invalid_character_issues(self, hed_string, allow_placeholders): - """ Report invalid characters. - - Parameters: - hed_string (str): A hed string. - allow_placeholders: Allow placeholder and curly brace characters - - Returns: - list: Validation issues. Each issue is a dictionary. 
- - Notes: - - Invalid tag characters are defined by TagValidator.INVALID_STRING_CHARS or - TagValidator.INVALID_STRING_CHARS_PLACEHOLDERS - """ - validation_issues = [] - invalid_dict = TagValidator.INVALID_STRING_CHARS - if allow_placeholders: - invalid_dict = TagValidator.INVALID_STRING_CHARS_PLACEHOLDERS - for index, character in enumerate(hed_string): - if character in invalid_dict or ord(character) > 127: - validation_issues += self._report_invalid_character_error(hed_string, index) - - return validation_issues - - def check_count_tag_group_parentheses(self, hed_string): - """ Report unmatched parentheses. - - Parameters: - hed_string (str): A hed string. - - Returns: - list: A list of validation list. Each issue is a dictionary. - """ - validation_issues = [] - number_open_parentheses = hed_string.count('(') - number_closed_parentheses = hed_string.count(')') - if number_open_parentheses != number_closed_parentheses: - validation_issues += ErrorHandler.format_error(ValidationErrors.PARENTHESES_MISMATCH, - opening_parentheses_count=number_open_parentheses, - closing_parentheses_count=number_closed_parentheses) - return validation_issues - - def check_delimiter_issues_in_hed_string(self, hed_string): - """ Report missing commas or commas in value tags. - - Parameters: - hed_string (str): A hed string. - - Returns: - list: A validation issues list. Each issue is a dictionary. - """ - last_non_empty_valid_character = '' - last_non_empty_valid_index = 0 - current_tag = '' - issues = [] - - for i, current_character in enumerate(hed_string): - current_tag += current_character - if not current_character.strip(): - continue - if TagValidator._character_is_delimiter(current_character): - if current_tag.strip() == current_character: - issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string, - char_index=i) - current_tag = '' - continue - current_tag = '' - elif current_character == self.OPENING_GROUP_CHARACTER: - if current_tag.strip() == self.OPENING_GROUP_CHARACTER: - current_tag = '' - else: - issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag) - elif last_non_empty_valid_character == "," and current_character == self.CLOSING_GROUP_CHARACTER: - issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, source_string=hed_string, - char_index=i) - elif TagValidator._comma_is_missing_after_closing_parentheses(last_non_empty_valid_character, - current_character): - issues += ErrorHandler.format_error(ValidationErrors.COMMA_MISSING, tag=current_tag[:-1]) - break - last_non_empty_valid_character = current_character - last_non_empty_valid_index = i - if TagValidator._character_is_delimiter(last_non_empty_valid_character): - issues += ErrorHandler.format_error(ValidationErrors.TAG_EMPTY, - char_index=last_non_empty_valid_index, - source_string=hed_string) - return issues - - pattern_doubleslash = re.compile(r"([ \t/]{2,}|^/|/$)") - - def check_tag_formatting(self, original_tag): - """ Report repeated or erroneous slashes. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - - Returns: - list: Validation issues. Each issue is a dictionary. 
- """ - validation_issues = [] - for match in self.pattern_doubleslash.finditer(original_tag.org_tag): - validation_issues += ErrorHandler.format_error(ValidationErrors.NODE_NAME_EMPTY, - tag=original_tag, - index_in_tag=match.start(), - index_in_tag_end=match.end()) - - return validation_issues - - def check_tag_invalid_chars(self, original_tag, allow_placeholders): - """ Report invalid characters in the given tag. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - allow_placeholders (bool): Allow placeholder characters(#) if True. - - Returns: - list: Validation issues. Each issue is a dictionary. - """ - validation_issues = self._check_invalid_prefix_issues(original_tag) - allowed_chars = self.TAG_ALLOWED_CHARS - if allow_placeholders: - allowed_chars += "#" - validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) - return validation_issues - - def check_tag_exists_in_schema(self, original_tag): - """ Report invalid tag or doesn't take a value. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - - Returns: - list: Validation issues. Each issue is a dictionary. - """ - validation_issues = [] - if original_tag.is_basic_tag() or original_tag.is_takes_value_tag(): - return validation_issues - - is_extension_tag = original_tag.has_attribute(HedKey.ExtensionAllowed) - if not is_extension_tag: - actual_error = None - if "#" in original_tag.extension: - actual_error = ValidationErrors.PLACEHOLDER_INVALID - validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_EXTENSION_INVALID, tag=original_tag, - actual_error=actual_error) - else: - validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_EXTENDED, tag=original_tag, - index_in_tag=len(original_tag.org_base_tag), - index_in_tag_end=None) - return validation_issues - - def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None): - """Returns any issues found if this is a value tag""" - validation_issues = [] - if original_tag.is_takes_value_tag() and \ - not self._validate_value_class_portion(original_tag, stripped_value): - validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, report_as) - if error_code: - validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, - report_as, actual_error=error_code) - return validation_issues - - def _check_units(self, original_tag, bad_units, report_as): - """Returns an issue noting this is either bad units, or missing units""" - if bad_units: - tag_unit_class_units = original_tag.get_tag_unit_class_units() - validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_INVALID, - tag=report_as, units=tag_unit_class_units) - else: - default_unit = original_tag.default_unit - validation_issue = ErrorHandler.format_error(ValidationErrors.UNITS_MISSING, - tag=report_as, default_unit=default_unit) - return validation_issue - - def check_tag_unit_class_units_are_valid(self, original_tag, report_as=None, error_code=None): - """ Report incorrect unit class or units. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - report_as (HedTag): Report errors as coming from this tag, rather than original_tag. - error_code (str): Override error codes to this - Returns: - list: Validation issues. Each issue is a dictionary. 
- """ - validation_issues = [] - if original_tag.is_unit_class_tag(): - stripped_value, unit = original_tag.get_stripped_unit_value() - if not unit: - # Todo: in theory this should separately validate the number and the units, for units - # that are prefixes like $. Right now those are marked as unit invalid AND value_invalid. - bad_units = " " in original_tag.extension - report_as = report_as if report_as else original_tag - - if bad_units: - stripped_value = stripped_value.split(" ")[0] - - validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code) - validation_issues += self._check_units(original_tag, bad_units, report_as) - - # We don't want to give this overall error twice - if error_code and not any(error_code == issue['code'] for issue in validation_issues): - new_issue = validation_issues[0].copy() - new_issue['code'] = error_code - validation_issues += [new_issue] - - return validation_issues - - def check_tag_value_class_valid(self, original_tag, report_as=None, error_code=None): - """ Report an invalid value portion. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - report_as (HedTag): Report errors as coming from this tag, rather than original_tag. - error_code (str): Override error codes to this - Returns: - list: Validation issues. - """ - validation_issues = [] - if not self._validate_value_class_portion(original_tag, original_tag.extension): - validation_issues += ErrorHandler.format_error(ValidationErrors.VALUE_INVALID, - report_as if report_as else original_tag, - actual_error=error_code) - - return validation_issues - - def check_tag_requires_child(self, original_tag): - """ Report if tag is a leaf with 'requiredTag' attribute. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - - Returns: - list: Validation issues. Each issue is a dictionary. - """ - validation_issues = [] - if original_tag.has_attribute(HedKey.RequireChild): - validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_REQUIRES_CHILD, - tag=original_tag) - return validation_issues - - def check_for_invalid_extension_chars(self, original_tag): - """Report invalid characters in extension/value. - - Parameters: - original_tag (HedTag): The original tag that is used to report the error. - - Returns: - list: Validation issues. Each issue is a dictionary. - """ - allowed_chars = self.TAG_ALLOWED_CHARS - allowed_chars += self.DEFAULT_ALLOWED_PLACEHOLDER_CHARS - allowed_chars += " " - return self._check_invalid_chars(original_tag.extension, allowed_chars, original_tag, - starting_index=len(original_tag.org_base_tag) + 1) - - def check_capitalization(self, original_tag): - """Report warning if incorrect tag capitalization. - - Parameters: - original_tag (HedTag): The original tag used to report the warning. - - Returns: - list: Validation issues. Each issue is a dictionary. - """ - validation_issues = [] - tag_names = original_tag.org_base_tag.split("/") - for tag_name in tag_names: - correct_tag_name = tag_name.capitalize() - if tag_name != correct_tag_name and not re.search(self.CAMEL_CASE_EXPRESSION, tag_name): - validation_issues += ErrorHandler.format_error(ValidationErrors.STYLE_WARNING, - tag=original_tag) - break - return validation_issues - - def check_tag_level_issue(self, original_tag_list, is_top_level, is_group): - """ Report tags incorrectly positioned in hierarchy. - - Parameters: - original_tag_list (list): HedTags containing the original tags. 
- is_top_level (bool): If True, this group is a "top level tag group" - is_group (bool): If true group should be contained by parenthesis - - Returns: - list: Validation issues. Each issue is a dictionary. - - Notes: - - Top-level groups can contain definitions, Onset, etc tags. - """ - validation_issues = [] - top_level_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)] - tag_group_tags = [tag for tag in original_tag_list if - tag.base_tag_has_attribute(HedKey.TagGroup)] - for tag_group_tag in tag_group_tags: - if not is_group: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TAG_GROUP_TAG, - tag=tag_group_tag) - for top_level_tag in top_level_tags: - if not is_top_level: - actual_code = None - if top_level_tag.short_base_tag == DefTagNames.DEFINITION_ORG_KEY: - actual_code = ValidationErrors.DEFINITION_INVALID - elif top_level_tag.short_base_tag in {DefTagNames.ONSET_ORG_KEY, DefTagNames.OFFSET_ORG_KEY}: - actual_code = ValidationErrors.ONSET_OFFSET_INSET_ERROR - - if actual_code: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=top_level_tag, - actual_error=actual_code) - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, - tag=top_level_tag) - - if is_top_level and len(top_level_tags) > 1: - validation_issues += ErrorHandler.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, - tag=top_level_tags[0], - multiple_tags=top_level_tags[1:]) - - return validation_issues - - def check_for_required_tags(self, tags): - """ Report missing required tags. - - Parameters: - tags (list): HedTags containing the tags. - - Returns: - list: Validation issues. Each issue is a dictionary. - - """ - validation_issues = [] - required_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Required) - for required_prefix in required_prefixes: - if not any(tag.long_tag.lower().startswith(required_prefix.lower()) for tag in tags): - validation_issues += ErrorHandler.format_error(ValidationErrors.REQUIRED_TAG_MISSING, - tag_namespace=required_prefix) - return validation_issues - - def check_multiple_unique_tags_exist(self, tags): - """ Report if multiple identical unique tags exist - - A unique Term can only appear once in a given HedString. - Unique terms are terms with the 'unique' property in the schema. - - Parameters: - tags (list): HedTags containing the tags. - - Returns: - list: Validation issues. Each issue is a dictionary. 
- """ - validation_issues = [] - unique_prefixes = self._hed_schema.get_tags_with_attribute(HedKey.Unique) - for unique_prefix in unique_prefixes: - unique_tag_prefix_bool_mask = [x.long_tag.lower().startswith(unique_prefix.lower()) for x in tags] - if sum(unique_tag_prefix_bool_mask) > 1: - validation_issues += ErrorHandler.format_error(ValidationErrors.TAG_NOT_UNIQUE, - tag_namespace=unique_prefix) - return validation_issues - - # ========================================================================== - # Private utility functions - # =========================================================================+ - def _check_invalid_prefix_issues(self, original_tag): - """Check for invalid schema namespace.""" - issues = [] - schema_namespace = original_tag.schema_namespace - if schema_namespace and not schema_namespace[:-1].isalpha(): - issues += ErrorHandler.format_error(ValidationErrors.TAG_NAMESPACE_PREFIX_INVALID, - tag=original_tag, tag_namespace=schema_namespace) - return issues - - def _validate_value_class_portion(self, original_tag, portion_to_validate): - if portion_to_validate is None: - return False - - value_class_types = original_tag.value_classes - return self.validate_value_class_type(portion_to_validate, value_class_types) - - def _report_invalid_character_error(self, hed_string, index): - """ Report an invalid character. - - Parameters: - hed_string (str): The HED string that caused the error. - index (int): The index of the invalid character in the HED string. - - Returns: - list: A singleton list with a dictionary representing the error. - - """ - error_type = ValidationErrors.CHARACTER_INVALID - character = hed_string[index] - if character == "~": - error_type = ValidationErrors.TILDES_UNSUPPORTED - return ErrorHandler.format_error(error_type, char_index=index, - source_string=hed_string) - - @staticmethod - def _comma_is_missing_after_closing_parentheses(last_non_empty_character, current_character): - """ Checks if missing comma after a closing parentheses. - - Parameters: - last_non_empty_character (str): The last non-empty string in the HED string. - current_character (str): The current character in the HED string. - - Returns: - bool: True if a comma is missing after a closing parentheses. False, if otherwise. - - Notes: - - This is a helper function for the find_missing_commas_in_hed_string function. - - """ - return last_non_empty_character == TagValidator.CLOSING_GROUP_CHARACTER and \ - not (TagValidator._character_is_delimiter(current_character) - or current_character == TagValidator.CLOSING_GROUP_CHARACTER) - - @staticmethod - def _character_is_delimiter(character): - """ Checks if the character is a delimiter. - - Parameters: - character (str): A string character. - - Returns: - bool: Returns true if the character is a delimiter. False, if otherwise. - - Notes: - - A delimiter is a comma. - - """ - return character == TagValidator.COMMA - - def check_for_placeholder(self, original_tag, is_definition=False): - """ Report invalid placeholder characters. - - Parameters: - original_tag (HedTag): The HedTag to be checked - is_definition (bool): If True, placeholders are allowed. - - Returns: - list: Validation issues. Each issue is a dictionary. - - Notes: - - Invalid placeholder may appear in the extension/value portion of a tag. 
- - """ - validation_issues = [] - if not is_definition: - starting_index = len(original_tag.org_base_tag) + 1 - for i, character in enumerate(original_tag.extension): - if character == "#": - validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER, - tag=original_tag, - index_in_tag=starting_index + i, - index_in_tag_end=starting_index + i + 1, - actual_error=ValidationErrors.PLACEHOLDER_INVALID) - - return validation_issues - - def _check_invalid_chars(self, check_string, allowed_chars, source_tag, starting_index=0): - validation_issues = [] - for i, character in enumerate(check_string): - if character.isalnum(): - continue - if character in allowed_chars: - continue - # Todo: Remove this patch when clock times and invalid characters are more properly checked - if character == ":": - continue - validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER, - tag=source_tag, index_in_tag=starting_index + i, - index_in_tag_end=starting_index + i + 1) - return validation_issues - - @staticmethod - def _register_default_value_validators(): - validator_dict = { - tag_validator_util.DATE_TIME_VALUE_CLASS: tag_validator_util.is_date_time, - tag_validator_util.NUMERIC_VALUE_CLASS: tag_validator_util.validate_numeric_value_class, - tag_validator_util.TEXT_VALUE_CLASS: tag_validator_util.validate_text_value_class, - tag_validator_util.NAME_VALUE_CLASS: tag_validator_util.validate_text_value_class - } - - return validator_dict - - def validate_value_class_type(self, unit_or_value_portion, valid_types): - """ Report invalid unit or valid class values. - - Parameters: - unit_or_value_portion (str): The value portion to validate. - valid_types (list): The names of value class or unit class types (e.g. dateTime or dateTimeClass). - - Returns: - type_valid (bool): True if this is one of the valid_types validators. - - """ - for unit_class_type in valid_types: - valid_func = self._value_unit_validators.get(unit_class_type) - if valid_func: - if valid_func(unit_or_value_portion): - return True - return False diff --git a/hed/validator/tag_validator_util.py b/hed/validator/tag_validator_util.py deleted file mode 100644 index f59bc5066..000000000 --- a/hed/validator/tag_validator_util.py +++ /dev/null @@ -1,80 +0,0 @@ -""" Utilities to support HED validation. """ -import datetime -import re - - -DATE_TIME_VALUE_CLASS = 'dateTimeClass' -NUMERIC_VALUE_CLASS = "numericClass" -TEXT_VALUE_CLASS = "textClass" -NAME_VALUE_CLASS = "nameClass" - -DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$' - - -def is_date_time(date_time_string): - """Check if the specified string is a valid datetime. - - Parameters: - date_time_string (str): A datetime string. - - Returns: - bool: True if the datetime string is valid. False, if otherwise. - - Notes: - - ISO 8601 datetime string. - - """ - try: - date_time_obj = datetime.datetime.fromisoformat(date_time_string) - return not date_time_obj.tzinfo - except ValueError: - return False - - -def validate_numeric_value_class(numeric_string): - """ Checks to see if valid numeric value. - - Parameters: - numeric_string (str): A string that should be only a number with no units. - - Returns: - bool: True if the numeric string is valid. False, if otherwise. - - """ - if re.search(DIGIT_OR_POUND_EXPRESSION, numeric_string): - return True - - return False - - -def validate_text_value_class(text_string): - """ Placeholder for eventual text value class validation - - Parameters: - text_string (str): Text class. 
- - Returns: - bool: True - - """ - return True - - -def is_clock_face_time(time_string): - """ Check if a valid HH:MM time string. - - Parameters: - time_string (str): A time string. - - Returns: - bool: True if the time string is valid. False, if otherwise. - - Notes: - - This is deprecated and has no expected use going forward. - - """ - try: - time_obj = datetime.time.fromisoformat(time_string) - return not time_obj.tzinfo and not time_obj.microsecond - except ValueError: - return False diff --git a/readthedocs.yml b/readthedocs.yml index f71981387..b20edf11f 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -8,6 +8,9 @@ build: os: "ubuntu-22.04" tools: python: "3.7" + jobs: + pre_build: + - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/source/ docs/_build/linkcheck # Build documentation in the docs/ directory with Sphinx sphinx: @@ -15,7 +18,6 @@ sphinx: configuration: docs/source/conf.py fail_on_warning: false - python: install: - requirements: docs/requirements.txt diff --git a/requirements.txt b/requirements.txt index 07c3304d7..5c91d1ecb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,6 @@ openpyxl>=3.1.0 pandas>=1.3.5 portalocker>=2.7.0 semantic_version>=2.10.0 -wordcloud==1.9.2 \ No newline at end of file +wordcloud==1.9.3 +jsonschema>=4.17.3 +rdflib>=6 diff --git a/setup.cfg b/setup.cfg index fbd9ad553..c43b5f190 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,12 +25,14 @@ install_requires = et-xmlfile inflect jdcal + jsonschema numpy openpyxl pandas portalocker python-dateutil pytz + rdflib semantic-version six diff --git a/spec_tests/hed-examples b/spec_tests/hed-examples new file mode 160000 index 000000000..c9a7eb62a --- /dev/null +++ b/spec_tests/hed-examples @@ -0,0 +1 @@ +Subproject commit c9a7eb62a5a7b689106e42c1b46562ba15458d81 diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index be850badc..9a22f1563 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit be850badc6d4f3e1079df91fa3a4a4cb0feb31d9 +Subproject commit 9a22f1563501d47bf99a80f3a0f3d6c8725872a5 diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 3e87fdbda..b67a44514 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -36,9 +36,6 @@ def run_single_test(self, test_file): test_info = json.load(fp) for info in test_info: error_code = info['error_code'] - verify_code = True - # To be deprecated once we add this to all tests - self._verify_code = verify_code if error_code in skip_tests: print(f"Skipping {error_code} test because: {skip_tests[error_code]}") continue @@ -47,7 +44,7 @@ def run_single_test(self, test_file): print(f"Skipping {name} test because: {skip_tests[name]}") continue - # if name != "attribute-invalid-in-library": + # if name != "sidecar-braces-invalid-spot": # continue description = info['description'] schema = info['schema'] @@ -79,7 +76,7 @@ def report_result(self, expected_result, issues, error_code, description, name, print(f"Passed '{test_type}' (which should fail) '{name}': {test}") print(get_printable_issue_string(issues)) self.fail_count.append(name) - elif self._verify_code: + else: if any(issue['code'] == error_code for issue in issues): return print(f"{error_code}: {description}") @@ -174,6 +171,9 @@ def _run_single_schema_test(self, info, error_code, description,name, error_hand issues = loaded_schema.check_compliance() except HedFileError as e: issues = e.issues + if not issues: + issues += [{"code": e.code, + 
"message": e.message}] self.report_result(result, issues, error_code, description, name, test, "schema_tests") def test_errors(self): diff --git a/spec_tests/validate_bids.py b/spec_tests/validate_bids.py new file mode 100644 index 000000000..0ee852934 --- /dev/null +++ b/spec_tests/validate_bids.py @@ -0,0 +1,35 @@ +import os +import unittest + +from hed.tools import BidsDataset +from hed.errors import get_printable_issue_string + + +class MyTestCase(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.base_dir = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'hed-examples/datasets')) + cls.fail_count = [] + + @classmethod + def tearDownClass(cls): + pass + + def test_validation(self): + base_dir = self.base_dir + for directory in os.listdir(base_dir): + dataset_path = os.path.join(base_dir, directory) + if not os.path.isdir(dataset_path): + continue + + bids_data = BidsDataset(dataset_path) + issues = bids_data.validate(check_for_warnings=False) + if issues: + self.fail_count.append((directory, issues)) + print(f"{len(self.fail_count)} tests got an unexpected result") + print("\n".join(get_printable_issue_string(issue, f"Errors in directory: {title}", skip_filename=False) for title, issue in self.fail_count)) + self.assertEqual(0, len(self.fail_count)) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/data/bids_tests/eeg_ds003645s_hed/dataset_description.json b/tests/data/bids_tests/eeg_ds003645s_hed/dataset_description.json index 91b31167a..f293dc7a6 100644 --- a/tests/data/bids_tests/eeg_ds003645s_hed/dataset_description.json +++ b/tests/data/bids_tests/eeg_ds003645s_hed/dataset_description.json @@ -1,7 +1,7 @@ { "Name": "Face processing MEEG dataset with HED annotation", - "BIDSVersion": "1.8.4", - "HEDVersion": "8.0.0", + "BIDSVersion": "1.9.0", + "HEDVersion": "8.2.0", "License": "CC0", "Authors": [ "Daniel G. Wakeman", diff --git a/tests/data/bids_tests/eeg_ds003645s_hed_inheritance/dataset_description.json b/tests/data/bids_tests/eeg_ds003645s_hed_inheritance/dataset_description.json index 91b31167a..f293dc7a6 100644 --- a/tests/data/bids_tests/eeg_ds003645s_hed_inheritance/dataset_description.json +++ b/tests/data/bids_tests/eeg_ds003645s_hed_inheritance/dataset_description.json @@ -1,7 +1,7 @@ { "Name": "Face processing MEEG dataset with HED annotation", - "BIDSVersion": "1.8.4", - "HEDVersion": "8.0.0", + "BIDSVersion": "1.9.0", + "HEDVersion": "8.2.0", "License": "CC0", "Authors": [ "Daniel G. Wakeman", diff --git a/tests/data/bids_tests/eeg_ds003645s_hed_library/dataset_description.json b/tests/data/bids_tests/eeg_ds003645s_hed_library/dataset_description.json index fbd1f5e8a..bdf62c28d 100644 --- a/tests/data/bids_tests/eeg_ds003645s_hed_library/dataset_description.json +++ b/tests/data/bids_tests/eeg_ds003645s_hed_library/dataset_description.json @@ -1,7 +1,7 @@ { "Name": "Face processing MEEG dataset with HED annotation", - "BIDSVersion": "1.8.4", - "HEDVersion": ["8.0.0", "sc:score_1.0.0", "test:testlib_1.0.2"], + "BIDSVersion": "1.9.0", + "HEDVersion": ["8.2.0", "sc:score_1.0.0", "test:testlib_1.0.2"], "License": "CC0", "Authors": [ "Daniel G. 
Wakeman", diff --git a/tests/data/remodel_tests/all_remodel_operations.json b/tests/data/remodel_tests/all_remodel_operations.json new file mode 100644 index 000000000..34e929f95 --- /dev/null +++ b/tests/data/remodel_tests/all_remodel_operations.json @@ -0,0 +1,278 @@ +[ + { + "operation": "remove_columns", + "description": "Remove unwanted columns prior to analysis", + "parameters": { + "column_names": [ + "value", + "sample" + ], + "ignore_missing": true + } + }, + { + "operation": "factor_column", + "description": "Create factors for the succesful_stop and unsuccesful_stop values.", + "parameters": { + "column_name": "trial_type", + "factor_values": [ + "succesful_stop", + "unsuccesful_stop" + ], + "factor_names": [ + "stopped", + "stop_failed" + ] + } + }, + { + "operation": "factor_hed_tags", + "description": "Create factors based on whether the event represented a correct or incorrect action.", + "parameters": { + "queries": [ + "correct-action", + "incorrect-action" + ], + "query_names": [ + "correct", + "incorrect" + ], + "expand_context": false + } + }, + { + "operation": "factor_hed_type", + "description": "Factor based on the sex of the images being presented.", + "parameters": { + "type_tag": "Condition-variable" + } + }, + { + "operation": "merge_consecutive", + "description": "Merge consecutive *succesful_stop* events that match the *match_columns.", + "parameters": { + "column_name": "trial_type", + "event_code": "succesful_stop", + "match_columns": [ + "stop_signal_delay", + "response_hand", + "sex" + ], + "set_durations": true, + "ignore_missing": true + } + }, + { + "operation": "remap_columns", + "description": "Map response_accuracy and response hand into a single column.", + "parameters": { + "source_columns": [ + "response_accuracy", + "response_hand" + ], + "destination_columns": [ + "response_type" + ], + "map_list": [ + [ + "correct", + "left", + "correct_left" + ], + [ + "correct", + "right", + "correct_right" + ], + [ + "incorrect", + "left", + "incorrect_left" + ], + [ + "incorrect", + "right", + "incorrect_left" + ], + [ + "n/a", + "n/a", + "n/a" + ] + ], + "ignore_missing": true + } + }, + { + "operation": "remove_columns", + "description": "Remove extra columns before the next step.", + "parameters": { + "column_names": [ + "stop_signal_delay", + "response_accuracy", + "face" + ], + "ignore_missing": true + } + }, + { + "operation": "remove_rows", + "description": "Remove rows where trial_type is either succesful_stop or unsuccesful_stop.", + "parameters": { + "column_name": "trial_type", + "remove_values": [ + "succesful_stop", + "unsuccesful_stop" + ] + } + }, + { + "operation": "rename_columns", + "description": "Rename columns to be more descriptive.", + "parameters": { + "column_mapping": { + "stop_signal_delay": "stop_delay", + "response_hand": "hand_used" + }, + "ignore_missing": true + } + }, + { + "operation": "reorder_columns", + "description": "Reorder columns.", + "parameters": { + "column_order": [ + "onset", + "duration", + "response_time", + "trial_type" + ], + "ignore_missing": true, + "keep_others": false + } + }, + { + "operation": "split_rows", + "description": "add response events to the trials.", + "parameters": { + "anchor_column": "trial_type", + "new_events": { + "response": { + "onset_source": [ + "response_time" + ], + "duration": [ + 0 + ], + "copy_columns": [ + "response_accuracy", + "response_hand", + "sex", + "trial_number" + ] + }, + "stop_signal": { + "onset_source": [ + "stop_signal_delay" + ], + "duration": [ + 0.5 + ], + 
"copy_columns": [ + "trial_number" + ] + } + }, + "remove_parent_row": false + } + }, + { + "operation": "summarize_column_names", + "description": "Summarize column names.", + "parameters": { + "summary_name": "AOMIC_column_names", + "summary_filename": "AOMIC_column_names" + } + }, + { + "operation": "summarize_column_values", + "description": "Summarize the column values in an excerpt.", + "parameters": { + "summary_name": "AOMIC_column_values", + "summary_filename": "AOMIC_column_values", + "skip_columns": [ + "onset", + "duration" + ], + "value_columns": [ + "response_time", + "stop_signal_delay" + ] + } + }, + { + "operation": "summarize_definitions", + "description": "Summarize the definitions used in this dataset.", + "parameters": { + "summary_name": "HED_column_definition_summary", + "summary_filename": "HED_column_definition_summary" + } + }, + { + "operation": "summarize_hed_tags", + "description": "Summarize the HED tags in the dataset.", + "parameters": { + "summary_name": "summarize_hed_tags", + "summary_filename": "summarize_hed_tags", + "tags": { + "Sensory events": [ + "Sensory-event", + "Sensory-presentation", + "Task-stimulus-role", + "Experimental-stimulus" + ], + "Agent actions": [ + "Agent-action", + "Agent", + "Action", + "Agent-task-role", + "Task-action-type", + "Participant-response" + ], + "Objects": [ + "Item" + ] + } + } + }, + { + "operation": "summarize_hed_type", + "description": "Summarize column names.", + "parameters": { + "summary_name": "AOMIC_condition_variables", + "summary_filename": "AOMIC_condition_variables", + "type_tag": "condition-variable" + } + }, + { + "operation": "summarize_hed_validation", + "description": "Summarize validation errors in the sample dataset.", + "parameters": { + "summary_name": "AOMIC_sample_validation", + "summary_filename": "AOMIC_sample_validation", + "check_for_warnings": true + } + }, + { + "operation": "summarize_sidecar_from_events", + "description": "Generate a sidecar from the excerpted events file.", + "parameters": { + "summary_name": "AOMIC_generate_sidecar", + "summary_filename": "AOMIC_generate_sidecar", + "value_columns": [ + "response_time", + "stop_signal_delay" + ] + } + } +] \ No newline at end of file diff --git a/tests/data/schema_tests/HED8.0.0.mediawiki b/tests/data/schema_tests/HED8.2.0.mediawiki similarity index 73% rename from tests/data/schema_tests/HED8.0.0.mediawiki rename to tests/data/schema_tests/HED8.2.0.mediawiki index 60149ac4e..26d775d08 100644 --- a/tests/data/schema_tests/HED8.0.0.mediawiki +++ b/tests/data/schema_tests/HED8.2.0.mediawiki @@ -1,12 +1,14 @@ -HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd" +HED version="8.2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd" '''Prologue''' -This schema is the first official release that includes an xsd and requires unit class, unit modifier, value class, schema attribute and property sections. +The HED standard schema is a hierarchically-organized vocabulary for annotating events and experimental structure. HED annotations consist of comma-separated tags drawn from this vocabulary. This vocabulary can be augmented by terms drawn from specialized library schema. 
+ +Each term in this vocabulary has a human-readable description and may include additional attributes that give additional properties or that specify how tools should treat the tag during analysis. The meaning of these attributes is described in the Additional schema properties section. !# start schema '''Event''' {suggestedTag=Task-property}[Something that happens at a given time and (typically) place. Elements of this tag subtree designate the general category in which an event falls.] -* Sensory-event {suggestedTag=Task-event-role, suggestedTag=Attribute/Sensory}[Something perceivable by the participant. An event meant to be an experimental stimulus should include the tag Task-property/Task-event-role/Experimental-stimulus.] +* Sensory-event {suggestedTag=Task-event-role, suggestedTag=Sensory-presentation}[Something perceivable by the participant. An event meant to be an experimental stimulus should include the tag Task-property/Task-event-role/Experimental-stimulus.] * Agent-action {suggestedTag=Task-event-role, suggestedTag=Agent}[Any action engaged in by an agent (see the Agent subtree for agent categories). A participant response to an experiment stimulus should include the tag Agent-property/Agent-task-role/Experiment-participant.] * Data-feature {suggestedTag=Data-property}[An event marking the occurrence of a data feature such as an interictal spike or alpha burst that is often added post hoc to the data record.] * Experiment-control [An event pertaining to the physical control of the experiment during its operation.] @@ -180,6 +182,7 @@ This schema is the first official release that includes an xsd and requires unit '''Item''' {extensionAllowed}[An independently existing thing (living or nonliving).] * Biological-item [An entity that is biological, that is related to living organisms.] ** Anatomical-item [A biological structure, system, fluid or other substance excluding single molecular entities.] +*** Body [The biological structure representing an organism.] *** Body-part [Any part of an organism.] **** Head [The upper part of the human body, or the front or upper part of the body of an animal, typically separated from the rest of the body by a neck, and containing the brain, mouth, and sense organs.] ***** Hair [The filamentous outgrowth of the epidermis.] @@ -210,7 +213,7 @@ This schema is the first official release that includes an xsd and requires unit ***** Torso-back [The rear surface of the human body from the shoulders to the hips.] ***** Buttocks [The round fleshy parts that form the lower rear area of a human trunk.] ***** Torso-chest [The anterior side of the thorax from the neck to the abdomen.] -***** Gentalia [The external organs of reproduction.] +***** Gentalia {deprecatedFrom=8.1.0}[The external organs of reproduction.] ***** Hip [The lateral prominence of the pelvis from the waist to the thigh.] ***** Waist [The abdominal circumference at the navel.] **** Upper-extremity [Refers to the whole superior limb (shoulder, arm, elbow, wrist, hand).] @@ -232,7 +235,7 @@ This schema is the first official release that includes an xsd and requires unit *** Animal [A living organism that has membranous cell walls, requires oxygen and organic foods, and is capable of voluntary movement.] *** Human [The bipedal primate mammal Homo sapiens.] *** Plant [Any living organism that typically synthesizes its food from inorganic substances and possesses cellulose cell walls.] 
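Note: every term line in the hunks above and below follows the shape the new prologue describes: leading asterisks give the depth in the hierarchy, an optional {...} block lists attributes such as suggestedTag or deprecatedFrom, and the bracketed text is the human-readable description. A minimal sketch of that line grammar in Python (illustrative only; this regex is not the parser hed-python actually uses, and it ignores edge cases such as brackets inside descriptions):

import re

# Depth = number of leading '*'s; the attribute block and description are optional.
TERM_LINE = re.compile(
    r"^(?P<stars>\*+)\s*(?P<name>[#\w-]+)\s*"
    r"(?:\{(?P<attrs>[^}]*)\})?\s*(?:\[(?P<desc>[^][]*)\])?"
)

match = TERM_LINE.match("*** Body [The biological structure representing an organism.]")
print(len(match.group("stars")), match.group("name"), match.group("desc"))
# 3 Body The biological structure representing an organism.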
-* Language-item {suggestedTag=Attribute/Sensory}[An entity related to a systematic means of communicating by the use of sounds, symbols, or gestures.]
+* Language-item {suggestedTag=Sensory-presentation}[An entity related to a systematic means of communicating by the use of sounds, symbols, or gestures.]
 ** Character [A mark or symbol used in writing.]
 ** Clause [A unit of grammatical organization next below the sentence in rank, usually consisting of a subject and predicate.]
 ** Glyph [A hieroglyphic character, symbol, or pictograph.]
@@ -244,12 +247,13 @@ This schema is the first official release that includes an xsd and requires unit
 ** Syllable [A unit of spoken language larger than a phoneme.]
 ** Textblock [A block of text.]
 ** Word [A word is the smallest free form (an item that may be expressed in isolation with semantic or pragmatic content) in a language.]
-* Object {suggestedTag=Attribute/Sensory}[Something perceptible by one or more of the senses, especially by vision or touch. A material thing.]
+* Object {suggestedTag=Sensory-presentation}[Something perceptible by one or more of the senses, especially by vision or touch. A material thing.]
 ** Geometric-object [An object or a representation that has structure and topology in space.]
 *** Pattern [An arrangement of objects, facts, behaviors, or other things which have scientific, mathematical, geometric, statistical, or other meaning.]
 **** Dots [A small round mark or spot.]
 **** LED-pattern [A pattern created by lighting selected members of a fixed light emitting diode array.]
 *** 2D-shape [A planar, two-dimensional shape.]
+**** Arrow [A shape with a pointed end indicating direction.]
 **** Clockface [The dial face of a clock. A location identifier based on clockface numbering or anatomic subregion.]
 **** Cross [A figure or mark formed by two intersecting lines crossing at their midpoints.]
 **** Dash [A horizontal stroke in writing or printing to mark a pause or break in sense or to represent omitted letters or words.]
@@ -330,6 +334,7 @@ This schema is the first official release that includes an xsd and requires unit
 **** Note [A brief written record.]
 **** Book [A volume made up of pages fastened along one edge and enclosed between protective covers.]
 **** Notebook [A book for notes or memoranda.]
+**** Questionnaire [A document consisting of questions and possibly responses, depending on whether it has been filled out.]
 *** Furnishing [Furniture, fittings, and other decorative accessories, such as curtains and carpets, for a house or room.]
 *** Manufactured-material [Substances created or extracted from raw materials.]
 **** Ceramic [A hard, brittle, heat-resistant and corrosion-resistant material made by shaping and then firing a nonmetallic mineral, such as clay, at a high temperature.]
@@ -389,7 +394,7 @@ This schema is the first official release that includes an xsd and requires unit
 *** Vocalized-sound [Musical sound produced by vocal cords in a biological agent.]
 ** Named-animal-sound [A sound recognizable as being associated with particular animals.]
 *** Barking [Sharp explosive cries like sounds made by certain animals, especially a dog, fox, or seal.]
-*** Bleating [Wavering cries like sounds made by a sheep, goat, or calf. ]
+*** Bleating [Wavering cries like sounds made by a sheep, goat, or calf.]
 *** Crowing [Loud shrill sounds characteristic of roosters.]
 *** Chirping [Short, sharp, high-pitched noises like sounds made by small birds or an insects.]
 *** Growling [Low guttural sounds like those that made in the throat by a hostile dog or other animal.]
@@ -416,6 +421,7 @@ This schema is the first official release that includes an xsd and requires unit
 **** Anesthetized [Having lost sensation to pain or having senses dulled due to the effects of an anesthetic.]
 **** Asleep [Having entered a periodic, readily reversible state of reduced awareness and metabolic activity, usually accompanied by physical relaxation and brain activity.]
 **** Attentive [Concentrating and focusing mental energy on the task or surroundings.]
+**** Distracted [Lacking in concentration because of being preoccupied.]
 **** Awake [In a non sleeping state.]
 **** Brain-dead [Characterized by the irreversible absence of cortical and brain stem functioning.]
 **** Comatose [In a state of profound unconsciousness associated with markedly depressed cerebral activity.]
@@ -461,7 +467,7 @@ This schema is the first official release that includes an xsd and requires unit
 **** On-treadmill [Ambulation on an exercise apparatus with an endless moving belt to support moving in place.]
 **** Prone [Positioned in a recumbent body position whereby the person lies on its stomach and faces downward.]
 **** Sitting [In a seated position.]
-**** Standing [Assuming or maintaining an erect upright position. ]
+**** Standing [Assuming or maintaining an erect upright position.]
 **** Seated-with-chin-rest [Using a device that supports the chin and head.]
 ** Agent-task-role [The function or part that is ascribed to an agent in performing the task.]
 *** Experiment-actor [An agent who plays a predetermined role to create the experiment scenario.]
@@ -470,7 +476,7 @@ This schema is the first official release that includes an xsd and requires unit
 *** Experimenter [Person who is the owner of the experiment and has its responsibility.]
 ** Agent-trait [A genetically, environmentally, or socially determined characteristic of an agent.]
 *** Age [Length of time elapsed time since birth of the agent.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Agent-experience-level [Amount of skill or knowledge that the agent has as pertains to the task.]
 **** Expert-level {relatedTag=Intermediate-experience-level, relatedTag=Novice-level}[Having comprehensive and authoritative knowledge of or skill in a particular area related to the task.]
 **** Intermediate-experience-level {relatedTag=Expert-level, relatedTag=Novice-level}[Having a moderate amount of knowledge or skill related to the task.]
@@ -480,31 +486,35 @@ This schema is the first official release that includes an xsd and requires unit
 **** Female [Biological sex of an individual with female sexual organs such ova.]
 **** Male [Biological sex of an individual with male sexual organs producing sperm.]
 **** Intersex [Having genitalia and/or secondary sexual characteristics of indeterminate sex.]
+*** Ethnicity [Belonging to a social group that has a common national or cultural tradition. Use with Label to avoid extension.]
 *** Handedness [Individual preference for use of a hand, known as the dominant hand.]
 **** Left-handed [Preference for using the left hand or foot for tasks requiring the use of a single hand or foot.]
 **** Right-handed [Preference for using the right hand or foot for tasks requiring the use of a single hand or foot.]
 **** Ambidextrous [Having no overall dominance in the use of right or left hand or foot in the performance of tasks that require one hand or foot.]
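Note: the # {takesValue, valueClass=numericClass} placeholders added in this hunk (under Age) and in the Data-resolution hunk below are checked by the numeric value-class rule that appears verbatim in the hed/validator/tag_validator_util.py module this diff deletes. A self-contained sketch condensed from that deleted module:

import datetime
import re

# Copied from the deleted tag_validator_util.py: a numericClass value is a bare
# number (scientific notation allowed) or the '#' placeholder itself.
DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$'

def validate_numeric_value_class(numeric_string):
    return bool(re.search(DIGIT_OR_POUND_EXPRESSION, numeric_string))

def is_date_time(date_time_string):
    # dateTimeClass values are ISO 8601 strings without timezone information.
    try:
        return datetime.datetime.fromisoformat(date_time_string).tzinfo is None
    except ValueError:
        return False

print(validate_numeric_value_class("31"))        # True: a valid Age/# substitution
print(validate_numeric_value_class("31 years"))  # False: units are not part of the value
print(is_date_time("2023-11-02T10:30:00"))       # True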
+*** Race [Belonging to a group sharing physical or social qualities as defined within a specified society. Use with Label to avoid extension.]
 * Data-property {extensionAllowed}[Something that pertains to data or information.]
 ** Data-marker [An indicator placed to mark something.]
+*** Data-break-marker [An indicator placed to indicate a gap in the data.]
 *** Temporal-marker[An indicator placed at a particular time in the data.]
-**** Onset {topLevelTagGroup}[Labels the start or beginning of something, usually an event.]
-**** Offset {topLevelTagGroup}[Labels the time at which something stops.]
+**** Inset {topLevelTagGroup,reserved,relatedTag=Onset,relatedTag=Offset}[Marks an intermediate point in an ongoing event of temporal extent.]
+**** Onset {topLevelTagGroup,reserved,relatedTag=Inset,relatedTag=Offset}[Marks the start of an ongoing event of temporal extent.]
+**** Offset {topLevelTagGroup,reserved,relatedTag=Onset, relatedTag=Inset}[Marks the end of an event of temporal extent.]
 **** Pause [Indicates the temporary interruption of the operation a process and subsequently wait for a signal to continue.]
 **** Time-out [A cancellation or cessation that automatically occurs when a predefined interval of time has passed without a certain event occurring.]
 **** Time-sync [A synchronization signal whose purpose to help synchronize different signals or processes. Often used to indicate a marker inserted into the recorded data to allow post hoc synchronization of concurrently recorded data streams.]
 ** Data-resolution [Smallest change in a quality being measured by an sensor that causes a perceptible change.]
 *** Printer-resolution [Resolution of a printer, usually expressed as the number of dots-per-inch for a printer.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Screen-resolution [Resolution of a screen, usually expressed as the of pixels in a dimension for a digital display device.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Sensory-resolution [Resolution of measurements by a sensing device.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Spatial-resolution [Linear spacing of a spatial measurement.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Spectral-resolution [Measures the ability of a sensor to resolve features in the electromagnetic spectrum.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 *** Temporal-resolution [Measures the ability of a sensor to resolve features in time.]
-**** # {takesValue}
+**** # {takesValue, valueClass=numericClass}
 ** Data-source-type [The type of place, person, or thing from which the data comes or can be obtained.]
 *** Computed-feature[A feature computed from the data by a tool. This tag should be grouped with a label of the form Toolname_propertyName.]
 *** Computed-prediction [A computed extrapolation of known data.]
@@ -515,16 +525,16 @@ This schema is the first official release that includes an xsd and requires unit
 *** Categorical-value [Indicates that something can take on a limited and usually fixed number of possible values.]
 **** Categorical-class-value [Categorical values that fall into discrete classes such as true or false. The grouping is absolute in the sense that it is the same for all participants.]
 ***** All {relatedTag=Some, relatedTag=None}[To a complete degree or to the full or entire extent.]
-***** Correct {relatedTag=Incorrect}[Free from error. Especially conforming to fact or truth.]
+***** Correct {relatedTag=Wrong}[Free from error. Especially conforming to fact or truth.] ***** Explicit {relatedTag=Implicit}[Stated clearly and in detail, leaving no room for confusion or doubt.] ***** False {relatedTag=True}[Not in accordance with facts, reality or definitive criteria.] ***** Implicit {relatedTag=Explicit}[Implied though not plainly expressed.] -***** Invalid {relatedTag=Valid}[Not true because based on erroneous information or unsound reasoning or not conforming to the correct format or specifications.] +***** Invalid {relatedTag=Valid}[Not allowed or not conforming to the correct format or specifications.] ***** None {relatedTag=All, relatedTag=Some}[No person or thing, nobody, not any.] ***** Some {relatedTag=All, relatedTag=None}[At least a small amount or number of, but not a large amount of, or often.] ***** True {relatedTag=False}[Conforming to facts, reality or definitive criteria.] ***** Valid {relatedTag=Invalid}[Allowable, usable, or acceptable.] -***** Wrong {relatedTag=Correct}[Not accurate, correct, or appropriate.] +***** Wrong {relatedTag=Correct}[Inaccurate or not correct.] **** Categorical-judgment-value [Categorical values that are based on the judgment or perception of the participant such familiar and famous.] ***** Abnormal {relatedTag=Normal}[Deviating in any way from the state, position, structure, condition, behavior, or rule which is considered a norm.] ***** Asymmetrical {relatedTag=Symmetrical}[Lacking symmetry or having parts that fail to correspond to one another in shape, size, or arrangement.] @@ -534,7 +544,7 @@ This schema is the first official release that includes an xsd and requires unit ***** Constrained {relatedTag=Unconstrained}[Keeping something within particular limits or bounds.] ***** Disordered {relatedTag=Ordered}[Not neatly arranged. Confused and untidy. A structural quality in which the parts of an object are non-rigid.] ***** Familiar {relatedTag=Unfamiliar, relatedTag=Famous}[Recognized, familiar, or within the scope of knowledge.] -***** Famous {relatedTag=Familiar,relatedTag=Unfamiliar}[A person who has a high degree of recognition by the general population for his or her success or accomplishments. A famous person.] +***** Famous {relatedTag=Familiar, relatedTag=Unfamiliar}[A person who has a high degree of recognition by the general population for his or her success or accomplishments. A famous person.] ***** Inaudible {relatedTag=Audible}[A sound below the threshold of perception of the participant.] ***** Incongruent {relatedTag=Congruent}[Not in agreement or harmony.] ***** Involuntary {relatedTag=Voluntary}[An action that is not made by choice. In the body, involuntary actions (such as blushing) occur automatically, and cannot be controlled by choice.] @@ -548,12 +558,13 @@ This schema is the first official release that includes an xsd and requires unit ***** Unmasked {relatedTag=Masked}[Information is revealed.] ***** Voluntary {relatedTag=Involuntary}[Using free will or design; not forced or compelled; controlled by individual volition.] **** Categorical-level-value [Categorical values based on dividing a continuous variable into levels such as high and low.] -***** Cold {relatedTag=Hot}[Characterized by an absence of heat.] +***** Cold {relatedTag=Hot}[Having an absence of heat.] ***** Deep {relatedTag=Shallow}[Extending relatively far inward or downward.] ***** High {relatedTag=Low, relatedTag=Medium}[Having a greater than normal degree, intensity, or amount.] 
-***** Hot {relatedTag=Cold}[Characterized by an excess of heat.] +***** Hot {relatedTag=Cold}[Having an excess of heat.] +***** Large {relatedTag=Small}[Having a great extent such as in physical dimensions, period of time, amplitude or frequency.] ***** Liminal {relatedTag=Subliminal, relatedTag=Supraliminal}[Situated at a sensory threshold that is barely perceptible or capable of eliciting a response.] -***** Loud {relatedTag=Quiet}[Characterizing a perceived high intensity of sound.] +***** Loud {relatedTag=Quiet}[Having a perceived high intensity of sound.] ***** Low {relatedTag=High}[Less than normal in degree, intensity or amount.] ***** Medium {relatedTag=Low, relatedTag=High}[Mid-way between small and large in number, quantity, magnitude or extent.] ***** Negative {relatedTag=Positive}[Involving disadvantage or harm.] @@ -561,6 +572,7 @@ This schema is the first official release that includes an xsd and requires unit ***** Quiet {relatedTag=Loud}[Characterizing a perceived low intensity of sound.] ***** Rough {relatedTag=Smooth}[Having a surface with perceptible bumps, ridges, or irregularities.] ***** Shallow {relatedTag=Deep}[Having a depth which is relatively low.] +***** Small {relatedTag=Large}[Having a small extent such as in physical dimensions, period of time, amplitude or frequency.] ***** Smooth {relatedTag=Rough}[Having a surface free from bumps, ridges, or irregularities.] ***** Subliminal {relatedTag=Liminal, relatedTag=Supraliminal}[Situated below a sensory threshold that is imperceptible or not capable of eliciting a response.] ***** Supraliminal {relatedTag=Liminal, relatedTag=Subliminal}[Situated above a sensory threshold that is perceptible or capable of eliciting a response.] @@ -571,20 +583,24 @@ This schema is the first official release that includes an xsd and requires unit ***** Downward {relatedTag=Leftward,relatedTag=Rightward,relatedTag=Upward}[Moving or leading toward a lower place or level.] ***** Forward {relatedTag=Backward}[At or near or directed toward the front.] ***** Horizontally-oriented {relatedTag=Vertically-oriented}[Oriented parallel to or in the plane of the horizon.] -***** Leftward {relatedTag=Downward,relatedTag=Rightward,relatedTag=Upward}[Going toward or facing the left.] +***** Leftward {relatedTag=Downward, relatedTag=Rightward,relatedTag=Upward}[Going toward or facing the left.] ***** Oblique {relatedTag=Rotated}[Slanting or inclined in direction, course, or position that is neither parallel nor perpendicular nor right-angular.] -***** Rightward {relatedTag=Downward,relatedTag=Leftward,relatedTag=Upward}[Going toward or situated on the right.] +***** Rightward {relatedTag=Downward, relatedTag=Leftward,relatedTag=Upward}[Going toward or situated on the right.] ***** Rotated [Positioned offset around an axis or center.] ***** Upward {relatedTag=Downward,relatedTag=Leftward,relatedTag=Rightward}[Moving, pointing, or leading to a higher place, point, or level.] ***** Vertically-oriented {relatedTag=Horizontally-oriented}[Oriented perpendicular to the plane of the horizon.] *** Physical-value [The value of some physical property of something.] **** Weight [The relative mass or the quantity of matter contained by something.] ***** # {takesValue, valueClass=numericClass, unitClass=weightUnits} +**** Temperature [A measure of hot or cold based on the average kinetic energy of the atoms or molecules in the system.] 
+***** # {takesValue, valueClass=numericClass, unitClass=temperatureUnits} *** Quantitative-value [Something capable of being estimated or expressed with numeric values.] -**** Fraction [A numerical value betwee 0 and 1.] +**** Fraction [A numerical value between 0 and 1.] ***** # {takesValue, valueClass=numericClass} **** Item-count [The integer count of something which is usually grouped with the entity it is counting. (Item-count/3, A) indicates that 3 of A have occurred up to this point.] -***** # {takesValue, valueClass=numericClass} +***** # {takesValue, valueClass=numericClass} +**** Item-index [The index of an item in a collection, sequence or other structure. (A (Item-index/3, B)) means that A is item number 3 in B.] +***** # {takesValue, valueClass=numericClass} **** Item-interval [An integer indicating how many items or entities have passed since the last one of these. An item interval of 0 indicates the current item.] ***** # {takesValue, valueClass=numericClass} **** Percentage [A fraction or ratio with 100 understood as the denominator.] @@ -629,8 +645,8 @@ This schema is the first official release that includes an xsd and requires unit ***** Temporal-rate [The number of items per unit of time.] ****** # {takesValue, valueClass=numericClass, unitClass=frequencyUnits} **** Spatial-value [Value of an item involving space.] -***** Angle{unitClass=angleUnits}[The amount of inclination of one line to another or the plane of one object to another.] -****** # {takesValue, valueClass=numericClass} +***** Angle[The amount of inclination of one line to another or the plane of one object to another.] +****** # {takesValue, unitClass=angleUnits, valueClass=numericClass} ***** Distance [A measure of the space separating two objects or points.] ****** # {takesValue, valueClass=numericClass, unitClass=physicalLengthUnits} ***** Position [A reference to the alignment of an object, a particular situation or view of a situation, or the location of an object. Coordinates with respect a specified frame of reference or the default Screen-frame if no frame is given.] @@ -654,9 +670,9 @@ This schema is the first official release that includes an xsd and requires unit ****** Volume [The amount of three dimensional space occupied by an object or the capacity of a space or container.] ******* # {takesValue, valueClass=numericClass, unitClass=volumeUnits} **** Temporal-value [A characteristic of or relating to time or limited by time.] -***** Delay [Time during which some action is awaited.] +***** Delay {topLevelTagGroup, reserved, relatedTag=Duration}[The time at which an event start time is delayed from the current onset time. This tag defines the start time of an event of temporal extent and may be used with the Duration tag.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} -***** Duration [The period of time during which something occurs or continues.] +***** Duration {topLevelTagGroup, reserved, relatedTag=Delay}[The period of time during which an event occurs. This tag defines the end time of an event of temporal extent and may be used with the Delay tag.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} ***** Time-interval [The period of time separating two instances, events, or occurrences.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} @@ -727,30 +743,30 @@ This schema is the first official release that includes an xsd and requires unit **** # {takesValue}[Usually is a semantic version.] 
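Note: with the reserved attribute added above, Delay and Duration jointly describe an event of temporal extent: Delay shifts the event's start relative to the row's onset time, and Duration sets how long it lasts. A worked sketch of that arithmetic (a simplified model inferred from the descriptions, not hed-python code):

def temporal_extent(row_onset, delay=0.0, duration=0.0):
    # A row at row_onset annotated with Delay/d and Duration/u describes an
    # event spanning [row_onset + d, row_onset + d + u].
    start = row_onset + delay
    return start, start + duration

print(temporal_extent(4.5, delay=0.25, duration=1.0))  # (4.75, 5.75)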
** Parameter [Something user-defined for this experiment.] *** Parameter-label [The name of the parameter.] -**** # {takesValue, valueClass=labelClass} +**** # {takesValue, valueClass=nameClass} *** Parameter-value [The value of the parameter.] **** # {takesValue, valueClass=textClass} * Organizational-property [Relating to an organization or the action of organizing something.] ** Collection [A tag designating a grouping of items such as in a set or list.] *** # {takesValue, valueClass=nameClass}[Name of the collection.] -** Condition-variable [An aspect of the experiment or task that is to be varied during the experiment. Task-conditions are sometimes called independent type_variables or contrasts.] +** Condition-variable [An aspect of the experiment or task that is to be varied during the experiment. Task-conditions are sometimes called independent variables or contrasts.] *** # {takesValue, valueClass=nameClass}[Name of the condition variable.] ** Control-variable [An aspect of the experiment that is fixed throughout the study and usually is explicitly controlled.] *** # {takesValue, valueClass=nameClass}[Name of the control variable.] -** Def {requireChild} [A HED-specific utility tag used with a defined name to represent the tags associated with that definition.] +** Def {requireChild,reserved} [A HED-specific utility tag used with a defined name to represent the tags associated with that definition.] *** # {takesValue, valueClass=nameClass}[Name of the definition.] -** Def-expand {requireChild, tagGroup} [A HED specific utility tag that is grouped with an expanded definition. The child value of the Def-expand is the name of the expanded definition.] +** Def-expand {requireChild,reserved,tagGroup} [A HED specific utility tag that is grouped with an expanded definition. The child value of the Def-expand is the name of the expanded definition.] *** # {takesValue, valueClass=nameClass} -** Definition {requireChild, topLevelTagGroup}[A HED-specific utility tag whose child value is the name of the concept and the tag group associated with the tag is an English language explanation of a concept.] +** Definition {requireChild,reserved,topLevelTagGroup}[A HED-specific utility tag whose child value is the name of the concept and the tag group associated with the tag is an English language explanation of a concept.] *** # {takesValue, valueClass=nameClass}[Name of the definition.] -** Event-context {topLevelTagGroup, unique}[A special HED tag inserted as part of a top-level tag group to contain information about the interrelated conditions under which the event occurs. The event context includes information about other events that are ongoing when this event happens.] +** Event-context {reserved,topLevelTagGroup,unique}[A special HED tag inserted as part of a top-level tag group to contain information about the interrelated conditions under which the event occurs. The event context includes information about other events that are ongoing when this event happens.] ** Event-stream [A special HED tag indicating that this event is a member of an ordered succession of events.] *** # {takesValue, valueClass=nameClass}[Name of the event stream.] ** Experimental-intertrial [A tag used to indicate a part of the experiment between trials usually where nothing is happening.] *** # {takesValue, valueClass=nameClass}[Optional label for the intertrial block.] ** Experimental-trial [Designates a run or execution of an activity, for example, one execution of a script. 
A tag used to indicate a particular organizational part in the experimental design often containing a stimulus-response pair or stimulus-response-feedback triad.] *** # {takesValue, valueClass=nameClass}[Optional label for the trial (often a numerical string).] -** Indicator-variable [An aspect of the experiment or task that is measured as task conditions are varied during the experiment. Experiment indicators are sometimes called dependent type_variables.] +** Indicator-variable [An aspect of the experiment or task that is measured as task conditions are varied during the experiment. Experiment indicators are sometimes called dependent variables.] *** # {takesValue, valueClass=nameClass}[Name of the indicator variable.] ** Recording [A tag designating the data recording. Recording tags are usually have temporal scope which is the entire recording.] *** # {takesValue, valueClass=nameClass}[Optional label for the recording.] @@ -762,7 +778,7 @@ This schema is the first official release that includes an xsd and requires unit ** Sensory-attribute [A sensory characteristic associated with another entity.] *** Auditory-attribute [Pertaining to the sense of hearing.] **** Loudness [Perceived intensity of a sound.] -***** # {takesValue, valueClass=numericClass} +***** # {takesValue, valueClass=numericClass, valueClass=nameClass} **** Pitch [A perceptual property that allows the user to order sounds on a frequency scale.] ***** # {takesValue, valueClass=numericClass, unitClass=frequencyUnits} **** Sound-envelope [Description of how a sound changes over time.] @@ -770,12 +786,14 @@ This schema is the first official release that includes an xsd and requires unit ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} ***** Sound-envelope-decay [The time taken for the subsequent run down from the attack level to the designated sustain level.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} -***** Sound-envelope-release [The time taken for the level to decay from the sustain level to zero after the key is released] +***** Sound-envelope-release [The time taken for the level to decay from the sustain level to zero after the key is released.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} ***** Sound-envelope-sustain [The time taken for the main sequence of the sound duration, until the key is released.] ****** # {takesValue, valueClass=numericClass, unitClass=timeUnits} **** Timbre [The perceived sound quality of a singing voice or musical instrument.] -***** # {takesValue, valueClass=labelClass} +***** # {takesValue, valueClass=nameClass} +**** Sound-volume [The sound pressure level (SPL) usually the ratio to a reference signal estimated as the lower bound of hearing.] +***** # {takesValue, valueClass=numericClass, unitClass=intensityUnits} *** Gustatory-attribute [Pertaining to the sense of taste.] **** Bitter [Having a sharp, pungent taste.] **** Salty [Tasting of or like salt.] @@ -794,178 +812,178 @@ This schema is the first official release that includes an xsd and requires unit *** Vestibular-attribute [Pertaining to the sense of balance or body position.] *** Visual-attribute [Pertaining to the sense of sight.] **** Color [The appearance of objects (or light sources) described in terms of perception of their hue and lightness (or brightness) and saturation.] -***** CSS-color [One of 140 colors supported by all browsers. 
For more details such as the color RGB or HEX values, check: https://www.w3schools.com/colors/colors_groups.asp] -****** Blue-color [CSS color group] -******* CadetBlue [CSS-color 0x5F9EA0] -******* SteelBlue [CSS-color 0x4682B4] -******* LightSteelBlue [CSS-color 0xB0C4DE] -******* LightBlue [CSS-color 0xADD8E6] -******* PowderBlue [CSS-color 0xB0E0E6] -******* LightSkyBlue [CSS-color 0x87CEFA] -******* SkyBlue [CSS-color 0x87CEEB] -******* CornflowerBlue [CSS-color 0x6495ED] -******* DeepSkyBlue [CSS-color 0x00BFFF] -******* DodgerBlue [CSS-color 0x1E90FF] -******* RoyalBlue [CSS-color 0x4169E1] -******* Blue [CSS-color 0x0000FF] -******* MediumBlue [CSS-color 0x0000CD] -******* DarkBlue [CSS-color 0x00008B] -******* Navy [CSS-color 0x000080] -******* MidnightBlue [CSS-color 0x191970] -****** Brown-color [CSS color group] -******* Cornsilk [CSS-color 0xFFF8DC] -******* BlanchedAlmond [CSS-color 0xFFEBCD] -******* Bisque [CSS-color 0xFFE4C4] -******* NavajoWhite [CSS-color 0xFFDEAD] -******* Wheat [CSS-color 0xF5DEB3] -******* BurlyWood [CSS-color 0xDEB887] -******* Tan [CSS-color 0xD2B48C] -******* RosyBrown [CSS-color 0xBC8F8F] -******* SandyBrown [CSS-color 0xF4A460] -******* GoldenRod [CSS-color 0xDAA520] -******* DarkGoldenRod [CSS-color 0xB8860B] -******* Peru [CSS-color 0xCD853F] -******* Chocolate [CSS-color 0xD2691E] -******* Olive [CSS-color 0x808000] -******* SaddleBrown [CSS-color 0x8B4513] -******* Sienna [CSS-color 0xA0522D] -******* Brown [CSS-color 0xA52A2A] -******* Maroon [CSS-color 0x800000] -****** Cyan-color [CSS color group] -******* Aqua [CSS-color 0x00FFFF] -******* Cyan [CSS-color 0x00FFFF] -******* LightCyan [CSS-color 0xE0FFFF] -******* PaleTurquoise [CSS-color 0xAFEEEE] -******* Aquamarine [CSS-color 0x7FFFD4] -******* Turquoise [CSS-color 0x40E0D0] -******* MediumTurquoise [CSS-color 0x48D1CC] -******* DarkTurquoise [CSS-color 0x00CED1] -****** Green-color [CSS color group] -******* GreenYellow [CSS-color 0xADFF2F] -******* Chartreuse [CSS-color 0x7FFF00] -******* LawnGreen [CSS-color 0x7CFC00] -******* Lime [CSS-color 0x00FF00] -******* LimeGreen [CSS-color 0x32CD32] -******* PaleGreen [CSS-color 0x98FB98] -******* LightGreen [CSS-color 0x90EE90] -******* MediumSpringGreen [CSS-color 0x00FA9A] -******* SpringGreen [CSS-color 0x00FF7F] -******* MediumSeaGreen [CSS-color 0x3CB371] -******* SeaGreen [CSS-color 0x2E8B57] -******* ForestGreen [CSS-color 0x228B22] -******* Green [CSS-color 0x008000] -******* DarkGreen [CSS-color 0x006400] -******* YellowGreen [CSS-color 0x9ACD32] -******* OliveDrab [CSS-color 0x6B8E23] -******* DarkOliveGreen [CSS-color 0x556B2F] -******* MediumAquaMarine [CSS-color 0x66CDAA] -******* DarkSeaGreen [CSS-color 0x8FBC8F] -******* LightSeaGreen [CSS-color 0x20B2AA] -******* DarkCyan [CSS-color 0x008B8B] -******* Teal [CSS-color 0x008080] -****** Gray-color [CSS color group] -******* Gainsboro [CSS-color 0xDCDCDC] -******* LightGray [CSS-color 0xD3D3D3] -******* Silver [CSS-color 0xC0C0C0] -******* DarkGray [CSS-color 0xA9A9A9] -******* DimGray [CSS-color 0x696969] -******* Gray [CSS-color 0x808080] -******* LightSlateGray [CSS-color 0x778899] -******* SlateGray [CSS-color 0x708090] -******* DarkSlateGray [CSS-color 0x2F4F4F] -******* Black [CSS-color 0x000000] -****** Orange-color [CSS color group] -******* Orange [CSS-color 0xFFA500] -******* DarkOrange [CSS-color 0xFF8C00] -******* Coral [CSS-color 0xFF7F50] -******* Tomato [CSS-color 0xFF6347] -******* OrangeRed [CSS-color 0xFF4500] -****** Pink-color [CSS color group] -******* Pink 
[CSS-color 0xFFC0CB] -******* LightPink [CSS-color 0xFFB6C1] -******* HotPink [CSS-color 0xFF69B4] -******* DeepPink [CSS-color 0xFF1493] -******* PaleVioletRed [CSS-color 0xDB7093] -******* MediumVioletRed [CSS-color 0xC71585] -****** Purple-color [CSS color group] -******* Lavender [CSS-color 0xE6E6FA] -******* Thistle [CSS-color 0xD8BFD8] -******* Plum [CSS-color 0xDDA0DD] -******* Orchid [CSS-color 0xDA70D6] -******* Violet [CSS-color 0xEE82EE] -******* Fuchsia [CSS-color 0xFF00FF] -******* Magenta [CSS-color 0xFF00FF] -******* MediumOrchid [CSS-color 0xBA55D3] -******* DarkOrchid [CSS-color 0x9932CC] -******* DarkViolet [CSS-color 0x9400D3] -******* BlueViolet [CSS-color 0x8A2BE2] -******* DarkMagenta [CSS-color 0x8B008B] -******* Purple [CSS-color 0x800080] -******* MediumPurple [CSS-color 0x9370DB] -******* MediumSlateBlue [CSS-color 0x7B68EE] -******* SlateBlue [CSS-color 0x6A5ACD] -******* DarkSlateBlue [CSS-color 0x483D8B] -******* RebeccaPurple [CSS-color 0x663399] -******* Indigo [CSS-color 0x4B0082] -****** Red-color [CSS color group] -******* LightSalmon [CSS-color 0xFFA07A] -******* Salmon [CSS-color 0xFA8072] -******* DarkSalmon [CSS-color 0xE9967A] -******* LightCoral [CSS-color 0xF08080] -******* IndianRed [CSS-color 0xCD5C5C] -******* Crimson [CSS-color 0xDC143C] -******* Red [CSS-color 0xFF0000] -******* FireBrick [CSS-color 0xB22222] -******* DarkRed [CSS-color 0x8B0000] -****** Yellow-color [CSS color group] -******* Gold [CSS-color 0xFFD700] -******* Yellow [CSS-color 0xFFFF00] -******* LightYellow [CSS-color 0xFFFFE0] -******* LemonChiffon [CSS-color 0xFFFACD] -******* LightGoldenRodYellow [CSS-color 0xFAFAD2] -******* PapayaWhip [CSS-color 0xFFEFD5] -******* Moccasin [CSS-color 0xFFE4B5] -******* PeachPuff [CSS-color 0xFFDAB9] -******* PaleGoldenRod [CSS-color 0xEEE8AA] -******* Khaki [CSS-color 0xF0E68C] -******* DarkKhaki [CSS-color 0xBDB76B] -****** White-color [CSS color group] -******* White [CSS-color 0xFFFFFF] -******* Snow [CSS-color 0xFFFAFA] -******* HoneyDew [CSS-color 0xF0FFF0] -******* MintCream [CSS-color 0xF5FFFA] -******* Azure [CSS-color 0xF0FFFF] -******* AliceBlue [CSS-color 0xF0F8FF] -******* GhostWhite [CSS-color 0xF8F8FF] -******* WhiteSmoke [CSS-color 0xF5F5F5] -******* SeaShell [CSS-color 0xFFF5EE] -******* Beige [CSS-color 0xF5F5DC] -******* OldLace [CSS-color 0xFDF5E6] -******* FloralWhite [CSS-color 0xFFFAF0] -******* Ivory [CSS-color 0xFFFFF0] -******* AntiqueWhite [CSS-color 0xFAEBD7] -******* Linen [CSS-color 0xFAF0E6] -******* LavenderBlush [CSS-color 0xFFF0F5] -******* MistyRose [CSS-color 0xFFE4E1] +***** CSS-color [One of 140 colors supported by all browsers. For more details such as the color RGB or HEX values, check: https://www.w3schools.com/colors/colors_groups.asp.] +****** Blue-color [CSS color group.] +******* CadetBlue [CSS-color 0x5F9EA0.] +******* SteelBlue [CSS-color 0x4682B4.] +******* LightSteelBlue [CSS-color 0xB0C4DE.] +******* LightBlue [CSS-color 0xADD8E6.] +******* PowderBlue [CSS-color 0xB0E0E6.] +******* LightSkyBlue [CSS-color 0x87CEFA.] +******* SkyBlue [CSS-color 0x87CEEB.] +******* CornflowerBlue [CSS-color 0x6495ED.] +******* DeepSkyBlue [CSS-color 0x00BFFF.] +******* DodgerBlue [CSS-color 0x1E90FF.] +******* RoyalBlue [CSS-color 0x4169E1.] +******* Blue [CSS-color 0x0000FF.] +******* MediumBlue [CSS-color 0x0000CD.] +******* DarkBlue [CSS-color 0x00008B.] +******* Navy [CSS-color 0x000080.] +******* MidnightBlue [CSS-color 0x191970.] +****** Brown-color [CSS color group.] 
+******* Cornsilk [CSS-color 0xFFF8DC.] +******* BlanchedAlmond [CSS-color 0xFFEBCD.] +******* Bisque [CSS-color 0xFFE4C4.] +******* NavajoWhite [CSS-color 0xFFDEAD.] +******* Wheat [CSS-color 0xF5DEB3.] +******* BurlyWood [CSS-color 0xDEB887.] +******* Tan [CSS-color 0xD2B48C.] +******* RosyBrown [CSS-color 0xBC8F8F.] +******* SandyBrown [CSS-color 0xF4A460.] +******* GoldenRod [CSS-color 0xDAA520.] +******* DarkGoldenRod [CSS-color 0xB8860B.] +******* Peru [CSS-color 0xCD853F.] +******* Chocolate [CSS-color 0xD2691E.] +******* Olive [CSS-color 0x808000.] +******* SaddleBrown [CSS-color 0x8B4513.] +******* Sienna [CSS-color 0xA0522D.] +******* Brown [CSS-color 0xA52A2A.] +******* Maroon [CSS-color 0x800000.] +****** Cyan-color [CSS color group.] +******* Aqua [CSS-color 0x00FFFF.] +******* Cyan [CSS-color 0x00FFFF.] +******* LightCyan [CSS-color 0xE0FFFF.] +******* PaleTurquoise [CSS-color 0xAFEEEE.] +******* Aquamarine [CSS-color 0x7FFFD4.] +******* Turquoise [CSS-color 0x40E0D0.] +******* MediumTurquoise [CSS-color 0x48D1CC.] +******* DarkTurquoise [CSS-color 0x00CED1.] +****** Green-color [CSS color group.] +******* GreenYellow [CSS-color 0xADFF2F.] +******* Chartreuse [CSS-color 0x7FFF00.] +******* LawnGreen [CSS-color 0x7CFC00.] +******* Lime [CSS-color 0x00FF00.] +******* LimeGreen [CSS-color 0x32CD32.] +******* PaleGreen [CSS-color 0x98FB98.] +******* LightGreen [CSS-color 0x90EE90.] +******* MediumSpringGreen [CSS-color 0x00FA9A.] +******* SpringGreen [CSS-color 0x00FF7F.] +******* MediumSeaGreen [CSS-color 0x3CB371.] +******* SeaGreen [CSS-color 0x2E8B57.] +******* ForestGreen [CSS-color 0x228B22.] +******* Green [CSS-color 0x008000.] +******* DarkGreen [CSS-color 0x006400.] +******* YellowGreen [CSS-color 0x9ACD32.] +******* OliveDrab [CSS-color 0x6B8E23.] +******* DarkOliveGreen [CSS-color 0x556B2F.] +******* MediumAquaMarine [CSS-color 0x66CDAA.] +******* DarkSeaGreen [CSS-color 0x8FBC8F.] +******* LightSeaGreen [CSS-color 0x20B2AA.] +******* DarkCyan [CSS-color 0x008B8B.] +******* Teal [CSS-color 0x008080.] +****** Gray-color [CSS color group.] +******* Gainsboro [CSS-color 0xDCDCDC.] +******* LightGray [CSS-color 0xD3D3D3.] +******* Silver [CSS-color 0xC0C0C0.] +******* DarkGray [CSS-color 0xA9A9A9.] +******* DimGray [CSS-color 0x696969.] +******* Gray [CSS-color 0x808080.] +******* LightSlateGray [CSS-color 0x778899.] +******* SlateGray [CSS-color 0x708090.] +******* DarkSlateGray [CSS-color 0x2F4F4F.] +******* Black [CSS-color 0x000000.] +****** Orange-color [CSS color group.] +******* Orange [CSS-color 0xFFA500.] +******* DarkOrange [CSS-color 0xFF8C00.] +******* Coral [CSS-color 0xFF7F50.] +******* Tomato [CSS-color 0xFF6347.] +******* OrangeRed [CSS-color 0xFF4500.] +****** Pink-color [CSS color group.] +******* Pink [CSS-color 0xFFC0CB.] +******* LightPink [CSS-color 0xFFB6C1.] +******* HotPink [CSS-color 0xFF69B4.] +******* DeepPink [CSS-color 0xFF1493.] +******* PaleVioletRed [CSS-color 0xDB7093.] +******* MediumVioletRed [CSS-color 0xC71585.] +****** Purple-color [CSS color group.] +******* Lavender [CSS-color 0xE6E6FA.] +******* Thistle [CSS-color 0xD8BFD8.] +******* Plum [CSS-color 0xDDA0DD.] +******* Orchid [CSS-color 0xDA70D6.] +******* Violet [CSS-color 0xEE82EE.] +******* Fuchsia [CSS-color 0xFF00FF.] +******* Magenta [CSS-color 0xFF00FF.] +******* MediumOrchid [CSS-color 0xBA55D3.] +******* DarkOrchid [CSS-color 0x9932CC.] +******* DarkViolet [CSS-color 0x9400D3.] +******* BlueViolet [CSS-color 0x8A2BE2.] +******* DarkMagenta [CSS-color 0x8B008B.] 
+******* Purple [CSS-color 0x800080.]
+******* MediumPurple [CSS-color 0x9370DB.]
+******* MediumSlateBlue [CSS-color 0x7B68EE.]
+******* SlateBlue [CSS-color 0x6A5ACD.]
+******* DarkSlateBlue [CSS-color 0x483D8B.]
+******* RebeccaPurple [CSS-color 0x663399.]
+******* Indigo [CSS-color 0x4B0082.]
+****** Red-color [CSS color group.]
+******* LightSalmon [CSS-color 0xFFA07A.]
+******* Salmon [CSS-color 0xFA8072.]
+******* DarkSalmon [CSS-color 0xE9967A.]
+******* LightCoral [CSS-color 0xF08080.]
+******* IndianRed [CSS-color 0xCD5C5C.]
+******* Crimson [CSS-color 0xDC143C.]
+******* Red [CSS-color 0xFF0000.]
+******* FireBrick [CSS-color 0xB22222.]
+******* DarkRed [CSS-color 0x8B0000.]
+****** Yellow-color [CSS color group.]
+******* Gold [CSS-color 0xFFD700.]
+******* Yellow [CSS-color 0xFFFF00.]
+******* LightYellow [CSS-color 0xFFFFE0.]
+******* LemonChiffon [CSS-color 0xFFFACD.]
+******* LightGoldenRodYellow [CSS-color 0xFAFAD2.]
+******* PapayaWhip [CSS-color 0xFFEFD5.]
+******* Moccasin [CSS-color 0xFFE4B5.]
+******* PeachPuff [CSS-color 0xFFDAB9.]
+******* PaleGoldenRod [CSS-color 0xEEE8AA.]
+******* Khaki [CSS-color 0xF0E68C.]
+******* DarkKhaki [CSS-color 0xBDB76B.]
+****** White-color [CSS color group.]
+******* White [CSS-color 0xFFFFFF.]
+******* Snow [CSS-color 0xFFFAFA.]
+******* HoneyDew [CSS-color 0xF0FFF0.]
+******* MintCream [CSS-color 0xF5FFFA.]
+******* Azure [CSS-color 0xF0FFFF.]
+******* AliceBlue [CSS-color 0xF0F8FF.]
+******* GhostWhite [CSS-color 0xF8F8FF.]
+******* WhiteSmoke [CSS-color 0xF5F5F5.]
+******* SeaShell [CSS-color 0xFFF5EE.]
+******* Beige [CSS-color 0xF5F5DC.]
+******* OldLace [CSS-color 0xFDF5E6.]
+******* FloralWhite [CSS-color 0xFFFAF0.]
+******* Ivory [CSS-color 0xFFFFF0.]
+******* AntiqueWhite [CSS-color 0xFAEBD7.]
+******* Linen [CSS-color 0xFAF0E6.]
+******* LavenderBlush [CSS-color 0xFFF0F5.]
+******* MistyRose [CSS-color 0xFFE4E1.]
 ***** Color-shade [A slight degree of difference between colors, especially with regard to how light or dark it is or as distinguished from one nearly like it.]
 ****** Dark-shade [A color tone not reflecting much light.]
 ****** Light-shade [A color tone reflecting more light.]
 ***** Grayscale [Using a color map composed of shades of gray, varying from black at the weakest intensity to white at the strongest.]
-****** # {takesValue, valueClass=numericClass} [White intensity between 0 and 1]
+****** # {takesValue, valueClass=numericClass} [White intensity between 0 and 1.]
 ***** HSV-color [A color representation that models how colors appear under light.]
 ****** Hue [Attribute of a visual sensation according to which an area appears to be similar to one of the perceived colors.]
-******* # {takesValue, valueClass=numericClass} [Angular value between 0 and 360]
+******* # {takesValue, valueClass=numericClass} [Angular value between 0 and 360.]
 ****** Saturation [Colorfulness of a stimulus relative to its own brightness.]
-******* # {takesValue, valueClass=numericClass} [B value of RGB between 0 and 1]
-****** HSV-value [AAttribute of a visual sensation according to which an area appears to emit more or less light.]
+******* # {takesValue, valueClass=numericClass} [Saturation value between 0 and 1.]
+****** HSV-value [An attribute of a visual sensation according to which an area appears to emit more or less light.]
 ******* # {takesValue, valueClass=numericClass}
 ***** RGB-color [A color from the RGB schema.]
 ****** RGB-red [The red component.]
-******* # {takesValue, valueClass=numericClass} [R value of RGB between 0 and 1] +******* # {takesValue, valueClass=numericClass} [R value of RGB between 0 and 1.] ****** RGB-blue [The blue component.] -******* # {takesValue, valueClass=numericClass} [B value of RGB between 0 and 1] +******* # {takesValue, valueClass=numericClass} [B value of RGB between 0 and 1.] ****** RGB-green [The green component.] -******* # {takesValue, valueClass=numericClass} [G value of RGB between 0 and 1] +******* # {takesValue, valueClass=numericClass} [G value of RGB between 0 and 1.] **** Luminance [A quality that exists by virtue of the luminous intensity per unit area projected in a given direction.] **** Opacity [A measure of impenetrability to light.] ** Sensory-presentation [The entity has a sensory manifestation.] @@ -1010,7 +1028,7 @@ This schema is the first official release that includes an xsd and requires unit *** Behavioral-evidence [An indication or conclusion based on the behavior of an agent.] ** Task-event-role [The purpose of an event with respect to the task.] *** Experimental-stimulus [Part of something designed to elicit a response in the experiment.] -*** Incidental [Usually associated with a sensory event intended to give instructions to the participant about the task or behavior.] +*** Incidental [A sensory or other type of event that is unrelated to the task or experiment.] *** Instructional [Usually associated with a sensory event intended to give instructions to the participant about the task or behavior.] *** Mishap [Unplanned disruption such as an equipment or experiment control abnormality or experimenter error.] *** Participant-response [Something related to a participant actions in performing the task.] @@ -1020,6 +1038,7 @@ This schema is the first official release that includes an xsd and requires unit *** Appropriate-action {relatedTag=Inappropriate-action}[An action suitable or proper in the circumstances.] *** Correct-action {relatedTag=Incorrect-action, relatedTag=Indeterminate-action}[An action that was a correct response in the context of the task.] *** Correction [An action offering an improvement to replace a mistake or error.] +*** Done-indication {relatedTag=Ready-indication}[An action that indicates that the participant has completed this step in the task.] *** Incorrect-action {relatedTag=Correct-action, relatedTag=Indeterminate-action}[An action considered wrong or incorrect in the context of the task.] *** Imagined-action [Form a mental image or concept of something. This is used to identity something that only happened in the imagination of the participant as in imagined movements in motor imagery paradigms.] *** Inappropriate-action {relatedTag=Appropriate-action}[An action not in keeping with what is correct or proper for the task.] @@ -1027,6 +1046,7 @@ This schema is the first official release that includes an xsd and requires unit *** Omitted-action [An expected response was skipped.] *** Miss {relatedTag=Near-miss}[An action considered to be a failure in the context of the task. For example, if the agent is supposed to try to hit a target and misses.] *** Near-miss {relatedTag=Miss}[An action barely satisfied the requirements of the task. In a driving experiment for example this could pertain to a narrowly avoided collision or other accident.] +*** Ready-indication {relatedTag=Done-indication}[An action that indicates that the participant is ready to perform the next step in the task.] 
** Task-relationship [Specifying organizational importance of sub-tasks.] *** Background-subtask [A part of the task which should be performed in the background as for example inhibiting blinks due to instruction while performing the primary task.] *** Primary-subtask [A part of the task which should be the primary focus of the participant.] @@ -1056,164 +1076,183 @@ This schema is the first official release that includes an xsd and requires unit *** Unexpected {relatedTag=Expected}[Something that is not anticipated.] *** Unplanned {relatedTag=Planned}[Something that has not been planned as part of the task.] -'''Relation''' [Concerns the way in which two or more people or things are connected.] -* Comparative-relation [Something considered in comparison to something else.] -** Approximately-equal-to [(A (Approximately-equal-to B)) indicates that A and B have almost the same value. Here A and B could refer to sizes, orders, positions or other quantities.] -** Less-than [(A (Less-than B)) indicates that A is smaller than B. Here A and B could refer to sizes, orders, positions or other quantities.] -** Less-than-or-equal-to [(A (Less-than-or-equal-to B)) indicates that the relative size or order of A is smaller than or equal to B.] -** Greater-than [(A (Greater-than B)) indicates that the relative size or order of A is bigger than that of B.] -** Greater-than-or-equal-to [(A (Greater-than-or-equal-to B)) indicates that the relative size or order of A is bigger than or the same as that of B.] -** Equal-to [(A (Equal-to B)) indicates that the size or order of A is the same as that of B.] -** Not-equal-to [(A (Not-equal-to B)) indicates that the size or order of A is not the same as that of B.] -* Connective-relation [Indicates two items are related in some way.] -** Belongs-to [(A (Belongs-to B)) indicates that A is a member of B.] -** Connected-to [(A (Connected-to) B) indicates that A is related to B in some respect, usually through a direct link.] -** Contained-in [(A (Contained-in B)) indicates that A is completely inside of B.] -** Described-by [(A (Described-by B)) indicates that B provides information about A.] -** From-to [(A (From-to B)) indicates a directional relation from A to B. A is considered the source.] -** Group-of [(A (Group-of B)) indicates A is a group of items of type B.] -** Implied-by [(A (Implied-by B)) indicates B is suggested by A.] -** Interacts-with [(A (Interacts-with B)) indicates A and B interact, possibly reciprocally.] -** Member-of [(A (Member-of B)) indicates A is a member of group B.] -** Part-of [(A (Part-of B)) indicates A is a part of the whole B.] -** Performed-by [(A (Performed-by B)) Indicates that ction or procedure A was carried out by agent B.] -** Related-to [(A (Relative-to B)) indicates A is a part of the whole B.] -* Directional-relation [A relationship indicating direction of change.] -** Away-from [Go away from a place or object.] -** Towards [Moving in the direction of. A relation binding a relational quality or disposition to the relevant type of entity] -* Spatial-relation [Indicating information about position.] -** Above [(A (Adjacent-to B)) means A is in a place or position that is higher than B.] -** Across-from [(A (Across-from B)) means A is on the opposite side of something from B.] -** Adjacent-to [(A (Adjacent-to B)) indicates that A is next to B in time or space.] -** Ahead-of [(A (Ahead-of B)) indicates that A is further forward in time or space in B.] 
-** Around [(A (Around B)) means A is in or near the present place or situation of B.] -** Behind [(A (Behind B)) means A is at or to the far side of B, typically so as to be hidden by it.] -** Below [(A (Below B)) means A is in a place or position that is lower than the position of B.] -** Between [(A (Between, (B, C))) means A is in the space or interval separating B and C.] -** Bilateral-to [(A (Bilateral B)) means A is on both sides of B or affects both sides of B.] -** Bottom-edge-of {relatedTag=Left-edge-of,relatedTag=Right-edge-of,relatedTag=Top-edge-of}[(A (Bottom-edge-of B)) means A is on the bottom most part or or near the boundary of B.] -** Boundary-of [(A (Boundary-of B)) means A is on or part of the edge or boundary of B.] -** Center-of [(A (Center-of B)) means A is at a point or or in an area that is approximately central within B.] -** Close-to [(A (Close-to B)) means A is at a small distance from or is located near in space to B.] -** Far-from [(A (Far-from B)) means A is at a large distance from or is not located near in space to B.] -** In-front-of [(A (In-front-of B)) means A is in a position just ahead or at the front part of B, potentially partially blocking B from view.] -** Left-edge-of {relatedTag=Bottom-edge-of,relatedTag=Right-edge-of,relatedTag=Top-edge-of}[(A (Left-edge-of B)) means A is located on the left side of B on or near the boundary of B.] -** Left-side-of {relatedTag=Right-side-of}[(A (Left-side-of B)) means A is located on the left side of B usually as part of B.] -** Lower-left-of {relatedTag=Lower-right-of,relatedTag=Upper-left-of,relatedTag=Upper-right-of}[(A (Lower-left-of B)) means A is situated on the lower left part of B. This relation is often used to specify qualitative information about screen position.] -** Lower-right-of {relatedTag=Upper-left-of,relatedTag=Upper-left-of,relatedTag=Lower-right-of}[(A (Lower-right-of B)) means A is situated on the lower right part of B. This relation is often used to specify qualitative information about screen position.] -** Outside-of [(A (Outside-of B)) means A is located in the space around but not including B.] -** Over [(A (over B)) means A above is above B so as to cover or protect or A extends over the a general area as from a from a vantage point.] -** Right-edge-of {relatedTag=Bottom-edge-of,relatedTag=Left-edge-of,relatedTag=Top-edge-of}[(A (Right-edge-of B)) means A is located on the right side of B on or near the boundary of B.] -** Right-side-of {relatedTag=Left-side-of}[(A (Right-side-of B)) means A is located on the right side of B usually as part of B.] -** To-left-of [(A (To-left-of B)) means A is located on or directed toward the side to the west of B when B is facing north. This term is used when A is not part of B.] -** To-right-of [(A (To-right-of B)) means A is located on or directed toward the side to the east of B when B is facing north. This term is used when A is not part of B.] -** Top-edge-of {relatedTag=Left-edge-of,relatedTag=Right-edge-of,relatedTag=Bottom-edge-of}[(A (Top-edge-of B)) means A is on the uppermost part or or near the boundary of B.] -** Top-of [(A (Top-of B)) means A is on the uppermost part, side, or surface of B.] -** Upper-left-of {relatedTag=Lower-left-of,relatedTag=Lower-right-of,relatedTag=Upper-right-of}[(A (Upper-left-of B)) means A is situated on the upper left part of B. This relation is often used to specify qualitative information about screen position.] 
-** Upper-right-of {relatedTag=Lower-left-of,relatedTag=Upper-left-of,relatedTag=Lower-right-of}[(A (Upper-right-of B)) means A is situated on the upper right part of B. This relation is often used to specify qualitative information about screen position.] -** Underneath [(A (Underneath B)) means A is situated directly below and may be concealed by B.] -** Within [(A (Within B)) means A is on the inside of or contained in B.] -* Temporal-relation [Any relationship which includes a temporal or time-based component.] -** After [(A After B) means A happens at a time subsequent to a reference time related to B.] -** Asynchronous-with [(A Asynchronous-with B) means A happens at times not occurring at the same time or having the same period or phase as B.] -** Before [(A Before B) means A happens at a time earlier in time or order than B.] -** During [(A During B) means A happens at some point in a given period of time in which B is ongoing.] -** Synchronous-with [(A Synchronous-with B) means A happens at occurs at the same time or rate as B.] -** Waiting-for [(A Waiting-for B) means A pauses for something to happen in B.] +'''Relation''' {extensionAllowed}[Concerns the way in which two or more people or things are connected.] +* Comparative-relation [Something considered in comparison to something else. The first entity is the focus.] +** Approximately-equal-to [(A, (Approximately-equal-to, B)) indicates that A and B have almost the same value. Here A and B could refer to sizes, orders, positions or other quantities.] +** Less-than [(A, (Less-than, B)) indicates that A is smaller than B. Here A and B could refer to sizes, orders, positions or other quantities.] +** Less-than-or-equal-to [(A, (Less-than-or-equal-to, B)) indicates that the relative size or order of A is smaller than or equal to B.] +** Greater-than [(A, (Greater-than, B)) indicates that the relative size or order of A is bigger than that of B.] +** Greater-than-or-equal-to [(A, (Greater-than-or-equal-to, B)) indicates that the relative size or order of A is bigger than or the same as that of B.] +** Equal-to [(A, (Equal-to, B)) indicates that the size or order of A is the same as that of B.] +** Not-equal-to [(A, (Not-equal-to, B)) indicates that the size or order of A is not the same as that of B.] +* Connective-relation [Indicates two entities are related in some way. The first entity is the focus.] +** Belongs-to [(A, (Belongs-to, B)) indicates that A is a member of B.] +** Connected-to [(A, (Connected-to, B)) indicates that A is related to B in some respect, usually through a direct link.] +** Contained-in [(A, (Contained-in, B)) indicates that A is completely inside of B.] +** Described-by [(A, (Described-by, B)) indicates that B provides information about A.] +** From-to [(A, (From-to, B)) indicates a directional relation from A to B. A is considered the source.] +** Group-of [(A, (Group-of, B)) indicates A is a group of items of type B.] +** Implied-by [(A, (Implied-by, B)) indicates B is suggested by A.] +** Includes [(A, (Includes, B)) indicates that A has B as a member or part.] +** Interacts-with [(A, (Interacts-with, B)) indicates A and B interact, possibly reciprocally.] +** Member-of [(A, (Member-of, B)) indicates A is a member of group B.] +** Part-of [(A, (Part-of, B)) indicates A is a part of the whole B.] +** Performed-by [(A, (Performed-by, B)) indicates that the action or procedure A was carried out by agent B.] 
+** Performed-using [(A, (Performed-using, B)) indicates that the action or procedure A was accomplished using B.]
+** Related-to [(A, (Related-to, B)) indicates A has some relationship to B.]
+** Unrelated-to [(A, (Unrelated-to, B)) indicates that A is not related to B. For example, A is not related to Task.]
+* Directional-relation [A relationship indicating direction of change of one entity relative to another. The first entity is the focus.]
+** Away-from [(A, (Away-from, B)) indicates that A is going or has moved away from B. The meaning depends on A and B.]
+** Towards [(A, (Towards, B)) indicates that A is going to or has moved to B. The meaning depends on A and B.]
+* Logical-relation [Indicating a logical relationship between entities. The first entity is usually the focus.]
+** And [(A, (And, B)) means A and B are both in effect.]
+** Or [(A, (Or, B)) means at least one of A and B are in effect.]
+* Spatial-relation [Indicating a relationship about position between entities.]
+** Above [(A, (Above, B)) means A is in a place or position that is higher than B.]
+** Across-from [(A, (Across-from, B)) means A is on the opposite side of something from B.]
+** Adjacent-to [(A, (Adjacent-to, B)) indicates that A is next to B in time or space.]
+** Ahead-of [(A, (Ahead-of, B)) indicates that A is further forward in time or space than B.]
+** Around [(A, (Around, B)) means A is in or near the present place or situation of B.]
+** Behind [(A, (Behind, B)) means A is at or to the far side of B, typically so as to be hidden by it.]
+** Below [(A, (Below, B)) means A is in a place or position that is lower than the position of B.]
+** Between [(A, (Between, (B, C))) means A is in the space or interval separating B and C.]
+** Bilateral-to [(A, (Bilateral-to, B)) means A is on both sides of B or affects both sides of B.]
+** Bottom-edge-of {relatedTag=Left-edge-of,relatedTag=Right-edge-of,relatedTag=Top-edge-of}[(A, (Bottom-edge-of, B)) means A is on the bottommost part of or near the boundary of B.]
+** Boundary-of [(A, (Boundary-of, B)) means A is on or part of the edge or boundary of B.]
+** Center-of [(A, (Center-of, B)) means A is at a point or in an area that is approximately central within B.]
+** Close-to [(A, (Close-to, B)) means A is at a small distance from or is located near in space to B.]
+** Far-from [(A, (Far-from, B)) means A is at a large distance from or is not located near in space to B.]
+** In-front-of [(A, (In-front-of, B)) means A is in a position just ahead or at the front part of B, potentially partially blocking B from view.]
+** Left-edge-of {relatedTag=Bottom-edge-of,relatedTag=Right-edge-of,relatedTag=Top-edge-of}[(A, (Left-edge-of, B)) means A is located on the left side of B on or near the boundary of B.]
+** Left-side-of {relatedTag=Right-side-of}[(A, (Left-side-of, B)) means A is located on the left side of B usually as part of B.]
+** Lower-center-of {relatedTag=Center-of,relatedTag=Lower-left-of,relatedTag=Lower-right-of,relatedTag=Upper-center-of,relatedTag=Upper-right-of}[(A, (Lower-center-of, B)) means A is situated on the lower center part of B (due south). This relation is often used to specify qualitative information about screen position.]
+** Lower-left-of {relatedTag=Center-of,relatedTag=Lower-center-of,relatedTag=Lower-right-of,relatedTag=Upper-center-of,relatedTag=Upper-left-of,relatedTag=Upper-right-of}[(A, (Lower-left-of, B)) means A is situated on the lower left part of B. This relation is often used to specify qualitative information about screen position.]
+** Lower-right-of {relatedTag=Center-of,relatedTag=Lower-center-of,relatedTag=Lower-left-of,relatedTag=Upper-center-of,relatedTag=Upper-left-of,relatedTag=Upper-right-of}[(A, (Lower-right-of, B)) means A is situated on the lower right part of B. This relation is often used to specify qualitative information about screen position.]
+** Outside-of [(A, (Outside-of, B)) means A is located in the space around but not including B.]
+** Over [(A, (Over, B)) means A is above B so as to cover or protect it, or A extends over a general area as seen from a vantage point.]
+** Right-edge-of {relatedTag=Bottom-edge-of,relatedTag=Left-edge-of,relatedTag=Top-edge-of}[(A, (Right-edge-of, B)) means A is located on the right side of B on or near the boundary of B.]
+** Right-side-of {relatedTag=Left-side-of}[(A, (Right-side-of, B)) means A is located on the right side of B usually as part of B.]
+** To-left-of [(A, (To-left-of, B)) means A is located on or directed toward the side to the west of B when B is facing north. This term is used when A is not part of B.]
+** To-right-of [(A, (To-right-of, B)) means A is located on or directed toward the side to the east of B when B is facing north. This term is used when A is not part of B.]
+** Top-edge-of {relatedTag=Left-edge-of,relatedTag=Right-edge-of,relatedTag=Bottom-edge-of}[(A, (Top-edge-of, B)) means A is on the uppermost part of or near the boundary of B.]
+** Top-of [(A, (Top-of, B)) means A is on the uppermost part, side, or surface of B.]
+** Upper-center-of {relatedTag=Center-of,relatedTag=Lower-center-of,relatedTag=Lower-left-of,relatedTag=Lower-right-of,relatedTag=Upper-left-of,relatedTag=Upper-right-of}[(A, (Upper-center-of, B)) means A is situated on the upper center part of B (due north). This relation is often used to specify qualitative information about screen position.]
+** Upper-left-of {relatedTag=Center-of,relatedTag=Lower-center-of,relatedTag=Lower-left-of,relatedTag=Lower-right-of,relatedTag=Upper-center-of,relatedTag=Upper-right-of}[(A, (Upper-left-of, B)) means A is situated on the upper left part of B. This relation is often used to specify qualitative information about screen position.]
+** Upper-right-of {relatedTag=Center-of,relatedTag=Lower-center-of,relatedTag=Lower-left-of,relatedTag=Upper-left-of,relatedTag=Upper-center-of,relatedTag=Lower-right-of}[(A, (Upper-right-of, B)) means A is situated on the upper right part of B. This relation is often used to specify qualitative information about screen position.]
+** Underneath [(A, (Underneath, B)) means A is situated directly below and may be concealed by B.]
+** Within [(A, (Within, B)) means A is on the inside of or contained in B.]
+* Temporal-relation [A relationship that includes a temporal or time-based component.]
+** After [(A, (After, B)) means A happens at a time subsequent to a reference time related to B.]
+** Asynchronous-with [(A, (Asynchronous-with, B)) means A happens at times not occurring at the same time or having the same period or phase as B.]
+** Before [(A, (Before, B)) means A happens at a time earlier in time or order than B.]
+** During [(A, (During, B)) means A happens at some point in a given period of time in which B is ongoing.]
+** Synchronous-with [(A, (Synchronous-with, B)) means A occurs at the same time or rate as B.]
+** Waiting-for [(A, (Waiting-for, B)) means A pauses for something to happen in B.]
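Every relation added above follows the same pattern: the focus entity A comes first, and the relation tag is grouped with the second entity B, giving (A, (Relation, B)). A minimal Python sketch of composing such groups as plain strings follows; the helper name `relate` and the operand strings are illustrative only, not part of any HED tool.

```python
# Minimal sketch: composing the (A, (Relation, B)) pattern as plain strings.
# The helper name `relate` and the operand strings are illustrative only.

def relate(a: str, relation: str, b: str) -> str:
    """Render a HED relation group with A as the focus entity."""
    return f"({a}, ({relation}, {b}))"

# A spatial relation: the face appears to the left of the house.
print(relate("Face", "To-left-of", "House"))
# -> (Face, (To-left-of, House))

# A temporal relation: the action happens after the sensory event.
print(relate("Agent-action", "After", "Sensory-event"))
# -> (Agent-action, (After, Sensory-event))
```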
!# end schema
'''Unit classes''' [Unit classes and the units for the nodes.]
* accelerationUnits {defaultUnits=m-per-s^2}
-** m-per-s^2 {SIUnit, unitSymbol}
+** m-per-s^2 {SIUnit, unitSymbol, conversionFactor=1.0}
* angleUnits {defaultUnits=radian}
-** radian {SIUnit}
-** rad {SIUnit, unitSymbol}
-** degree
+** radian {SIUnit, conversionFactor=1.0}
+** rad {SIUnit, unitSymbol, conversionFactor=1.0}
+** degree {conversionFactor=0.0174533}
* areaUnits {defaultUnits=m^2}
-** m^2 {SIUnit, unitSymbol}
+** m^2 {SIUnit, unitSymbol, conversionFactor=1.0}
* currencyUnits {defaultUnits=$}[Units indicating the worth of something.]
-** dollar
-** $ {unitPrefix, unitSymbol}
-** point
+** dollar {conversionFactor=1.0}
+** $ {unitPrefix, unitSymbol, conversionFactor=1.0}
+** euro
+** point
+* electricPotentialUnits {defaultUnits=uv}
+** v {SIUnit, unitSymbol, conversionFactor=10^6}
+** Volt {SIUnit, conversionFactor=10^6}
* frequencyUnits {defaultUnits=Hz}
-** hertz {SIUnit}
-** Hz {SIUnit, unitSymbol}
+** hertz {SIUnit, conversionFactor=1.0}
+** Hz {SIUnit, unitSymbol, conversionFactor=1.0}
* intensityUnits {defaultUnits=dB}
-** dB {unitSymbol}[Intensity expressed as ratio to a threshold. Often used for sound intensity.]
+** dB {unitSymbol, conversionFactor=1.0}[Intensity expressed as ratio to a threshold. May be used for sound intensity.]
** candela {SIUnit}[Units used to express light intensity.]
** cd {SIUnit, unitSymbol}[Units used to express light intensity.]
* jerkUnits {defaultUnits=m-per-s^3}
-** m-per-s^3 {unitSymbol}
+** m-per-s^3 {unitSymbol, conversionFactor=1.0}
+* magneticFieldUnits {defaultUnits=fT}[Units used to express magnetic field intensity.]
+** tesla {SIUnit, conversionFactor=10^15}
+** T {SIUnit, unitSymbol, conversionFactor=10^15}
* memorySizeUnits {defaultUnits=B}
-** byte {SIUnit}
-** B {SIUnit, unitSymbol}
+** byte {SIUnit, conversionFactor=1.0}
+** B {SIUnit, unitSymbol, conversionFactor=1.0}
* physicalLengthUnits {defaultUnits=m}
-** foot
-** inch
-** metre {SIUnit}
-** m {SIUnit, unitSymbol}
-** mile
+** foot {conversionFactor=0.3048}
+** inch {conversionFactor=0.0254}
+** meter {SIUnit, conversionFactor=1.0}
+** metre {SIUnit, conversionFactor=1.0}
+** m {SIUnit, unitSymbol, conversionFactor=1.0}
+** mile {conversionFactor=1609.34}
* speedUnits {defaultUnits=m-per-s}
-** m-per-s {SIUnit, unitSymbol}
-** mph {unitSymbol}
-** kph {unitSymbol}
+** m-per-s {SIUnit, unitSymbol, conversionFactor=1.0}
+** mph {unitSymbol, conversionFactor=0.44704}
+** kph {unitSymbol, conversionFactor=0.277778}
+* temperatureUnits {defaultUnits=degree Celsius}
+** degree Celsius {SIUnit, conversionFactor=1.0}
+** oC {SIUnit, unitSymbol, conversionFactor=1.0}
* timeUnits {defaultUnits=s}
-** second {SIUnit}
-** s {SIUnit, unitSymbol}
-** day
-** minute
-** hour [Should be in 24-hour format.]
+** second {SIUnit, conversionFactor=1.0}
+** s {SIUnit, unitSymbol, conversionFactor=1.0}
+** day {conversionFactor=86400}
+** minute {conversionFactor=60}
+** hour {conversionFactor=3600}[Should be in 24-hour format.]
* volumeUnits {defaultUnits=m^3}
-** m^3 {SIUnit, unitSymbol}
+** m^3 {SIUnit, unitSymbol, conversionFactor=1.0}
* weightUnits {defaultUnits=g}
-** g {SIUnit, unitSymbol}
-** gram {SIUnit}
-** pound
-** lb
+** g {SIUnit, unitSymbol, conversionFactor=1.0}
+** gram {SIUnit, conversionFactor=1.0}
+** pound {conversionFactor=453.592}
+** lb {conversionFactor=453.592}
'''Unit modifiers''' [Unit multiples and submultiples.]
-* deca {SIUnitModifier} [SI unit multiple representing 10^1] -* da {SIUnitSymbolModifier} [SI unit multiple representing 10^1] -* hecto {SIUnitModifier} [SI unit multiple representing 10^2] -* h {SIUnitSymbolModifier} [SI unit multiple representing 10^2] -* kilo {SIUnitModifier} [SI unit multiple representing 10^3] -* k {SIUnitSymbolModifier} [SI unit multiple representing 10^3] -* mega {SIUnitModifier} [SI unit multiple representing 10^6] -* M {SIUnitSymbolModifier} [SI unit multiple representing 10^6] -* giga {SIUnitModifier} [SI unit multiple representing 10^9] -* G {SIUnitSymbolModifier} [SI unit multiple representing 10^9] -* tera {SIUnitModifier} [SI unit multiple representing 10^12] -* T {SIUnitSymbolModifier} [SI unit multiple representing 10^12] -* peta {SIUnitModifier} [SI unit multiple representing 10^15] -* P {SIUnitSymbolModifier} [SI unit multiple representing 10^15] -* exa {SIUnitModifier} [SI unit multiple representing 10^18] -* E {SIUnitSymbolModifier} [SI unit multiple representing 10^18] -* zetta {SIUnitModifier} [SI unit multiple representing 10^21] -* Z {SIUnitSymbolModifier} [SI unit multiple representing 10^21] -* yotta {SIUnitModifier} [SI unit multiple representing 10^24] -* Y {SIUnitSymbolModifier} [SI unit multiple representing 10^24] -* deci {SIUnitModifier} [SI unit submultiple representing 10^-1] -* d {SIUnitSymbolModifier} [SI unit submultiple representing 10^-1] -* centi {SIUnitModifier} [SI unit submultiple representing 10^-2] -* c {SIUnitSymbolModifier} [SI unit submultiple representing 10^-2] -* milli {SIUnitModifier} [SI unit submultiple representing 10^-3] -* m {SIUnitSymbolModifier} [SI unit submultiple representing 10^-3] -* micro {SIUnitModifier} [SI unit submultiple representing 10^-6] -* u {SIUnitSymbolModifier} [SI unit submultiple representing 10^-6] -* nano {SIUnitModifier} [SI unit submultiple representing 10^-9] -* n {SIUnitSymbolModifier} [SI unit submultiple representing 10^-9] -* pico {SIUnitModifier} [SI unit submultiple representing 10^-12] -* p {SIUnitSymbolModifier} [SI unit submultiple representing 10^-12] -* femto {SIUnitModifier} [SI unit submultiple representing 10^-15] -* f {SIUnitSymbolModifier} [SI unit submultiple representing 10^-15] -* atto {SIUnitModifier} [SI unit submultiple representing 10^-18] -* a {SIUnitSymbolModifier} [SI unit submultiple representing 10^-18] -* zepto {SIUnitModifier} [SI unit submultiple representing 10^-21] -* z {SIUnitSymbolModifier} [SI unit submultiple representing 10^-21] -* yocto {SIUnitModifier} [SI unit submultiple representing 10^-24] -* y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24] +* deca {SIUnitModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] +* da {SIUnitSymbolModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] +* hecto {SIUnitModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] +* h {SIUnitSymbolModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] +* kilo {SIUnitModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] +* k {SIUnitSymbolModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] +* mega {SIUnitModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] +* M {SIUnitSymbolModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] +* giga {SIUnitModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] +* G {SIUnitSymbolModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] 
+* tera {SIUnitModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.]
+* T {SIUnitSymbolModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.]
+* peta {SIUnitModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.]
+* P {SIUnitSymbolModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.]
+* exa {SIUnitModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.]
+* E {SIUnitSymbolModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.]
+* zetta {SIUnitModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.]
+* Z {SIUnitSymbolModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.]
+* yotta {SIUnitModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.]
+* Y {SIUnitSymbolModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.]
+* deci {SIUnitModifier, conversionFactor=0.1} [SI unit submultiple representing 10^-1.]
+* d {SIUnitSymbolModifier, conversionFactor=0.1} [SI unit submultiple representing 10^-1.]
+* centi {SIUnitModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.]
+* c {SIUnitSymbolModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.]
+* milli {SIUnitModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.]
+* m {SIUnitSymbolModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.]
+* micro {SIUnitModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.]
+* u {SIUnitSymbolModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.]
+* nano {SIUnitModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.]
+* n {SIUnitSymbolModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.]
+* pico {SIUnitModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.]
+* p {SIUnitSymbolModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.]
+* femto {SIUnitModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.]
+* f {SIUnitSymbolModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.]
+* atto {SIUnitModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.]
+* a {SIUnitSymbolModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.]
+* zepto {SIUnitModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.]
+* z {SIUnitSymbolModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.]
+* yocto {SIUnitModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.]
+* y {SIUnitSymbolModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.]
'''Value classes''' [Specification of the rules for the values provided by users.]
@@ -1224,35 +1263,42 @@
* textClass {allowedCharacter=letters, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Value class designating values that have the characteristics of text such as in descriptions.]
-'''Schema attributes''' [Allowed node, unit class or unit modifier attributes.]
+'''Schema attributes''' [Allowed attribute modifiers of other sections of the schema.]
* allowedCharacter {valueClassProperty}[A schema attribute of value classes specifying a special character that is allowed in expressing the value of a placeholder. Normally the allowed characters are listed individually. However, the word letters designates the upper and lower case alphabetic characters and the word digits designates the digits 0-9. The word blank designates the blank character.]
-* defaultUnits {unitClassProperty}[A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units.]
-* extensionAllowed {boolProperty}[A schema attribute indicating that users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.]
-* recommended {boolProperty}[A schema attribute indicating that the event-level HED string should include this tag.]
-* relatedTag [A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools.]
-* requireChild {boolProperty}[A schema attribute indicating that one of the node elements descendants must be included when using this tag.]
-* required {boolProperty}[A schema attribute indicating that every event-level HED string should include this tag.]
+* conversionFactor {unitProperty, unitModifierProperty}[The multiplicative factor by which to multiply these units to convert to default units.]
+* deprecatedFrom {elementProperty}[Indicates that this element is deprecated. The value of the attribute is the latest schema version in which the element appeared in undeprecated form.]
+* defaultUnits {unitClassProperty}[A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units.]
+* extensionAllowed {boolProperty, nodeProperty, isInheritedProperty}[A schema attribute indicating that users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.]
+* inLibrary {elementProperty} [Indicates this schema element came from the named library schema, not the standard schema. This attribute is added by tools when a library schema is merged into its partnered standard schema.]
+* recommended {boolProperty, nodeProperty}[A schema attribute indicating that the event-level HED string should include this tag.]
+* relatedTag {nodeProperty, isInheritedProperty}[A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools.]
+* requireChild {boolProperty, nodeProperty}[A schema attribute indicating that one of the node element's descendants must be included when using this tag.]
+* required {boolProperty, nodeProperty}[A schema attribute indicating that every event-level HED string should include this tag.]
+* reserved {boolProperty, nodeProperty}[A schema attribute indicating that this tag has special meaning and requires special handling by tools.]
+* rooted {nodeProperty}[Indicates a top-level library schema node is identical to a node of the same name in the partnered standard schema. This attribute can only appear in nodes that have the inLibrary schema attribute.]
* SIUnit {boolProperty, unitProperty}[A schema attribute indicating that this unit element is an SI unit and can be modified by multiple and submultiple names. Note that some units such as byte are designated as SI units although they are not part of the standard.]
* SIUnitModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a base unit rather than a unit symbol.]
* SIUnitSymbolModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a unit symbol rather than a base symbol.]
-* suggestedTag [A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.]
-* tagGroup {boolProperty}[A schema attribute indicating the tag can only appear inside a tag group.]
-* takesValue {boolProperty}[A schema attribute indicating the tag is a hashtag placeholder that is expected to be replaced with a user-defined value.]
-* topLevelTagGroup {boolProperty}[A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group.]
-* unique {boolProperty}[A schema attribute indicating that only one of this tag or its descendants can be used in the event-level HED string.]
-* unitClass [A schema attribute specifying which unit class this value tag belongs to.]
+* suggestedTag {nodeProperty, isInheritedProperty}[A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.]
+* tagGroup {boolProperty, nodeProperty}[A schema attribute indicating the tag can only appear inside a tag group.]
+* takesValue {boolProperty, nodeProperty}[A schema attribute indicating the tag is a hashtag placeholder that is expected to be replaced with a user-defined value.]
+* topLevelTagGroup {boolProperty, nodeProperty}[A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. A tag group can have at most one tag with this attribute.]
+* unique {boolProperty, nodeProperty}[A schema attribute indicating that only one of this tag or its descendants can be used in the event-level HED string.]
+* unitClass {nodeProperty}[A schema attribute specifying which unit class this value tag belongs to.]
* unitPrefix {boolProperty, unitProperty}[A schema attribute applied specifically to unit elements to designate that the unit indicator is a prefix (e.g., dollar sign in the currency units).]
* unitSymbol {boolProperty, unitProperty}[A schema attribute indicating this tag is an abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.]
-* valueClass [A schema attribute specifying which value class this value tag belongs to.]
+* valueClass {nodeProperty}[A schema attribute specifying which value class this value tag belongs to.]
'''Properties''' [Properties of the schema attributes themselves. These are used for schema handling and verification.]
* boolProperty [Indicates that the schema attribute represents something that is either true or false and does not have a value. Attributes without this value are assumed to have string values.]
+* elementProperty [Indicates this schema attribute can apply to any type of element (tag term, unit class, etc.).]
+* isInheritedProperty [Indicates that this attribute is inherited by child nodes. This property only applies to schema attributes for nodes.]
+* nodeProperty [Indicates this schema attribute applies to node (tag-term) elements.
This was added to allow for an attribute to apply to multiple elements.] * unitClassProperty [Indicates that the schema attribute is meant to be applied to unit classes.] * unitModifierProperty [Indicates that the schema attribute is meant to be applied to unit modifier classes.] * unitProperty [Indicates that the schema attribute is meant to be applied to units within a unit class.] * valueClassProperty [Indicates that the schema attribute is meant to be applied to value classes.] '''Epilogue''' -This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes, themselves have properties. - +This schema is released under the Creative Commons Attribution 4.0 International and is a product of the HED Working Group. The DOI for the latest version of the HED standard schema is 10.5281/zenodo.7876037. !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/HED8.0.0.xml b/tests/data/schema_tests/HED8.2.0.xml similarity index 84% rename from tests/data/schema_tests/HED8.0.0.xml rename to tests/data/schema_tests/HED8.2.0.xml index f8e99ff91..1f55c7ae8 100644 --- a/tests/data/schema_tests/HED8.0.0.xml +++ b/tests/data/schema_tests/HED8.2.0.xml @@ -1,6 +1,9 @@ - - This schema is the first official release that includes an xsd and requires unit class, unit modifier, value class, schema attribute and property sections. + + The HED standard schema is a hierarchically-organized vocabulary for annotating events and experimental structure. HED annotations consist of comma-separated tags drawn from this vocabulary. This vocabulary can be augmented by terms drawn from specialized library schema. + +Each term in this vocabulary has a human-readable description and may include additional attributes that give additional properties or that specify how tools should treat the tag during analysis. The meaning of these attributes is described in the Additional schema properties section. + @@ -16,7 +19,7 @@ suggestedTag Task-event-role - Attribute/Sensory + Sensory-presentation @@ -806,6 +809,10 @@ Anatomical-item A biological structure, system, fluid or other substance excluding single molecular entities. + + Body + The biological structure representing an organism. + Body-part Any part of an organism. @@ -927,6 +934,10 @@ Gentalia The external organs of reproduction. + + deprecatedFrom + 8.1.0 + Hip @@ -1021,7 +1032,7 @@ An entity related to a systematic means of communicating by the use of sounds, symbols, or gestures. suggestedTag - Attribute/Sensory + Sensory-presentation Character @@ -1073,7 +1084,7 @@ Something perceptible by one or more of the senses, especially by vision or touch. A material thing. suggestedTag - Attribute/Sensory + Sensory-presentation Geometric-object @@ -1093,6 +1104,10 @@ 2D-shape A planar, two-dimensional shape. + + Arrow + A shape with a pointed end indicating direction. + Clockface The dial face of a clock. A location identifier based on clockface numbering or anatomic subregion. @@ -1419,6 +1434,10 @@ Notebook A book for notes or memoranda. + + Questionnaire + A document consisting of questions and possibly responses, depending on whether it has been filled out. + Furnishing @@ -1765,6 +1784,10 @@ Attentive Concentrating and focusing mental energy on the task or surroundings. + + Distracted + Lacking in concentration because of being preoccupied. + Awake In a non sleeping state. 
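The conversionFactor values added throughout the unit-class and unit-modifier sections above compose multiplicatively: a value expressed in a (possibly prefixed) unit converts to its class's default units by multiplying by the modifier factor and then by the unit factor. Below is a small Python sketch using factors copied from this diff; the function and dictionary names are illustrative only and are not part of the hedtools API.

```python
# Sketch of how conversionFactor values compose. The numeric factors are
# copied from the schema diff above; the names here are illustrative only.

UNIT_FACTORS = {       # unit -> factor to the default units of its class
    "mph": 0.44704,    # speedUnits, default m-per-s
    "kph": 0.277778,   # speedUnits, default m-per-s
    "foot": 0.3048,    # physicalLengthUnits, default m
    "meter": 1.0,      # physicalLengthUnits, default m
    "hour": 3600,      # timeUnits, default s
}

MODIFIER_FACTORS = {   # SI prefix -> multiplier
    "kilo": 1000.0,
    "milli": 0.001,
}

def to_default_units(value: float, unit: str, modifier: str = "") -> float:
    """Convert a value in (modifier + unit) to the unit class's default units."""
    factor = UNIT_FACTORS[unit]
    if modifier:
        factor *= MODIFIER_FACTORS[modifier]
    return value * factor

print(to_default_units(60, "mph"))           # 26.8224 (m-per-s)
print(to_default_units(2, "meter", "kilo"))  # 2000.0 (m)
```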
@@ -2015,6 +2038,10 @@ takesValue + + valueClass + numericClass + @@ -2068,6 +2095,10 @@ Having genitalia and/or secondary sexual characteristics of indeterminate sex. + + Ethnicity + Belonging to a social group that has a common national or cultural tradition. Use with Label to avoid extension. + Handedness Individual preference for use of a hand, known as the dominant hand. @@ -2084,6 +2115,10 @@ Having no overall dominance in the use of right or left hand or foot in the performance of tasks that require one hand or foot. + + Race + Belonging to a group sharing physical or social qualities as defined within a specified society. Use with Label to avoid extension. + @@ -2095,22 +2130,57 @@ Data-marker An indicator placed to mark something. + + Data-break-marker + An indicator placed to indicate a gap in the data. + Temporal-marker An indicator placed at a particular time in the data. + + Inset + Marks an intermediate point in an ongoing event of temporal extent. + + topLevelTagGroup + + + reserved + + + relatedTag + Onset + Offset + + Onset - Labels the start or beginning of something, usually an event. + Marks the start of an ongoing event of temporal extent. topLevelTagGroup + + reserved + + + relatedTag + Inset + Offset + Offset - Labels the time at which something stops. + Marks the end of an event of temporal extent. topLevelTagGroup + + reserved + + + relatedTag + Onset + Inset + Pause @@ -2137,6 +2207,10 @@ takesValue + + valueClass + numericClass + @@ -2147,6 +2221,10 @@ takesValue + + valueClass + numericClass + @@ -2157,6 +2235,10 @@ takesValue + + valueClass + numericClass + @@ -2167,6 +2249,10 @@ takesValue + + valueClass + numericClass + @@ -2177,6 +2263,10 @@ takesValue + + valueClass + numericClass + @@ -2187,6 +2277,10 @@ takesValue + + valueClass + numericClass + @@ -2237,7 +2331,7 @@ Free from error. Especially conforming to fact or truth. relatedTag - Incorrect + Wrong @@ -2266,7 +2360,7 @@ Invalid - Not true because based on erroneous information or unsound reasoning or not conforming to the correct format or specifications. + Not allowed or not conforming to the correct format or specifications. relatedTag Valid @@ -2308,7 +2402,7 @@ Wrong - Not accurate, correct, or appropriate. + Inaccurate or not correct. relatedTag Correct @@ -2495,7 +2589,7 @@ Categorical values based on dividing a continuous variable into levels such as high and low. Cold - Characterized by an absence of heat. + Having an absence of heat. relatedTag Hot @@ -2520,12 +2614,20 @@ Hot - Characterized by an excess of heat. + Having an excess of heat. relatedTag Cold + + Large + Having a great extent such as in physical dimensions, period of time, amplitude or frequency. + + relatedTag + Small + + Liminal Situated at a sensory threshold that is barely perceptible or capable of eliciting a response. @@ -2537,7 +2639,7 @@ Loud - Characterizing a perceived high intensity of sound. + Having a perceived high intensity of sound. relatedTag Quiet @@ -2600,6 +2702,14 @@ Deep + + Small + Having a small extent such as in physical dimensions, period of time, amplitude or frequency. + + relatedTag + Large + + Smooth Having a surface free from bumps, ridges, or irregularities. @@ -2675,6 +2785,10 @@ Horizontally-oriented Oriented parallel to or in the plane of the horizon. + + relatedTag + Vertically-oriented + Leftward @@ -2721,6 +2835,10 @@ Vertically-oriented Oriented perpendicular to the plane of the horizon.
+ + relatedTag + Horizontally-oriented + @@ -2735,14 +2853,32 @@ takesValue + + valueClass + numericClass + unitClass weightUnits + + + + Temperature + A measure of hot or cold based on the average kinetic energy of the atoms or molecules in the system. + + # + + takesValue + valueClass numericClass + + unitClass + temperatureUnits + @@ -2751,7 +2887,7 @@ Something capable of being estimated or expressed with numeric values. Fraction - A numerical value betwee 0 and 1. + A numerical value between 0 and 1. # @@ -2777,6 +2913,20 @@ + + Item-index + The index of an item in a collection, sequence or other structure. (A (Item-index/3, B)) means that A is item number 3 in B. + + # + + takesValue + + + valueClass + numericClass + + + Item-interval An integer indicating how many items or entities have passed since the last one of these. An item interval of 0 indicates the current item. @@ -2981,14 +3131,14 @@ takesValue - - unitClass - accelerationUnits - valueClass numericClass + + unitClass + accelerationUnits + @@ -2999,14 +3149,14 @@ takesValue - - unitClass - frequencyUnits - valueClass numericClass + + unitClass + frequencyUnits + @@ -3017,14 +3167,14 @@ takesValue - - unitClass - jerkUnits - valueClass numericClass + + unitClass + jerkUnits + @@ -3063,14 +3213,14 @@ takesValue - - unitClass - speedUnits - valueClass numericClass + + unitClass + speedUnits + @@ -3081,14 +3231,14 @@ takesValue - - unitClass - frequencyUnits - valueClass numericClass + + unitClass + frequencyUnits + @@ -3098,15 +3248,15 @@ Angle The amount of inclination of one line to another or the plane of one object to another. - - unitClass - angleUnits - # takesValue + + unitClass + angleUnits + valueClass numericClass @@ -3121,14 +3271,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3142,14 +3292,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3160,14 +3310,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3178,14 +3328,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3200,14 +3350,14 @@ takesValue - - unitClass - areaUnits - valueClass numericClass + + unitClass + areaUnits + @@ -3218,14 +3368,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3236,14 +3386,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3254,14 +3404,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3272,14 +3422,14 @@ takesValue - - unitClass - physicalLengthUnits - valueClass numericClass + + unitClass + physicalLengthUnits + @@ -3290,14 +3440,14 @@ takesValue - - unitClass - volumeUnits - valueClass numericClass + + unitClass + volumeUnits + @@ -3307,38 +3457,58 @@ A characteristic of or relating to time or limited by time. Delay - Time during which some action is awaited. + The time at which an event start time is delayed from the current onset time. This tag defines the start time of an event of temporal extent and may be used with the Duration tag. 
+ + topLevelTagGroup + + + reserved + + + relatedTag + Duration + # takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + Duration - The period of time during which something occurs or continues. + The period of time during which an event occurs. This tag defines the end time of an event of temporal extent and may be used with the Delay tag. + + topLevelTagGroup + + + reserved + + + relatedTag + Delay + # takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -3349,14 +3519,14 @@ takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -3367,14 +3537,14 @@ takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -3750,7 +3920,7 @@ valueClass - labelClass + nameClass @@ -3790,7 +3960,7 @@ Condition-variable - An aspect of the experiment or task that is to be varied during the experiment. Task-conditions are sometimes called independent type_variables or contrasts. + An aspect of the experiment or task that is to be varied during the experiment. Task-conditions are sometimes called independent variables or contrasts. # Name of the condition variable. @@ -3824,6 +3994,9 @@ requireChild + + reserved + # Name of the definition. @@ -3842,6 +4015,9 @@ requireChild + + reserved + tagGroup @@ -3862,6 +4038,9 @@ requireChild + + reserved + topLevelTagGroup @@ -3880,6 +4059,9 @@ Event-context A special HED tag inserted as part of a top-level tag group to contain information about the interrelated conditions under which the event occurs. The event context includes information about other events that are ongoing when this event happens. + + reserved + topLevelTagGroup @@ -3934,7 +4116,7 @@ Indicator-variable - An aspect of the experiment or task that is measured as task conditions are varied during the experiment. Experiment indicators are sometimes called dependent type_variables. + An aspect of the experiment or task that is measured as task conditions are varied during the experiment. Experiment indicators are sometimes called dependent variables. # Name of the indicator variable. @@ -4013,6 +4195,7 @@ valueClass numericClass + nameClass @@ -4024,14 +4207,14 @@ takesValue - - unitClass - frequencyUnits - valueClass numericClass + + unitClass + frequencyUnits + @@ -4045,14 +4228,14 @@ takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -4063,32 +4246,32 @@ takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + Sound-envelope-release - The time taken for the level to decay from the sustain level to zero after the key is released + The time taken for the level to decay from the sustain level to zero after the key is released. # takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -4099,14 +4282,14 @@ takesValue - - unitClass - timeUnits - valueClass numericClass + + unitClass + timeUnits + @@ -4120,7 +4303,25 @@ valueClass - labelClass + nameClass + + + + + Sound-volume + The sound pressure level (SPL) usually the ratio to a reference signal estimated as the lower bound of hearing. + + # + + takesValue + + + valueClass + numericClass + + + unitClass + intensityUnits @@ -4197,613 +4398,613 @@ The appearance of objects (or light sources) described in terms of perception of their hue and lightness (or brightness) and saturation. CSS-color - One of 140 colors supported by all browsers. 
For more details such as the color RGB or HEX values, check: https://www.w3schools.com/colors/colors_groups.asp + One of 140 colors supported by all browsers. For more details such as the color RGB or HEX values, check: https://www.w3schools.com/colors/colors_groups.asp. Blue-color - CSS color group + CSS color group. CadetBlue - CSS-color 0x5F9EA0 + CSS-color 0x5F9EA0. SteelBlue - CSS-color 0x4682B4 + CSS-color 0x4682B4. LightSteelBlue - CSS-color 0xB0C4DE + CSS-color 0xB0C4DE. LightBlue - CSS-color 0xADD8E6 + CSS-color 0xADD8E6. PowderBlue - CSS-color 0xB0E0E6 + CSS-color 0xB0E0E6. LightSkyBlue - CSS-color 0x87CEFA + CSS-color 0x87CEFA. SkyBlue - CSS-color 0x87CEEB + CSS-color 0x87CEEB. CornflowerBlue - CSS-color 0x6495ED + CSS-color 0x6495ED. DeepSkyBlue - CSS-color 0x00BFFF + CSS-color 0x00BFFF. DodgerBlue - CSS-color 0x1E90FF + CSS-color 0x1E90FF. RoyalBlue - CSS-color 0x4169E1 + CSS-color 0x4169E1. Blue - CSS-color 0x0000FF + CSS-color 0x0000FF. MediumBlue - CSS-color 0x0000CD + CSS-color 0x0000CD. DarkBlue - CSS-color 0x00008B + CSS-color 0x00008B. Navy - CSS-color 0x000080 + CSS-color 0x000080. MidnightBlue - CSS-color 0x191970 + CSS-color 0x191970. Brown-color - CSS color group + CSS color group. Cornsilk - CSS-color 0xFFF8DC + CSS-color 0xFFF8DC. BlanchedAlmond - CSS-color 0xFFEBCD + CSS-color 0xFFEBCD. Bisque - CSS-color 0xFFE4C4 + CSS-color 0xFFE4C4. NavajoWhite - CSS-color 0xFFDEAD + CSS-color 0xFFDEAD. Wheat - CSS-color 0xF5DEB3 + CSS-color 0xF5DEB3. BurlyWood - CSS-color 0xDEB887 + CSS-color 0xDEB887. Tan - CSS-color 0xD2B48C + CSS-color 0xD2B48C. RosyBrown - CSS-color 0xBC8F8F + CSS-color 0xBC8F8F. SandyBrown - CSS-color 0xF4A460 + CSS-color 0xF4A460. GoldenRod - CSS-color 0xDAA520 + CSS-color 0xDAA520. DarkGoldenRod - CSS-color 0xB8860B + CSS-color 0xB8860B. Peru - CSS-color 0xCD853F + CSS-color 0xCD853F. Chocolate - CSS-color 0xD2691E + CSS-color 0xD2691E. Olive - CSS-color 0x808000 + CSS-color 0x808000. SaddleBrown - CSS-color 0x8B4513 + CSS-color 0x8B4513. Sienna - CSS-color 0xA0522D + CSS-color 0xA0522D. Brown - CSS-color 0xA52A2A + CSS-color 0xA52A2A. Maroon - CSS-color 0x800000 + CSS-color 0x800000. Cyan-color - CSS color group + CSS color group. Aqua - CSS-color 0x00FFFF + CSS-color 0x00FFFF. Cyan - CSS-color 0x00FFFF + CSS-color 0x00FFFF. LightCyan - CSS-color 0xE0FFFF + CSS-color 0xE0FFFF. PaleTurquoise - CSS-color 0xAFEEEE + CSS-color 0xAFEEEE. Aquamarine - CSS-color 0x7FFFD4 + CSS-color 0x7FFFD4. Turquoise - CSS-color 0x40E0D0 + CSS-color 0x40E0D0. MediumTurquoise - CSS-color 0x48D1CC + CSS-color 0x48D1CC. DarkTurquoise - CSS-color 0x00CED1 + CSS-color 0x00CED1. Green-color - CSS color group + CSS color group. GreenYellow - CSS-color 0xADFF2F + CSS-color 0xADFF2F. Chartreuse - CSS-color 0x7FFF00 + CSS-color 0x7FFF00. LawnGreen - CSS-color 0x7CFC00 + CSS-color 0x7CFC00. Lime - CSS-color 0x00FF00 + CSS-color 0x00FF00. LimeGreen - CSS-color 0x32CD32 + CSS-color 0x32CD32. PaleGreen - CSS-color 0x98FB98 + CSS-color 0x98FB98. LightGreen - CSS-color 0x90EE90 + CSS-color 0x90EE90. MediumSpringGreen - CSS-color 0x00FA9A + CSS-color 0x00FA9A. SpringGreen - CSS-color 0x00FF7F + CSS-color 0x00FF7F. MediumSeaGreen - CSS-color 0x3CB371 + CSS-color 0x3CB371. SeaGreen - CSS-color 0x2E8B57 + CSS-color 0x2E8B57. ForestGreen - CSS-color 0x228B22 + CSS-color 0x228B22. Green - CSS-color 0x008000 + CSS-color 0x008000. DarkGreen - CSS-color 0x006400 + CSS-color 0x006400. YellowGreen - CSS-color 0x9ACD32 + CSS-color 0x9ACD32. OliveDrab - CSS-color 0x6B8E23 + CSS-color 0x6B8E23. 
DarkOliveGreen - CSS-color 0x556B2F + CSS-color 0x556B2F. MediumAquaMarine - CSS-color 0x66CDAA + CSS-color 0x66CDAA. DarkSeaGreen - CSS-color 0x8FBC8F + CSS-color 0x8FBC8F. LightSeaGreen - CSS-color 0x20B2AA + CSS-color 0x20B2AA. DarkCyan - CSS-color 0x008B8B + CSS-color 0x008B8B. Teal - CSS-color 0x008080 + CSS-color 0x008080. Gray-color - CSS color group + CSS color group. Gainsboro - CSS-color 0xDCDCDC + CSS-color 0xDCDCDC. LightGray - CSS-color 0xD3D3D3 + CSS-color 0xD3D3D3. Silver - CSS-color 0xC0C0C0 + CSS-color 0xC0C0C0. DarkGray - CSS-color 0xA9A9A9 + CSS-color 0xA9A9A9. DimGray - CSS-color 0x696969 + CSS-color 0x696969. Gray - CSS-color 0x808080 + CSS-color 0x808080. LightSlateGray - CSS-color 0x778899 + CSS-color 0x778899. SlateGray - CSS-color 0x708090 + CSS-color 0x708090. DarkSlateGray - CSS-color 0x2F4F4F + CSS-color 0x2F4F4F. Black - CSS-color 0x000000 + CSS-color 0x000000. Orange-color - CSS color group + CSS color group. Orange - CSS-color 0xFFA500 + CSS-color 0xFFA500. DarkOrange - CSS-color 0xFF8C00 + CSS-color 0xFF8C00. Coral - CSS-color 0xFF7F50 + CSS-color 0xFF7F50. Tomato - CSS-color 0xFF6347 + CSS-color 0xFF6347. OrangeRed - CSS-color 0xFF4500 + CSS-color 0xFF4500. Pink-color - CSS color group + CSS color group. Pink - CSS-color 0xFFC0CB + CSS-color 0xFFC0CB. LightPink - CSS-color 0xFFB6C1 + CSS-color 0xFFB6C1. HotPink - CSS-color 0xFF69B4 + CSS-color 0xFF69B4. DeepPink - CSS-color 0xFF1493 + CSS-color 0xFF1493. PaleVioletRed - CSS-color 0xDB7093 + CSS-color 0xDB7093. MediumVioletRed - CSS-color 0xC71585 + CSS-color 0xC71585. Purple-color - CSS color group + CSS color group. Lavender - CSS-color 0xE6E6FA + CSS-color 0xE6E6FA. Thistle - CSS-color 0xD8BFD8 + CSS-color 0xD8BFD8. Plum - CSS-color 0xDDA0DD + CSS-color 0xDDA0DD. Orchid - CSS-color 0xDA70D6 + CSS-color 0xDA70D6. Violet - CSS-color 0xEE82EE + CSS-color 0xEE82EE. Fuchsia - CSS-color 0xFF00FF + CSS-color 0xFF00FF. Magenta - CSS-color 0xFF00FF + CSS-color 0xFF00FF. MediumOrchid - CSS-color 0xBA55D3 + CSS-color 0xBA55D3. DarkOrchid - CSS-color 0x9932CC + CSS-color 0x9932CC. DarkViolet - CSS-color 0x9400D3 + CSS-color 0x9400D3. BlueViolet - CSS-color 0x8A2BE2 + CSS-color 0x8A2BE2. DarkMagenta - CSS-color 0x8B008B + CSS-color 0x8B008B. Purple - CSS-color 0x800080 + CSS-color 0x800080. MediumPurple - CSS-color 0x9370DB + CSS-color 0x9370DB. MediumSlateBlue - CSS-color 0x7B68EE + CSS-color 0x7B68EE. SlateBlue - CSS-color 0x6A5ACD + CSS-color 0x6A5ACD. DarkSlateBlue - CSS-color 0x483D8B + CSS-color 0x483D8B. RebeccaPurple - CSS-color 0x663399 + CSS-color 0x663399. Indigo - CSS-color 0x4B0082 + CSS-color 0x4B0082. Red-color - CSS color group + CSS color group. LightSalmon - CSS-color 0xFFA07A + CSS-color 0xFFA07A. Salmon - CSS-color 0xFA8072 + CSS-color 0xFA8072. DarkSalmon - CSS-color 0xE9967A + CSS-color 0xE9967A. LightCoral - CSS-color 0xF08080 + CSS-color 0xF08080. IndianRed - CSS-color 0xCD5C5C + CSS-color 0xCD5C5C. Crimson - CSS-color 0xDC143C + CSS-color 0xDC143C. Red - CSS-color 0xFF0000 + CSS-color 0xFF0000. FireBrick - CSS-color 0xB22222 + CSS-color 0xB22222. DarkRed - CSS-color 0x8B0000 + CSS-color 0x8B0000. Yellow-color - CSS color group + CSS color group. Gold - CSS-color 0xFFD700 + CSS-color 0xFFD700. Yellow - CSS-color 0xFFFF00 + CSS-color 0xFFFF00. LightYellow - CSS-color 0xFFFFE0 + CSS-color 0xFFFFE0. LemonChiffon - CSS-color 0xFFFACD + CSS-color 0xFFFACD. LightGoldenRodYellow - CSS-color 0xFAFAD2 + CSS-color 0xFAFAD2. PapayaWhip - CSS-color 0xFFEFD5 + CSS-color 0xFFEFD5. 
Moccasin - CSS-color 0xFFE4B5 + CSS-color 0xFFE4B5. PeachPuff - CSS-color 0xFFDAB9 + CSS-color 0xFFDAB9. PaleGoldenRod - CSS-color 0xEEE8AA + CSS-color 0xEEE8AA. Khaki - CSS-color 0xF0E68C + CSS-color 0xF0E68C. DarkKhaki - CSS-color 0xBDB76B + CSS-color 0xBDB76B. White-color - CSS color group + CSS color group. White - CSS-color 0xFFFFFF + CSS-color 0xFFFFFF. Snow - CSS-color 0xFFFAFA + CSS-color 0xFFFAFA. HoneyDew - CSS-color 0xF0FFF0 + CSS-color 0xF0FFF0. MintCream - CSS-color 0xF5FFFA + CSS-color 0xF5FFFA. Azure - CSS-color 0xF0FFFF + CSS-color 0xF0FFFF. AliceBlue - CSS-color 0xF0F8FF + CSS-color 0xF0F8FF. GhostWhite - CSS-color 0xF8F8FF + CSS-color 0xF8F8FF. WhiteSmoke - CSS-color 0xF5F5F5 + CSS-color 0xF5F5F5. SeaShell - CSS-color 0xFFF5EE + CSS-color 0xFFF5EE. Beige - CSS-color 0xF5F5DC + CSS-color 0xF5F5DC. OldLace - CSS-color 0xFDF5E6 + CSS-color 0xFDF5E6. FloralWhite - CSS-color 0xFFFAF0 + CSS-color 0xFFFAF0. Ivory - CSS-color 0xFFFFF0 + CSS-color 0xFFFFF0. AntiqueWhite - CSS-color 0xFAEBD7 + CSS-color 0xFAEBD7. Linen - CSS-color 0xFAF0E6 + CSS-color 0xFAF0E6. LavenderBlush - CSS-color 0xFFF0F5 + CSS-color 0xFFF0F5. MistyRose - CSS-color 0xFFE4E1 + CSS-color 0xFFE4E1. @@ -4824,7 +5025,7 @@ Using a color map composed of shades of gray, varying from black at the weakest intensity to white at the strongest. # - White intensity between 0 and 1 + White intensity between 0 and 1. takesValue @@ -4842,7 +5043,7 @@ Attribute of a visual sensation according to which an area appears to be similar to one of the perceived colors. # - Angular value between 0 and 360 + Angular value between 0 and 360. takesValue @@ -4857,7 +5058,7 @@ Colorfulness of a stimulus relative to its own brightness. # - B value of RGB between 0 and 1 + B value of RGB between 0 and 1. takesValue @@ -4869,7 +5070,7 @@ HSV-value - AAttribute of a visual sensation according to which an area appears to emit more or less light. + An attribute of a visual sensation according to which an area appears to emit more or less light. # @@ -4890,7 +5091,7 @@ The red component. # - R value of RGB between 0 and 1 + R value of RGB between 0 and 1. takesValue @@ -4905,7 +5106,7 @@ The blue component. # - B value of RGB between 0 and 1 + B value of RGB between 0 and 1. takesValue @@ -4920,7 +5121,7 @@ The green component. # - G value of RGB between 0 and 1 + G value of RGB between 0 and 1. takesValue @@ -5142,7 +5343,7 @@ Incidental - Usually associated with a sensory event intended to give instructions to the participant about the task or behavior. + A sensory or other type of event that is unrelated to the task or experiment. Instructional @@ -5189,6 +5390,14 @@ Correction An action offering an improvement to replace a mistake or error. + + Done-indication + An action that indicates that the participant has completed this step in the task. + + relatedTag + Ready-indication + + Incorrect-action An action considered wrong or incorrect in the context of the task. @@ -5241,6 +5450,14 @@ Miss + + Ready-indication + An action that indicates that the participant is ready to perform the next step in the task. + + relatedTag + Done-indication + + Task-relationship @@ -5399,144 +5616,171 @@ Relation Concerns the way in which two or more people or things are connected. + + extensionAllowed + Comparative-relation - Something considered in comparison to something else. + Something considered in comparison to something else. The first entity is the focus. 
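The (A, (Relation, B)) pattern used throughout the Relation hunks below is ordinary HED grouping syntax. As a minimal sketch (not part of this PR), such a string can be built with the HedString constructor and load_schema_version call exercised by the tests later in this diff; the Duration tags and the pinned 8.2.0 version are illustrative assumptions:

```python
from hed import HedString, load_schema_version

# Schema version pinned elsewhere in this PR's tests; assumed cached locally.
schema = load_schema_version("8.2.0")

# A comparative relation in the (A, (Relation, B)) form described below,
# asserting that one duration is smaller than another (illustrative tags).
comparison = HedString("(Duration/2 s, (Less-than, Duration/5 s))", schema)
```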
Approximately-equal-to - (A (Approximately-equal-to B)) indicates that A and B have almost the same value. Here A and B could refer to sizes, orders, positions or other quantities. + (A, (Approximately-equal-to, B)) indicates that A and B have almost the same value. Here A and B could refer to sizes, orders, positions or other quantities. Less-than - (A (Less-than B)) indicates that A is smaller than B. Here A and B could refer to sizes, orders, positions or other quantities. + (A, (Less-than, B)) indicates that A is smaller than B. Here A and B could refer to sizes, orders, positions or other quantities. Less-than-or-equal-to - (A (Less-than-or-equal-to B)) indicates that the relative size or order of A is smaller than or equal to B. + (A, (Less-than-or-equal-to, B)) indicates that the relative size or order of A is smaller than or equal to B. Greater-than - (A (Greater-than B)) indicates that the relative size or order of A is bigger than that of B. + (A, (Greater-than, B)) indicates that the relative size or order of A is bigger than that of B. Greater-than-or-equal-to - (A (Greater-than-or-equal-to B)) indicates that the relative size or order of A is bigger than or the same as that of B. + (A, (Greater-than-or-equal-to, B)) indicates that the relative size or order of A is bigger than or the same as that of B. Equal-to - (A (Equal-to B)) indicates that the size or order of A is the same as that of B. + (A, (Equal-to, B)) indicates that the size or order of A is the same as that of B. Not-equal-to - (A (Not-equal-to B)) indicates that the size or order of A is not the same as that of B. + (A, (Not-equal-to, B)) indicates that the size or order of A is not the same as that of B. Connective-relation - Indicates two items are related in some way. + Indicates two entities are related in some way. The first entity is the focus. Belongs-to - (A (Belongs-to B)) indicates that A is a member of B. + (A, (Belongs-to, B)) indicates that A is a member of B. Connected-to - (A (Connected-to) B) indicates that A is related to B in some respect, usually through a direct link. + (A, (Connected-to, B)) indicates that A is related to B in some respect, usually through a direct link. Contained-in - (A (Contained-in B)) indicates that A is completely inside of B. + (A, (Contained-in, B)) indicates that A is completely inside of B. Described-by - (A (Described-by B)) indicates that B provides information about A. + (A, (Described-by, B)) indicates that B provides information about A. From-to - (A (From-to B)) indicates a directional relation from A to B. A is considered the source. + (A, (From-to, B)) indicates a directional relation from A to B. A is considered the source. Group-of - (A (Group-of B)) indicates A is a group of items of type B. + (A, (Group-of, B)) indicates A is a group of items of type B. Implied-by - (A (Implied-by B)) indicates B is suggested by A. + (A, (Implied-by, B)) indicates B is suggested by A. + + + Includes + (A, (Includes, B)) indicates that A has B as a member or part. Interacts-with - (A (Interacts-with B)) indicates A and B interact, possibly reciprocally. + (A, (Interacts-with, B)) indicates A and B interact, possibly reciprocally. Member-of - (A (Member-of B)) indicates A is a member of group B. + (A, (Member-of, B)) indicates A is a member of group B. Part-of - (A (Part-of B)) indicates A is a part of the whole B. + (A, (Part-of, B)) indicates A is a part of the whole B. Performed-by - (A (Performed-by B)) Indicates that ction or procedure A was carried out by agent B. 
+ (A, (Performed-by, B)) indicates that the action or procedure A was carried out by agent B. + + + Performed-using + (A, (Performed-using, B)) indicates that the action or procedure A was accomplished using B. Related-to - (A, (Relative-to B)) indicates A is a part of the whole B. + (A, (Related-to, B)) indicates A has some relationship to B. + + + Unrelated-to + (A, (Unrelated-to, B)) indicates that A is not related to B. For example, A is not related to Task. Directional-relation - A relationship indicating direction of change. + A relationship indicating direction of change of one entity relative to another. The first entity is the focus. Away-from - Go away from a place or object. + (A, (Away-from, B)) indicates that A is going or has moved away from B. The meaning depends on A and B. Towards - Moving in the direction of. A relation binding a relational quality or disposition to the relevant type of entity + (A, (Towards, B)) indicates that A is going to or has moved to B. The meaning depends on A and B. + + + + Logical-relation + Indicating a logical relationship between entities. The first entity is usually the focus. + + And + (A, (And, B)) means A and B are both in effect. + + + Or + (A, (Or, B)) means at least one of A and B is in effect. Spatial-relation - Indicating information about position. + Indicating a relationship about position between entities. Above - (A (Adjacent-to B)) means A is in a place or position that is higher than B. + (A, (Above, B)) means A is in a place or position that is higher than B. Across-from - (A (Across-from B)) means A is on the opposite side of something from B. + (A, (Across-from, B)) means A is on the opposite side of something from B. Adjacent-to - (A (Adjacent-to B)) indicates that A is next to B in time or space. + (A, (Adjacent-to, B)) indicates that A is next to B in time or space. Ahead-of - (A (Ahead-of B)) indicates that A is further forward in time or space in B. + (A, (Ahead-of, B)) indicates that A is further forward in time or space than B. Around - (A (Around B)) means A is in or near the present place or situation of B. + (A, (Around, B)) means A is in or near the present place or situation of B. Behind - (A (Behind B)) means A is at or to the far side of B, typically so as to be hidden by it. + (A, (Behind, B)) means A is at or to the far side of B, typically so as to be hidden by it. Below - (A (Below B)) means A is in a place or position that is lower than the position of B. + (A, (Below, B)) means A is in a place or position that is lower than the position of B. Between - (A (Between, (B, C))) means A is in the space or interval separating B and C. + (A, (Between, (B, C))) means A is in the space or interval separating B and C. Bilateral-to - (A (Bilateral B)) means A is on both sides of B or affects both sides of B. + (A, (Bilateral-to, B)) means A is on both sides of B or affects both sides of B. Bottom-edge-of - (A (Bottom-edge-of B)) means A is on the bottom most part or or near the boundary of B. + (A, (Bottom-edge-of, B)) means A is on the bottommost part of or near the boundary of B. relatedTag Left-edge-of @@ -5546,27 +5790,27 @@ Boundary-of - (A (Boundary-of B)) means A is on or part of the edge or boundary of B. + (A, (Boundary-of, B)) means A is on or part of the edge or boundary of B. Center-of - (A (Center-of B)) means A is at a point or or in an area that is approximately central within B. + (A, (Center-of, B)) means A is at a point or in an area that is approximately central within B.
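Circling back to the CSS-color hunks earlier in this file: each named color carries a 24-bit 0xRRGGBB code, while the RGB-red/green/blue value tags take components scaled to between 0 and 1. A throwaway helper (ours, not part of the schema or this PR) making that correspondence concrete:

```python
def css_hex_to_unit_rgb(css_color: int) -> tuple[float, float, float]:
    """Split a 24-bit 0xRRGGBB CSS color into R, G, B components in [0, 1]."""
    red = (css_color >> 16) & 0xFF
    green = (css_color >> 8) & 0xFF
    blue = css_color & 0xFF
    return red / 255.0, green / 255.0, blue / 255.0

# CadetBlue is listed above as CSS-color 0x5F9EA0.
print(css_hex_to_unit_rgb(0x5F9EA0))  # approximately (0.373, 0.620, 0.627)
```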
Close-to - (A (Close-to B)) means A is at a small distance from or is located near in space to B. + (A, (Close-to, B)) means A is at a small distance from or is located near in space to B. Far-from - (A (Far-from B)) means A is at a large distance from or is not located near in space to B. + (A, (Far-from, B)) means A is at a large distance from or is not located near in space to B. In-front-of - (A (In-front-of B)) means A is in a position just ahead or at the front part of B, potentially partially blocking B from view. + (A, (In-front-of, B)) means A is in a position just ahead or at the front part of B, potentially partially blocking B from view. Left-edge-of - (A (Left-edge-of B)) means A is located on the left side of B on or near the boundary of B. + (A, (Left-edge-of, B)) means A is located on the left side of B on or near the boundary of B. relatedTag Bottom-edge-of @@ -5576,39 +5820,62 @@ Left-side-of - (A (Left-side-of B)) means A is located on the left side of B usually as part of B. + (A, (Left-side-of, B)) means A is located on the left side of B usually as part of B. relatedTag Right-side-of + + Lower-center-of + (A, (Lower-center-of, B)) means A is situated on the lower center part of B (due south). This relation is often used to specify qualitative information about screen position. + + relatedTag + Center-of + Lower-left-of + Lower-right-of + Upper-center-of + Upper-right-of + + Lower-left-of - (A (Lower-left-of B)) means A is situated on the lower left part of B. This relation is often used to specify qualitative information about screen position. + (A, (Lower-left-of, B)) means A is situated on the lower left part of B. This relation is often used to specify qualitative information about screen position. relatedTag + Center-of + Lower-center-of Lower-right-of + Upper-center-of + Upper-left-of + Upper-right-of Lower-right-of - (A (Lower-right-of B)) means A is situated on the lower right part of B. This relation is often used to specify qualitative information about screen position. + (A, (Lower-right-of, B)) means A is situated on the lower right part of B. This relation is often used to specify qualitative information about screen position. relatedTag + Center-of + Lower-center-of + Lower-left-of Upper-left-of + Upper-center-of + Upper-right-of Outside-of - (A (Outside-of B)) means A is located in the space around but not including B. + (A, (Outside-of, B)) means A is located in the space around but not including B. Over - (A (over B)) means A above is above B so as to cover or protect or A extends over the a general area as from a from a vantage point. + (A, (Over, B)) means A is above B so as to cover or protect it, or A extends over a general area as seen from a vantage point. Right-edge-of - (A (Right-edge-of B)) means A is located on the right side of B on or near the boundary of B. + (A, (Right-edge-of, B)) means A is located on the right side of B on or near the boundary of B. relatedTag Bottom-edge-of @@ -5618,7 +5885,7 @@ Right-side-of - (A (Right-side-of B)) means A is located on the right side of B usually as part of B. + (A, (Right-side-of, B)) means A is located on the right side of B usually as part of B. relatedTag Left-side-of @@ -5626,15 +5893,15 @@ To-left-of - (A (To-left-of B)) means A is located on or directed toward the side to the west of B when B is facing north. This term is used when A is not part of B. + (A, (To-left-of, B)) means A is located on or directed toward the side to the west of B when B is facing north. 
This term is used when A is not part of B. To-right-of - (A (To-right-of B)) means A is located on or directed toward the side to the east of B when B is facing north. This term is used when A is not part of B. + (A, (To-right-of, B)) means A is located on or directed toward the side to the east of B when B is facing north. This term is used when A is not part of B. Top-edge-of - (A (Top-edge-of B)) means A is on the uppermost part or or near the boundary of B. + (A, (Top-edge-of, B)) means A is on the uppermost part of or near the boundary of B. relatedTag Left-edge-of @@ -5644,59 +5911,82 @@ Top-of - (A (Top-of B)) means A is on the uppermost part, side, or surface of B. + (A, (Top-of, B)) means A is on the uppermost part, side, or surface of B. - Underneath - (A (Underneath B)) means A is situated directly below and may be concealed by B. + Upper-center-of + (A, (Upper-center-of, B)) means A is situated on the upper center part of B (due north). This relation is often used to specify qualitative information about screen position. + + relatedTag + Center-of + Lower-center-of + Lower-left-of + Lower-right-of + Upper-left-of + Upper-right-of + Upper-left-of - (A (Upper-left-of B)) means A is situated on the upper left part of B. This relation is often used to specify qualitative information about screen position. + (A, (Upper-left-of, B)) means A is situated on the upper left part of B. This relation is often used to specify qualitative information about screen position. relatedTag + Center-of + Lower-center-of Lower-left-of + Lower-right-of + Upper-center-of + Upper-right-of Upper-right-of - (A (Upper-right-of B)) means A is situated on the upper right part of B. This relation is often used to specify qualitative information about screen position. + (A, (Upper-right-of, B)) means A is situated on the upper right part of B. This relation is often used to specify qualitative information about screen position. relatedTag + Center-of + Lower-center-of Lower-left-of + Upper-left-of + Upper-center-of + Lower-right-of + + Underneath + (A, (Underneath, B)) means A is situated directly below and may be concealed by B. + Within - (A (Within B)) means A is on the inside of or contained in B. + (A, (Within, B)) means A is on the inside of or contained in B. Temporal-relation - Any relationship which includes a temporal or time-based component. + A relationship that includes a temporal or time-based component. After - (A After B) means A happens at a time subsequent to a reference time related to B. + (A, (After, B)) means A happens at a time subsequent to a reference time related to B. Asynchronous-with - (A Asynchronous-with B) means A happens at times not occurring at the same time or having the same period or phase as B. + (A, (Asynchronous-with, B)) means A happens at times not occurring at the same time or having the same period or phase as B. Before - (A Before B) means A happens at a time earlier in time or order than B. + (A, (Before, B)) means A happens earlier in time or order than B. During - (A During B) means A happens at some point in a given period of time in which B is ongoing. + (A, (During, B)) means A happens at some point in a given period of time in which B is ongoing. Synchronous-with - (A Synchronous-with B) means A happens at occurs at the same time or rate as B. + (A, (Synchronous-with, B)) means A occurs at the same time or rate as B. Waiting-for - (A Waiting-for B) means A pauses for something to happen in B. 
+ (A, (Waiting-for, B)) means A pauses for something to happen in B. @@ -5716,6 +6006,10 @@ unitSymbol + + conversionFactor + 1.0 + @@ -5729,6 +6023,10 @@ SIUnit + + conversionFactor + 1.0 + rad @@ -5738,9 +6036,17 @@ unitSymbol + + conversionFactor + 1.0 + degree + + conversionFactor + 0.0174533 + @@ -5757,6 +6063,10 @@ unitSymbol + + conversionFactor + 1.0 + @@ -5768,6 +6078,10 @@ dollar + + conversionFactor + 1.0 + $ @@ -5777,11 +6091,48 @@ unitSymbol + + conversionFactor + 1.0 + + + + euro point + + electricPotentialUnits + + defaultUnits + uv + + + v + + SIUnit + + + unitSymbol + + + conversionFactor + 0.000001 + + + + Volt + + SIUnit + + + conversionFactor + 0.000001 + + + frequencyUnits @@ -5793,6 +6144,10 @@ SIUnit + + conversionFactor + 1.0 + Hz @@ -5802,6 +6157,10 @@ unitSymbol + + conversionFactor + 1.0 + @@ -5812,10 +6171,14 @@ dB - Intensity expressed as ratio to a threshold. Often used for sound intensity. + Intensity expressed as a ratio to a threshold. May be used for sound intensity. unitSymbol + + conversionFactor + 1.0 + candela @@ -5846,6 +6209,41 @@ unitSymbol + + conversionFactor + 1.0 + + + + + magneticFieldUnits + Units used to measure magnetic field intensity. + + defaultUnits + fT + + + tesla + + SIUnit + + + conversionFactor + 10^-15 + + + + T + + SIUnit + + + unitSymbol + + + conversionFactor + 10^-15 + @@ -5859,6 +6257,10 @@ SIUnit + + conversionFactor + 1.0 + B @@ -5868,6 +6270,10 @@ unitSymbol + + conversionFactor + 1.0 + @@ -5878,15 +6284,37 @@ foot + + conversionFactor + 0.3048 + inch + + conversionFactor + 0.0254 + + + + meter + + SIUnit + + + conversionFactor + 1.0 + metre SIUnit + + conversionFactor + 1.0 + m @@ -5896,9 +6324,17 @@ unitSymbol + + conversionFactor + 1.0 + mile + + conversionFactor + 1609.34 + @@ -5915,18 +6351,56 @@ unitSymbol + + conversionFactor + 1.0 + mph unitSymbol + + conversionFactor + 0.44704 + kph unitSymbol + + conversionFactor + 0.277778 + + + + + temperatureUnits + + degree Celsius + + SIUnit + + + conversionFactor + 1.0 + + + + oC + + SIUnit + + + unitSymbol + + + conversionFactor + 1.0 + @@ -5940,6 +6414,10 @@ SIUnit + + conversionFactor + 1.0 + s @@ -5949,16 +6427,32 @@ unitSymbol + + conversionFactor + 1.0 + day + + conversionFactor + 86400 + minute + + conversionFactor + 60 + hour Should be in 24-hour format. + + conversionFactor + 3600 + @@ -5975,6 +6469,10 @@ unitSymbol + + conversionFactor + 1.0 + @@ -5991,301 +6489,477 @@ unitSymbol + + conversionFactor + 1.0 + gram SIUnit + + conversionFactor + 1.0 + pound + + conversionFactor + 453.592 + lb + + conversionFactor + 453.592 + deca - SI unit multiple representing 10^1 + SI unit multiple representing 10^1. SIUnitModifier + + conversionFactor + 10.0 + da - SI unit multiple representing 10^1 + SI unit multiple representing 10^1. SIUnitSymbolModifier + + conversionFactor + 10.0 + hecto - SI unit multiple representing 10^2 + SI unit multiple representing 10^2. SIUnitModifier + + conversionFactor + 100.0 + h - SI unit multiple representing 10^2 + SI unit multiple representing 10^2. SIUnitSymbolModifier + + conversionFactor + 100.0 + kilo - SI unit multiple representing 10^3 + SI unit multiple representing 10^3. SIUnitModifier + + conversionFactor + 1000.0 + k - SI unit multiple representing 10^3 + SI unit multiple representing 10^3. SIUnitSymbolModifier + + conversionFactor + 1000.0 + mega - SI unit multiple representing 10^6 + SI unit multiple representing 10^6. SIUnitModifier + + conversionFactor + 10^6 + M - SI unit multiple representing 10^6 + SI unit multiple representing 10^6. 
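The conversionFactor values added throughout these unit-class hunks are the multipliers that take a value in the annotated units to the unit class's default units. A small sketch of that arithmetic (factor values copied from the speedUnits and angleUnits hunks above; the helper itself is illustrative, not a library API):

```python
# conversionFactor values from the hunks above: default units are
# m-per-s for speedUnits and radian for angleUnits.
CONVERSION_FACTORS = {
    "mph": 0.44704,       # miles per hour -> meters per second
    "kph": 0.277778,      # kilometers per hour -> meters per second
    "degree": 0.0174533,  # degrees -> radians
}

def to_default_units(value: float, units: str) -> float:
    """Multiply by the declared conversionFactor to reach default units."""
    return value * CONVERSION_FACTORS[units]

print(to_default_units(60, "mph"))  # 26.8224 m/s
```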
SIUnitSymbolModifier + + conversionFactor + 10^6 + giga - SI unit multiple representing 10^9 + SI unit multiple representing 10^9. SIUnitModifier + + conversionFactor + 10^9 + G - SI unit multiple representing 10^9 + SI unit multiple representing 10^9. SIUnitSymbolModifier + + conversionFactor + 10^9 + tera - SI unit multiple representing 10^12 + SI unit multiple representing 10^12. SIUnitModifier + + conversionFactor + 10^12 + T - SI unit multiple representing 10^12 + SI unit multiple representing 10^12. SIUnitSymbolModifier + + conversionFactor + 10^12 + peta - SI unit multiple representing 10^15 + SI unit multiple representing 10^15. SIUnitModifier + + conversionFactor + 10^15 + P - SI unit multiple representing 10^15 + SI unit multiple representing 10^15. SIUnitSymbolModifier + + conversionFactor + 10^15 + exa - SI unit multiple representing 10^18 + SI unit multiple representing 10^18. SIUnitModifier + + conversionFactor + 10^18 + E - SI unit multiple representing 10^18 + SI unit multiple representing 10^18. SIUnitSymbolModifier + + conversionFactor + 10^18 + zetta - SI unit multiple representing 10^21 + SI unit multiple representing 10^21. SIUnitModifier + + conversionFactor + 10^21 + Z - SI unit multiple representing 10^21 + SI unit multiple representing 10^21. SIUnitSymbolModifier + + conversionFactor + 10^21 + yotta - SI unit multiple representing 10^24 + SI unit multiple representing 10^24. SIUnitModifier + + conversionFactor + 10^24 + Y - SI unit multiple representing 10^24 + SI unit multiple representing 10^24. SIUnitSymbolModifier + + conversionFactor + 10^24 + deci - SI unit submultiple representing 10^-1 + SI unit submultiple representing 10^-1. SIUnitModifier + + conversionFactor + 0.1 + d - SI unit submultiple representing 10^-1 + SI unit submultiple representing 10^-1. SIUnitSymbolModifier + + conversionFactor + 0.1 + centi - SI unit submultiple representing 10^-2 + SI unit submultiple representing 10^-2. SIUnitModifier + + conversionFactor + 0.01 + c - SI unit submultiple representing 10^-2 + SI unit submultiple representing 10^-2. SIUnitSymbolModifier + + conversionFactor + 0.01 + milli - SI unit submultiple representing 10^-3 + SI unit submultiple representing 10^-3. SIUnitModifier + + conversionFactor + 0.001 + m - SI unit submultiple representing 10^-3 + SI unit submultiple representing 10^-3. SIUnitSymbolModifier + + conversionFactor + 0.001 + micro - SI unit submultiple representing 10^-6 + SI unit submultiple representing 10^-6. SIUnitModifier + + conversionFactor + 10^-6 + u - SI unit submultiple representing 10^-6 + SI unit submultiple representing 10^-6. SIUnitSymbolModifier + + conversionFactor + 10^-6 + nano - SI unit submultiple representing 10^-9 + SI unit submultiple representing 10^-9. SIUnitModifier + + conversionFactor + 10^-9 + n - SI unit submultiple representing 10^-9 + SI unit submultiple representing 10^-9. SIUnitSymbolModifier + + conversionFactor + 10^-9 + pico - SI unit submultiple representing 10^-12 + SI unit submultiple representing 10^-12. SIUnitModifier + + conversionFactor + 10^-12 + p - SI unit submultiple representing 10^-12 + SI unit submultiple representing 10^-12. SIUnitSymbolModifier + + conversionFactor + 10^-12 + femto - SI unit submultiple representing 10^-15 + SI unit submultiple representing 10^-15. SIUnitModifier + + conversionFactor + 10^-15 + f - SI unit submultiple representing 10^-15 + SI unit submultiple representing 10^-15. 
SIUnitSymbolModifier + + conversionFactor + 10^-15 + atto - SI unit submultiple representing 10^-18 + SI unit submultiple representing 10^-18. SIUnitModifier + + conversionFactor + 10^-18 + a - SI unit submultiple representing 10^-18 + SI unit submultiple representing 10^-18. SIUnitSymbolModifier + + conversionFactor + 10^-18 + zepto - SI unit submultiple representing 10^-21 + SI unit submultiple representing 10^-21. SIUnitModifier + + conversionFactor + 10^-21 + z - SI unit submultiple representing 10^-21 + SI unit submultiple representing 10^-21. SIUnitSymbolModifier + + conversionFactor + 10^-21 + yocto - SI unit submultiple representing 10^-24 + SI unit submultiple representing 10^-24. SIUnitModifier + + conversionFactor + 10^-24 + y - SI unit submultiple representing 10^-24 + SI unit submultiple representing 10^-24. SIUnitSymbolModifier + + conversionFactor + 10^-24 + @@ -6367,6 +7041,23 @@ valueClassProperty + + conversionFactor + The multiplicative factor to multiply these units to convert to default units. + + unitProperty + + + unitModifierProperty + + + + deprecatedFrom + Indicates that this element is deprecated. The value of the attribute is the latest schema version in which the element appeared in undeprecated form. + + elementProperty + + defaultUnits A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units. @@ -6380,6 +7071,19 @@ boolProperty + + nodeProperty + + + isInheritedProperty + + + + inLibrary + Indicates this schema element came from the named library schema, not the standard schema. This attribute is added by tools when a library schema is merged into its partnered standard schema. + + elementProperty + recommended @@ -6387,10 +7091,19 @@ boolProperty + + nodeProperty + relatedTag A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools. + + nodeProperty + + + isInheritedProperty + requireChild @@ -6398,6 +7111,9 @@ boolProperty + + nodeProperty + required @@ -6405,6 +7121,26 @@ boolProperty + + nodeProperty + + + + reserved + A schema attribute indicating that this tag has special meaning and requires special handling by tools. + + boolProperty + + + nodeProperty + + + + rooted + Indicates a top-level library schema node is identical to a node of the same name in the partnered standard schema. This attribute can only appear in nodes that have the inLibrary schema attribute. + + nodeProperty + SIUnit @@ -6439,6 +7175,12 @@ suggestedTag A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions. + + nodeProperty + + + isInheritedProperty + tagGroup @@ -6446,6 +7188,9 @@ boolProperty + + nodeProperty + takesValue @@ -6453,13 +7198,19 @@ boolProperty + + nodeProperty + topLevelTagGroup - A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. + A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. A tag group can have at most one tag with this attribute. boolProperty + + nodeProperty + unique @@ -6467,10 +7218,16 @@ boolProperty + + nodeProperty + unitClass A schema attribute specifying which unit class this value tag belongs to. + + nodeProperty + unitPrefix @@ -6495,6 +7252,9 @@ valueClass A schema attribute specifying which value class this value tag belongs to. 
+ + nodeProperty + + @@ -6502,6 +7262,18 @@ boolProperty Indicates that the schema attribute represents something that is either true or false and does not have a value. Attributes without this value are assumed to have string values. + + elementProperty + Indicates this schema attribute can apply to any type of element (tag term, unit class, etc.). + + + isInheritedProperty + Indicates that this attribute is inherited by child nodes. This property only applies to schema attributes for nodes. + + + nodeProperty + Indicates this schema attribute applies to node (tag-term) elements. This was added to allow for an attribute to apply to multiple elements. + unitClassProperty Indicates that the schema attribute is meant to be applied to unit classes. @@ -6519,6 +7291,6 @@ Indicates that the schema attribute is meant to be applied to value classes. - This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes, themselves have properties. + This schema is released under the Creative Commons Attribution 4.0 International license and is a product of the HED Working Group. The DOI for the latest version of the HED standard schema is 10.5281/zenodo.7876037. diff --git a/tests/data/schema_tests/wiki_tests/HED_header_unknown_attribute.mediawiki b/tests/data/schema_tests/wiki_tests/HED_header_unknown_attribute.mediawiki new file mode 100644 index 000000000..4874eef91 --- /dev/null +++ b/tests/data/schema_tests/wiki_tests/HED_header_unknown_attribute.mediawiki @@ -0,0 +1,3 @@ +HED unknownattribute:unknown + + diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index d4482314c..d7ac7c9a3 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -1,6 +1,7 @@ import unittest from hed.errors import ErrorHandler, ErrorContext, ErrorSeverity, ValidationErrors, SchemaWarnings, \ get_printable_issue_string, sort_issues, replace_tag_references +from hed.errors.error_reporter import hed_tag_error, get_printable_issue_string_html from hed import HedString from hed import load_schema_version @@ -9,7 +10,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): cls.error_handler = ErrorHandler() - cls._schema = load_schema_version() + cls._schema = load_schema_version("8.2.0") pass def test_push_error_context(self): @@ -33,6 +34,9 @@ def test_push_error_context(self): self.assertTrue(column_name == error_list[0][ErrorContext.COLUMN]) self.assertTrue(len(error_list) == 1) self.error_handler.reset_error_context() + self.error_handler.push_error_context(ErrorContext.ROW, None) + self.assertTrue(self.error_handler.error_context[0][1] == 0) + self.error_handler.reset_error_context() def test_pop_error_context(self): error_list = self.error_handler.format_error_with_context(ValidationErrors.TAG_NOT_UNIQUE, "") @@ -115,6 +119,18 @@ def test_printable_issue_string_with_filenames(self): self.assertTrue(len(printable_issues3) > len(printable_issues2)) self.assertEqual(printable_issues3.count(myfile), 1) + printable_issues = get_printable_issue_string_html(error_list, skip_filename=False) + self.assertTrue(len(printable_issues) > 10) + self.assertEqual(printable_issues.count(myfile), 1) + + printable_issues2 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, skip_filename=False) + self.assertTrue(len(printable_issues) > len(printable_issues2)) + self.assertEqual(printable_issues2.count(myfile), 1) 
+ printable_issues3 = get_printable_issue_string_html(error_list, severity=ErrorSeverity.ERROR, skip_filename=False, + title="Later added custom title that is longer") + self.assertTrue(len(printable_issues3) > len(printable_issues2)) + self.assertEqual(printable_issues3.count(myfile), 1) + self.error_handler.reset_error_context() def test_sort_issues(self): @@ -160,3 +176,25 @@ def test_replace_tag_references(self): mixed = {'a': HedString('Hed1', self._schema), 'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]} replace_tag_references(mixed) self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]}) + + + def test_register_error_twice(self): + test_code = "test_error_code" + @hed_tag_error(test_code) + def test_error_code(tag): + pass + + with self.assertRaises(KeyError): + @hed_tag_error(test_code) + def test_error_code(tag): + pass + + def test_format_unknown_error(self): + error_code = "Unknown error type" + error_list = self.error_handler.format_error(error_code, "param1", param2=0) + self.assertEqual(error_list[0]['code'], error_code) + + actual_code = "Actual unknown error type" + error_list = self.error_handler.format_error_from_context(error_code, self.error_handler.error_context, "param1", param2=0, + actual_error=actual_code) + self.assertEqual(error_list[0]['code'], actual_code) diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index 02c7f34a1..b74e97ab3 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -32,7 +32,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/bids_tests/eeg_ds003645s_hed')) schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) + '../data/schema_tests/HED8.2.0.xml')) cls.bids_root_path = bids_root_path json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) events_path = os.path.realpath(os.path.join(bids_root_path, diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py index 36fcc168d..519c9bae4 100644 --- a/tests/models/test_basic_search.py +++ b/tests/models/test_basic_search.py @@ -19,7 +19,7 @@ def setUpClass(cls): cls.events_path = os.path.realpath( os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) cls.base_input = TabularInput(cls.events_path, sidecar1_path) - cls.schema = load_schema_version() + cls.schema = load_schema_version("8.2.0") cls.df = cls.base_input.series_filtered def test_find_matching_results(self): diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 5005f55c5..f3f24f5de 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -10,7 +10,7 @@ class TestDefBase(TestHedBase): @classmethod def setUpClass(cls): - cls.hed_schema = load_schema_version("8.0.0") + cls.hed_schema = load_schema_version("8.2.0") def check_def_base(self, test_strings, expected_issues): for test_key in test_strings: diff --git a/tests/models/test_df_util.py b/tests/models/test_df_util.py index 09f913466..280038ffe 100644 --- a/tests/models/test_df_util.py +++ b/tests/models/test_df_util.py @@ -9,7 +9,7 @@ class TestShrinkDefs(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_shrink_defs_normal(self): df = pd.DataFrame({"column1": 
["(Def-expand/TestDefNormal,(Acceleration/2471,Action/TestDef2)),Event/SomeEvent"]}) @@ -66,7 +66,7 @@ def test_shrink_defs_series_placeholder(self): class TestExpandDefs(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") self.def_dict = DefinitionDict(["(Definition/TestDefNormal,(Acceleration/2471,Action/TestDef2))", "(Definition/TestDefPlaceholder/#,(Acceleration/#,Action/TestDef2))"], hed_schema=self.schema) @@ -116,7 +116,7 @@ def test_expand_defs_series_placeholder(self): class TestConvertToForm(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_convert_to_form_short_tags(self): df = pd.DataFrame({"column1": ["Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/CSS-color/White-color/Azure,Action/Perceive/See"]}) diff --git a/tests/models/test_hed_string.py b/tests/models/test_hed_string.py index 7f48db7f6..b740737fd 100644 --- a/tests/models/test_hed_string.py +++ b/tests/models/test_hed_string.py @@ -7,7 +7,7 @@ class TestHedStrings(unittest.TestCase): @classmethod def setUpClass(cls): - cls.schema = load_schema_version("8.0.0") + cls.schema = load_schema_version("8.2.0") def validator_scalar(self, test_strings, expected_results, test_function): for test_key in test_strings: diff --git a/tests/models/test_hed_tag.py b/tests/models/test_hed_tag.py index d21c46ced..f6be18f6a 100644 --- a/tests/models/test_hed_tag.py +++ b/tests/models/test_hed_tag.py @@ -143,11 +143,11 @@ def test_strip_off_units_from_value(self): # stripped_dollars_string_no_space = dollars_string_no_space._get_tag_units_portion(currency_units) # stripped_dollars_string = dollars_string._get_tag_units_portion(currency_units) # stripped_dollars_string_invalid = dollars_string_invalid._get_tag_units_portion(currency_units) - stripped_volume_string, _, _ = volume_string._get_tag_units_portion(volume_units) - stripped_volume_string_no_space, _, _ = volume_string_no_space._get_tag_units_portion(volume_units) - stripped_prefixed_volume_string, _, _ = prefixed_volume_string._get_tag_units_portion(volume_units) - stripped_invalid_volume_string, _, _ = invalid_volume_string._get_tag_units_portion(volume_units) - stripped_invalid_distance_string, _, _ = invalid_distance_string._get_tag_units_portion(distance_units) + stripped_volume_string, _, _ = HedTag._get_tag_units_portion(volume_string.extension, volume_units) + stripped_volume_string_no_space, _, _ = HedTag._get_tag_units_portion(volume_string_no_space.extension, volume_units) + stripped_prefixed_volume_string, _, _ = HedTag._get_tag_units_portion(prefixed_volume_string.extension, volume_units) + stripped_invalid_volume_string, _, _ = HedTag._get_tag_units_portion(invalid_volume_string.extension, volume_units) + stripped_invalid_distance_string, _, _ = HedTag._get_tag_units_portion(invalid_distance_string.extension, distance_units) # self.assertEqual(stripped_dollars_string_no_space, None) # self.assertEqual(stripped_dollars_string, '25.99') # self.assertEqual(stripped_dollars_string_invalid, None) diff --git a/tests/models/test_string_util.py b/tests/models/test_string_util.py index 27cb13879..472de83b6 100644 --- a/tests/models/test_string_util.py +++ b/tests/models/test_string_util.py @@ -7,7 +7,7 @@ class TestHedStringSplit(unittest.TestCase): @classmethod def setUpClass(cls): - cls.schema = load_schema_version() + cls.schema = load_schema_version("8.2.0") def check_split_base_tags(self, hed_string, 
base_tags, expected_string, expected_string2): # Test case 1: remove_group=False @@ -70,7 +70,7 @@ def test_case_5(self): class TestHedStringSplitDef(unittest.TestCase): @classmethod def setUpClass(cls): - cls.schema = load_schema_version() + cls.schema = load_schema_version("8.2.0") def check_split_def_tags(self, hed_string, def_names, expected_string, expected_string2): # Test case 1: remove_group=False @@ -133,7 +133,7 @@ def test_case_5(self): class TestGatherDescriptions(unittest.TestCase): def setUp(self): - self.schema = load_schema_version() + self.schema = load_schema_version("8.2.0") def test_gather_single_description(self): input_str = "Sensory-event, Description/This is a test." diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index 02ef32df6..e1c3bc7fb 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -17,7 +17,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/bids_tests/eeg_ds003645s_hed')) schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/HED8.0.0.xml')) + '../data/schema_tests/HED8.2.0.xml')) sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) cls.events_path = os.path.realpath( os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) diff --git a/tests/schema/test_hed_cache.py b/tests/schema/test_hed_cache.py index 3a33155bf..c5595974e 100644 --- a/tests/schema/test_hed_cache.py +++ b/tests/schema/test_hed_cache.py @@ -4,7 +4,6 @@ import unittest import os import itertools -import urllib.error from hed.schema import hed_cache from hed import schema @@ -30,7 +29,7 @@ def setUpClass(cls): cls.specific_hed_url = "https://raw.githubusercontent.com/hed-standard/hed-schemas/master/standard_schema/hedxml/HED8.0.0.xml" try: hed_cache.cache_xml_versions(cache_folder=cls.hed_cache_dir) - except urllib.error.HTTPError as e: + except HedFileError as e: schema.set_cache_directory(cls.saved_cache_folder) raise e @@ -54,20 +53,6 @@ def test_get_cache_directory(self): # print(f"\nCache directory is {os.path.realpath(cache_dir)}\n") self.assertEqual(cache_dir, self.hed_cache_dir) - def test_get_hed_version_path(self): - latest_hed_version_path = hed_cache.get_hed_version_path() - self.assertIsInstance(latest_hed_version_path, str) - - def test_get_latest_semantic_version_in_list(self): - latest_version = hed_cache._get_latest_semantic_version_in_list(self.semantic_version_list) - self.assertIsInstance(latest_version, str) - self.assertEqual(latest_version, self.semantic_version_three) - - def test_compare_semantic_versions(self): - latest_version = hed_cache._compare_semantic_versions(self.semantic_version_one, self.semantic_version_two) - self.assertIsInstance(latest_version, str) - self.assertEqual(latest_version, self.semantic_version_two) - def test_set_cache_directory(self): hed_cache_dir = "TEST_SCHEMA_CACHE" saved_cache_dir = hed_cache.HED_CACHE_DIRECTORY @@ -81,6 +66,9 @@ def test_cache_specific_url(self): local_filename = hed_cache.cache_specific_url(self.specific_hed_url, None, cache_folder=self.hed_cache_dir) self.assertTrue(local_filename) + with self.assertRaises(HedFileError): + hed_cache.cache_specific_url("https://github.com/hed-standard/hed-python/notrealurl.xml") + def test_get_hed_versions_all(self): cached_versions = hed_cache.get_hed_versions(self.hed_cache_dir, 
library_name="all") self.assertIsInstance(cached_versions, dict) @@ -128,44 +116,40 @@ def tearDownClass(cls): schema.set_cache_directory(cls.saved_cache_folder) def test_local_cache(self): - final_hed_xml_file = hed_cache.get_hed_version_path("8.0.0", None, local_hed_directory=self.hed_cache_dir) + final_hed_xml_file = hed_cache.get_hed_version_path("8.2.0", None, local_hed_directory=self.hed_cache_dir) self.assertFalse(final_hed_xml_file) hed_cache.cache_local_versions(self.hed_cache_dir) - final_hed_xml_file = hed_cache.get_hed_version_path("8.0.0", None, local_hed_directory=self.hed_cache_dir) + final_hed_xml_file = hed_cache.get_hed_version_path("8.2.0", None, local_hed_directory=self.hed_cache_dir) self.assertTrue(final_hed_xml_file) def test_schema_load_schema_version_invalid(self): # This test was moved here from schema io as it will throw errors on github rate limiting like the cache tests. with self.assertRaises(HedFileError) as context1: load_schema_version("x.0.1") - self.assertEqual(context1.exception.args[0], 'fileNotFound') + self.assertEqual(context1.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context2: load_schema_version("base:score_x.0.1") - self.assertEqual(context2.exception.args[0], 'fileNotFound') + self.assertEqual(context2.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context3: load_schema_version(["", None]) - self.assertEqual(context3.exception.args[0], 'schemaDuplicatePrefix') + self.assertEqual(context3.exception.args[0], 'SCHEMA_VERSION_INVALID') with self.assertRaises(HedFileError) as context4: - load_schema_version(["8.0.0", "score_1.0.0"]) + load_schema_version(["8.2.0", "score_1.0.0"]) self.assertEqual(context4.exception.args[0], 'schemaDuplicatePrefix') with self.assertRaises(HedFileError) as context5: - load_schema_version(["sc:8.0.0", "sc:score_1.0.0"]) + load_schema_version(["sc:8.2.0", "sc:score_1.0.0"]) self.assertEqual(context5.exception.args[0], 'schemaDuplicatePrefix') with self.assertRaises(HedFileError) as context6: - load_schema_version(["", "score_1.0.0"]) + load_schema_version(["8.1.0", "score_1.0.0"]) self.assertEqual(context6.exception.args[0], 'schemaDuplicatePrefix') - with self.assertRaises(HedFileError) as context7: - load_schema_version(["", "score_"]) - self.assertEqual(context7.exception.args[0], 'schemaDuplicatePrefix') - with self.assertRaises(HedFileError) as context8: - load_schema_version(["", "notreallibrary"]) + load_schema_version(["8.1.0", "notreallibrary_1.0.0"]) self.assertEqual(context8.exception.args[0], 'fileNotFound') if __name__ == '__main__': diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py index 9344df988..d62dcb1fd 100644 --- a/tests/schema/test_hed_schema.py +++ b/tests/schema/test_hed_schema.py @@ -8,7 +8,7 @@ class TestHedSchema(unittest.TestCase): schema_file_3g_xml = '../data/schema_tests/HED8.0.0t.xml' - schema_file_3g = '../data/schema_tests/HED8.0.0.mediawiki' + schema_file_3g = '../data/schema_tests/HED8.2.0.mediawiki' @classmethod def setUpClass(cls): @@ -30,7 +30,7 @@ def test_name(self): # We should have an error before we reach here. 
self.assertTrue(False) except HedFileError as e: - self.assertTrue(invalid_xml_file in get_printable_issue_string(e.issues, skip_filename=False)) + self.assertTrue(invalid_xml_file in e.filename) def test_tag_attribute(self): test_strings = { @@ -134,7 +134,7 @@ def test_get_hed_xml_version(self): self.assertEqual(get_hed_xml_version(self.hed_xml_3g), "8.0.0") def test_has_duplicate_tags(self): - self.assertFalse(self.hed_schema_3g._has_duplicate_tags) + self.assertFalse(self.hed_schema_3g.has_duplicates()) def test_short_tag_mapping(self): self.assertEqual(len(self.hed_schema_3g.tags.keys()), 1110) @@ -144,7 +144,7 @@ def test_schema_compliance(self): self.assertEqual(len(warnings), 14) def test_bad_prefixes(self): - schema = load_schema_version(xml_version="8.0.0") + schema = load_schema_version(xml_version="8.2.0") self.assertTrue(schema.get_tag_entry("Event")) self.assertFalse(schema.get_tag_entry("sc:Event")) @@ -155,7 +155,7 @@ def test_bad_prefixes(self): self.assertFalse(schema.get_tag_entry("Event", schema_namespace='unknown')) def test_bad_prefixes_library(self): - schema = load_schema_version(xml_version="tl:8.0.0") + schema = load_schema_version(xml_version="tl:8.2.0") self.assertTrue(schema.get_tag_entry("tl:Event", schema_namespace="tl:")) self.assertFalse(schema.get_tag_entry("sc:Event", schema_namespace="tl:")) diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index a96d61f60..7e69a3f69 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -1,11 +1,19 @@ import unittest +import rdflib + from hed.errors import HedFileError from hed.errors.error_types import SchemaErrors from hed.schema import load_schema, HedSchemaGroup, load_schema_version, HedSchema +from hed.schema.hed_schema_io import parse_version_list, _load_schema_version +from tests.schema.test_schema_converters import with_temp_file, get_temp_filename + import os from hed.errors import HedExceptions from hed.schema import HedKey +from hed.schema import hed_cache +from hed import schema +import shutil # todo: speed up these tests @@ -51,27 +59,97 @@ class TestHedSchema(unittest.TestCase): # self.assertEqual(score_lib._namespace, "sc:") # self.assertTrue(score_lib.get_tag_entry("Modulator", schema_namespace="sc:")) + def test_load_schema_invalid_parameters(self): + bad_filename = "this_is_not_a_real_file.xml" + with self.assertRaises(HedFileError): + load_schema(bad_filename) + + bad_filename = "https://github.com/hed-standard/hed-python/bad_url.xml" + with self.assertRaises(HedFileError): + load_schema(bad_filename) + + def test_load_schema_name(self): + schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/HED8.2.0.mediawiki') + + schema = load_schema(schema_path, schema_namespace="testspace", name="Test Name") + self.assertEqual(schema.schema_namespace, "testspace:") + self.assertEqual(schema.name, "Test Name") + + schema = load_schema(schema_path, schema_namespace="testspace") + self.assertEqual(schema.schema_namespace, "testspace:") + self.assertEqual(schema.name, schema_path) + def test_load_schema_version(self): ver1 = "8.0.0" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") self.assertEqual(schemas1.version_number, "8.0.0", "load_schema_version has the right version") self.assertEqual(schemas1.library, "", "load_schema_version standard schema has no library") + self.assertEqual(schemas1.name, "8.0.0") 
ver2 = "base:8.0.0" schemas2 = load_schema_version(ver2) self.assertIsInstance(schemas2, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertEqual(schemas2.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") + self.assertEqual(schemas2.name, "base:8.0.0") ver3 = ["base:8.0.0"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertEqual(schemas3.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") - ver3 = ["base:"] - schemas3 = load_schema_version(ver3) + self.assertEqual(schemas3.name, "base:8.0.0") + + def test_load_schema_version_merged(self): + ver4 = ["testlib_2.0.0", "score_1.1.0"] + schemas3 = load_schema_version(ver4) + issues = schemas3.check_compliance() self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") - self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.schema_namespace, "", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.name, "testlib_2.0.0,score_1.1.0") + # Deprecated tag warnings + self.assertEqual(len(issues), 11) + + # Verify this cannot be saved + with self.assertRaises(HedFileError): + schemas3.save_as_mediawiki("filename") + + def test_load_and_verify_tags(self): + # Load 'testlib' by itself + testlib = load_schema_version('testlib_2.0.0') + + # Load 'score' by itself + score = load_schema_version('score_1.1.0') + + # Load both 'testlib' and 'score' together + schemas3 = load_schema_version(["testlib_2.0.0", "score_1.1.0"]) + + # Extract the tag names from each library + testlib_tags = set(testlib.tags.all_names.keys()) + score_tags = set(score.tags.all_names.keys()) + merged_tags = set(schemas3.tags.all_names.keys()) + + # Verify that all tags in 'testlib' and 'score' are in the merged library + for tag in testlib_tags: + self.assertIn(tag, merged_tags, f"Tag {tag} from testlib is missing in the merged schema.") + + for tag in score_tags: + self.assertIn(tag, merged_tags, f"Tag {tag} from score is missing in the merged schema.") + + # Negative test cases + # Ensure merged_tags is not a subset of testlib_tags or score_tags + self.assertFalse(merged_tags.issubset(testlib_tags), "The merged tags should not be a subset of testlib tags.") + self.assertFalse(merged_tags.issubset(score_tags), "The merged tags should not be a subset of score tags.") + + # Ensure there are tags that came uniquely from each library + unique_testlib_tags = testlib_tags - score_tags + unique_score_tags = score_tags - testlib_tags + + self.assertTrue(any(tag in merged_tags for tag in unique_testlib_tags), + "There should be unique tags from testlib in the merged schema.") + self.assertTrue(any(tag in merged_tags for tag in unique_score_tags), + "There should be unique tags from score in the merged schema.") def test_load_schema_version_libraries(self): ver1 = "score_1.0.0" @@ -81,16 +159,6 @@ def test_load_schema_version_libraries(self): self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no 
namespace") self.assertEqual(schemas1.get_formatted_version(), '"score_1.0.0"', "load_schema_version gives correct version_string with single library no namespace") - ver1 = "score_" - schemas1 = load_schema_version(ver1) - self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version_number, "load_schema_version has the right version") - self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") - ver1 = "score" - schemas1 = load_schema_version(ver1) - self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version_number, "load_schema_version has the right version") - self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") ver2 = "base:score_1.0.0" schemas2 = load_schema_version(ver2) @@ -99,11 +167,13 @@ def test_load_schema_version_libraries(self): self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") self.assertEqual(schemas2.get_formatted_version(), '"base:score_1.0.0"', "load_schema_version gives correct version_string with single library with namespace") + self.assertEqual(schemas2.name, "base:score_1.0.0") ver3 = ["8.0.0", "sc:score_1.0.0"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchemaGroup, "load_schema_version returns HedSchema version+namespace") self.assertIsInstance(schemas3._schemas, dict, "load_schema_version group keeps dictionary of hed versions") self.assertEqual(len(schemas3._schemas), 2, "load_schema_version group dictionary is right length") + self.assertEqual(schemas3.name, "8.0.0,sc:score_1.0.0") s = schemas3._schemas[""] self.assertEqual(s.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', @@ -115,6 +185,7 @@ def test_load_schema_version_libraries(self): self.assertEqual(len(schemas4._schemas), 2, "load_schema_version group dictionary is right length") self.assertEqual(schemas4.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', "load_schema_version gives correct version_string with multiple prefixes") + self.assertEqual(schemas4.name, "8.0.0,sc:score_1.0.0") s = schemas4._schemas["sc:"] self.assertEqual(s.version_number, "1.0.0", "load_schema_version has the right version with namespace") with self.assertRaises(KeyError) as context: @@ -130,23 +201,105 @@ def test_load_schema_version_libraries(self): with self.assertRaises(HedFileError) as context: load_schema_version("sc1:") - # def test_load_schema_version_empty(self): - # schemas = load_schema_version("") - # self.assertIsInstance(schemas, HedSchema, "load_schema_version for empty string returns latest version") - # self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") - # self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") - # schemas = load_schema_version(None) - # self.assertIsInstance(schemas, HedSchema, "load_schema_version for None returns latest version") - # self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") - # self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") - # schemas = load_schema_version([""]) - # self.assertIsInstance(schemas, HedSchema, "load_schema_version list with blank entry 
returns latest version") - # self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") - # self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") - # schemas = load_schema_version([]) - # self.assertIsInstance(schemas, HedSchema, "load_schema_version list with blank entry returns latest version") - # self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") - # self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") + + +class TestHedSchemaUnmerged(unittest.TestCase): + # Verify the hed cache can handle loading unmerged with_standard schemas in case they are ever used + @classmethod + def setUpClass(cls): + hed_cache_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../schema_cache_test_local_unmerged/') + if os.path.exists(hed_cache_dir) and os.path.isdir(hed_cache_dir): + shutil.rmtree(hed_cache_dir) + _load_schema_version.cache_clear() + cls.hed_cache_dir = hed_cache_dir + cls.saved_cache_folder = hed_cache.HED_CACHE_DIRECTORY + schema.set_cache_directory(cls.hed_cache_dir) + + # Copy source as dupe into cache for easily testing dupe detection + cls.dupe_library_name = "testscoredupe_1.1.0" + cls.source_library_name = "score_1.1.0" + + for filename in os.listdir(hed_cache.INSTALLED_CACHE_LOCATION): + loaded_schema = schema.load_schema(os.path.join(hed_cache.INSTALLED_CACHE_LOCATION, filename)) + loaded_schema.save_as_xml(os.path.join(cls.hed_cache_dir, filename), save_merged=False) + if filename == f"HED_{cls.source_library_name}.xml": + new_filename = f"HED_{cls.dupe_library_name}.xml" + loaded_schema.save_as_xml(os.path.join(cls.hed_cache_dir, new_filename), save_merged=False) + + + @classmethod + def tearDownClass(cls): + shutil.rmtree(cls.hed_cache_dir) + schema.set_cache_directory(cls.saved_cache_folder) + _load_schema_version.cache_clear() + + def test_load_schema_version(self): + ver1 = "8.0.0" + schemas1 = load_schema_version(ver1) + self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") + self.assertEqual(schemas1.version_number, "8.0.0", "load_schema_version has the right version") + self.assertEqual(schemas1.library, "", "load_schema_version standard schema has no library") + ver2 = "base:8.0.0" + schemas2 = load_schema_version(ver2) + self.assertIsInstance(schemas2, HedSchema, "load_schema_version returns HedSchema version+namespace") + self.assertEqual(schemas2.version_number, "8.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") + ver3 = ["base:8.0.0"] + schemas3 = load_schema_version(ver3) + self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") + self.assertEqual(schemas3.version_number, "8.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") + + def test_load_schema_version_merged(self): + ver4 = ["testlib_2.0.0", "score_1.1.0"] + schemas3 = load_schema_version(ver4) + issues = schemas3.check_compliance() + self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") + self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") + self.assertEqual(schemas3._namespace, "", "load_schema_version has 
the right version with namespace") + self.assertEqual(len(issues), 11) + + def test_load_schema_version_merged_duplicates(self): + ver4 = ["score_1.1.0", "testscoredupe_1.1.0"] + with self.assertRaises(HedFileError) as context: + load_schema_version(ver4) + self.assertEqual(len(context.exception.issues), 597) + + def test_load_and_verify_tags(self): + # Load 'testlib' by itself + testlib = load_schema_version('testlib_2.0.0') + + # Load 'score' by itself + score = load_schema_version('score_1.1.0') + + # Load both 'testlib' and 'score' together + schemas3 = load_schema_version(["testlib_2.0.0", "score_1.1.0"]) + + # Extract the tag names from each library + testlib_tags = set(testlib.tags.all_names.keys()) + score_tags = set(score.tags.all_names.keys()) + merged_tags = set(schemas3.tags.all_names.keys()) + + # Verify that all tags in 'testlib' and 'score' are in the merged library + for tag in testlib_tags: + self.assertIn(tag, merged_tags, f"Tag {tag} from testlib is missing in the merged schema.") + + for tag in score_tags: + self.assertIn(tag, merged_tags, f"Tag {tag} from score is missing in the merged schema.") + + # Negative test cases + # Ensure merged_tags is not a subset of testlib_tags or score_tags + self.assertFalse(merged_tags.issubset(testlib_tags), "The merged tags should not be a subset of testlib tags.") + self.assertFalse(merged_tags.issubset(score_tags), "The merged tags should not be a subset of score tags.") + + # Ensure there are tags that came uniquely from each library + unique_testlib_tags = testlib_tags - score_tags + unique_score_tags = score_tags - testlib_tags + + self.assertTrue(any(tag in merged_tags for tag in unique_testlib_tags), + "There should be unique tags from testlib in the merged schema.") + self.assertTrue(any(tag in merged_tags for tag in unique_score_tags), + "There should be unique tags from score in the merged schema.") class TestHedSchemaMerging(unittest.TestCase): @@ -166,36 +319,40 @@ def _base_merging_test(self, files): s1 = files[i] s2 = files[i + 1] self.assertEqual(s1, s2) + filename1 = get_temp_filename(".xml") + filename2 = get_temp_filename(".xml") try: - path1 = s1.save_as_xml(save_merged=save_merged) - path2 = s2.save_as_xml(save_merged=save_merged) - result = filecmp.cmp(path1, path2) + s1.save_as_xml(filename1, save_merged=save_merged) + s2.save_as_xml(filename2, save_merged=save_merged) + result = filecmp.cmp(filename1, filename2) # print(s1.filename) # print(s2.filename) self.assertTrue(result) - reload1 = load_schema(path1) - reload2 = load_schema(path2) + reload1 = load_schema(filename1) + reload2 = load_schema(filename2) self.assertEqual(reload1, reload2) except Exception: self.assertTrue(False) finally: - os.remove(path1) - os.remove(path2) + os.remove(filename1) + os.remove(filename2) try: - path1 = s1.save_as_mediawiki(save_merged=save_merged) - path2 = s2.save_as_mediawiki(save_merged=save_merged) - result = filecmp.cmp(path1, path2) + filename1 = get_temp_filename(".mediawiki") + filename2 = get_temp_filename(".mediawiki") + s1.save_as_mediawiki(filename1, save_merged=save_merged) + s2.save_as_mediawiki(filename2, save_merged=save_merged) + result = filecmp.cmp(filename1, filename2) self.assertTrue(result) - reload1 = load_schema(path1) - reload2 = load_schema(path2) + reload1 = load_schema(filename1) + reload2 = load_schema(filename2) self.assertEqual(reload1, reload2) except Exception: self.assertTrue(False) finally: - os.remove(path1) - os.remove(path2) + os.remove(filename1) + os.remove(filename2) lines1 = 
s1.get_as_mediawiki_string(save_merged=save_merged) lines2 = s2.get_as_mediawiki_string(save_merged=save_merged) @@ -232,13 +389,11 @@ def test_saving_merged_rooted_sorting(self): self._base_merging_test(files) - def test_saving_bad_sort(self): + @with_temp_file(".mediawiki") + def test_saving_bad_sort(self, filename): loaded_schema = load_schema(os.path.join(self.full_base_folder, "bad_sort_test.mediawiki")) - filename = loaded_schema.save_as_mediawiki() - try: - reloaded_schema = load_schema(filename) - finally: - os.remove(filename) + loaded_schema.save_as_mediawiki(filename) + reloaded_schema = load_schema(filename) self.assertEqual(loaded_schema, reloaded_schema) @@ -279,17 +434,17 @@ def _base_added_class_tests(self, schema): def test_saving_merged2(self): s1 = load_schema(os.path.join(self.full_base_folder, "add_all_types.mediawiki")) self._base_added_class_tests(s1) - path1 = "" - path2 = "" for save_merged in [True, False]: + path1 = get_temp_filename(".xml") + path2 = get_temp_filename(".mediawiki") try: - path1 = s1.save_as_xml(save_merged=save_merged) + s1.save_as_xml(path1, save_merged=save_merged) s2 = load_schema(path1) self.assertEqual(s1, s2) self._base_added_class_tests(s2) - path2 = s1.save_as_mediawiki(save_merged=save_merged) - s2 = load_schema(path1) + s1.save_as_mediawiki(path2, save_merged=save_merged) + s2 = load_schema(path2) self.assertEqual(s1, s2) self._base_added_class_tests(s2) finally: @@ -392,3 +547,111 @@ def test_saving_in_library_xml(self): score_count = schema_string.count("inLibrary") # One extra because this also finds the attribute definition, whereas in wiki it's a different format. self.assertEqual(score_count, 854, "There should be 854 in library entries in the saved score schema") + + +class TestParseVersionList(unittest.TestCase): + def test_empty_and_single_library(self): + """Test that an empty list returns an empty dictionary and a single library is handled correctly.""" + self.assertEqual(parse_version_list([]), {}) + self.assertEqual(parse_version_list(["score"]), {"": "score"}) + + def test_multiple_libraries_without_and_with_prefix(self): + """Test that multiple libraries without a prefix and with the same prefix are handled correctly.""" + self.assertEqual(parse_version_list(["score", "testlib"]), {"": "score,testlib"}) + self.assertEqual(parse_version_list(["test:score", "test:testlib"]), {"test": "test:score,testlib"}) + + def test_single_and_multiple_libraries_with_different_prefixes(self): + """Test that a single library with a prefix and multiple libraries with different prefixes are handled correctly.""" + self.assertEqual(parse_version_list(["ol:otherlib"]), {"ol": "ol:otherlib"}) + self.assertEqual(parse_version_list(["score", "ol:otherlib", "ul:anotherlib"]), {"": "score", "ol": "ol:otherlib", "ul": "ul:anotherlib"}) + + def test_duplicate_library_raises_error(self): + """Test that duplicate libraries raise the correct error.""" + with self.assertRaises(HedFileError): + parse_version_list(["score", "score"]) + with self.assertRaises(HedFileError): + parse_version_list(["ol:otherlib", "ol:otherlib"]) + + def test_triple_prefixes(self): + """Test that libraries with triple prefixes are handled correctly.""" + self.assertEqual(parse_version_list(["test:score", "ol:otherlib", "test:testlib", "abc:anotherlib"]), + {"test": "test:score,testlib", "ol": "ol:otherlib", "abc": "abc:anotherlib"}) + + +class TestOwlBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.base_schema = schema.load_schema_version("8.2.0") + + 
@with_temp_file(".owl") + def test_schema2xml(self, filename): + self.base_schema.save_as_owl(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + self.base_schema.save_as_owl(filename, save_merged=True) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + @with_temp_file(".ttl") + def test_schema2turtle(self, filename): + self.base_schema.save_as_owl(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + self.base_schema.save_as_owl(filename, save_merged=True) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + @with_temp_file(".json-ld") + def test_schema2jsonld(self, filename): + self.base_schema.save_as_owl(filename) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + self.base_schema.save_as_owl(filename, save_merged=True) + loaded_schema = schema.load_schema(filename) + + self.assertEqual(loaded_schema, self.base_schema) + + def test_schema2owlstring(self): + owl_string = self.base_schema.get_as_owl_string(file_format="turtle") + loaded_schema = schema.from_string(owl_string, schema_format="turtle") + + self.assertEqual(loaded_schema, self.base_schema) + + owl_string = self.base_schema.get_as_owl_string(save_merged=True, file_format="turtle") + loaded_schema = schema.from_string(owl_string, schema_format="turtle") + + self.assertEqual(loaded_schema, self.base_schema) + + def test_schema2bad_filename(self): + with self.assertRaises(OSError): + self.base_schema.save_as_owl("", file_format="xml") + with self.assertRaises(OSError): + self.base_schema.save_as_owl("/////////", file_format="xml") + + def test_schema2bad_filename_rdf_format(self): + with self.assertRaises(rdflib.plugin.PluginException): + self.base_schema.save_as_owl("valid_filename.invalid_extension") + with self.assertRaises(rdflib.plugin.PluginException): + self.base_schema.save_as_owl("") + with self.assertRaises(rdflib.plugin.PluginException): + self.base_schema.save_as_owl("", file_format="unknown") + + +class TestOwlLibRooted(TestOwlBase): + @classmethod + def setUpClass(cls): + cls.base_schema = schema.load_schema_version("testlib_2.0.0") + + +class TestOwlLib(TestOwlBase): + @classmethod + def setUpClass(cls): + cls.base_schema = schema.load_schema_version("score_1.1.0") diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index 1411e928e..4b5f8e6f4 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -133,7 +133,7 @@ def test_allowed_characters_check(self): self.assertTrue(schema_attribute_validators.allowed_characters_check(self.hed_schema, tag_entry, attribute_name)) def test_in_library_check(self): - score = load_schema_version("score_") + score = load_schema_version("score_1.1.0") tag_entry = score.tags["Modulator"] attribute_name = "inLibrary" self.assertFalse(schema_attribute_validators.in_library_check(score, tag_entry, attribute_name)) diff --git a/tests/schema/test_schema_compliance.py b/tests/schema/test_schema_compliance.py index 9a73248cb..2ea0a04f9 100644 --- a/tests/schema/test_schema_compliance.py +++ b/tests/schema/test_schema_compliance.py @@ -10,7 +10,7 @@ def setUpClass(cls): cls.hed_schema = schema.load_schema_version("8.1.0") def test_validate_schema(self): - schema_path_with_issues = 
'../data/schema_tests/HED8.0.0.mediawiki' + schema_path_with_issues = '../data/schema_tests/HED8.0.0t.xml' schema_path_with_issues = os.path.join(os.path.dirname(os.path.realpath(__file__)), schema_path_with_issues) hed_schema = schema.load_schema(schema_path_with_issues) issues = hed_schema.check_compliance() diff --git a/tests/schema/test_schema_converters.py b/tests/schema/test_schema_converters.py index 30cacaba6..9073e50e2 100644 --- a/tests/schema/test_schema_converters.py +++ b/tests/schema/test_schema_converters.py @@ -3,11 +3,35 @@ import os from hed import schema +import tempfile +import functools + + +def get_temp_filename(extension): + with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp_file: + filename = temp_file.name + return filename + +# Function wrapper to create and clean up a single schema for testing +def with_temp_file(extension): + def decorator(test_func): + @functools.wraps(test_func) + def wrapper(*args, **kwargs): + # Create a temporary file with the given extension + filename = get_temp_filename(extension) + try: + # Call the test function with the filename + return test_func(*args, filename=filename, **kwargs) + finally: + # Clean up: Remove the temporary file + os.remove(filename) + return wrapper + return decorator class TestConverterBase(unittest.TestCase): - xml_file = '../data/schema_tests/HED8.0.0t.xml' - wiki_file = '../data/schema_tests/HED8.0.0.mediawiki' + xml_file = '../data/schema_tests/HED8.2.0.xml' + wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' can_compare = True @classmethod @@ -17,21 +41,21 @@ def setUpClass(cls): cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) - def test_schema2xml(self): - saved_filename = self.hed_schema_xml.save_as_xml() - try: - loaded_schema = schema.load_schema(saved_filename) - finally: - os.remove(saved_filename) + # !BFK! - Delete default units as they aren't in the XML file. + if "HED8.2.0" in cls.wiki_file: + del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] + + @with_temp_file(".xml") + def test_schema2xml(self, filename): + self.hed_schema_xml.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) self.assertEqual(loaded_schema, self.hed_schema_xml) - def test_schema2wiki(self): - saved_filename = self.hed_schema_xml.save_as_mediawiki() - try: - loaded_schema = schema.load_schema(saved_filename) - finally: - os.remove(saved_filename) + @with_temp_file(".mediawiki") + def test_schema2wiki(self, filename): + self.hed_schema_xml.save_as_mediawiki(filename) + loaded_schema = schema.load_schema(filename) self.assertEqual(loaded_schema, self.hed_schema_xml) @@ -48,25 +72,25 @@ def test_schema_as_string_wiki(self): hed_schema_as_string = "".join([line for line in file]) string_schema = schema.from_string(hed_schema_as_string, schema_format=".mediawiki") + #!BFK! 
- Same as before, 8.2.0 has a difference + if "HED8.2.0" in self.wiki_file: + del string_schema.unit_classes["temperatureUnits"].attributes["defaultUnits"] + self.assertEqual(string_schema, self.hed_schema_wiki) - def test_wikischema2xml(self): - saved_filename = self.hed_schema_wiki.save_as_xml() - try: - loaded_schema = schema.load_schema(saved_filename) - finally: - os.remove(saved_filename) + @with_temp_file(".xml") + def test_wikischema2xml(self, filename): + self.hed_schema_wiki.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) wiki_schema_copy = copy.deepcopy(self.hed_schema_wiki) self.assertEqual(loaded_schema, wiki_schema_copy) - def test_wikischema2wiki(self): - saved_filename = self.hed_schema_wiki.save_as_mediawiki() - try: - loaded_schema = schema.load_schema(saved_filename) - finally: - os.remove(saved_filename) + @with_temp_file(".mediawiki") + def test_wikischema2wiki(self, filename): + self.hed_schema_wiki.save_as_mediawiki(filename) + loaded_schema = schema.load_schema(filename) self.assertEqual(loaded_schema, self.hed_schema_wiki) @@ -76,10 +100,11 @@ def test_compare_readers(self): class TestComplianceBase(unittest.TestCase): - xml_file = '../data/schema_tests/HED8.0.0t.xml' - wiki_file = '../data/schema_tests/HED8.0.0.mediawiki' + xml_file_old = '../data/schema_tests/HED8.0.0t.xml' + xml_file = '../data/schema_tests/HED8.2.0.xml' + wiki_file = '../data/schema_tests/HED8.2.0.mediawiki' can_compare = True - expected_issues = 7 + expected_issues = 0 @classmethod def setUpClass(cls): @@ -87,12 +112,19 @@ def setUpClass(cls): cls.hed_schema_xml = schema.load_schema(cls.xml_file) cls.wiki_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.wiki_file) cls.hed_schema_wiki = schema.load_schema(cls.wiki_file) + if "HED8.2.0" in cls.wiki_file: + del cls.hed_schema_wiki.unit_classes["temperatureUnits"].attributes["defaultUnits"] + cls.xml_file_old = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.xml_file_old) + cls.hed_schema_xml_old = schema.load_schema(cls.xml_file_old) def test_compliance(self): issues = self.hed_schema_wiki.check_compliance() self.assertEqual(len(issues), self.expected_issues) + issues_old = self.hed_schema_xml_old.check_compliance() + self.assertGreater(len(issues_old), 0) def test_compare_readers(self): + self.assertNotEqual(self.hed_schema_xml, self.hed_schema_xml_old) if self.can_compare: self.assertEqual(self.hed_schema_wiki, self.hed_schema_xml) @@ -159,12 +191,6 @@ class TestDuplicateUnitClass(TestComplianceBase): expected_issues = 1 -# class TestSchemaComplianceOld(TestComplianceBase): -# xml_file = '../data/legacy_xml/HED7.1.1.xml' -# wiki_file = '../data/legacy_xml/HED7.2.0.mediawiki' -# can_compare = False -# expected_issues = 1 - class TestConverterSavingPrefix(unittest.TestCase): xml_file = '../data/schema_tests/HED8.0.0t.xml' @@ -175,11 +201,9 @@ def setUpClass(cls): cls.hed_schema_xml = schema.load_schema(cls.xml_file) cls.hed_schema_xml_prefix = schema.load_schema(cls.xml_file, schema_namespace="tl:") - def test_saving_prefix(self): - saved_filename = self.hed_schema_xml_prefix.save_as_xml() - try: - loaded_schema = schema.load_schema(saved_filename) - finally: - os.remove(saved_filename) + @with_temp_file(".xml") + def test_saving_prefix(self, filename): + self.hed_schema_xml_prefix.save_as_xml(filename) + loaded_schema = schema.load_schema(filename) self.assertEqual(loaded_schema, self.hed_schema_xml) diff --git a/tests/schema/test_schema_wiki_fatal_errors.py 
b/tests/schema/test_schema_wiki_fatal_errors.py index 0759dba46..43348432e 100644 --- a/tests/schema/test_schema_wiki_fatal_errors.py +++ b/tests/schema/test_schema_wiki_fatal_errors.py @@ -21,6 +21,7 @@ def setUpClass(cls): "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, + "HED_header_unknown_attribute.mediawiki": HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME, "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, diff --git a/tests/tools/analysis/test_analysis_util_assemble_hed.py b/tests/tools/analysis/test_analysis_util_assemble_hed.py index 018a7ead7..a7d2810c7 100644 --- a/tests/tools/analysis/test_analysis_util_assemble_hed.py +++ b/tests/tools/analysis/test_analysis_util_assemble_hed.py @@ -15,7 +15,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.0.0.xml')) + '../../data/schema_tests/HED8.2.0.xml')) cls.bids_root_path = bids_root_path json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) events_path = os.path.realpath(os.path.join(bids_root_path, diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index c0e9b2b6f..abcfcdbba 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -23,7 +23,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.0.0.xml')) + '../../data/schema_tests/HED8.2.0.xml')) cls.bids_root_path = bids_root_path json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) cls.json_path = json_path diff --git a/tests/tools/analysis/test_hed_tag_counts.py b/tests/tools/analysis/test_hed_tag_counts.py index 8f492466b..52f91feef 100644 --- a/tests/tools/analysis/test_hed_tag_counts.py +++ b/tests/tools/analysis/test_hed_tag_counts.py @@ -15,7 +15,7 @@ def setUpClass(cls): bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/bids_tests/eeg_ds003645s_hed')) schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../data/schema_tests/HED8.0.0.xml')) + '../../data/schema_tests/HED8.2.0.xml')) cls.bids_root_path = bids_root_path json_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) events_path = os.path.realpath(os.path.join(bids_root_path, diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py index d06300667..4ae2860d6 100644 --- a/tests/tools/analysis/test_key_map.py +++ b/tests/tools/analysis/test_key_map.py @@ -59,8 +59,9 @@ def test_make_template(self): df1 = t_map.make_template(show_counts=False) self.assertIsInstance(df1, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual(len(df1.columns), 1, "make_template should return 1 column 
single key, no additional columns") - df2 = t_map.make_template() + df2 = t_map.make_template(show_counts=True) self.assertEqual(len(df2.columns), 2, "make_template returns an extra column for counts") + t_map2 = KeyMap(['event_type', 'type']) t_map2.update(self.stern_test1_path) df3 = t_map2.make_template() diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py new file mode 100644 index 000000000..07112c777 --- /dev/null +++ b/tests/tools/analysis/test_sequence_map.py @@ -0,0 +1,50 @@ +import unittest +import os +import pandas as pd +from hed.errors.exceptions import HedFileError +from hed.tools.analysis.sequence_map import SequenceMap +from hed.tools.util.data_util import get_new_dataframe +from hed.tools.util.io_util import get_file_list + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') + base_path = '' + cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') + + + def test_constructor(self): + codes1 = ['1111', '1112', '1121', '1122', '1131', '1132', '1141', + '1142', '1311', '1312', '1321', '1322', + '4210', '4220', '4230', '4311', '4312'] + + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # #print(f"{smap1.__str__}") + # print("to here") + + def test_update(self): + codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', + '4210', '4220', '4230', '4311'] + codes1 = ['1111', '1121', '1131', '1141', '1311', '4311'] + #codes1 = ['1111', '1121', '1131', '1141', '1311'] + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # print(f"{smap1.dot_str()}") + # group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} + # print(f"{smap1.dot_str(group_spec=group_spec)}") + # + def test_str(self): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tools/bids/test_bids_dataset.py b/tests/tools/bids/test_bids_dataset.py index e4af39331..ae0ceff6f 100644 --- a/tests/tools/bids/test_bids_dataset.py +++ b/tests/tools/bids/test_bids_dataset.py @@ -20,6 +20,9 @@ def test_constructor(self): bids = BidsDataset(Test.root_path) self.assertIsInstance(bids, BidsDataset, "BidsDataset should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") + self.assertFalse(parts) + bids = BidsDataset(Test.root_path, tabular_types=['participants', 'events']) + parts = bids.get_tabular_group("participants") self.assertIsInstance(parts, BidsFileGroup, "BidsDataset participants should be a BidsFileGroup") self.assertEqual(len(parts.sidecar_dict), 1, "BidsDataset should have one participants.json file") self.assertEqual(len(parts.datafile_dict), 1, "BidsDataset should have one participants.tsv file") @@ -30,7 +33,7 @@ def test_constructor(self): self.assertIsInstance(bids.schema, HedSchema, "BidsDataset schema should be HedSchema") def test_constructor_libraries(self): - bids = BidsDataset(self.library_path) + bids = BidsDataset(self.library_path, tabular_types=['participants', 'events']) self.assertIsInstance(bids, BidsDataset, "BidsDataset with libraries should create a valid object from valid dataset") parts = 
bids.get_tabular_group("participants") @@ -48,9 +51,11 @@ def test_constructor_tabular(self): self.assertIsInstance(bids, BidsDataset, "BidsDataset with libraries should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") - self.assertIsInstance(parts, BidsFileGroup, "BidsDataset participants should be a BidsFileGroup") - self.assertEqual(len(parts.sidecar_dict), 1, "BidsDataset should have one participants.json file") - self.assertEqual(len(parts.datafile_dict), 1, "BidsDataset should have one participants.tsv file") + self.assertFalse(parts) + chans = bids.get_tabular_group("channels") + self.assertIsInstance(chans, BidsFileGroup, "BidsDataset participants should be a BidsFileGroup") + self.assertFalse(chans.sidecar_dict) + self.assertEqual(len(chans.datafile_dict), 6, "BidsDataset should have one participants.tsv file") self.assertIsInstance(bids.dataset_description, dict, "BidsDataset dataset_description should be a dict") for group in bids.tabular_files.values(): self.assertIsInstance(group, BidsFileGroup, "BidsDataset event files should be in a BidsFileGroup") @@ -82,17 +87,17 @@ def test_validator_types(self): self.assertFalse(issues, "BidsDataset with participants and events validates") def test_with_schema_group(self): - base_version = '8.0.0' + base_version = '8.2.0' library1_url = "https://raw.githubusercontent.com/hed-standard/hed-schemas/main/" + \ "library_schemas/score/hedxml/HED_score_1.0.0.xml" library2_url = "https://raw.githubusercontent.com/hed-standard/hed-schemas/main/" + \ "library_schemas/testlib/hedxml/HED_testlib_1.0.2.xml" - schema_list = [load_schema_version(xml_version=base_version)] - schema_list.append(load_schema(library1_url, schema_namespace="sc")) - schema_list.append(load_schema(library2_url, schema_namespace="test")) + schema_list = [load_schema_version(xml_version=base_version), + load_schema(library1_url, schema_namespace="sc"), + load_schema(library2_url, schema_namespace="test")] x = HedSchemaGroup(schema_list) - bids = BidsDataset(self.library_path, schema=x) - self.assertIsInstance(bids, BidsDataset, + bids = BidsDataset(self.library_path, schema=x, tabular_types=["participants"] ) + self.assertIsInstance(bids, BidsDataset, "BidsDataset with libraries should create a valid object from valid dataset") parts = bids.get_tabular_group("participants") self.assertIsInstance(parts, BidsFileGroup, "BidsDataset participants should be a BidsFileGroup") @@ -105,7 +110,7 @@ def test_with_schema_group(self): self.assertIsInstance(bids.schema, HedSchemaGroup, "BidsDataset with libraries should have schema that is a HedSchemaGroup") issues = bids.validate(check_for_warnings=True) - self.assertTrue(issues, "BidsDataset validate should return issues when check_for_warnings is True") + self.assertFalse(issues) def test_get_summary(self): bids1 = BidsDataset(self.root_path) diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index 096a0ba12..e581cbdb7 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -4,44 +4,34 @@ class TestOp(BaseOp): + NAME = "test" PARAMS = { - "operation": "test_op", - "required_parameters": { - "op_name": str, - "skip_columns": list, - "keep_all": bool, + "type": "object", + "properties": { + "column_name": { + "type": "string" + } }, - "optional_parameters": { - "keep_columns": list - } + "required": [ + "column_name" + ], + "additionalProperties": False } - def 
__init__(self, parameters): - super().__init__(self.PARAMS, parameters) - def do_op(self, dispatcher, df, name, sidecar=None): return df + + @staticmethod + def validate_input_data(parameters): + return [] class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.params = { - "operation": "test_op", - "required_parameters": { - "is_string": str, - "is_multiple": [str, int, float, list], - "is_bool": bool, - "is_list": list - }, - "optional_parameters": {} - } base_parameters = { - "is_string": "Condition-variable", - "is_multiple": ["a", "b", "c"], - "is_bool": False, - "is_list": [3, 4, 5] + "column_name": "a_descriptive_name" } cls.json_parameters = json.dumps(base_parameters) @@ -51,44 +41,30 @@ def tearDownClass(cls): def test_constructor(self): parameters = json.loads(self.json_parameters) - op1 = BaseOp(self.params, parameters) - self.assertIsInstance(op1, BaseOp, "constructor should create a BaseOp") - self.assertIn("is_string", op1.required_params, - "constructor required_params should contain an expected value") - - def test_constructor_no_operation(self): - with self.assertRaises(ValueError) as context: - BaseOp({}, {}) - self.assertEqual(context.exception.args[0], 'OpMustHaveOperation') - - def test_constructor_no_parameters(self): - op1 = BaseOp({"operation": "no_parameter_operation"}, {}) - self.assertIsInstance(op1, BaseOp, "constructor allows a operation with no parameters") - - def test_check_parameters_bad_element(self): - parameters = json.loads(self.json_parameters) - parameters["is_multiple"] = {"a": 1, "b": 2} - with self.assertRaises(TypeError) as context: - BaseOp(self.params, parameters) - self.assertEqual(context.exception.args[0], 'BadType') - - def test_parse_operations_missing_required(self): - parameters = json.loads(self.json_parameters) - parameters.pop("is_string") - with self.assertRaises(KeyError) as context: - BaseOp(TestOp.PARAMS, parameters) - self.assertEqual(context.exception.args[0], 'MissingRequiredParameters') - - def test_check_parameters_test(self): - parameters1 = {"op_name": "test", "skip_columns": ["onset", "duration"], "keep_all": True, "junk": "bad_parm"} - with self.assertRaises(KeyError) as context1: - BaseOp(TestOp.PARAMS, parameters1) - self.assertEqual(context1.exception.args[0], 'BadParameter') - parameters2 = {"op_name": "test", "skip_columns": ["onset", "duration"], "keep_all": "true"} - with self.assertRaises(TypeError) as context2: - TestOp(parameters2) - self.assertEqual(context2.exception.args[0], 'BadType') - + test_instantiate = TestOp(parameters) + self.assertDictEqual(test_instantiate.parameters, parameters) + + + def test_constructor_no_name(self): + class TestOpNoName(BaseOp): + PARAMS = { + "type": "object", + "properties": { + "column_name": { + "type": "string" + } + }, + "required": [ + "column_name" + ], + "additionalProperties": False + } + + def do_op(self, dispatcher, df, name, sidecar=None): + return df + + with self.assertRaises(TypeError): + instantiate = TestOpNoName({}) if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_base_summary.py b/tests/tools/remodeling/operations/test_base_summary.py index 5faed6467..e45d620a8 100644 --- a/tests/tools/remodeling/operations/test_base_summary.py +++ b/tests/tools/remodeling/operations/test_base_summary.py @@ -6,6 +6,7 @@ class TestOp(BaseOp): + NAME = "test_op" PARAMS = { "operation": "test_summary_op", "required_parameters": { @@ -20,11 +21,18 @@ class TestOp(BaseOp): SUMMARY_TYPE = "test_sum" def __init__(self, 
parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) + def do_op(self, dispatcher, df, name, sidecar=None): + return df.copy() + + @staticmethod + def validate_input_data(parameters): + return [] + class TestSummary(BaseSummary): diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py index 48d177b0f..d988f616b 100644 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -36,12 +36,5 @@ def setUp(self): def tearDownClass(cls): pass - def test_constructor_bad_convert_to(self): - self.base_parameters["convert_to"] = "blech" - with self.assertRaises(ValueError) as context: - ConvertColumnsOp(self.base_parameters) - self.assertEqual(context.exception.args[0], "CannotConvertToSpecifiedType") - - if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_factor_column_op.py b/tests/tools/remodeling/operations/test_factor_column_op.py index 402353d5e..454f91d81 100644 --- a/tests/tools/remodeling/operations/test_factor_column_op.py +++ b/tests/tools/remodeling/operations/test_factor_column_op.py @@ -40,13 +40,6 @@ def setUp(self): def tearDownClass(cls): pass - def test_bad_constructor(self): - self.base_parameters["factor_names"] = ["stopped"] - with self.assertRaises(ValueError) as context: - FactorColumnOp(self.base_parameters) - self.assertEqual(context.exception.args[0], "FactorNamesLenBad", - "factor_names and factor_values must be same length") - def test_no_names(self): self.base_parameters["factor_names"] = [] self.base_parameters["factor_values"] = [] diff --git a/tests/tools/remodeling/operations/test_merge_consecutive_op.py b/tests/tools/remodeling/operations/test_merge_consecutive_op.py index 9b504b221..5dcbf720f 100644 --- a/tests/tools/remodeling/operations/test_merge_consecutive_op.py +++ b/tests/tools/remodeling/operations/test_merge_consecutive_op.py @@ -50,18 +50,6 @@ def get_dfs(self, op): df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), 'run-01') return df, self.dispatch.post_proc_data(df_new) - def test_valid(self): - parms = json.loads(self.json_parms) - op = MergeConsecutiveOp(parms) - self.assertIsInstance(op, MergeConsecutiveOp) - - def test_invalid(self): - parms = json.loads(self.json_parms) - parms["column_name"] = "sex" - with self.assertRaises(ValueError) as context: - MergeConsecutiveOp(parms) - self.assertEqual(context.exception.args[0], "MergeColumnCannotBeMatchColumn") - def test_do_op_valid(self): # Test when no extras but ignored. 
parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py index 1bae16d80..fc3f056f7 100644 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -203,73 +203,3 @@ def test_number_groups_new_column(self): # "split_rows should not change the input df columns") # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "split_rows should not change the input df values") - - # test expected breaks parameters - def test_missing_startstop_param(self): - # test when missing parameter - parms = json.loads(self.json_missing_startstop_parms) - - # with self.assertRaisesRegex(KeyError, "MissingRequiredParameters"): - # op = NumberGroupsOp(parms) - - def test_wrong_startstop_param(self): - # test when a start stop parameter is missing - parms = json.loads(self.json_wrong_startstop_parms) - - # with self.assertRaisesRegex(KeyError, "BadParameter"): - # op = NumberGroupsOp(parms) - - def test_wrong_startstop_type_param(self): - # Test when wrong type in start stop parameters - parms = json.loads(self.json_wrong_startstop_type_parms) - # TODO fix code and put back in - # with self.assertRaisesRegex(TypeError, "BadType"): - # op = NumberGroupsOp(parms) - - def test_wrong_value_inclusion(self): - # test when a wrong value is given for inclusion (only accept include and exclude string) - parms = json.loads(self.json_wrong_inclusion_parms) - - # with self.assertRaisesRegex(ValueError, "BadValue"): - # op = NumberGroupsOp(parms) - - # test expected breaks event file - parameters - def test_existing_column_overwrite_unspecified(self): - # Test when existing column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_false(self): - # Test when existing column name is given with overwrite specified False - parms = json.loads(self.json_overwrite_false_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_missing_source_column(self): - # Test when source column does not exist in event file - parms = json.loads(self.json_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_missing_startstop_value(self): - # Test when one of startstop values does not exist in source column - parms = json.loads(self.json_missing_startstop_value_parms) - op = NumberGroupsOp(parms) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # - # with self.assertRaisesRegex(ValueError, "MissingValue"): - # op.do_op(self.dispatcher, df_test, self.file_name) - diff 
--git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py index 9c60a63aa..78fdc6bcb 100644 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ b/tests/tools/remodeling/operations/test_number_rows_op.py @@ -166,65 +166,8 @@ def setUpClass(cls): "number_column_name": "number" } - overwrite_false_parameters = { - "number_column_name": "number", - "overwrite": False - } - - overwrite_true_parameters = { - "number_column_name": "number", - "overwrite": True - } - - filter_complete_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "40"} - } - - filter_incomplete_parameters = { - "number_column_name": "number", - "match_value": {"column": "code"} - } - - filter_invalid_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "40", "label": "code"} - } - - filter_wrong_type_parameters = { - "number_column_name": "number", - "match_value": {"column": 246, "value": []} - } - - filter_missing_column_parameters = { - "number_column_name": "number", - "match_value": {"column": "trial_type", "value": "40"} - } - - filter_missing_value_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "stop_trial"} - } - - filter_overwrite_parameters = { - "number_column_name": "number", - "match_value": {"column": "number", "value": "40"}, - "overwrite": True - } - cls.json_parms = json.dumps(base_parameters) - cls.json_overwrite_false_parms = json.dumps(overwrite_false_parameters) - cls.json_overwrite_true_parms = json.dumps(overwrite_true_parameters) - - cls.json_filter_complete_parameters = json.dumps(filter_complete_parameters) - cls.json_filter_incomplete_parameters = json.dumps(filter_incomplete_parameters) - cls.json_filter_invalid_parameters = json.dumps(filter_invalid_parameters) - cls.json_filter_wrong_type_parameters = json.dumps(filter_wrong_type_parameters) - cls.json_filter_missing_column_parameters = json.dumps(filter_missing_column_parameters) - cls.json_filter_missing_value_parameters = json.dumps(filter_missing_value_parameters) - cls.json_filter_overwrite_parameters = json.dumps(filter_overwrite_parameters) - cls.dispatcher = None cls.file_name = None @@ -257,136 +200,5 @@ def test_number_rows_new_column(self): # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "number_rows should not change the input df values") - def test_existing_column_overwrite_false(self): - # Test when existing column name is given with overwrite specified False - parms = json.loads(self.json_overwrite_false_parms) - op = NumberRowsOp(parms) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn") as context: - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_unspecified(self): - # Test when existing column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_true(self): - # Test when existing column name is given with overwrite True - parms = 
json.loads(self.json_overwrite_true_parms) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_check = pd.DataFrame(self.overwritten_data, columns=self.existing_sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), - # "numbered_events should have the same columns as original dataframe in case of overwrite") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(all([i + 1 == value for (i, value) in enumerate(df_new[parms['number_column_name']])]), - # "event should be numbered consecutively from 1 to length of the dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - def test_filter_complete_parameters(self): - # Test when valid complete filter/match_value parameters are given - parms = json.loads(self.json_filter_complete_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_check = pd.DataFrame(self.filter_numbered_data, columns=self.numbered_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.numbered_columns), - # "numbered_events should have expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - def test_filter_incomplete_parameters(self): - # Test when filter/match_value parameters are not complete - parms = json.loads(self.json_filter_incomplete_parameters) - - # with self.assertRaisesRegex(KeyError, "MissingRequiredParameters"): - # op = NumberRowsOp(parms) - - def test_filter_invalid_parameters(self): - # Test when invalid filter/match_value parameters are given - parms = json.loads(self.json_filter_invalid_parameters) - - # with self.assertRaisesRegex(KeyError, "BadParameter"): - # op = NumberRowsOp(parms) - - def test_filter_wrong_type_parameters(self): - # Test when invalid filter/match_value parameters are given - parms = json.loads(self.json_filter_wrong_type_parameters) - # TODO: need to fix type - # with self.assertRaisesRegex(TypeError, "BadType"): - # op = NumberRowsOp(parms) - - def test_filter_missing_column_parameters(self): - # Test when specified filter column is not in event file - parms = json.loads(self.json_filter_missing_column_parameters) - op = NumberRowsOp(parms) - # df = 
pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - - # with self.assertRaisesRegex(ValueError, "MissingMatchColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_filter_missing_value_parameters(self): - # Test when specified filter value is not in event file - parms = json.loads(self.json_filter_missing_value_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - - # with self.assertRaisesRegex(ValueError, "MissingMatchValue"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_filter_overwrite(self): - # Test when specified filter value is not in event file - parms = json.loads(self.json_filter_overwrite_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_check = pd.DataFrame(self.filter_overwritten_numbered_data, columns=self.existing_sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), - # "numbered_events should have expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_remap_columns_op.py b/tests/tools/remodeling/operations/test_remap_columns_op.py index be4637d73..cd05c7ae3 100644 --- a/tests/tools/remodeling/operations/test_remap_columns_op.py +++ b/tests/tools/remodeling/operations/test_remap_columns_op.py @@ -64,47 +64,6 @@ def get_dfs(self, op, df=None): df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), 'run-01') return df, self.dispatch.post_proc_data(df_new) - def test_valid(self): - # Test when no extras but ignored. 
- parms = json.loads(self.json_parms) - op = RemapColumnsOp(parms) - df, df_test = self.get_dfs(op) - self.assertNotIn("response_type", df.columns, "remap_columns before does not have response_type column") - self.assertIn("response_type", df_test.columns, "remap_columns after has response_type column") - - def test_invalid_params(self): - parms1 = json.loads(self.json_parms) - parms1["source_columns"] = [] - with self.assertRaises(ValueError) as context1: - RemapColumnsOp(parms1) - self.assertEqual(context1.exception.args[0], "EmptySourceColumns") - - parms2 = json.loads(self.json_parms) - parms2["destination_columns"] = [] - with self.assertRaises(ValueError) as context2: - RemapColumnsOp(parms2) - self.assertEqual(context2.exception.args[0], "EmptyDestinationColumns") - - parms3 = json.loads(self.json_parms) - parms3["map_list"][1] = ["right", "correct_right"], - with self.assertRaises(ValueError) as context3: - RemapColumnsOp(parms3) - self.assertEqual(context3.exception.args[0], "BadColumnMapEntry") - - parms4 = json.loads(self.json_parms1) - parms4["integer_sources"] = ["test", "baloney"] - with self.assertRaises(ValueError) as context4: - RemapColumnsOp(parms4) - self.assertEqual(context4.exception.args[0], "IntegerSourceColumnsInvalid") - - def test_integer_sources(self): - parms1 = json.loads(self.json_parms1) - op1 = RemapColumnsOp(parms1) - self.assertIn('test', op1.integer_sources) - parms2 = json.loads(self.json_parms2) - op2 = RemapColumnsOp(parms2) - self.assertIn('test', op2.integer_sources) - def test_valid_missing(self): # Test when no extras but ignored. parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_split_rows_op.py b/tests/tools/remodeling/operations/test_split_rows_op.py index 9710e1b61..df9e4ec3a 100644 --- a/tests/tools/remodeling/operations/test_split_rows_op.py +++ b/tests/tools/remodeling/operations/test_split_rows_op.py @@ -155,8 +155,7 @@ def test_split_rows_from_files(self): df = pd.read_csv(self.events_path, delimiter='\t', header=0, dtype=str, keep_default_na=False, na_values=None) with open(self.model1_path) as fp: operation_list = json.load(fp) - operations, errors = Dispatcher.parse_operations(operation_list) - self.assertFalse(errors, 'split_rows should not give errors if operation is correct') + operations = Dispatcher.parse_operations(operation_list) dispatch = Dispatcher(operation_list) df = dispatch.prep_data(df) df_new = operations[0].do_op(dispatch, df, "Name") diff --git a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py index 2aadd8e72..c0afbf1dc 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_names_op.py @@ -56,7 +56,7 @@ def test_constructor(self): def test_summary_op(self): with open(self.model_path, 'r') as fp: parms = json.load(fp) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') df = dispatch.get_data_file(self.events_path) df = dispatch.prep_data(df) diff --git a/tests/tools/remodeling/operations/test_summarize_column_values_op.py b/tests/tools/remodeling/operations/test_summarize_column_values_op.py index 5fe53c4ab..9e838d5d5 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_values_op.py +++ 
b/tests/tools/remodeling/operations/test_summarize_column_values_op.py @@ -93,7 +93,7 @@ def test_summary_op(self): '../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json')) with open(column_summary_path, 'r') as fp: parms = json.load(fp) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(events) old_len = len(df) diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py index 3e6843b25..76a469d2e 100644 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py @@ -24,18 +24,6 @@ def setUpClass(cls): def tearDownClass(cls): pass - def test_constructor(self): - parms = json.loads(self.json_parms) - parms["expand_context"] = "" - with self.assertRaises(KeyError) as context: - SummarizeDefinitionsOp(parms) - self.assertEqual(context.exception.args[0], "BadParameter") - parms2 = json.loads(self.json_parms) - parms2["mystery"] = True - with self.assertRaises(KeyError) as context: - SummarizeDefinitionsOp(parms2) - self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 3e1c1d128..196a95758 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -44,23 +44,6 @@ def setUpClass(cls): def tearDownClass(cls): pass - def test_constructor(self): - parms = json.loads(self.json_parms) - sum_op1 = SummarizeHedTagsOp(parms) - self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - - def test_constructor_bad_params(self): - parms = json.loads(self.json_parms) - parms["include_context"] = "" - with self.assertRaises(TypeError) as context: - SummarizeHedTagsOp(parms) - self.assertEqual(context.exception.args[0], "BadType") - parms2 = json.loads(self.json_parms) - parms2["mystery"] = True - with self.assertRaises(KeyError) as context: - SummarizeHedTagsOp(parms2) - self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op_no_replace_no_context_remove_on(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index 642539967..b4cedafdc 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -63,7 +63,7 @@ def test_summary(self): parms = json.load(fp) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(self.events) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) sum_op = parsed_commands[2] sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path) context1 = dispatch.summary_dicts['AOMIC_condition_variables'] @@ -85,7 +85,7 @@ def 
test_text_summary_with_levels(self):
         parms = json.load(fp)
         dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0'])
         df = dispatch.get_data_file(self.events_wh)
-        parsed_commands, errors = Dispatcher.parse_operations(parms)
+        parsed_commands = Dispatcher.parse_operations(parms)
         sum_op = parsed_commands[2]
         sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path_wh)
         context1 = dispatch.summary_dicts['AOMIC_condition_variables']
@@ -97,8 +97,7 @@ def test_text_summary(self):
         with open(self.summary_path, 'r') as fp:
             parms = json.load(fp)
-        parsed_commands, errors = Dispatcher.parse_operations(parms)
-        self.assertFalse(errors)
+        parsed_commands = Dispatcher.parse_operations(parms)
         dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0'])
         df = dispatch.get_data_file(self.events)
         old_len = len(df)
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
index 97b87df83..cf86665cc 100644
--- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
+++ b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
@@ -42,12 +42,6 @@ def test_constructor(self):
         sum_op1 = SummarizeHedValidationOp(parms)
         self.assertIsInstance(sum_op1, SummarizeHedValidationOp, "constructor creates an object of the correct type")
 
-        parms2 = json.loads(self.json_parms)
-        parms2["mystery"] = True
-        with self.assertRaises(KeyError) as context:
-            SummarizeHedValidationOp(parms2)
-        self.assertEqual(context.exception.args[0], "BadParameter")
-
     def test_do_op(self):
         dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0'])
         parms = json.loads(self.json_parms)
diff --git a/tests/tools/remodeling/test_dispatcher.py b/tests/tools/remodeling/test_dispatcher.py
index b91a3c6f8..ad6a01888 100644
--- a/tests/tools/remodeling/test_dispatcher.py
+++ b/tests/tools/remodeling/test_dispatcher.py
@@ -57,17 +57,6 @@ def test_constructor_empty_operations(self):
         self.assertIsInstance(disp, Dispatcher, "")
         self.assertFalse(disp.parsed_ops, "constructor empty operations list has empty parsed ops")
 
-    def test_constructor_bad_no_operation(self):
-        test = [{"operation": "remove_rows", "parameters": {"column_name": "response_time", "remove_values": ["n/a"]}},
-                {"parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}]
-        operations, errors = Dispatcher.parse_operations(test)
-        self.assertFalse(operations, "parse_operations returns empty if no operation")
-        self.assertEqual(len(errors), 1,
-                         "parse_operation returns a list of one error if one operation with no operation")
-        self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing operation")
-        self.assertEqual(errors[0]['error_type'], KeyError,
-                         "parse_operation error has the correct type for missing operation")
-
     def test_get_data_file(self):
         model_path1 = os.path.join(self.data_path, 'simple_reorder_rmdl.json')
         with open(model_path1) as fp:
@@ -92,46 +81,6 @@ def test_get_summary_save_dir(self):
             dispatch2.get_summary_save_dir()
         self.assertEqual(context.exception.code, 'NoDataRoot')
 
-    def test_parse_operations_errors(self):
-        test = [{"operation": "remove_rows", "parameters": {"column_name": "response_time", "remove_values": ["n/a"]}},
-                {"operation": "remove_rows"}]
-        operations, errors = Dispatcher.parse_operations(test)
-        self.assertFalse(operations, "parse_operations returns empty if no parameters")
-        self.assertEqual(len(errors), 1,
-                         "parse_operation returns a list of one error if one operation with no parameters")
-        self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing parameters")
-        self.assertEqual(errors[0]['error_type'], KeyError,
-                         "parse_operation error has the correct type for missing parameters")
-
-        test = [{"operation": "remove_rows",
-                 "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}},
-                {"operation": "remove_rows", "parameters": {"column_name": "response_time"}}]
-        operations, errors = Dispatcher.parse_operations(test)
-        self.assertFalse(operations, "parse_operations returns empty if missing required")
-        self.assertEqual(len(errors), 1,
-                         "parse_operation returns a list of one error if one operation with missing required")
-        self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing parameters")
-        self.assertEqual(errors[0]['error_type'], KeyError,
-                         "parse_operation error has the correct type for missing required")
-        with self.assertRaises(ValueError) as context:
-            Dispatcher(test)
-        self.assertEqual(context.exception.args[0], 'InvalidOperationList')
-
-        test2 = [{"operation": "blimey",
-                  "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}]
-        operations, errors = Dispatcher.parse_operations(test2)
-        self.assertFalse(operations, "parse_operations returns empty if missing required")
-        self.assertEqual(len(errors), 1,
-                         "parse_operation returns a list of one error if bad operation")
-        self.assertEqual(errors[0]['index'], 0, "parse_operation error has the correct index for bad operation")
-        self.assertEqual(errors[0]['error_type'], KeyError,
-                         "parse_operation error has the correct type for bad operation")
-        self.assertEqual(errors[0]['error_code'], 'OperationNotListedAsValid''',
-                         "parse_operation error has has correct code for bad operation")
-        with self.assertRaises(ValueError) as context:
-            Dispatcher(test2)
-        self.assertEqual(context.exception.args[0], 'InvalidOperationList')
-
     def test_parse_operation_list(self):
         test = [{"operation": "remove_rows",
                  "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}},
@@ -176,8 +125,6 @@ def test_run_operations_hed(self):
                 }
             }
         ]
-        operations, errors = Dispatcher.parse_operations(op_list)
-        self.assertFalse(errors)
         dispatch = Dispatcher(op_list, hed_versions=['8.1.0'])
         df = dispatch.run_operations(events_path, sidecar=sidecar_path, verbose=False)
         self.assertIsInstance(df, pd.DataFrame)
diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py
new file mode 100644
index 000000000..15447edea
--- /dev/null
+++ b/tests/tools/remodeling/test_validator.py
@@ -0,0 +1,152 @@
+import os
+import json
+import unittest
+from copy import deepcopy
+from hed.tools.remodeling.validator import RemodelerValidator
+
+class Test(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '../data/remodel_tests/all_remodel_operations.json'))) as f:
+            cls.remodel_file = json.load(f)
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_validator_build(self):
+        validator = RemodelerValidator()
+
+    def test_validate_valid(self):
+        validator = RemodelerValidator()
+        error_strings = validator.validate(self.remodel_file)
+        self.assertFalse(error_strings)
+
+    def test_validate_array(self):
+        validator = RemodelerValidator()
+        wrong_input_type = {"operation": "remove_columns"}
+        error_strings = validator.validate(wrong_input_type)
+        self.assertEqual(error_strings[0], "Operations must be contained in a list or array. This is also true when you run a single operation.")
+
+        no_operations = []
+        error_strings = validator.validate(no_operations)
+        self.assertEqual(error_strings[0], "There are no operations defined. Specify at least 1 operation for the remodeler to execute.")
+
+    def test_validate_operations(self):
+        validator = RemodelerValidator()
+
+        invalid_operation_type = ["string"]
+        error_strings = validator.validate(invalid_operation_type)
+        self.assertEqual(error_strings[0], "Each operation must be defined in a dictionary. string is not a dictionary object.")
+
+        invalid_operation_missing = [self.remodel_file[0].copy()]
+        del invalid_operation_missing[0]["description"]
+        error_strings = validator.validate(invalid_operation_missing)
+        self.assertEqual(error_strings[0], "Operation dictionary 1 is missing 'description'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.")
+
+        invalid_operation_name = [self.remodel_file[0].copy()]
+        invalid_operation_name[0]["operation"] = "unlisted_operation"
+        error_strings = validator.validate(invalid_operation_name)
+        self.assertEqual(error_strings[0], "unlisted_operation is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.")
+
+    def test_validate_parameters(self):
+        validator = RemodelerValidator()
+
+        missing_parameter = [deepcopy(self.remodel_file[0])]
+        del missing_parameter[0]["parameters"]["column_names"]
+        error_strings = validator.validate(missing_parameter)
+        self.assertEqual(error_strings[0], "Operation 1: The parameter column_names is missing. column_names is a required parameter of remove_columns.")
+
+        missing_parameter_nested = [deepcopy(self.remodel_file[10])]
+        del missing_parameter_nested[0]["parameters"]["new_events"]["response"]["onset_source"]
+        error_strings = validator.validate(missing_parameter_nested)
+        self.assertEqual(error_strings[0], "Operation 1: The field onset_source is missing in response, new_events. onset_source is a required parameter of response, new_events.")
+
+        invalid_parameter = [deepcopy(self.remodel_file[0])]
+        invalid_parameter[0]["parameters"]["invalid"] = "invalid_value"
+        error_strings = validator.validate(invalid_parameter)
+        self.assertEqual(error_strings[0], "Operation 1: Operation parameters for remove_columns contain an unexpected field 'invalid'.")
+
+        invalid_parameter_nested = [deepcopy(self.remodel_file[10])]
+        invalid_parameter_nested[0]["parameters"]["new_events"]["response"]["invalid"] = "invalid_value"
+        error_strings = validator.validate(invalid_parameter_nested)
+        self.assertEqual(error_strings[0], "Operation 1: Operation parameters for response, new_events contain an unexpected field 'invalid'.")
+
+        invalid_type = [deepcopy(self.remodel_file[0])]
+        invalid_type[0]["parameters"]["column_names"] = 0
+        error_strings = validator.validate(invalid_type)
+        self.assertEqual(error_strings[0], "Operation 1: The value of column_names, in the remove_columns operation, should be a array. 0 is not a array.")
+
+        invalid_type_nested = [deepcopy(self.remodel_file[10])]
+        invalid_type_nested[0]["parameters"]["new_events"]["response"]["onset_source"] = {"key": "value"}
+        error_strings = validator.validate(invalid_type_nested)
+        self.assertEqual(error_strings[0], "Operation 1: The value of onset_source, response, new_events, in the split_rows operation, should be a array. {'key': 'value'} is not a array.")
+
+        empty_array = [deepcopy(self.remodel_file[0])]
+        empty_array[0]["parameters"]["column_names"] = []
+        error_strings = validator.validate(empty_array)
+        self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should have at least 1 item(s).")
+
+        empty_array_nested = [deepcopy(self.remodel_file[5])]
+        empty_array_nested[0]["parameters"]["map_list"][0] = []
+        error_strings = validator.validate(empty_array_nested)
+        self.assertEqual(error_strings[0], "Operation 1: The list in item 1, map_list, in the remap_columns operation, should have at least 1 item(s).")
+
+        # invalid_value = [deepcopy(self.remodel_file[18])]
+        # invalid_value[0]["parameters"]["convert_to"] = "invalid_value"
+        # error_strings = validator.validate(invalid_value)
+        # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the convert_columns operation, contains and unexpected value. Value should be one of ['str', 'int', 'float', 'fixed'].")
+
+        # value_dependency = [deepcopy(self.remodel_file[18])]
+        # value_dependency[0]["parameters"]["convert_to"] = "fixed"
+        # error_strings = validator.validate(value_dependency)
+        # self.assertEqual(error_strings[0], "Operation 1: The parameter decimal_places is missing. decimal_places is a required parameter of convert_columns.")
+
+        property_dependency = [deepcopy(self.remodel_file[1])]
+        del property_dependency[0]["parameters"]["factor_values"]
+        error_strings = validator.validate(property_dependency)
+        self.assertEqual(error_strings[0], "Operation 1: The parameter factor_names is missing. factor_names is a required parameter of factor_column when ['factor_values'] is specified.")
+
+        double_item_in_array = [deepcopy(self.remodel_file[0])]
+        double_item_in_array[0]["parameters"]["column_names"] = ['response', 'response']
+        error_strings = validator.validate(double_item_in_array)
+        self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should only contain unique items.")
+
+        double_item_in_array_nested = [deepcopy(self.remodel_file[10])]
+        double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ['response', 'response']
+        error_strings = validator.validate(double_item_in_array_nested)
+        self.assertEqual(error_strings[0], "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.")
+
+    def test_validate_parameter_data(self):
+        validator = RemodelerValidator()
+
+        factor_column_validate = [deepcopy(self.remodel_file)[1]]
+        factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"]
+        error_strings = validator.validate(factor_column_validate)
+        self.assertEqual(error_strings[0], "Operation 1: The list in factor_names, in the factor_column operation, should have the same number of items as factor_values.")
+
+        factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]]
+        factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"]
+        error_strings = validator.validate(factor_hed_tags_validate)
+        self.assertEqual(error_strings[0], "Operation 1: The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.")
+
+        merge_consecutive_validate = [deepcopy(self.remodel_file)[4]]
+        merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type")
+        error_strings = validator.validate(merge_consecutive_validate)
+        self.assertEqual(error_strings[0], "Operation 1: The column_name in the merge_consecutive operation cannot be specified as a match_column.")
+
+        remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]]
+        remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""]
+        error_strings = validator.validate(remap_columns_validate_same_length)
+        self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should all have the same length.")
+
+        remap_columns_validate_right_length = [deepcopy(self.remodel_file[5])]
+        remap_columns_validate_right_length[0]["parameters"]["map_list"] = [["string1", "string2"], ["string3", "string4"]]
+        error_strings = validator.validate(remap_columns_validate_right_length)
+        self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.")
+
+        remap_columns_integer_sources = [deepcopy(self.remodel_file[5])]
+        remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"]
+        error_strings = validator.validate(remap_columns_integer_sources)
+        self.assertEqual(error_strings[0], "Operation 1: All integer_sources in the remap_columns operation should be source_columns.")
\ No newline at end of file
diff --git a/tests/tools/visualization/__init__.py b/tests/tools/visualization/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py
index 0e3bcdfab..9c8c819e2 100644
--- a/tests/validator/test_hed_validator.py
+++ b/tests/validator/test_hed_validator.py
@@ -183,7 +183,7 @@ def test_org_tag_missing(self):
 
     def test_duplicate_group_in_definition(self):
         schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
-                                   '../data/schema_tests/HED8.0.0.mediawiki')
+                                   '../data/schema_tests/HED8.2.0.mediawiki')
         hed_schema = schema.load_schema(schema_path)
         string_with_def = \
             '(Definition/TestDef,(Item,Item))'
diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py
index 2b60d391d..7285f9e33 100644
--- a/tests/validator/test_onset_validator.py
+++ b/tests/validator/test_onset_validator.py
@@ -15,7 +15,7 @@ class Test(TestHedBase):
     @classmethod
     def setUpClass(cls):
         cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
-        hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0.mediawiki")
+        hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.2.0.mediawiki")
         cls.hed_schema = schema.load_schema(hed_xml_file)
         cls.placeholder_label_def_string = "Def/TestDefPlaceholder/2471"
         cls.placeholder_def_contents = "(Acceleration/#,Action/TestDef2)"
@@ -312,6 +312,18 @@ def test_onset_two_in_one_line(self):
 
         self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)
 
+    def test_check_for_banned_tags(self):
+        hed_string = HedString("Event, (Duration/Short, Label/Example)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 0)
+
+        hed_string = HedString("Onset, (Offset, Event)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 2)
+
+        hed_string = HedString("(Onset, Duration/Long), Label/Example", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 1)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/validator/test_spreadsheet_validator.py b/tests/validator/test_spreadsheet_validator.py
index 1b1f57eb8..9c0691d41 100644
--- a/tests/validator/test_spreadsheet_validator.py
+++ b/tests/validator/test_spreadsheet_validator.py
@@ -5,12 +5,14 @@
 import unittest
 from hed import load_schema_version, load_schema
 from hed.validator import SpreadsheetValidator
-from hed import SpreadsheetInput
+from hed import TabularInput, SpreadsheetInput
+from hed.errors.error_types import ValidationErrors
+
 
 class TestSpreadsheetValidation(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.schema = load_schema_version("8.1.0")
+        cls.schema = load_schema_version("8.2.0")
         cls.validator = SpreadsheetValidator(cls.schema)
         base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
         cls.base_data_dir = base
@@ -45,3 +47,50 @@ def test_basic_validate(self):
         issues = file_input.validate(self.schema)
         self.assertTrue(len(issues), 1)
 
+    def test_invalid_onset_invalid_column(self):
+        def_dict = "(Definition/DefaultOnset, (Event))"
+        base_df = pd.DataFrame({
+            'HED': ["Event, (Age/5, Label/Example)", "Age/1, Label/Example", "Age/3, (Event)"]
+        })
+
+        self.df_with_onset = base_df.copy()
+        self.df_with_onset['onset'] = [1, 2, 3]
+        self.df_without_onset = base_df.copy()
+
+        # No tags in either of these
+        issues = self.validator.validate(TabularInput(self.df_without_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 0)
+
+        issues = self.validator.validate(TabularInput(self.df_with_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Inset, Def/DefaultOnset), (Event, Age/2)", "(Offset, Def/DefaultOnset), (Age/4)"]
+        })
+
+        self.df_with_onset_has_tags = base_has_tags_df.copy()
+        self.df_with_onset_has_tags['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags = base_has_tags_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_unordered_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Offset, Def/DefaultOnset), (Age/4)", "(Inset, Def/DefaultOnset), (Event, Age/2)"]
+        })
+        self.df_with_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+        self.df_with_onset_has_tags_unordered['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 2)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+        self.assertEqual(issues[1]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
\ No newline at end of file
diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py
index cffd62267..1505c28e7 100644
--- a/tests/validator/test_tag_validator.py
+++ b/tests/validator/test_tag_validator.py
@@ -5,8 +5,9 @@
 from functools import partial
 
 
+# TODO: update these tests (TagValidator no longer exists)
 class TestHed(TestValidatorBase):
-    schema_file = "../data/schema_tests/HED8.0.0.mediawiki"
+    schema_file = "../data/schema_tests/HED8.2.0.mediawiki"
 
 
 class IndividualHedTagsShort(TestHed):
@@ -353,7 +354,7 @@ def test_span_reporting(self):
 class TestTagLevels(TestHed):
     @staticmethod
     def string_obj_func(validator):
-        return validator._validate_groups_in_hed_string
+        return validator._group_validator.run_tag_level_validators
 
     def test_no_duplicates(self):
         test_strings = {
@@ -499,7 +500,7 @@ class FullHedString(TestHed):
 
     @staticmethod
     def string_obj_func(validator):
-        return validator._tag_validator.run_hed_string_validators
+        return validator._run_hed_string_validators
 
     def test_invalid_placeholders(self):
         # We might want these to be banned later as invalid characters.
@@ -829,7 +830,7 @@ class RequiredTags(TestHed):
 
     @staticmethod
     def string_obj_func(validator):
-        return partial(validator._validate_tags_in_hed_string)
+        return partial(validator._group_validator.run_all_tags_validators)
 
     def test_includes_all_required_tags(self):
         test_strings = {
diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py
index 690ed79bd..5b2930cba 100644
--- a/tests/validator/test_tag_validator_base.py
+++ b/tests/validator/test_tag_validator_base.py
@@ -7,6 +7,7 @@
 from hed import schema
 
 
+# TODO: update these tests (TagValidator no longer exists)
 class TestHedBase(unittest.TestCase):
     schema_file = None
 
@@ -67,7 +68,6 @@ def setUpClass(cls):
         super().setUpClass()
         cls.error_handler = error_reporter.ErrorHandler()
         cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema)
-        cls.semantic_tag_validator = cls.semantic_hed_input_reader._tag_validator
 
     def validator_base(self, test_strings, expected_results, expected_issues, test_function,
                        hed_schema, check_for_warnings=False):
diff --git a/tests/validator/test_tag_validator_library.py b/tests/validator/test_tag_validator_library.py
index 194705f02..d942c8ae3 100644
--- a/tests/validator/test_tag_validator_library.py
+++ b/tests/validator/test_tag_validator_library.py
@@ -303,7 +303,7 @@ def test_span_reporting(self):
 class TestTagLevels3(TestHed3):
     @staticmethod
     def string_obj_func(validator):
-        return validator._validate_groups_in_hed_string
+        return validator._group_validator.run_tag_level_validators
 
     def test_no_duplicates(self):
         test_strings = {
@@ -420,7 +420,7 @@ def test_taggroup_validation(self):
 class RequiredTags(TestHed3):
     @staticmethod
     def string_obj_func(validator):
-        return partial(validator._validate_tags_in_hed_string)
+        return partial(validator._group_validator.run_all_tags_validators)
 
     def test_includes_all_required_tags(self):
         test_strings = {
diff --git a/tests/validator/test_tag_validator_util.py b/tests/validator/test_tag_validator_util.py
index 4522a0020..bf8d2a27e 100644
--- a/tests/validator/test_tag_validator_util.py
+++ b/tests/validator/test_tag_validator_util.py
@@ -1,7 +1,7 @@
 import unittest
 
-from hed.validator import tag_validator_util
+from hed.validator.tag_util import class_util
 from tests.validator.test_tag_validator import TestHed
 
 
@@ -27,10 +27,10 @@ def test_clock_face_times(self):
             'invalidString': 'not a time',
         }
         for string in valid_test_strings.values():
-            result = tag_validator_util.is_clock_face_time(string)
+            result = class_util.is_clock_face_time(string)
             self.assertEqual(result, True, string)
         for string in invalid_test_strings.values():
-            result = tag_validator_util.is_clock_face_time(string)
+            result = class_util.is_clock_face_time(string)
             self.assertEqual(result, False, string)
 
     def test_date_times(self):
@@ -54,10 +54,10 @@ def test_date_times(self):
            'invalidString': 'not a time',
         }
         for string in valid_test_strings.values():
-            result = tag_validator_util.is_date_time(string)
+            result = class_util.is_date_time(string)
             self.assertEqual(result, True, string)
         for string in invalid_test_strings.values():
-            result = tag_validator_util.is_date_time(string)
+            result = class_util.is_date_time(string)
             self.assertEqual(result, False, string)
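
The RemodelerValidator introduced in tests/tools/remodeling/test_validator.py replaces the per-operation error tuples that Dispatcher.parse_operations used to return: validation is now a separate, up-front step that yields a list of human-readable error strings. The following is a minimal usage sketch, assuming only the RemodelerValidator() constructor and validate() call exercised by the new tests; the JSON file name is illustrative and not part of this change set.

import json

from hed.tools.remodeling.validator import RemodelerValidator

# Load a remodeler operation list (hypothetical path); validate() expects
# the parsed JSON, which must be a list of operation dictionaries.
with open("my_remodel_operations.json") as fp:
    operations = json.load(fp)

validator = RemodelerValidator()
error_strings = validator.validate(operations)  # empty list when the file is valid

for message in error_strings:
    # Messages are position-prefixed, e.g.
    # "Operation 1: The parameter column_names is missing. ..."
    print(message)

Running the validator before constructing a Dispatcher mirrors the new tests: a valid operation file yields an empty list, and any structural problem yields a message that names the offending operation by position.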