diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 919e5c8fb..ea78321f8 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -7,11 +7,29 @@ on:
branches: ["*"]
jobs:
+ determine_version:
+ runs-on: ubuntu-latest
+ outputs:
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
+ steps:
+ - id: set-matrix
+ run: |
+ if [[ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == 'refs/heads/master' ]]; then
+ # Push to master branch
+ echo 'matrix=["3.7", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT
+ elif [[ "${{ github.event_name }}" == 'pull_request' && "${{ github.event.pull_request.base.ref }}" == 'master' ]]; then
+ # PR to master branch
+ echo 'matrix=["3.7", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT
+ else
+ echo 'matrix=["3.9"]' >> $GITHUB_OUTPUT
+ fi
+
build:
+ needs: determine_version
strategy:
matrix:
platform: [ubuntu-latest]
- python-version: [3.7, 3.9]
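+      # fromJson converts the determine_version job's JSON output string into a list for this matrix axis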
+ python-version: ${{fromJson(needs.determine_version.outputs.matrix)}}
runs-on: ${{ matrix.platform }}
diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
new file mode 100644
index 000000000..9c6511f6c
--- /dev/null
+++ b/.github/workflows/codespell.yaml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+ push:
+ branches: [develop]
+ pull_request:
+ branches: [develop]
+
+permissions:
+ contents: read
+
+jobs:
+ codespell:
+ name: Check for spelling errors
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Codespell
+ uses: codespell-project/actions-codespell@v2
\ No newline at end of file
diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst
deleted file mode 100644
index 50efd957f..000000000
--- a/docs/source/_templates/autosummary/class.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ module }}.{{ objname }}
- {% block methods %}
- .. automethod:: __init__
-
- {% if methods %}
- .. rubric:: {{ _('Methods') }}
-
- .. autosummary::
- {% for item in methods %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: {{ _('Attributes') }}
-
- .. autosummary::
- {% for item in attributes %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
-.. autoclass:: {{ module }}.{{ objname }}
- :members:
- :undoc-members:
- :show-inheritance:
- :inherited-members:
diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst
index 16ebb2f33..cf03326d1 100644
--- a/docs/source/_templates/custom-class-template.rst
+++ b/docs/source/_templates/custom-class-template.rst
@@ -1,32 +1,42 @@
-{{ fullname | escape | underline}}
+{{ fullname.split('.')[-1] | escape | underline }}
.. currentmodule:: {{ module }}
-.. autoclass:: {{ objname }}
- :members:
- :show-inheritance:
- :inherited-members:
-
- {% block methods %}
- .. automethod:: __init__
-
- {% if methods %}
- .. rubric:: {{ _('Methods') }}
-
- .. autosummary::
- {% for item in methods %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
-
- {% block attributes %}
- {% if attributes %}
- .. rubric:: {{ _('Attributes') }}
-
- .. autosummary::
- {% for item in attributes %}
- ~{{ name }}.{{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
\ No newline at end of file
+.. autoclass:: {{ module }}.{{ objname }}
+ :noindex:
+
+.. rubric:: {{ _('Methods') }}
+
+.. autosummary::
+{% for item in methods %}
+ {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+.. rubric:: {{ _('Attributes') }}
+
+.. autosummary::
+{% for item in attributes %}
+ {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+.. toctree::
+ :hidden:
+
+{% for item in methods %}
+ {{ fullname }}#method-{{ item }}
+{%- endfor %}
+{% for item in attributes %}
+ {{ fullname }}#attribute-{{ item }}
+{%- endfor %}
+
+{% for item in methods %}
+.. _method-{{ item }}:
+
+.. automethod:: {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+{% for item in attributes %}
+.. _attribute-{{ item }}:
+
+.. autoattribute:: {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst
index 74078355f..9e9c8a771 100644
--- a/docs/source/_templates/custom-module-template.rst
+++ b/docs/source/_templates/custom-module-template.rst
@@ -1,4 +1,6 @@
-{{ fullname | escape | underline}}
+{{ fullname.split('.')[-1] | escape | underline}}
+
+.. currentmodule:: {{ module }}
.. automodule:: {{ fullname }}
@@ -14,17 +16,22 @@
{% endif %}
{% endblock %}
- {% block functions %}
- {% if functions %}
- .. rubric:: {{ _('Functions') }}
+ {% block functions %}
+ {% if functions %}
+ .. rubric:: {{ _('Functions') }}
- .. autosummary::
- :toctree:
- {% for item in functions %}
- {{ item }}
- {%- endfor %}
- {% endif %}
- {% endblock %}
+ .. autosummary::
+ {% for item in functions %}
+ {{ item }}
+ {% endfor %}
+
+ {% for item in functions %}
+ .. _{{ item }}:
+
+ .. autofunction:: {{ item }}
+ {% endfor %}
+ {% endif %}
+ {% endblock %}
{% block classes %}
{% if classes %}
diff --git a/docs/source/api2.rst b/docs/source/api2.rst
index 02d3aeaed..1f78f4698 100644
--- a/docs/source/api2.rst
+++ b/docs/source/api2.rst
@@ -1,5 +1,5 @@
-HED API reference (Auto style)
-==============================
+HED API reference
+=================
.. currentmodule:: hed
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 924fb2b7e..3b82987d3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,14 +14,14 @@ Hierarchical Event Descriptor (HED) Python Tools
Note: this is a work in progress. More information is coming.
.. toctree::
- :maxdepth: 4
+ :maxdepth: 6
:caption: Contents:
introduction.md
user_guide.rst
.. toctree::
- :maxdepth: 4
+ :maxdepth: 6
:caption: HED Python API:
api2.rst
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 7305e7c6c..a866ec326 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -107,7 +107,7 @@ class SidecarErrors:
class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'
- SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+
SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
@@ -119,19 +119,22 @@ class SchemaWarnings:
SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
- SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE"
class SchemaAttributeErrors:
+ SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+ SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID'
SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID"
SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID"
- SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID"
SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID"
SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID"
+ SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID"
+ SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID"
SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID"
- SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID
+ SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED"
+ SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE"
class DefinitionErrors:
diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py
index e7ee857b9..e368ec43c 100644
--- a/hed/errors/exceptions.py
+++ b/hed/errors/exceptions.py
@@ -14,8 +14,9 @@ class HedExceptions:
INVALID_DATAFRAME = 'INVALID_DATAFRAME'
INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT'
# These are actual schema issues, not that the file cannot be found or parsed
- SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
- HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
+ SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID'
+ SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID'
+ SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID"
SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
@@ -26,14 +27,14 @@ class HedExceptions:
ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"
- HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
- SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
- SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
- HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
- INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator'
+ SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID'
+ SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING'
+
+ WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator'
# This issue will contain a list of lines with issues.
- HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID'
+ WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID'
+ WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID'
HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'
SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py
index b72e84708..b89626828 100644
--- a/hed/errors/known_error_codes.py
+++ b/hed/errors/known_error_codes.py
@@ -31,6 +31,7 @@
],
"schema_validation_errors": [
"SCHEMA_ATTRIBUTE_INVALID",
+ "SCHEMA_ATTRIBUTE_VALUE_INVALID",
"SCHEMA_CHARACTER_INVALID",
"SCHEMA_DUPLICATE_NODE",
"SCHEMA_HEADER_INVALID",
diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index b7fda9d50..8c196f9ed 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
-@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
f"or was used outside of it's defined class."
@@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in
f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}."
-@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING)
+@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING,
+ actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
return "Only placeholder nodes('#') can have a unit class, value class, or takes value." + \
f"Found {invalid_attribute_name} on {tag_name}"
-@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
-def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name):
- return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'." \
- f"Something went very wrong."
-
-@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version):
return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'."
@hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
- actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+ actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child):
return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name):
return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name):
return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name):
return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units):
valid_units = ",".join(valid_units)
return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. Valid units are: '{valid_units}'."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
+ return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
+ return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. "
+ f"Allowed characters are: a single character, "
+ f"or one of the following - letters, blank, digits, alphanumeric.")
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library):
+    return f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'."
diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 12e2d8895..58bb94c94 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -137,7 +137,7 @@ def _indexed_dict_from_onsets(onsets):
@staticmethod
def _filter_by_index_list(original_series, indexed_dict):
- new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a"
+ new_series = pd.Series(["n/a"] * len(original_series), dtype=str)
for onset, indices in indexed_dict.items():
if indices:
diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py
new file mode 100644
index 000000000..9301a0ccb
--- /dev/null
+++ b/hed/models/basic_search.py
@@ -0,0 +1,257 @@
+import re
+from itertools import combinations, product
+from collections import defaultdict
+import pandas as pd
+
+
+def find_matching(series, search_string, regex=False):
+ """ Finds lines in the series that match the search string and returns a mask.
+
+ Syntax Rules:
+ - '@': Prefixing a term in the search string means the term must appear anywhere within a line.
+ - '~': Prefixing a term in the search string means the term must NOT appear within a line.
+ - Parentheses: Elements within parentheses must appear in the line with the same level of nesting.
+ e.g.: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't
+ start in the same group.
+        - "LongFormTag*": A * will match any remaining word (anything but a comma or parenthesis).
+ - An individual term can be arbitrary regex, but it is limited to single continuous words.
+
+ Notes:
+ - Specific words only care about their level relative to other specific words, not overall.
+            e.g. "(A, B)" will find: "A, B", "(A, B)", "(A, (C), B)", or "((A, B))"
+        - If the search has no grouping and no anywhere words, all terms are treated as anywhere words.
+ - The format of the series should match the format of the search string, whether it's in short or long form.
+ - To enable support for matching parent tags, ensure that both the series and search string are in long form.
+
+ Args:
+ series (pd.Series): A Pandas Series object containing the lines to be searched.
+ search_string (str): The string to search for in each line of the series.
+        regex (bool): If False (the default), translate any * wildcard characters to .*? regex.
+            If True, do no translation and pass the words through as-is. Due to how the parsing is
+            set up, the search string must not include the following characters: (),
+
+ Returns:
+ mask (pd.Series): A Boolean mask Series of the same length as the input series.
+ The mask has `True` for lines that match the search string and `False` otherwise.
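+
+    Example (illustrative, following the rules above):
+        >>> import pandas as pd
+        >>> find_matching(pd.Series(["(A), (B, C)", "(A, B)"]), "(A), (B)").tolist()
+        [True, False]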
+ """
+ if not regex:
+ # Replace *'s with a reasonable value for people who don't know regex
+        search_string = re.sub(r'(?<!\.)\*', '.*?', search_string)
diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py
--- a/hed/models/expression_parser.py
+++ b/hed/models/expression_parser.py
@@ ... @@
        # Python >= 3.9
- # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups]
+ # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups]
# Python 3.7/8 compatible version.
- negated_groups = [search_result(group, []) for group in hed_group.get_all_groups()
+ negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups()
if not any(group is found_group.group for found_group in found_groups)]
return negated_groups
-class ExpressionContainingGroup(Expression):
- def handle_expr(self, hed_group, exact=False):
- result = self.right.handle_expr(hed_group, exact=True)
- found_groups = result
- if result:
- found_parent_groups = []
- for group in found_groups:
- if not group.group.is_group:
- continue
- if group.group._parent:
- found_parent_groups.append(search_result(group.group._parent, group.group))
-
- if found_parent_groups:
- return found_parent_groups
-
- return []
-
-
class ExpressionDescendantGroup(Expression):
def handle_expr(self, hed_group, exact=False):
found_groups = self.right.handle_expr(hed_group)
- found_parent_groups = []
- if found_groups:
- for group in found_groups:
- if not group.group.is_group:
- continue
- if group.group._parent:
- found_parent_groups.append(search_result(group.group._parent, group.group))
-
- if found_parent_groups:
- return found_parent_groups
- return []
+ found_parent_groups = self._get_parent_groups(found_groups)
+ return found_parent_groups
class ExpressionExactMatch(Expression):
+ def __init__(self, token, left=None, right=None):
+ super().__init__(token, left, right)
+ self.optional = "any"
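+        # "any" allows extra tags in a matched group; the parser flips this to "none"
+        # when it sees ':' in {...}, restricting matches to the listed and optional terms.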
+
+ def _filter_exact_matches(self, search_results):
+ filtered_list = []
+ for group in search_results:
+ if len(group.group.children) == len(group.tags):
+ filtered_list.append(group)
+
+ return filtered_list
+
def handle_expr(self, hed_group, exact=False):
found_groups = self.right.handle_expr(hed_group, exact=True)
- if found_groups:
- return_list = []
- for group in found_groups:
- if len(group.group.children) == len(group.tags):
- return_list.append(group)
+ if self.optional == "any":
+ return self._get_parent_groups(found_groups)
- if return_list:
- return return_list
+ filtered_list = self._filter_exact_matches(found_groups)
+ if filtered_list:
+ return self._get_parent_groups(filtered_list)
# Basically if we don't have an exact match above, do the more complex matching including optional
if self.left:
optional_groups = self.left.handle_expr(hed_group, exact=True)
found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
- if found_groups:
- return_list = []
- for group in found_groups:
- if len(group.group.children) == len(group.tags):
- return_list.append(group)
-
- if return_list:
- return return_list
+ filtered_list = self._filter_exact_matches(found_groups)
+ if filtered_list:
+ return self._get_parent_groups(filtered_list)
return []
@@ -337,7 +322,6 @@ class QueryParser:
def __init__(self, expression_string):
"""Compiles a QueryParser for a particular expression, so it can be used to search hed strings.
-
Basic Input Examples:
'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event
@@ -354,11 +338,15 @@ def __init__(self, expression_string):
'[Event and Action]' - Find a group that contains both Event and Action(at any level)
- '[[Event and Action]]' - Find a group with Event And Action at the same level.
+ '{Event and Action}' - Find a group with Event And Action at the same level.
+
+ '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else
+
+ '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
Practical Complex Example:
- [[{(Onset or Offset), (Def or [[Def-expand]]): ???}]] - A group with an onset tag,
+ {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
a def tag or def-expand group, and an optional wildcard group
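+
+        Usage sketch (illustrative; assumes this parser's search interface as used elsewhere in the repo):
+            parser = QueryParser("{(Onset or Offset), (Def or {Def-expand}): ???}")
+            results = parser.search(hed_string_obj)  # empty list when nothing matches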
Parameters:
@@ -392,15 +380,22 @@ def current_token(self):
def _handle_and_op(self):
expr = self._handle_negation()
- next_token = self._next_token_is([Token.And, Token.Or])
+ next_token = self._next_token_is([Token.And])
while next_token:
right = self._handle_negation()
if next_token.kind == Token.And:
expr = ExpressionAnd(next_token, expr, right)
- elif next_token.kind == Token.Or:
- expr = ExpressionOr(next_token, expr, right)
- next_token = self._next_token_is([Token.And, Token.Or])
+ next_token = self._next_token_is([Token.And])
+ return expr
+ def _handle_or_op(self):
+ expr = self._handle_and_op() # Note: calling _handle_and_op here
+ next_token = self._next_token_is([Token.Or])
+ while next_token:
+ right = self._handle_and_op() # Note: calling _handle_and_op here
+ if next_token.kind == Token.Or:
+ expr = ExpressionOr(next_token, expr, right)
+ next_token = self._next_token_is([Token.Or])
return expr
def _handle_negation(self):
@@ -417,33 +412,35 @@ def _handle_negation(self):
def _handle_grouping_op(self):
next_token = self._next_token_is(
- [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
- if next_token == Token.ContainingGroup:
- interior = self._handle_and_op()
- expr = ExpressionContainingGroup(next_token, right=interior)
- next_token = self._next_token_is([Token.ContainingGroupEnd])
- if next_token != Token.ContainingGroupEnd:
- raise ValueError("Parse error: Missing closing square brackets")
- # Can we move this to the and_or level? or does that break everything...?
- elif next_token == Token.LogicalGroup:
- expr = self._handle_and_op()
+ [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
+ if next_token == Token.LogicalGroup:
+ expr = self._handle_or_op()
next_token = self._next_token_is([Token.LogicalGroupEnd])
if next_token != Token.LogicalGroupEnd:
raise ValueError("Parse error: Missing closing paren")
elif next_token == Token.DescendantGroup:
- interior = self._handle_and_op()
+ interior = self._handle_or_op()
expr = ExpressionDescendantGroup(next_token, right=interior)
next_token = self._next_token_is([Token.DescendantGroupEnd])
if next_token != Token.DescendantGroupEnd:
raise ValueError("Parse error: Missing closing square bracket")
elif next_token == Token.ExactMatch:
- interior = self._handle_and_op()
+ interior = self._handle_or_op()
expr = ExpressionExactMatch(next_token, right=interior)
next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional])
if next_token == Token.ExactMatchOptional:
- optional_portion = self._handle_and_op()
- expr.left = optional_portion
+ # We have an optional portion - this needs to now be an exact match
+ expr.optional = "none"
next_token = self._next_token_is([Token.ExactMatchEnd])
+ if next_token != Token.ExactMatchEnd:
+ optional_portion = self._handle_or_op()
+ expr.left = optional_portion
+ next_token = self._next_token_is([Token.ExactMatchEnd])
+ if "~" in str(expr):
+ raise ValueError("Cannot use negation in exact matching groups,"
+ " as it's not clear what is being matched.\n"
+ "{thing and ~(expression)} is allowed.")
+
if next_token is None:
raise ValueError("Parse error: Missing closing curly bracket")
else:
@@ -452,13 +449,15 @@ def _handle_grouping_op(self):
expr = ExpressionWildcardNew(next_token)
elif next_token:
expr = Expression(next_token)
+ else:
+ expr = None
return expr
def _parse(self, expression_string):
self.tokens = self._tokenize(expression_string)
- expr = self._handle_and_op()
+ expr = self._handle_or_op()
if self.at_token + 1 != len(self.tokens):
raise ValueError("Parse error in search string")
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 4cb3729c0..0857abe9c 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
- new_entry._set_attribute_value(HedKey.InLibrary, self.library)
+ # only add it if not already present - This is a rare case
+ if not new_entry.has_attribute(HedKey.InLibrary):
+ new_entry._set_attribute_value(HedKey.InLibrary, self.library)
section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index 0cecc4ab6..60a1a9349 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -42,6 +42,7 @@ class HedKey:
SuggestedTag = "suggestedTag"
Rooted = "rooted"
DeprecatedFrom = "deprecatedFrom"
+ ConversionFactor = "conversionFactor"
# All known properties
BoolProperty = 'boolProperty'
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 102795d80..936943e85 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -176,7 +176,6 @@ def __eq__(self, other):
return False
return True
-
class UnitEntry(HedSchemaEntry):
""" A single unit entry with modifiers in the HedSchema. """
def __init__(self, *args, **kwargs):
@@ -207,12 +206,13 @@ def finalize_entry(self, schema):
self.derivative_units = derivative_units
def _get_conversion_factor(self, modifier_entry):
-
- base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e"))
- if modifier_entry:
- modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e"))
- else:
- modifier_factor = 1.0
+ base_factor = modifier_factor = 1.0
+ try:
+ base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+ if modifier_entry:
+ modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+        except (ValueError, AttributeError):
+            pass  # Just default to 1.0
return base_factor * modifier_factor
def get_conversion_factor(self, unit_name):
@@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name):
Returns:
conversion_factor(float or None): Returns the conversion factor or None
"""
- if "conversionFactor" in self.attributes:
+ if HedKey.ConversionFactor in self.attributes:
return float(self.derivative_units.get(unit_name))
class HedTagEntry(HedSchemaEntry):
diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py
index d1d7f5ecb..0ccb9c333 100644
--- a/hed/schema/schema_attribute_validators.py
+++ b/hed/schema/schema_attribute_validators.py
@@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name):
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
tag_entry.name,
child.name)
- return issues
\ No newline at end of file
+ return issues
+
+
+def conversion_factor(hed_schema, tag_entry, attribute_name):
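+    # Illustrative: "10^-3" is parsed as 1e-3 and passes; "0", "-2", or "oops" is reported below,
+    # since a non-float or non-positive value falls through to the error check.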
+ issues = []
+ conversion_factor = tag_entry.attributes.get(attribute_name, "1.0")
+ try:
+ conversion_factor = float(conversion_factor.replace("^", "e"))
+    except (ValueError, AttributeError):
+        pass
+ if not isinstance(conversion_factor, float) or conversion_factor <= 0.0:
+ issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE,
+ tag_entry.name,
+ conversion_factor)
+
+ return issues
+
+
+def allowed_characters_check(hed_schema, tag_entry, attribute_name):
+    """ Check that the allowedCharacter attribute has a valid value.
+
+ Parameters:
+ hed_schema (HedSchema): The schema to use for validation
+ tag_entry (HedSchemaEntry): The schema entry for this attribute.
+ attribute_name (str): The name of this attribute
+
+ Returns:
+ list: A list of issues. Each issue is a dictionary.
+
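+    Example (illustrative): allowedCharacter="letters,digits,-" passes, since each entry is
+    a known word or a single character, while allowedCharacter="words" is reported.
+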
+ """
+ issues = []
+ allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'}
+
+ char_string = tag_entry.attributes.get(attribute_name, "")
+ characters = char_string.split(",")
+ for character in characters:
+ if character not in allowed_strings and len(character) != 1:
+ issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
+ tag_entry.name,
+ character)
+ return issues
+
+
+def in_library_check(hed_schema, tag_entry, attribute_name):
+    """ Check that the inLibrary attribute matches the schema's library.
+
+ Parameters:
+ hed_schema (HedSchema): The schema to use for validation
+ tag_entry (HedSchemaEntry): The schema entry for this attribute.
+ attribute_name (str): The name of this attribute
+
+ Returns:
+ list: A list of issues. Each issue is a dictionary.
+
+ """
+ issues = []
+
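+    # Illustrative: a tag with inLibrary="score" passes only when the loaded schema's library is "score".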
+ library = tag_entry.attributes.get(attribute_name, "")
+ if hed_schema.library != library:
+        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
+ tag_entry.name,
+ library)
+ return issues
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index c75c11de3..1a68baf89 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
class SchemaValidator:
"""Validator class to wrap some code. In general, just call check_compliance."""
attribute_validators = {
- HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check,
- SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)],
- HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check,
- SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)],
- HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
- (schema_attribute_validators.unit_class_exists,
- SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)],
- HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
- (schema_attribute_validators.value_class_exists,
- SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)],
+ HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check],
+ HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check],
+ HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check,
+ schema_attribute_validators.unit_class_exists],
+ HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check,
+ schema_attribute_validators.value_class_exists],
# Rooted tag is implicitly verified on loading
- # HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check,
- # SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)],
- HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check,
- SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)],
- HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)],
- HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists,
- SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)]
+ # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
+ HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
+ HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
+ HedKey.DefaultUnits: [schema_attribute_validators.unit_exists],
+ HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
+ HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
+ HedKey.InLibrary: [schema_attribute_validators.in_library_check]
}
def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
@@ -80,7 +73,7 @@ def check_unknown_attributes(self):
if unknown_attributes:
for attribute_name, source_tags in unknown_attributes.items():
for tag in source_tags:
- issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+ issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
attribute_name,
source_tag=tag)
return issues_list
@@ -93,16 +86,14 @@ def check_attributes(self):
for tag_entry in self.hed_schema[section_key].values():
self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
for attribute_name in tag_entry.attributes:
- validators = self.attribute_validators.get(attribute_name, None)
- if validators:
- for validator, error_code in validators:
- self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
- new_issues = validator(self.hed_schema, tag_entry, attribute_name)
- for issue in new_issues:
- issue['code'] = error_code
- issue['severity'] = ErrorSeverity.WARNING
- self.error_handler.add_context_and_filter(new_issues)
- issues_list += new_issues
+ validators = self.attribute_validators.get(attribute_name, [])
+ for validator in validators:
+ self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
+ new_issues = validator(self.hed_schema, tag_entry, attribute_name)
+ for issue in new_issues:
+ issue['severity'] = ErrorSeverity.WARNING
+ self.error_handler.add_context_and_filter(new_issues)
+ issues_list += new_issues
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py
index e373cf1a1..d9d082a15 100644
--- a/hed/schema/schema_io/schema2base.py
+++ b/hed/schema/schema_io/schema2base.py
@@ -106,9 +106,6 @@ def _output_tags(self, tags):
self._end_tag_section()
def _output_units(self, unit_classes):
- if not unit_classes:
- return
-
section_node = self._start_section(HedSectionKey.UnitClasses)
for unit_class_entry in unit_classes.values():
@@ -128,8 +125,6 @@ def _output_units(self, unit_classes):
self._write_entry(unit_entry, unit_class_node)
def _output_section(self, hed_schema, key_class):
- if not hed_schema[key_class]:
- return
parent_node = self._start_section(key_class)
for entry in hed_schema[key_class].values():
if self._should_skip(entry):
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index a02f9ed6e..de18f9d6a 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -22,12 +22,19 @@
no_wiki_end_tag = ''
-ErrorsBySection = {
- HedWikiSection.Schema: HedExceptions.SCHEMA_START_MISSING,
- HedWikiSection.EndSchema: HedExceptions.SCHEMA_END_INVALID,
- HedWikiSection.EndHed: HedExceptions.HED_END_INVALID
-}
-required_sections = [HedWikiSection.Schema, HedWikiSection.EndSchema, HedWikiSection.EndHed]
+
+required_sections = [
+ HedWikiSection.Prologue,
+ HedWikiSection.Schema,
+ HedWikiSection.EndSchema,
+ HedWikiSection.UnitsClasses,
+ HedWikiSection.UnitModifiers,
+ HedWikiSection.ValueClasses,
+ HedWikiSection.Attributes,
+ HedWikiSection.Properties,
+ HedWikiSection.Epilogue,
+ HedWikiSection.EndHed,
+]
class SchemaLoaderWiki(SchemaLoader):
@@ -79,15 +86,13 @@ def _parse_data(self):
# Validate we didn't miss any required sections.
for section in required_sections:
if section not in wiki_lines_by_section:
- error_code = HedExceptions.INVALID_SECTION_SEPARATOR
- if section in ErrorsBySection:
- error_code = ErrorsBySection[section]
+ error_code = HedExceptions.SCHEMA_SECTION_MISSING
msg = f"Required section separator '{SectionNames[section]}' not found in file"
raise HedFileError(error_code, msg, filename=self.filename)
if self.fatal_errors:
self.fatal_errors = error_reporter.sort_issues(self.fatal_errors)
- raise HedFileError(HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+ raise HedFileError(self.fatal_errors[0]['code'],
f"{len(self.fatal_errors)} issues found when parsing schema. See the .issues "
f"parameter on this exception for more details.", self.filename,
issues=self.fatal_errors)
@@ -109,7 +114,7 @@ def _read_header_section(self, lines):
for line_number, line in lines:
if line.strip():
msg = f"Extra content [{line}] between HED line and other sections"
- raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
def _read_text_block(self, lines):
text = ""
@@ -163,7 +168,8 @@ def _read_schema(self, lines):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(line_number, line,
- "Line has too many *'s at the front. You cannot skip a level.")
+                                      "Line has too many *'s at the front. You cannot skip a level.",
+                                      HedExceptions.WIKI_LINE_START_INVALID)
continue
# Create the entry
tag_entry = self._add_tag_line(parent_tags, line_number, line)
@@ -261,14 +267,37 @@ def _get_header_attributes_internal(self, version_line):
if "=" not in version_line:
return self._get_header_attributes_internal_old(version_line)
- final_attributes = {}
+ attributes, malformed = self._parse_attributes_line(version_line)
+
+ for m in malformed:
+ # todo: May shift this at some point to report all errors
+ raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID,
+ message=f"Header line has a malformed attribute {m}",
+ filename=self.filename)
+ return attributes
+
+ @staticmethod
+ def _parse_attributes_line(version_line):
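+        # Illustrative: a header line like 'version="8.2.0" library="score"' should yield
+        # ({'version': '8.2.0', 'library': 'score'}, []), assuming attr_re matches name="value" pairs.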
+ matches = {}
+ unmatched = []
+ last_end = 0
for match in attr_re.finditer(version_line):
- attr_name = match.group(1)
- attr_value = match.group(2)
- final_attributes[attr_name] = attr_value
+ start, end = match.span()
- return final_attributes
+ # If there's unmatched content between the last match and the current one
+ if start > last_end:
+ unmatched.append(version_line[last_end:start])
+
+ matches[match.group(1)] = match.group(2)
+ last_end = end
+
+ # If there's unmatched content after the last match
+ if last_end < len(version_line):
+ unmatched.append(version_line[last_end:])
+
+ unmatched = [m.strip() for m in unmatched if m.strip()]
+ return matches, unmatched
def _get_header_attributes_internal_old(self, version_line):
""" Extracts all valid attributes like version from the HED line in .mediawiki format.
@@ -288,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line):
divider_index = pair.find(':')
if divider_index == -1:
msg = f"Found poorly matched key:value pair in header: {pair}"
- raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
key, value = pair[:divider_index], pair[divider_index + 1:]
key = key.strip()
value = value.strip()
@@ -369,10 +398,17 @@ def _get_tag_name(self, tag_line):
return None, 0
@staticmethod
- def _get_tag_attributes(tag_line, starting_index):
+ def _validate_attribute_string(attribute_string):
+ pattern = r'^[A-Za-z]+(=.+)?$'
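+        # Accepts bare names ("requireChild") and name=value pairs ("suggestedTag=Event");
+        # rejects malformed names, e.g. ones containing spaces or digits.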
+ match = re.fullmatch(pattern, attribute_string)
+ if match:
+ return match.group()
+
+ def _get_tag_attributes(self, line_number, tag_line, starting_index):
""" Get the tag attributes from a line.
Parameters:
+ line_number (int): The line number to report errors as
tag_line (str): A tag line.
starting_index (int): The first index we can check for the brackets.
@@ -386,11 +422,14 @@ def _get_tag_attributes(tag_line, starting_index):
return None, starting_index
if attr_string:
attributes_split = [x.strip() for x in attr_string.split(',')]
- # Filter out attributes with spaces.
- attributes_split = [a for a in attributes_split if " " not in a]
final_attributes = {}
for attribute in attributes_split:
+ if self._validate_attribute_string(attribute) is None:
+ self._add_fatal_error(line_number, tag_line,
+ f"Malformed attribute found {attribute}. "
+ f"Valid formatting is: attribute, or attribute=\"value\".")
+ continue
split_attribute = attribute.split("=")
if len(split_attribute) == 1:
final_attributes[split_attribute[0]] = True
@@ -468,7 +507,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
if element_name:
node_name = element_name
- node_attributes, index = self._get_tag_attributes(tag_line, index)
+ node_attributes, index = self._get_tag_attributes(line_number, tag_line, index)
if node_attributes is None:
self._add_fatal_error(line_number, tag_line, "Attributes has mismatched delimiters")
return
@@ -489,7 +528,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
return tag_entry
def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
- error_code=HedExceptions.HED_WIKI_DELIMITERS_INVALID):
+ error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
self.fatal_errors.append(
{'code': error_code,
ErrorContext.ROW: line_number,
@@ -504,14 +543,12 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
if line.startswith(section_string):
if key in strings_for_section:
msg = f"Found section {SectionNames[key]} twice"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR,
+ raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID,
msg, filename=self.filename)
if current_section < key:
new_section = key
else:
- error_code = HedExceptions.INVALID_SECTION_SEPARATOR
- if key in ErrorsBySection:
- error_code = ErrorsBySection[key]
+ error_code = HedExceptions.SCHEMA_SECTION_MISSING
msg = f"Found section {SectionNames[key]} out of order in file"
raise HedFileError(error_code, msg, filename=self.filename)
break
@@ -520,11 +557,11 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
def _handle_bad_section_sep(self, line, current_section):
if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG):
msg = f"Invalid section separator '{line.strip()}'"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename)
if line.startswith("!#"):
msg = f"Invalid section separator '{line.strip()}'"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename)
def _split_lines_into_sections(self, wiki_lines):
""" Takes a list of lines, and splits it into valid wiki sections.
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
index 8404970e7..25b27ab8c 100644
--- a/hed/schema/schema_validation_util.py
+++ b/hed/schema/schema_validation_util.py
@@ -4,6 +4,7 @@
from hed.errors import ErrorHandler, SchemaWarnings
from hed.schema import hed_schema_constants as constants
from hed.errors.exceptions import HedExceptions, HedFileError
+from hed.schema.hed_schema_constants import valid_header_attributes
ALLOWED_TAG_CHARS = "-"
ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
@@ -45,9 +46,9 @@ def validate_version_string(version_string):
header_attribute_validators = {
- constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID),
- constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
- }
+ constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
+ constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
+}
def validate_present_attributes(attrib_dict, filename):
@@ -92,9 +93,12 @@ def validate_attributes(attrib_dict, filename):
had_error = validator(attribute_value)
if had_error:
raise HedFileError(error_code, had_error, filename)
+ if attribute_name not in valid_header_attributes:
+ raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
+ f"Unknown attribute {attribute_name} found in header line", filename=filename)
if constants.VERSION_ATTRIBUTE not in attrib_dict:
- raise HedFileError(HedExceptions.HED_SCHEMA_VERSION_INVALID,
+ raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
"No version attribute found in header", filename=filename)
diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py
index ac18f2f0b..75c6f4f1a 100644
--- a/hed/tools/remodeling/backup_manager.py
+++ b/hed/tools/remodeling/backup_manager.py
@@ -10,7 +10,7 @@
class BackupManager:
DEFAULT_BACKUP_NAME = 'default_back'
- RELATIVE_BACKUP_LOCATION = 'derivatives/remodel'
+ RELATIVE_BACKUP_LOCATION = './derivatives/remodel/backups'
BACKUP_DICTIONARY = 'backup_lock.json'
BACKUP_ROOT = 'backup_root'
@@ -24,14 +24,15 @@ def __init__(self, data_root, backups_root=None):
:raises HedFileError:
             If the data_root does not correspond to a real directory.
+
+        Notes: If backups_root is not given, [data_root]/derivatives/remodel/backups is used.
"""
if not os.path.isdir(data_root):
raise HedFileError('NonExistentData', f"{data_root} is not an existing directory", "")
self.data_root = data_root
if backups_root:
- self.backups_path = os.path.join(backups_root, 'backups')
+ self.backups_path = backups_root
else:
- self.backups_path = os.path.join(data_root, self.RELATIVE_BACKUP_LOCATION, 'backups')
+ self.backups_path = os.path.join(data_root, self.RELATIVE_BACKUP_LOCATION)
self.backups_path = os.path.realpath(self.backups_path)
os.makedirs(self.backups_path, exist_ok=True)
self.backups_dict = self._get_backups()
diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py
index 45fce71ae..32af02eaf 100644
--- a/hed/tools/remodeling/cli/run_remodel.py
+++ b/hed/tools/remodeling/cli/run_remodel.py
@@ -20,6 +20,10 @@ def get_parser():
parser = argparse.ArgumentParser(description="Converts event files based on a json file specifying operations.")
parser.add_argument("data_dir", help="Full path of dataset root directory.")
parser.add_argument("model_path", help="Full path of the file with remodeling instructions.")
+ parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+ help="Directory for the backup that is being created")
+ parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+ help="Name of the default backup for remodeling")
parser.add_argument("-b", "--bids-format", action='store_true', dest="use_bids",
help="If present, the dataset is in BIDS format with sidecars. HED analysis is available.")
parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
@@ -31,8 +35,8 @@ def get_parser():
help="Controls individual file summaries ('none', 'separate', 'consolidated')")
parser.add_argument("-j", "--json-sidecar", dest="json_sidecar", nargs="?",
help="Optional path to JSON sidecar with HED information")
- parser.add_argument("-n", "--backup-name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
- help="Name of the default backup for remodeling")
parser.add_argument("-nb", "--no-backup", action='store_true', dest="no_backup",
help="If present, the operations are run directly on the files with no backup.")
parser.add_argument("-ns", "--no-summaries", action='store_true', dest="no_summaries",
@@ -142,21 +146,21 @@ def run_bids_ops(dispatch, args, tabular_files):
dispatch.hed_schema = bids.schema
if args.verbose:
print(f"Successfully parsed BIDS dataset with HED schema {str(bids.schema.get_schema_versions())}")
- events = bids.get_tabular_group(args.file_suffix)
+ data = bids.get_tabular_group(args.file_suffix)
if args.verbose:
print(f"Processing {dispatch.data_root}")
- filtered_events = [events.datafile_dict[key] for key in tabular_files]
- for events_obj in filtered_events:
- sidecar_list = events.get_sidecars_from_path(events_obj)
+ filtered_events = [data.datafile_dict[key] for key in tabular_files]
+ for data_obj in filtered_events:
+ sidecar_list = data.get_sidecars_from_path(data_obj)
if sidecar_list:
- sidecar = events.sidecar_dict[sidecar_list[-1]].contents
+ sidecar = data.sidecar_dict[sidecar_list[-1]].contents
else:
sidecar = None
if args.verbose:
- print(f"Events {events_obj.file_path} sidecar {sidecar}")
- df = dispatch.run_operations(events_obj.file_path, sidecar=sidecar, verbose=args.verbose)
+ print(f"Tabular file {data_obj.file_path} sidecar {sidecar}")
+ df = dispatch.run_operations(data_obj.file_path, sidecar=sidecar, verbose=args.verbose)
if not args.no_update:
- df.to_csv(events_obj.file_path, sep='\t', index=False, header=True)
+ df.to_csv(data_obj.file_path, sep='\t', index=False, header=True)
def run_direct_ops(dispatch, args, tabular_files):
@@ -176,6 +180,8 @@ def run_direct_ops(dispatch, args, tabular_files):
else:
sidecar = None
for file_path in tabular_files:
+ if args.verbose:
+ print(f"Tabular file {file_path} sidecar {sidecar}")
df = dispatch.run_operations(file_path, verbose=args.verbose, sidecar=sidecar)
if not args.no_update:
df.to_csv(file_path, sep='\t', index=False, header=True)
diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py
index 6d78465dd..3754a15da 100644
--- a/hed/tools/remodeling/cli/run_remodel_backup.py
+++ b/hed/tools/remodeling/cli/run_remodel_backup.py
@@ -1,5 +1,6 @@
""" Command-line program for creating a backup. """
+import os
import argparse
from hed.errors.exceptions import HedFileError
from hed.tools.util.io_util import get_file_list, get_filtered_by_element
@@ -15,21 +16,18 @@ def get_parser():
"""
parser = argparse.ArgumentParser(description="Creates a backup for the remodeling process.")
parser.add_argument("data_dir", help="Full path of dataset root directory.")
+ parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+ help="Directory for the backup that is being created")
+ parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+ help="Name of the default backup for remodeling")
parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
help="File extensions to allow in locating files. A * indicates all files allowed.")
parser.add_argument("-f", "--file-suffix", dest="file_suffix", nargs="*", default=['events'],
help="Filename suffix of files to be backed up. A * indicates all files allowed.")
- parser.add_argument("-n", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
- help="Name of the default backup for remodeling")
- parser.add_argument("-p", "--path-work", default="", dest="path_work",
- help="The root path for remodeling work if given, " +
- "otherwise [data_root]/derivatives/remodel is used.")
+
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The name of the task.")
parser.add_argument("-v", "--verbose", action='store_true',
help="If present, output informative messages as computation progresses.")
- parser.add_argument("-w", "--work-dir", default="", dest="work_dir",
- help="If given, is the path to directory for saving, " +
- "otherwise [data_root]derivatives/remodel is used.")
parser.add_argument("-x", "--exclude-dirs", nargs="*", default=['derivatives'], dest="exclude_dirs",
help="Directories names to exclude from search for files. " +
"If omitted, no directories except the backup directory will be excluded." +
@@ -60,8 +58,8 @@ def main(arg_list=None):
exclude_dirs=exclude_dirs)
if args.task_names:
file_list = get_filtered_by_element(file_list, args.task_names)
- if args.work_dir:
- backups_root = args.work_dir
+ if args.backup_dir:
+ backups_root = args.backup_dir
else:
backups_root = None
backup_man = BackupManager(args.data_dir, backups_root=backups_root)
diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py
index 960bd0916..72ba0c3c4 100644
--- a/hed/tools/remodeling/cli/run_remodel_restore.py
+++ b/hed/tools/remodeling/cli/run_remodel_restore.py
@@ -14,15 +14,13 @@ def get_parser():
"""
parser = argparse.ArgumentParser(description="Restores the backup files for the original data.")
parser.add_argument("data_dir", help="Full path of dataset root directory.")
- parser.add_argument("-n", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+ parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+                        help="Directory containing the backup to be restored")
+ parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
help="Name of the default backup for remodeling")
-
parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The names of the task.")
parser.add_argument("-v", "--verbose", action='store_true',
help="If present, output informative messages as computation progresses.")
- parser.add_argument("-w", "--work_dir", default="", dest="work_dir",
- help="The root path for remodeling work if given, " +
- "otherwise [data_root]/derivatives/remodel is used.")
return parser
@@ -39,8 +37,8 @@ def main(arg_list=None):
"""
parser = get_parser()
args = parser.parse_args(arg_list)
- if args.work_dir:
- backups_root = args.work_dir
+ if args.backup_dir:
+ backups_root = args.backup_dir
else:
backups_root = None
backup_man = BackupManager(args.data_dir, backups_root=backups_root)
diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py
index 267a7039a..6901b6ce5 100644
--- a/hed/tools/remodeling/operations/remove_columns_op.py
+++ b/hed/tools/remodeling/operations/remove_columns_op.py
@@ -6,8 +6,8 @@ class RemoveColumnsOp(BaseOp):
""" Remove columns from a tabular file.
Required remodeling parameters:
- - **remove_names** (*list*): The names of the columns to be removed.
- - **ignore_missing** (*boolean*): If true, names in remove_names that are not columns in df should be ignored.
+ - **column_names** (*list*): The names of the columns to be removed.
+ - **ignore_missing** (*boolean*): If true, names in column_names that are not columns in df should be ignored.
"""
diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py
index 2a2f275a9..0a3329b0c 100644
--- a/hed/tools/remodeling/operations/rename_columns_op.py
+++ b/hed/tools/remodeling/operations/rename_columns_op.py
@@ -8,7 +8,7 @@ class RenameColumnsOp (BaseOp):
Required remodeling parameters:
- **column_mapping** (*dict*): The names of the columns to be removed.
- - **ignore_missing** (*bool*): If true, the names in remove_names that are not columns and should be ignored.
+ - **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns and should be ignored.
"""
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index ffef53fb7..6e98abbed 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -87,6 +87,8 @@ def do_op(self, dispatcher, df, name, sidecar=None):
if not summary:
summary = HedTagSummary(self)
dispatcher.summary_dicts[self.summary_name] = summary
-        summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name,
-                                'schema': dispatcher.hed_schema, 'sidecar': sidecar})
+        summary_info = {'df': dispatcher.post_proc_data(df_new), 'name': name,
+                        'schema': dispatcher.hed_schema, 'sidecar': sidecar}
+        summary.update_summary(summary_info)
return df_new
diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
index cd3fc936b..a2948eb8b 100644
--- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
@@ -1,7 +1,7 @@
""" Validate the HED tags in a dataset and report errors. """
import os
-from hed.errors import ErrorSeverity, ErrorHandler
+from hed.errors import ErrorSeverity, ErrorHandler, get_printable_issue_string
from hed.models.sidecar import Sidecar
from hed.models.tabular_input import TabularInput
from hed.tools.remodeling.operations.base_op import BaseOp
@@ -110,9 +110,9 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
else:
sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], indent=indent*2)
if specifics['sidecar_had_issues']:
- sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2)
+ sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=False, indent=indent*2)
else:
- sum_list = sum_list + [f"{indent*2}Event file validation was incomplete because of sidecar errors"]
+ sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2)
return "\n".join(sum_list)
def update_summary(self, new_info):
@@ -134,6 +134,7 @@ def update_summary(self, new_info):
issues = input_data.validate(new_info['schema'])
if not self.check_for_warnings:
issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR)
+ issues = [get_printable_issue_string([issue], skip_filename=True) for issue in issues]
results['event_issues'][new_info["name"]] = issues
results['total_event_issues'] = len(issues)
self.summary_dict[new_info["name"]] = results
@@ -197,13 +198,15 @@ def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDE
error_list = []
for key, item in error_dict.items():
if count_only and isinstance(item, list):
- error_list.append(f"{indent}{key}: {len(item)} issues")
+ error_list.append(f"{key}: {len(item)} issues")
elif count_only:
- error_list.append(f"{indent}{key}: {item} issues")
+ error_list.append(f"{key}: {item} issues")
elif not len(item):
- error_list.append(f"{indent}{key} has no issues")
+ error_list.append(f"{key} has no issues")
else:
- HedValidationSummary._format_errors(error_list, key, item, indent)
+ error_list.append(f"{key}:")
+ error_list = error_list + item
return error_list
@staticmethod
@@ -246,6 +249,7 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings):
results["sidecar_had_issues"] = True
if not check_for_warnings:
sidecar_issues = filtered_issues
- results['sidecar_issues'][sidecar.name] = sidecar_issues
+ str_issues = [get_printable_issue_string([issue], skip_filename=True) for issue in sidecar_issues]
+ results['sidecar_issues'][sidecar.name] = str_issues
results['total_sidecar_issues'] = len(sidecar_issues)
return results
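The pattern applied twice in this hunk converts each issue dictionary to its printable string one at a time, so a list of issues stays a list of the same length. A standalone sketch of the idea, using a stand-in formatter since the real `get_printable_issue_string` lives in `hed.errors`:

```python
def format_issues_individually(issues, format_one):
    # Render each issue dict separately so issue counts remain len(issues).
    return [format_one([issue]) for issue in issues]

# Stand-in for hed.errors.get_printable_issue_string(..., skip_filename=True).
def fake_printable(issue_list):
    issue = issue_list[0]
    return f"{issue.get('code', 'UNKNOWN')}: {issue.get('message', '')}"

issues = [{"code": "TAG_INVALID", "message": "Bad tag"},
          {"code": "UNITS_MISSING", "message": "No units"}]
print(format_issues_individually(issues, fake_printable))
```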
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index ebd647953..751af9612 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -45,23 +45,28 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
self._onset_validator = OnsetValidator()
onset_filtered = None
+ # Adjust to account for 1-based row numbering
+ row_adj = 1
if isinstance(data, BaseInput):
- issues += self._validate_column_structure(data, error_handler)
+ # Adjust to account for column names
+ if data.has_column_names:
+ row_adj += 1
+ issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
data = data.dataframe_a
# Check the rows of the input data
- issues += self._run_checks(data, onset_filtered, error_handler=error_handler)
+ issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
error_handler.pop_error_context()
issues = sort_issues(issues)
return issues
- def _run_checks(self, hed_df, onset_filtered, error_handler):
+ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
issues = []
columns = list(hed_df.columns)
for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)):
- error_handler.push_error_context(ErrorContext.ROW, row_number)
+ error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
row_strings = []
new_column_issues = []
for column_number, cell in enumerate(text_file_row):
@@ -100,13 +105,14 @@ def _run_checks(self, hed_df, onset_filtered, error_handler):
error_handler.pop_error_context()
return issues
- def _validate_column_structure(self, base_input, error_handler):
+ def _validate_column_structure(self, base_input, error_handler, row_adj):
"""
Validate that each column in the input data has valid values.
Parameters:
base_input (BaseInput): The input data to be validated.
error_handler (ErrorHandler): Holds context
+ row_adj (int): Number added to row numbers when reporting errors.
Returns:
List of issues associated with each invalid value. Each issue is a dictionary.
"""
@@ -120,7 +126,7 @@ def _validate_column_structure(self, base_input, error_handler):
valid_keys = column.hed_dict.keys()
for row_number, value in enumerate(base_input.dataframe[column.column_name]):
if value != "n/a" and value not in valid_keys:
- error_handler.push_error_context(ErrorContext.ROW, row_number)
+ error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
issues += error_handler.format_error_with_context(ValidationErrors.SIDECAR_KEY_MISSING,
invalid_key=value,
category_keys=list(valid_keys))
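The row adjustment introduced here converts 0-based dataframe indices into the 1-based, header-aware row numbers users see in their files; it matches the new assertion in tests/models/test_tabular_input.py below that the first bad data row is reported as row 2. A minimal sketch of the arithmetic:

```python
def reported_row_number(df_index, has_column_names=True):
    # 1-based numbering, plus one more if the file begins with a header row.
    row_adj = 1
    if has_column_names:
        row_adj += 1
    return df_index + row_adj

assert reported_row_number(0) == 2                          # first data row of a headered TSV
assert reported_row_number(0, has_column_names=False) == 1  # no header: plain 1-based
```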
diff --git a/pyproject.toml b/pyproject.toml
index 8bc13baf6..7ba509cc0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
[build-system]
requires = ["setuptools>=42", "versioneer-518"]
build-backend = "setuptools.build_meta"
+
+[tool.codespell]
+skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests'
+ignore-words-list = 'te,parms'
diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
index c47fff949..7f3868bdf 160000
--- a/spec_tests/hed-specification
+++ b/spec_tests/hed-specification
@@ -1 +1 @@
-Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68
+Subproject commit 7f3868bdff542db5660e2bd1a3d0d565e3793502
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index 972d53d4d..3e87fdbda 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -12,55 +12,11 @@
from hed.errors import ErrorHandler, get_printable_issue_string
-# To be removed eventually once all errors are being verified.
-known_errors = [
- 'SIDECAR_INVALID',
- 'CHARACTER_INVALID',
- 'COMMA_MISSING',
- "DEF_EXPAND_INVALID",
- "DEF_INVALID",
- "DEFINITION_INVALID",
- "NODE_NAME_EMPTY",
- "ONSET_OFFSET_INSET_ERROR",
- "PARENTHESES_MISMATCH",
- "PLACEHOLDER_INVALID",
- "REQUIRED_TAG_MISSING",
- "SIDECAR_INVALID",
- "SIDECAR_KEY_MISSING",
- "STYLE_WARNING",
- "TAG_EMPTY",
- "TAG_EXPRESSION_REPEATED",
- "TAG_EXTENDED",
- "TAG_EXTENSION_INVALID",
- "TAG_GROUP_ERROR",
- "TAG_INVALID",
- "TAG_NOT_UNIQUE",
- "TAG_NAMESPACE_PREFIX_INVALID",
- "TAG_REQUIRES_CHILD",
- "TILDES_UNSUPPORTED",
- "UNITS_INVALID",
- "UNITS_MISSING",
- "VALUE_INVALID",
-
- "SIDECAR_BRACES_INVALID",
- "SCHEMA_LIBRARY_INVALID",
-
- "SCHEMA_ATTRIBUTE_INVALID",
- "SCHEMA_UNIT_CLASS_INVALID",
- "SCHEMA_VALUE_CLASS_INVALID",
- "SCHEMA_DEPRECATED_INVALID",
- "SCHEMA_SUGGESTED_TAG_INVALID",
- "SCHEMA_RELATED_TAG_INVALID",
- "SCHEMA_NON_PLACEHOLDER_HAS_CLASS",
- "SCHEMA_DEFAULT_UNITS_INVALID"
-]
-
skip_tests = {
"VERSION_DEPRECATED": "Not applicable",
"tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet",
}
-
class MyTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
@@ -80,9 +36,7 @@ def run_single_test(self, test_file):
test_info = json.load(fp)
for info in test_info:
error_code = info['error_code']
- verify_code = False
- if error_code in known_errors:
- verify_code = True
+ verify_code = True
# To be deprecated once we add this to all tests
self._verify_code = verify_code
if error_code in skip_tests:
@@ -93,6 +47,8 @@ def run_single_test(self, test_file):
print(f"Skipping {name} test because: {skip_tests[name]}")
continue
+ # if name != "attribute-invalid-in-library":
+ # continue
description = info['description']
schema = info['schema']
check_for_warnings = info.get("warning", False)
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
index a596775c1..e22463350 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
index 672792aa8..2b76a3a44 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
@@ -17,6 +17,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
index d5e6cf444..678a62496 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
index 979f72bde..037c9bc7a 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
index 3438be07c..f79d8361c 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
index 267a214e6..80454ef42 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
index ee20104a2..d3368e375 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
@@ -33,6 +33,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
index 8b3a3a864..641447086 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
index 7939dfd95..f8bccd4d0 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
index 4a084ebd2..eb283125c 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
@@ -33,6 +33,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
index f282aabba..289265f88 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
* weightUnits {defaultUnits=testUnit}
** testUnit {conversionFactor=100}
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
index b7c4d5aa7..ac67b8feb 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
* weightUnitsNew {defaultUnits=g}
** g {conversionFactor=100}
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
index d5e31f3b2..6536476c5 100644
--- a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
+++ b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
@@ -44,6 +44,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
index 049260f12..4327c6a48 100644
--- a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
+++ b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
@@ -1,6 +1,6 @@
HED version:8.0.0-alpha.1
-
+'''Prologue'''
This is a prologue line.
This is a second prologue line.
@@ -1098,7 +1098,15 @@ This is a second prologue line.
* z {SIUnitSymbolModifier} [SI unit submultiple representing 10^-21]
* yocto {SIUnitModifier} [SI unit submultiple representing 10^-24]
* y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]
-!# end hed
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
+'''Epilogue'''
This is an epilogue.
-This is a second line of an epilogue.
\ No newline at end of file
+This is a second line of an epilogue.
+
+!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
new file mode 100644
index 000000000..d2c398e33
--- /dev/null
+++ b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
@@ -0,0 +1,41 @@
+HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd"
+
+'''Prologue'''
+This schema tests AppendixB SCHEMA_ATTRIBUTE_INVALID
+
+!# start schema
+
+'''Tag1''' {suggestedTag=Tag1}[suggested tag is not registered in the schema]
+* Tag2 {valueClassAttribute}[value attribute is the wrong tag class]
+* Tag3 {unitAttribute}[unit attribute is the wrong tag class]
+
+!# end schema
+'''Unit classes'''
+* unitClass1 {unitAttribute}[Wrong attribute type]
+** unit1 {tagAttribute}[Wrong attribute type]
+
+'''Unit modifiers'''
+* mod1 {tagAttribute}[Wrong attribute type]
+
+'''Value classes'''
+* valueClass1 {tagAttribute}[Wrong attribute type]
+
+'''Schema attributes'''
+* tagAttribute
+* unitAttribute {unitProperty}
+* unitClassAttribute {unitClassProperty}
+* unitModifierAttribute {unitModifierProperty}
+* valueClassAttribute {valueClassProperty}
+* attribute1 {valueClassProperty}
+
+'''Properties'''
+* boolProperty
+* unitClassProperty
+* unitModifierProperty
+* unitProperty
+* valueClassProperty
+
+'''Epilogue'''
+This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes themselves have properties.
+
+!# end hed
\ No newline at end of file
diff --git a/tests/data/validator_tests/bids_schema.mediawiki b/tests/data/validator_tests/bids_schema.mediawiki
index 971a9723b..b306003bd 100644
--- a/tests/data/validator_tests/bids_schema.mediawiki
+++ b/tests/data/validator_tests/bids_schema.mediawiki
@@ -1,5 +1,7 @@
HED version: 8.0.0-alpha.2
+'''Prologue'''
+
!# start schema
'''Event'''
@@ -1163,6 +1165,7 @@ HED version: 8.0.0-alpha.2
* yocto {SIUnitModifier} [SI unit submultiple representing 10^-24]
* y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]
+'''Value classes'''
'''Schema attributes'''
* allowedCharacter {unitClassProperty}[An attribute of unit classes schema value placeholders indicating a special character that is allowed in expressing the value of that placeholder.]
@@ -1184,6 +1187,8 @@ HED version: 8.0.0-alpha.2
* unitSymbol {boolProperty, unitProperty}[Abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.]
* unitClass [Specifies the type of a unit for a tag.]
+'''Properties'''
+
'''Epilogue'''
This is the new format for the mediawiki schema
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index f5b381eb3..02c7f34a1 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -304,25 +304,30 @@ def test_complex_onsets(self):
{3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]})
def test_empty_and_single_item_series(self):
- self.assertEqual(BaseInput._filter_by_index_list([], {}), [])
- self.assertEqual(BaseInput._filter_by_index_list(["apple"], {0: [0]}), ["apple"])
+ self.assertTrue(BaseInput._filter_by_index_list(pd.Series([], dtype=str), {}).equals(pd.Series([], dtype=str)))
+ self.assertTrue(BaseInput._filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"])))
def test_two_item_series_with_same_onset(self):
- self.assertEqual(BaseInput._filter_by_index_list(["apple", "orange"], {0: [0, 1]}), ["apple,orange", "n/a"])
+ input_series = pd.Series(["apple", "orange"])
+ expected_series = pd.Series(["apple,orange", "n/a"])
+ self.assertTrue(BaseInput._filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series))
def test_multiple_item_series(self):
- original = ["apple", "orange", "banana", "mango"]
+ input_series = pd.Series(["apple", "orange", "banana", "mango"])
indexed_dict = {0: [0, 1], 1: [2], 2: [3]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["apple,orange", "n/a", "banana", "mango"])
+ expected_series = pd.Series(["apple,orange", "n/a", "banana", "mango"])
+ self.assertTrue(BaseInput._filter_by_index_list(input_series, indexed_dict).equals(expected_series))
def test_complex_scenarios(self):
# Test with negative, zero and positive onsets
- original = ["negative", "zero", "positive"]
+ original = pd.Series(["negative", "zero", "positive"])
indexed_dict = {-1: [0], 0: [1], 1: [2]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["negative", "zero", "positive"])
+ expected_series1 = pd.Series(["negative", "zero", "positive"])
+ self.assertTrue(BaseInput._filter_by_index_list(original, indexed_dict).equals(expected_series1))
# Test with more complex indexed_dict
- original = ["apple", "orange", "banana", "mango", "grape"]
- indexed_dict = {0: [0, 1], 1: [2], 2: [3, 4]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict),
- ["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+ original2 = ["apple", "orange", "banana", "mango", "grape"]
+ indexed_dict2= {0: [0, 1], 1: [2], 2: [3, 4]}
+ expected_series2 = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+ self.assertTrue(BaseInput._filter_by_index_list(original2, indexed_dict2).equals(expected_series2))
+
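These tests pin down the Series-based contract of `_filter_by_index_list`: rows belonging to the same onset group are joined into the group's first row, and the remaining rows become "n/a". A rough reimplementation of that contract, for illustration only (not the library code; the dict values are groups of row indices, the keys are ignored here):

```python
import pandas as pd

def filter_by_index_list(series, indexed_dict):
    # Join each index group into its first row; blank out the rest with "n/a".
    out = pd.Series(["n/a"] * len(series), dtype=str)
    for indices in indexed_dict.values():
        out[indices[0]] = ",".join(series[i] for i in indices)
    return out

original = pd.Series(["apple", "orange", "banana", "mango", "grape"])
expected = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
assert filter_by_index_list(original, {0: [0, 1], 1: [2], 2: [3, 4]}).equals(expected)
```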
diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py
new file mode 100644
index 000000000..36fcc168d
--- /dev/null
+++ b/tests/models/test_basic_search.py
@@ -0,0 +1,340 @@
+import unittest
+import pandas as pd
+from hed import load_schema_version
+
+import os
+from hed import TabularInput
+from hed.models import df_util, basic_search
+from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \
+ construct_delimiter_map, verify_search_delimiters, find_matching
+import numpy as np
+
+
+class TestNewSearch(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ '../data/bids_tests/eeg_ds003645s_hed'))
+ sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
+ cls.events_path = os.path.realpath(
+ os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
+ cls.base_input = TabularInput(cls.events_path, sidecar1_path)
+ cls.schema = load_schema_version()
+ cls.df = cls.base_input.series_filtered
+
+ def test_find_matching_results(self):
+ result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
+ result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
+
+ # Add assertions
+ self.assertTrue(np.sum(result1) > 0, "result1 should have some true values")
+ self.assertTrue(np.sum(result2) > 0, "result2 should have some true values")
+ self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2")
+
+
+class TestFindWords(unittest.TestCase):
+ def test_basic(self):
+ search_string = "@global (local1, local2)"
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_no_anywhere_words(self):
+ search_string = "(local1, local2)"
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, [])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_no_specific_words(self):
+ search_string = "@global1, @global2"
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global1', 'global2'])
+ self.assertEqual(specific_words, [])
+
+ def test_empty_string(self):
+ search_string = ""
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, [])
+ self.assertEqual(specific_words, [])
+
+ def test_mixed_words(self):
+ search_string = "@global (local1, local2), @another_global"
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global', 'another_global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_whitespace(self):
+ search_string = " @Global , ( local1 , local2 ) "
+ anywhere_words, _, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['Global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+
+class TestCheckParentheses(unittest.TestCase):
+ def test_balanced_parentheses(self):
+ self.assertEqual(check_parentheses("(())"), "")
+ self.assertEqual(check_parentheses("(someText())"), "")
+ self.assertEqual(check_parentheses("((some)text())"), "")
+ self.assertEqual(check_parentheses("()"), "")
+
+ def test_unbalanced_parentheses(self):
+ self.assertEqual(check_parentheses("(()"), "(")
+ self.assertEqual(check_parentheses("()someText("), "(")
+ self.assertEqual(check_parentheses("(text)text)"), ")")
+ self.assertEqual(check_parentheses("text)"), ")")
+
+ def test_mixed_parentheses(self):
+ self.assertEqual(check_parentheses("(()(())"), "(")
+ self.assertEqual(check_parentheses("(someText))((someText)"), ")(")
+ self.assertEqual(check_parentheses("((someText))someText"), "")
+ self.assertEqual(check_parentheses("(someText(someText))someText"), "")
+
+ def test_special_cases(self):
+ self.assertEqual(check_parentheses(""), "")
+ self.assertEqual(check_parentheses("abc"), "")
+ self.assertEqual(check_parentheses("((()))("), "(")
+ self.assertEqual(check_parentheses("text"), "")
+
+ def test_reverse_and_flip_parentheses(self):
+ self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)")
+ self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH")
+ self.assertEqual(reverse_and_flip_parentheses(")("), ")(")
+ self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))")
+ self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()")
+ self.assertEqual(reverse_and_flip_parentheses("abc"), "cba")
+ self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321")
+ self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a")
+
+
+class TestConstructDelimiterMap(unittest.TestCase):
+ def test_empty_text(self):
+ self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {})
+
+ def test_empty_words(self):
+ self.assertEqual(construct_delimiter_map("word1,word2", []), {})
+
+ def test_single_occurrence(self):
+ text = "word1,word2"
+ expected_result = {
+ ("word1", "word2"): "",
+ ("word2", "word1"): ""
+ }
+ self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result)
+
+ def test_multiple_words(self):
+ text = "word0,((word1),word2)"
+ expected_result = {
+ ("word0", "word1"): "((",
+ ("word0", "word2"): "(",
+ ("word1", "word0"): "))",
+ ("word1", "word2"): ")",
+ ("word2", "word1"): "(",
+ ("word2", "word0"): ")"
+ }
+ self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+ text = "word0 , ( (word1 ), word2)"
+ self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+
+class TestVerifyDelimiters(unittest.TestCase):
+ def base_verify_func(self, query_text, text, specific_words, expected_result):
+ delimiter_map = construct_delimiter_map(query_text, specific_words)
+ actual_result = verify_search_delimiters(text, specific_words, delimiter_map)
+ self.assertEqual(actual_result, expected_result)
+
+ def test_all_conditions_met(self):
+ query_text = "word0,((word1),word2)"
+ specific_words = ["word0", "word1", "word2"]
+ text = "word0,((word1),word2)"
+ self.base_verify_func(query_text, text, specific_words, True)
+ text = "((word1),word2), word0"
+ self.base_verify_func(query_text, text, specific_words, True)
+ text = "word0,(word2, (word1))"
+ self.base_verify_func(query_text, text, specific_words, True)
+ text = "word0,((word1),(ExtraGroup),word2)"
+ self.base_verify_func(query_text, text, specific_words, True)
+ text = "word0,((word2),word1)"
+ self.base_verify_func(query_text, text, specific_words, False)
+ text = "((word1),word0), word2"
+ self.base_verify_func(query_text, text, specific_words, False)
+ text = "word0,((word1))"
+ self.base_verify_func(query_text, text, specific_words, False)
+ text = "(word1),(ExtraGroup),word2)"
+ self.base_verify_func(query_text, text, specific_words, False)
+
+ def test_wildcard_matching_verify_delimiters(self):
+ query_text = "word0, ((word1.*?)), word2.*?"
+ delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"])
+
+ # Positive test cases
+ text = "((word1)), word0, word2X"
+ self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+ text = "word0, ((word1Y)), word2Z"
+ self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+ # Negative test cases
+ text = "word0, (word1), word2"
+ self.assertFalse(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+
+class TestFindMatching(unittest.TestCase):
+ def base_find_matching(self, series, search_string, expected):
+ mask = find_matching(series, search_string)
+ self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}")
+
+ def test_basic_matching(self):
+ series = pd.Series([
+ "word0, word1, word2",
+ "word0, (word1, word2)"
+ ])
+ search_string = "word0, word1"
+ expected = pd.Series([True, True])
+ self.base_find_matching(series, search_string, expected)
+ search_string = "(word0, word1)"
+ expected = pd.Series([True, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_group_matching(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "(word1), word0, (word2)"
+ ])
+ search_string = "word0, ((word1)), word2"
+ expected = pd.Series([False, True, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_anywhere_words(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "word0, (word3), ((word1)), word2"
+ ])
+ search_string = "@word3, word0, ((word1)), word2"
+ expected = pd.Series([False, False, True])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_mismatched_parentheses(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "word0, (word1)), word2",
+ "word0, ((word1), word2"
+ ])
+ search_string = "word0, ((word1)), word2"
+ expected = pd.Series([False, True, False, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_wildcard_matching(self):
+ series = pd.Series([
+ "word2, word0, ((word1X))",
+ "word0, ((word1Y)), word2Z",
+ "word0, ((word1)), word2",
+ "word0, (word1), word2"
+ ])
+ search_string = "word0, ((word1*)), word2*"
+ expected = pd.Series([True, True, True, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_complex_case_with_word_identifiers(self):
+ query_text = "word0, ((word1), @word2, @word3, word4)"
+ series = pd.Series([
+ "word0, ((word1), word2, word3, word4)",
+ "word2, word0, ((word1), word3, word4)",
+ "word3, ((word1), word2, word4), word0",
+ "word0, ((word1), word4), word2, word3",
+ "word0, word1, word4, word2",
+ "word2, word3"
+ ])
+ expected = pd.Series([True, True, True, True, False, False])
+
+ self.base_find_matching(series, query_text, expected)
+
+ def test_very_complex_case_with_word_identifiers(self):
+ query_text = "word0, (((word1, word2), @word3)), ((word4, word5)))"
+ series = pd.Series([
+ "word0, (((word1, word2), word3)), ((word4, word5)))",
+ "word3, word0, (((word1, word2))), ((word4, word5)))",
+ "word0, ((word1, word2), word3), (word4, word5)",
+ "word0, (((word1, word2), word3)), (word4)",
+ "word0, (((word1, word2))), ((word4, word5)))"
+ ])
+ expected = pd.Series([True, True, False, False, False])
+
+ self.base_find_matching(series, query_text, expected)
+
+ def test_incorrect_single_delimiter(self):
+ query_text = "word0, ((word1)), word2"
+ series = pd.Series([
+ "word0, ((word1)), word2",
+ "(word0, ((word1)), word2)",
+ "word0, ((word1), word2)",
+ "word0, (word1)), word2"
+ ])
+ expected = pd.Series([True, True, False, False])
+ self.base_find_matching(series, query_text, expected)
+
+ def test_mismatched_parentheses2(self):
+ query_text = "word0, ((word1)), (word2, word3)"
+ series = pd.Series([
+ "word0, ((word1)), (word2, word3)",
+ "(word2, word3), word0, ((word1))",
+ "word0, someExtraText, ((word1)), someMoreText, (word2, word3)",
+ "word0, ((word1), (word2, word3))",
+ "word0, ((word1), ((word2, word3)"
+ ])
+ expected = pd.Series([True, True, True, False, False])
+ self.base_find_matching(series, query_text, expected)
+
+ def test_negative_words(self):
+ series = pd.Series([
+ "word0, word1",
+ "word0, word2",
+ "word0, word2, word3",
+ "word0, (word1), word2",
+ "word0, (word2, word3), word1",
+ "word0, word1suffix",
+ ])
+
+ # 1. Basic Negative Test Case
+ search_string1 = "~word1, word0"
+ expected1 = pd.Series([False, True, True, False, False, True])
+
+ # 2. Two Negative Words
+ search_string2 = "~word1, ~word3, word0"
+ expected2 = pd.Series([False, True, False, False, False, True])
+
+ # 3. Combination of Negative and Mandatory Words
+ search_string3 = "@word2, ~word1, word0"
+ expected3 = pd.Series([False, True, True, False, False, False])
+
+ # 4. Negative Words with Wildcards
+ search_string4 = "word0, ~word1*"
+ expected4 = pd.Series([False, True, True, False, False, False])
+
+ # Running tests
+ self.base_find_matching(series, search_string1, expected1)
+ self.base_find_matching(series, search_string2, expected2)
+ self.base_find_matching(series, search_string3, expected3)
+ self.base_find_matching(series, search_string4, expected4)
+
+ def test_negative_words_group(self):
+ series = pd.Series([
+ "word0, (word1, (word2))",
+ "word0, (word1, (word2)), word3",
+ "word0, (word1, (word2), word3)",
+ "word0, (word1, (word2, word3))",
+ ])
+ search_string = "word0, (word1, (word2))"
+ expected = pd.Series([True, True, True, True])
+ self.base_find_matching(series, search_string, expected)
+
+ search_string = "word0, (word1, (word2)), ~word3"
+ expected = pd.Series([True, False, False, False])
+ self.base_find_matching(series, search_string, expected)
+
+ search_string = "word0, (word1, (word2), ~word3)"
+ expected = pd.Series([True, False, False, False])
+ self.base_find_matching(series, search_string, expected)
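Taken together, these tests document a small search grammar: plain terms must appear with the query's group nesting, `@term` may appear anywhere in the line, `~term` must be absent, and a trailing `*` is a wildcard. A usage sketch against an in-memory Series; the expected output is inferred from the tests above, not verified against the library:

```python
import pandas as pd
from hed.models.basic_search import find_matching

series = pd.Series([
    "word0, ((word1)), word2",   # nesting matches, no word3
    "word0, (word1), word2",     # word1 not doubly nested
])
mask = find_matching(series, "word0, ((word1)), ~word3")
print(list(mask))  # expect [True, False] under the rules above
```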
diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py
index cca544113..5bdb71b7c 100644
--- a/tests/models/test_expression_parser.py
+++ b/tests/models/test_expression_parser.py
@@ -118,7 +118,7 @@ def test_finding_tags2(self):
"Agent, (Event)": True,
"(Item), (Event)": True
}
- self.base_test("(Item or Agent) and [[Action or Event]]", test_strings)
+ self.base_test("(Item or Agent) and {Action or Event}", test_strings)
def test_exact_group(self):
test_strings = {
@@ -131,7 +131,7 @@ def test_exact_group(self):
"(A, B, (C, D))": True,
"(A, B, C)": True
}
- self.base_test("[[a, b]]", test_strings)
+ self.base_test("{a, b}", test_strings)
def test_exact_group_simple_complex(self):
test_strings = {
@@ -145,7 +145,7 @@ def test_exact_group_simple_complex(self):
"(E, F, (A, B, (C, D)))": True,
"(A, B, (E, F, (C, D)))": False, # TODO: Should this be True? [[c]] isn't directly inside an a group.
}
- self.base_test("[[a, [[c]] ]]", test_strings)
+ self.base_test("{a, {c} }", test_strings)
def test_exact_group_complex(self):
test_strings = {
@@ -155,7 +155,7 @@ def test_exact_group_complex(self):
"(A, B, ((C, D)))": False,
"(E, F, (A, B, (C, D)))": True,
}
- self.base_test("[[a, b, [[c, d]] ]]", test_strings)
+ self.base_test("{a, b, {c, d} }", test_strings)
def test_duplicate_search(self):
test_strings = {
@@ -183,7 +183,7 @@ def test_exact_group_complex_split(self):
"(E, F, (A, B, (C, D)))": False,
"((A, B), (C, D))": True,
}
- self.base_test("[[ [[a, b]], [[c, d]] ]]", test_strings)
+ self.base_test("{ {a, b}, {c, d} }", test_strings)
def test_mixed_group_split(self):
test_strings = {
@@ -192,7 +192,7 @@ def test_mixed_group_split(self):
"((Event), ((Clear-throat)))": True,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [Event], [Action] ]]", test_strings)
+ self.base_test("{ [Event], [Action] }", test_strings)
def test_exact_group_split(self):
test_strings = {
@@ -201,7 +201,7 @@ def test_exact_group_split(self):
"((Event), ((Clear-throat)))": False,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [[Event]], [[Action]] ]]", test_strings)
+ self.base_test("{ {Event}, {Action} }", test_strings)
def test_exact_group_split_or(self):
test_strings = {
@@ -210,17 +210,18 @@ def test_exact_group_split_or(self):
"((A), ((D)))": True,
"((A, D))": True,
}
- self.base_test("[[ [[a]] or [[d]] ]]", test_strings)
+ self.base_test("{ {a} or {d} }", test_strings)
def test_exact_group_split_or_negation(self):
test_strings = {
- "(Event, Clear-throat)": False,
+ # "(Event, Clear-throat)": False,
"((Event), (Clear-throat))": True,
"((Event))": False,
"((Event), ((Clear-throat)))": True,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [[~Event]] ]]", test_strings)
+ # Need to think this through more. How do you exact match a negative tag?
+ self.base_test("{ {~Event} }", test_strings)
def test_exact_group_split_or_negation_dual(self):
test_strings = {
@@ -233,7 +234,7 @@ def test_exact_group_split_or_negation_dual(self):
"((A), (B, C))": False,
"((A), ((B), C))": True,
}
- self.base_test("[[ [[~a and ~b]] ]]", test_strings)
+ self.base_test("{ {~a and ~b} }", test_strings)
def test_exact_group_split_or_negation_dual2(self):
test_strings = {
@@ -246,7 +247,7 @@ def test_exact_group_split_or_negation_dual2(self):
"((A), (B, C))": False,
"((A), ((B), C))": True,
}
- self.base_test("[[ [[~(a or b)]] ]]", test_strings)
+ self.base_test("{ {~(a or b)} }", test_strings)
def test_exact_group_split_or_negation_complex(self):
test_strings = {
@@ -260,7 +261,7 @@ def test_exact_group_split_or_negation_complex(self):
"((A), (B, C)), (D)": False,
"((A), (B, C)), (H)": False,
}
- self.base_test("[[ [[~(a or b)]] ]] and [[D or ~F]]", test_strings)
+ self.base_test("{ {~(a or b)} } and {D or ~F}", test_strings)
# TODO: Should this work, and what should it mean?
# Right now this is always true, since there is at least one group without ", (a)" in every string.
@@ -272,7 +273,7 @@ def test_exact_group_negation(self):
"((A), ((D)))": True,
"((A, D))": True,
}
- self.base_test("[[ ~[[a]] ]]", test_strings)
+ self.base_test("{ ~{a} }", test_strings)
def test_exact_group_negation2(self):
test_strings = {
@@ -282,9 +283,42 @@ def test_exact_group_negation2(self):
"((A), ((D, B)))": True,
"((A, D))": False,
"(B, (D))": True,
- "(B)": True
+ "(B)": True,
+ "((A), B)": False
}
- self.base_test("[[ ~[[a]], b]]", test_strings)
+ self.base_test("{ ~{a}, b}", test_strings)
+
+ def test_exact_group_negation3(self):
+ test_strings = {
+ "(A, D, B)": False,
+ "((A), (D), B)": True,
+ "((A))": False,
+ "((A), ((D, B)))": True,
+ "((A, D))": False,
+ "(B, (D))": True,
+ "(B)": True,
+ "((A), B)": True
+ }
+ self.base_test("{ ~a and b}", test_strings)
+
+ def test_exact_group_negation4(self):
+ test_strings = {
+ "(A, D, B)": False,
+ "((A), (D), B)": False,
+ "((A))": False,
+ "((A), ((D, B)))": False,
+ "((A, D))": False,
+ "(B)": True,
+ "(B, (D))": True,
+ "((A), B)": False
+ }
+ self.base_test("{ @c and @a and b: ???}", test_strings)
+
+ def test_exact_group_negation5(self):
+ test_string = "{ ~a and b:}"
+ with self.assertRaises(ValueError) as context:
+ QueryParser(test_string)
+ self.assertTrue(context.exception.args[0])
def test_mixed_group_complex_split(self):
test_strings = {
@@ -297,7 +331,7 @@ def test_mixed_group_complex_split(self):
"((A, B), (C, D))": True,
"((A, B, C, D))": False,
}
- self.base_test("[[ [a, b], [c, d] ]]", test_strings)
+ self.base_test("{ [a, b], [c, d] }", test_strings)
def test_exact_group_complex2(self):
test_strings = {
@@ -309,7 +343,7 @@ def test_exact_group_complex2(self):
"(B, (C)), (A, B, (C))": True,
"(A, B, (A, (C)))": False
}
- self.base_test("[[a, b, [[c]] ]]", test_strings)
+ self.base_test("{a, b, {c} }", test_strings)
def test_containing_group_complex2(self):
test_strings = {
@@ -362,13 +396,13 @@ def test_mixed_groups(self):
test_strings = {
"(A, B), (C, D, (E, F))": True
}
- self.base_test("[[a]], [[ [[e, f]] ]]", test_strings)
+ self.base_test("{a}, { {e, f} }", test_strings)
test_strings = {
"(A, B), (C, D, (E, F))": False
}
# This example works because it finds the group containing (c, d, (e, f)), rather than the ef group
- self.base_test("[[a]], [e, [[f]] ]", test_strings)
+ self.base_test("{a}, [e, {f} ]", test_strings)
def test_and(self):
test_strings = {
@@ -411,18 +445,17 @@ def test_and_wildcard_nothing_else(self):
"A": False,
"B": False,
"C": False,
- "A, B": True,
+ "A, B": False,
"A, C": False,
"B, C": False,
"A, B, C": False,
"D, A, B": False,
"A, B, (C)": False,
"(A, B), C": True,
- "(A, B, C)": False,
+ "(A, B, C)": True,
}
self.base_test("{a and b}", test_strings)
- def test_and_wildcard_nothing_else2(self):
test_strings = {
"A": False,
"B": False,
@@ -436,8 +469,7 @@ def test_and_wildcard_nothing_else2(self):
"(A, B), C": True,
"(A, B, C)": False,
}
- self.base_test("[{a and b}]", test_strings)
- self.base_test("[[{a and b}]]", test_strings)
+ self.base_test("{a and b:}", test_strings)
def test_and_logical_wildcard(self):
test_strings = {
@@ -450,9 +482,11 @@ def test_and_logical_wildcard(self):
self.base_test("A, B and ?", test_strings)
test_strings = {
- "A": False,
+ "A": True,
"A, C": True,
"A, B, C": True,
+ "B, C": False,
+ "B, C, D, E": True
}
self.base_test("(a or (b and c) and ?)", test_strings)
@@ -469,7 +503,7 @@ def test_double_wildcard(self):
def test_or_wildcard(self):
test_strings = {
- "A": False,
+ "A": True,
"B": False,
"C": False,
"A, B": True,
@@ -589,10 +623,10 @@ def test_and_or(self):
self.base_test("a and (b or c)", test_strings)
test_strings = {
- "A": False,
+ "A": True,
"B": False,
"C": False,
- "A, B": False,
+ "A, B": True,
"A, C": True,
"B, C": True
}
@@ -698,35 +732,43 @@ def test_not_in_line3(self):
def test_optional_exact_group(self):
test_strings = {
- "A, C": True,
+ "(A, C)": True,
}
self.base_test("{a and (b or c)}", test_strings)
test_strings = {
- "A, B, C, D": True,
+ "(A, B, C, D)": True,
}
self.base_test("{a and b: c and d}", test_strings)
test_strings = {
- "A, B, C": True,
- "A, B, C, D": False,
+ "(A, B, C)": True,
+ "(A, B, C, D)": False,
}
self.base_test("{a and b: c or d}", test_strings)
test_strings = {
- "A, C": True,
- "A, D": True,
- "A, B, C": False,
- "A, B, C, D": False,
+ "(A, C)": True,
+ "(A, D)": True,
+ "(A, B, C)": False,
+ "(A, B, C, D)": False,
}
self.base_test("{a or b: c or d}", test_strings)
test_strings = {
"(Onset, (Def-expand/taco))": True,
+ "(Onset, Def-expand/taco)": False,
+ "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates
+ "(Onset, (Def/taco))": False,
"(Onset, (Def-expand/taco, (Label/DefContents)))": True,
"(Onset, (Def-expand/taco), (Label/OnsetContents))": True,
"(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True,
"Onset, (Def-expand/taco), (Label/OnsetContents)": False,
"(Onset, (Def-expand/taco), Label/OnsetContents)": False,
}
- self.base_test("[[{(Onset or Offset), (Def or [[Def-expand]]): ???}]]", test_strings)
\ No newline at end of file
+ self.base_test("{(Onset or Offset), (Def or {Def-expand}): ???}", test_strings)
+ test_strings = {
+ "(A, B)": True,
+ "(A, B, C)": True
+ }
+ self.base_test("{a or b}", test_strings)
\ No newline at end of file
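The sweep through this test file is a pure syntax migration in the query language: exact-group queries move from `[[ ... ]]` to `{ ... }`, while single-bracket containing groups stay `[ ... ]`. A hypothetical helper showing that the upgrade is mechanical (not part of the library):

```python
def upgrade_query(old_query):
    # Rewrite exact-group brackets; single [ ] containing groups are untouched.
    return old_query.replace("[[", "{").replace("]]", "}")

assert upgrade_query("[[a, b]]") == "{a, b}"
assert upgrade_query("[[ [[a]] or [[d]] ]]") == "{ {a} or {d} }"
assert upgrade_query("[[a]], [e, [[f]] ]") == "{a}, [e, {f} ]"
```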
diff --git a/tests/models/test_hed_group.py b/tests/models/test_hed_group.py
index 117872e8d..1a3632bf7 100644
--- a/tests/models/test_hed_group.py
+++ b/tests/models/test_hed_group.py
@@ -62,7 +62,7 @@ def test_find_tags_with_term(self):
# located tags now has found all 5 hed tags
# This will find no tags
- located_tags = basic_hed_string_obj.find_tags_with_term("bject", recursive=True, include_groups=0)
+ located_tags = basic_hed_string_obj.find_tags_with_term("reject", recursive=True, include_groups=0)
self.assertEqual(len(located_tags), 0)
# this will also find no tags
diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py
index 7f5282e47..02ef32df6 100644
--- a/tests/models/test_tabular_input.py
+++ b/tests/models/test_tabular_input.py
@@ -5,7 +5,7 @@
from hed.models import DefinitionEntry, Sidecar, TabularInput
from hed import schema
from hed.errors import HedFileError
-from hed.errors import ErrorHandler
+from hed.errors import ErrorHandler, ErrorContext
class Test(unittest.TestCase):
@@ -58,6 +58,7 @@ def test_expand_column_issues(self):
input_file = TabularInput(events_path, sidecar=sidecar)
validation_issues = input_file.validate(hed_schema=self.hed_schema)
+ self.assertEqual(validation_issues[0][ErrorContext.ROW], 2)
self.assertEqual(len(validation_issues), 1)
def test_blank_and_duplicate_columns(self):
diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py
index f3591ead3..75f66d174 100644
--- a/tests/schema/test_hed_schema_io.py
+++ b/tests/schema/test_hed_schema_io.py
@@ -170,6 +170,8 @@ def _base_merging_test(self, files):
reload1 = load_schema(path1)
reload2 = load_schema(path2)
self.assertEqual(reload1, reload2)
+ except Exception:
+ self.fail("Schema save/reload raised unexpectedly")
finally:
os.remove(path1)
os.remove(path2)
@@ -183,6 +185,8 @@ def _base_merging_test(self, files):
reload1 = load_schema(path1)
reload2 = load_schema(path2)
self.assertEqual(reload1, reload2)
+ except Exception:
+ self.fail("Schema save/reload raised unexpectedly")
finally:
os.remove(path1)
os.remove(path2)
@@ -241,10 +245,10 @@ def _base_added_class_tests(self, schema):
unit_class_entry = schema.unit_classes["weightUnits"]
unit_entry = unit_class_entry.units["testUnit"]
- self.assertEqual(unit_entry.attributes["conversionFactor"], str(100))
+ self.assertEqual(unit_entry.attributes[HedKey.ConversionFactor], str(100))
unit_modifier_entry = schema.unit_modifiers["huge"]
- self.assertEqual(unit_modifier_entry.attributes["conversionFactor"], "10^100")
+ self.assertEqual(unit_modifier_entry.attributes[HedKey.ConversionFactor], "10^100")
self.assertTrue(unit_modifier_entry.attributes["customElementAttribute"])
value_class_entry = schema.value_classes["customValueClass"]
@@ -324,9 +328,9 @@ def test_cannot_load_schemas(self):
]
for file in files:
- with self.assertRaises(HedFileError):
- # print(file)
+ with self.assertRaises(HedFileError) as context:
load_schema(file)
+ self.assertEqual(context.exception.code, HedExceptions.SCHEMA_LIBRARY_INVALID)
def test_saving_in_library_wiki(self):
old_score_schema = load_schema_version("score_1.0.0")
diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py
index 583579b17..0759dba46 100644
--- a/tests/schema/test_schema_wiki_fatal_errors.py
+++ b/tests/schema/test_schema_wiki_fatal_errors.py
@@ -1,7 +1,7 @@
import unittest
import os
-from hed import schema
+from hed import load_schema
from hed.errors import HedFileError, HedExceptions
@@ -12,25 +12,25 @@ class TestHedSchema(unittest.TestCase):
def setUpClass(cls):
cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir)
cls.files_and_errors = {
- "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_START_MISSING,
- "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_END_INVALID,
- "HED_hed_no_end.mediawiki": HedExceptions.HED_END_INVALID,
- "HED_separator_invalid.mediawiki": HedExceptions.INVALID_SECTION_SEPARATOR,
+ "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID,
"HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING,
- "HED_header_invalid.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
- "empty_file.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
- "HED_header_invalid_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
- "HED_header_missing_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
+ "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+ "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+ "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
+ "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
"HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME,
- "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_START_MISSING,
- "empty_node.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line2.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line3.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line4.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line5.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line6.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line7.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+ "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
"empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID
}
@@ -60,9 +60,10 @@ def test_invalid_schema(self):
for filename, error in self.files_and_errors.items():
full_filename = self.full_base_folder + filename
with self.assertRaises(HedFileError) as context:
- schema.load_schema(full_filename)
+ load_schema(full_filename)
# all of these should produce exceptions.
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+ from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+
# Verify basic properties of exception
expected_line_numbers = self.expected_line_numbers.get(filename, [])
if expected_line_numbers:
@@ -82,9 +83,10 @@ def test_merging_errors_schema(self):
for filename, error in self.files_and_errors.items():
full_filename = self.full_base_folder + filename
with self.assertRaises(HedFileError) as context:
- schema.load_schema(full_filename)
+ load_schema(full_filename)
# all of these should produce exceptions.
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+ from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+ from hed.errors.error_types import SchemaAttributeErrors
# Verify basic properties of exception
expected_line_numbers = self.expected_line_numbers.get(filename, [])
if expected_line_numbers:
@@ -96,7 +98,7 @@ def test_merging_errors_schema(self):
error_handler.push_error_context(ErrorContext.ROW, 1)
error_handler.push_error_context(ErrorContext.COLUMN, 2)
- issues = error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+ issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
"error_attribute", source_tag="error_tag")
error_handler.pop_error_context()
error_handler.pop_error_context()
@@ -106,3 +108,9 @@ def test_merging_errors_schema(self):
self.assertTrue(context.exception.args[0] == error)
self.assertTrue(context.exception.filename == full_filename)
+
+ def test_attribute_invalid(self):
+ path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki")
+ schema = load_schema(path)
+ issues = schema.check_compliance()
+ self.assertEqual(len(issues), 7)
\ No newline at end of file
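For readers tracking the renames in this hunk, the mapping of old `HedExceptions` names to new ones is mechanical; restated as a plain dict, taken directly from the replacements above:

```python
# Old HedExceptions name -> new name, as renamed in this diff.
EXCEPTION_RENAMES = {
    "SCHEMA_START_MISSING": "SCHEMA_SECTION_MISSING",
    "SCHEMA_END_INVALID": "SCHEMA_SECTION_MISSING",
    "HED_END_INVALID": "SCHEMA_SECTION_MISSING",
    "INVALID_SECTION_SEPARATOR": "WIKI_SEPARATOR_INVALID",
    "HED_SCHEMA_HEADER_INVALID": "SCHEMA_HEADER_INVALID",
    "HED_SCHEMA_VERSION_INVALID": "SCHEMA_VERSION_INVALID",
    "HED_WIKI_DELIMITERS_INVALID": "WIKI_DELIMITERS_INVALID",
}
```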
diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py
index 850d014eb..415b94dc2 100644
--- a/tests/schema/util_create_schemas.py
+++ b/tests/schema/util_create_schemas.py
@@ -10,13 +10,30 @@
"""
library_schema_end = """
-!# end schema
+
!# end hed
"""
-def _get_test_schema(node_lines):
- library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end
+default_end_lines = """
+!# end schema
+"""
+
+required_non_tag = [
+"'''Unit classes'''",
+"'''Unit modifiers'''",
+"'''Value classes'''",
+"'''Schema attributes'''",
+"'''Properties'''",
+"'''Epilogue'''"
+]
+def _get_test_schema(node_lines, other_lines=(default_end_lines,)):
+ node_section = "\n".join(node_lines)
+ non_tag_section = "\n".join(other_lines)
+ for name in required_non_tag:
+ if name not in other_lines:
+ non_tag_section += f"\n{name}\n"
+ library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end
test_schema = from_string(library_schema_string, ".mediawiki")
return test_schema
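To see what the new helper guarantees, its section-completion logic can be run standalone: any required non-tag header absent from `other_lines` is appended, so every generated test schema carries the full set of sections that the `.mediawiki` fixtures above gained:

```python
required_non_tag = [
    "'''Unit classes'''",
    "'''Unit modifiers'''",
    "'''Value classes'''",
    "'''Schema attributes'''",
    "'''Properties'''",
    "'''Epilogue'''",
]

other_lines = ("\n!# end schema\n",)  # the default: closes the tag section only
non_tag_section = "\n".join(other_lines)
for name in required_non_tag:
    if name not in other_lines:
        non_tag_section += f"\n{name}\n"

# All six headers are now present exactly once.
assert all(non_tag_section.count(name) == 1 for name in required_non_tag)
```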
diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py
index a63b5be2f..1d2f4b919 100644
--- a/tests/tools/remodeling/cli/test_run_remodel.py
+++ b/tests/tools/remodeling/cli/test_run_remodel.py
@@ -51,7 +51,7 @@ def tearDownClass(cls):
def test_parse_arguments(self):
# Test no verbose
- arg_list1 = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1']
+ arg_list1 = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1']
with patch('sys.stdout', new=io.StringIO()) as fp1:
args1, operations1 = parse_arguments(arg_list1)
self.assertFalse(fp1.getvalue())
@@ -60,7 +60,7 @@ def test_parse_arguments(self):
self.assertEqual(args1.file_suffix, 'events')
# Test * for extensions and suffix as well as verbose
- arg_list2 = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1', '-f', '*', '-e', '*', '-v']
+ arg_list2 = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1', '-f', '*', '-e', '*', '-v']
with patch('sys.stdout', new=io.StringIO()) as fp2:
args2, operations2 = parse_arguments(arg_list2)
self.assertTrue(fp2.getvalue())
@@ -164,13 +164,13 @@ def test_main_bids_no_sidecar_with_hed_task(self):
def test_main_errors(self):
# Test bad data directory
- arg_list = ['junk/junk', self.model_path, '-x', 'derivatives', '-n', 'back1']
+ arg_list = ['junk/junk', self.model_path, '-x', 'derivatives', '-bn', 'back1']
with self.assertRaises(HedFileError) as context:
main(arg_list=arg_list)
self.assertEqual(context.exception.args[0], "DataDirectoryDoesNotExist")
# Test no backup
- arg_list = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1']
+ arg_list = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1']
with self.assertRaises(HedFileError) as context:
main(arg_list=arg_list)
self.assertEqual(context.exception.args[0], "BackupDoesNotExist")
@@ -193,12 +193,6 @@ def test_run_bids_ops_verbose(self):
main(arg_list)
self.assertFalse(fp.getvalue())
- # def test_temp(self):
- # data_root = "g:/ds002718OpenNeuro"
- # model_path = 'G:/wh_excerpt_rmdl.json'
- # arg_list = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli', '-b', '-n', '']
- # main(arg_list)
-
if __name__ == '__main__':
unittest.main()
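
The flag rename these hunks apply (-n to -bn for the backup name, plus -bd for the backup directory in the backup and restore tests that follow) suggests argument definitions along these lines. The long option names, dest fields, and defaults are assumptions inferred from the tests, not taken from run_remodel itself; only the short flags appear in this patch.

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("data_dir")                     # positional: dataset root
    parser.add_argument("model_path")                   # positional: remodel JSON file
    parser.add_argument("-bn", "--backup_name", default="default_back")
    parser.add_argument("-bd", "--backup_dir")          # alternate backup location
    parser.add_argument("-x", "--exclude_dirs", nargs="*", default=[])
    parser.add_argument("-f", "--file_suffix", default="events")
    parser.add_argument("-e", "--extensions", nargs="*", default=[".tsv"])
    parser.add_argument("-t", "--task_names", nargs="*", default=[])
    parser.add_argument("-v", "--verbose", action="store_true")

    args = parser.parse_args([".", "model.json", "-x", "derivatives", "-bn", "back1"])
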
diff --git a/tests/tools/remodeling/cli/test_run_remodel_backup.py b/tests/tools/remodeling/cli/test_run_remodel_backup.py
index 65f2391b6..2dbf2770d 100644
--- a/tests/tools/remodeling/cli/test_run_remodel_backup.py
+++ b/tests/tools/remodeling/cli/test_run_remodel_backup.py
@@ -15,7 +15,9 @@ class Test(unittest.TestCase):
def setUpClass(cls):
file_list = ['top_level.tsv', 'sub1/sub1_events.tsv', 'sub2/sub2_events.tsv', 'sub2/sub2_next_events.tsv']
# cls.file_list = file_list
- cls.extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+ extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+ cls.alt_path = os.path.realpath(os.path.join(extract_path, 'temp'))
+ cls.extract_path = extract_path
test_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/test_root')
cls.test_root = test_root
cls.test_paths = [os.path.join(test_root, file) for file in file_list]
@@ -34,8 +36,12 @@ def setUp(self):
zip_ref.extractall(self.extract_path)
def tearDown(self):
- shutil.rmtree(self.test_root)
- shutil.rmtree(self.data_root)
+ if os.path.exists(self.test_root):
+ shutil.rmtree(self.test_root)
+ if os.path.exists(self.data_root):
+ shutil.rmtree(self.data_root)
+ if os.path.exists(self.alt_path):
+ shutil.rmtree(self.alt_path)
@classmethod
def tearDownClass(cls):
@@ -43,11 +49,11 @@ def tearDownClass(cls):
def test_main_events(self):
self.assertFalse(os.path.exists(self.derv_path), 'backup directory does not exist before creation')
- arg_list = [self.test_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+ arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, '-x', 'derivatives',
'-f', 'events', '-e', '.tsv']
main(arg_list)
self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists after creation')
- json_path = os.path.realpath(os.path.join(self.derv_path, 'backups', BackupManager.DEFAULT_BACKUP_NAME,
+ json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME,
BackupManager.BACKUP_DICTIONARY))
with open(json_path, 'r') as fp:
key_dict = json.load(fp)
@@ -56,19 +62,18 @@ def test_main_events(self):
self.assertEqual(len(file_list), 3, "The backup of events.tsv has the right number of files")
def test_main_all(self):
- arg_list = [self.test_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
- '-f', '*', '-e', '*']
+ arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path,
+ '-x', 'derivatives', '-f', '*', '-e', '*']
self.assertFalse(os.path.exists(self.derv_path), 'backup directory does not exist before creation')
main(arg_list)
self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists after creation')
- json_path = os.path.realpath(os.path.join(self.derv_path, 'backups', BackupManager.DEFAULT_BACKUP_NAME,
+ json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME,
BackupManager.BACKUP_DICTIONARY))
with open(json_path, 'r') as fp:
key_dict = json.load(fp)
self.assertEqual(len(key_dict), 4, "The backup of all files includes top_level.tsv")
- back_path = os.path.realpath(os.path.join(self.derv_path, 'backups',
- BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
+ back_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
file_list1 = get_file_list(back_path)
self.assertIsInstance(file_list1, list)
self.assertEqual(len(file_list1), 4)
@@ -78,11 +83,11 @@ def test_main_task(self):
self.assertTrue(os.path.exists(der_path))
shutil.rmtree(der_path)
self.assertFalse(os.path.exists(der_path))
- arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+ arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
'-f', 'events', '-e', '.tsv', '-t', 'FacePerception']
main(arg_list)
self.assertTrue(os.path.exists(der_path))
- back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups',
+ back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION,
BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
self.assertTrue(os.path.exists(back_path))
backed_files = get_file_list(back_path)
@@ -93,37 +98,33 @@ def test_main_bad_task(self):
self.assertTrue(os.path.exists(der_path))
shutil.rmtree(der_path)
self.assertFalse(os.path.exists(der_path))
- arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+ arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
'-f', 'events', '-e', '.tsv', '-t', 'Baloney']
main(arg_list)
self.assertTrue(os.path.exists(der_path))
- back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups',
+ back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION,
BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
self.assertTrue(os.path.exists(back_path))
backed_files = get_file_list(back_path)
self.assertEqual(len(backed_files), 0)
def test_alt_loc(self):
- alt_path = os.path.realpath(os.path.join(self.extract_path, 'temp'))
- if os.path.exists(alt_path):
- shutil.rmtree(alt_path)
- self.assertFalse(os.path.exists(alt_path))
- arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives', '-w', alt_path,
+ if os.path.exists(self.alt_path):
+ shutil.rmtree(self.alt_path)
+ self.assertFalse(os.path.exists(self.alt_path))
+ arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives', '-bd', self.alt_path,
'-f', 'events', '-e', '.tsv', ]
main(arg_list)
- self.assertTrue(os.path.exists(alt_path))
- back_path = os.path.realpath(os.path.join(alt_path, 'backups', 'default_back', 'backup_root'))
- self.assertTrue(os.path.exists(back_path))
+ self.assertTrue(os.path.exists(self.alt_path))
+ back_path = os.path.realpath(os.path.join(self.alt_path, 'default_back', 'backup_root'))
self.assertTrue(os.path.exists(back_path))
backed_files = get_file_list(back_path)
self.assertEqual(len(backed_files), 6)
- if os.path.exists(alt_path):
- shutil.rmtree(alt_path)
def test_main_backup_exists(self):
der_path = os.path.realpath(os.path.join(self.data_root, 'derivatives'))
self.assertTrue(os.path.exists(der_path))
- arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+ arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
'-f', 'events', '-e', '.tsv', '-t', 'Baloney']
with self.assertRaises(HedFileError) as context:
main(arg_list)
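
Taken together, these hunks encode a flattened backup layout: the extra 'backups' path component is gone, so a named backup sits directly under the backup directory. A sketch of the paths the assertions now expect; the backup directory value is illustrative, and BackupManager's import path is assumed from its use above.

    import os
    from hed.tools.remodeling.backup_manager import BackupManager  # assumed module path

    backup_dir = "/tmp/remodel_backups"  # hypothetical -bd value
    backup_root = os.path.join(backup_dir, BackupManager.DEFAULT_BACKUP_NAME, "backup_root")
    json_path = os.path.join(backup_dir, BackupManager.DEFAULT_BACKUP_NAME,
                             BackupManager.BACKUP_DICTIONARY)
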
diff --git a/tests/tools/remodeling/cli/test_run_remodel_restore.py b/tests/tools/remodeling/cli/test_run_remodel_restore.py
index c3645f3ae..af75f1d2e 100644
--- a/tests/tools/remodeling/cli/test_run_remodel_restore.py
+++ b/tests/tools/remodeling/cli/test_run_remodel_restore.py
@@ -17,15 +17,19 @@ def setUpClass(cls):
'../../../data/remodel_tests/test_root_back1')
cls.test_zip_back1 = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'../../../data/remodel_tests/test_root_back1.zip')
-
- cls.extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+ extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+ cls.alt_path = os.path.realpath(os.path.join(extract_path, 'temp'))
+ cls.extract_path = extract_path
def setUp(self):
with zipfile.ZipFile(self.test_zip_back1, 'r') as zip_ref:
zip_ref.extractall(self.extract_path)
def tearDown(self):
- shutil.rmtree(self.test_root_back1)
+ if os.path.exists(self.test_root_back1):
+ shutil.rmtree(self.test_root_back1)
+ if os.path.exists(self.alt_path):
+ shutil.rmtree(self.alt_path)
def test_main_restore(self):
files1 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
@@ -35,7 +39,7 @@ def test_main_restore(self):
os.remove(os.path.realpath(os.path.join(self.test_root_back1, 'top_level.tsv')))
files2 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
self.assertFalse(files2, "all non-derivative files were removed before the restore")
- arg_list = [self.test_root_back1, '-n', 'back1']
+ arg_list = [self.test_root_back1, '-bn', 'back1']
main(arg_list)
files3 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
self.assertEqual(len(files3), len(files1), "run_restore restores all the files")
@@ -48,11 +52,10 @@ def test_no_backup(self):
self.assertEqual(context.exception.args[0], "BackupDoesNotExist")
def test_restore_alt_loc(self):
- alt_path = os.path.realpath(os.path.join(self.extract_path, 'temp'))
- if os.path.exists(alt_path):
- shutil.rmtree(alt_path)
- self.assertFalse(os.path.exists(alt_path))
- arg_list = [self.test_root_back1, '-n', 'back1', '-x', 'derivatives', '-w', alt_path,
+ if os.path.exists(self.alt_path):
+ shutil.rmtree(self.alt_path)
+ self.assertFalse(os.path.exists(self.alt_path))
+ arg_list = [self.test_root_back1, '-bn', 'back1', '-x', 'derivatives', '-bd', self.alt_path,
'-f', 'events', '-e', '.tsv']
back_main(arg_list)
files1 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
@@ -62,14 +65,11 @@ def test_restore_alt_loc(self):
os.remove(os.path.realpath(os.path.join(self.test_root_back1, 'top_level.tsv')))
files2 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
self.assertFalse(files2, "all non-derivative files were removed before the restore")
- arg_list = [self.test_root_back1, '-n', 'back1', '-w', alt_path]
+ arg_list = [self.test_root_back1, '-bn', 'back1', '-bd', self.alt_path]
main(arg_list)
files3 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
self.assertEqual(len(files3)+1, len(files1), "run_restore restores all the backed-up files")
- if os.path.exists(alt_path):
- shutil.rmtree(alt_path)
-
if __name__ == '__main__':
unittest.main()
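
The tearDown methods in this patch converge on the same guarded-cleanup idiom: remove a tree only if it exists, so a test that never created its directory cannot fail during cleanup. A standalone sketch of the pattern:

    import os
    import shutil

    def remove_if_present(path):
        """Remove a directory tree, tolerating its absence."""
        if os.path.exists(path):
            shutil.rmtree(path)
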
diff --git a/tests/tools/remodeling/test_backup_manager.py b/tests/tools/remodeling/test_backup_manager.py
index 53c297cf2..1d9a50e5d 100644
--- a/tests/tools/remodeling/test_backup_manager.py
+++ b/tests/tools/remodeling/test_backup_manager.py
@@ -32,7 +32,7 @@ def setUpClass(cls):
test_root_bad = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../data/remodel_tests/test_root_bad'))
cls.test_root_bad = test_root_bad
- cls.test_root_bad_backups = os.path.join(test_root_bad, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups')
+ cls.test_root_bad_backups = os.path.join(test_root_bad, BackupManager.RELATIVE_BACKUP_LOCATION)
cls.test_paths_bad = [os.path.join(test_root_bad, file) for file in file_list]
cls.test_zip_bad = os.path.realpath(os.path.join(os.path.dirname(__file__),
'../../data/remodel_tests/test_root_bad.zip'))
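
Read with the earlier hunks, this change implies the 'backups' component moved into BackupManager.RELATIVE_BACKUP_LOCATION itself (plausibly something like 'derivatives/remodel/backups'); the exact constant value is an assumption, and only the dropped extra join is certain from the diff.

    import os
    from hed.tools.remodeling.backup_manager import BackupManager  # assumed module path

    test_root = "/tmp/test_root_bad"  # hypothetical dataset root
    # Before this patch the caller appended 'backups' itself:
    #   os.path.join(test_root, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups')
    # After, the constant is expected to carry the full relative path:
    backups_dir = os.path.join(test_root, BackupManager.RELATIVE_BACKUP_LOCATION)
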