diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 7305e7c6..a866ec32 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -107,7 +107,7 @@ class SidecarErrors:
class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'
- SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+
SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
@@ -119,19 +119,22 @@ class SchemaWarnings:
SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
- SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE"
class SchemaAttributeErrors:
+ SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+ SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID'
SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID"
SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID"
- SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID"
SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID"
SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID"
+ SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID"
+ SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID"
SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID"
- SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID
+ SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED"
+ SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE"
class DefinitionErrors:
diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py
index e7ee857b..e368ec43 100644
--- a/hed/errors/exceptions.py
+++ b/hed/errors/exceptions.py
@@ -14,8 +14,9 @@ class HedExceptions:
INVALID_DATAFRAME = 'INVALID_DATAFRAME'
INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT'
# These are actual schema issues, not that the file cannot be found or parsed
- SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
- HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
+ SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID'
+ SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID'
+ SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID"
SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
@@ -26,14 +27,14 @@ class HedExceptions:
ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"
- HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
- SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
- SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
- HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
- INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator'
+ SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID'
+ SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING'
+
+ WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator'
# This issue will contain a list of lines with issues.
- HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID'
+ WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID'
+ WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID'
HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'
SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py
index b72e8470..b8962682 100644
--- a/hed/errors/known_error_codes.py
+++ b/hed/errors/known_error_codes.py
@@ -31,6 +31,7 @@
],
"schema_validation_errors": [
"SCHEMA_ATTRIBUTE_INVALID",
+ "SCHEMA_ATTRIBUTE_VALUE_INVALID",
"SCHEMA_CHARACTER_INVALID",
"SCHEMA_DUPLICATE_NODE",
"SCHEMA_HEADER_INVALID",
diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index b7fda9d5..8c196f9e 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
-@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
f"or was used outside of it's defined class."
@@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in
f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}."
-@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING)
+@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING,
+ actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
return "Only placeholder nodes('#') can have a unit class, value class, or takes value." + \
f"Found {invalid_attribute_name} on {tag_name}"
-@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
-def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name):
- return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'." \
- f"Something went very wrong."
-
-@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version):
return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'."
@hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
- actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+ actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child):
return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name):
return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name):
return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name):
return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
-@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units):
valid_units = ",".join(valid_units)
return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. Valid units are: '{valid_units}'."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
+ return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
+ return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. "
+ f"Allowed characters are: a single character, "
+ f"or one of the following - letters, blank, digits, alphanumeric.")
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library):
+ return (f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. ")
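Most of the attribute checks above now surface under the umbrella code SCHEMA_ATTRIBUTE_VALUE_INVALID via the decorator's actual_code argument. A minimal sketch of how these codes show up when validating a schema; the file name is hypothetical, and the entry points are the ones touched elsewhere in this diff:

```python
from hed.schema import load_schema
from hed.schema.schema_compliance import check_compliance

schema = load_schema("my_library_schema.mediawiki")  # hypothetical local schema file
issues = check_compliance(schema, check_for_warnings=True)
for issue in issues:
    # Unknown attributes report as SCHEMA_ATTRIBUTE_INVALID; bad attribute values
    # report as SCHEMA_ATTRIBUTE_VALUE_INVALID.
    print(issue.get("code"), "-", issue.get("message"))
```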
diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 12e2d889..9f437102 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -137,7 +137,7 @@ def _indexed_dict_from_onsets(onsets):
@staticmethod
def _filter_by_index_list(original_series, indexed_dict):
- new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a"
+ new_series = pd.Series(["n/a"] * len(original_series))
for onset, indices in indexed_dict.items():
if indices:
diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py
new file mode 100644
index 00000000..ae47b71e
--- /dev/null
+++ b/hed/models/basic_search.py
@@ -0,0 +1,237 @@
+import re
+from itertools import combinations, product
+from collections import defaultdict
+import pandas as pd
+
+
+def find_matching(series, search_string, regex=False):
+ """ Finds lines in the series that match the search string and returns a mask.
+
+ Syntax Rules:
+        - '@': Prefixing a term in the search string means the term must appear anywhere within a line.
+ - Parentheses: Elements within parentheses must appear in the line with the same level of nesting.
+              e.g. the search string "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since the
+                  search requires A and B to start in two separate groups.
+        - "LongFormTag*": A * will match any remaining word (anything but a comma or parenthesis).
+ - An individual term can be arbitrary regex, but it is limited to single continuous words.
+
+ Notes:
+ - The format of the series should match the format of the search string, whether it's in short or long form.
+ - To enable support for matching parent tags, ensure that both the series and search string are in long form.
+
+ Args:
+ series (pd.Series): A Pandas Series object containing the lines to be searched.
+ search_string (str): The string to search for in each line of the series.
+        regex (bool): If False (the default), translate any * wildcard characters to the regex .*?
+            If True, do no translation and pass the words through as raw regex. Due to how the parsing
+            is set up, the words must not include the following characters: (),
+
+ Returns:
+ mask (pd.Series): A Boolean mask Series of the same length as the input series.
+ The mask has `True` for lines that match the search string and `False` otherwise.
+ """
+ if not regex:
+ # Replace *'s with a reasonable value for people who don't know regex
+        search_string = re.sub(r'(?<!\.)\*', '.*?', search_string)
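A short usage sketch of the search syntax documented above. The event strings and tags are made up, and the import mirrors the one used in tests/models/test_basic_search.py further down; the expected mask follows the docstring's rules ('@' terms may appear anywhere, parenthesized terms must appear inside a group):

```python
import pandas as pd
from hed.models import basic_search

# Hypothetical assembled HED strings, one per event row.
series = pd.Series([
    "Sensory-event, (Red, Square)",
    "Agent-action, (Blue, Square)",
])
mask = basic_search.find_matching(series, "@Square, (Red)")
print(mask.tolist())  # expected: [True, False]
```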
- # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups]
+ # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups]
# Python 3.7/8 compatible version.
- negated_groups = [search_result(group, []) for group in hed_group.get_all_groups()
+ negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups()
if not any(group is found_group.group for found_group in found_groups)]
return negated_groups
-class ExpressionContainingGroup(Expression):
- def handle_expr(self, hed_group, exact=False):
- result = self.right.handle_expr(hed_group, exact=True)
- found_groups = result
- if result:
- found_parent_groups = []
- for group in found_groups:
- if not group.group.is_group:
- continue
- if group.group._parent:
- found_parent_groups.append(search_result(group.group._parent, group.group))
-
- if found_parent_groups:
- return found_parent_groups
-
- return []
-
-
class ExpressionDescendantGroup(Expression):
def handle_expr(self, hed_group, exact=False):
found_groups = self.right.handle_expr(hed_group)
- found_parent_groups = []
- if found_groups:
- for group in found_groups:
- if not group.group.is_group:
- continue
- if group.group._parent:
- found_parent_groups.append(search_result(group.group._parent, group.group))
-
- if found_parent_groups:
- return found_parent_groups
- return []
+ found_parent_groups = self._get_parent_groups(found_groups)
+ return found_parent_groups
class ExpressionExactMatch(Expression):
+ def __init__(self, token, left=None, right=None):
+ super().__init__(token, left, right)
+ self.optional = "any"
+
+ def _filter_exact_matches(self, search_results):
+ filtered_list = []
+ for group in search_results:
+ if len(group.group.children) == len(group.tags):
+ filtered_list.append(group)
+
+ return filtered_list
+
def handle_expr(self, hed_group, exact=False):
found_groups = self.right.handle_expr(hed_group, exact=True)
- if found_groups:
- return_list = []
- for group in found_groups:
- if len(group.group.children) == len(group.tags):
- return_list.append(group)
+ if self.optional == "any":
+ return self._get_parent_groups(found_groups)
- if return_list:
- return return_list
+ filtered_list = self._filter_exact_matches(found_groups)
+ if filtered_list:
+ return self._get_parent_groups(filtered_list)
# Basically if we don't have an exact match above, do the more complex matching including optional
if self.left:
optional_groups = self.left.handle_expr(hed_group, exact=True)
found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
- if found_groups:
- return_list = []
- for group in found_groups:
- if len(group.group.children) == len(group.tags):
- return_list.append(group)
-
- if return_list:
- return return_list
+ filtered_list = self._filter_exact_matches(found_groups)
+ if filtered_list:
+ return self._get_parent_groups(filtered_list)
return []
@@ -337,7 +322,6 @@ class QueryParser:
def __init__(self, expression_string):
"""Compiles a QueryParser for a particular expression, so it can be used to search hed strings.
-
Basic Input Examples:
'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event
@@ -354,11 +338,15 @@ def __init__(self, expression_string):
'[Event and Action]' - Find a group that contains both Event and Action(at any level)
- '[[Event and Action]]' - Find a group with Event And Action at the same level.
+ '{Event and Action}' - Find a group with Event And Action at the same level.
+
+ '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else
+
+ '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
Practical Complex Example:
- [[{(Onset or Offset), (Def or [[Def-expand]]): ???}]] - A group with an onset tag,
+ {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
a def tag or def-expand group, and an optional wildcard group
Parameters:
@@ -392,15 +380,22 @@ def current_token(self):
def _handle_and_op(self):
expr = self._handle_negation()
- next_token = self._next_token_is([Token.And, Token.Or])
+ next_token = self._next_token_is([Token.And])
while next_token:
right = self._handle_negation()
if next_token.kind == Token.And:
expr = ExpressionAnd(next_token, expr, right)
- elif next_token.kind == Token.Or:
- expr = ExpressionOr(next_token, expr, right)
- next_token = self._next_token_is([Token.And, Token.Or])
+ next_token = self._next_token_is([Token.And])
+ return expr
+ def _handle_or_op(self):
+        expr = self._handle_and_op()  # "and" binds tighter than "or", so parse the and-level first
+ next_token = self._next_token_is([Token.Or])
+ while next_token:
+            right = self._handle_and_op()  # each "or" operand is a complete and-expression
+ if next_token.kind == Token.Or:
+ expr = ExpressionOr(next_token, expr, right)
+ next_token = self._next_token_is([Token.Or])
return expr
def _handle_negation(self):
@@ -417,33 +412,35 @@ def _handle_negation(self):
def _handle_grouping_op(self):
next_token = self._next_token_is(
- [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
- if next_token == Token.ContainingGroup:
- interior = self._handle_and_op()
- expr = ExpressionContainingGroup(next_token, right=interior)
- next_token = self._next_token_is([Token.ContainingGroupEnd])
- if next_token != Token.ContainingGroupEnd:
- raise ValueError("Parse error: Missing closing square brackets")
- # Can we move this to the and_or level? or does that break everything...?
- elif next_token == Token.LogicalGroup:
- expr = self._handle_and_op()
+ [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
+ if next_token == Token.LogicalGroup:
+ expr = self._handle_or_op()
next_token = self._next_token_is([Token.LogicalGroupEnd])
if next_token != Token.LogicalGroupEnd:
raise ValueError("Parse error: Missing closing paren")
elif next_token == Token.DescendantGroup:
- interior = self._handle_and_op()
+ interior = self._handle_or_op()
expr = ExpressionDescendantGroup(next_token, right=interior)
next_token = self._next_token_is([Token.DescendantGroupEnd])
if next_token != Token.DescendantGroupEnd:
raise ValueError("Parse error: Missing closing square bracket")
elif next_token == Token.ExactMatch:
- interior = self._handle_and_op()
+ interior = self._handle_or_op()
expr = ExpressionExactMatch(next_token, right=interior)
next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional])
if next_token == Token.ExactMatchOptional:
- optional_portion = self._handle_and_op()
- expr.left = optional_portion
+                # A ':' was found - the group must now match exactly, aside from the optional portion
+ expr.optional = "none"
next_token = self._next_token_is([Token.ExactMatchEnd])
+ if next_token != Token.ExactMatchEnd:
+ optional_portion = self._handle_or_op()
+ expr.left = optional_portion
+ next_token = self._next_token_is([Token.ExactMatchEnd])
+ if "~" in str(expr):
+ raise ValueError("Cannot use negation in exact matching groups,"
+ " as it's not clear what is being matched.\n"
+                                 "{thing and ~(expression)} is not allowed.")
+
if next_token is None:
raise ValueError("Parse error: Missing closing curly bracket")
else:
@@ -452,13 +449,15 @@ def _handle_grouping_op(self):
expr = ExpressionWildcardNew(next_token)
elif next_token:
expr = Expression(next_token)
+ else:
+ expr = None
return expr
def _parse(self, expression_string):
self.tokens = self._tokenize(expression_string)
- expr = self._handle_and_op()
+ expr = self._handle_or_op()
if self.at_token + 1 != len(self.tokens):
raise ValueError("Parse error in search string")
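For reference, the curly-brace syntax accepted by QueryParser after this change, shown as construction-only examples. The module path is an assumption about where QueryParser lives; malformed query strings raise ValueError at construction time, as the parser code above shows:

```python
from hed.models.expression_parser import QueryParser  # assumed module path

QueryParser("{Event and Action}")        # a group with Event and Action at the same level
QueryParser("{Event and Action:}")       # the same group, with nothing else allowed in it
QueryParser("{Event and Action:Agent}")  # the same group, plus an optional Agent tag
QueryParser("{(Onset or Offset), (Def or {Def-expand}): ???}")  # the complex example from the docstring
```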
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 4cb3729c..0857abe9 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
- new_entry._set_attribute_value(HedKey.InLibrary, self.library)
+ # only add it if not already present - This is a rare case
+ if not new_entry.has_attribute(HedKey.InLibrary):
+ new_entry._set_attribute_value(HedKey.InLibrary, self.library)
section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index 0cecc4ab..60a1a934 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -42,6 +42,7 @@ class HedKey:
SuggestedTag = "suggestedTag"
Rooted = "rooted"
DeprecatedFrom = "deprecatedFrom"
+ ConversionFactor = "conversionFactor"
# All known properties
BoolProperty = 'boolProperty'
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 102795d8..936943e8 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -176,7 +176,6 @@ def __eq__(self, other):
return False
return True
-
class UnitEntry(HedSchemaEntry):
""" A single unit entry with modifiers in the HedSchema. """
def __init__(self, *args, **kwargs):
@@ -207,12 +206,13 @@ def finalize_entry(self, schema):
self.derivative_units = derivative_units
def _get_conversion_factor(self, modifier_entry):
-
- base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e"))
- if modifier_entry:
- modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e"))
- else:
- modifier_factor = 1.0
+ base_factor = modifier_factor = 1.0
+ try:
+ base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+ if modifier_entry:
+ modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+ except (ValueError, AttributeError) as e:
+ pass # Just default to 1.0
return base_factor * modifier_factor
def get_conversion_factor(self, unit_name):
@@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name):
Returns:
conversion_factor(float or None): Returns the conversion factor or None
"""
- if "conversionFactor" in self.attributes:
+ if HedKey.ConversionFactor in self.attributes:
return float(self.derivative_units.get(unit_name))
class HedTagEntry(HedSchemaEntry):
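The '^' handling above exists because schema files may write conversion factors in exponent notation that float() cannot parse directly. A tiny illustration of just that string handling (values are illustrative only):

```python
# Replacing '^' with 'e' turns the schema's exponent notation into something float() accepts.
print(float("10^3".replace("^", "e")))   # 10000.0
print(float("0.001".replace("^", "e")))  # 0.001 - plain decimal factors pass through unchanged
```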
diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py
index d1d7f5ec..0ccb9c33 100644
--- a/hed/schema/schema_attribute_validators.py
+++ b/hed/schema/schema_attribute_validators.py
@@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name):
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
tag_entry.name,
child.name)
- return issues
\ No newline at end of file
+ return issues
+
+
+def conversion_factor(hed_schema, tag_entry, attribute_name):
+    """ Check that the conversionFactor attribute is a positive number. """
+    issues = []
+ conversion_factor = tag_entry.attributes.get(attribute_name, "1.0")
+ try:
+ conversion_factor = float(conversion_factor.replace("^", "e"))
+ except (ValueError, AttributeError) as e:
+ pass
+ if not isinstance(conversion_factor, float) or conversion_factor <= 0.0:
+ issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE,
+ tag_entry.name,
+ conversion_factor)
+
+ return issues
+
+
+    """ Check that the allowedCharacter attribute has a valid value.
+ """ Check allowed character has a valid value
+
+ Parameters:
+ hed_schema (HedSchema): The schema to use for validation
+ tag_entry (HedSchemaEntry): The schema entry for this attribute.
+ attribute_name (str): The name of this attribute
+
+ Returns:
+ list: A list of issues. Each issue is a dictionary.
+
+ """
+ issues = []
+ allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'}
+
+ char_string = tag_entry.attributes.get(attribute_name, "")
+ characters = char_string.split(",")
+ for character in characters:
+ if character not in allowed_strings and len(character) != 1:
+ issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
+ tag_entry.name,
+ character)
+ return issues
+
+
+def in_library_check(hed_schema, tag_entry, attribute_name):
+    """ Check that the inLibrary attribute matches the library name of the schema.
+
+ Parameters:
+ hed_schema (HedSchema): The schema to use for validation
+ tag_entry (HedSchemaEntry): The schema entry for this attribute.
+ attribute_name (str): The name of this attribute
+
+ Returns:
+ list: A list of issues. Each issue is a dictionary.
+
+ """
+ issues = []
+
+ library = tag_entry.attributes.get(attribute_name, "")
+ if hed_schema.library != library:
+        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
+ tag_entry.name,
+ library)
+ return issues
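The acceptance rule in allowed_characters_check is easy to restate on its own: each comma-separated piece must be one of the named classes or exactly one character long. A small mirror of that rule, with made-up example values:

```python
allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'}

def is_valid_allowed_characters(char_string):
    """ Mirror of the acceptance rule used by allowed_characters_check above. """
    return all(part in allowed_strings or len(part) == 1
               for part in char_string.split(","))

print(is_valid_allowed_characters("letters,digits,-"))  # True
print(is_valid_allowed_characters("letters, digits"))   # False - ' digits' keeps its leading space
print(is_valid_allowed_characters("symbols"))           # False - unknown multi-character name
```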
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index c75c11de..1a68baf8 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
class SchemaValidator:
"""Validator class to wrap some code. In general, just call check_compliance."""
attribute_validators = {
- HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check,
- SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)],
- HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check,
- SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)],
- HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
- (schema_attribute_validators.unit_class_exists,
- SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)],
- HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
- (schema_attribute_validators.value_class_exists,
- SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)],
+ HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check],
+ HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check],
+ HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check,
+ schema_attribute_validators.unit_class_exists],
+ HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check,
+ schema_attribute_validators.value_class_exists],
# Rooted tag is implicitly verified on loading
- # HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check,
- # SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)],
- HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check,
- SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)],
- HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check,
- SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)],
- HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists,
- SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)]
+ # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
+ HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
+ HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
+ HedKey.DefaultUnits: [schema_attribute_validators.unit_exists],
+ HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
+ HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
+ HedKey.InLibrary: [schema_attribute_validators.in_library_check]
}
def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
@@ -80,7 +73,7 @@ def check_unknown_attributes(self):
if unknown_attributes:
for attribute_name, source_tags in unknown_attributes.items():
for tag in source_tags:
- issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+ issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
attribute_name,
source_tag=tag)
return issues_list
@@ -93,16 +86,14 @@ def check_attributes(self):
for tag_entry in self.hed_schema[section_key].values():
self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
for attribute_name in tag_entry.attributes:
- validators = self.attribute_validators.get(attribute_name, None)
- if validators:
- for validator, error_code in validators:
- self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
- new_issues = validator(self.hed_schema, tag_entry, attribute_name)
- for issue in new_issues:
- issue['code'] = error_code
- issue['severity'] = ErrorSeverity.WARNING
- self.error_handler.add_context_and_filter(new_issues)
- issues_list += new_issues
+ validators = self.attribute_validators.get(attribute_name, [])
+ for validator in validators:
+ self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
+ new_issues = validator(self.hed_schema, tag_entry, attribute_name)
+ for issue in new_issues:
+ issue['severity'] = ErrorSeverity.WARNING
+ self.error_handler.add_context_and_filter(new_issues)
+ issues_list += new_issues
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
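Because each attribute now maps to a plain list of callables with the signature (hed_schema, tag_entry, attribute_name) -> list of issue dicts, adding a check is just appending a function. A hedged sketch; the extra rule is invented for illustration and is not part of this PR:

```python
from hed.errors import ErrorHandler
from hed.errors.error_types import SchemaAttributeErrors
from hed.schema.hed_schema_constants import HedKey
from hed.schema.schema_compliance import SchemaValidator


def deprecated_from_not_empty(hed_schema, tag_entry, attribute_name):
    """ Hypothetical extra check: deprecatedFrom must not be an empty string. """
    issues = []
    if tag_entry.attributes.get(attribute_name, "unset") == "":
        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID,
                                            tag_entry.name, "")
    return issues


# Illustrative wiring only - mutates the shared class-level table.
SchemaValidator.attribute_validators[HedKey.DeprecatedFrom].append(deprecated_from_not_empty)
```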
diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py
index e373cf1a..d9d082a1 100644
--- a/hed/schema/schema_io/schema2base.py
+++ b/hed/schema/schema_io/schema2base.py
@@ -106,9 +106,6 @@ def _output_tags(self, tags):
self._end_tag_section()
def _output_units(self, unit_classes):
- if not unit_classes:
- return
-
section_node = self._start_section(HedSectionKey.UnitClasses)
for unit_class_entry in unit_classes.values():
@@ -128,8 +125,6 @@ def _output_units(self, unit_classes):
self._write_entry(unit_entry, unit_class_node)
def _output_section(self, hed_schema, key_class):
- if not hed_schema[key_class]:
- return
parent_node = self._start_section(key_class)
for entry in hed_schema[key_class].values():
if self._should_skip(entry):
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index a02f9ed6..de18f9d6 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -22,12 +22,19 @@
no_wiki_end_tag = ''
-ErrorsBySection = {
- HedWikiSection.Schema: HedExceptions.SCHEMA_START_MISSING,
- HedWikiSection.EndSchema: HedExceptions.SCHEMA_END_INVALID,
- HedWikiSection.EndHed: HedExceptions.HED_END_INVALID
-}
-required_sections = [HedWikiSection.Schema, HedWikiSection.EndSchema, HedWikiSection.EndHed]
+
+required_sections = [
+ HedWikiSection.Prologue,
+ HedWikiSection.Schema,
+ HedWikiSection.EndSchema,
+ HedWikiSection.UnitsClasses,
+ HedWikiSection.UnitModifiers,
+ HedWikiSection.ValueClasses,
+ HedWikiSection.Attributes,
+ HedWikiSection.Properties,
+ HedWikiSection.Epilogue,
+ HedWikiSection.EndHed,
+]
class SchemaLoaderWiki(SchemaLoader):
@@ -79,15 +86,13 @@ def _parse_data(self):
# Validate we didn't miss any required sections.
for section in required_sections:
if section not in wiki_lines_by_section:
- error_code = HedExceptions.INVALID_SECTION_SEPARATOR
- if section in ErrorsBySection:
- error_code = ErrorsBySection[section]
+ error_code = HedExceptions.SCHEMA_SECTION_MISSING
msg = f"Required section separator '{SectionNames[section]}' not found in file"
raise HedFileError(error_code, msg, filename=self.filename)
if self.fatal_errors:
self.fatal_errors = error_reporter.sort_issues(self.fatal_errors)
- raise HedFileError(HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+ raise HedFileError(self.fatal_errors[0]['code'],
f"{len(self.fatal_errors)} issues found when parsing schema. See the .issues "
f"parameter on this exception for more details.", self.filename,
issues=self.fatal_errors)
@@ -109,7 +114,7 @@ def _read_header_section(self, lines):
for line_number, line in lines:
if line.strip():
msg = f"Extra content [{line}] between HED line and other sections"
- raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
def _read_text_block(self, lines):
text = ""
@@ -163,7 +168,8 @@ def _read_schema(self, lines):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(line_number, line,
- "Line has too many *'s at the front. You cannot skip a level.")
+                                      "Line has too many *'s at the front. You cannot skip a level.",
+                                      HedExceptions.WIKI_LINE_START_INVALID)
continue
# Create the entry
tag_entry = self._add_tag_line(parent_tags, line_number, line)
@@ -261,14 +267,37 @@ def _get_header_attributes_internal(self, version_line):
if "=" not in version_line:
return self._get_header_attributes_internal_old(version_line)
- final_attributes = {}
+ attributes, malformed = self._parse_attributes_line(version_line)
+
+ for m in malformed:
+ # todo: May shift this at some point to report all errors
+ raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID,
+ message=f"Header line has a malformed attribute {m}",
+ filename=self.filename)
+ return attributes
+
+ @staticmethod
+ def _parse_attributes_line(version_line):
+ matches = {}
+ unmatched = []
+ last_end = 0
for match in attr_re.finditer(version_line):
- attr_name = match.group(1)
- attr_value = match.group(2)
- final_attributes[attr_name] = attr_value
+ start, end = match.span()
- return final_attributes
+ # If there's unmatched content between the last match and the current one
+ if start > last_end:
+ unmatched.append(version_line[last_end:start])
+
+ matches[match.group(1)] = match.group(2)
+ last_end = end
+
+ # If there's unmatched content after the last match
+ if last_end < len(version_line):
+ unmatched.append(version_line[last_end:])
+
+ unmatched = [m.strip() for m in unmatched if m.strip()]
+ return matches, unmatched
def _get_header_attributes_internal_old(self, version_line):
""" Extracts all valid attributes like version from the HED line in .mediawiki format.
@@ -288,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line):
divider_index = pair.find(':')
if divider_index == -1:
msg = f"Found poorly matched key:value pair in header: {pair}"
- raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
key, value = pair[:divider_index], pair[divider_index + 1:]
key = key.strip()
value = value.strip()
@@ -369,10 +398,17 @@ def _get_tag_name(self, tag_line):
return None, 0
@staticmethod
- def _get_tag_attributes(tag_line, starting_index):
+ def _validate_attribute_string(attribute_string):
+ pattern = r'^[A-Za-z]+(=.+)?$'
+ match = re.fullmatch(pattern, attribute_string)
+ if match:
+ return match.group()
+
+ def _get_tag_attributes(self, line_number, tag_line, starting_index):
""" Get the tag attributes from a line.
Parameters:
+ line_number (int): The line number to report errors as
tag_line (str): A tag line.
starting_index (int): The first index we can check for the brackets.
@@ -386,11 +422,14 @@ def _get_tag_attributes(tag_line, starting_index):
return None, starting_index
if attr_string:
attributes_split = [x.strip() for x in attr_string.split(',')]
- # Filter out attributes with spaces.
- attributes_split = [a for a in attributes_split if " " not in a]
final_attributes = {}
for attribute in attributes_split:
+ if self._validate_attribute_string(attribute) is None:
+ self._add_fatal_error(line_number, tag_line,
+ f"Malformed attribute found {attribute}. "
+ f"Valid formatting is: attribute, or attribute=\"value\".")
+ continue
split_attribute = attribute.split("=")
if len(split_attribute) == 1:
final_attributes[split_attribute[0]] = True
@@ -468,7 +507,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
if element_name:
node_name = element_name
- node_attributes, index = self._get_tag_attributes(tag_line, index)
+ node_attributes, index = self._get_tag_attributes(line_number, tag_line, index)
if node_attributes is None:
self._add_fatal_error(line_number, tag_line, "Attributes has mismatched delimiters")
return
@@ -489,7 +528,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
return tag_entry
def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
- error_code=HedExceptions.HED_WIKI_DELIMITERS_INVALID):
+ error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
self.fatal_errors.append(
{'code': error_code,
ErrorContext.ROW: line_number,
@@ -504,14 +543,12 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
if line.startswith(section_string):
if key in strings_for_section:
msg = f"Found section {SectionNames[key]} twice"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR,
+ raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID,
msg, filename=self.filename)
if current_section < key:
new_section = key
else:
- error_code = HedExceptions.INVALID_SECTION_SEPARATOR
- if key in ErrorsBySection:
- error_code = ErrorsBySection[key]
+ error_code = HedExceptions.SCHEMA_SECTION_MISSING
msg = f"Found section {SectionNames[key]} out of order in file"
raise HedFileError(error_code, msg, filename=self.filename)
break
@@ -520,11 +557,11 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
def _handle_bad_section_sep(self, line, current_section):
if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG):
msg = f"Invalid section separator '{line.strip()}'"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename)
if line.startswith("!#"):
msg = f"Invalid section separator '{line.strip()}'"
- raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+ raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename)
def _split_lines_into_sections(self, wiki_lines):
""" Takes a list of lines, and splits it into valid wiki sections.
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
index 8404970e..25b27ab8 100644
--- a/hed/schema/schema_validation_util.py
+++ b/hed/schema/schema_validation_util.py
@@ -4,6 +4,7 @@
from hed.errors import ErrorHandler, SchemaWarnings
from hed.schema import hed_schema_constants as constants
from hed.errors.exceptions import HedExceptions, HedFileError
+from hed.schema.hed_schema_constants import valid_header_attributes
ALLOWED_TAG_CHARS = "-"
ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
@@ -45,9 +46,9 @@ def validate_version_string(version_string):
header_attribute_validators = {
- constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID),
- constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
- }
+ constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
+ constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
+}
def validate_present_attributes(attrib_dict, filename):
@@ -92,9 +93,12 @@ def validate_attributes(attrib_dict, filename):
had_error = validator(attribute_value)
if had_error:
raise HedFileError(error_code, had_error, filename)
+ if attribute_name not in valid_header_attributes:
+ raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
+ f"Unknown attribute {attribute_name} found in header line", filename=filename)
if constants.VERSION_ATTRIBUTE not in attrib_dict:
- raise HedFileError(HedExceptions.HED_SCHEMA_VERSION_INVALID,
+ raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
"No version attribute found in header", filename=filename)
diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
index c47fff94..c1aad366 160000
--- a/spec_tests/hed-specification
+++ b/spec_tests/hed-specification
@@ -1 +1 @@
-Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68
+Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index 972d53d4..3e87fdbd 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -12,55 +12,11 @@
from hed.errors import ErrorHandler, get_printable_issue_string
-# To be removed eventually once all errors are being verified.
-known_errors = [
- 'SIDECAR_INVALID',
- 'CHARACTER_INVALID',
- 'COMMA_MISSING',
- "DEF_EXPAND_INVALID",
- "DEF_INVALID",
- "DEFINITION_INVALID",
- "NODE_NAME_EMPTY",
- "ONSET_OFFSET_INSET_ERROR",
- "PARENTHESES_MISMATCH",
- "PLACEHOLDER_INVALID",
- "REQUIRED_TAG_MISSING",
- "SIDECAR_INVALID",
- "SIDECAR_KEY_MISSING",
- "STYLE_WARNING",
- "TAG_EMPTY",
- "TAG_EXPRESSION_REPEATED",
- "TAG_EXTENDED",
- "TAG_EXTENSION_INVALID",
- "TAG_GROUP_ERROR",
- "TAG_INVALID",
- "TAG_NOT_UNIQUE",
- "TAG_NAMESPACE_PREFIX_INVALID",
- "TAG_REQUIRES_CHILD",
- "TILDES_UNSUPPORTED",
- "UNITS_INVALID",
- "UNITS_MISSING",
- "VALUE_INVALID",
-
- "SIDECAR_BRACES_INVALID",
- "SCHEMA_LIBRARY_INVALID",
-
- "SCHEMA_ATTRIBUTE_INVALID",
- "SCHEMA_UNIT_CLASS_INVALID",
- "SCHEMA_VALUE_CLASS_INVALID",
- "SCHEMA_DEPRECATED_INVALID",
- "SCHEMA_SUGGESTED_TAG_INVALID",
- "SCHEMA_RELATED_TAG_INVALID",
- "SCHEMA_NON_PLACEHOLDER_HAS_CLASS",
- "SCHEMA_DEFAULT_UNITS_INVALID"
-]
-
skip_tests = {
"VERSION_DEPRECATED": "Not applicable",
"tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet",
}
-
class MyTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
@@ -80,9 +36,7 @@ def run_single_test(self, test_file):
test_info = json.load(fp)
for info in test_info:
error_code = info['error_code']
- verify_code = False
- if error_code in known_errors:
- verify_code = True
+ verify_code = True
# To be deprecated once we add this to all tests
self._verify_code = verify_code
if error_code in skip_tests:
@@ -93,6 +47,8 @@ def run_single_test(self, test_file):
print(f"Skipping {name} test because: {skip_tests[name]}")
continue
+ # if name != "attribute-invalid-in-library":
+ # continue
description = info['description']
schema = info['schema']
check_for_warnings = info.get("warning", False)
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
index a596775c..e2246335 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
index 672792aa..2b76a3a4 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
@@ -17,6 +17,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
index d5e6cf44..678a6249 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
index 979f72bd..037c9bc7 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
index 3438be07..f79d8361 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
index 267a214e..80454ef4 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
index ee20104a..d3368e37 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
@@ -33,6 +33,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
index 8b3a3a86..64144708 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
index 7939dfd9..f8bccd4d 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
index 4a084ebd..eb283125 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
@@ -33,6 +33,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
'''Unit classes'''
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
index f282aabb..289265f8 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
* weightUnits {defaultUnits=testUnit}
** testUnit {conversionFactor=100}
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
index b7c4d5aa..ac67b8fe 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
* weightUnitsNew {defaultUnits=g}
** g {conversionFactor=100}
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
'''Epilogue'''
The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
index d5e31f3b..6536476c 100644
--- a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
+++ b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
@@ -44,6 +44,16 @@ This schema is the first official release that includes an xsd and requires unit
!# end schema
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
'''Epilogue'''
!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
index 049260f1..4327c6a4 100644
--- a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
+++ b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
@@ -1,6 +1,6 @@
HED version:8.0.0-alpha.1
-
+'''Prologue'''
This is a prologue line.
This is a second prologue line.
@@ -1098,7 +1098,15 @@ This is a second prologue line.
* z {SIUnitSymbolModifier} [SI unit submultiple representing 10^-21]
* yocto {SIUnitModifier} [SI unit submultiple representing 10^-24]
* y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]
-!# end hed
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
+'''Epilogue'''
This is an epilogue.
-This is a second line of an epilogue.
\ No newline at end of file
+This is a second line of an epilogue.
+
+!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
new file mode 100644
index 00000000..d2c398e3
--- /dev/null
+++ b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
@@ -0,0 +1,41 @@
+HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd"
+
+'''Prologue'''
+This schema tests AppendixB SCHEMA_ATTRIBUTE_INVALID
+
+!# start schema
+
+'''Tag1''' {suggestedTag=Tag1}[suggested tag is not registered in the schema]
+* Tag2 {valueClassAttribute}[value attribute is the wrong tag class]
+* Tag3 {unitAttribute}[unit attribute is the wrong tag class]
+
+!# end schema
+'''Unit classes'''
+* unitClass1 {unitAttribute}[Wrong attribute type]
+** unit1 {tagAttribute}[Wrong attribute type]
+
+'''Unit modifiers'''
+* mod1 {tagAttribute}[Wrong attribute type]
+
+'''Value classes'''
+* valueClass1 {tagAttribute}[Wrong attribute type]
+
+'''Schema attributes'''
+* tagAttribute
+* unitAttribute {unitProperty}
+* unitClassAttribute {unitClassProperty}
+* unitModifierAttribute {unitModifierProperty}
+* valueClassAttribute {valueClassProperty}
+* attribute1 {valueClassProperty}
+
+'''Properties'''
+* boolProperty
+* unitClassProperty
+* unitModifierProperty
+* unitProperty
+* valueClassProperty
+
+'''Epilogue'''
+This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes, themselves have properties.
+
+!# end hed
\ No newline at end of file
diff --git a/tests/data/validator_tests/bids_schema.mediawiki b/tests/data/validator_tests/bids_schema.mediawiki
index 971a9723..b306003b 100644
--- a/tests/data/validator_tests/bids_schema.mediawiki
+++ b/tests/data/validator_tests/bids_schema.mediawiki
@@ -1,5 +1,7 @@
HED version: 8.0.0-alpha.2
+'''Prologue'''
+
!# start schema
'''Event'''
@@ -1163,6 +1165,7 @@ HED version: 8.0.0-alpha.2
* yocto {SIUnitModifier} [SI unit submultiple representing 10^-24]
* y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]
+'''Value classes'''
'''Schema attributes'''
* allowedCharacter {unitClassProperty}[An attribute of unit classes schema value placeholders indicating a special character that is allowed in expressing the value of that placeholder.]
@@ -1184,6 +1187,8 @@ HED version: 8.0.0-alpha.2
* unitSymbol {boolProperty, unitProperty}[Abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.]
* unitClass [Specifies the type of a unit for a tag.]
+'''Properties'''
+
'''Epilogue'''
This is the new format for the mediawiki schema
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index f5b381eb..71e21386 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -304,25 +304,30 @@ def test_complex_onsets(self):
{3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]})
def test_empty_and_single_item_series(self):
- self.assertEqual(BaseInput._filter_by_index_list([], {}), [])
- self.assertEqual(BaseInput._filter_by_index_list(["apple"], {0: [0]}), ["apple"])
+ self.assertTrue(BaseInput._filter_by_index_list(pd.Series([]), {}).equals(pd.Series([])))
+ self.assertTrue(BaseInput._filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"])))
def test_two_item_series_with_same_onset(self):
- self.assertEqual(BaseInput._filter_by_index_list(["apple", "orange"], {0: [0, 1]}), ["apple,orange", "n/a"])
+ input_series = pd.Series(["apple", "orange"])
+ expected_series = pd.Series(["apple,orange", "n/a"])
+ self.assertTrue(BaseInput._filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series))
def test_multiple_item_series(self):
- original = ["apple", "orange", "banana", "mango"]
+ input_series = pd.Series(["apple", "orange", "banana", "mango"])
indexed_dict = {0: [0, 1], 1: [2], 2: [3]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["apple,orange", "n/a", "banana", "mango"])
+ expected_series = pd.Series(["apple,orange", "n/a", "banana", "mango"])
+ self.assertTrue(BaseInput._filter_by_index_list(input_series, indexed_dict).equals(expected_series))
def test_complex_scenarios(self):
# Test with negative, zero and positive onsets
- original = ["negative", "zero", "positive"]
+ original = pd.Series(["negative", "zero", "positive"])
indexed_dict = {-1: [0], 0: [1], 1: [2]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["negative", "zero", "positive"])
+ expected_series1 = pd.Series(["negative", "zero", "positive"])
+ self.assertTrue(BaseInput._filter_by_index_list(original, indexed_dict).equals(expected_series1))
# Test with more complex indexed_dict
- original = ["apple", "orange", "banana", "mango", "grape"]
- indexed_dict = {0: [0, 1], 1: [2], 2: [3, 4]}
- self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict),
- ["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+ original2 = ["apple", "orange", "banana", "mango", "grape"]
+        indexed_dict2 = {0: [0, 1], 1: [2], 2: [3, 4]}
+ expected_series2 = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+ self.assertTrue(BaseInput._filter_by_index_list(original2, indexed_dict2).equals(expected_series2))
+
diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py
new file mode 100644
index 00000000..0a942b93
--- /dev/null
+++ b/tests/models/test_basic_search.py
@@ -0,0 +1,313 @@
+import unittest
+import pandas as pd
+from hed import load_schema_version
+
+import os
+from hed import TabularInput
+from hed.models import df_util, basic_search
+from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \
+ construct_delimiter_map, verify_search_delimiters, find_matching
+import numpy as np
+
+
+class TestNewSearch(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ '../data/bids_tests/eeg_ds003645s_hed'))
+ sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
+ cls.events_path = os.path.realpath(
+ os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
+ cls.base_input = TabularInput(cls.events_path, sidecar1_path)
+ cls.schema = load_schema_version()
+ cls.df = cls.base_input.series_filtered
+
+ def test_find_matching_results(self):
+ result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
+ result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
+
+ # Add assertions
+ self.assertTrue(np.sum(result1) > 0, "result1 should have some true values")
+ self.assertTrue(np.sum(result2) > 0, "result2 should have some true values")
+ self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2")
+
+
+class TestFindWords(unittest.TestCase):
+ def test_basic(self):
+ search_string = "@global (local1, local2)"
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_no_anywhere_words(self):
+ search_string = "(local1, local2)"
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, [])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_no_specific_words(self):
+ search_string = "@global1, @global2"
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global1', 'global2'])
+ self.assertEqual(specific_words, [])
+
+ def test_empty_string(self):
+ search_string = ""
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, [])
+ self.assertEqual(specific_words, [])
+
+ def test_mixed_words(self):
+ search_string = "@global (local1, local2), @another_global"
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['global', 'another_global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+ def test_whitespace(self):
+ search_string = " @Global , ( local1 , local2 ) "
+ anywhere_words, specific_words = find_words(search_string)
+ self.assertEqual(anywhere_words, ['Global'])
+ self.assertEqual(specific_words, ['local1', 'local2'])
+
+
+class TestCheckParentheses(unittest.TestCase):
+ def test_balanced_parentheses(self):
+ self.assertEqual(check_parentheses("(())"), "")
+ self.assertEqual(check_parentheses("(someText())"), "")
+ self.assertEqual(check_parentheses("((some)text())"), "")
+ self.assertEqual(check_parentheses("()"), "")
+
+ def test_unbalanced_parentheses(self):
+ self.assertEqual(check_parentheses("(()"), "(")
+ self.assertEqual(check_parentheses("()someText("), "(")
+ self.assertEqual(check_parentheses("(text)text)"), ")")
+ self.assertEqual(check_parentheses("text)"), ")")
+
+ def test_mixed_parentheses(self):
+ self.assertEqual(check_parentheses("(()(())"), "(")
+ self.assertEqual(check_parentheses("(someText))((someText)"), ")(")
+ self.assertEqual(check_parentheses("((someText))someText"), "")
+ self.assertEqual(check_parentheses("(someText(someText))someText"), "")
+
+ def test_special_cases(self):
+ self.assertEqual(check_parentheses(""), "")
+ self.assertEqual(check_parentheses("abc"), "")
+ self.assertEqual(check_parentheses("((()))("), "(")
+ self.assertEqual(check_parentheses("text"), "")
+
+ def test_reverse_and_flip_parentheses(self):
+ self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)")
+ self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH")
+ self.assertEqual(reverse_and_flip_parentheses(")("), ")(")
+ self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))")
+ self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()")
+ self.assertEqual(reverse_and_flip_parentheses("abc"), "cba")
+ self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321")
+ self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a")
+
+
+class TestConstructDelimiterMap(unittest.TestCase):
+ def test_empty_text(self):
+ self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {})
+
+ def test_empty_words(self):
+ self.assertEqual(construct_delimiter_map("word1,word2", []), {})
+
+ def test_single_occurrence(self):
+ text = "word1,word2"
+ expected_result = {
+ ("word1", "word2"): "",
+ ("word2", "word1"): ""
+ }
+ self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result)
+
+ def test_multiple_words(self):
+ text = "word0,((word1),word2)"
+ expected_result = {
+ ("word0", "word1"): "((",
+ ("word0", "word2"): "(",
+ ("word1", "word0"): "))",
+ ("word1", "word2"): ")",
+ ("word2", "word1"): "(",
+ ("word2", "word0"): ")"
+ }
+ self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+ text = "word0 , ( (word1 ), word2)"
+ self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+
+class TestVerifyDelimiters(unittest.TestCase):
+ def base_verify_func(self, query_text, text, anywhere_words, specific_words, expected_result):
+ delimiter_map = construct_delimiter_map(query_text, specific_words)
+ actual_result = verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map)
+ self.assertEqual(actual_result, expected_result)
+
+ def test_all_conditions_met(self):
+ query_text = "word0,((word1),word2)"
+ specific_words = ["word0", "word1", "word2"]
+ text = "word0,((word1),word2)"
+ self.base_verify_func(query_text, text, [], specific_words, True)
+ text = "((word1),word2), word0"
+ self.base_verify_func(query_text, text, [], specific_words, True)
+ text = "word0,(word2, (word1))"
+ self.base_verify_func(query_text, text, [], specific_words, True)
+ text = "word0,((word1),(ExtraGroup),word2)"
+ self.base_verify_func(query_text, text, [], specific_words, True)
+ text = "word0,((word2),word1)"
+ self.base_verify_func(query_text, text, [], specific_words, False)
+ text = "((word1),word0), word2"
+ self.base_verify_func(query_text, text, [], specific_words, False)
+ text = "word0,((word1))"
+ self.base_verify_func(query_text, text, [], specific_words, False)
+ text = "(word1),(ExtraGroup),word2)"
+ self.base_verify_func(query_text, text, [], specific_words, False)
+
+ def test_complex_case_with_word_identifiers(self):
+ query_text = "word0,((word1),@word2,@word3,word4)"
+ specific_words = ["word0", "word1", "word4"]
+ anywhere_words = ["word2", "word3"]
+ text = "word0,((word1),word2,word3,word4)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+ text = "word2,word0,((word1),word3,word4)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+ text = "word3,((word1),word2,word4),word0"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+ text = "word0,((word1),word4),word2,word3"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+ text = "word0,word1,word4,word2" # Incorrect delimiters
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+ text = "word2,word3" # Missing specific words
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ def test_very_complex_case_with_word_identifiers(self):
+ query_text = "word0,(((word1,word2),@word3)),((word4,word5)))"
+ specific_words = ["word0", "word1", "word2", "word4", "word5"]
+ anywhere_words = ["word3"]
+
+ # Test case where all conditions are met
+ text = "word0,(((word1,word2),word3)),((word4,word5)))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Test case with anywhere words out of specific context but still in the string
+ text = "word3,word0,(((word1,word2))),((word4,word5)))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Test case with correct specific words but incorrect delimiters
+ text = "word0,((word1,word2),word3),(word4,word5)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ # Test case missing one specific word
+ text = "word0,(((word1,word2),word3)),(word4))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ # Test case missing anywhere word
+ text = "word0,(((word1,word2))),((word4,word5)))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ def test_incorrect_single_delimiter(self):
+ query_text = "word0,((word1)),word2"
+ specific_words = ["word0", "word1", "word2"]
+ anywhere_words = []
+
+ # Positive case 1: Exact match
+ text = "word0,((word1)),word2"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Positive case 2: Additional parentheses around the entire sequence
+ text = "(word0,((word1)),word2)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Single closing parenthesis missing between word1 and word2
+ text = "word0,((word1),word2)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ # Single opening parenthesis missing between word0 and word1
+ text = "word0,(word1)),word2"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ def test_mismatched_parentheses(self):
+ query_text = "word0,((word1)),(word2,word3)"
+ specific_words = ["word0", "word1", "word2", "word3"]
+ anywhere_words = []
+
+ # Positive case 1: Exact match
+ text = "word0,((word1)),(word2,word3)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Positive case 2: Reordered sequence with the same delimiters
+ text = "(word2,word3),word0,((word1))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Positive case 3: Additional text in between but the delimiters remain the same
+ text = "word0,someExtraText,((word1)),someMoreText,(word2,word3)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+ # Extra closing parenthesis between word2 and word3
+ text = "word0,((word1),(word2,word3))"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ # Extra opening parenthesis between word1 and word2
+ text = "word0,((word1),((word2,word3)"
+ self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+ def test_wildcard_matching_verify_delimiters(self):
+ query_text = "word0, ((word1.*?)), word2.*?"
+ delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"])
+
+ # Positive test cases
+ text = "((word1)), word0, word2X"
+ self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+ text = "word0, ((word1Y)), word2Z"
+ self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+ # Negative test cases
+ text = "word0, (word1), word2"
+ self.assertFalse(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+class TestFindMatching(unittest.TestCase):
+ def base_find_matching(self, series, search_string, expected):
+ mask = find_matching(series, search_string)
+ self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}")
+
+ def test_basic_matching(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "(word1), word0, (word2)"
+ ])
+ search_string = "word0, ((word1)), word2"
+ expected = pd.Series([False, True, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_anywhere_words(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "word0, (word3), ((word1)), word2"
+ ])
+ search_string = "@word3, word0, ((word1)), word2"
+ expected = pd.Series([False, False, True])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_mismatched_parentheses(self):
+ series = pd.Series([
+ "(word1), word0, ((word2))",
+ "word0, ((word1)), word2",
+ "word0, (word1)), word2",
+ "word0, ((word1), word2"
+ ])
+ search_string = "word0, ((word1)), word2"
+ expected = pd.Series([False, True, False, False])
+ self.base_find_matching(series, search_string, expected)
+
+ def test_wildcard_matching(self):
+ series = pd.Series([
+ "word2, word0, ((word1X))",
+ "word0, ((word1Y)), word2Z",
+ "word0, (word1), word2"
+ ])
+ search_string = "word0, ((word1*)), word2*"
+ expected = pd.Series([True, True, False])
+ self.base_find_matching(series, search_string, expected)
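
For orientation, a small usage sketch of the helpers exercised by this new test module. The example mirrors the test_anywhere_words case above; the commented values are taken from those test cases rather than verified independently.

    import pandas as pd
    from hed.models.basic_search import find_words, construct_delimiter_map, find_matching

    query = "@word3, word0, ((word1)), word2"
    anywhere, specific = find_words(query)          # ['word3'], ['word0', 'word1', 'word2']

    # The delimiter map records the parenthesis nesting between each pair of
    # specific words in the query; '@' words only need to appear somewhere.
    dmap = construct_delimiter_map(query, specific)

    series = pd.Series([
        "word0, ((word1)), word2",                  # right structure, but word3 is missing
        "word0, (word3), ((word1)), word2",         # word3 appears anywhere, so this matches
    ])
    print(list(find_matching(series, query)))       # [False, True]
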
diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py
index cca54411..5bdb71b7 100644
--- a/tests/models/test_expression_parser.py
+++ b/tests/models/test_expression_parser.py
@@ -118,7 +118,7 @@ def test_finding_tags2(self):
"Agent, (Event)": True,
"(Item), (Event)": True
}
- self.base_test("(Item or Agent) and [[Action or Event]]", test_strings)
+ self.base_test("(Item or Agent) and {Action or Event}", test_strings)
def test_exact_group(self):
test_strings = {
@@ -131,7 +131,7 @@ def test_exact_group(self):
"(A, B, (C, D))": True,
"(A, B, C)": True
}
- self.base_test("[[a, b]]", test_strings)
+ self.base_test("{a, b}", test_strings)
def test_exact_group_simple_complex(self):
test_strings = {
@@ -145,7 +145,7 @@ def test_exact_group_simple_complex(self):
"(E, F, (A, B, (C, D)))": True,
"(A, B, (E, F, (C, D)))": False, # TODO: Should this be True? [[c]] isn't directly inside an a group.
}
- self.base_test("[[a, [[c]] ]]", test_strings)
+ self.base_test("{a, {c} }", test_strings)
def test_exact_group_complex(self):
test_strings = {
@@ -155,7 +155,7 @@ def test_exact_group_complex(self):
"(A, B, ((C, D)))": False,
"(E, F, (A, B, (C, D)))": True,
}
- self.base_test("[[a, b, [[c, d]] ]]", test_strings)
+ self.base_test("{a, b, {c, d} }", test_strings)
def test_duplicate_search(self):
test_strings = {
@@ -183,7 +183,7 @@ def test_exact_group_complex_split(self):
"(E, F, (A, B, (C, D)))": False,
"((A, B), (C, D))": True,
}
- self.base_test("[[ [[a, b]], [[c, d]] ]]", test_strings)
+ self.base_test("{ {a, b}, {c, d} }", test_strings)
def test_mixed_group_split(self):
test_strings = {
@@ -192,7 +192,7 @@ def test_mixed_group_split(self):
"((Event), ((Clear-throat)))": True,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [Event], [Action] ]]", test_strings)
+ self.base_test("{ [Event], [Action] }", test_strings)
def test_exact_group_split(self):
test_strings = {
@@ -201,7 +201,7 @@ def test_exact_group_split(self):
"((Event), ((Clear-throat)))": False,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [[Event]], [[Action]] ]]", test_strings)
+ self.base_test("{ {Event}, {Action} }", test_strings)
def test_exact_group_split_or(self):
test_strings = {
@@ -210,17 +210,18 @@ def test_exact_group_split_or(self):
"((A), ((D)))": True,
"((A, D))": True,
}
- self.base_test("[[ [[a]] or [[d]] ]]", test_strings)
+ self.base_test("{ {a} or {d} }", test_strings)
def test_exact_group_split_or_negation(self):
test_strings = {
- "(Event, Clear-throat)": False,
+ # "(Event, Clear-throat)": False,
"((Event), (Clear-throat))": True,
"((Event))": False,
"((Event), ((Clear-throat)))": True,
"((Event, Clear-throat))": False,
}
- self.base_test("[[ [[~Event]] ]]", test_strings)
+ # Need to think this through more. How do you exact match a negative tag?
+ self.base_test("{ {~Event} }", test_strings)
def test_exact_group_split_or_negation_dual(self):
test_strings = {
@@ -233,7 +234,7 @@ def test_exact_group_split_or_negation_dual(self):
"((A), (B, C))": False,
"((A), ((B), C))": True,
}
- self.base_test("[[ [[~a and ~b]] ]]", test_strings)
+ self.base_test("{ {~a and ~b} }", test_strings)
def test_exact_group_split_or_negation_dual2(self):
test_strings = {
@@ -246,7 +247,7 @@ def test_exact_group_split_or_negation_dual2(self):
"((A), (B, C))": False,
"((A), ((B), C))": True,
}
- self.base_test("[[ [[~(a or b)]] ]]", test_strings)
+ self.base_test("{ {~(a or b)} }", test_strings)
def test_exact_group_split_or_negation_complex(self):
test_strings = {
@@ -260,7 +261,7 @@ def test_exact_group_split_or_negation_complex(self):
"((A), (B, C)), (D)": False,
"((A), (B, C)), (H)": False,
}
- self.base_test("[[ [[~(a or b)]] ]] and [[D or ~F]]", test_strings)
+ self.base_test("{ {~(a or b)} } and {D or ~F}", test_strings)
# TODO: Should this work, and what should it mean?
# Right now this is always true, since there is at least one group without ", (a)" in every string.
@@ -272,7 +273,7 @@ def test_exact_group_negation(self):
"((A), ((D)))": True,
"((A, D))": True,
}
- self.base_test("[[ ~[[a]] ]]", test_strings)
+ self.base_test("{ ~{a} }", test_strings)
def test_exact_group_negation2(self):
test_strings = {
@@ -282,9 +283,42 @@ def test_exact_group_negation2(self):
"((A), ((D, B)))": True,
"((A, D))": False,
"(B, (D))": True,
- "(B)": True
+ "(B)": True,
+ "((A), B)": False
}
- self.base_test("[[ ~[[a]], b]]", test_strings)
+ self.base_test("{ ~{a}, b}", test_strings)
+
+ def test_exact_group_negation3(self):
+ test_strings = {
+ "(A, D, B)": False,
+ "((A), (D), B)": True,
+ "((A))": False,
+ "((A), ((D, B)))": True,
+ "((A, D))": False,
+ "(B, (D))": True,
+ "(B)": True,
+ "((A), B)": True
+ }
+ self.base_test("{ ~a and b}", test_strings)
+
+ def test_exact_group_negation4(self):
+ test_strings = {
+ "(A, D, B)": False,
+ "((A), (D), B)": False,
+ "((A))": False,
+ "((A), ((D, B)))": False,
+ "((A, D))": False,
+ "(B)": True,
+ "(B, (D))": True,
+ "((A), B)": False
+ }
+ self.base_test("{ @c and @a and b: ???}", test_strings)
+
+ def test_exact_group_negation5(self):
+ test_string = "{ ~a and b:}"
+ with self.assertRaises(ValueError) as context:
+ QueryParser(test_string)
+ self.assertTrue(context.exception.args[0])
def test_mixed_group_complex_split(self):
test_strings = {
@@ -297,7 +331,7 @@ def test_mixed_group_complex_split(self):
"((A, B), (C, D))": True,
"((A, B, C, D))": False,
}
- self.base_test("[[ [a, b], [c, d] ]]", test_strings)
+ self.base_test("{ [a, b], [c, d] }", test_strings)
def test_exact_group_complex2(self):
test_strings = {
@@ -309,7 +343,7 @@ def test_exact_group_complex2(self):
"(B, (C)), (A, B, (C))": True,
"(A, B, (A, (C)))": False
}
- self.base_test("[[a, b, [[c]] ]]", test_strings)
+ self.base_test("{a, b, {c} }", test_strings)
def test_containing_group_complex2(self):
test_strings = {
@@ -362,13 +396,13 @@ def test_mixed_groups(self):
test_strings = {
"(A, B), (C, D, (E, F))": True
}
- self.base_test("[[a]], [[ [[e, f]] ]]", test_strings)
+ self.base_test("{a}, { {e, f} }", test_strings)
test_strings = {
"(A, B), (C, D, (E, F))": False
}
# This example works because it finds the group containing (c, d, (e, f)), rather than the ef group
- self.base_test("[[a]], [e, [[f]] ]", test_strings)
+ self.base_test("{a}, [e, {f} ]", test_strings)
def test_and(self):
test_strings = {
@@ -411,18 +445,17 @@ def test_and_wildcard_nothing_else(self):
"A": False,
"B": False,
"C": False,
- "A, B": True,
+ "A, B": False,
"A, C": False,
"B, C": False,
"A, B, C": False,
"D, A, B": False,
"A, B, (C)": False,
"(A, B), C": True,
- "(A, B, C)": False,
+ "(A, B, C)": True,
}
self.base_test("{a and b}", test_strings)
- def test_and_wildcard_nothing_else2(self):
test_strings = {
"A": False,
"B": False,
@@ -436,8 +469,7 @@ def test_and_wildcard_nothing_else2(self):
"(A, B), C": True,
"(A, B, C)": False,
}
- self.base_test("[{a and b}]", test_strings)
- self.base_test("[[{a and b}]]", test_strings)
+ self.base_test("{a and b:}", test_strings)
def test_and_logical_wildcard(self):
test_strings = {
@@ -450,9 +482,11 @@ def test_and_logical_wildcard(self):
self.base_test("A, B and ?", test_strings)
test_strings = {
- "A": False,
+ "A": True,
"A, C": True,
"A, B, C": True,
+ "B, C": False,
+ "B, C, D, E": True
}
self.base_test("(a or (b and c) and ?)", test_strings)
@@ -469,7 +503,7 @@ def test_double_wildcard(self):
def test_or_wildcard(self):
test_strings = {
- "A": False,
+ "A": True,
"B": False,
"C": False,
"A, B": True,
@@ -589,10 +623,10 @@ def test_and_or(self):
self.base_test("a and (b or c)", test_strings)
test_strings = {
- "A": False,
+ "A": True,
"B": False,
"C": False,
- "A, B": False,
+ "A, B": True,
"A, C": True,
"B, C": True
}
@@ -698,35 +732,43 @@ def test_not_in_line3(self):
def test_optional_exact_group(self):
test_strings = {
- "A, C": True,
+ "(A, C)": True,
}
self.base_test("{a and (b or c)}", test_strings)
test_strings = {
- "A, B, C, D": True,
+ "(A, B, C, D)": True,
}
self.base_test("{a and b: c and d}", test_strings)
test_strings = {
- "A, B, C": True,
- "A, B, C, D": False,
+ "(A, B, C)": True,
+ "(A, B, C, D)": False,
}
self.base_test("{a and b: c or d}", test_strings)
test_strings = {
- "A, C": True,
- "A, D": True,
- "A, B, C": False,
- "A, B, C, D": False,
+ "(A, C)": True,
+ "(A, D)": True,
+ "(A, B, C)": False,
+ "(A, B, C, D)": False,
}
self.base_test("{a or b: c or d}", test_strings)
test_strings = {
"(Onset, (Def-expand/taco))": True,
+ "(Onset, Def-expand/taco)": False,
+ "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates
+ "(Onset, (Def/taco))": False,
"(Onset, (Def-expand/taco, (Label/DefContents)))": True,
"(Onset, (Def-expand/taco), (Label/OnsetContents))": True,
"(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True,
"Onset, (Def-expand/taco), (Label/OnsetContents)": False,
"(Onset, (Def-expand/taco), Label/OnsetContents)": False,
}
- self.base_test("[[{(Onset or Offset), (Def or [[Def-expand]]): ???}]]", test_strings)
\ No newline at end of file
+ self.base_test("{(Onset or Offset), (Def or {Def-expand}): ???}", test_strings)
+ test_strings = {
+ "(A, B)": True,
+ "(A, B, C)": True
+ }
+ self.base_test("{a or b}", test_strings)
\ No newline at end of file
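
The expression-parser tests above switch the exact-group syntax from the old [[...]] form to braces, with ':' separating the required group members from constraints on the rest of the group and '???' acting as a wildcard. A short sketch of feeding those same query strings to QueryParser directly; the import path is an assumption, and only the constructor and the ValueError behaviour shown in the tests above are relied on.

    from hed.models.expression_parser import QueryParser   # import path assumed

    QueryParser("{a and b}")               # a and b must appear together inside a group
    QueryParser("{a and b: c or d}")       # ':' constrains what else the matched group may hold
    QueryParser("{(Onset or Offset), (Def or {Def-expand}): ???}")

    try:
        QueryParser("{ ~a and b:}")        # rejected: negation with an empty remainder
    except ValueError as err:
        print(err.args[0])
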
diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py
index f3591ead..75f66d17 100644
--- a/tests/schema/test_hed_schema_io.py
+++ b/tests/schema/test_hed_schema_io.py
@@ -170,6 +170,8 @@ def _base_merging_test(self, files):
reload1 = load_schema(path1)
reload2 = load_schema(path2)
self.assertEqual(reload1, reload2)
+ except Exception:
+ self.fail("Unexpected exception while reloading saved schema")
finally:
os.remove(path1)
os.remove(path2)
@@ -183,6 +185,8 @@ def _base_merging_test(self, files):
reload1 = load_schema(path1)
reload2 = load_schema(path2)
self.assertEqual(reload1, reload2)
+ except Exception:
+ self.fail("Unexpected exception while reloading saved schema")
finally:
os.remove(path1)
os.remove(path2)
@@ -241,10 +245,10 @@ def _base_added_class_tests(self, schema):
unit_class_entry = schema.unit_classes["weightUnits"]
unit_entry = unit_class_entry.units["testUnit"]
- self.assertEqual(unit_entry.attributes["conversionFactor"], str(100))
+ self.assertEqual(unit_entry.attributes[HedKey.ConversionFactor], str(100))
unit_modifier_entry = schema.unit_modifiers["huge"]
- self.assertEqual(unit_modifier_entry.attributes["conversionFactor"], "10^100")
+ self.assertEqual(unit_modifier_entry.attributes[HedKey.ConversionFactor], "10^100")
self.assertTrue(unit_modifier_entry.attributes["customElementAttribute"])
value_class_entry = schema.value_classes["customValueClass"]
@@ -324,9 +328,9 @@ def test_cannot_load_schemas(self):
]
for file in files:
- with self.assertRaises(HedFileError):
- # print(file)
+ with self.assertRaises(HedFileError) as context:
load_schema(file)
+ self.assertEqual(context.exception.code, HedExceptions.SCHEMA_LIBRARY_INVALID)
def test_saving_in_library_wiki(self):
old_score_schema = load_schema_version("score_1.0.0")
diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py
index 583579b1..0759dba4 100644
--- a/tests/schema/test_schema_wiki_fatal_errors.py
+++ b/tests/schema/test_schema_wiki_fatal_errors.py
@@ -1,7 +1,7 @@
import unittest
import os
-from hed import schema
+from hed import load_schema
from hed.errors import HedFileError, HedExceptions
@@ -12,25 +12,25 @@ class TestHedSchema(unittest.TestCase):
def setUpClass(cls):
cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir)
cls.files_and_errors = {
- "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_START_MISSING,
- "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_END_INVALID,
- "HED_hed_no_end.mediawiki": HedExceptions.HED_END_INVALID,
- "HED_separator_invalid.mediawiki": HedExceptions.INVALID_SECTION_SEPARATOR,
+ "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID,
"HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING,
- "HED_header_invalid.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
- "empty_file.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
- "HED_header_invalid_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
- "HED_header_missing_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
+ "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+ "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+ "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
+ "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
"HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME,
- "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_START_MISSING,
- "empty_node.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line2.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line3.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line4.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line5.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line6.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
- "malformed_line7.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+ "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+ "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+ "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
"empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID
}
@@ -60,9 +60,10 @@ def test_invalid_schema(self):
for filename, error in self.files_and_errors.items():
full_filename = self.full_base_folder + filename
with self.assertRaises(HedFileError) as context:
- schema.load_schema(full_filename)
+ load_schema(full_filename)
# all of these should produce exceptions.
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+ from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+
# Verify basic properties of exception
expected_line_numbers = self.expected_line_numbers.get(filename, [])
if expected_line_numbers:
@@ -82,9 +83,10 @@ def test_merging_errors_schema(self):
for filename, error in self.files_and_errors.items():
full_filename = self.full_base_folder + filename
with self.assertRaises(HedFileError) as context:
- schema.load_schema(full_filename)
+ load_schema(full_filename)
# all of these should produce exceptions.
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+ from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+ from hed.errors.error_types import SchemaAttributeErrors
# Verify basic properties of exception
expected_line_numbers = self.expected_line_numbers.get(filename, [])
if expected_line_numbers:
@@ -96,7 +98,7 @@ def test_merging_errors_schema(self):
error_handler.push_error_context(ErrorContext.ROW, 1)
error_handler.push_error_context(ErrorContext.COLUMN, 2)
- issues = error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+ issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
"error_attribute", source_tag="error_tag")
error_handler.pop_error_context()
error_handler.pop_error_context()
@@ -106,3 +108,9 @@ def test_merging_errors_schema(self):
self.assertTrue(context.exception.args[0] == error)
self.assertTrue(context.exception.filename == full_filename)
+
+ def test_attribute_invalid(self):
+ path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki")
+ schema = load_schema(path)
+ issues = schema.check_compliance()
+ self.assertEqual(len(issues), 7)
\ No newline at end of file
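
The new test_attribute_invalid test draws a line between fatal load errors and compliance issues: structural problems still raise HedFileError at load time, while unknown attributes only surface from check_compliance(). A hedged sketch of that split; the file name is the one used in the test above and the path handling is illustrative.

    from hed import load_schema
    from hed.errors import HedFileError

    try:
        schema = load_schema("attribute_unknown1.mediawiki")   # path shortened for illustration
    except HedFileError as err:
        print(err.code)              # fatal structural problems are raised at load time
    else:
        issues = schema.check_compliance()
        print(len(issues))           # unknown attributes are reported here (7 in the test above)
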
diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py
index 850d014e..415b94dc 100644
--- a/tests/schema/util_create_schemas.py
+++ b/tests/schema/util_create_schemas.py
@@ -10,13 +10,30 @@
"""
library_schema_end = """
-!# end schema
+
!# end hed
"""
-def _get_test_schema(node_lines):
- library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end
+default_end_lines = """
+!# end schema
+"""
+
+required_non_tag = [
+"'''Unit classes'''",
+"'''Unit modifiers'''",
+"'''Value classes'''",
+"'''Schema attributes'''",
+"'''Properties'''",
+"'''Epilogue'''"
+]
+def _get_test_schema(node_lines, other_lines=(default_end_lines,)):
+ node_section = "\n".join(node_lines)
+ non_tag_section = "\n".join(other_lines)
+ for name in required_non_tag:
+ if name not in other_lines:
+ non_tag_section += f"\n{name}\n"
+ library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end
test_schema = from_string(library_schema_string, ".mediawiki")
return test_schema
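
A hypothetical call pattern for the updated helper; the tag line below is a placeholder, not verified mediawiki syntax. With the default other_lines the standard "!# end schema" line is used, and any required non-tag section header the caller does not supply is appended before the string is handed to from_string.

    node_lines = ["'''PlaceholderTag''' <nowiki></nowiki>"]   # placeholder tag line
    test_schema = _get_test_schema(node_lines)                # uses default_end_lines
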