diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 7305e7c6..a866ec32 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -107,7 +107,7 @@ class SidecarErrors: class SchemaErrors: SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE' - SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID' + SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID" @@ -119,19 +119,22 @@ class SchemaWarnings: SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID" SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps' SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS' - SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE" class SchemaAttributeErrors: + SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID' + SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID' SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID" SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID" - SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID" SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID" SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID" + SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID" + SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID" SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID" - SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID + SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" + SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE" class DefinitionErrors: diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index e7ee857b..e368ec43 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -14,8 +14,9 @@ class HedExceptions: INVALID_DATAFRAME = 'INVALID_DATAFRAME' INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT' # These are actual schema issues, not that the file cannot be found or parsed - SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID' - HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID' + SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID' + SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID' + SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID" SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID" BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID' @@ -26,14 +27,14 @@ class HedExceptions: ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID" IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID" - HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID' - SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID' - SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID' - HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID' - INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator' + SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID' + SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING' + + WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator' # This issue will contain a list of lines with issues. 
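A side note on the constants above: several renamed names intentionally alias a single reported code string, so callers that branch on the code see one value regardless of which symbolic name raised it. A quick sketch of that aliasing (not part of this diff):

    from hed.errors.exceptions import HedExceptions

    # Distinct symbolic names deliberately collapse to one reported code string.
    assert HedExceptions.SCHEMA_HEADER_MISSING == HedExceptions.SCHEMA_HEADER_INVALID == 'SCHEMA_HEADER_INVALID'
    assert HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE == 'SCHEMA_HEADER_INVALID'
    assert HedExceptions.BAD_HED_LIBRARY_NAME == HedExceptions.SCHEMA_LIBRARY_INVALID == 'SCHEMA_LIBRARY_INVALID'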
- HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID' + WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID' + WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID' HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID' SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix' diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py index b72e8470..b8962682 100644 --- a/hed/errors/known_error_codes.py +++ b/hed/errors/known_error_codes.py @@ -31,6 +31,7 @@ ], "schema_validation_errors": [ "SCHEMA_ATTRIBUTE_INVALID", + "SCHEMA_ATTRIBUTE_VALUE_INVALID", "SCHEMA_CHARACTER_INVALID", "SCHEMA_DUPLICATE_NODE", "SCHEMA_HEADER_INVALID", diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index b7fda9d5..8c196f9e 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section): f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}" -@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID) def schema_error_unknown_attribute(attribute_name, source_tag): return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \ f"or was used outside of it's defined class." @@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}." -@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING) +@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name): return "Only placeholder nodes('#') can have a unit class, value class, or takes value." + \ f"Found {invalid_attribute_name} on {tag_name}" -@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR) -def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name): - return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'." \ - f"Something went very wrong." - -@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version): return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'." @hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, - actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID) + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child): return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'." -@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name): return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'." 
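The actual_code remapping applied to the decorators in this file means each narrowly registered message is reported under the umbrella SCHEMA_ATTRIBUTE_VALUE_INVALID code. A minimal sketch of what a caller sees (the tag and suggested-tag values below are invented for illustration):

    from hed.errors import ErrorHandler
    from hed.errors.error_types import SchemaAttributeErrors

    # Arguments mirror the parameters of the decorated message function above.
    issues = ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID,
                                       "Some-tag", "No-such-tag", "suggestedTag")
    # With actual_code=SCHEMA_ATTRIBUTE_VALUE_INVALID, the reported code should be the umbrella code.
    print(issues[0]['code'])  # expected: 'SCHEMA_ATTRIBUTE_VALUE_INVALID'

The unit-class, value-class, default-units, conversion-factor, allowedCharacter, and inLibrary messages below follow the same pattern.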
-@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." -@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." -@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units): valid_units = ",".join(valid_units) return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. Valid units are: '{valid_units}'." + + +@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor): + return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive." + + +@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character): + return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. " + f"Allowed characters are: a single character, " + f"or one of the following - letters, blank, digits, alphanumeric.") + + +@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library): + return (f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. ") diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 12e2d889..9f437102 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -137,7 +137,7 @@ def _indexed_dict_from_onsets(onsets): @staticmethod def _filter_by_index_list(original_series, indexed_dict): - new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a" + new_series = pd.Series(["n/a"] * len(original_series)) for onset, indices in indexed_dict.items(): if indices: diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py new file mode 100644 index 00000000..ae47b71e --- /dev/null +++ b/hed/models/basic_search.py @@ -0,0 +1,237 @@ +import re +from itertools import combinations, product +from collections import defaultdict +import pandas as pd + + +def find_matching(series, search_string, regex=False): + """ Finds lines in the series that match the search string and returns a mask. + + Syntax Rules: + - '@': Prefixing a term in the search string means the object must appear anywhere within a line. + - Parentheses: Elements within parentheses must appear in the line with the same level of nesting. + eg: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't + start in the same group. + - "LongFormTag*": A * will match any remaining word(anything but a comma or parenthesis) + - An individual term can be arbitrary regex, but it is limited to single continuous words. 
+ + Notes: + - The format of the series should match the format of the search string, whether it's in short or long form. + - To enable support for matching parent tags, ensure that both the series and search string are in long form. + + Args: + series (pd.Series): A Pandas Series object containing the lines to be searched. + search_string (str): The string to search for in each line of the series. + regex (bool): By default, translate any * wildcard characters to .*? regex + If True, do no translation and pass the words as is. Due to how it's setup, you must not include + the following characters: (), + + Returns: + mask (pd.Series): A Boolean mask Series of the same length as the input series. + The mask has `True` for lines that match the search string and `False` otherwise. + """ + if not regex: + # Replace *'s with a reasonable value for people who don't know regex + search_string = re.sub(r'(?= 3.9 - # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups] + # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups] # Python 3.7/8 compatible version. - negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() + negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if not any(group is found_group.group for found_group in found_groups)] return negated_groups -class ExpressionContainingGroup(Expression): - def handle_expr(self, hed_group, exact=False): - result = self.right.handle_expr(hed_group, exact=True) - found_groups = result - if result: - found_parent_groups = [] - for group in found_groups: - if not group.group.is_group: - continue - if group.group._parent: - found_parent_groups.append(search_result(group.group._parent, group.group)) - - if found_parent_groups: - return found_parent_groups - - return [] - - class ExpressionDescendantGroup(Expression): def handle_expr(self, hed_group, exact=False): found_groups = self.right.handle_expr(hed_group) - found_parent_groups = [] - if found_groups: - for group in found_groups: - if not group.group.is_group: - continue - if group.group._parent: - found_parent_groups.append(search_result(group.group._parent, group.group)) - - if found_parent_groups: - return found_parent_groups - return [] + found_parent_groups = self._get_parent_groups(found_groups) + return found_parent_groups class ExpressionExactMatch(Expression): + def __init__(self, token, left=None, right=None): + super().__init__(token, left, right) + self.optional = "any" + + def _filter_exact_matches(self, search_results): + filtered_list = [] + for group in search_results: + if len(group.group.children) == len(group.tags): + filtered_list.append(group) + + return filtered_list + def handle_expr(self, hed_group, exact=False): found_groups = self.right.handle_expr(hed_group, exact=True) - if found_groups: - return_list = [] - for group in found_groups: - if len(group.group.children) == len(group.tags): - return_list.append(group) + if self.optional == "any": + return self._get_parent_groups(found_groups) - if return_list: - return return_list + filtered_list = self._filter_exact_matches(found_groups) + if filtered_list: + return self._get_parent_groups(filtered_list) # Basically if we don't have an exact match above, do the more complex matching including optional if self.left: optional_groups = self.left.handle_expr(hed_group, exact=True) found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups) - if 
found_groups: - return_list = [] - for group in found_groups: - if len(group.group.children) == len(group.tags): - return_list.append(group) - - if return_list: - return return_list + filtered_list = self._filter_exact_matches(found_groups) + if filtered_list: + return self._get_parent_groups(filtered_list) return [] @@ -337,7 +322,6 @@ class QueryParser: def __init__(self, expression_string): """Compiles a QueryParser for a particular expression, so it can be used to search hed strings. - Basic Input Examples: 'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event @@ -354,11 +338,15 @@ def __init__(self, expression_string): '[Event and Action]' - Find a group that contains both Event and Action(at any level) - '[[Event and Action]]' - Find a group with Event And Action at the same level. + '{Event and Action}' - Find a group with Event And Action at the same level. + + '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else + + '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag. Practical Complex Example: - [[{(Onset or Offset), (Def or [[Def-expand]]): ???}]] - A group with an onset tag, + {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag, a def tag or def-expand group, and an optional wildcard group Parameters: @@ -392,15 +380,22 @@ def current_token(self): def _handle_and_op(self): expr = self._handle_negation() - next_token = self._next_token_is([Token.And, Token.Or]) + next_token = self._next_token_is([Token.And]) while next_token: right = self._handle_negation() if next_token.kind == Token.And: expr = ExpressionAnd(next_token, expr, right) - elif next_token.kind == Token.Or: - expr = ExpressionOr(next_token, expr, right) - next_token = self._next_token_is([Token.And, Token.Or]) + next_token = self._next_token_is([Token.And]) + return expr + def _handle_or_op(self): + expr = self._handle_and_op() # Note: calling _handle_and_op here + next_token = self._next_token_is([Token.Or]) + while next_token: + right = self._handle_and_op() # Note: calling _handle_and_op here + if next_token.kind == Token.Or: + expr = ExpressionOr(next_token, expr, right) + next_token = self._next_token_is([Token.Or]) return expr def _handle_negation(self): @@ -417,33 +412,35 @@ def _handle_negation(self): def _handle_grouping_op(self): next_token = self._next_token_is( - [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) - if next_token == Token.ContainingGroup: - interior = self._handle_and_op() - expr = ExpressionContainingGroup(next_token, right=interior) - next_token = self._next_token_is([Token.ContainingGroupEnd]) - if next_token != Token.ContainingGroupEnd: - raise ValueError("Parse error: Missing closing square brackets") - # Can we move this to the and_or level? or does that break everything...? 
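Taken together, the new _handle_or_op above (which delegates to _handle_and_op) and the reworked grouping handling here give 'and' higher precedence than 'or' and move exact matching to curly braces. A construction-only sketch of the revised grammar (the import path is assumed; a malformed query raises ValueError):

    from hed.models.expression_parser import QueryParser  # import path assumed

    # 'and' now binds tighter than 'or', so these two queries should compile to equivalent trees.
    QueryParser("Event or Action and Agent")
    QueryParser("Event or (Action and Agent)")

    # The curly-brace forms described in the docstring above.
    QueryParser("{Event and Action}")         # Event and Action at the same level
    QueryParser("{Event and Action:}")        # ...and nothing else in that group
    QueryParser("{Event and Action:Agent}")   # ...plus an optional Agent tag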
- elif next_token == Token.LogicalGroup: - expr = self._handle_and_op() + [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch]) + if next_token == Token.LogicalGroup: + expr = self._handle_or_op() next_token = self._next_token_is([Token.LogicalGroupEnd]) if next_token != Token.LogicalGroupEnd: raise ValueError("Parse error: Missing closing paren") elif next_token == Token.DescendantGroup: - interior = self._handle_and_op() + interior = self._handle_or_op() expr = ExpressionDescendantGroup(next_token, right=interior) next_token = self._next_token_is([Token.DescendantGroupEnd]) if next_token != Token.DescendantGroupEnd: raise ValueError("Parse error: Missing closing square bracket") elif next_token == Token.ExactMatch: - interior = self._handle_and_op() + interior = self._handle_or_op() expr = ExpressionExactMatch(next_token, right=interior) next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional]) if next_token == Token.ExactMatchOptional: - optional_portion = self._handle_and_op() - expr.left = optional_portion + # We have an optional portion - this needs to now be an exact match + expr.optional = "none" next_token = self._next_token_is([Token.ExactMatchEnd]) + if next_token != Token.ExactMatchEnd: + optional_portion = self._handle_or_op() + expr.left = optional_portion + next_token = self._next_token_is([Token.ExactMatchEnd]) + if "~" in str(expr): + raise ValueError("Cannot use negation in exact matching groups," + " as it's not clear what is being matched.\n" + "{thing and ~(expression)} is allowed.") + if next_token is None: raise ValueError("Parse error: Missing closing curly bracket") else: @@ -452,13 +449,15 @@ def _handle_grouping_op(self): expr = ExpressionWildcardNew(next_token) elif next_token: expr = Expression(next_token) + else: + expr = None return expr def _parse(self, expression_string): self.tokens = self._tokenize(expression_string) - expr = self._handle_and_op() + expr = self._handle_or_op() if self.at_token + 1 != len(self.tokens): raise ValueError("Parse error in search string") diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 4cb3729c..0857abe9 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class): # Add the InLibrary attribute to any library schemas as they are loaded # These are later removed when they are saved out, if saving unmerged if self.library and (not self.with_standard or (not self.merged and self.with_standard)): - new_entry._set_attribute_value(HedKey.InLibrary, self.library) + # only add it if not already present - This is a rare case + if not new_entry.has_attribute(HedKey.InLibrary): + new_entry._set_attribute_value(HedKey.InLibrary, self.library) section = self._sections[key_class] return section._add_to_dict(long_tag_name, new_entry) diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 0cecc4ab..60a1a934 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -42,6 +42,7 @@ class HedKey: SuggestedTag = "suggestedTag" Rooted = "rooted" DeprecatedFrom = "deprecatedFrom" + ConversionFactor = "conversionFactor" # All known properties BoolProperty = 'boolProperty' diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 102795d8..936943e8 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -176,7 +176,6 @@ def __eq__(self, other): return False return True - class 
UnitEntry(HedSchemaEntry): """ A single unit entry with modifiers in the HedSchema. """ def __init__(self, *args, **kwargs): @@ -207,12 +206,13 @@ def finalize_entry(self, schema): self.derivative_units = derivative_units def _get_conversion_factor(self, modifier_entry): - - base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e")) - if modifier_entry: - modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e")) - else: - modifier_factor = 1.0 + base_factor = modifier_factor = 1.0 + try: + base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) + if modifier_entry: + modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) + except (ValueError, AttributeError) as e: + pass # Just default to 1.0 return base_factor * modifier_factor def get_conversion_factor(self, unit_name): @@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name): Returns: conversion_factor(float or None): Returns the conversion factor or None """ - if "conversionFactor" in self.attributes: + if HedKey.ConversionFactor in self.attributes: return float(self.derivative_units.get(unit_name)) class HedTagEntry(HedSchemaEntry): diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index d1d7f5ec..0ccb9c33 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, tag_entry.name, child.name) - return issues \ No newline at end of file + return issues + + +def conversion_factor(hed_schema, tag_entry, attribute_name): + issues = [] + conversion_factor = tag_entry.attributes.get(attribute_name, "1.0") + try: + conversion_factor = float(conversion_factor.replace("^", "e")) + except (ValueError, AttributeError) as e: + pass + if not isinstance(conversion_factor, float) or conversion_factor <= 0.0: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, + tag_entry.name, + conversion_factor) + + return issues + + +def allowed_characters_check(hed_schema, tag_entry, attribute_name): + """ Check allowed character has a valid value + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this attribute. + attribute_name (str): The name of this attribute + + Returns: + list: A list of issues. Each issue is a dictionary. + + """ + issues = [] + allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'} + + char_string = tag_entry.attributes.get(attribute_name, "") + characters = char_string.split(",") + for character in characters: + if character not in allowed_strings and len(character) != 1: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + tag_entry.name, + character) + return issues + + +def in_library_check(hed_schema, tag_entry, attribute_name): + """ Check allowed character has a valid value + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this attribute. + attribute_name (str): The name of this attribute + + Returns: + list: A list of issues. Each issue is a dictionary. 
+ + """ + issues = [] + + library = tag_entry.attributes.get(attribute_name, "") + if hed_schema.library != library: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + tag_entry.name, + library) + return issues diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index c75c11de..1a68baf8 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl class SchemaValidator: """Validator class to wrap some code. In general, just call check_compliance.""" attribute_validators = { - HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check, - SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)], - HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check, - SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)], - HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS), - (schema_attribute_validators.unit_class_exists, - SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)], - HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS), - (schema_attribute_validators.value_class_exists, - SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)], + HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check], + HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check], + HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check, + schema_attribute_validators.unit_class_exists], + HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check, + schema_attribute_validators.value_class_exists], # Rooted tag is implicitly verified on loading - # HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check, - # SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)], - HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check, - SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)], - HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)], - HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists, - SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)] + # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check], + HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check], + HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check], + HedKey.DefaultUnits: [schema_attribute_validators.unit_exists], + HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor], + HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check], + HedKey.InLibrary: [schema_attribute_validators.in_library_check] } def __init__(self, hed_schema, check_for_warnings=True, error_handler=None): @@ -80,7 +73,7 @@ def check_unknown_attributes(self): if unknown_attributes: for attribute_name, source_tags in unknown_attributes.items(): for tag in source_tags: - issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID, + issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, attribute_name, source_tag=tag) return issues_list @@ -93,16 +86,14 @@ def check_attributes(self): for tag_entry in self.hed_schema[section_key].values(): self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, 
tag_entry.name) for attribute_name in tag_entry.attributes: - validators = self.attribute_validators.get(attribute_name, None) - if validators: - for validator, error_code in validators: - self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) - new_issues = validator(self.hed_schema, tag_entry, attribute_name) - for issue in new_issues: - issue['code'] = error_code - issue['severity'] = ErrorSeverity.WARNING - self.error_handler.add_context_and_filter(new_issues) - issues_list += new_issues + validators = self.attribute_validators.get(attribute_name, []) + for validator in validators: + self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) + new_issues = validator(self.hed_schema, tag_entry, attribute_name) + for issue in new_issues: + issue['severity'] = ErrorSeverity.WARNING + self.error_handler.add_context_and_filter(new_issues) + issues_list += new_issues self.error_handler.pop_error_context() self.error_handler.pop_error_context() self.error_handler.pop_error_context() diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py index e373cf1a..d9d082a1 100644 --- a/hed/schema/schema_io/schema2base.py +++ b/hed/schema/schema_io/schema2base.py @@ -106,9 +106,6 @@ def _output_tags(self, tags): self._end_tag_section() def _output_units(self, unit_classes): - if not unit_classes: - return - section_node = self._start_section(HedSectionKey.UnitClasses) for unit_class_entry in unit_classes.values(): @@ -128,8 +125,6 @@ def _output_units(self, unit_classes): self._write_entry(unit_entry, unit_class_node) def _output_section(self, hed_schema, key_class): - if not hed_schema[key_class]: - return parent_node = self._start_section(key_class) for entry in hed_schema[key_class].values(): if self._should_skip(entry): diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index a02f9ed6..de18f9d6 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -22,12 +22,19 @@ no_wiki_end_tag = '' -ErrorsBySection = { - HedWikiSection.Schema: HedExceptions.SCHEMA_START_MISSING, - HedWikiSection.EndSchema: HedExceptions.SCHEMA_END_INVALID, - HedWikiSection.EndHed: HedExceptions.HED_END_INVALID -} -required_sections = [HedWikiSection.Schema, HedWikiSection.EndSchema, HedWikiSection.EndHed] + +required_sections = [ + HedWikiSection.Prologue, + HedWikiSection.Schema, + HedWikiSection.EndSchema, + HedWikiSection.UnitsClasses, + HedWikiSection.UnitModifiers, + HedWikiSection.ValueClasses, + HedWikiSection.Attributes, + HedWikiSection.Properties, + HedWikiSection.Epilogue, + HedWikiSection.EndHed, +] class SchemaLoaderWiki(SchemaLoader): @@ -79,15 +86,13 @@ def _parse_data(self): # Validate we didn't miss any required sections. for section in required_sections: if section not in wiki_lines_by_section: - error_code = HedExceptions.INVALID_SECTION_SEPARATOR - if section in ErrorsBySection: - error_code = ErrorsBySection[section] + error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Required section separator '{SectionNames[section]}' not found in file" raise HedFileError(error_code, msg, filename=self.filename) if self.fatal_errors: self.fatal_errors = error_reporter.sort_issues(self.fatal_errors) - raise HedFileError(HedExceptions.HED_WIKI_DELIMITERS_INVALID, + raise HedFileError(self.fatal_errors[0]['code'], f"{len(self.fatal_errors)} issues found when parsing schema. 
See the .issues " f"parameter on this exception for more details.", self.filename, issues=self.fatal_errors) @@ -109,7 +114,7 @@ def _read_header_section(self, lines): for line_number, line in lines: if line.strip(): msg = f"Extra content [{line}] between HED line and other sections" - raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) def _read_text_block(self, lines): text = "" @@ -163,7 +168,8 @@ def _read_schema(self, lines): parent_tags = parent_tags[:level] elif level > len(parent_tags): self._add_fatal_error(line_number, line, - "Line has too many *'s at the front. You cannot skip a level.") + "Line has too many *'s at the front. You cannot skip a level." + , HedExceptions.WIKI_LINE_START_INVALID) continue # Create the entry tag_entry = self._add_tag_line(parent_tags, line_number, line) @@ -261,14 +267,37 @@ def _get_header_attributes_internal(self, version_line): if "=" not in version_line: return self._get_header_attributes_internal_old(version_line) - final_attributes = {} + attributes, malformed = self._parse_attributes_line(version_line) + + for m in malformed: + # todo: May shift this at some point to report all errors + raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID, + message=f"Header line has a malformed attribute {m}", + filename=self.filename) + return attributes + + @staticmethod + def _parse_attributes_line(version_line): + matches = {} + unmatched = [] + last_end = 0 for match in attr_re.finditer(version_line): - attr_name = match.group(1) - attr_value = match.group(2) - final_attributes[attr_name] = attr_value + start, end = match.span() - return final_attributes + # If there's unmatched content between the last match and the current one + if start > last_end: + unmatched.append(version_line[last_end:start]) + + matches[match.group(1)] = match.group(2) + last_end = end + + # If there's unmatched content after the last match + if last_end < len(version_line): + unmatched.append(version_line[last_end:]) + + unmatched = [m.strip() for m in unmatched if m.strip()] + return matches, unmatched def _get_header_attributes_internal_old(self, version_line): """ Extracts all valid attributes like version from the HED line in .mediawiki format. @@ -288,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line): divider_index = pair.find(':') if divider_index == -1: msg = f"Found poorly matched key:value pair in header: {pair}" - raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) key, value = pair[:divider_index], pair[divider_index + 1:] key = key.strip() value = value.strip() @@ -369,10 +398,17 @@ def _get_tag_name(self, tag_line): return None, 0 @staticmethod - def _get_tag_attributes(tag_line, starting_index): + def _validate_attribute_string(attribute_string): + pattern = r'^[A-Za-z]+(=.+)?$' + match = re.fullmatch(pattern, attribute_string) + if match: + return match.group() + + def _get_tag_attributes(self, line_number, tag_line, starting_index): """ Get the tag attributes from a line. Parameters: + line_number (int): The line number to report errors as tag_line (str): A tag line. starting_index (int): The first index we can check for the brackets. 
@@ -386,11 +422,14 @@ def _get_tag_attributes(tag_line, starting_index): return None, starting_index if attr_string: attributes_split = [x.strip() for x in attr_string.split(',')] - # Filter out attributes with spaces. - attributes_split = [a for a in attributes_split if " " not in a] final_attributes = {} for attribute in attributes_split: + if self._validate_attribute_string(attribute) is None: + self._add_fatal_error(line_number, tag_line, + f"Malformed attribute found {attribute}. " + f"Valid formatting is: attribute, or attribute=\"value\".") + continue split_attribute = attribute.split("=") if len(split_attribute) == 1: final_attributes[split_attribute[0]] = True @@ -468,7 +507,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None): if element_name: node_name = element_name - node_attributes, index = self._get_tag_attributes(tag_line, index) + node_attributes, index = self._get_tag_attributes(line_number, tag_line, index) if node_attributes is None: self._add_fatal_error(line_number, tag_line, "Attributes has mismatched delimiters") return @@ -489,7 +528,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None): return tag_entry def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed", - error_code=HedExceptions.HED_WIKI_DELIMITERS_INVALID): + error_code=HedExceptions.WIKI_DELIMITERS_INVALID): self.fatal_errors.append( {'code': error_code, ErrorContext.ROW: line_number, @@ -504,14 +543,12 @@ def _check_for_new_section(self, line, strings_for_section, current_section): if line.startswith(section_string): if key in strings_for_section: msg = f"Found section {SectionNames[key]} twice" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) if current_section < key: new_section = key else: - error_code = HedExceptions.INVALID_SECTION_SEPARATOR - if key in ErrorsBySection: - error_code = ErrorsBySection[key] + error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Found section {SectionNames[key]} out of order in file" raise HedFileError(error_code, msg, filename=self.filename) break @@ -520,11 +557,11 @@ def _check_for_new_section(self, line, strings_for_section, current_section): def _handle_bad_section_sep(self, line, current_section): if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename) if line.startswith("!#"): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename) + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) def _split_lines_into_sections(self, wiki_lines): """ Takes a list of lines, and splits it into valid wiki sections. 
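The malformed-attribute check added to _get_tag_attributes rests on the fullmatch pattern in _validate_attribute_string. A standalone sketch of what that pattern accepts and rejects (the sample attribute strings are invented):

    import re

    pattern = r'^[A-Za-z]+(=.+)?$'  # same pattern as _validate_attribute_string above

    for attr in ['takesValue', 'suggestedTag=Event/Sensory-event', 'bad attribute', '=value']:
        ok = re.fullmatch(pattern, attr) is not None
        print(f"{attr!r} -> {'accepted' if ok else 'reported as malformed'}")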
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 8404970e..25b27ab8 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -4,6 +4,7 @@ from hed.errors import ErrorHandler, SchemaWarnings from hed.schema import hed_schema_constants as constants from hed.errors.exceptions import HedExceptions, HedFileError +from hed.schema.hed_schema_constants import valid_header_attributes ALLOWED_TAG_CHARS = "-" ALLOWED_DESC_CHARS = "-_:;,./()+ ^" @@ -45,9 +46,9 @@ def validate_version_string(version_string): header_attribute_validators = { - constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID), - constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME) - } + constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID), + constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME) +} def validate_present_attributes(attrib_dict, filename): @@ -92,9 +93,12 @@ def validate_attributes(attrib_dict, filename): had_error = validator(attribute_value) if had_error: raise HedFileError(error_code, had_error, filename) + if attribute_name not in valid_header_attributes: + raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, + f"Unknown attribute {attribute_name} found in header line", filename=filename) if constants.VERSION_ATTRIBUTE not in attrib_dict: - raise HedFileError(HedExceptions.HED_SCHEMA_VERSION_INVALID, + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, "No version attribute found in header", filename=filename) diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index c47fff94..c1aad366 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68 +Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8 diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 972d53d4..3e87fdbd 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -12,55 +12,11 @@ from hed.errors import ErrorHandler, get_printable_issue_string -# To be removed eventually once all errors are being verified. 
-known_errors = [ - 'SIDECAR_INVALID', - 'CHARACTER_INVALID', - 'COMMA_MISSING', - "DEF_EXPAND_INVALID", - "DEF_INVALID", - "DEFINITION_INVALID", - "NODE_NAME_EMPTY", - "ONSET_OFFSET_INSET_ERROR", - "PARENTHESES_MISMATCH", - "PLACEHOLDER_INVALID", - "REQUIRED_TAG_MISSING", - "SIDECAR_INVALID", - "SIDECAR_KEY_MISSING", - "STYLE_WARNING", - "TAG_EMPTY", - "TAG_EXPRESSION_REPEATED", - "TAG_EXTENDED", - "TAG_EXTENSION_INVALID", - "TAG_GROUP_ERROR", - "TAG_INVALID", - "TAG_NOT_UNIQUE", - "TAG_NAMESPACE_PREFIX_INVALID", - "TAG_REQUIRES_CHILD", - "TILDES_UNSUPPORTED", - "UNITS_INVALID", - "UNITS_MISSING", - "VALUE_INVALID", - - "SIDECAR_BRACES_INVALID", - "SCHEMA_LIBRARY_INVALID", - - "SCHEMA_ATTRIBUTE_INVALID", - "SCHEMA_UNIT_CLASS_INVALID", - "SCHEMA_VALUE_CLASS_INVALID", - "SCHEMA_DEPRECATED_INVALID", - "SCHEMA_SUGGESTED_TAG_INVALID", - "SCHEMA_RELATED_TAG_INVALID", - "SCHEMA_NON_PLACEHOLDER_HAS_CLASS", - "SCHEMA_DEFAULT_UNITS_INVALID" -] - skip_tests = { "VERSION_DEPRECATED": "Not applicable", "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet", } - class MyTestCase(unittest.TestCase): @classmethod def setUpClass(cls): @@ -80,9 +36,7 @@ def run_single_test(self, test_file): test_info = json.load(fp) for info in test_info: error_code = info['error_code'] - verify_code = False - if error_code in known_errors: - verify_code = True + verify_code = True # To be deprecated once we add this to all tests self._verify_code = verify_code if error_code in skip_tests: @@ -93,6 +47,8 @@ def run_single_test(self, test_file): print(f"Skipping {name} test because: {skip_tests[name]}") continue + # if name != "attribute-invalid-in-library": + # continue description = info['description'] schema = info['schema'] check_for_warnings = info.get("warning", False) diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki index a596775c..e2246335 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki index 672792aa..2b76a3a4 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki @@ -17,6 +17,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki index d5e6cf44..678a6249 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki @@ -13,6 +13,16 @@ This schema is the first official 
release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki index 979f72bd..037c9bc7 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki @@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki index 3438be07..f79d8361 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki index 267a214e..80454ef4 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki index ee20104a..d3368e37 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki @@ -33,6 +33,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki index 8b3a3a86..64144708 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki @@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki 
b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki index 7939dfd9..f8bccd4d 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki @@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki index 4a084ebd..eb283125 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki @@ -33,6 +33,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki index f282aabb..289265f8 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki @@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind * weightUnits {defaultUnits=testUnit} ** testUnit {conversionFactor=100} +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes. diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki index b7c4d5aa..ac67b8fe 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki @@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind * weightUnitsNew {defaultUnits=g} ** g {conversionFactor=100} +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes. 
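These fixture updates add every separator named in required_sections; the behavior they pin down is that a .mediawiki schema missing any required separator now fails to load with SCHEMA_SECTION_MISSING rather than one of the old wiki-separator codes. A hedged sketch of that failure mode (the file path is hypothetical):

    from hed.schema import load_schema
    from hed.errors.exceptions import HedExceptions, HedFileError

    try:
        load_schema("schema_missing_value_classes.mediawiki")  # hypothetical file lacking a required separator
    except HedFileError as e:
        # Attribute name 'code' assumed from the keyword used when raising elsewhere in this diff.
        print(e.code == HedExceptions.SCHEMA_SECTION_MISSING)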
diff --git a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki index d5e31f3b..6536476c 100644 --- a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki +++ b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki @@ -44,6 +44,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki index 049260f1..4327c6a4 100644 --- a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki +++ b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki @@ -1,6 +1,6 @@ HED version:8.0.0-alpha.1 - +'''Prologue''' This is a prologue line. This is a second prologue line. @@ -1098,7 +1098,15 @@ This is a second prologue line. * z {SIUnitSymbolModifier} [SI unit submultiple representing 10^-21] * yocto {SIUnitModifier} [SI unit submultiple representing 10^-24] * y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24] -!# end hed +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + +'''Epilogue''' This is an epilogue. -This is a second line of an epilogue. \ No newline at end of file +This is a second line of an epilogue. + +!# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki new file mode 100644 index 00000000..d2c398e3 --- /dev/null +++ b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki @@ -0,0 +1,41 @@ +HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd" + +'''Prologue''' +This schema tests AppendixB SCHEMA_ATTRIBUTE_INVALID + +!# start schema + +'''Tag1''' {suggestedTag=Tag1}[suggested tag is not registered in the schema] +* Tag2 {valueClassAttribute}[value attribute is the wrong tag class] +* Tag3 {unitAttribute}[unit attribute is the wrong tag class] + +!# end schema +'''Unit classes''' +* unitClass1 {unitAttribute}[Wrong attribute type] +** unit1 {tagAttribute}[Wrong attribute type] + +'''Unit modifiers''' +* mod1 {tagAttribute}[Wrong attribute type] + +'''Value classes''' +* valueClass1 {tagAttribute}[Wrong attribute type] + +'''Schema attributes''' +* tagAttribute +* unitAttribute {unitProperty} +* unitClassAttribute {unitClassProperty} +* unitModifierAttribute {unitModifierProperty} +* valueClassAttribute {valueClassProperty} +* attribute1 {valueClassProperty} + +'''Properties''' +* boolProperty +* unitClassProperty +* unitModifierProperty +* unitProperty +* valueClassProperty + +'''Epilogue''' +This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes, themselves have properties. 
+ +!# end hed \ No newline at end of file diff --git a/tests/data/validator_tests/bids_schema.mediawiki b/tests/data/validator_tests/bids_schema.mediawiki index 971a9723..b306003b 100644 --- a/tests/data/validator_tests/bids_schema.mediawiki +++ b/tests/data/validator_tests/bids_schema.mediawiki @@ -1,5 +1,7 @@ HED version: 8.0.0-alpha.2 +'''Prologue''' + !# start schema '''Event''' @@ -1163,6 +1165,7 @@ HED version: 8.0.0-alpha.2 * yocto {SIUnitModifier} [SI unit submultiple representing 10^-24] * y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24] +'''Value classes''' '''Schema attributes''' * allowedCharacter {unitClassProperty}[An attribute of unit classes schema value placeholders indicating a special character that is allowed in expressing the value of that placeholder.] @@ -1184,6 +1187,8 @@ HED version: 8.0.0-alpha.2 * unitSymbol {boolProperty, unitProperty}[Abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.] * unitClass [Specifies the type of a unit for a tag.] +'''Properties''' + '''Epilogue''' This is the new format for the mediawiki schema diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py index f5b381eb..71e21386 100644 --- a/tests/models/test_base_input.py +++ b/tests/models/test_base_input.py @@ -304,25 +304,30 @@ def test_complex_onsets(self): {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]}) def test_empty_and_single_item_series(self): - self.assertEqual(BaseInput._filter_by_index_list([], {}), []) - self.assertEqual(BaseInput._filter_by_index_list(["apple"], {0: [0]}), ["apple"]) + self.assertTrue(BaseInput._filter_by_index_list(pd.Series([]), {}).equals(pd.Series([]))) + self.assertTrue(BaseInput._filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"]))) def test_two_item_series_with_same_onset(self): - self.assertEqual(BaseInput._filter_by_index_list(["apple", "orange"], {0: [0, 1]}), ["apple,orange", "n/a"]) + input_series = pd.Series(["apple", "orange"]) + expected_series = pd.Series(["apple,orange", "n/a"]) + self.assertTrue(BaseInput._filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series)) def test_multiple_item_series(self): - original = ["apple", "orange", "banana", "mango"] + input_series = pd.Series(["apple", "orange", "banana", "mango"]) indexed_dict = {0: [0, 1], 1: [2], 2: [3]} - self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["apple,orange", "n/a", "banana", "mango"]) + expected_series = pd.Series(["apple,orange", "n/a", "banana", "mango"]) + self.assertTrue(BaseInput._filter_by_index_list(input_series, indexed_dict).equals(expected_series)) def test_complex_scenarios(self): # Test with negative, zero and positive onsets - original = ["negative", "zero", "positive"] + original = pd.Series(["negative", "zero", "positive"]) indexed_dict = {-1: [0], 0: [1], 1: [2]} - self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["negative", "zero", "positive"]) + expected_series1 = pd.Series(["negative", "zero", "positive"]) + self.assertTrue(BaseInput._filter_by_index_list(original, indexed_dict).equals(expected_series1)) # Test with more complex indexed_dict - original = ["apple", "orange", "banana", "mango", "grape"] - indexed_dict = {0: [0, 1], 1: [2], 2: [3, 4]} - self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), - ["apple,orange", "n/a", "banana", "mango,grape", "n/a"]) + original2 = ["apple", "orange", "banana", "mango", 
"grape"] + indexed_dict2= {0: [0, 1], 1: [2], 2: [3, 4]} + expected_series2 = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"]) + self.assertTrue(BaseInput._filter_by_index_list(original2, indexed_dict2).equals(expected_series2)) + diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py new file mode 100644 index 00000000..0a942b93 --- /dev/null +++ b/tests/models/test_basic_search.py @@ -0,0 +1,313 @@ +import unittest +import pandas as pd +from hed import load_schema_version + +import os +from hed import TabularInput +from hed.models import df_util, basic_search +from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \ + construct_delimiter_map, verify_search_delimiters, find_matching +import numpy as np + + +class TestNewSearch(unittest.TestCase): + @classmethod + def setUpClass(cls): + bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/bids_tests/eeg_ds003645s_hed')) + sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json')) + cls.events_path = os.path.realpath( + os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv')) + cls.base_input = TabularInput(cls.events_path, sidecar1_path) + cls.schema = load_schema_version() + cls.df = cls.base_input.series_filtered + + def test_find_matching_results(self): + result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)") + result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)") + + # Add assertions + self.assertTrue(np.sum(result1) > 0, "result1 should have some true values") + self.assertTrue(np.sum(result2) > 0, "result2 should have some true values") + self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2") + + +class TestFindWords(unittest.TestCase): + def test_basic(self): + search_string = "@global (local1, local2)" + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_no_anywhere_words(self): + search_string = "(local1, local2)" + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, []) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_no_specific_words(self): + search_string = "@global1, @global2" + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global1', 'global2']) + self.assertEqual(specific_words, []) + + def test_empty_string(self): + search_string = "" + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, []) + self.assertEqual(specific_words, []) + + def test_mixed_words(self): + search_string = "@global (local1, local2), @another_global" + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['global', 'another_global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + def test_whitespace(self): + search_string = " @Global , ( local1 , local2 ) " + anywhere_words, specific_words = find_words(search_string) + self.assertEqual(anywhere_words, ['Global']) + self.assertEqual(specific_words, ['local1', 'local2']) + + +class TestCheckParentheses(unittest.TestCase): + def test_balanced_parentheses(self): + self.assertEqual(check_parentheses("(())"), "") + self.assertEqual(check_parentheses("(someText())"), "") + 
self.assertEqual(check_parentheses("((some)text())"), "") + self.assertEqual(check_parentheses("()"), "") + + def test_unbalanced_parentheses(self): + self.assertEqual(check_parentheses("(()"), "(") + self.assertEqual(check_parentheses("()someText("), "(") + self.assertEqual(check_parentheses("(text)text)"), ")") + self.assertEqual(check_parentheses("text)"), ")") + + def test_mixed_parentheses(self): + self.assertEqual(check_parentheses("(()(())"), "(") + self.assertEqual(check_parentheses("(someText))((someText)"), ")(") + self.assertEqual(check_parentheses("((someText))someText"), "") + self.assertEqual(check_parentheses("(someText(someText))someText"), "") + + def test_special_cases(self): + self.assertEqual(check_parentheses(""), "") + self.assertEqual(check_parentheses("abc"), "") + self.assertEqual(check_parentheses("((()))("), "(") + self.assertEqual(check_parentheses("text"), "") + + def test_reverse_and_flip_parentheses(self): + self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)") + self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH") + self.assertEqual(reverse_and_flip_parentheses(")("), ")(") + self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))") + self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()") + self.assertEqual(reverse_and_flip_parentheses("abc"), "cba") + self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321") + self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a") + + +class TestConstructDelimiterMap(unittest.TestCase): + def test_empty_text(self): + self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {}) + + def test_empty_words(self): + self.assertEqual(construct_delimiter_map("word1,word2", []), {}) + + def test_single_occurrence(self): + text = "word1,word2" + expected_result = { + ("word1", "word2"): "", + ("word2", "word1"): "" + } + self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result) + + def test_multiple_words(self): + text = "word0,((word1),word2)" + expected_result = { + ("word0", "word1"): "((", + ("word0", "word2"): "(", + ("word1", "word0"): "))", + ("word1", "word2"): ")", + ("word2", "word1"): "(", + ("word2", "word0"): ")" + } + self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) + + text = "word0 , ( (word1 ), word2)" + self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result) + + +class TestVerifyDelimiters(unittest.TestCase): + def base_verify_func(self, query_text, text, anywhere_words, specific_words, expected_result): + delimiter_map = construct_delimiter_map(query_text, specific_words) + actual_result = verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map) + self.assertEqual(actual_result, expected_result) + + def test_all_conditions_met(self): + query_text = "word0,((word1),word2)" + specific_words = ["word0", "word1", "word2"] + text = "word0,((word1),word2)" + self.base_verify_func(query_text, text, [], specific_words, True) + text = "((word1),word2), word0" + self.base_verify_func(query_text, text, [], specific_words, True) + text = "word0,(word2, (word1))" + self.base_verify_func(query_text, text, [], specific_words, True) + text = "word0,((word1),(ExtraGroup),word2)" + self.base_verify_func(query_text, text, [], specific_words, True) + text = "word0,((word2),word1)" + self.base_verify_func(query_text, text, [], specific_words, False) + text = "((word1),word0), word2" + self.base_verify_func(query_text, 
text, [], specific_words, False) + text = "word0,((word1))" + self.base_verify_func(query_text, text, [], specific_words, False) + text = "(word1),(ExtraGroup),word2)" + self.base_verify_func(query_text, text, [], specific_words, False) + + def test_complex_case_with_word_identifiers(self): + query_text = "word0,((word1),@word2,@word3,word4)" + specific_words = ["word0", "word1", "word4"] + anywhere_words = ["word2", "word3"] + text = "word0,((word1),word2,word3,word4)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + text = "word2,word0,((word1),word3,word4)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + text = "word3,((word1),word2,word4),word0" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + text = "word0,((word1),word4),word2,word3" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + text = "word0,word1,word4,word2" # Incorrect delimiters + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + text = "word2,word3" # Missing specific words + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + def test_very_complex_case_with_word_identifiers(self): + query_text = "word0,(((word1,word2),@word3)),((word4,word5)))" + specific_words = ["word0", "word1", "word2", "word4", "word5"] + anywhere_words = ["word3"] + + # Test case where all conditions are met + text = "word0,(((word1,word2),word3)),((word4,word5)))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Test case with anywhere words out of specific context but still in the string + text = "word3,word0,(((word1,word2))),((word4,word5)))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Test case with correct specific words but incorrect delimiters + text = "word0,((word1,word2),word3),(word4,word5)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + # Test case missing one specific word + text = "word0,(((word1,word2),word3)),(word4))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + # Test case missing anywhere word + text = "word0,(((word1,word2))),((word4,word5)))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + def test_incorrect_single_delimiter(self): + query_text = "word0,((word1)),word2" + specific_words = ["word0", "word1", "word2"] + anywhere_words = [] + + # Positive case 1: Exact match + text = "word0,((word1)),word2" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Positive case 2: Additional parentheses around the entire sequence + text = "(word0,((word1)),word2)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Single closing parenthesis missing between word1 and word2 + text = "word0,((word1),word2)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + # Single opening parenthesis missing between word0 and word1 + text = "word0,(word1)),word2" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + def test_mismatched_parentheses(self): + query_text = "word0,((word1)),(word2,word3)" + specific_words = ["word0", "word1", "word2", "word3"] + anywhere_words = [] + + # Positive case 1: Exact match + text = "word0,((word1)),(word2,word3)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # 
Positive case 2: Reordered sequence with the same delimiters + text = "(word2,word3),word0,((word1))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Positive case 3: Additional text in between but the delimiters remain the same + text = "word0,someExtraText,((word1)),someMoreText,(word2,word3)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, True) + + # Extra closing parenthesis between word2 and word3 + text = "word0,((word1),(word2,word3))" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + # Extra opening parenthesis between word1 and word2 + text = "word0,((word1),((word2,word3)" + self.base_verify_func(query_text, text, anywhere_words, specific_words, False) + + def test_wildcard_matching_verify_delimiters(self): + query_text = "word0, ((word1.*?)), word2.*?" + delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"]) + + # Positive test cases + text = "((word1)), word0, word2X" + self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map)) + + text = "word0, ((word1Y)), word2Z" + self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map)) + + # Negative test cases + text = "word0, (word1), word2" + self.assertFalse(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map)) + +class TestFindMatching(unittest.TestCase): + def base_find_matching(self, series, search_string, expected): + mask = find_matching(series, search_string) + self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}") + + def test_basic_matching(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "(word1), word0, (word2)" + ]) + search_string = "word0, ((word1)), word2" + expected = pd.Series([False, True, False]) + self.base_find_matching(series, search_string, expected) + + def test_anywhere_words(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "word0, (word3), ((word1)), word2" + ]) + search_string = "@word3, word0, ((word1)), word2" + expected = pd.Series([False, False, True]) + self.base_find_matching(series, search_string, expected) + + def test_mismatched_parentheses(self): + series = pd.Series([ + "(word1), word0, ((word2))", + "word0, ((word1)), word2", + "word0, (word1)), word2", + "word0, ((word1), word2" + ]) + search_string = "word0, ((word1)), word2" + expected = pd.Series([False, True, False, False]) + self.base_find_matching(series, search_string, expected) + + def test_wildcard_matching(self): + series = pd.Series([ + "word2, word0, ((word1X))", + "word0, ((word1Y)), word2Z", + "word0, (word1), word2" + ]) + search_string = "word0, ((word1*)), word2*" + expected = pd.Series([True, True, False]) + self.base_find_matching(series, search_string, expected) diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py index cca54411..5bdb71b7 100644 --- a/tests/models/test_expression_parser.py +++ b/tests/models/test_expression_parser.py @@ -118,7 +118,7 @@ def test_finding_tags2(self): "Agent, (Event)": True, "(Item), (Event)": True } - self.base_test("(Item or Agent) and [[Action or Event]]", test_strings) + self.base_test("(Item or Agent) and {Action or Event}", test_strings) def test_exact_group(self): test_strings = { @@ -131,7 +131,7 @@ def test_exact_group(self): "(A, B, (C, D))": True, "(A, B, C)": True } - self.base_test("[[a, b]]", 
test_strings) + self.base_test("{a, b}", test_strings) def test_exact_group_simple_complex(self): test_strings = { @@ -145,7 +145,7 @@ def test_exact_group_simple_complex(self): "(E, F, (A, B, (C, D)))": True, "(A, B, (E, F, (C, D)))": False, # TODO: Should this be True? [[c]] isn't directly inside an a group. } - self.base_test("[[a, [[c]] ]]", test_strings) + self.base_test("{a, {c} }", test_strings) def test_exact_group_complex(self): test_strings = { @@ -155,7 +155,7 @@ def test_exact_group_complex(self): "(A, B, ((C, D)))": False, "(E, F, (A, B, (C, D)))": True, } - self.base_test("[[a, b, [[c, d]] ]]", test_strings) + self.base_test("{a, b, {c, d} }", test_strings) def test_duplicate_search(self): test_strings = { @@ -183,7 +183,7 @@ def test_exact_group_complex_split(self): "(E, F, (A, B, (C, D)))": False, "((A, B), (C, D))": True, } - self.base_test("[[ [[a, b]], [[c, d]] ]]", test_strings) + self.base_test("{ {a, b}, {c, d} }", test_strings) def test_mixed_group_split(self): test_strings = { @@ -192,7 +192,7 @@ def test_mixed_group_split(self): "((Event), ((Clear-throat)))": True, "((Event, Clear-throat))": False, } - self.base_test("[[ [Event], [Action] ]]", test_strings) + self.base_test("{ [Event], [Action] }", test_strings) def test_exact_group_split(self): test_strings = { @@ -201,7 +201,7 @@ def test_exact_group_split(self): "((Event), ((Clear-throat)))": False, "((Event, Clear-throat))": False, } - self.base_test("[[ [[Event]], [[Action]] ]]", test_strings) + self.base_test("{ {Event}, {Action} }", test_strings) def test_exact_group_split_or(self): test_strings = { @@ -210,17 +210,18 @@ def test_exact_group_split_or(self): "((A), ((D)))": True, "((A, D))": True, } - self.base_test("[[ [[a]] or [[d]] ]]", test_strings) + self.base_test("{ {a} or {d} }", test_strings) def test_exact_group_split_or_negation(self): test_strings = { - "(Event, Clear-throat)": False, + # "(Event, Clear-throat)": False, "((Event), (Clear-throat))": True, "((Event))": False, "((Event), ((Clear-throat)))": True, "((Event, Clear-throat))": False, } - self.base_test("[[ [[~Event]] ]]", test_strings) + # Need to think this through more. How do you exact match a negative tag? + self.base_test("{ {~Event} }", test_strings) def test_exact_group_split_or_negation_dual(self): test_strings = { @@ -233,7 +234,7 @@ def test_exact_group_split_or_negation_dual(self): "((A), (B, C))": False, "((A), ((B), C))": True, } - self.base_test("[[ [[~a and ~b]] ]]", test_strings) + self.base_test("{ {~a and ~b} }", test_strings) def test_exact_group_split_or_negation_dual2(self): test_strings = { @@ -246,7 +247,7 @@ def test_exact_group_split_or_negation_dual2(self): "((A), (B, C))": False, "((A), ((B), C))": True, } - self.base_test("[[ [[~(a or b)]] ]]", test_strings) + self.base_test("{ {~(a or b)} }", test_strings) def test_exact_group_split_or_negation_complex(self): test_strings = { @@ -260,7 +261,7 @@ def test_exact_group_split_or_negation_complex(self): "((A), (B, C)), (D)": False, "((A), (B, C)), (H)": False, } - self.base_test("[[ [[~(a or b)]] ]] and [[D or ~F]]", test_strings) + self.base_test("{ {~(a or b)} } and {D or ~F}", test_strings) # TODO: Should this work, and what should it mean? # Right now this is always true, since there is at least one group without ", (a)" in every string. 
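The hunks in this file apply a purely mechanical change to the query syntax: exact-group delimiters move from double square brackets to curly braces, while single-bracket containing groups such as [Event] are left untouched. A minimal sketch of that mapping, assuming no query string uses "[[" or "]]" for anything other than the old exact-group delimiters (the function name is illustrative only, not part of the library):

    def migrate_exact_group_syntax(query: str) -> str:
        # Old exact-group syntax "[[a, b]]" becomes "{a, b}";
        # containing groups written with single brackets stay as they are.
        return query.replace("[[", "{").replace("]]", "}")

    # For example:
    #   migrate_exact_group_syntax("[[ [Event], [Action] ]]")  ->  "{ [Event], [Action] }"
    #   migrate_exact_group_syntax("[[a]], [e, [[f]] ]")       ->  "{a}, [e, {f} ]"
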
@@ -272,7 +273,7 @@ def test_exact_group_negation(self): "((A), ((D)))": True, "((A, D))": True, } - self.base_test("[[ ~[[a]] ]]", test_strings) + self.base_test("{ ~{a} }", test_strings) def test_exact_group_negation2(self): test_strings = { @@ -282,9 +283,42 @@ def test_exact_group_negation2(self): "((A), ((D, B)))": True, "((A, D))": False, "(B, (D))": True, - "(B)": True + "(B)": True, + "((A), B)": False } - self.base_test("[[ ~[[a]], b]]", test_strings) + self.base_test("{ ~{a}, b}", test_strings) + + def test_exact_group_negation3(self): + test_strings = { + "(A, D, B)": False, + "((A), (D), B)": True, + "((A))": False, + "((A), ((D, B)))": True, + "((A, D))": False, + "(B, (D))": True, + "(B)": True, + "((A), B)": True + } + self.base_test("{ ~a and b}", test_strings) + + def test_exact_group_negation4(self): + test_strings = { + "(A, D, B)": False, + "((A), (D), B)": False, + "((A))": False, + "((A), ((D, B)))": False, + "((A, D))": False, + "(B)": True, + "(B, (D))": True, + "((A), B)": False + } + self.base_test("{ @c and @a and b: ???}", test_strings) + + def test_exact_group_negation5(self): + test_string = "{ ~a and b:}" + with self.assertRaises(ValueError) as context: + QueryParser(test_string) + self.assertTrue(context.exception.args[0]) def test_mixed_group_complex_split(self): test_strings = { @@ -297,7 +331,7 @@ def test_mixed_group_complex_split(self): "((A, B), (C, D))": True, "((A, B, C, D))": False, } - self.base_test("[[ [a, b], [c, d] ]]", test_strings) + self.base_test("{ [a, b], [c, d] }", test_strings) def test_exact_group_complex2(self): test_strings = { @@ -309,7 +343,7 @@ def test_exact_group_complex2(self): "(B, (C)), (A, B, (C))": True, "(A, B, (A, (C)))": False } - self.base_test("[[a, b, [[c]] ]]", test_strings) + self.base_test("{a, b, {c} }", test_strings) def test_containing_group_complex2(self): test_strings = { @@ -362,13 +396,13 @@ def test_mixed_groups(self): test_strings = { "(A, B), (C, D, (E, F))": True } - self.base_test("[[a]], [[ [[e, f]] ]]", test_strings) + self.base_test("{a}, { {e, f} }", test_strings) test_strings = { "(A, B), (C, D, (E, F))": False } # This example works because it finds the group containing (c, d, (e, f)), rather than the ef group - self.base_test("[[a]], [e, [[f]] ]", test_strings) + self.base_test("{a}, [e, {f} ]", test_strings) def test_and(self): test_strings = { @@ -411,18 +445,17 @@ def test_and_wildcard_nothing_else(self): "A": False, "B": False, "C": False, - "A, B": True, + "A, B": False, "A, C": False, "B, C": False, "A, B, C": False, "D, A, B": False, "A, B, (C)": False, "(A, B), C": True, - "(A, B, C)": False, + "(A, B, C)": True, } self.base_test("{a and b}", test_strings) - def test_and_wildcard_nothing_else2(self): test_strings = { "A": False, "B": False, @@ -436,8 +469,7 @@ def test_and_wildcard_nothing_else2(self): "(A, B), C": True, "(A, B, C)": False, } - self.base_test("[{a and b}]", test_strings) - self.base_test("[[{a and b}]]", test_strings) + self.base_test("{a and b:}", test_strings) def test_and_logical_wildcard(self): test_strings = { @@ -450,9 +482,11 @@ def test_and_logical_wildcard(self): self.base_test("A, B and ?", test_strings) test_strings = { - "A": False, + "A": True, "A, C": True, "A, B, C": True, + "B, C": False, + "B, C, D, E": True } self.base_test("(a or (b and c) and ?)", test_strings) @@ -469,7 +503,7 @@ def test_double_wildcard(self): def test_or_wildcard(self): test_strings = { - "A": False, + "A": True, "B": False, "C": False, "A, B": True, @@ -589,10 +623,10 @@ def 
test_and_or(self): self.base_test("a and (b or c)", test_strings) test_strings = { - "A": False, + "A": True, "B": False, "C": False, - "A, B": False, + "A, B": True, "A, C": True, "B, C": True } @@ -698,35 +732,43 @@ def test_not_in_line3(self): def test_optional_exact_group(self): test_strings = { - "A, C": True, + "(A, C)": True, } self.base_test("{a and (b or c)}", test_strings) test_strings = { - "A, B, C, D": True, + "(A, B, C, D)": True, } self.base_test("{a and b: c and d}", test_strings) test_strings = { - "A, B, C": True, - "A, B, C, D": False, + "(A, B, C)": True, + "(A, B, C, D)": False, } self.base_test("{a and b: c or d}", test_strings) test_strings = { - "A, C": True, - "A, D": True, - "A, B, C": False, - "A, B, C, D": False, + "(A, C)": True, + "(A, D)": True, + "(A, B, C)": False, + "(A, B, C, D)": False, } self.base_test("{a or b: c or d}", test_strings) test_strings = { "(Onset, (Def-expand/taco))": True, + "(Onset, Def-expand/taco)": False, + "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates + "(Onset, (Def/taco))": False, "(Onset, (Def-expand/taco, (Label/DefContents)))": True, "(Onset, (Def-expand/taco), (Label/OnsetContents))": True, "(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True, "Onset, (Def-expand/taco), (Label/OnsetContents)": False, "(Onset, (Def-expand/taco), Label/OnsetContents)": False, } - self.base_test("[[{(Onset or Offset), (Def or [[Def-expand]]): ???}]]", test_strings) \ No newline at end of file + self.base_test("{(Onset or Offset), (Def or {Def-expand}): ???}", test_strings) + test_strings = { + "(A, B)": True, + "(A, B, C)": True + } + self.base_test("{a or b}", test_strings) \ No newline at end of file diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index f3591ead..75f66d17 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -170,6 +170,8 @@ def _base_merging_test(self, files): reload1 = load_schema(path1) reload2 = load_schema(path2) self.assertEqual(reload1, reload2) + except Exception: + self.assertTrue(False) finally: os.remove(path1) os.remove(path2) @@ -183,6 +185,8 @@ def _base_merging_test(self, files): reload1 = load_schema(path1) reload2 = load_schema(path2) self.assertEqual(reload1, reload2) + except Exception: + self.assertTrue(False) finally: os.remove(path1) os.remove(path2) @@ -241,10 +245,10 @@ def _base_added_class_tests(self, schema): unit_class_entry = schema.unit_classes["weightUnits"] unit_entry = unit_class_entry.units["testUnit"] - self.assertEqual(unit_entry.attributes["conversionFactor"], str(100)) + self.assertEqual(unit_entry.attributes[HedKey.ConversionFactor], str(100)) unit_modifier_entry = schema.unit_modifiers["huge"] - self.assertEqual(unit_modifier_entry.attributes["conversionFactor"], "10^100") + self.assertEqual(unit_modifier_entry.attributes[HedKey.ConversionFactor], "10^100") self.assertTrue(unit_modifier_entry.attributes["customElementAttribute"]) value_class_entry = schema.value_classes["customValueClass"] @@ -324,9 +328,9 @@ def test_cannot_load_schemas(self): ] for file in files: - with self.assertRaises(HedFileError): - # print(file) + with self.assertRaises(HedFileError) as context: load_schema(file) + self.assertEqual(context.exception.code, HedExceptions.SCHEMA_LIBRARY_INVALID) def test_saving_in_library_wiki(self): old_score_schema = load_schema_version("score_1.0.0") diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py 
index 583579b1..0759dba4 100644 --- a/tests/schema/test_schema_wiki_fatal_errors.py +++ b/tests/schema/test_schema_wiki_fatal_errors.py @@ -1,7 +1,7 @@ import unittest import os -from hed import schema +from hed import load_schema from hed.errors import HedFileError, HedExceptions @@ -12,25 +12,25 @@ class TestHedSchema(unittest.TestCase): def setUpClass(cls): cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir) cls.files_and_errors = { - "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_START_MISSING, - "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_END_INVALID, - "HED_hed_no_end.mediawiki": HedExceptions.HED_END_INVALID, - "HED_separator_invalid.mediawiki": HedExceptions.INVALID_SECTION_SEPARATOR, + "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID, "HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING, - "HED_header_invalid.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID, - "empty_file.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID, - "HED_header_invalid_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID, - "HED_header_missing_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID, + "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, + "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME, - "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_START_MISSING, - "empty_node.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line2.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line3.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line4.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line5.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line6.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line7.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, + "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, "empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID } @@ -60,9 +60,10 @@ def test_invalid_schema(self): for filename, error in self.files_and_errors.items(): full_filename = self.full_base_folder + filename with self.assertRaises(HedFileError) as context: - schema.load_schema(full_filename) + load_schema(full_filename) # all of these should produce exceptions. 
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string + from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string + # Verify basic properties of exception expected_line_numbers = self.expected_line_numbers.get(filename, []) if expected_line_numbers: @@ -82,9 +83,10 @@ def test_merging_errors_schema(self): for filename, error in self.files_and_errors.items(): full_filename = self.full_base_folder + filename with self.assertRaises(HedFileError) as context: - schema.load_schema(full_filename) + load_schema(full_filename) # all of these should produce exceptions. - from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string + from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string + from hed.errors.error_types import SchemaAttributeErrors # Verify basic properties of exception expected_line_numbers = self.expected_line_numbers.get(filename, []) if expected_line_numbers: @@ -96,7 +98,7 @@ def test_merging_errors_schema(self): error_handler.push_error_context(ErrorContext.ROW, 1) error_handler.push_error_context(ErrorContext.COLUMN, 2) - issues = error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID, + issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, "error_attribute", source_tag="error_tag") error_handler.pop_error_context() error_handler.pop_error_context() @@ -106,3 +108,9 @@ def test_merging_errors_schema(self): self.assertTrue(context.exception.args[0] == error) self.assertTrue(context.exception.filename == full_filename) + + def test_attribute_invalid(self): + path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki") + schema = load_schema(path) + issues = schema.check_compliance() + self.assertEqual(len(issues), 7) \ No newline at end of file diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py index 850d014e..415b94dc 100644 --- a/tests/schema/util_create_schemas.py +++ b/tests/schema/util_create_schemas.py @@ -10,13 +10,30 @@ """ library_schema_end = """ -!# end schema + !# end hed """ -def _get_test_schema(node_lines): - library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end +default_end_lines = """ +!# end schema +""" + +required_non_tag = [ +"'''Unit classes'''", +"'''Unit modifiers'''", +"'''Value classes'''", +"'''Schema attributes'''", +"'''Properties'''", +"'''Epilogue'''" +] +def _get_test_schema(node_lines, other_lines=(default_end_lines,)): + node_section = "\n".join(node_lines) + non_tag_section = "\n".join(other_lines) + for name in required_non_tag: + if name not in other_lines: + non_tag_section += f"\n{name}\n" + library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end test_schema = from_string(library_schema_string, ".mediawiki") return test_schema
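With the added required_non_tag bookkeeping, _get_test_schema always emits every non-tag section header, so a schema built from a couple of node lines should presumably load without tripping the SCHEMA_SECTION_MISSING checks exercised above. A rough usage sketch, assuming the helper is imported from tests/schema/util_create_schemas.py and that bare mediawiki node lines ('''Tag''' for a top-level node, "* Child" for its children) are enough for the test at hand:

    from tests.schema.util_create_schemas import _get_test_schema

    node_lines = [
        "'''TestTag'''",   # hypothetical top-level node
        "* TestChild",     # hypothetical child node
    ]
    # The default other_lines supplies "!# end schema"; any required non-tag
    # section header not passed in is appended before the closing "!# end hed".
    schema = _get_test_schema(node_lines)
    issues = schema.check_compliance()   # as exercised in test_attribute_invalid above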