diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 7305e7c6..a866ec32 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -107,7 +107,7 @@ class SidecarErrors: class SchemaErrors: SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE' - SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID' + SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID" @@ -119,19 +119,22 @@ class SchemaWarnings: SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID" SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps' SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS' - SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE" class SchemaAttributeErrors: + SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID' + SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID' SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID" SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID" - SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID" SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID" SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID" + SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID" + SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID" SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID" - SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID + SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" + SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE" class DefinitionErrors: diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py index e7ee857b..e368ec43 100644 --- a/hed/errors/exceptions.py +++ b/hed/errors/exceptions.py @@ -14,8 +14,9 @@ class HedExceptions: INVALID_DATAFRAME = 'INVALID_DATAFRAME' INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT' # These are actual schema issues, not that the file cannot be found or parsed - SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID' - 
HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID' + SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID' + SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID' + SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID" SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID" BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID' @@ -26,14 +27,14 @@ class HedExceptions: ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID" IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID" - HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID' - SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID' - SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID' - HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID' - INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator' + SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID' + SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING' + + WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator' # This issue will contain a list of lines with issues. - HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID' + WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID' + WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID' HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID' SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix' diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py index b72e8470..b8962682 100644 --- a/hed/errors/known_error_codes.py +++ b/hed/errors/known_error_codes.py @@ -31,6 +31,7 @@ ], "schema_validation_errors": [ "SCHEMA_ATTRIBUTE_INVALID", + "SCHEMA_ATTRIBUTE_VALUE_INVALID", "SCHEMA_CHARACTER_INVALID", "SCHEMA_DUPLICATE_NODE", "SCHEMA_HEADER_INVALID", diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index b7fda9d5..8c196f9e 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section): f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}" 
-@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID) def schema_error_unknown_attribute(attribute_name, source_tag): return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \ f"or was used outside of it's defined class." @@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}." -@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING) +@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING, + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name): return "Only placeholder nodes('#') can have a unit class, value class, or takes value." + \ f"Found {invalid_attribute_name} on {tag_name}" -@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR) -def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name): - return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'." \ - f"Something went very wrong." - -@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version): return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'." 
@hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, - actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID) + actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child): return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'." -@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name): return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'." -@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." -@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name): return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." -@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID) +@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units): valid_units = ",".join(valid_units) return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. Valid units are: '{valid_units}'." 
+ + +@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor): + return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. Conversion factor must be positive." + + +@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character): + return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. " + f"Allowed characters are: a single character, " + f"or one of the following - letters, blank, digits, alphanumeric.") + + +@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) +def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library): + return (f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. 
") diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 4cb3729c..0857abe9 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class): # Add the InLibrary attribute to any library schemas as they are loaded # These are later removed when they are saved out, if saving unmerged if self.library and (not self.with_standard or (not self.merged and self.with_standard)): - new_entry._set_attribute_value(HedKey.InLibrary, self.library) + # only add it if not already present - This is a rare case + if not new_entry.has_attribute(HedKey.InLibrary): + new_entry._set_attribute_value(HedKey.InLibrary, self.library) section = self._sections[key_class] return section._add_to_dict(long_tag_name, new_entry) diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 0cecc4ab..60a1a934 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -42,6 +42,7 @@ class HedKey: SuggestedTag = "suggestedTag" Rooted = "rooted" DeprecatedFrom = "deprecatedFrom" + ConversionFactor = "conversionFactor" # All known properties BoolProperty = 'boolProperty' diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 102795d8..936943e8 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -176,7 +176,6 @@ def __eq__(self, other): return False return True - class UnitEntry(HedSchemaEntry): """ A single unit entry with modifiers in the HedSchema. 
""" def __init__(self, *args, **kwargs): @@ -207,12 +206,13 @@ def finalize_entry(self, schema): self.derivative_units = derivative_units def _get_conversion_factor(self, modifier_entry): - - base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e")) - if modifier_entry: - modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e")) - else: - modifier_factor = 1.0 + base_factor = modifier_factor = 1.0 + try: + base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) + if modifier_entry: + modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e")) + except (ValueError, AttributeError) as e: + pass # Just default to 1.0 return base_factor * modifier_factor def get_conversion_factor(self, unit_name): @@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name): Returns: conversion_factor(float or None): Returns the conversion factor or None """ - if "conversionFactor" in self.attributes: + if HedKey.ConversionFactor in self.attributes: return float(self.derivative_units.get(unit_name)) class HedTagEntry(HedSchemaEntry): diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index d1d7f5ec..0ccb9c33 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED, tag_entry.name, child.name) - return issues \ No newline at end of file + return issues + + +def conversion_factor(hed_schema, tag_entry, attribute_name): + issues = [] + conversion_factor = tag_entry.attributes.get(attribute_name, "1.0") + try: + conversion_factor = float(conversion_factor.replace("^", "e")) + except (ValueError, AttributeError) as e: + pass + if not isinstance(conversion_factor, float) or 
conversion_factor <= 0.0: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, + tag_entry.name, + conversion_factor) + + return issues + + +def allowed_characters_check(hed_schema, tag_entry, attribute_name): + """ Check allowed character has a valid value + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this attribute. + attribute_name (str): The name of this attribute + + Returns: + list: A list of issues. Each issue is a dictionary. + + """ + issues = [] + allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'} + + char_string = tag_entry.attributes.get(attribute_name, "") + characters = char_string.split(",") + for character in characters: + if character not in allowed_strings and len(character) != 1: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + tag_entry.name, + character) + return issues + + +def in_library_check(hed_schema, tag_entry, attribute_name): + """ Check the attribute value matches the library of the schema + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this attribute. + attribute_name (str): The name of this attribute + + Returns: + list: A list of issues. Each issue is a dictionary. + + """ + issues = [] + + library = tag_entry.attributes.get(attribute_name, "") + if hed_schema.library != library: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, + tag_entry.name, + library) + return issues diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index c75c11de..1a68baf8 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl class SchemaValidator: """Validator class to wrap some code. 
In general, just call check_compliance.""" attribute_validators = { - HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check, - SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)], - HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check, - SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)], - HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS), - (schema_attribute_validators.unit_class_exists, - SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)], - HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS), - (schema_attribute_validators.value_class_exists, - SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)], + HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check], + HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check], + HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check, + schema_attribute_validators.unit_class_exists], + HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check, + schema_attribute_validators.value_class_exists], # Rooted tag is implicitly verified on loading - # HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check, - # SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)], - HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check, - SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)], - HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check, - SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)], - HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists, - SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)] + # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check], + HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check], + HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check], + HedKey.DefaultUnits: 
[schema_attribute_validators.unit_exists], + HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor], + HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check], + HedKey.InLibrary: [schema_attribute_validators.in_library_check] } def __init__(self, hed_schema, check_for_warnings=True, error_handler=None): @@ -80,7 +73,7 @@ def check_unknown_attributes(self): if unknown_attributes: for attribute_name, source_tags in unknown_attributes.items(): for tag in source_tags: - issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID, + issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, attribute_name, source_tag=tag) return issues_list @@ -93,16 +86,14 @@ def check_attributes(self): for tag_entry in self.hed_schema[section_key].values(): self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name) for attribute_name in tag_entry.attributes: - validators = self.attribute_validators.get(attribute_name, None) - if validators: - for validator, error_code in validators: - self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) - new_issues = validator(self.hed_schema, tag_entry, attribute_name) - for issue in new_issues: - issue['code'] = error_code - issue['severity'] = ErrorSeverity.WARNING - self.error_handler.add_context_and_filter(new_issues) - issues_list += new_issues + validators = self.attribute_validators.get(attribute_name, []) + for validator in validators: + self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) + new_issues = validator(self.hed_schema, tag_entry, attribute_name) + for issue in new_issues: + issue['severity'] = ErrorSeverity.WARNING + self.error_handler.add_context_and_filter(new_issues) + issues_list += new_issues self.error_handler.pop_error_context() self.error_handler.pop_error_context() self.error_handler.pop_error_context() diff 
--git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py index e373cf1a..d9d082a1 100644 --- a/hed/schema/schema_io/schema2base.py +++ b/hed/schema/schema_io/schema2base.py @@ -106,9 +106,6 @@ def _output_tags(self, tags): self._end_tag_section() def _output_units(self, unit_classes): - if not unit_classes: - return - section_node = self._start_section(HedSectionKey.UnitClasses) for unit_class_entry in unit_classes.values(): @@ -128,8 +125,6 @@ def _output_units(self, unit_classes): self._write_entry(unit_entry, unit_class_node) def _output_section(self, hed_schema, key_class): - if not hed_schema[key_class]: - return parent_node = self._start_section(key_class) for entry in hed_schema[key_class].values(): if self._should_skip(entry): diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index a02f9ed6..de18f9d6 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -22,12 +22,19 @@ no_wiki_end_tag = '' -ErrorsBySection = { - HedWikiSection.Schema: HedExceptions.SCHEMA_START_MISSING, - HedWikiSection.EndSchema: HedExceptions.SCHEMA_END_INVALID, - HedWikiSection.EndHed: HedExceptions.HED_END_INVALID -} -required_sections = [HedWikiSection.Schema, HedWikiSection.EndSchema, HedWikiSection.EndHed] + +required_sections = [ + HedWikiSection.Prologue, + HedWikiSection.Schema, + HedWikiSection.EndSchema, + HedWikiSection.UnitsClasses, + HedWikiSection.UnitModifiers, + HedWikiSection.ValueClasses, + HedWikiSection.Attributes, + HedWikiSection.Properties, + HedWikiSection.Epilogue, + HedWikiSection.EndHed, +] class SchemaLoaderWiki(SchemaLoader): @@ -79,15 +86,13 @@ def _parse_data(self): # Validate we didn't miss any required sections. 
for section in required_sections: if section not in wiki_lines_by_section: - error_code = HedExceptions.INVALID_SECTION_SEPARATOR - if section in ErrorsBySection: - error_code = ErrorsBySection[section] + error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Required section separator '{SectionNames[section]}' not found in file" raise HedFileError(error_code, msg, filename=self.filename) if self.fatal_errors: self.fatal_errors = error_reporter.sort_issues(self.fatal_errors) - raise HedFileError(HedExceptions.HED_WIKI_DELIMITERS_INVALID, + raise HedFileError(self.fatal_errors[0]['code'], f"{len(self.fatal_errors)} issues found when parsing schema. See the .issues " f"parameter on this exception for more details.", self.filename, issues=self.fatal_errors) @@ -109,7 +114,7 @@ def _read_header_section(self, lines): for line_number, line in lines: if line.strip(): msg = f"Extra content [{line}] between HED line and other sections" - raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) def _read_text_block(self, lines): text = "" @@ -163,7 +168,8 @@ def _read_schema(self, lines): parent_tags = parent_tags[:level] elif level > len(parent_tags): self._add_fatal_error(line_number, line, - "Line has too many *'s at the front. You cannot skip a level.") + "Line has too many *'s at the front. You cannot skip a level." 
+ , HedExceptions.WIKI_LINE_START_INVALID) continue # Create the entry tag_entry = self._add_tag_line(parent_tags, line_number, line) @@ -261,14 +267,37 @@ def _get_header_attributes_internal(self, version_line): if "=" not in version_line: return self._get_header_attributes_internal_old(version_line) - final_attributes = {} + attributes, malformed = self._parse_attributes_line(version_line) + + for m in malformed: + # todo: May shift this at some point to report all errors + raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID, + message=f"Header line has a malformed attribute {m}", + filename=self.filename) + return attributes + + @staticmethod + def _parse_attributes_line(version_line): + matches = {} + unmatched = [] + last_end = 0 for match in attr_re.finditer(version_line): - attr_name = match.group(1) - attr_value = match.group(2) - final_attributes[attr_name] = attr_value + start, end = match.span() - return final_attributes + # If there's unmatched content between the last match and the current one + if start > last_end: + unmatched.append(version_line[last_end:start]) + + matches[match.group(1)] = match.group(2) + last_end = end + + # If there's unmatched content after the last match + if last_end < len(version_line): + unmatched.append(version_line[last_end:]) + + unmatched = [m.strip() for m in unmatched if m.strip()] + return matches, unmatched def _get_header_attributes_internal_old(self, version_line): """ Extracts all valid attributes like version from the HED line in .mediawiki format. 
@@ -288,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line): divider_index = pair.find(':') if divider_index == -1: msg = f"Found poorly matched key:value pair in header: {pair}" - raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename) key, value = pair[:divider_index], pair[divider_index + 1:] key = key.strip() value = value.strip() @@ -369,10 +398,17 @@ def _get_tag_name(self, tag_line): return None, 0 @staticmethod - def _get_tag_attributes(tag_line, starting_index): + def _validate_attribute_string(attribute_string): + pattern = r'^[A-Za-z]+(=.+)?$' + match = re.fullmatch(pattern, attribute_string) + if match: + return match.group() + + def _get_tag_attributes(self, line_number, tag_line, starting_index): """ Get the tag attributes from a line. Parameters: + line_number (int): The line number to report errors as tag_line (str): A tag line. starting_index (int): The first index we can check for the brackets. @@ -386,11 +422,14 @@ def _get_tag_attributes(tag_line, starting_index): return None, starting_index if attr_string: attributes_split = [x.strip() for x in attr_string.split(',')] - # Filter out attributes with spaces. - attributes_split = [a for a in attributes_split if " " not in a] final_attributes = {} for attribute in attributes_split: + if self._validate_attribute_string(attribute) is None: + self._add_fatal_error(line_number, tag_line, + f"Malformed attribute found {attribute}. 
" + f"Valid formatting is: attribute, or attribute=\"value\".") + continue split_attribute = attribute.split("=") if len(split_attribute) == 1: final_attributes[split_attribute[0]] = True @@ -468,7 +507,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None): if element_name: node_name = element_name - node_attributes, index = self._get_tag_attributes(tag_line, index) + node_attributes, index = self._get_tag_attributes(line_number, tag_line, index) if node_attributes is None: self._add_fatal_error(line_number, tag_line, "Attributes has mismatched delimiters") return @@ -489,7 +528,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None): return tag_entry def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed", - error_code=HedExceptions.HED_WIKI_DELIMITERS_INVALID): + error_code=HedExceptions.WIKI_DELIMITERS_INVALID): self.fatal_errors.append( {'code': error_code, ErrorContext.ROW: line_number, @@ -504,14 +543,12 @@ def _check_for_new_section(self, line, strings_for_section, current_section): if line.startswith(section_string): if key in strings_for_section: msg = f"Found section {SectionNames[key]} twice" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) if current_section < key: new_section = key else: - error_code = HedExceptions.INVALID_SECTION_SEPARATOR - if key in ErrorsBySection: - error_code = ErrorsBySection[key] + error_code = HedExceptions.SCHEMA_SECTION_MISSING msg = f"Found section {SectionNames[key]} out of order in file" raise HedFileError(error_code, msg, filename=self.filename) break @@ -520,11 +557,11 @@ def _check_for_new_section(self, line, strings_for_section, current_section): def _handle_bad_section_sep(self, line, current_section): if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG): msg = f"Invalid 
section separator '{line.strip()}'" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename) + raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename) if line.startswith("!#"): msg = f"Invalid section separator '{line.strip()}'" - raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename) + raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename) def _split_lines_into_sections(self, wiki_lines): """ Takes a list of lines, and splits it into valid wiki sections. diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index 8404970e..25b27ab8 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -4,6 +4,7 @@ from hed.errors import ErrorHandler, SchemaWarnings from hed.schema import hed_schema_constants as constants from hed.errors.exceptions import HedExceptions, HedFileError +from hed.schema.hed_schema_constants import valid_header_attributes ALLOWED_TAG_CHARS = "-" ALLOWED_DESC_CHARS = "-_:;,./()+ ^" @@ -45,9 +46,9 @@ def validate_version_string(version_string): header_attribute_validators = { - constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID), - constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME) - } + constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID), + constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME) +} def validate_present_attributes(attrib_dict, filename): @@ -92,9 +93,12 @@ def validate_attributes(attrib_dict, filename): had_error = validator(attribute_value) if had_error: raise HedFileError(error_code, had_error, filename) + if attribute_name not in valid_header_attributes: + raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE, + f"Unknown attribute {attribute_name} found in header 
line", filename=filename) if constants.VERSION_ATTRIBUTE not in attrib_dict: - raise HedFileError(HedExceptions.HED_SCHEMA_VERSION_INVALID, + raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID, "No version attribute found in header", filename=filename) diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index c47fff94..c1aad366 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68 +Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8 diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 972d53d4..3e87fdbd 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -12,55 +12,11 @@ from hed.errors import ErrorHandler, get_printable_issue_string -# To be removed eventually once all errors are being verified. -known_errors = [ - 'SIDECAR_INVALID', - 'CHARACTER_INVALID', - 'COMMA_MISSING', - "DEF_EXPAND_INVALID", - "DEF_INVALID", - "DEFINITION_INVALID", - "NODE_NAME_EMPTY", - "ONSET_OFFSET_INSET_ERROR", - "PARENTHESES_MISMATCH", - "PLACEHOLDER_INVALID", - "REQUIRED_TAG_MISSING", - "SIDECAR_INVALID", - "SIDECAR_KEY_MISSING", - "STYLE_WARNING", - "TAG_EMPTY", - "TAG_EXPRESSION_REPEATED", - "TAG_EXTENDED", - "TAG_EXTENSION_INVALID", - "TAG_GROUP_ERROR", - "TAG_INVALID", - "TAG_NOT_UNIQUE", - "TAG_NAMESPACE_PREFIX_INVALID", - "TAG_REQUIRES_CHILD", - "TILDES_UNSUPPORTED", - "UNITS_INVALID", - "UNITS_MISSING", - "VALUE_INVALID", - - "SIDECAR_BRACES_INVALID", - "SCHEMA_LIBRARY_INVALID", - - "SCHEMA_ATTRIBUTE_INVALID", - "SCHEMA_UNIT_CLASS_INVALID", - "SCHEMA_VALUE_CLASS_INVALID", - "SCHEMA_DEPRECATED_INVALID", - "SCHEMA_SUGGESTED_TAG_INVALID", - "SCHEMA_RELATED_TAG_INVALID", - "SCHEMA_NON_PLACEHOLDER_HAS_CLASS", - "SCHEMA_DEFAULT_UNITS_INVALID" -] - skip_tests = { "VERSION_DEPRECATED": "Not applicable", "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet", } - class 
MyTestCase(unittest.TestCase): @classmethod def setUpClass(cls): @@ -80,9 +36,7 @@ def run_single_test(self, test_file): test_info = json.load(fp) for info in test_info: error_code = info['error_code'] - verify_code = False - if error_code in known_errors: - verify_code = True + verify_code = True # To be deprecated once we add this to all tests self._verify_code = verify_code if error_code in skip_tests: @@ -93,6 +47,8 @@ def run_single_test(self, test_file): print(f"Skipping {name} test because: {skip_tests[name]}") continue + # if name != "attribute-invalid-in-library": + # continue description = info['description'] schema = info['schema'] check_for_warnings = info.get("warning", False) diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki index a596775c..e2246335 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki index 672792aa..2b76a3a4 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki @@ -17,6 +17,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No 
newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki index d5e6cf44..678a6249 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki @@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki index 979f72bd..037c9bc7 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki @@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki index 3438be07..f79d8361 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git 
a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki index 267a214e..80454ef4 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki @@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki index ee20104a..d3368e37 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki @@ -33,6 +33,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki index 8b3a3a86..64144708 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki @@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki 
b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki index 7939dfd9..f8bccd4d 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki @@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki index 4a084ebd..eb283125 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki @@ -33,6 +33,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind '''Unit classes''' +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki index f282aabb..289265f8 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki @@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind * weightUnits {defaultUnits=testUnit} ** testUnit {conversionFactor=100} +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes. 
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki index b7c4d5aa..ac67b8fe 100644 --- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki +++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki @@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind * weightUnitsNew {defaultUnits=g} ** g {conversionFactor=100} +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' '''Epilogue''' The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes. diff --git a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki index d5e31f3b..6536476c 100644 --- a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki +++ b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki @@ -44,6 +44,16 @@ This schema is the first official release that includes an xsd and requires unit !# end schema +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + '''Epilogue''' !# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki index 049260f1..4327c6a4 100644 --- a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki +++ b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki @@ -1,6 +1,6 @@ HED version:8.0.0-alpha.1 - +'''Prologue''' This is a prologue line. This is a second prologue line. @@ -1098,7 +1098,15 @@ This is a second prologue line. 
* z {SIUnitSymbolModifier} [SI unit submultiple representing 10^-21] * yocto {SIUnitModifier} [SI unit submultiple representing 10^-24] * y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24] -!# end hed +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + +'''Epilogue''' This is an epilogue. -This is a second line of an epilogue. \ No newline at end of file +This is a second line of an epilogue. + +!# end hed \ No newline at end of file diff --git a/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki new file mode 100644 index 00000000..d2c398e3 --- /dev/null +++ b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki @@ -0,0 +1,41 @@ +HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd" + +'''Prologue''' +This schema tests AppendixB SCHEMA_ATTRIBUTE_INVALID + +!# start schema + +'''Tag1''' {suggestedTag=Tag1}[suggested tag is not registered in the schema] +* Tag2 {valueClassAttribute}[value attribute is the wrong tag class] +* Tag3 {unitAttribute}[unit attribute is the wrong tag class] + +!# end schema +'''Unit classes''' +* unitClass1 {unitAttribute}[Wrong attribute type] +** unit1 {tagAttribute}[Wrong attribute type] + +'''Unit modifiers''' +* mod1 {tagAttribute}[Wrong attribute type] + +'''Value classes''' +* valueClass1 {tagAttribute}[Wrong attribute type] + +'''Schema attributes''' +* tagAttribute +* unitAttribute {unitProperty} +* unitClassAttribute {unitClassProperty} +* unitModifierAttribute {unitModifierProperty} +* valueClassAttribute {valueClassProperty} +* attribute1 {valueClassProperty} + +'''Properties''' +* boolProperty +* unitClassProperty +* unitModifierProperty +* unitProperty +* valueClassProperty + +'''Epilogue''' +This is an updated version of the schema format. 
The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes, themselves have properties. + +!# end hed \ No newline at end of file diff --git a/tests/data/validator_tests/bids_schema.mediawiki b/tests/data/validator_tests/bids_schema.mediawiki index 971a9723..b306003b 100644 --- a/tests/data/validator_tests/bids_schema.mediawiki +++ b/tests/data/validator_tests/bids_schema.mediawiki @@ -1,5 +1,7 @@ HED version: 8.0.0-alpha.2 +'''Prologue''' + !# start schema '''Event''' @@ -1163,6 +1165,7 @@ HED version: 8.0.0-alpha.2 * yocto {SIUnitModifier} [SI unit submultiple representing 10^-24] * y {SIUnitSymbolModifier} [SI unit submultiple representing 10^-24] +'''Value classes''' '''Schema attributes''' * allowedCharacter {unitClassProperty}[An attribute of unit classes schema value placeholders indicating a special character that is allowed in expressing the value of that placeholder.] @@ -1184,6 +1187,8 @@ HED version: 8.0.0-alpha.2 * unitSymbol {boolProperty, unitProperty}[Abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.] * unitClass [Specifies the type of a unit for a tag.] 
+'''Properties''' + '''Epilogue''' This is the new format for the mediawiki schema diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index c21d839a..75f66d17 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -245,10 +245,10 @@ def _base_added_class_tests(self, schema): unit_class_entry = schema.unit_classes["weightUnits"] unit_entry = unit_class_entry.units["testUnit"] - self.assertEqual(unit_entry.attributes["conversionFactor"], str(100)) + self.assertEqual(unit_entry.attributes[HedKey.ConversionFactor], str(100)) unit_modifier_entry = schema.unit_modifiers["huge"] - self.assertEqual(unit_modifier_entry.attributes["conversionFactor"], "10^100") + self.assertEqual(unit_modifier_entry.attributes[HedKey.ConversionFactor], "10^100") self.assertTrue(unit_modifier_entry.attributes["customElementAttribute"]) value_class_entry = schema.value_classes["customValueClass"] @@ -328,9 +328,9 @@ def test_cannot_load_schemas(self): ] for file in files: - with self.assertRaises(HedFileError): - # print(file) + with self.assertRaises(HedFileError) as context: load_schema(file) + self.assertEqual(context.exception.code, HedExceptions.SCHEMA_LIBRARY_INVALID) def test_saving_in_library_wiki(self): old_score_schema = load_schema_version("score_1.0.0") diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py index 583579b1..0759dba4 100644 --- a/tests/schema/test_schema_wiki_fatal_errors.py +++ b/tests/schema/test_schema_wiki_fatal_errors.py @@ -1,7 +1,7 @@ import unittest import os -from hed import schema +from hed import load_schema from hed.errors import HedFileError, HedExceptions @@ -12,25 +12,25 @@ class TestHedSchema(unittest.TestCase): def setUpClass(cls): cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir) cls.files_and_errors = { - "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_START_MISSING, - 
"HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_END_INVALID, - "HED_hed_no_end.mediawiki": HedExceptions.HED_END_INVALID, - "HED_separator_invalid.mediawiki": HedExceptions.INVALID_SECTION_SEPARATOR, + "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID, "HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING, - "HED_header_invalid.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID, - "empty_file.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID, - "HED_header_invalid_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID, - "HED_header_missing_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID, + "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID, + "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, + "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID, "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME, - "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_START_MISSING, - "empty_node.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line2.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line3.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line4.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line5.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line6.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, - "malformed_line7.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID, + "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING, + "empty_node.mediawiki": 
HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, + "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID, "empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID } @@ -60,9 +60,10 @@ def test_invalid_schema(self): for filename, error in self.files_and_errors.items(): full_filename = self.full_base_folder + filename with self.assertRaises(HedFileError) as context: - schema.load_schema(full_filename) + load_schema(full_filename) # all of these should produce exceptions. - from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string + from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string + # Verify basic properties of exception expected_line_numbers = self.expected_line_numbers.get(filename, []) if expected_line_numbers: @@ -82,9 +83,10 @@ def test_merging_errors_schema(self): for filename, error in self.files_and_errors.items(): full_filename = self.full_base_folder + filename with self.assertRaises(HedFileError) as context: - schema.load_schema(full_filename) + load_schema(full_filename) # all of these should produce exceptions. 
- from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string + from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string + from hed.errors.error_types import SchemaAttributeErrors # Verify basic properties of exception expected_line_numbers = self.expected_line_numbers.get(filename, []) if expected_line_numbers: @@ -96,7 +98,7 @@ def test_merging_errors_schema(self): error_handler.push_error_context(ErrorContext.ROW, 1) error_handler.push_error_context(ErrorContext.COLUMN, 2) - issues = error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID, + issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID, "error_attribute", source_tag="error_tag") error_handler.pop_error_context() error_handler.pop_error_context() @@ -106,3 +108,9 @@ def test_merging_errors_schema(self): self.assertTrue(context.exception.args[0] == error) self.assertTrue(context.exception.filename == full_filename) + + def test_attribute_invalid(self): + path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki") + schema = load_schema(path) + issues = schema.check_compliance() + self.assertEqual(len(issues), 7) \ No newline at end of file diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py index 850d014e..415b94dc 100644 --- a/tests/schema/util_create_schemas.py +++ b/tests/schema/util_create_schemas.py @@ -10,13 +10,30 @@ """ library_schema_end = """ -!# end schema + !# end hed """ -def _get_test_schema(node_lines): - library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end +default_end_lines = """ +!# end schema +""" + +required_non_tag = [ +"'''Unit classes'''", +"'''Unit modifiers'''", +"'''Value classes'''", +"'''Schema attributes'''", +"'''Properties'''", +"'''Epilogue'''" +] +def _get_test_schema(node_lines, other_lines=(default_end_lines,)): + node_section = "\n".join(node_lines) + non_tag_section = 
"\n".join(other_lines) + for name in required_non_tag: + if name not in other_lines: + non_tag_section += f"\n{name}\n" + library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end test_schema = from_string(library_schema_string, ".mediawiki") return test_schema