Skip to content

Commit

Permalink
Update schema validation and tests
Browse files Browse the repository at this point in the history
Validate most attribute values now
Update error codes to match spec more
All sections are now required in mediawiki schema
misc minor fixes
  • Loading branch information
IanCa committed Sep 28, 2023
1 parent c5f0386 commit ab2a25c
Show file tree
Hide file tree
Showing 33 changed files with 445 additions and 185 deletions.
11 changes: 7 additions & 4 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class SidecarErrors:

class SchemaErrors:
SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'
SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'

SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"


Expand All @@ -119,19 +119,22 @@ class SchemaWarnings:
SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE"


class SchemaAttributeErrors:
SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID'
SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID"
SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID"
SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID"

SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID"
SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID"
SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID"
SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID"

SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID"
SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID
SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED"
SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE"


class DefinitionErrors:
Expand Down
17 changes: 9 additions & 8 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ class HedExceptions:
INVALID_DATAFRAME = 'INVALID_DATAFRAME'
INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT'
# These are actual schema issues, not that the file cannot be found or parsed
SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID'
SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID'
SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID"

SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
Expand All @@ -26,14 +27,14 @@ class HedExceptions:
ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"

HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator'
SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID'
SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING'

WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator'

# This issue will contain a list of lines with issues.
HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID'
WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID'
WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID'
HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'

SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
Expand Down
1 change: 1 addition & 0 deletions hed/errors/known_error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
],
"schema_validation_errors": [
"SCHEMA_ATTRIBUTE_INVALID",
"SCHEMA_ATTRIBUTE_VALUE_INVALID",
"SCHEMA_CHARACTER_INVALID",
"SCHEMA_DUPLICATE_NODE",
"SCHEMA_HEADER_INVALID",
Expand Down
39 changes: 26 additions & 13 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
    """ Build the message for an attribute that is unknown or used outside its class.

    Parameters:
        attribute_name (str): The name of the offending attribute.
        source_tag (str): The name of the schema entry that uses the attribute.

    Returns:
        str: The formatted error message.
    """
    message = (
        f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, "
        f"or was used outside of it's defined class."
    )
    return message
Expand All @@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in
f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}."


@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING)
@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING,
actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
    """ Build the message for a class/takes-value attribute found on a non-placeholder node.

    Parameters:
        tag_name (str): The name of the schema entry carrying the attribute.
        invalid_attribute_name (str): The name of the attribute that is not allowed there.

    Returns:
        str: The formatted message.
    """
    # The trailing space keeps the two sentences from running together ("...value.Found ...").
    return "Only placeholder nodes('#') can have a unit class, value class, or takes value. " \
           f"Found {invalid_attribute_name} on {tag_name}"


@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name):
    """ Build the message for an attribute that should never appear in a loaded schema.

    Parameters:
        tag_name (str): The name of the schema entry carrying the attribute.
        invalid_attribute_name (str): The name of the unexpected attribute.

    Returns:
        str: The formatted error message.
    """
    # The trailing space separates the two sentences ("...'Tag'. Something went very wrong.").
    return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'. " \
           f"Something went very wrong."


@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version):
    """ Build the message for an invalid or unknown deprecatedFrom value.

    Parameters:
        tag_name (str): The name of the schema entry carrying the attribute.
        invalid_deprecated_version (str): The rejected deprecatedFrom value.

    Returns:
        str: The formatted error message.
    """
    message = (
        f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: "
        f"'{invalid_deprecated_version}'."
    )
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child):
    """ Build the message for a deprecated tag that has a non-deprecated child.

    Parameters:
        deprecated_tag (str): The name of the deprecated tag.
        non_deprecated_child (str): The name of the child that is not deprecated.

    Returns:
        str: The formatted error message.
    """
    message = (
        f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: "
        f"'{non_deprecated_child}'."
    )
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name):
    """ Build the message for a tag whose tag-list attribute names an invalid tag.

    Parameters:
        suggestedTag (str): The name of the tag carrying the attribute.
        invalidSuggestedTag (str): The rejected value found in the attribute.
        attribute_name (str): The name of the attribute being reported.

    Returns:
        str: The formatted error message.
    """
    message = f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'."
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name):
    """ Build the message for a tag with an invalid unit class value.

    Parameters:
        tag (str): The name of the tag carrying the attribute.
        unit_class (str): The rejected value found in the attribute.
        attribute_name (str): The name of the attribute being reported.

    Returns:
        str: The formatted error message.
    """
    message = f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name):
    """ Build the message for a tag with an invalid value class value.

    Parameters:
        tag (str): The name of the tag carrying the attribute.
        unit_class (str): The rejected value found in the attribute.
        attribute_name (str): The name of the attribute being reported.

    Returns:
        str: The formatted error message.
    """
    message = f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)
@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units):
    """ Build the message for a tag whose defaultUnit is not one of the valid units.

    Parameters:
        tag (str): The name of the entry carrying the attribute.
        bad_unit (str): The rejected default unit.
        valid_units (iterable of str): The units that would have been accepted.

    Returns:
        str: The formatted error message, with the valid units comma-joined.
    """
    unit_list = ",".join(valid_units)
    return (
        f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'. "
        f"Valid units are: '{unit_list}'."
    )


@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
    """ Build the message for an entry whose conversionFactor is not positive.

    Parameters:
        tag (str): The name of the entry carrying the attribute.
        conversion_factor (object): The rejected conversion factor value.

    Returns:
        str: The formatted error message.
    """
    message = (
        f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'. "
        f"Conversion factor must be positive."
    )
    return message


@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
    """ Build the message for an entry with an invalid allowedCharacter value.

    Parameters:
        tag (str): The name of the entry carrying the attribute.
        invalid_character (str): The rejected allowedCharacter value.

    Returns:
        str: The formatted error message, including the accepted forms.
    """
    problem = f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'. "
    accepted = (f"Allowed characters are: a single character, "
                f"or one of the following - letters, blank, digits, alphanumeric.")
    return problem + accepted


@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library):
    """ Build the message for an entry with an invalid inLibrary value.

    Parameters:
        tag (str): The name of the entry carrying the attribute.
        bad_library (str): The rejected inLibrary value.

    Returns:
        str: The formatted error message.
    """
    message = f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'. "
    return message
4 changes: 3 additions & 1 deletion hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
new_entry._set_attribute_value(HedKey.InLibrary, self.library)
# only add it if not already present - This is a rare case
if not new_entry.has_attribute(HedKey.InLibrary):
new_entry._set_attribute_value(HedKey.InLibrary, self.library)

section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)
Expand Down
1 change: 1 addition & 0 deletions hed/schema/hed_schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class HedKey:
SuggestedTag = "suggestedTag"
Rooted = "rooted"
DeprecatedFrom = "deprecatedFrom"
ConversionFactor = "conversionFactor"

# All known properties
BoolProperty = 'boolProperty'
Expand Down
16 changes: 8 additions & 8 deletions hed/schema/hed_schema_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ def __eq__(self, other):
return False
return True


class UnitEntry(HedSchemaEntry):
""" A single unit entry with modifiers in the HedSchema. """
def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -207,12 +206,13 @@ def finalize_entry(self, schema):
self.derivative_units = derivative_units

def _get_conversion_factor(self, modifier_entry):

base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e"))
if modifier_entry:
modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e"))
else:
modifier_factor = 1.0
base_factor = modifier_factor = 1.0
try:
base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
if modifier_entry:
modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
except (ValueError, AttributeError) as e:
pass # Just default to 1.0
return base_factor * modifier_factor

def get_conversion_factor(self, unit_name):
Expand All @@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name):
Returns:
conversion_factor(float or None): Returns the conversion factor or None
"""
if "conversionFactor" in self.attributes:
if HedKey.ConversionFactor in self.attributes:
return float(self.derivative_units.get(unit_name))

class HedTagEntry(HedSchemaEntry):
Expand Down
64 changes: 63 additions & 1 deletion hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name):
issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
tag_entry.name,
child.name)
return issues
return issues


def conversion_factor(hed_schema, tag_entry, attribute_name):
    """ Check that the conversion factor attribute is a positive number.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation (not used by this check).
        tag_entry (HedSchemaEntry): The schema entry for this attribute.
        attribute_name (str): The name of this attribute.

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    issues = []
    raw_value = tag_entry.attributes.get(attribute_name, "1.0")
    try:
        # '^' is accepted in place of 'e' as the exponent marker, e.g. "1.0^-3".
        factor = float(raw_value.replace("^", "e"))
    except (ValueError, AttributeError):
        # Not a string, or not parseable as a number: keep the raw value so it is reported below.
        factor = raw_value
    # "not factor > 0.0" instead of "factor <= 0.0" so that NaN is rejected as well.
    if not isinstance(factor, float) or not factor > 0.0:
        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE,
                                            tag_entry.name,
                                            factor)

    return issues


def allowed_characters_check(hed_schema, tag_entry, attribute_name):
    """ Check that every comma-separated allowed character value is valid.

    A value is accepted when it is exactly one character long or is one of the
    group names: letters, blank, digits, alphanumeric.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this attribute.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    group_names = ('letters', 'blank', 'digits', 'alphanumeric')
    issues = []
    for entry in tag_entry.attributes.get(attribute_name, "").split(","):
        if len(entry) == 1 or entry in group_names:
            continue
        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
                                            tag_entry.name,
                                            entry)
    return issues


def in_library_check(hed_schema, tag_entry, attribute_name):
    """ Check that the inLibrary attribute names the library of the schema being validated.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this attribute.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    issues = []

    library = tag_entry.attributes.get(attribute_name, "")
    if hed_schema.library != library:
        # Report with the inLibrary-specific code, not the allowed-character one.
        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
                                            tag_entry.name,
                                            library)
    return issues
53 changes: 22 additions & 31 deletions hed/schema/schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
class SchemaValidator:
"""Validator class to wrap some code. In general, just call check_compliance."""
attribute_validators = {
HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check,
SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)],
HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check,
SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)],
HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check,
SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
(schema_attribute_validators.unit_class_exists,
SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)],
HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check,
SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
(schema_attribute_validators.value_class_exists,
SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)],
HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check],
HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check],
HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check,
schema_attribute_validators.unit_class_exists],
HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check,
schema_attribute_validators.value_class_exists],
# Rooted tag is implicitly verified on loading
# HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check,
# SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)],
HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check,
SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)],
HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check,
SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)],
HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists,
SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)]
# HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
HedKey.DefaultUnits: [schema_attribute_validators.unit_exists],
HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
HedKey.InLibrary: [schema_attribute_validators.in_library_check]
}

def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
Expand All @@ -80,7 +73,7 @@ def check_unknown_attributes(self):
if unknown_attributes:
for attribute_name, source_tags in unknown_attributes.items():
for tag in source_tags:
issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
attribute_name,
source_tag=tag)
return issues_list
Expand All @@ -93,16 +86,14 @@ def check_attributes(self):
for tag_entry in self.hed_schema[section_key].values():
self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
for attribute_name in tag_entry.attributes:
validators = self.attribute_validators.get(attribute_name, None)
if validators:
for validator, error_code in validators:
self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
new_issues = validator(self.hed_schema, tag_entry, attribute_name)
for issue in new_issues:
issue['code'] = error_code
issue['severity'] = ErrorSeverity.WARNING
self.error_handler.add_context_and_filter(new_issues)
issues_list += new_issues
validators = self.attribute_validators.get(attribute_name, [])
for validator in validators:
self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
new_issues = validator(self.hed_schema, tag_entry, attribute_name)
for issue in new_issues:
issue['severity'] = ErrorSeverity.WARNING
self.error_handler.add_context_and_filter(new_issues)
issues_list += new_issues
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
self.error_handler.pop_error_context()
Expand Down
5 changes: 0 additions & 5 deletions hed/schema/schema_io/schema2base.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,6 @@ def _output_tags(self, tags):
self._end_tag_section()

def _output_units(self, unit_classes):
if not unit_classes:
return

section_node = self._start_section(HedSectionKey.UnitClasses)

for unit_class_entry in unit_classes.values():
Expand All @@ -128,8 +125,6 @@ def _output_units(self, unit_classes):
self._write_entry(unit_entry, unit_class_node)

def _output_section(self, hed_schema, key_class):
if not hed_schema[key_class]:
return
parent_node = self._start_section(key_class)
for entry in hed_schema[key_class].values():
if self._should_skip(entry):
Expand Down
Loading

0 comments on commit ab2a25c

Please sign in to comment.