Skip to content

Commit

Permalink
Merge pull request #805 from IanCa/develop
Browse files Browse the repository at this point in the history
Fix bugs/improve unit and def validation code
  • Loading branch information
VisLab authored Nov 30, 2023
2 parents b6df3ea + 3e2770c commit a9a30ad
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 79 deletions.
18 changes: 11 additions & 7 deletions hed/models/hed_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,12 @@ def _calculate_to_canonical_forms(self, hed_schema):

return tag_issues

def get_stripped_unit_value(self):
def get_stripped_unit_value(self, extension_text):
""" Return the extension divided into value and units, if the units are valid.
Parameters:
extension_text (str): The text to split, in case it's a portion of a tag.
Returns:
stripped_unit_value (str): The extension portion with the units removed.
unit (str or None): None if no valid unit found.
Expand All @@ -328,7 +331,7 @@ def get_stripped_unit_value(self):
"""
tag_unit_classes = self.unit_classes
stripped_value, unit, _ = self._get_tag_units_portion(tag_unit_classes)
stripped_value, unit, _ = HedTag._get_tag_units_portion(extension_text, tag_unit_classes)
if stripped_value:
return stripped_value, unit

Expand All @@ -354,7 +357,7 @@ def value_as_default_unit(self):
unit_entry = self.default_unit
unit = unit_entry.name
else:
stripped_value, unit, unit_entry = self._get_tag_units_portion(tag_unit_classes)
stripped_value, unit, unit_entry = HedTag._get_tag_units_portion(self.extension, tag_unit_classes)

if stripped_value:
if unit_entry.get_conversion_factor(unit) is not None:
Expand Down Expand Up @@ -544,7 +547,8 @@ def _get_schema_namespace(org_tag):
return org_tag[:first_colon + 1]
return ""

def _get_tag_units_portion(self, tag_unit_classes):
@staticmethod
def _get_tag_units_portion(extension_text, tag_unit_classes):
""" Check that this string has valid units and remove them.
Parameters:
Expand All @@ -555,19 +559,19 @@ def _get_tag_units_portion(self, tag_unit_classes):
This is filled in if there are no units as well.
unit (UnitEntry or None): The matching unit entry if one is found
"""
value, _, units = self.extension.rpartition(" ")
value, _, units = extension_text.rpartition(" ")
if not units:
return None, None, None

for unit_class_entry in tag_unit_classes.values():
all_valid_unit_permutations = unit_class_entry.derivative_units

possible_match = self._find_modifier_unit_entry(units, all_valid_unit_permutations)
possible_match = HedTag._find_modifier_unit_entry(units, all_valid_unit_permutations)
if possible_match and not possible_match.has_attribute(HedKey.UnitPrefix):
return value, units, possible_match

# Repeat the above, but as a prefix
possible_match = self._find_modifier_unit_entry(value, all_valid_unit_permutations)
possible_match = HedTag._find_modifier_unit_entry(value, all_valid_unit_permutations)
if possible_match and possible_match.has_attribute(HedKey.UnitPrefix):
return units, value, possible_match

Expand Down
11 changes: 0 additions & 11 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,17 +753,6 @@ def _get_attributes_for_section(self, key_class):
# Semi private function used to create a schema in memory(usually from a source file)
# ===============================================
def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# Add the InLibrary attribute to any library schemas as they are loaded
# These are later removed when they are saved out, if saving unmerged
# if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
# # only add it if not already present - This is a rare case
# Todo ian: I think this should be moved up one level for parity with the other loading changes
# .library will be updated to potentially be a list
# Cannot save schema if .library is a list
#
# if not new_entry.has_attribute(HedKey.InLibrary):
# new_entry._set_attribute_value(HedKey.InLibrary, self.library)

section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)

Expand Down
64 changes: 36 additions & 28 deletions hed/validator/def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,37 +35,14 @@ def validate_def_tags(self, hed_string_obj, hed_validator=None):
if self._label_tag_name not in hed_string_lower:
return []

# This is needed primarily to validate the contents of a def-expand matches the default.
def_issues = []
# We need to check for labels to expand in ALL groups
for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True):
def_issues += self._validate_def_contents(def_tag, def_expand_group, hed_validator)

return def_issues

@staticmethod
def _validate_def_units(def_tag, placeholder_tag, hed_validator, is_def_expand_tag):
"""Validate units and value classes on def/def-expand tags
Parameters:
def_tag(HedTag): The source tag
placeholder_tag(HedTag): The placeholder tag this def fills in
hed_validator(HedValidator): Used to validate the units/values
is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not.
Returns:
issues(list): Issues found from validating placeholders.
"""
def_issues = []
error_code = ValidationErrors.DEF_INVALID
if is_def_expand_tag:
error_code = ValidationErrors.DEF_EXPAND_INVALID

def_issues += hed_validator.validate_units(placeholder_tag,
report_as=def_tag,
error_code=error_code)

return def_issues

@staticmethod
def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag):
"""Returns the correct error for this type of def tag
Expand Down Expand Up @@ -121,15 +98,46 @@ def _validate_def_contents(self, def_tag, def_expand_group, hed_validator):
def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID,
tag=def_tag, actual_def=def_contents,
found_def=def_expand_group)
if def_entry.takes_value and hed_validator:
placeholder_tag = def_contents.get_first_group().find_placeholder_tag()
def_issues += self._validate_def_units(def_tag, placeholder_tag, hed_validator,
is_def_expand_tag)
else:
def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag)

return def_issues

def validate_def_value_units(self, def_tag, hed_validator):
"""Equivalent to HedValidator.validate_units for the special case of a Def or Def-expand tag"""
tag_label, _, placeholder = def_tag.extension.partition('/')
is_def_expand_tag = def_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY

def_entry = self.defs.get(tag_label.lower())
# These errors will be caught as can't match definition
if def_entry is None:
return []

error_code = ValidationErrors.DEF_INVALID
if is_def_expand_tag:
error_code = ValidationErrors.DEF_EXPAND_INVALID

def_issues = []

# Validate the def name vs the name class
def_issues += hed_validator.validate_units(def_tag,
tag_label,
error_code=error_code)

def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder, return_copy_of_tag=True)
if def_contents and def_entry.takes_value and hed_validator:
placeholder_tag = def_contents.get_first_group().find_placeholder_tag()
# Handle the case where they're adding a unit as part of a placeholder. eg Speed/# mph
if placeholder_tag:
placeholder = placeholder_tag.extension
def_issues += hed_validator.validate_units(placeholder_tag,
placeholder,
report_as=def_tag,
error_code=error_code,
index_offset=len(tag_label) + 1)

return def_issues

def validate_onset_offset(self, hed_string_obj):
""" Validate onset/offset
Expand Down
25 changes: 19 additions & 6 deletions hed/validator/hed_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,30 +140,40 @@ def check_tag_formatting(self, original_tag):

return validation_issues

def validate_units(self, original_tag, report_as=None, error_code=None):
def validate_units(self, original_tag, validate_text=None, report_as=None, error_code=None,
index_offset=0):
"""Validate units and value classes
Parameters:
original_tag(HedTag): The source tag
validate_text (str): the text we want to validate, if not the full extension.
report_as(HedTag): Report the error tag as coming from a different one.
Mostly for definitions that expand.
error_code(str): The code to override the error as. Again mostly for def/def-expand tags.
index_offset(int): Offset into the extension validate_text starts at
Returns:
issues(list): Issues found from units
"""
if validate_text is None:
validate_text = original_tag.extension
issues = []
if original_tag.is_unit_class_tag():
issues += self._unit_validator.check_tag_unit_class_units_are_valid(original_tag,
validate_text,
report_as=report_as,
error_code=error_code)
error_code=error_code,
index_offset=index_offset)
elif original_tag.is_value_class_tag():
issues += self._unit_validator.check_tag_value_class_valid(original_tag,
validate_text,
report_as=report_as,
error_code=error_code)
# todo: potentially make this one have a report_as
error_code=error_code,
index_offset=index_offset)
elif original_tag.extension:
issues += self._char_validator.check_for_invalid_extension_chars(original_tag)
issues += self._char_validator.check_for_invalid_extension_chars(original_tag,
validate_text,
index_offset=index_offset)

return issues

Expand Down Expand Up @@ -198,6 +208,9 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol
run_individual_tag_validators(hed_tag,
allow_placeholders=allow_placeholders,
is_definition=is_definition)
validation_issues += self.validate_units(hed_tag)
if hed_tag.short_base_tag == DefTagNames.DEF_ORG_KEY or hed_tag.short_base_tag == DefTagNames.DEF_EXPAND_ORG_KEY:
validation_issues += self._def_validator.validate_def_value_units(hed_tag, self)
else:
validation_issues += self.validate_units(hed_tag)

return validation_issues
16 changes: 11 additions & 5 deletions hed/validator/tag_util/char_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,28 @@ def check_tag_invalid_chars(self, original_tag, allow_placeholders):
validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag)
return validation_issues

def check_for_invalid_extension_chars(self, original_tag):
def check_for_invalid_extension_chars(self, original_tag, validate_text, error_code=None,
index_offset=0):
"""Report invalid characters in extension/value.
Parameters:
original_tag (HedTag): The original tag that is used to report the error.
validate_text (str): the text we want to validate, if not the full extension.
error_code(str): The code to override the error as. Again mostly for def/def-expand tags.
index_offset(int): Offset into the extension validate_text starts at
Returns:
list: Validation issues. Each issue is a dictionary.
"""
allowed_chars = self.TAG_ALLOWED_CHARS
allowed_chars += self.DEFAULT_ALLOWED_PLACEHOLDER_CHARS
allowed_chars += " "
return self._check_invalid_chars(original_tag.extension, allowed_chars, original_tag,
starting_index=len(original_tag.org_base_tag) + 1)
return self._check_invalid_chars(validate_text, allowed_chars, original_tag,
starting_index=len(original_tag.org_base_tag) + 1 + index_offset,
error_code=error_code)

@staticmethod
def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0):
def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index=0, error_code=None):
validation_issues = []
for i, character in enumerate(check_string):
if character.isalnum():
Expand All @@ -82,7 +87,8 @@ def _check_invalid_chars(check_string, allowed_chars, source_tag, starting_index
continue
validation_issues += ErrorHandler.format_error(ValidationErrors.INVALID_TAG_CHARACTER,
tag=source_tag, index_in_tag=starting_index + i,
index_in_tag_end=starting_index + i + 1)
index_in_tag_end=starting_index + i + 1,
actual_error=error_code)
return validation_issues

@staticmethod
Expand Down
33 changes: 21 additions & 12 deletions hed/validator/tag_util/class_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

from hed.errors.error_reporter import ErrorHandler
from hed.errors.error_types import ValidationErrors
from hed.schema.hed_schema_constants import HedKey
import functools


class UnitValueValidator:
Expand All @@ -18,6 +16,7 @@ class UnitValueValidator:
DIGIT_OR_POUND_EXPRESSION = r'^(-?[\d.]+(?:e-?\d+)?|#)$'

VALUE_CLASS_ALLOWED_CACHE=20

def __init__(self, value_validators=None):
""" Validates the unit and value classes on a given tag.
Expand All @@ -39,28 +38,31 @@ def _get_default_value_class_validators(self):

return validator_dict

def check_tag_unit_class_units_are_valid(self, original_tag, report_as=None, error_code=None):
def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, report_as=None, error_code=None,
index_offset=0):
""" Report incorrect unit class or units.
Parameters:
original_tag (HedTag): The original tag that is used to report the error.
validate_text (str): The text to validate
report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
error_code (str): Override error codes
Returns:
list: Validation issues. Each issue is a dictionary.
"""
validation_issues = []
if original_tag.is_unit_class_tag():
stripped_value, unit = original_tag.get_stripped_unit_value()
stripped_value, unit = original_tag.get_stripped_unit_value(validate_text)
if not unit:
# Todo: in theory this should separately validate the number and the units, for units
# that are prefixes like $. Right now those are marked as unit invalid AND value_invalid.
bad_units = " " in original_tag.extension
bad_units = " " in validate_text

if bad_units:
stripped_value = stripped_value.split(" ")[0]

validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code)
validation_issues += self._check_value_class(original_tag, stripped_value, report_as, error_code,
index_offset)
validation_issues += self._check_units(original_tag, bad_units, report_as)

# We don't want to give this overall error twice
Expand All @@ -71,17 +73,21 @@ def check_tag_unit_class_units_are_valid(self, original_tag, report_as=None, err

return validation_issues

def check_tag_value_class_valid(self, original_tag, report_as=None, error_code=None):
def check_tag_value_class_valid(self, original_tag, validate_text, report_as=None, error_code=None,
index_offset=0):
""" Report an invalid value portion.
Parameters:
original_tag (HedTag): The original tag that is used to report the error.
validate_text (str): The text to validate
report_as (HedTag): Report errors as coming from this tag, rather than original_tag.
error_code (str): Override error codes
index_offset(int): Offset into the extension validate_text starts at
Returns:
list: Validation issues.
"""
return self._check_value_class(original_tag, original_tag.extension, report_as, error_code)
return self._check_value_class(original_tag, validate_text, report_as, error_code, index_offset)

# char_sets = {
# "letters": set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
Expand Down Expand Up @@ -118,17 +124,20 @@ def _get_problem_indexes(self, original_tag, stripped_value):
# indexes = [index for index, char in enumerate(stripped_value) if char not in allowed_characters]
# pass

def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None):
def _check_value_class(self, original_tag, stripped_value, report_as, error_code=None, index_offset=0):
"""Returns any issues found if this is a value tag"""
# todo: This function needs to check for allowed characters, not just {}
validation_issues = []
if original_tag.is_takes_value_tag():
report_as = report_as if report_as else original_tag
problem_indexes = self._get_problem_indexes(original_tag, stripped_value)
for char, index in problem_indexes:
error_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE \
if char in "{}" else ValidationErrors.INVALID_TAG_CHARACTER
validation_issues += ErrorHandler.format_error(error_code,
tag_code = ValidationErrors.CURLY_BRACE_UNSUPPORTED_HERE if (
char in "{}") else ValidationErrors.INVALID_TAG_CHARACTER

index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag)
index += index_adj + index_offset
validation_issues += ErrorHandler.format_error(tag_code,
tag=report_as, index_in_tag=index,
index_in_tag_end=index + 1)
if not self._validate_value_class_portion(original_tag, stripped_value):
Expand Down
Loading

0 comments on commit a9a30ad

Please sign in to comment.