Skip to content

Commit

Permalink
Merge hed validator and tag validator, then split out functionality to various sub files
Browse files Browse the repository at this point in the history
  • Loading branch information
IanCa committed Nov 4, 2023
1 parent 2aced07 commit b2c754f
Show file tree
Hide file tree
Showing 15 changed files with 877 additions and 826 deletions.
1 change: 0 additions & 1 deletion hed/validator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Validation of HED tags."""

from .hed_validator import HedValidator
from .tag_validator import TagValidator
from .sidecar_validator import SidecarValidator
from .def_validator import DefValidator
from .onset_validator import OnsetValidator
Expand Down
34 changes: 16 additions & 18 deletions hed/validator/def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class DefValidator(DefinitionDict):
""" Handles validating Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset
"""

def __init__(self, def_dicts=None, hed_schema=None):
""" Initialize for definitions in hed strings.
Expand All @@ -21,12 +22,12 @@ def __init__(self, def_dicts=None, hed_schema=None):
"""
super().__init__(def_dicts, hed_schema=hed_schema)

def validate_def_tags(self, hed_string_obj, tag_validator=None):
def validate_def_tags(self, hed_string_obj, hed_validator=None):
""" Validate Def/Def-Expand tags.
Parameters:
hed_string_obj (HedString): The hed string to process.
tag_validator (TagValidator): Used to validate the placeholder replacement.
hed_validator (HedValidator): Used to validate the placeholder replacement.
Returns:
list: Issues found related to validating defs. Each issue is a dictionary.
"""
Expand All @@ -37,18 +38,18 @@ def validate_def_tags(self, hed_string_obj, tag_validator=None):
def_issues = []
# We need to check for labels to expand in ALL groups
for def_tag, def_expand_group, def_group in hed_string_obj.find_def_tags(recursive=True):
def_issues += self._validate_def_contents(def_tag, def_expand_group, tag_validator)
def_issues += self._validate_def_contents(def_tag, def_expand_group, hed_validator)

return def_issues

@staticmethod
def _validate_def_units(def_tag, placeholder_tag, tag_validator, is_def_expand_tag):
def _validate_def_units(def_tag, placeholder_tag, hed_validator, is_def_expand_tag):
"""Validate units and value classes on def/def-expand tags
Parameters:
def_tag(HedTag): The source tag
placeholder_tag(HedTag): The placeholder tag this def fills in
tag_validator(TagValidator): Used to validate the units/values
hed_validator(HedValidator): Used to validate the units/values
is_def_expand_tag(bool): If the given def_tag is a def-expand tag or not.
Returns:
Expand All @@ -58,14 +59,11 @@ def _validate_def_units(def_tag, placeholder_tag, tag_validator, is_def_expand_t
error_code = ValidationErrors.DEF_INVALID
if is_def_expand_tag:
error_code = ValidationErrors.DEF_EXPAND_INVALID
if placeholder_tag.is_unit_class_tag():
def_issues += tag_validator.check_tag_unit_class_units_are_valid(placeholder_tag,
report_as=def_tag,
error_code=error_code)
elif placeholder_tag.is_value_class_tag():
def_issues += tag_validator.check_tag_value_class_valid(placeholder_tag,
report_as=def_tag,
error_code=error_code)

def_issues += hed_validator.validate_units(placeholder_tag,
report_as=def_tag,
error_code=error_code)

return def_issues

@staticmethod
Expand All @@ -92,14 +90,14 @@ def _report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag):
def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
return def_issues

def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
def _validate_def_contents(self, def_tag, def_expand_group, hed_validator):
""" Check for issues with expanding a tag from Def to a Def-expand tag group
Parameters:
def_tag (HedTag): Source hed tag that may be a Def or Def-expand tag.
def_expand_group (HedGroup or HedTag): Source group for this def-expand tag.
Same as def_tag if this is not a def-expand tag.
tag_validator (TagValidator): Used to validate the placeholder replacement.
hed_validator (HedValidator): Used to validate the placeholder replacement.
Returns:
issues(list): Issues found from validating placeholders.
Expand All @@ -117,15 +115,15 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
def_issues += ErrorHandler.format_error(error_code, tag=def_tag)
else:
def_contents = def_entry.get_definition(def_tag, placeholder_value=placeholder,
return_copy_of_tag=True)
return_copy_of_tag=True)
if def_contents is not None:
if is_def_expand_tag and def_expand_group != def_contents:
def_issues += ErrorHandler.format_error(ValidationErrors.HED_DEF_EXPAND_INVALID,
tag=def_tag, actual_def=def_contents,
found_def=def_expand_group)
if def_entry.takes_value and tag_validator:
if def_entry.takes_value and hed_validator:
placeholder_tag = def_contents.get_first_group().find_placeholder_tag()
def_issues += self._validate_def_units(def_tag, placeholder_tag, tag_validator,
def_issues += self._validate_def_units(def_tag, placeholder_tag, hed_validator,
is_def_expand_tag)
else:
def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag)
Expand Down
147 changes: 84 additions & 63 deletions hed/validator/hed_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
the get_validation_issues() function.
"""

import re
from hed.errors.error_types import ValidationErrors, DefinitionErrors
from hed.errors.error_reporter import ErrorHandler, check_for_any_errors

from hed.models.hed_string import HedString
from hed.models import HedTag
from hed.validator.tag_validator import TagValidator
from hed.validator.def_validator import DefValidator
from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator


class HedValidator:
Expand All @@ -25,14 +24,20 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False):
def_dicts(DefinitionDict or list or dict): the def dicts to use for validation
definitions_allowed(bool): If False, flag definitions found as errors
"""
super().__init__()
self._tag_validator = None
if hed_schema is None:
raise ValueError("HedSchema required for validation")

self._hed_schema = hed_schema

self._tag_validator = TagValidator(hed_schema=self._hed_schema)
self._def_validator = DefValidator(def_dicts, hed_schema)
self._definitions_allowed = definitions_allowed

self._unit_validator = UnitValueValidator()
self._char_validator = CharValidator()
self._string_validator = StringValidator()
self._tag_validator = TagValidator()
self._group_validator = GroupValidator(hed_schema)

def validate(self, hed_string, allow_placeholders, error_handler=None):
"""
Validate the string using the schema
Expand All @@ -57,96 +62,111 @@ def validate(self, hed_string, allow_placeholders, error_handler=None):

def run_basic_checks(self, hed_string, allow_placeholders):
issues = []
issues += self._tag_validator.run_hed_string_validators(hed_string, allow_placeholders)
issues += self._run_hed_string_validators(hed_string, allow_placeholders)
if check_for_any_errors(issues):
return issues
if hed_string == "n/a" or not self._hed_schema:
if hed_string == "n/a":
return issues
for tag in hed_string.get_all_tags():
self._tag_validator.run_validate_tag_characters(tag, allow_placeholders=allow_placeholders)
issues += self._run_validate_tag_characters(tag, allow_placeholders=allow_placeholders)
issues += hed_string._calculate_to_canonical_forms(self._hed_schema)
if check_for_any_errors(issues):
return issues
# This is required so it can validate the tag a tag expands into
# e.g. checking units when a definition placeholder has units
self._def_validator.construct_def_tags(hed_string)
issues += self._validate_individual_tags_in_hed_string(hed_string, allow_placeholders=allow_placeholders)
issues += self._def_validator.validate_def_tags(hed_string, self._tag_validator)
issues += self._def_validator.validate_def_tags(hed_string, self)
return issues

def run_full_string_checks(self, hed_string):
issues = []
issues += self._validate_tags_in_hed_string(hed_string)
issues += self._validate_groups_in_hed_string(hed_string)
issues += self._group_validator.run_all_tags_validators(hed_string)
issues += self._group_validator.run_tag_level_validators(hed_string)
issues += self._def_validator.validate_onset_offset(hed_string)
return issues

def _validate_groups_in_hed_string(self, hed_string_obj):
""" Report invalid groups at each level.
# Todo: mark semi private/actually private below this
def _run_validate_tag_characters(self, original_tag, allow_placeholders):
""" Basic character validation of tags
Parameters:
hed_string_obj (HedString): A HedString object.
original_tag (HedTag): An original tag.
allow_placeholders (bool): Allow value class or extensions to be placeholders rather than a specific value.
Returns:
list: Issues associated with each level in the HED string. Each issue is a dictionary.
Notes:
- This pertains to the top-level, all groups, and nested groups.
list: The validation issues associated with the characters. Each issue is a dictionary.
"""
validation_issues = []
for original_tag_group, is_top_level in hed_string_obj.get_all_groups(also_return_depth=True):
is_group = original_tag_group.is_group
if not original_tag_group and is_group:
validation_issues += ErrorHandler.format_error(ValidationErrors.HED_GROUP_EMPTY,
tag=original_tag_group)
validation_issues += self._tag_validator.run_tag_level_validators(original_tag_group.tags(), is_top_level,
is_group)

validation_issues += self._check_for_duplicate_groups(hed_string_obj)
return validation_issues
return self._char_validator.check_tag_invalid_chars(original_tag, allow_placeholders)

def _run_hed_string_validators(self, hed_string_obj, allow_placeholders=False):
"""Basic high level checks of the hed string for illegal characters
def _check_for_duplicate_groups_recursive(self, sorted_group, validation_issues):
prev_child = None
for child in sorted_group:
if child == prev_child:
if isinstance(child, HedTag):
error_code = ValidationErrors.HED_TAG_REPEATED
validation_issues += ErrorHandler.format_error(error_code, child)
else:
error_code = ValidationErrors.HED_TAG_REPEATED_GROUP
found_group = child
base_steps_up = 0
while isinstance(found_group, list):
found_group = found_group[0]
base_steps_up += 1
for _ in range(base_steps_up):
found_group = found_group._parent
validation_issues += ErrorHandler.format_error(error_code, found_group)
if not isinstance(child, HedTag):
self._check_for_duplicate_groups_recursive(child, validation_issues)
prev_child = child

def _check_for_duplicate_groups(self, original_group):
sorted_group = original_group._sorted()
Catches fully banned characters, out of order parentheses, commas, repeated slashes, etc.
Parameters:
hed_string_obj (HedString): A HED string.
allow_placeholders: Allow placeholder and curly brace characters
Returns:
list: The validation issues associated with a HED string. Each issue is a dictionary.
"""
validation_issues = []
self._check_for_duplicate_groups_recursive(sorted_group, validation_issues)
validation_issues += self._char_validator.check_invalid_character_issues(
hed_string_obj.get_original_hed_string(), allow_placeholders)
validation_issues += self._string_validator.run_string_validator(hed_string_obj)
for original_tag in hed_string_obj.get_all_tags():
validation_issues += self.check_tag_formatting(original_tag)
return validation_issues

def _validate_tags_in_hed_string(self, hed_string_obj):
""" Report invalid multi-tag properties in a hed string, e.g. required tags.
pattern_doubleslash = re.compile(r"([ \t/]{2,}|^/|/$)")

Parameters:
hed_string_obj (HedString): A HedString object.
def check_tag_formatting(self, original_tag):
""" Report repeated or erroneous slashes.
Returns:
list: The issues associated with the tags in the HED string. Each issue is a dictionary.
Parameters:
original_tag (HedTag): The original tag that is used to report the error.
Returns:
list: Validation issues. Each issue is a dictionary.
"""
validation_issues = []
tags = hed_string_obj.get_all_tags()
validation_issues += self._tag_validator.run_all_tags_validators(tags)
for match in self.pattern_doubleslash.finditer(original_tag.org_tag):
validation_issues += ErrorHandler.format_error(ValidationErrors.NODE_NAME_EMPTY,
tag=original_tag,
index_in_tag=match.start(),
index_in_tag_end=match.end())

return validation_issues

def validate_units(self, original_tag, report_as=None, error_code=None):
    """ Run the unit, value-class, or extension check that applies to a tag.

    Exactly one check is chosen, in this order of precedence:
    unit-class tag, then value-class tag, then any tag with an extension.
    A tag matching none of these produces no issues.

    Parameters:
        original_tag (HedTag): The source tag.
        report_as (HedTag): Report the error as coming from this tag instead.
                            Mostly for definitions that expand.
        error_code (str): The code to override the error as.
                          Again mostly for def/def-expand tags.

    Returns:
        list: Issues found by the selected check (a new list on every call).

    Notes:
        - report_as and error_code are forwarded only to the unit-class and
          value-class checks; the extension character check does not receive them.
    """
    found = []

    # Pick the unit/value-class checker first; both take the same keyword overrides.
    if original_tag.is_unit_class_tag():
        class_check = self._unit_validator.check_tag_unit_class_units_are_valid
    elif original_tag.is_value_class_tag():
        class_check = self._unit_validator.check_tag_value_class_valid
    else:
        class_check = None

    if class_check is not None:
        found.extend(class_check(original_tag, report_as=report_as, error_code=error_code))
        return found

    # todo: potentially make this one have a report_as
    if original_tag.extension:
        found.extend(self._char_validator.check_for_invalid_extension_chars(original_tag))

    return found

def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placeholders=False):
""" Validate individual tags in a HED string.
Expand All @@ -170,13 +190,14 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol
# todo: unclear if this should be restored at some point
# if hed_tag.expandable and not hed_tag.expanded:
# for tag in hed_tag.expandable.get_all_tags():
# validation_issues += self._tag_validator. \
# validation_issues += self._group_validator. \
# run_individual_tag_validators(tag, allow_placeholders=allow_placeholders,
# is_definition=is_definition)
# else:
validation_issues += self._tag_validator. \
run_individual_tag_validators(hed_tag,
allow_placeholders=allow_placeholders,
is_definition=is_definition)
validation_issues += self.validate_units(hed_tag)

return validation_issues
7 changes: 7 additions & 0 deletions hed/validator/tag_util/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Validation of HED tags."""

from .char_util import CharValidator
from .string_util import StringValidator
from .class_util import UnitValueValidator
from .tag_util import TagValidator
from .group_util import GroupValidator
Loading

0 comments on commit b2c754f

Please sign in to comment.