Skip to content

Commit

Permalink
Explicitly ban temporal tags when no onset column present
Browse files Browse the repository at this point in the history
  • Loading branch information
IanCa committed Jan 9, 2024
1 parent 639397d commit 5107fbc
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 17 deletions.
6 changes: 6 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ def val_error_empty_group(tag):
return f"HED tags cannot be empty. Extra delimiters found: '{tag}'"


@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
def val_error_hed_onset_with_no_column(tag):
    """ Build the message for a temporal tag used without an onset column.

    Parameters:
        tag: The offending tag; it is interpolated into the message as-is.

    Returns:
        str: The human-readable error message.
    """
    # The decorator ties this message to HED_ONSET_WITH_NO_COLUMN and passes
    # ONSET_OFFSET_INSET_ERROR as actual_code (presumably the code reported to users -- confirm).
    explanation = "Cannot have Temporal tags without an 'Onset' column. "
    return explanation + f"Found tag: '{tag}'"



@hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING)
def val_error_tag_extended(tag, problem_tag):
    """ Build the message for a tag that has been extended.

    Parameters:
        tag: The full tag in which the extension was found.
        problem_tag: The extended portion that is being flagged.

    Returns:
        str: The human-readable message.
    """
    # Registered with a default severity of WARNING (see the decorator arguments).
    extension_detail = f"'{problem_tag}' in {tag}"
    return "Hed tag is extended. " + extension_detail
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class OnsetErrors:
ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"

HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN'

class ColumnErrors:
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
Expand Down
17 changes: 17 additions & 0 deletions hed/validator/onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,20 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
del self._onsets[full_def_name.lower()]

return []

@staticmethod
def check_for_banned_tags(hed_string):
    """ Report every temporal tag present in a HED string.

    Any tag from hed_string.get_all_tags() that matches an entry in
    DefTagNames.TEMPORAL_KEYS yields a HED_ONSET_WITH_NO_COLUMN error.

    Parameters:
        hed_string(HedString): The string whose tags are checked.

    Returns:
        list: The validation issues for the banned tags found. Each issue is a dictionary.
    """
    temporal_tags = DefTagNames.TEMPORAL_KEYS
    found_issues = []
    for candidate in hed_string.get_all_tags():
        if candidate not in temporal_tags:
            continue
        # format_error returns a list of issues, so extend rather than append.
        found_issues.extend(ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, candidate))
    return found_issues
35 changes: 21 additions & 14 deletions hed/validator/spreadsheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
Validate the input data using the schema
Parameters:
data (BaseInput or pd.DataFrame): Input data to be validated.
If a dataframe, it is assumed to be assembled already.
data (BaseInput): Input data to be validated.
def_dicts(list of DefDict or DefDict): all definitions to use for validation
name(str): The name to report errors from this file as
error_handler (ErrorHandler): Error context to use. Creates a new one if None
Expand All @@ -41,22 +40,27 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
if error_handler is None:
error_handler = ErrorHandler()

if not isinstance(data, BaseInput):
raise TypeError("Invalid type passed to spreadsheet validator. Can only validate BaseInput objects.")

error_handler.push_error_context(ErrorContext.FILE_NAME, name)
self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
self._onset_validator = OnsetValidator()
onset_filtered = None
# Adjust to account for 1 based
row_adj = 1
if isinstance(data, BaseInput):
# Adjust to account for column names
if data.has_column_names:
row_adj += 1
issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
data = data.dataframe_a
# Adjust to account for column names
if data.has_column_names:
row_adj += 1
issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
df = data.dataframe_a

self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
if data.onsets is not None:
self._onset_validator = OnsetValidator()
else:
self._onset_validator = None

# Check the rows of the input data
issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
issues += self._run_checks(df, onset_filtered, error_handler=error_handler, row_adj=row_adj)
error_handler.pop_error_context()

issues = sort_issues(issues)
Expand Down Expand Up @@ -98,7 +102,10 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
if row_string:
error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
new_column_issues = self._hed_validator.run_full_string_checks(row_string)
new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
if self._onset_validator is not None:
new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
else:
new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
error_handler.add_context_and_filter(new_column_issues)
error_handler.pop_error_context()
issues += new_column_issues
Expand Down
12 changes: 12 additions & 0 deletions tests/validator/test_onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,18 @@ def test_onset_two_in_one_line(self):

self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)

def test_check_for_banned_tags(self):
    """Check the number of issues check_for_banned_tags reports for sample strings."""
    # (raw HED string, expected number of issues)
    cases = [
        ("Event, (Duration/Short, Label/Example)", 0),
        ("Onset, (Offset, Event)", 2),
        ("(Onset, Duration/Long), Label/Example", 1),
    ]
    for raw_string, expected_count in cases:
        parsed = HedString(raw_string, self.hed_schema)
        found = OnsetValidator.check_for_banned_tags(parsed)
        self.assertEqual(len(found), expected_count)

if __name__ == '__main__':
unittest.main()
53 changes: 51 additions & 2 deletions tests/validator/test_spreadsheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import unittest
from hed import load_schema_version, load_schema
from hed.validator import SpreadsheetValidator
from hed import SpreadsheetInput
from hed import TabularInput, SpreadsheetInput
from hed.errors.error_types import ValidationErrors


class TestSpreadsheetValidation(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.schema = load_schema_version("8.1.0")
cls.schema = load_schema_version("8.2.0")
cls.validator = SpreadsheetValidator(cls.schema)
base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
cls.base_data_dir = base
Expand Down Expand Up @@ -45,3 +47,50 @@ def test_basic_validate(self):
issues = file_input.validate(self.schema)
self.assertTrue(len(issues), 1)

def test_invalid_onset_invalid_column(self):
    """Validate temporal tags in tabular data with and without an 'onset' column.

    Each scenario builds one HED column and validates it twice: once as-is
    and once with an added 'onset' column. The frames are kept as locals so
    the test does not leave state on the TestCase instance.
    """
    def_dict = "(Definition/DefaultOnset, (Event))"

    # Scenario 1: no temporal (Onset/Inset/Offset) tags in any row.
    base_df = pd.DataFrame({
        'HED': ["Event, (Age/5, Label/Example)", "Age/1, Label/Example", "Age/3, (Event)"]
    })
    df_with_onset = base_df.copy()
    df_with_onset['onset'] = [1, 2, 3]
    df_without_onset = base_df.copy()

    issues = self.validator.validate(TabularInput(df_without_onset), def_dicts=def_dict)
    self.assertEqual(len(issues), 0)

    issues = self.validator.validate(TabularInput(df_with_onset), def_dicts=def_dict)
    self.assertEqual(len(issues), 1)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)

    # Scenario 2: temporal tags in Onset, Inset, Offset order.
    base_has_tags_df = pd.DataFrame({
        'HED': [
            "(Onset, Def/DefaultOnset)",
            "(Inset, Def/DefaultOnset), (Event, Age/2)",
            "(Offset, Def/DefaultOnset), (Age/4)",
        ]
    })
    df_with_onset_has_tags = base_has_tags_df.copy()
    df_with_onset_has_tags['onset'] = [1, 2, 3]
    df_without_onset_has_tags = base_has_tags_df.copy()

    # Without an onset column, each of the three rows is expected to report one issue.
    issues = self.validator.validate(TabularInput(df_without_onset_has_tags), def_dicts=def_dict)
    self.assertEqual(len(issues), 3)
    self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
    issues = self.validator.validate(TabularInput(df_with_onset_has_tags), def_dicts=def_dict)
    self.assertEqual(len(issues), 1)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)

    # Scenario 3: same tags, but the Inset row comes after the Offset row.
    base_has_tags_unordered_df = pd.DataFrame({
        'HED': [
            "(Onset, Def/DefaultOnset)",
            "(Offset, Def/DefaultOnset), (Age/4)",
            "(Inset, Def/DefaultOnset), (Event, Age/2)",
        ]
    })
    df_with_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
    df_with_onset_has_tags_unordered['onset'] = [1, 2, 3]
    df_without_onset_has_tags_unordered = base_has_tags_unordered_df.copy()

    issues = self.validator.validate(TabularInput(df_without_onset_has_tags_unordered), def_dicts=def_dict)
    self.assertEqual(len(issues), 3)
    self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
    # With an onset column, one additional temporal error is expected
    # (presumably from the out-of-order Inset row -- confirm).
    issues = self.validator.validate(TabularInput(df_with_onset_has_tags_unordered), def_dicts=def_dict)
    self.assertEqual(len(issues), 2)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
    self.assertEqual(issues[1]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)

0 comments on commit 5107fbc

Please sign in to comment.