Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicitly ban temporal tags when no onset column present #826

Merged
merged 1 commit into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ def val_error_empty_group(tag):
return f"HED tags cannot be empty. Extra delimiters found: '{tag}'"


@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
def val_error_hed_onset_with_no_column(tag):
    """Build the message for a temporal tag used without an 'Onset' column.

    Parameters:
        tag: The offending tag; it is interpolated into the message.

    Returns:
        str: The human-readable error message.
    """
    message = f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'"
    return message



@hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING)
def val_error_tag_extended(tag, problem_tag):
    """Build the message reported when a tag has been extended.

    Parameters:
        tag: The full tag; it is interpolated at the end of the message.
        problem_tag: The extended portion; it is quoted in the message.

    Returns:
        str: The human-readable message.
    """
    message = f"Hed tag is extended. '{problem_tag}' in {tag}"
    return message
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ class OnsetErrors:
ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"

HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN'

class ColumnErrors:
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
Expand Down
17 changes: 17 additions & 0 deletions hed/validator/onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,20 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
del self._onsets[full_def_name.lower()]

return []

@staticmethod
def check_for_banned_tags(hed_string):
    """ Report every tag in the string that appears in DefTagNames.TEMPORAL_KEYS.

    Parameters:
        hed_string(HedString): the string to check

    Returns:
        list: One HED_ONSET_WITH_NO_COLUMN issue per banned tag found,
              as produced by ErrorHandler.format_error. Empty if none are found.
    """
    temporal_keys = DefTagNames.TEMPORAL_KEYS
    found_issues = []
    for candidate in hed_string.get_all_tags():
        if candidate not in temporal_keys:
            continue
        # format_error returns a list of issues, so extend rather than append.
        found_issues.extend(ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, candidate))
    return found_issues
35 changes: 21 additions & 14 deletions hed/validator/spreadsheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
Validate the input data using the schema

Parameters:
data (BaseInput or pd.DataFrame): Input data to be validated.
If a dataframe, it is assumed to be assembled already.
data (BaseInput): Input data to be validated.
def_dicts(list of DefDict or DefDict): all definitions to use for validation
name(str): The name to report errors from this file as
error_handler (ErrorHandler): Error context to use. Creates a new one if None
Expand All @@ -41,22 +40,27 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
if error_handler is None:
error_handler = ErrorHandler()

if not isinstance(data, BaseInput):
raise TypeError("Invalid type passed to spreadsheet validator. Can only validate BaseInput objects.")

error_handler.push_error_context(ErrorContext.FILE_NAME, name)
self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
self._onset_validator = OnsetValidator()
onset_filtered = None
# Adjust to account for 1 based
row_adj = 1
if isinstance(data, BaseInput):
# Adjust to account for column names
if data.has_column_names:
row_adj += 1
issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
data = data.dataframe_a
# Adjust to account for column names
if data.has_column_names:
row_adj += 1
issues += self._validate_column_structure(data, error_handler, row_adj)
onset_filtered = data.series_filtered
df = data.dataframe_a

self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
if data.onsets is not None:
self._onset_validator = OnsetValidator()
else:
self._onset_validator = None

# Check the rows of the input data
issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
issues += self._run_checks(df, onset_filtered, error_handler=error_handler, row_adj=row_adj)
error_handler.pop_error_context()

issues = sort_issues(issues)
Expand Down Expand Up @@ -98,7 +102,10 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
if row_string:
error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
new_column_issues = self._hed_validator.run_full_string_checks(row_string)
new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
if self._onset_validator is not None:
new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
else:
new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
error_handler.add_context_and_filter(new_column_issues)
error_handler.pop_error_context()
issues += new_column_issues
Expand Down
12 changes: 12 additions & 0 deletions tests/validator/test_onset_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,18 @@ def test_onset_two_in_one_line(self):

self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)

def test_check_for_banned_tags(self):
    """Check the number of issues check_for_banned_tags returns for sample strings."""
    # (raw HED string, expected number of issues), checked in this order.
    expected_counts = (
        ("Event, (Duration/Short, Label/Example)", 0),
        ("Onset, (Offset, Event)", 2),
        ("(Onset, Duration/Long), Label/Example", 1),
    )
    for raw_string, expected in expected_counts:
        parsed = HedString(raw_string, self.hed_schema)
        found = OnsetValidator.check_for_banned_tags(parsed)
        self.assertEqual(len(found), expected)

if __name__ == '__main__':
unittest.main()
53 changes: 51 additions & 2 deletions tests/validator/test_spreadsheet_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import unittest
from hed import load_schema_version, load_schema
from hed.validator import SpreadsheetValidator
from hed import SpreadsheetInput
from hed import TabularInput, SpreadsheetInput
from hed.errors.error_types import ValidationErrors


class TestSpreadsheetValidation(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.schema = load_schema_version("8.1.0")
cls.schema = load_schema_version("8.2.0")
cls.validator = SpreadsheetValidator(cls.schema)
base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
cls.base_data_dir = base
Expand Down Expand Up @@ -45,3 +47,50 @@ def test_basic_validate(self):
issues = file_input.validate(self.schema)
self.assertTrue(len(issues), 1)

def test_invalid_onset_invalid_column(self):
    """Validate tables with and without an 'onset' column, with and without temporal tags.

    Each scenario builds one HED column and validates it twice: once as-is
    and once with an added 'onset' column. The assertions pin the number of
    issues and the leading issue codes for each combination.
    """
    def_dict = "(Definition/DefaultOnset, (Event))"

    def run_validation(frame):
        # Every scenario wraps the frame the same way and shares the definition string.
        return self.validator.validate(TabularInput(frame), def_dicts=def_dict)

    # Scenario 1: no temporal tags in any row.
    base_df = pd.DataFrame({
        'HED': [
            "Event, (Age/5, Label/Example)",
            "Age/1, Label/Example",
            "Age/3, (Event)",
        ]
    })
    self.df_with_onset = base_df.copy()
    self.df_with_onset['onset'] = [1, 2, 3]
    self.df_without_onset = base_df.copy()

    issues = run_validation(self.df_without_onset)
    self.assertEqual(len(issues), 0)

    issues = run_validation(self.df_with_onset)
    self.assertEqual(len(issues), 1)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)

    # Scenario 2: Onset, Inset, Offset rows in that order.
    base_has_tags_df = pd.DataFrame({
        'HED': [
            "(Onset, Def/DefaultOnset)",
            "(Inset, Def/DefaultOnset), (Event, Age/2)",
            "(Offset, Def/DefaultOnset), (Age/4)",
        ]
    })
    self.df_with_onset_has_tags = base_has_tags_df.copy()
    self.df_with_onset_has_tags['onset'] = [1, 2, 3]
    self.df_without_onset_has_tags = base_has_tags_df.copy()

    issues = run_validation(self.df_without_onset_has_tags)
    self.assertEqual(len(issues), 3)
    self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)

    issues = run_validation(self.df_with_onset_has_tags)
    self.assertEqual(len(issues), 1)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)

    # Scenario 3: same tags, but the Offset row precedes the Inset row.
    base_has_tags_unordered_df = pd.DataFrame({
        'HED': [
            "(Onset, Def/DefaultOnset)",
            "(Offset, Def/DefaultOnset), (Age/4)",
            "(Inset, Def/DefaultOnset), (Event, Age/2)",
        ]
    })
    self.df_with_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
    self.df_with_onset_has_tags_unordered['onset'] = [1, 2, 3]
    self.df_without_onset_has_tags_unordered = base_has_tags_unordered_df.copy()

    issues = run_validation(self.df_without_onset_has_tags_unordered)
    self.assertEqual(len(issues), 3)
    self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)

    issues = run_validation(self.df_with_onset_has_tags_unordered)
    self.assertEqual(len(issues), 2)
    self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
    self.assertEqual(issues[1]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
Loading