diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
index 67922025..657aefbb 100644
--- a/hed/errors/error_messages.py
+++ b/hed/errors/error_messages.py
@@ -25,6 +25,11 @@ def val_error_empty_group(tag):
     return f"HED tags cannot be empty. Extra delimiters found: '{tag}'"
 
 
+@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+def val_error_hed_onset_with_no_column(tag):
+    return f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'"
+
+
 @hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING)
 def val_error_tag_extended(tag, problem_tag):
     return f"Hed tag is extended. '{problem_tag}' in {tag}"
diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 7f9a6443..5dc32737 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -166,6 +166,7 @@ class OnsetErrors:
     ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
     INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
     ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"
+    HED_ONSET_WITH_NO_COLUMN = "HED_ONSET_WITH_NO_COLUMN"
 
 
 class ColumnErrors:
diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py
index 94be9d7e..f1819636 100644
--- a/hed/validator/onset_validator.py
+++ b/hed/validator/onset_validator.py
@@ -63,3 +63,20 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
                 del self._onsets[full_def_name.lower()]
 
         return []
+
+    @staticmethod
+    def check_for_banned_tags(hed_string):
+        """ Return an issue for every tag in the string that is on the banned (temporal) tag list.
+
+        Parameters:
+            hed_string(HedString): The string to check.
+
+        Returns:
+            list: The validation issues for the banned tags found. Each issue is a dictionary.
+        """
+        banned_tag_list = DefTagNames.TEMPORAL_KEYS
+        issues = []
+        for tag in hed_string.get_all_tags():
+            if tag in banned_tag_list:
+                issues += ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, tag)
+        return issues
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index 751af961..aad30283 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -28,8 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         Validate the input data using the schema
 
         Parameters:
-            data (BaseInput or pd.DataFrame): Input data to be validated.
-                If a dataframe, it is assumed to be assembled already.
+            data (BaseInput): Input data to be validated.
             def_dicts(list of DefDict or DefDict): all definitions to use for validation
             name(str): The name to report errors from this file as
             error_handler (ErrorHandler): Error context to use. Creates a new one if None
@@ -41,22 +40,27 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         if error_handler is None:
             error_handler = ErrorHandler()
 
+        if not isinstance(data, BaseInput):
+            raise TypeError("Invalid type passed to spreadsheet validator. Can only validate BaseInput objects.")
+
         error_handler.push_error_context(ErrorContext.FILE_NAME, name)
-        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
-        self._onset_validator = OnsetValidator()
-        onset_filtered = None
         # Adjust to account for 1 based
         row_adj = 1
-        if isinstance(data, BaseInput):
-            # Adjust to account for column names
-            if data.has_column_names:
-                row_adj += 1
-            issues += self._validate_column_structure(data, error_handler, row_adj)
-            onset_filtered = data.series_filtered
-            data = data.dataframe_a
+        # Adjust to account for column names
+        if data.has_column_names:
+            row_adj += 1
+        issues += self._validate_column_structure(data, error_handler, row_adj)
+        onset_filtered = data.series_filtered
+        df = data.dataframe_a
+
+        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
+        if data.onsets is not None:
+            self._onset_validator = OnsetValidator()
+        else:
+            self._onset_validator = None
 
         # Check the rows of the input data
-        issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
+        issues += self._run_checks(df, onset_filtered, error_handler=error_handler, row_adj=row_adj)
         error_handler.pop_error_context()
 
         issues = sort_issues(issues)
@@ -98,7 +102,10 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
                 if row_string:
                     error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                     new_column_issues = self._hed_validator.run_full_string_checks(row_string)
-                    new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                    if self._onset_validator is not None:
+                        new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                    else:
+                        new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
                     error_handler.add_context_and_filter(new_column_issues)
                     error_handler.pop_error_context()
                     issues += new_column_issues
diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py
index 2b60d391..42e6c958 100644
--- a/tests/validator/test_onset_validator.py
+++ b/tests/validator/test_onset_validator.py
@@ -312,6 +312,19 @@ def test_onset_two_in_one_line(self):
 
         self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)
 
+    def test_check_for_banned_tags(self):
+        hed_string = HedString("Event, (Duration/Short, Label/Example)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 0)
+
+        hed_string = HedString("Onset, (Offset, Event)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 2)
+
+        hed_string = HedString("(Onset, Duration/Long), Label/Example", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 1)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/validator/test_spreadsheet_validator.py b/tests/validator/test_spreadsheet_validator.py
index 1b1f57eb..9c0691d4 100644
--- a/tests/validator/test_spreadsheet_validator.py
+++ b/tests/validator/test_spreadsheet_validator.py
@@ -5,12 +5,14 @@ import unittest
 
 from hed import load_schema_version, load_schema
 from hed.validator import SpreadsheetValidator
-from hed import SpreadsheetInput
+from hed import TabularInput, SpreadsheetInput
+from hed.errors.error_types import ValidationErrors
+
 
 class TestSpreadsheetValidation(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.schema = load_schema_version("8.1.0")
+        cls.schema = load_schema_version("8.2.0")
         cls.validator = SpreadsheetValidator(cls.schema)
         base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
         cls.base_data_dir = base
@@ -45,3 +47,50 @@ def test_basic_validate(self):
         issues = file_input.validate(self.schema)
         self.assertTrue(len(issues), 1)
 
+    def test_invalid_onset_invalid_column(self):
+        def_dict = "(Definition/DefaultOnset, (Event))"
+        base_df = pd.DataFrame({
+            'HED': ["Event, (Age/5, Label/Example)", "Age/1, Label/Example", "Age/3, (Event)"]
+        })
+
+        self.df_with_onset = base_df.copy()
+        self.df_with_onset['onset'] = [1, 2, 3]
+        self.df_without_onset = base_df.copy()
+
+        # No tags in either of these
+        issues = self.validator.validate(TabularInput(self.df_without_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 0)
+
+        issues = self.validator.validate(TabularInput(self.df_with_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Inset, Def/DefaultOnset), (Event, Age/2)", "(Offset, Def/DefaultOnset), (Age/4)"]
+        })
+
+        self.df_with_onset_has_tags = base_has_tags_df.copy()
+        self.df_with_onset_has_tags['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags = base_has_tags_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_unordered_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Offset, Def/DefaultOnset), (Age/4)", "(Inset, Def/DefaultOnset), (Event, Age/2)"]
+        })
+        self.df_with_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+        self.df_with_onset_has_tags_unordered['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 2)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+        self.assertEqual(issues[1]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)