Explicitly ban temporal tags when no onset column present

hed-standard · Jan 9, 2024 · 5107fbc · 5107fbc
1 parent 639397d
commit 5107fbc
Show file tree

Hide file tree

Showing 6 changed files with 108 additions and 17 deletions.
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
@@ -25,6 +25,12 @@ def val_error_empty_group(tag):
     return f"HED tags cannot be empty.  Extra delimiters found: '{tag}'"
 
 
+@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+def val_error_hed_onset_with_no_column(tag):
+    return f"Cannot have Temporal tags without an 'Onset' column.  Found tag: '{tag}'"
+
+
+
 @hed_tag_error(ValidationErrors.TAG_EXTENDED, has_sub_tag=True, default_severity=ErrorSeverity.WARNING)
 def val_error_tag_extended(tag, problem_tag):
     return f"Hed tag is extended. '{problem_tag}' in {tag}"

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
@@ -166,7 +166,7 @@ class OnsetErrors:
     ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
     INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
     ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"
-
+    HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN'
 
 class ColumnErrors:
     INVALID_COLUMN_REF = "INVALID_COLUMN_REF"

diff --git a/hed/validator/onset_validator.py b/hed/validator/onset_validator.py
@@ -63,3 +63,20 @@ def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
                 del self._onsets[full_def_name.lower()]
 
         return []
+
+    @staticmethod
+    def check_for_banned_tags(hed_string):
+        """ Returns an issue for every tag in the string that is on the banned list
+
+        Parameters:
+            hed_string(HedString): the string to check
+
+        Returns:
+            list: The validation issues, one per banned tag found. Each issue is a dictionary.
+        """
+        banned_tag_list = DefTagNames.TEMPORAL_KEYS
+        issues = []
+        for tag in hed_string.get_all_tags():
+            if tag in banned_tag_list:
+                issues += ErrorHandler.format_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, tag)
+        return issues
diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
@@ -28,8 +28,7 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         Validate the input data using the schema
 
         Parameters:
-            data (BaseInput or pd.DataFrame): Input data to be validated.
-                If a dataframe, it is assumed to be assembled already.
+            data (BaseInput): Input data to be validated.
             def_dicts(list of DefDict or DefDict): all definitions to use for validation
             name(str): The name to report errors from this file as
             error_handler (ErrorHandler): Error context to use.  Creates a new one if None
@@ -41,22 +40,27 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         if error_handler is None:
             error_handler = ErrorHandler()
 
+        if not isinstance(data, BaseInput):
+            raise TypeError("Invalid type passed to spreadsheet validator.  Can only validate BaseInput objects.")
+
         error_handler.push_error_context(ErrorContext.FILE_NAME, name)
-        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
-        self._onset_validator = OnsetValidator()
-        onset_filtered = None
         # Adjust to account for 1 based
         row_adj = 1
-        if isinstance(data, BaseInput):
-            # Adjust to account for column names
-            if data.has_column_names:
-                row_adj += 1
-            issues += self._validate_column_structure(data, error_handler, row_adj)
-            onset_filtered = data.series_filtered
-            data = data.dataframe_a
+        # Adjust to account for column names
+        if data.has_column_names:
+            row_adj += 1
+        issues += self._validate_column_structure(data, error_handler, row_adj)
+        onset_filtered = data.series_filtered
+        df = data.dataframe_a
+
+        self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
+        if data.onsets is not None:
+            self._onset_validator = OnsetValidator()
+        else:
+            self._onset_validator = None
 
         # Check the rows of the input data
-        issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
+        issues += self._run_checks(df, onset_filtered, error_handler=error_handler, row_adj=row_adj)
         error_handler.pop_error_context()
 
         issues = sort_issues(issues)
@@ -98,7 +102,10 @@ def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
             if row_string:
                 error_handler.push_error_context(ErrorContext.HED_STRING, row_string)
                 new_column_issues = self._hed_validator.run_full_string_checks(row_string)
-                new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                if self._onset_validator is not None:
+                    new_column_issues += self._onset_validator.validate_temporal_relations(row_string)
+                else:
+                    new_column_issues += OnsetValidator.check_for_banned_tags(row_string)
                 error_handler.add_context_and_filter(new_column_issues)
                 error_handler.pop_error_context()
                 issues += new_column_issues

diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py
@@ -312,6 +312,18 @@ def test_onset_two_in_one_line(self):
 
         self._test_issues_base(test_strings, test_issues, expected_context, placeholder_def_only=False)
 
+    def test_check_for_banned_tags(self):
+        hed_string = HedString("Event, (Duration/Short, Label/Example)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 0)
+
+        hed_string = HedString("Onset, (Offset, Event)", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 2)
+
+        hed_string = HedString("(Onset, Duration/Long), Label/Example", self.hed_schema)
+        issues = OnsetValidator.check_for_banned_tags(hed_string)
+        self.assertEqual(len(issues), 1)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/validator/test_spreadsheet_validator.py b/tests/validator/test_spreadsheet_validator.py
@@ -5,12 +5,14 @@
 import unittest
 from hed import load_schema_version, load_schema
 from hed.validator import SpreadsheetValidator
-from hed import SpreadsheetInput
+from hed import TabularInput, SpreadsheetInput
+from hed.errors.error_types import ValidationErrors
+
 
 class TestSpreadsheetValidation(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.schema = load_schema_version("8.1.0")
+        cls.schema = load_schema_version("8.2.0")
         cls.validator = SpreadsheetValidator(cls.schema)
         base = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/')
         cls.base_data_dir = base
@@ -45,3 +47,50 @@ def test_basic_validate(self):
         issues = file_input.validate(self.schema)
         self.assertTrue(len(issues), 1)
 
+    def test_invalid_onset_invalid_column(self):
+        def_dict = "(Definition/DefaultOnset, (Event))"
+        base_df = pd.DataFrame({
+            'HED': ["Event, (Age/5, Label/Example)", "Age/1, Label/Example", "Age/3, (Event)"]
+        })
+
+        self.df_with_onset = base_df.copy()
+        self.df_with_onset['onset'] = [1, 2, 3]
+        self.df_without_onset = base_df.copy()
+
+        # No temporal tags (Onset/Inset/Offset) in either of these
+        issues = self.validator.validate(TabularInput(self.df_without_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 0)
+
+        issues = self.validator.validate(TabularInput(self.df_with_onset), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Inset, Def/DefaultOnset), (Event, Age/2)", "(Offset, Def/DefaultOnset), (Age/4)"]
+        })
+
+        self.df_with_onset_has_tags = base_has_tags_df.copy()
+        self.df_with_onset_has_tags['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags = base_has_tags_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags), def_dicts=def_dict)
+        self.assertEqual(len(issues), 1)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+
+        base_has_tags_unordered_df = pd.DataFrame({
+            'HED': ["(Onset, Def/DefaultOnset)", "(Offset, Def/DefaultOnset), (Age/4)", "(Inset, Def/DefaultOnset), (Event, Age/2)"]
+        })
+        self.df_with_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+        self.df_with_onset_has_tags_unordered['onset'] = [1, 2, 3]
+        self.df_without_onset_has_tags_unordered = base_has_tags_unordered_df.copy()
+
+        issues = self.validator.validate(TabularInput(self.df_without_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 3)
+        self.assertEqual(issues[0]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)
+        issues = self.validator.validate(TabularInput(self.df_with_onset_has_tags_unordered), def_dicts=def_dict)
+        self.assertEqual(len(issues), 2)
+        self.assertEqual(issues[0]['code'], ValidationErrors.HED_UNKNOWN_COLUMN)
+        self.assertEqual(issues[1]['code'], ValidationErrors.ONSET_OFFSET_INSET_ERROR)