Skip to content

Commit

Permalink
Merge pull request #879 from IanCa/develop
Browse files Browse the repository at this point in the history
Add support for Duration tag, and validation support for Delay
  • Loading branch information
VisLab authored Mar 5, 2024
2 parents 0b23b2a + cbfd00a commit 50a6df1
Show file tree
Hide file tree
Showing 20 changed files with 235 additions and 110 deletions.
2 changes: 1 addition & 1 deletion hed/errors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Error handling module for HED. """

from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
from .error_types import DefinitionErrors, TemporalErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
ValidationErrors, ColumnErrors
from .error_types import ErrorContext, ErrorSeverity
from .exceptions import HedExceptions, HedFileError
34 changes: 23 additions & 11 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from hed.errors.error_reporter import hed_error, hed_tag_error
from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors,
OnsetErrors, ColumnErrors)
TemporalErrors, ColumnErrors)


@hed_tag_error(ValidationErrors.UNITS_INVALID)
Expand All @@ -26,7 +26,7 @@ def val_error_empty_group(tag):
return f"HED tags cannot be empty. Extra delimiters found: '{tag}'"


@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def val_error_hed_onset_with_no_column(tag):
return f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'"

Expand Down Expand Up @@ -350,57 +350,69 @@ def def_error_bad_location(tag):
return f"Tag '{str(tag)}' is found in a location it is not allowed to be."


@hed_tag_error(OnsetErrors.ONSET_DEF_UNMATCHED, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_DEF_UNMATCHED, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_def_unmatched(tag):
return f"The def tag in an onset/offset tag is unmatched. Def tag: '{tag}'"


@hed_tag_error(OnsetErrors.OFFSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.OFFSET_BEFORE_ONSET, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_offset_before_onset(tag):
return f"Offset tag '{tag}' does not have a matching onset."


@hed_tag_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_same_defs_one_row(tag, def_name):
return f"'{tag}' uses name '{def_name}', which was already used at this onset time."


@hed_tag_error(OnsetErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_inset_before_onset(tag):
return f"Inset tag '{tag}' does not have a matching onset."


@hed_tag_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_no_def_found(tag):
return f"'{tag}' tag has no def or def-expand tag in string."


@hed_tag_error(OnsetErrors.ONSET_TOO_MANY_DEFS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_TOO_MANY_DEFS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_too_many_defs(tag, tag_list):
tag_list_strings = [str(tag) for tag in tag_list]
return f"Too many def tags found in onset for {tag}. Expected 1, also found: {tag_list_strings}"


@hed_tag_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_too_many_groups(tag, tag_list):
tag_list_strings = [str(a_tag) for a_tag in tag_list]
return f"An onset tag should have at most 2 sibling nodes, an offset tag should have 1. " \
f"Found {len(tag_list_strings)}: {tag_list_strings}"


@hed_tag_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.DURATION_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_DURATION_WRONG_NUMBER_GROUPS(tag, tag_list):
    """ Build the message for a Duration/Delay tag that has the wrong number of child groups.

    Parameters:
        tag: The offending tag; interpolated into the message.
        tag_list (iterable): The items found with the tag; each one is converted with str().

    Returns:
        str: The formatted error message.
    """
    tag_list_strings = [str(a_tag) for a_tag in tag_list]
    # The two literals are concatenated verbatim, so the separating spaces must be
    # written explicitly (after the quoted tag and after the first sentence).
    return f"A duration and/or delay tag '{tag}' should have exactly one child group. " \
           f"Found {len(tag_list_strings)}: {tag_list_strings}"


@hed_tag_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_wrong_type_tag(tag, def_tag):
return f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. All onset context tags must be " \
f"in a single group together."


@hed_tag_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_wrong_placeholder(tag, has_placeholder):
if has_placeholder:
return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one."
return f"Onset/offset def tag {tag} should not have a placeholder, but has one."


@hed_tag_error(TemporalErrors.DURATION_HAS_OTHER_TAGS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_DURATION_HAS_OTHER_TAGS(tag):
    """ Build the message for a tag that sits directly beside a Duration or Delay tag.

    Parameters:
        tag: The misplaced tag; interpolated into the message.

    Returns:
        str: The formatted error message.
    """
    message = f"Tag '{tag}' should not be grouped with Duration or Delay. Context tags should be in a sub-group."
    return message


@hed_error(ColumnErrors.INVALID_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID)
def invalid_column_ref(bad_ref):
    """ Build the message for a column reference that cannot be resolved.

    Parameters:
        bad_ref: The referenced column name; interpolated into the message.

    Returns:
        str: The formatted error message.
    """
    # The previous text ended with a stray, unbalanced single quote after the period.
    return f"The column '{bad_ref}' is unknown or does not have HED annotations."
Expand Down
9 changes: 6 additions & 3 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ValidationErrors:
DEFINITION_INVALID = "DEFINITION_INVALID"
ELEMENT_DEPRECATED = "ELEMENT_DEPRECATED"
NODE_NAME_EMPTY = 'NODE_NAME_EMPTY'
ONSET_OFFSET_INSET_ERROR = 'ONSET_OFFSET_INSET_ERROR'
TEMPORAL_TAG_ERROR = 'TEMPORAL_TAG_ERROR'
PARENTHESES_MISMATCH = 'PARENTHESES_MISMATCH'
PLACEHOLDER_INVALID = 'PLACEHOLDER_INVALID'
REQUIRED_TAG_MISSING = 'REQUIRED_TAG_MISSING'
Expand Down Expand Up @@ -159,8 +159,8 @@ class DefinitionErrors:
BAD_DEFINITION_LOCATION = 'BAD_DEFINITION_LOCATION'


class OnsetErrors:
# These are all ONSET_OFFSET_INSET_ERROR
class TemporalErrors:
# These are all TEMPORAL_TAG_ERROR
OFFSET_BEFORE_ONSET = "OFFSET_BEFORE_ONSET"
ONSET_DEF_UNMATCHED = "ONSET_DEF_UNMATCHED"
ONSET_WRONG_NUMBER_GROUPS = "ONSET_WRONG_NUMBER_GROUPS"
Expand All @@ -172,6 +172,9 @@ class OnsetErrors:
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"
HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN'

DURATION_HAS_OTHER_TAGS = "DURATION_HAS_OTHER_TAGS"
DURATION_WRONG_NUMBER_GROUPS = "DURATION_WRONG_NUMBER_GROUPS"


class ColumnErrors:
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/known_error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"DEF_INVALID",
"DEFINITION_INVALID",
"NODE_NAME_EMPTY",
"ONSET_OFFSET_INSET_ERROR",
"TEMPORAL_TAG_ERROR",
"PARENTHESES_MISMATCH",
"PLACEHOLDER_INVALID",
"REQUIRED_TAG_MISSING",
Expand Down
2 changes: 1 addition & 1 deletion hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def _indexed_dict_from_onsets(onsets):
# This would need to store the index list -> So it can optionally apply to other columns on request.
@staticmethod
def _filter_by_index_list(original_series, indexed_dict):
new_series = pd.Series(["n/a"] * len(original_series), dtype=str)
new_series = pd.Series([""] * len(original_series), dtype=str)

for onset, indices in indexed_dict.items():
if indices:
Expand Down
11 changes: 7 additions & 4 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from hed.models.hed_string import HedString


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True, return_filtered=False):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.
Parameters:
Expand All @@ -13,17 +13,20 @@ def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
return_filtered (bool): If true, combines lines with the same onset.
Further lines with that onset are left as empty strings.
Returns:
tuple:
hed_strings(list of HedStrings): A list of HedStrings or a list of lists of HedStrings
hed_strings(list of HedStrings): A list of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar.
"""

def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
series_a = tabular_file.series_a if not return_filtered else tabular_file.series_filtered
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
Expand Down
25 changes: 24 additions & 1 deletion hed/models/hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def find_top_level_tags(self, anchor_tags, include_groups=2):
If 1: return only groups.
If 2 or any other value: return both.
Returns:
list or tuple: The returned result depends on include_groups.
list: The returned result depends on include_groups.
"""
top_level_tags = []
for group in self.groups():
Expand All @@ -365,6 +365,29 @@ def find_top_level_tags(self, anchor_tags, include_groups=2):
return [tag[include_groups] for tag in top_level_tags]
return top_level_tags

def find_top_level_tags_grouped(self, anchor_tags):
    """ Find top level groups that directly contain one or more anchor tags.

    Alternate form of the anchor search that keeps all matching tags of a group together,
    which is convenient for the Delay/Duration tags.

    Parameters:
        anchor_tags (container): A list/set/etc. of short_base_tags to find groups by.
            Each tag's short base tag is lower-cased before the membership test.

    Returns:
        list of tuples: One entry per group with at least one match, in group order.
            list of tags: the matching tags found directly in that group
            group: the group containing those tags
    """
    matches_by_group = []
    for candidate in self.groups():
        anchors = [tag for tag in candidate.tags() if tag.short_base_tag.lower() in anchor_tags]
        # Groups without any anchor tag are left out of the result entirely.
        if anchors:
            matches_by_group.append((anchors, candidate))
    return matches_by_group

def remove_refs(self):
""" Remove any refs(tags contained entirely inside curly braces) from the string.
Expand Down
13 changes: 7 additions & 6 deletions hed/models/model_constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
""" Defined constants for definitions, def labels, and expanded labels. """
COLUMN_TO_HED_TAGS = "column_to_hed_tags"
ROW_HED_STRING = "HED"
COLUMN_ISSUES = "column_issues"
ROW_ISSUES = "row_issues"


class DefTagNames:
""" Source names for definitions, def labels, and expanded labels. """

Expand All @@ -19,9 +13,16 @@ class DefTagNames:
ONSET_ORG_KEY = "Onset"
OFFSET_ORG_KEY = "Offset"
INSET_ORG_KEY = "Inset"
DURATION_ORG_KEY = "Duration"
DELAY_ORG_KEY = "Delay"

ONSET_KEY = ONSET_ORG_KEY.lower()
OFFSET_KEY = OFFSET_ORG_KEY.lower()
INSET_KEY = INSET_ORG_KEY.lower()
DURATION_KEY = DURATION_ORG_KEY.lower()
DELAY_KEY = DELAY_ORG_KEY.lower()

TEMPORAL_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY}
DURATION_KEYS = {DURATION_KEY, DELAY_KEY}

ALL_TIME_KEYS = TEMPORAL_KEYS.union(DURATION_KEYS)
7 changes: 4 additions & 3 deletions hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def search_strings(hed_strings, queries, query_names):
df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names)
for parse_ind, parser in enumerate(queries):
for index, next_item in enumerate(hed_strings):
match = parser.search(next_item)
if match:
df_factors.at[index, query_names[parse_ind]] = 1
if next_item:
match = parser.search(next_item)
if match:
df_factors.at[index, query_names[parse_ind]] = 1
return df_factors
32 changes: 25 additions & 7 deletions hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Manager of events of temporal extent. """
import pandas as pd
import bisect

from hed.errors import HedFileError
from hed.models import HedString
Expand Down Expand Up @@ -52,15 +53,31 @@ def _create_event_list(self, input_data):
Notes:
"""
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False)
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False,
return_filtered=True)
onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet.
for event_index, hed in enumerate(hed_strings):
self._extract_temporal_events(hed, event_index, onset_dict)
self._extract_duration_events(hed, event_index)
# Now handle the events that extend to end of list
for item in onset_dict.values():
item.set_end(len(self.onsets), None)
self.hed_strings = hed_strings

def _extract_duration_events(self, hed, event_index):
    """ Extract the Duration-anchored groups of one HED string as temporal events.

    Parameters:
        hed (HedString): The assembled string for this row; every processed group is removed from it.
        event_index (int): Position of the row, used to index self.onsets and self.event_list.

    Notes:
        Each new event is appended to self.event_list[event_index].
    """
    duration_groups = hed.find_top_level_tags(anchor_tags={DefTagNames.DURATION_KEY})
    handled = []
    for _anchor, duration_group in duration_groups:
        onset_time = self.onsets[event_index]
        event = TemporalEvent(duration_group, event_index, onset_time)
        stop_time = event.end_time
        # TODO: This may need updating. bisect_left returns len(self.onsets) when
        # stop_time is greater than every onset (assumes self.onsets is sorted - confirm).
        stop_index = bisect.bisect_left(self.onsets, stop_time)
        event.set_end(stop_index, stop_time)
        self.event_list[event_index].append(event)
        handled.append(duration_group)
    hed.remove(handled)

def _extract_temporal_events(self, hed, event_index, onset_dict):
""" Extract the temporal events and remove them from the other HED strings.
Expand All @@ -77,18 +94,19 @@ def _extract_temporal_events(self, hed, event_index, onset_dict):
return
group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY},
include_groups=2)

to_remove = []
for tup in group_tuples:
anchor_tag = tup[1].find_def_tags(recursive=False, include_groups=0)[0]
for def_tag, group in group_tuples:
anchor_tag = group.find_def_tags(recursive=False, include_groups=0)[0]
anchor = anchor_tag.extension.lower()
if anchor in onset_dict or tup[0].short_base_tag.lower() == DefTagNames.OFFSET_KEY:
if anchor in onset_dict or def_tag.short_base_tag.lower() == DefTagNames.OFFSET_KEY:
temporal_event = onset_dict.pop(anchor)
temporal_event.set_end(event_index, self.onsets[event_index])
if tup[0] == DefTagNames.ONSET_KEY:
new_event = TemporalEvent(tup[1], event_index, self.onsets[event_index])
if def_tag == DefTagNames.ONSET_KEY:
new_event = TemporalEvent(group, event_index, self.onsets[event_index])
self.event_list[event_index].append(new_event)
onset_dict[anchor] = new_event
to_remove.append(tup[1])
to_remove.append(group)
hed.remove(to_remove)

def unfold_context(self, remove_types=[]):
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/analysis/temporal_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _split_group(self, contents):
to_remove.append(item)
elif item.short_base_tag.lower() == "duration":
to_remove.append(item)
self.end_time = self.start_time + float(item.extension.lower()) # Will need to be fixed for units
self.end_time = self.start_time + item.value_as_default_unit()
elif item.short_base_tag.lower() == "def":
self.anchor = item.short_tag
contents.remove(to_remove)
Expand Down
14 changes: 7 additions & 7 deletions hed/validator/def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from hed.errors.error_types import ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.models.model_constants import DefTagNames
from hed.errors.error_types import OnsetErrors
from hed.errors.error_types import TemporalErrors


class DefValidator(DefinitionDict):
Expand Down Expand Up @@ -152,11 +152,11 @@ def validate_onset_offset(self, hed_string_obj):

def_tags = found_group.find_def_tags()
if not def_tags:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
continue

if len(def_tags) > 1:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS,
tag=def_tags[0][0],
tag_list=[tag[0] for tag in def_tags[1:]])
continue
Expand All @@ -171,7 +171,7 @@ def validate_onset_offset(self, hed_string_obj):
if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY:
max_children = 0
if len(children) > max_children:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS,
def_tag,
found_group.children)
continue
Expand All @@ -180,7 +180,7 @@ def validate_onset_offset(self, hed_string_obj):
# Make this a loop if max_children can be > 1
child = children[0]
if not isinstance(child, HedGroup):
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
child,
def_tag)

Expand All @@ -197,9 +197,9 @@ def _handle_onset_or_offset(self, def_tag):

def_entry = self.defs.get(def_name.lower())
if def_entry is None:
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
if bool(def_entry.takes_value) != bool(placeholder):
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
return ErrorHandler.format_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
has_placeholder=bool(def_entry.takes_value))

return []
Loading

0 comments on commit 50a6df1

Please sign in to comment.