Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Duration tag, and validation support for Delay #879

Merged
merged 2 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/errors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
""" Error handling module for HED. """

from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
from .error_types import DefinitionErrors, TemporalErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
ValidationErrors, ColumnErrors
from .error_types import ErrorContext, ErrorSeverity
from .exceptions import HedExceptions, HedFileError
34 changes: 23 additions & 11 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from hed.errors.error_reporter import hed_error, hed_tag_error
from hed.errors.error_types import (ValidationErrors, SidecarErrors, ErrorSeverity, DefinitionErrors,
OnsetErrors, ColumnErrors)
TemporalErrors, ColumnErrors)


@hed_tag_error(ValidationErrors.UNITS_INVALID)
Expand All @@ -26,7 +26,7 @@ def val_error_empty_group(tag):
return f"HED tags cannot be empty. Extra delimiters found: '{tag}'"


@hed_tag_error(OnsetErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.HED_ONSET_WITH_NO_COLUMN, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def val_error_hed_onset_with_no_column(tag):
    """ Build the message for a temporal tag used when there is no 'Onset' column.

    Parameters:
        tag: The temporal tag that was found; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"Cannot have Temporal tags without an 'Onset' column. Found tag: '{tag}'"
    return message

Expand Down Expand Up @@ -350,57 +350,69 @@ def def_error_bad_location(tag):
return f"Tag '{str(tag)}' is found in a location it is not allowed to be."


@hed_tag_error(OnsetErrors.ONSET_DEF_UNMATCHED, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_DEF_UNMATCHED, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_def_unmatched(tag):
    """ Build the message for an onset/offset def tag that has no match.

    Parameters:
        tag: The unmatched def tag; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"The def tag in an onset/offset tag is unmatched. Def tag: '{tag}'"
    return message


@hed_tag_error(OnsetErrors.OFFSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.OFFSET_BEFORE_ONSET, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_offset_before_onset(tag):
    """ Build the message for an offset tag that has no matching onset.

    Parameters:
        tag: The offset tag; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"Offset tag '{tag}' does not have a matching onset."
    return message


@hed_tag_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_same_defs_one_row(tag, def_name):
    """ Build the message for a def name that is reused at the same onset time.

    Parameters:
        tag: The tag using the name; it is formatted into the message.
        def_name: The name that was already used; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"'{tag}' uses name '{def_name}', which was already used at this onset time."
    return message


@hed_tag_error(OnsetErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_error_inset_before_onset(tag):
    """ Build the message for an inset tag that has no matching onset.

    Parameters:
        tag: The inset tag; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"Inset tag '{tag}' does not have a matching onset."
    return message


@hed_tag_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_no_def_found(tag):
    """ Build the message for a tag that has no def or def-expand tag with it.

    Parameters:
        tag: The tag lacking a def; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"'{tag}' tag has no def or def-expand tag in string."
    return message


@hed_tag_error(OnsetErrors.ONSET_TOO_MANY_DEFS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_TOO_MANY_DEFS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_too_many_defs(tag, tag_list):
    """ Build the message for an onset group that contains more than one def tag.

    Parameters:
        tag: The first def tag; it is formatted into the message.
        tag_list: The additional def tags; each is converted with str() for display.

    Returns:
        str: The error message text.
    """
    # A distinct loop name keeps the `tag` parameter visibly separate from the extras.
    extra_defs = [str(extra) for extra in tag_list]
    return f"Too many def tags found in onset for {tag}. Expected 1, also found: {extra_defs}"


@hed_tag_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_too_many_groups(tag, tag_list):
    """ Build the message for an onset/offset tag with the wrong number of siblings.

    Parameters:
        tag: The tag the error is reported for (not used in the message text itself).
        tag_list: The sibling items that were found; each is converted with str() for display.

    Returns:
        str: The error message text, including the count and the list of siblings.
    """
    siblings = [str(sibling) for sibling in tag_list]
    sibling_count = len(siblings)
    return (f"An onset tag should have at most 2 sibling nodes, an offset tag should have 1. "
            f"Found {sibling_count}: {siblings}")


@hed_tag_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.DURATION_WRONG_NUMBER_GROUPS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_DURATION_WRONG_NUMBER_GROUPS(tag, tag_list):
    """ Build the message for a Duration/Delay tag with the wrong number of child groups.

    Parameters:
        tag: The duration or delay tag; it is formatted into the message.
        tag_list: The items found with the tag; each is converted with str() for display.

    Returns:
        str: The error message text, including the count and the list of items found.
    """
    tag_list_strings = [str(a_tag) for a_tag in tag_list]
    # The space after the closing quote and after the first sentence were missing,
    # which ran the words together ("'Duration/1'should ... group.Found 2: ...").
    return f"A duration and/or delay tag '{tag}' should have exactly one child group. " \
           f"Found {len(tag_list_strings)}: {tag_list_strings}"


@hed_tag_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_wrong_type_tag(tag, def_tag):
    """ Build the message for an onset def tag that has an improper sibling tag.

    Parameters:
        tag: The improper sibling; it is formatted into the message.
        def_tag: The onset def tag; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    return (f"Onset def tag '{def_tag}' has an improper sibling tag '{tag}'. All onset context tags must be "
            f"in a single group together.")


@hed_tag_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
@hed_tag_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_wrong_placeholder(tag, has_placeholder):
    """ Build the message for an onset/offset def tag whose placeholder usage is wrong.

    Parameters:
        tag: The def tag; it is formatted into the message.
        has_placeholder: Truthy selects the "expects a placeholder value" wording,
                         falsy selects the "should not have a placeholder" wording.

    Returns:
        str: The error message text.
    """
    if not has_placeholder:
        return f"Onset/offset def tag {tag} should not have a placeholder, but has one."
    return f"Onset/offset def tag {tag} expects a placeholder value, but does not have one."


@hed_tag_error(TemporalErrors.DURATION_HAS_OTHER_TAGS, actual_code=ValidationErrors.TEMPORAL_TAG_ERROR)
def onset_DURATION_HAS_OTHER_TAGS(tag):
    """ Build the message for a tag that is grouped directly with Duration or Delay.

    Parameters:
        tag: The tag that should not be in the group; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    message = f"Tag '{tag}' should not be grouped with Duration or Delay. Context tags should be in a sub-group."
    return message


@hed_error(ColumnErrors.INVALID_COLUMN_REF, actual_code=SidecarErrors.SIDECAR_BRACES_INVALID)
def invalid_column_ref(bad_ref):
    """ Build the message for a reference to an unknown or un-annotated column.

    Parameters:
        bad_ref: The column name that was referenced; it is formatted into the message.

    Returns:
        str: The error message text.
    """
    # The original message ended with a stray, unbalanced apostrophe after the period.
    return f"The column '{bad_ref}' is unknown or does not have HED annotations."
Expand Down
9 changes: 6 additions & 3 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ValidationErrors:
DEFINITION_INVALID = "DEFINITION_INVALID"
ELEMENT_DEPRECATED = "ELEMENT_DEPRECATED"
NODE_NAME_EMPTY = 'NODE_NAME_EMPTY'
ONSET_OFFSET_INSET_ERROR = 'ONSET_OFFSET_INSET_ERROR'
TEMPORAL_TAG_ERROR = 'TEMPORAL_TAG_ERROR'
PARENTHESES_MISMATCH = 'PARENTHESES_MISMATCH'
PLACEHOLDER_INVALID = 'PLACEHOLDER_INVALID'
REQUIRED_TAG_MISSING = 'REQUIRED_TAG_MISSING'
Expand Down Expand Up @@ -159,8 +159,8 @@ class DefinitionErrors:
BAD_DEFINITION_LOCATION = 'BAD_DEFINITION_LOCATION'


class OnsetErrors:
# These are all ONSET_OFFSET_INSET_ERROR
class TemporalErrors:
# These are all TEMPORAL_TAG_ERROR
OFFSET_BEFORE_ONSET = "OFFSET_BEFORE_ONSET"
ONSET_DEF_UNMATCHED = "ONSET_DEF_UNMATCHED"
ONSET_WRONG_NUMBER_GROUPS = "ONSET_WRONG_NUMBER_GROUPS"
Expand All @@ -172,6 +172,9 @@ class OnsetErrors:
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"
HED_ONSET_WITH_NO_COLUMN = 'HED_ONSET_WITH_NO_COLUMN'

DURATION_HAS_OTHER_TAGS = "DURATION_HAS_OTHER_TAGS"
DURATION_WRONG_NUMBER_GROUPS = "DURATION_WRONG_NUMBER_GROUPS"


class ColumnErrors:
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"
Expand Down
2 changes: 1 addition & 1 deletion hed/errors/known_error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"DEF_INVALID",
"DEFINITION_INVALID",
"NODE_NAME_EMPTY",
"ONSET_OFFSET_INSET_ERROR",
"TEMPORAL_TAG_ERROR",
"PARENTHESES_MISMATCH",
"PLACEHOLDER_INVALID",
"REQUIRED_TAG_MISSING",
Expand Down
2 changes: 1 addition & 1 deletion hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def _indexed_dict_from_onsets(onsets):
# This would need to store the index list -> So it can optionally apply to other columns on request.
@staticmethod
def _filter_by_index_list(original_series, indexed_dict):
new_series = pd.Series(["n/a"] * len(original_series), dtype=str)
new_series = pd.Series([""] * len(original_series), dtype=str)

for onset, indices in indexed_dict.items():
if indices:
Expand Down
11 changes: 7 additions & 4 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from hed.models.hed_string import HedString


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True, return_filtered=False):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.

Parameters:
Expand All @@ -13,17 +13,20 @@ def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
return_filtered (bool): If true, combines lines with the same onset.
Further lines with that onset are left as empty strings.
Returns:
tuple:
hed_strings(list of HedStrings): A list of HedStrings or a list of lists of HedStrings
hed_strings(list of HedStrings): A list of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar.
"""

def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
series_a = tabular_file.series_a if not return_filtered else tabular_file.series_filtered
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in tabular_file.series_a], def_dict
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
Expand Down
25 changes: 24 additions & 1 deletion hed/models/hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def find_top_level_tags(self, anchor_tags, include_groups=2):
If 1: return only groups.
If 2 or any other value: return both.
Returns:
list or tuple: The returned result depends on include_groups.
list: The returned result depends on include_groups.
"""
top_level_tags = []
for group in self.groups():
Expand All @@ -365,6 +365,29 @@ def find_top_level_tags(self, anchor_tags, include_groups=2):
return [tag[include_groups] for tag in top_level_tags]
return top_level_tags

def find_top_level_tags_grouped(self, anchor_tags):
    """ Find top level groups that contain at least one anchor tag.

    An alternate form of the top level search, designed to be easy to use with Delay/Duration tags:
    all anchor tags found directly in a group are returned together with that group.

    Parameters:
        anchor_tags (container): A list/set/etc. of short_base_tags to find groups by.
                                 Each tag's short_base_tag is lowercased before the membership test.

    Returns:
        list of tuples: One entry per matching group, in group order.
            list of tags: the anchor tags found in that group
            group: the group containing those tags
    """
    matches = []
    for candidate in self.groups():
        anchors = [tag for tag in candidate.tags() if tag.short_base_tag.lower() in anchor_tags]
        # Groups without any anchor tag are left out entirely.
        if anchors:
            matches.append((anchors, candidate))
    return matches

def remove_refs(self):
""" Remove any refs(tags contained entirely inside curly braces) from the string.

Expand Down
13 changes: 7 additions & 6 deletions hed/models/model_constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
""" Defined constants for definitions, def labels, and expanded labels. """
COLUMN_TO_HED_TAGS = "column_to_hed_tags"
ROW_HED_STRING = "HED"
COLUMN_ISSUES = "column_issues"
ROW_ISSUES = "row_issues"


class DefTagNames:
""" Source names for definitions, def labels, and expanded labels. """

Expand All @@ -19,9 +13,16 @@ class DefTagNames:
ONSET_ORG_KEY = "Onset"
OFFSET_ORG_KEY = "Offset"
INSET_ORG_KEY = "Inset"
DURATION_ORG_KEY = "Duration"
DELAY_ORG_KEY = "Delay"

ONSET_KEY = ONSET_ORG_KEY.lower()
OFFSET_KEY = OFFSET_ORG_KEY.lower()
INSET_KEY = INSET_ORG_KEY.lower()
DURATION_KEY = DURATION_ORG_KEY.lower()
DELAY_KEY = DELAY_ORG_KEY.lower()

TEMPORAL_KEYS = {ONSET_KEY, OFFSET_KEY, INSET_KEY}
DURATION_KEYS = {DURATION_KEY, DELAY_KEY}

ALL_TIME_KEYS = TEMPORAL_KEYS.union(DURATION_KEYS)
7 changes: 4 additions & 3 deletions hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def search_strings(hed_strings, queries, query_names):
df_factors = pd.DataFrame(0, index=range(len(hed_strings)), columns=query_names)
for parse_ind, parser in enumerate(queries):
for index, next_item in enumerate(hed_strings):
match = parser.search(next_item)
if match:
df_factors.at[index, query_names[parse_ind]] = 1
if next_item:
match = parser.search(next_item)
if match:
df_factors.at[index, query_names[parse_ind]] = 1
return df_factors
32 changes: 25 additions & 7 deletions hed/tools/analysis/event_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Manager of events of temporal extent. """
import pandas as pd
import bisect

from hed.errors import HedFileError
from hed.models import HedString
Expand Down Expand Up @@ -52,15 +53,31 @@ def _create_event_list(self, input_data):
Notes:

"""
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False)
hed_strings, def_dict = get_assembled(input_data, self.hed_schema, extra_def_dicts=None, defs_expanded=False,
return_filtered=True)
onset_dict = {} # Temporary dictionary keeping track of temporal events that haven't ended yet.
for event_index, hed in enumerate(hed_strings):
self._extract_temporal_events(hed, event_index, onset_dict)
self._extract_duration_events(hed, event_index)
# Now handle the events that extend to end of list
for item in onset_dict.values():
item.set_end(len(self.onsets), None)
self.hed_strings = hed_strings

def _extract_duration_events(self, hed, event_index):
    """ Extract the Duration-anchored groups as events and remove them from the HED string.

    Parameters:
        hed (HedString): The assembled HedString at position event_index in the data.
        event_index (int): The position of this string in the data.

    Notes:
        Each extracted event starts at self.onsets[event_index] and is appended to
        self.event_list[event_index]. Its end index is the bisect_left insertion point of its
        end time in self.onsets (assumes self.onsets is sorted ascending -- TODO confirm).
    """
    duration_groups = hed.find_top_level_tags(anchor_tags={DefTagNames.DURATION_KEY})
    extracted = []
    for _anchor_tag, duration_group in duration_groups:
        event = TemporalEvent(duration_group, event_index, self.onsets[event_index])
        stop_time = event.end_time
        # Todo: This may need updating. end_index==len(self.onsets) in the edge
        stop_index = bisect.bisect_left(self.onsets, stop_time)
        event.set_end(stop_index, stop_time)
        self.event_list[event_index].append(event)
        extracted.append(duration_group)
    hed.remove(extracted)

def _extract_temporal_events(self, hed, event_index, onset_dict):
""" Extract the temporal events and remove them from the other HED strings.

Expand All @@ -77,18 +94,19 @@ def _extract_temporal_events(self, hed, event_index, onset_dict):
return
group_tuples = hed.find_top_level_tags(anchor_tags={DefTagNames.ONSET_KEY, DefTagNames.OFFSET_KEY},
include_groups=2)

to_remove = []
for tup in group_tuples:
anchor_tag = tup[1].find_def_tags(recursive=False, include_groups=0)[0]
for def_tag, group in group_tuples:
anchor_tag = group.find_def_tags(recursive=False, include_groups=0)[0]
anchor = anchor_tag.extension.lower()
if anchor in onset_dict or tup[0].short_base_tag.lower() == DefTagNames.OFFSET_KEY:
if anchor in onset_dict or def_tag.short_base_tag.lower() == DefTagNames.OFFSET_KEY:
temporal_event = onset_dict.pop(anchor)
temporal_event.set_end(event_index, self.onsets[event_index])
if tup[0] == DefTagNames.ONSET_KEY:
new_event = TemporalEvent(tup[1], event_index, self.onsets[event_index])
if def_tag == DefTagNames.ONSET_KEY:
new_event = TemporalEvent(group, event_index, self.onsets[event_index])
self.event_list[event_index].append(new_event)
onset_dict[anchor] = new_event
to_remove.append(tup[1])
to_remove.append(group)
hed.remove(to_remove)

def unfold_context(self, remove_types=[]):
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/analysis/temporal_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _split_group(self, contents):
to_remove.append(item)
elif item.short_base_tag.lower() == "duration":
to_remove.append(item)
self.end_time = self.start_time + float(item.extension.lower()) # Will need to be fixed for units
self.end_time = self.start_time + item.value_as_default_unit()
elif item.short_base_tag.lower() == "def":
self.anchor = item.short_tag
contents.remove(to_remove)
Expand Down
14 changes: 7 additions & 7 deletions hed/validator/def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from hed.errors.error_types import ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.models.model_constants import DefTagNames
from hed.errors.error_types import OnsetErrors
from hed.errors.error_types import TemporalErrors


class DefValidator(DefinitionDict):
Expand Down Expand Up @@ -152,11 +152,11 @@ def validate_onset_offset(self, hed_string_obj):

def_tags = found_group.find_def_tags()
if not def_tags:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
continue

if len(def_tags) > 1:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TOO_MANY_DEFS,
tag=def_tags[0][0],
tag_list=[tag[0] for tag in def_tags[1:]])
continue
Expand All @@ -171,7 +171,7 @@ def validate_onset_offset(self, hed_string_obj):
if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY:
max_children = 0
if len(children) > max_children:
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_WRONG_NUMBER_GROUPS,
def_tag,
found_group.children)
continue
Expand All @@ -180,7 +180,7 @@ def validate_onset_offset(self, hed_string_obj):
# Make this a loop if max_children can be > 1
child = children[0]
if not isinstance(child, HedGroup):
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
onset_issues += ErrorHandler.format_error(TemporalErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
child,
def_tag)

Expand All @@ -197,9 +197,9 @@ def _handle_onset_or_offset(self, def_tag):

def_entry = self.defs.get(def_name.lower())
if def_entry is None:
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
return ErrorHandler.format_error(TemporalErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
if bool(def_entry.takes_value) != bool(placeholder):
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
return ErrorHandler.format_error(TemporalErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
has_placeholder=bool(def_entry.takes_value))

return []
Loading
Loading