diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6edc97cc..3b7d08ca 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,7 +39,7 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -73,7 +73,7 @@ jobs: - name: Archive code coverage results if: ${{matrix.python-version == '3.9'}} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: code-coverage-report path: .coverage @@ -103,7 +103,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.9 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 @@ -121,7 +121,7 @@ jobs: pip install -r docs/requirements.txt - name: Download a single artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: code-coverage-report diff --git a/.github/workflows/spec_tests.yaml b/.github/workflows/spec_tests.yaml index d495bd51..c930c04a 100644 --- a/.github/workflows/spec_tests.yaml +++ b/.github/workflows/spec_tests.yaml @@ -22,7 +22,7 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -44,13 +44,13 @@ jobs: python -m unittest spec_tests/validate_bids.py > validate_bids_results.txt - name: Upload error test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: error-test-results path: error_results.txt - name: Upload bids test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: bids-test-results path: validate_bids_results.txt diff --git a/docs/requirements.txt b/docs/requirements.txt index 59c144b4..1e05aebd 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,4 +8,4 @@ semantic_version>=2.10.0 
myst-parser>=1.0.0 Sphinx>=5.2.2 sphinx_rtd_theme>=1.0.0 -wordcloud==1.9.2 +wordcloud==1.9.3 diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst index cf03326d..1039bc70 100644 --- a/docs/source/_templates/custom-class-template.rst +++ b/docs/source/_templates/custom-class-template.rst @@ -8,34 +8,24 @@ .. rubric:: {{ _('Methods') }} .. autosummary:: -{% for item in methods %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in methods %} + {{ objname }}.{{ item }} {%- endfor %} .. rubric:: {{ _('Attributes') }} .. autosummary:: -{% for item in attributes %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in attributes %} + {{ objname }}.{{ item }} {%- endfor %} -.. toctree:: - :hidden: - -{% for item in methods %} - {{ fullname }}#method-{{ item }} -{%- endfor %} -{% for item in attributes %} - {{ fullname }}#attribute-{{ item }} -{%- endfor %} - -{% for item in methods %} +{%- for item in methods %} .. _method-{{ item }}: .. automethod:: {{ module }}.{{ objname }}.{{ item }} {%- endfor %} -{% for item in attributes %} +{%- for item in attributes %} .. _attribute-{{ item }}: .. autoattribute:: {{ module }}.{{ objname }}.{{ item }} diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst index 9e9c8a77..6062649d 100644 --- a/docs/source/_templates/custom-module-template.rst +++ b/docs/source/_templates/custom-module-template.rst @@ -38,7 +38,7 @@ .. rubric:: {{ _('Classes') }} .. 
autosummary:: - :toctree: + :toctree: _generated_classes :template: custom-class-template.rst {% for item in classes %} {{ item }} diff --git a/docs/source/conf.py b/docs/source/conf.py index 39ee127f..6d69ae7c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,7 @@ "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.autosummary", - "sphinx.ext.autosectionlabel", + # "sphinx.ext.autosectionlabel", "sphinx.ext.intersphinx", "sphinx.ext.coverage", "sphinx.ext.mathjax", diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index fbb72f41..3ff7f3c9 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -42,10 +42,6 @@ Finding help The `HED online tools `_ provide an easy-to-use interface that requires no programming. -:Mailing lists and forums: - - * Don't hesitate to ask questions about the python hedtools on `NeuroStars - `_. :Issues and problems: * If you notice a bug in the python hedtools code or encounter other problems using the tools, please `open an issue`_ in the diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 0d689510..49edf9e8 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -30,12 +30,14 @@ def __init__(self, def_dicts=None, hed_schema=None): self.add_definitions(def_dicts, hed_schema) def add_definitions(self, def_dicts, hed_schema=None): - """ Add definitions from dict(s) to this dict. + """ Add definitions from dict(s) or strings(s) to this dict. Parameters: - def_dicts (list, DefinitionDict, or dict): DefinitionDict or list of DefinitionDicts/strings/dicts whose - definitions should be added. - Note dict form expects DefinitionEntries in the same form as a DefinitionDict + def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts whose + definitions should be added. 
+ Note - dict form expects DefinitionEntries in the same form as a DefinitionDict + Note - str or list of strings will parse the strings using the hed_schema. + Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 736ff562..83e91adc 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -332,7 +332,7 @@ def __init__(self, expression_string): '"Event"' - Finds the Event tag, but not any descendent tags - 'Def/DefName/*' - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index bdbfa852..db6dd7e8 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -499,6 +499,7 @@ def default_unit(self): """ Get the default unit class unit for this tag. Only a tag with a single unit class can have default units. + Returns: unit(UnitEntry or None): the default unit entry for this tag, or None """ diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 326df76b..027c63d6 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -223,7 +223,7 @@ def load_schema_version(xml_version=None, xml_folder=None): An empty string returns the latest version A json str format is also supported, based on the output of HedSchema.get_formatted_version - Basic format: '[schema_namespace:][library_name_][X.Y.Z]'. + Basic format: `[schema_namespace:][library_name_][X.Y.Z]`. xml_folder (str): Path to a folder containing schema. 
Returns: diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index c08a11a2..942c4167 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -1,13 +1,15 @@ """The built-in functions to validate known attributes. Template for the functions: -attribute_checker_template(hed_schema, tag_entry, attribute_name, possible_values): - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute + +- ``attribute_checker_template(hed_schema, tag_entry, attribute_name)``: + - ``hed_schema (HedSchema)``: The schema to use for validation. + - ``tag_entry (HedSchemaEntry)``: The schema entry for this tag. + - ``attribute_name (str)``: The name of this attribute. + Returns: - bool -""" + - ``bool``: Description of the return value. + """ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors from hed.errors.error_reporter import ErrorHandler diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 1cd974c0..f128306d 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -176,14 +176,14 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section schema1 (HedSchema): The first schema to be compared. schema2 (HedSchema): The second schema to be compared. attribute_filter (str, optional): The attribute to filter entries by. - Entries without this attribute are skipped. - The most common use would be HedKey.InLibrary - If it evaluates to False, no filtering is performed. + Entries without this attribute are skipped. + The most common use would be HedKey.InLibrary + If it evaluates to False, no filtering is performed. sections(list): the list of sections to compare. By default, just the tags section. - If None, checks all sections including header, prologue, and epilogue. 
+ If None, checks all sections including header, prologue, and epilogue. Returns: - tuple: A tuple containing four dictionaries: + tuple: A tuple containing four dictionaries: - matches(dict): Entries present in both schemas and are equal. - not_in_schema1(dict): Entries present in schema2 but not in schema1. - not_in_schema2(dict): Entries present in schema1 but not in schema2. diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index 988b4bda..fba665d7 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -11,7 +11,7 @@ class HedTypeDefs: def_map (dict): keys are definition names, values are dict {type_values, description, tags} Example: A definition 'famous-face-cond' with contents `(Condition-variable/Face-type,Description/A face that should be recognized by the - participants,(Image,(Face,Famous)))` + participants,(Image,(Face,Famous)))` would have type_values ['face_type']. All items are strings not objects. diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index e2f7f535..09d7f318 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -59,7 +59,9 @@ def make_template(self, additional_cols=None, show_counts=True): Parameters: additional_cols (list or None): Optional list of additional columns to append to the returned dataframe. - show_counts (bool): If true, number of times each key combination appears is in first column + show_counts (bool): If True, number of times each key combination appears is in first column and + values are sorted in descending order by + Returns: DataFrame: A dataframe containing the template. 
@@ -79,6 +81,7 @@ def make_template(self, additional_cols=None, show_counts=True): df[additional_cols] = 'n/a' if show_counts: df.insert(0, 'key_counts', self._get_counts()) + df.sort_values(by=['key_counts'], inplace=True, ignore_index=True, ascending=False) return df def _get_counts(self): @@ -142,7 +145,7 @@ def resort(self): for index, row in self.col_map.iterrows(): key_hash = get_row_hash(row, self.key_cols) self.map_dict[key_hash] = index - + def update(self, data, allow_missing=True): """ Update the existing map with information from data. diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py new file mode 100644 index 00000000..0ecd0fea --- /dev/null +++ b/hed/tools/analysis/sequence_map.py @@ -0,0 +1,173 @@ +""" A map of containing the number of times a particular sequence of values in a column of an event file. """ + + +import pandas as pd +from hed.tools.util.data_util import get_key_hash + + +class SequenceMap: + """ A map of unique sequences of column values of a particular length appear in an event file. + + Attributes: + + name (str): An optional name of this remap for identification purposes. + + Notes: This mapping converts all columns in the mapping to strings. + The remapping does not support other types of columns. + + """ + def __init__(self, codes=None, name=''): + """ Information for setting up the maps. + + Parameters: + codes (list or None): If None use all codes, otherwise only include listed codes in the map. + name (str): Name associated with this remap (usually a pathname of the events file). 
+ + """ + + self.codes = codes + self.name = name + self.node_counts = {} + self.edges = {} # map of keys to n-element sequences + self.edge_counts = {} # Keeps a running count of the number of times a key appears in the data + + @property + + def __str__(self): + node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] + node_str = (" ").join(node_counts) + return node_str + # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.columns) + # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") + # return "\n".join(temp_list) + + def dot_str(self, group_spec={}): + """ Produce a DOT string representing this sequence map. + + + """ + base = 'digraph g { \n' + if self.codes: + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) +"\n}\n" + if group_spec: + for group, spec in group_spec.items(): + group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] + if group_list: + spec_color = spec["color"] + if spec_color[0] == '#': + spec_color = f'"{spec_color}"' + base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ + '\n'.join(group_list) + '\n}\n' + edge_list = self.get_edge_list(sort=True) + + dot_str = base + ("\n").join(edge_list) + "}\n" + return dot_str + + def edge_to_str(self, key): + value = self.edges.get(key, []) + if value: + return f"{value[0]} -> {value[1]} " + else: + return "" + def get_edge_list(self, sort=True): + """Produces a DOT format edge list with the option of sorting by edge counts. + + Parameters: + sort (bool): if true the edge list is sorted by edge counts + + Returns: + list: list of DOT strings representing the edges labeled by counts. 
+ + """ + + df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) + if sort: + df = df.sort_values(by='Counts', ascending=False) + edge_list = [f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];" + for index, row in df.iterrows()] + return edge_list + + def filter_edges(self): + print("to here") + + def update(self, data): + """ Update the existing map with information from data. + + Parameters: + data (Series): DataFrame or filename of an events file or event map. + allow_missing (bool): If true allow missing keys and add as n/a columns. + + :raises HedFileError: + - If there are missing keys and allow_missing is False. + + """ + filtered = self.prep(data) + if self.codes: + mask = filtered.isin(self.codes) + filtered = filtered[mask] + for index, value in filtered.items(): + if value not in self.node_counts: + self.node_counts[value] = 1 + else: + self.node_counts[value] = self.node_counts[value] + 1 + if index + 1 >= len(filtered): + break + key_list = filtered[index:index+2].tolist() + key = get_key_hash(key_list) + if key in self.edges: + self.edge_counts[key] = self.edge_counts[key] + 1 + else: + self.edges[key] = key_list + self.edge_counts[key] = 1 + + def update(self, data): + """ Update the existing map with information from data. + + Parameters: + data (Series): DataFrame or filename of an events file or event map. + allow_missing (bool): If true allow missing keys and add as n/a columns. + + :raises HedFileError: + - If there are missing keys and allow_missing is False. 
+ + """ + filtered = self.prep(data) + if self.codes: + mask = filtered.isin(self.codes) + filtered = filtered[mask] + for index, value in filtered.items(): + if value not in self.node_counts: + self.node_counts[value] = 1 + else: + self.node_counts[value] = self.node_counts[value] + 1 + if index + 1 >= len(filtered): + break + key_list = filtered[index:index + 2].tolist() + key = get_key_hash(key_list) + if key in self.edges: + self.edge_counts[key] = self.edge_counts[key] + 1 + else: + self.edges[key] = key_list + self.edge_counts[key] = 1 + + @staticmethod + def prep(data): + """ Remove quotes from the specified columns and convert to string. + + Parameters: + data (Series): Dataframe to process by removing quotes. + + Returns: Series + Notes: + - Replacement is done in place. + """ + + filtered = data.astype(str) + filtered.fillna('n/a').astype(str) + filtered = filtered.str.replace('"', '') + filtered = filtered.str.replace("'", "") + return filtered \ No newline at end of file diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py new file mode 100644 index 00000000..0415f91e --- /dev/null +++ b/hed/tools/analysis/sequence_map_new.py @@ -0,0 +1,160 @@ +""" A map of containing the number of times a particular sequence of values in a column of an event file. """ + +import pandas as pd +from hed.tools.util.data_util import get_key_hash + + +class SequenceMapNew: + """ A map of unique sequences of column values of a particular length appear in an event file. + + Attributes: + + name (str): An optional name of this remap for identification purposes. + + Notes: This mapping converts all columns in the mapping to strings. + The remapping does not support other types of columns. + + """ + + def __init__(self, codes=None, name='', seq=[0, -1]): + """ Information for setting up the maps. + + Parameters: + codes (list or None): If None use all codes, otherwise only include listed codes in the map. 
+ name (str): Name associated with this remap (usually a pathname of the events file). + + """ + + self.codes = codes + self.name = name + self.seq = seq + self.nodes = {} # Node keys to node names + self.node_counts = {} # Node values to count + self.sequences = {} # Sequence keys to sequence + self.seq_counts = {} # Sequence keys to counts + self.edges = {} # map of edge keys to 2-element sequence keys + self.edge_counts = {} # edge keys to edge counts + + @property + def __str__(self): + node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] + node_str = (" ").join(node_counts) + return node_str + # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.columns) + # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") + # return "\n".join(temp_list) + + def dot_str(self, group_spec={}): + """ Produce a DOT string representing this sequence map. 
+ + + """ + base = 'digraph g { \n' + if self.codes: + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) + "\n}\n" + if group_spec: + for group, spec in group_spec.items(): + group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] + if group_list: + spec_color = spec["color"] + if spec_color[0] == '#': + spec_color = f'"{spec_color}"' + base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ + '\n'.join(group_list) + '\n}\n' + edge_list = self.get_edge_list(sort=True) + + dot_str = base + ("\n").join(edge_list) + "}\n" + return dot_str + + def edge_to_str(self, key): + value = self.edges.get(key, []) + if value: + x = ("+").join(value[0]) + y = ("+").join(value[1]) + return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " + else: + return "" + + def get_edge_list(self, sort=True): + """Produces a DOT format edge list with the option of sorting by edge counts. + + Parameters: + sort (bool): if true the edge list is sorted by edge counts + + Returns: + list: list of DOT strings representing the edges labeled by counts. 
+ + """ + + df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) + if sort: + df = df.sort_values(by='Counts', ascending=False) + edge_list = [] + for index, row in df.iterrows(): + edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") + return edge_list + + def filter_edges(self): + print("to here") + + def update(self, data): + filtered = self.get_sequence_data(data) + last_seq_key = None + for index, row in filtered.iterrows(): + # Update node counts + this_node = row['value'] + self.node_counts[this_node] = self.node_counts.get(this_node, 0) + 1 + this_seq = row['seq'] + if not this_seq: + last_seq_key = None + continue; + this_seq_key = get_key_hash(this_seq) + self.sequences[this_seq_key] = this_seq + self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1 + if last_seq_key: + this_edge_key = get_key_hash([last_seq_key, this_seq_key]) + self.edges[this_edge_key] = [last_seq_key, this_seq_key] + self.edge_counts[this_edge_key] = self.edge_counts.get(this_edge_key, 0) + 1 + last_seq_key = this_seq_key + + def get_sequence_data(self, data): + filtered = self.prep(data) + empty_lists = [[] for _ in range(len(filtered))] + + # Create a DataFrame + df = pd.DataFrame({'value': filtered.values, 'seq': empty_lists}) + + for index, row in df.iterrows(): + df.at[index, 'seq'] = self.get_sequence(df, index) + return df + + def get_sequence(self, df, index): + seq_list = [] + for i, val in enumerate(self.seq): + df_ind = val + index + if df_ind < 0 or df_ind >= len(df): + return [] + seq_list.append(df.iloc[df_ind, 0]) + return seq_list + + @staticmethod + def prep(data): + """ Remove quotes from the specified columns and convert to string. + + Parameters: + data (Series): Dataframe to process by removing quotes. + + Returns: Series + Notes: + - Replacement is done in place. 
+ """ + + filtered = data.astype(str) + filtered.fillna('n/a').astype(str) + filtered = filtered.str.replace('"', '') + filtered = filtered.str.replace("'", "") + return filtered diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index 75c6f4f1..60ecf753 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -224,7 +224,7 @@ def get_task(task_names, file_path): """ Return the task if the file name contains a task_xxx where xxx is in task_names. Parameters: - task_names (list): List of task names (without the task_ prefix). + task_names (list): List of task names (without the `task_` prefix). file_path (str): Path of the filename to be tested. Returns: diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 6c18625d..0761eca5 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -4,7 +4,7 @@ import json import argparse from hed.errors.exceptions import HedFileError -from hed.tools.util.io_util import get_file_list, get_task_from_file +from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict from hed.tools.bids.bids_dataset import BidsDataset from hed.tools.remodeling.validator import RemodelerValidator from hed.tools.remodeling.dispatcher import Dispatcher @@ -121,14 +121,7 @@ def parse_arguments(arg_list=None): def parse_tasks(files, task_args): if not task_args: return {"": files} - task_dict = {} - for my_file in files: - task = get_task_from_file(my_file) - if not task: - continue - task_entry = task_dict.get(task, []) - task_entry.append(my_file) - task_dict[task] = task_entry + task_dict = get_task_dict(files) if task_args == "*" or isinstance(task_args, list) and task_args[0] == "*": return task_dict task_dict = {key: task_dict[key] for key in task_args if key in task_dict} diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 
53fab27a..1a00b34b 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -328,3 +328,20 @@ def get_task_from_file(file_path): return "" splits = re.split(r'[_.]', basename[position+5:]) return splits[0] + +def get_task_dict(files): +    """ Return a dictionary of the tasks that appear in the file names of a list of files. + +    Parameters: +        files (list):  List of file paths whose names are searched for task names. + +    """ +    task_dict = {} +    for my_file in files: +        task = get_task_from_file(my_file) +        if not task: +            continue +        task_entry = task_dict.get(task, []) +        task_entry.append(my_file) +        task_dict[task] = task_entry +    return task_dict diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py index 490f5668..b01a4f55 100644 --- a/hed/validator/tag_util/group_util.py +++ b/hed/validator/tag_util/group_util.py @@ -71,6 +71,7 @@ def check_tag_level_issue(original_tag_list, is_top_level, is_group): """ Report tags incorrectly positioned in hierarchy. Top-level groups can contain definitions, Onset, etc. tags. + Parameters: original_tag_list (list): HedTags containing the original tags.
is_top_level (bool): If True, this group is a "top level tag group" diff --git a/readthedocs.yml b/readthedocs.yml index f7198138..b20edf11 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -8,6 +8,9 @@ build: os: "ubuntu-22.04" tools: python: "3.7" + jobs: + pre_build: + - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/source/ docs/_build/linkcheck # Build documentation in the docs/ directory with Sphinx sphinx: @@ -15,7 +18,6 @@ sphinx: configuration: docs/source/conf.py fail_on_warning: false - python: install: - requirements: docs/requirements.txt diff --git a/requirements.txt b/requirements.txt index d82b76e9..799f90f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ openpyxl>=3.1.0 pandas>=1.3.5 portalocker>=2.7.0 semantic_version>=2.10.0 -wordcloud==1.9.2 +wordcloud==1.9.3 jsonschema==4.18.4 \ No newline at end of file diff --git a/spec_tests/hed-examples b/spec_tests/hed-examples index ae000a6b..a4b01682 160000 --- a/spec_tests/hed-examples +++ b/spec_tests/hed-examples @@ -1 +1 @@ -Subproject commit ae000a6be5fa994d3f6808a4b56cd56ff665cae5 +Subproject commit a4b016822b4666285b92715917355ec6bd2ae9d1 diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index 38a59932..570ae3e5 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit 38a5993234b0b29e6adb8edd4647f9ad33c6eb1f +Subproject commit 570ae3e56c042c05a6f488e3cfe56fb70d1fda72 diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py index d0630066..4ae2860d 100644 --- a/tests/tools/analysis/test_key_map.py +++ b/tests/tools/analysis/test_key_map.py @@ -59,8 +59,9 @@ def test_make_template(self): df1 = t_map.make_template(show_counts=False) self.assertIsInstance(df1, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual(len(df1.columns), 1, "make_template should return 1 column single key, no additional columns") - df2 = 
t_map.make_template() + df2 = t_map.make_template(show_counts=True) self.assertEqual(len(df2.columns), 2, "make_template returns an extra column for counts") + t_map2 = KeyMap(['event_type', 'type']) t_map2.update(self.stern_test1_path) df3 = t_map2.make_template() diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py new file mode 100644 index 00000000..07112c77 --- /dev/null +++ b/tests/tools/analysis/test_sequence_map.py @@ -0,0 +1,50 @@ +import unittest +import os +import pandas as pd +from hed.errors.exceptions import HedFileError +from hed.tools.analysis.sequence_map import SequenceMap +from hed.tools.util.data_util import get_new_dataframe +from hed.tools.util.io_util import get_file_list + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') + base_path = '' + cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') + + + def test_constructor(self): + codes1 = ['1111', '1112', '1121', '1122', '1131', '1132', '1141', + '1142', '1311', '1312', '1321', '1322', + '4210', '4220', '4230', '4311', '4312'] + + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # #print(f"{smap1.__str__}") + # print("to here") + + def test_update(self): + codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', + '4210', '4220', '4230', '4311'] + codes1 = ['1111', '1121', '1131', '1141', '1311', '4311'] + #codes1 = ['1111', '1121', '1131', '1141', '1311'] + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # print(f"{smap1.dot_str()}") + # group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", 
"1131", "1141", "1311"]}} + # print(f"{smap1.dot_str(group_spec=group_spec)}") + # + def test_str(self): + pass + + +if __name__ == '__main__': + unittest.main()