From 5a235bd038db1687a1cc67d391943a0b614749b8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:18:03 +0000 Subject: [PATCH 01/18] Bump wordcloud from 1.9.2 to 1.9.3 Bumps [wordcloud](https://github.com/amueller/word_cloud) from 1.9.2 to 1.9.3. - [Release notes](https://github.com/amueller/word_cloud/releases) - [Changelog](https://github.com/amueller/word_cloud/blob/main/doc/changelog.rst) - [Commits](https://github.com/amueller/word_cloud/compare/1.9.2...1.9.3) --- updated-dependencies: - dependency-name: wordcloud dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- docs/requirements.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 59c144b4..1e05aebd 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,4 +8,4 @@ semantic_version>=2.10.0 myst-parser>=1.0.0 Sphinx>=5.2.2 sphinx_rtd_theme>=1.0.0 -wordcloud==1.9.2 +wordcloud==1.9.3 diff --git a/requirements.txt b/requirements.txt index 07c3304d..ee756574 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ openpyxl>=3.1.0 pandas>=1.3.5 portalocker>=2.7.0 semantic_version>=2.10.0 -wordcloud==1.9.2 \ No newline at end of file +wordcloud==1.9.3 \ No newline at end of file From 950f0d55288f03900b0502b5a3eb666df6b5f6ac Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:48:00 +0000 Subject: [PATCH 02/18] Bump actions/setup-python from 4 to 5 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yaml | 4 ++-- .github/workflows/spec_tests.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6edc97cc..1a167eed 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -39,7 +39,7 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -103,7 +103,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.9 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 diff --git a/.github/workflows/spec_tests.yaml b/.github/workflows/spec_tests.yaml index d495bd51..5cc3ed8c 100644 --- a/.github/workflows/spec_tests.yaml +++ b/.github/workflows/spec_tests.yaml @@ -22,7 +22,7 @@ jobs: submodules: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} From 2eaebb8e220a4f7b6233d8921969ce57db280e93 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:37:51 -0600 Subject: [PATCH 03/18] Updated sequence map --- hed/tools/analysis/key_map.py | 7 +- hed/tools/analysis/sequence_map.py | 118 ++++++++++++++++++++++ tests/tools/analysis/test_key_map.py | 3 +- tests/tools/analysis/test_sequence_map.py | 50 +++++++++ 4 files changed, 175 insertions(+), 3 deletions(-) create mode 100644 hed/tools/analysis/sequence_map.py create mode 100644 tests/tools/analysis/test_sequence_map.py diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py index e2f7f535..09d7f318 100644 --- a/hed/tools/analysis/key_map.py +++ b/hed/tools/analysis/key_map.py @@ -59,7 +59,9 @@ def make_template(self, additional_cols=None, show_counts=True): Parameters: additional_cols (list or None): Optional list of additional columns to append to the returned dataframe. - show_counts (bool): If true, number of times each key combination appears is in first column + show_counts (bool): If True, number of times each key combination appears is in first column and + values are sorted in descending order by + Returns: DataFrame: A dataframe containing the template. @@ -79,6 +81,7 @@ def make_template(self, additional_cols=None, show_counts=True): df[additional_cols] = 'n/a' if show_counts: df.insert(0, 'key_counts', self._get_counts()) + df.sort_values(by=['key_counts'], inplace=True, ignore_index=True, ascending=False) return df def _get_counts(self): @@ -142,7 +145,7 @@ def resort(self): for index, row in self.col_map.iterrows(): key_hash = get_row_hash(row, self.key_cols) self.map_dict[key_hash] = index - + def update(self, data, allow_missing=True): """ Update the existing map with information from data. diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py new file mode 100644 index 00000000..e5c81ebf --- /dev/null +++ b/hed/tools/analysis/sequence_map.py @@ -0,0 +1,118 @@ +""" A map of containing the number of times a particular sequence of values in a column of an event file. """ + + +import pandas as pd +from hed.tools.util.data_util import get_key_hash + + +class SequenceMap: + """ A map of unique sequences of column values of a particular length appear in an event file. + + Attributes: + + name (str): An optional name of this remap for identification purposes. + + Notes: This mapping converts all columns in the mapping to strings. + The remapping does not support other types of columns. + + """ + def __init__(self, codes=None, name=''): + """ Information for setting up the maps. + + Parameters: + codes (list or None): If None use all codes, otherwise only include listed codes in the map. + name (str): Name associated with this remap (usually a pathname of the events file). + + """ + + self.codes = codes + self.name = name + self.node_counts = {} + self.edges = {} # map of keys to n-element sequences + self.edge_counts = {} # Keeps a running count of the number of times a key appears in the data + + @property + + def __str__(self): + node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] + node_str = (" ").join(node_counts) + return node_str + # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.columns) + # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") + # return "\n".join(temp_list) + + def dot_str(self, group_spec={}): + base = 'digraph g { \n' + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) +"\n}\n" + if group_spec: + for group, spec in group_spec.items(): + group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] + if group_list: + spec_color = spec["color"] + if spec_color[0] == '#': + spec_color = f'"{spec_color}"' + base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ + '\n'.join(group_list) + '\n}\n' + edge_list = [f"{value[0]} -> {value[1]} [label={str(self.edge_counts[key])}];" + for key, value in self.edges.items()] + dot_str = base + ("\n").join(edge_list) + "}\n" + return dot_str + + # def resort(self): + # """ Sort the col_map in place by the key columns. """ + # self.col_map.sort_values(by=self.key_cols, inplace=True, ignore_index=True) + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.key_cols) + # self.map_dict[key_hash] = index + + def update(self, data): + """ Update the existing map with information from data. + + Parameters: + data (Series): DataFrame or filename of an events file or event map. + allow_missing (bool): If true allow missing keys and add as n/a columns. + + :raises HedFileError: + - If there are missing keys and allow_missing is False. + + """ + filtered = self.prep(data) + if self.codes: + mask = filtered.isin(self.codes) + filtered = filtered[mask] + for index, value in filtered.items(): + if value not in self.node_counts: + self.node_counts[value] = 1 + else: + self.node_counts[value] = self.node_counts[value] + 1 + if index + 1 >= len(filtered): + break + key_list = filtered[index:index+2].tolist() + key = get_key_hash(key_list) + if key in self.edges: + self.edge_counts[key] = self.edge_counts[key] + 1 + else: + self.edges[key] = key_list + self.edge_counts[key] = 1 + + @staticmethod + def prep(data): + """ Remove quotes from the specified columns and convert to string. + + Parameters: + data (Series): Dataframe to process by removing quotes. + + Returns: Series + Notes: + - Replacement is done in place. + """ + + filtered = data.astype(str) + filtered.fillna('n/a').astype(str) + filtered = filtered.str.replace('"', '') + filtered = filtered.str.replace("'", "") + return filtered \ No newline at end of file diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py index d0630066..4ae2860d 100644 --- a/tests/tools/analysis/test_key_map.py +++ b/tests/tools/analysis/test_key_map.py @@ -59,8 +59,9 @@ def test_make_template(self): df1 = t_map.make_template(show_counts=False) self.assertIsInstance(df1, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual(len(df1.columns), 1, "make_template should return 1 column single key, no additional columns") - df2 = t_map.make_template() + df2 = t_map.make_template(show_counts=True) self.assertEqual(len(df2.columns), 2, "make_template returns an extra column for counts") + t_map2 = KeyMap(['event_type', 'type']) t_map2.update(self.stern_test1_path) df3 = t_map2.make_template() diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py new file mode 100644 index 00000000..07e3130e --- /dev/null +++ b/tests/tools/analysis/test_sequence_map.py @@ -0,0 +1,50 @@ +import unittest +import os +import pandas as pd +from hed.errors.exceptions import HedFileError +from hed.tools.analysis.sequence_map import SequenceMap +from hed.tools.util.data_util import get_new_dataframe +from hed.tools.util.io_util import get_file_list + + +class Test(unittest.TestCase): + @classmethod + def setUpClass(cls): + # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') + base_path = 'T:/summaryTests/ds004105-download' + cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') + + + def test_constructor(self): + codes1 = ['1111', '1112', '1121', '1122', '1131', '1132', '1141', + '1142', '1311', '1312', '1321', '1322', + '4210', '4220', '4230', '4311', '4312'] + + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + df = get_new_dataframe(self.events_path) + data = df['value'] + smap1.update(data) + #print(f"{smap1.__str__}") + print("to here") + + def test_update(self): + codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', + '4210', '4220', '4230', '4311'] + codes1 = ['1111', '1121', '1131', '1141', '1311', '4311'] + #codes1 = ['1111', '1121', '1131', '1141', '1311'] + smap1 = SequenceMap(codes=codes1) + self.assertIsInstance(smap1, SequenceMap) + df = get_new_dataframe(self.events_path) + data = df['value'] + smap1.update(data) + print(f"{smap1.dot_str()}") + group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} + print(f"{smap1.dot_str(group_spec=group_spec)}") + + def test_str(self): + pass + + +if __name__ == '__main__': + unittest.main() From 7ba94380737f046f38fbf743326b4df3245502a0 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:24 -0600 Subject: [PATCH 04/18] Corrected sequence map tests --- tests/tools/analysis/test_sequence_map.py | 26 +++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py index 07e3130e..07112c77 100644 --- a/tests/tools/analysis/test_sequence_map.py +++ b/tests/tools/analysis/test_sequence_map.py @@ -11,7 +11,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') - base_path = 'T:/summaryTests/ds004105-download' + base_path = '' cls.events_path = os.path.realpath(base_path + '/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv') @@ -22,11 +22,11 @@ def test_constructor(self): smap1 = SequenceMap(codes=codes1) self.assertIsInstance(smap1, SequenceMap) - df = get_new_dataframe(self.events_path) - data = df['value'] - smap1.update(data) - #print(f"{smap1.__str__}") - print("to here") + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # #print(f"{smap1.__str__}") + # print("to here") def test_update(self): codes1 = ['1111', '1121', '1131', '1141', '1311', '1321', @@ -35,13 +35,13 @@ def test_update(self): #codes1 = ['1111', '1121', '1131', '1141', '1311'] smap1 = SequenceMap(codes=codes1) self.assertIsInstance(smap1, SequenceMap) - df = get_new_dataframe(self.events_path) - data = df['value'] - smap1.update(data) - print(f"{smap1.dot_str()}") - group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} - print(f"{smap1.dot_str(group_spec=group_spec)}") - + # df = get_new_dataframe(self.events_path) + # data = df['value'] + # smap1.update(data) + # print(f"{smap1.dot_str()}") + # group_spec = {"stimulus": {"color": "#FFAAAA", "nodes": ["1111", "1121", "1131", "1141", "1311"]}} + # print(f"{smap1.dot_str(group_spec=group_spec)}") + # def test_str(self): pass From bdf2042e8d82b0bb992045ecd7876f999c0987ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:39:59 +0000 Subject: [PATCH 05/18] Bump actions/download-artifact from 3 to 4 Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 3 to 4. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1a167eed..ece1f83c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -121,7 +121,7 @@ jobs: pip install -r docs/requirements.txt - name: Download a single artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: code-coverage-report From 8f0b6dd057e08414b9900cf246fdd14374ef3607 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:40:02 +0000 Subject: [PATCH 06/18] Bump actions/upload-artifact from 3 to 4 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3 to 4. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/ci.yaml | 2 +- .github/workflows/spec_tests.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1a167eed..24a778c6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -73,7 +73,7 @@ jobs: - name: Archive code coverage results if: ${{matrix.python-version == '3.9'}} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: code-coverage-report path: .coverage diff --git a/.github/workflows/spec_tests.yaml b/.github/workflows/spec_tests.yaml index 5cc3ed8c..c930c04a 100644 --- a/.github/workflows/spec_tests.yaml +++ b/.github/workflows/spec_tests.yaml @@ -44,13 +44,13 @@ jobs: python -m unittest spec_tests/validate_bids.py > validate_bids_results.txt - name: Upload error test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: error-test-results path: error_results.txt - name: Upload bids test results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: bids-test-results path: validate_bids_results.txt From c420ff9fe1819eae96c884ae6e3bd620fea6daba Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Sat, 23 Dec 2023 19:30:21 -0600 Subject: [PATCH 07/18] Sequence map in process --- hed/tools/analysis/sequence_map.py | 77 ++++++++++-- hed/tools/analysis/sequence_map_new.py | 160 ++++++++++++++++++++++++ hed/tools/remodeling/cli/run_remodel.py | 11 +- hed/tools/util/io_util.py | 17 +++ 4 files changed, 245 insertions(+), 20 deletions(-) create mode 100644 hed/tools/analysis/sequence_map_new.py diff --git a/hed/tools/analysis/sequence_map.py b/hed/tools/analysis/sequence_map.py index e5c81ebf..0ecd0fea 100644 --- a/hed/tools/analysis/sequence_map.py +++ b/hed/tools/analysis/sequence_map.py @@ -44,10 +44,15 @@ def __str__(self): # return "\n".join(temp_list) def dot_str(self, group_spec={}): + """ Produce a DOT string representing this sequence map. + + + """ base = 'digraph g { \n' - node_list = [f"{node};" for node in self.codes if node not in self.node_counts] - if node_list: - base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) +"\n}\n" + if self.codes: + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) +"\n}\n" if group_spec: for group, spec in group_spec.items(): group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] @@ -57,17 +62,37 @@ def dot_str(self, group_spec={}): spec_color = f'"{spec_color}"' base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ '\n'.join(group_list) + '\n}\n' - edge_list = [f"{value[0]} -> {value[1]} [label={str(self.edge_counts[key])}];" - for key, value in self.edges.items()] + edge_list = self.get_edge_list(sort=True) + dot_str = base + ("\n").join(edge_list) + "}\n" return dot_str + + def edge_to_str(self, key): + value = self.edges.get(key, []) + if value: + return f"{value[0]} -> {value[1]} " + else: + return "" + def get_edge_list(self, sort=True): + """Produces a DOT format edge list with the option of sorting by edge counts. + + Parameters: + sort (bool): if true the edge list is sorted by edge counts + + Returns: + list: list of DOT strings representing the edges labeled by counts. + + """ + + df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) + if sort: + df = df.sort_values(by='Counts', ascending=False) + edge_list = [f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];" + for index, row in df.iterrows()] + return edge_list - # def resort(self): - # """ Sort the col_map in place by the key columns. """ - # self.col_map.sort_values(by=self.key_cols, inplace=True, ignore_index=True) - # for index, row in self.col_map.iterrows(): - # key_hash = get_row_hash(row, self.key_cols) - # self.map_dict[key_hash] = index + def filter_edges(self): + print("to here") def update(self, data): """ Update the existing map with information from data. @@ -99,6 +124,36 @@ def update(self, data): self.edges[key] = key_list self.edge_counts[key] = 1 + def update(self, data): + """ Update the existing map with information from data. + + Parameters: + data (Series): DataFrame or filename of an events file or event map. + allow_missing (bool): If true allow missing keys and add as n/a columns. + + :raises HedFileError: + - If there are missing keys and allow_missing is False. + + """ + filtered = self.prep(data) + if self.codes: + mask = filtered.isin(self.codes) + filtered = filtered[mask] + for index, value in filtered.items(): + if value not in self.node_counts: + self.node_counts[value] = 1 + else: + self.node_counts[value] = self.node_counts[value] + 1 + if index + 1 >= len(filtered): + break + key_list = filtered[index:index + 2].tolist() + key = get_key_hash(key_list) + if key in self.edges: + self.edge_counts[key] = self.edge_counts[key] + 1 + else: + self.edges[key] = key_list + self.edge_counts[key] = 1 + @staticmethod def prep(data): """ Remove quotes from the specified columns and convert to string. diff --git a/hed/tools/analysis/sequence_map_new.py b/hed/tools/analysis/sequence_map_new.py new file mode 100644 index 00000000..0415f91e --- /dev/null +++ b/hed/tools/analysis/sequence_map_new.py @@ -0,0 +1,160 @@ +""" A map of containing the number of times a particular sequence of values in a column of an event file. """ + +import pandas as pd +from hed.tools.util.data_util import get_key_hash + + +class SequenceMapNew: + """ A map of unique sequences of column values of a particular length appear in an event file. + + Attributes: + + name (str): An optional name of this remap for identification purposes. + + Notes: This mapping converts all columns in the mapping to strings. + The remapping does not support other types of columns. + + """ + + def __init__(self, codes=None, name='', seq=[0, -1]): + """ Information for setting up the maps. + + Parameters: + codes (list or None): If None use all codes, otherwise only include listed codes in the map. + name (str): Name associated with this remap (usually a pathname of the events file). + + """ + + self.codes = codes + self.name = name + self.seq = seq + self.nodes = {} # Node keys to node names + self.node_counts = {} # Node values to count + self.sequences = {} # Sequence keys to sequence + self.seq_counts = {} # Sequence keys to counts + self.edges = {} # map of edge keys to 2-element sequence keys + self.edge_counts = {} # edge keys to edge counts + + @property + def __str__(self): + node_counts = [f"{value}({str(count)})" for value, count in self.node_counts.items()] + node_str = (" ").join(node_counts) + return node_str + # temp_list = [f"{self.name} counts for key [{str(self.key_cols)}]:"] + # for index, row in self.col_map.iterrows(): + # key_hash = get_row_hash(row, self.columns) + # temp_list.append(f"{str(list(row.values))}:\t{self.count_dict[key_hash]}") + # return "\n".join(temp_list) + + def dot_str(self, group_spec={}): + """ Produce a DOT string representing this sequence map. + + + """ + base = 'digraph g { \n' + if self.codes: + node_list = [f"{node};" for node in self.codes if node not in self.node_counts] + if node_list: + base = base + 'subgraph cluster_unused {\n bgcolor="#cAcAcA";\n' + ("\n").join(node_list) + "\n}\n" + if group_spec: + for group, spec in group_spec.items(): + group_list = [f"{node};" for node in self.node_counts if node in spec["nodes"]] + if group_list: + spec_color = spec["color"] + if spec_color[0] == '#': + spec_color = f'"{spec_color}"' + base = base + 'subgraph cluster_' + group + '{\n' + f'bgcolor={spec_color};\n' + \ + '\n'.join(group_list) + '\n}\n' + edge_list = self.get_edge_list(sort=True) + + dot_str = base + ("\n").join(edge_list) + "}\n" + return dot_str + + def edge_to_str(self, key): + value = self.edges.get(key, []) + if value: + x = ("+").join(value[0]) + y = ("+").join(value[1]) + return f"{str(self.sequences[value[0]])} -> {str(self.sequences[value[1]])} " + else: + return "" + + def get_edge_list(self, sort=True): + """Produces a DOT format edge list with the option of sorting by edge counts. + + Parameters: + sort (bool): if true the edge list is sorted by edge counts + + Returns: + list: list of DOT strings representing the edges labeled by counts. + + """ + + df = pd.DataFrame(list(self.edge_counts.items()), columns=['Key', 'Counts']) + if sort: + df = df.sort_values(by='Counts', ascending=False) + edge_list = [] + for index, row in df.iterrows(): + edge_list.append(f"{self.edge_to_str(row['Key'])} [label={str(self.edge_counts[row['Key']])}];") + return edge_list + + def filter_edges(self): + print("to here") + + def update(self, data): + filtered = self.get_sequence_data(data) + last_seq_key = None + for index, row in filtered.iterrows(): + # Update node counts + this_node = row['value'] + self.node_counts[this_node] = self.node_counts.get(this_node, 0) + 1 + this_seq = row['seq'] + if not this_seq: + last_seq_key = None + continue; + this_seq_key = get_key_hash(this_seq) + self.sequences[this_seq_key] = this_seq + self.seq_counts[this_seq_key] = self.seq_counts.get(this_seq_key, 0) + 1 + if last_seq_key: + this_edge_key = get_key_hash([last_seq_key, this_seq_key]) + self.edges[this_edge_key] = [last_seq_key, this_seq_key] + self.edge_counts[this_edge_key] = self.edge_counts.get(this_edge_key, 0) + 1 + last_seq_key = this_seq_key + + def get_sequence_data(self, data): + filtered = self.prep(data) + empty_lists = [[] for _ in range(len(filtered))] + + # Create a DataFrame + df = pd.DataFrame({'value': filtered.values, 'seq': empty_lists}) + + for index, row in df.iterrows(): + df.at[index, 'seq'] = self.get_sequence(df, index) + return df + + def get_sequence(self, df, index): + seq_list = [] + for i, val in enumerate(self.seq): + df_ind = val + index + if df_ind < 0 or df_ind >= len(df): + return [] + seq_list.append(df.iloc[df_ind, 0]) + return seq_list + + @staticmethod + def prep(data): + """ Remove quotes from the specified columns and convert to string. + + Parameters: + data (Series): Dataframe to process by removing quotes. + + Returns: Series + Notes: + - Replacement is done in place. + """ + + filtered = data.astype(str) + filtered.fillna('n/a').astype(str) + filtered = filtered.str.replace('"', '') + filtered = filtered.str.replace("'", "") + return filtered diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 32af02ea..c640ba78 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -4,7 +4,7 @@ import json import argparse from hed.errors.exceptions import HedFileError -from hed.tools.util.io_util import get_file_list, get_task_from_file +from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict from hed.tools.bids.bids_dataset import BidsDataset from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.backup_manager import BackupManager @@ -119,14 +119,7 @@ def parse_arguments(arg_list=None): def parse_tasks(files, task_args): if not task_args: return {"": files} - task_dict = {} - for my_file in files: - task = get_task_from_file(my_file) - if not task: - continue - task_entry = task_dict.get(task, []) - task_entry.append(my_file) - task_dict[task] = task_entry + task_dict = get_task_dict(files) if task_args == "*" or isinstance(task_args, list) and task_args[0] == "*": return task_dict task_dict = {key: task_dict[key] for key in task_args if key in task_dict} diff --git a/hed/tools/util/io_util.py b/hed/tools/util/io_util.py index 53fab27a..1a00b34b 100644 --- a/hed/tools/util/io_util.py +++ b/hed/tools/util/io_util.py @@ -328,3 +328,20 @@ def get_task_from_file(file_path): return "" splits = re.split(r'[_.]', basename[position+5:]) return splits[0] + +def get_task_dict(files): + """ Return a dictionary of the tasks that appear in the file names of a list of files. + + Parameters: + files = + + """ + task_dict = {} + for my_file in files: + task = get_task_from_file(my_file) + if not task: + continue + task_entry = task_dict.get(task, []) + task_entry.append(my_file) + task_dict[task] = task_entry + return task_dict From 8e0f6c218c94fa24aee1e20ca7bd94a2cc3e34ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 11:26:43 +0000 Subject: [PATCH 08/18] Bump spec_tests/hed-specification from `38a5993` to `570ae3e` Bumps [spec_tests/hed-specification](https://github.com/hed-standard/hed-specification) from `38a5993` to `570ae3e`. - [Release notes](https://github.com/hed-standard/hed-specification/releases) - [Commits](https://github.com/hed-standard/hed-specification/compare/38a5993234b0b29e6adb8edd4647f9ad33c6eb1f...570ae3e56c042c05a6f488e3cfe56fb70d1fda72) --- updated-dependencies: - dependency-name: spec_tests/hed-specification dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- spec_tests/hed-specification | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification index 38a59932..570ae3e5 160000 --- a/spec_tests/hed-specification +++ b/spec_tests/hed-specification @@ -1 +1 @@ -Subproject commit 38a5993234b0b29e6adb8edd4647f9ad33c6eb1f +Subproject commit 570ae3e56c042c05a6f488e3cfe56fb70d1fda72 From 0c2aef399eda92517aa33127128049374499a008 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jan 2024 11:26:45 +0000 Subject: [PATCH 09/18] Bump spec_tests/hed-examples from `ae000a6` to `a4b0168` Bumps [spec_tests/hed-examples](https://github.com/hed-standard/hed-examples) from `ae000a6` to `a4b0168`. - [Release notes](https://github.com/hed-standard/hed-examples/releases) - [Commits](https://github.com/hed-standard/hed-examples/compare/ae000a6be5fa994d3f6808a4b56cd56ff665cae5...a4b016822b4666285b92715917355ec6bd2ae9d1) --- updated-dependencies: - dependency-name: spec_tests/hed-examples dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- spec_tests/hed-examples | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec_tests/hed-examples b/spec_tests/hed-examples index ae000a6b..a4b01682 160000 --- a/spec_tests/hed-examples +++ b/spec_tests/hed-examples @@ -1 +1 @@ -Subproject commit ae000a6be5fa994d3f6808a4b56cd56ff665cae5 +Subproject commit a4b016822b4666285b92715917355ec6bd2ae9d1 From f05139e0d486d7ac786cae6626b7e80280ff80d8 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 2 Jan 2024 15:51:52 -0600 Subject: [PATCH 10/18] add linkcheck to readthedocs --- readthedocs.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readthedocs.yml b/readthedocs.yml index f7198138..d5458324 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -8,6 +8,9 @@ build: os: "ubuntu-22.04" tools: python: "3.7" + jobs: + pre_build: + - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/ docs/_build/linkcheck # Build documentation in the docs/ directory with Sphinx sphinx: From 7bfc2acf2fc6ae03e283dfff1ae5a3a5c175f643 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 2 Jan 2024 15:56:00 -0600 Subject: [PATCH 11/18] Trigger webhook --- readthedocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/readthedocs.yml b/readthedocs.yml index d5458324..64f279cd 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -12,6 +12,7 @@ build: pre_build: - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/ docs/_build/linkcheck + # Build documentation in the docs/ directory with Sphinx sphinx: builder: html From 60a512231c8a299b9ad138382f9f789a7462c054 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 2 Jan 2024 16:04:32 -0600 Subject: [PATCH 12/18] Update conf source file location --- readthedocs.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/readthedocs.yml b/readthedocs.yml index 64f279cd..b20edf11 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -10,8 +10,7 @@ build: python: "3.7" jobs: pre_build: - - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/ docs/_build/linkcheck - + - sphinx-build -W --keep-going -q -b linkcheck -d docs/_build/doctrees docs/source/ docs/_build/linkcheck # Build documentation in the docs/ directory with Sphinx sphinx: @@ -19,7 +18,6 @@ sphinx: configuration: docs/source/conf.py fail_on_warning: false - python: install: - requirements: docs/requirements.txt From 3246efd8fa537c380900f50546478940d0ac0b53 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 2 Jan 2024 16:10:14 -0600 Subject: [PATCH 13/18] Add bad URL to test --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 3b82987d..c3e96e1e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,6 +8,7 @@ Hierarchical Event Descriptor (HED) Python Tools .. sidebar:: **Links** * `PDF docs `_ + * `Bad URL `_ * `Source code `_ From 8517ea1563832bdd9faad09ed59b4be4de1ef323 Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 2 Jan 2024 19:26:30 -0600 Subject: [PATCH 14/18] Clean up names/get rid of identifying thing errors --- .../_templates/custom-class-template.rst | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst index cf03326d..1039bc70 100644 --- a/docs/source/_templates/custom-class-template.rst +++ b/docs/source/_templates/custom-class-template.rst @@ -8,34 +8,24 @@ .. rubric:: {{ _('Methods') }} .. autosummary:: -{% for item in methods %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in methods %} + {{ objname }}.{{ item }} {%- endfor %} .. rubric:: {{ _('Attributes') }} .. autosummary:: -{% for item in attributes %} - {{ module }}.{{ objname }}.{{ item }} +{%- for item in attributes %} + {{ objname }}.{{ item }} {%- endfor %} -.. toctree:: - :hidden: - -{% for item in methods %} - {{ fullname }}#method-{{ item }} -{%- endfor %} -{% for item in attributes %} - {{ fullname }}#attribute-{{ item }} -{%- endfor %} - -{% for item in methods %} +{%- for item in methods %} .. _method-{{ item }}: .. automethod:: {{ module }}.{{ objname }}.{{ item }} {%- endfor %} -{% for item in attributes %} +{%- for item in attributes %} .. _attribute-{{ item }}: .. autoattribute:: {{ module }}.{{ objname }}.{{ item }} From 7f41ea2ab197f74f780f8a6a0c67f35033587a51 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Jan 2024 14:13:17 -0600 Subject: [PATCH 15/18] Update doc strings to fix format errors. Update templates slightly --- docs/source/_templates/custom-module-template.rst | 2 +- docs/source/index.rst | 1 - hed/models/definition_dict.py | 10 ++++++---- hed/models/expression_parser.py | 2 +- hed/models/hed_tag.py | 1 + hed/schema/hed_schema_io.py | 2 +- hed/schema/schema_compare.py | 10 +++++----- hed/tools/analysis/hed_type_defs.py | 2 +- hed/tools/remodeling/backup_manager.py | 2 +- hed/validator/tag_util/group_util.py | 1 + 10 files changed, 18 insertions(+), 15 deletions(-) diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst index 9e9c8a77..6062649d 100644 --- a/docs/source/_templates/custom-module-template.rst +++ b/docs/source/_templates/custom-module-template.rst @@ -38,7 +38,7 @@ .. rubric:: {{ _('Classes') }} .. autosummary:: - :toctree: + :toctree: _generated_classes :template: custom-class-template.rst {% for item in classes %} {{ item }} diff --git a/docs/source/index.rst b/docs/source/index.rst index c3e96e1e..3b82987d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,6 @@ Hierarchical Event Descriptor (HED) Python Tools .. sidebar:: **Links** * `PDF docs `_ - * `Bad URL `_ * `Source code `_ diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 0d689510..49edf9e8 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -30,12 +30,14 @@ def __init__(self, def_dicts=None, hed_schema=None): self.add_definitions(def_dicts, hed_schema) def add_definitions(self, def_dicts, hed_schema=None): - """ Add definitions from dict(s) to this dict. + """ Add definitions from dict(s) or strings(s) to this dict. Parameters: - def_dicts (list, DefinitionDict, or dict): DefinitionDict or list of DefinitionDicts/strings/dicts whose - definitions should be added. - Note dict form expects DefinitionEntries in the same form as a DefinitionDict + def_dicts (list, DefinitionDict, dict, or str): DefinitionDict or list of DefinitionDicts/strings/dicts whose + definitions should be added. + Note - dict form expects DefinitionEntries in the same form as a DefinitionDict + Note - str or list of strings will parse the strings using the hed_schema. + Note - You can mix and match types, eg [DefinitionDict, str, list of str] would be valid input. hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. :raises TypeError: diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py index 736ff562..83e91adc 100644 --- a/hed/models/expression_parser.py +++ b/hed/models/expression_parser.py @@ -332,7 +332,7 @@ def __init__(self, expression_string): '"Event"' - Finds the Event tag, but not any descendent tags - 'Def/DefName/*' - Find Def/DefName instances with placeholders, regardless of the value of the placeholder + `Def/DefName/*` - Find Def/DefName instances with placeholders, regardless of the value of the placeholder 'Eve*' - Find any short tags that begin with Eve*, such as Event, but not Sensory-event diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index bdbfa852..db6dd7e8 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -499,6 +499,7 @@ def default_unit(self): """ Get the default unit class unit for this tag. Only a tag with a single unit class can have default units. + Returns: unit(UnitEntry or None): the default unit entry for this tag, or None """ diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 326df76b..027c63d6 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -223,7 +223,7 @@ def load_schema_version(xml_version=None, xml_folder=None): An empty string returns the latest version A json str format is also supported, based on the output of HedSchema.get_formatted_version - Basic format: '[schema_namespace:][library_name_][X.Y.Z]'. + Basic format: `[schema_namespace:][library_name_][X.Y.Z]`. xml_folder (str): Path to a folder containing schema. Returns: diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 1cd974c0..f128306d 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -176,14 +176,14 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section schema1 (HedSchema): The first schema to be compared. schema2 (HedSchema): The second schema to be compared. attribute_filter (str, optional): The attribute to filter entries by. - Entries without this attribute are skipped. - The most common use would be HedKey.InLibrary - If it evaluates to False, no filtering is performed. + Entries without this attribute are skipped. + The most common use would be HedKey.InLibrary + If it evaluates to False, no filtering is performed. sections(list): the list of sections to compare. By default, just the tags section. - If None, checks all sections including header, prologue, and epilogue. + If None, checks all sections including header, prologue, and epilogue. Returns: - tuple: A tuple containing four dictionaries: + tuple: A tuple containing four dictionaries: - matches(dict): Entries present in both schemas and are equal. - not_in_schema1(dict): Entries present in schema2 but not in schema1. - not_in_schema2(dict): Entries present in schema1 but not in schema2. diff --git a/hed/tools/analysis/hed_type_defs.py b/hed/tools/analysis/hed_type_defs.py index 988b4bda..fba665d7 100644 --- a/hed/tools/analysis/hed_type_defs.py +++ b/hed/tools/analysis/hed_type_defs.py @@ -11,7 +11,7 @@ class HedTypeDefs: def_map (dict): keys are definition names, values are dict {type_values, description, tags} Example: A definition 'famous-face-cond' with contents `(Condition-variable/Face-type,Description/A face that should be recognized by the - participants,(Image,(Face,Famous)))` + participants,(Image,(Face,Famous)))` would have type_values ['face_type']. All items are strings not objects. diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py index 75c6f4f1..60ecf753 100644 --- a/hed/tools/remodeling/backup_manager.py +++ b/hed/tools/remodeling/backup_manager.py @@ -224,7 +224,7 @@ def get_task(task_names, file_path): """ Return the task if the file name contains a task_xxx where xxx is in task_names. Parameters: - task_names (list): List of task names (without the task_ prefix). + task_names (list): List of task names (without the `task_` prefix). file_path (str): Path of the filename to be tested. Returns: diff --git a/hed/validator/tag_util/group_util.py b/hed/validator/tag_util/group_util.py index 490f5668..b01a4f55 100644 --- a/hed/validator/tag_util/group_util.py +++ b/hed/validator/tag_util/group_util.py @@ -71,6 +71,7 @@ def check_tag_level_issue(original_tag_list, is_top_level, is_group): """ Report tags incorrectly positioned in hierarchy. Top-level groups can contain definitions, Onset, etc. tags. + Parameters: original_tag_list (list): HedTags containing the original tags. is_top_level (bool): If True, this group is a "top level tag group" From b9803356362813969f3c2d5e1496742323189ad7 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Jan 2024 14:29:44 -0600 Subject: [PATCH 16/18] add missing change file --- hed/schema/schema_attribute_validators.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index c08a11a2..942c4167 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -1,13 +1,15 @@ """The built-in functions to validate known attributes. Template for the functions: -attribute_checker_template(hed_schema, tag_entry, attribute_name, possible_values): - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute + +- ``attribute_checker_template(hed_schema, tag_entry, attribute_name)``: + - ``hed_schema (HedSchema)``: The schema to use for validation. + - ``tag_entry (HedSchemaEntry)``: The schema entry for this tag. + - ``attribute_name (str)``: The name of this attribute. + Returns: - bool -""" + - ``bool``: Description of the return value. + """ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors from hed.errors.error_reporter import ErrorHandler From 68b2e94d39aeb01c6b02961ebf3be37737c685b2 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Jan 2024 15:45:54 -0600 Subject: [PATCH 17/18] Remove autosectionlabel to stop duplicate errors --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 39ee127f..6d69ae7c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,7 +39,7 @@ "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.autosummary", - "sphinx.ext.autosectionlabel", + # "sphinx.ext.autosectionlabel", "sphinx.ext.intersphinx", "sphinx.ext.coverage", "sphinx.ext.mathjax", From 88374f5cbea42d22418584cdf302b51e1de0b6e4 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 3 Jan 2024 17:29:59 -0600 Subject: [PATCH 18/18] Updated broken link in the docs --- docs/source/introduction.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index fbb72f41..3ff7f3c9 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -42,10 +42,6 @@ Finding help The `HED online tools `_ provide an easy-to-use interface that requires no programming. -:Mailing lists and forums: - - * Don't hesitate to ask questions about the python hedtools on `NeuroStars - `_. :Issues and problems: * If you notice a bug in the python hedtools code or encounter other problems using the tools, please `open an issue`_ in the