Merge pull request hed-standard#372 from VisLab/develop
Updated Jupyter notebooks and removed MATLAB examples
VisLab authored Jun 15, 2024
2 parents 5e9f9eb + 819926d commit 5c3544a
Showing 54 changed files with 378 additions and 2,013 deletions.
2 changes: 1 addition & 1 deletion docs/source/FileRemodelingTools.md
@@ -877,7 +877,7 @@ The resulting columns are called *stopped* and *stop_failed*, respectively.
The results of executing this *factor_column* operation on the
[**sample remodel event file**](sample-remodel-event-file-anchor) are:

````{admonition} Results of the factor_column operation on the sample data.
| onset | duration | trial_type | stop_signal_delay | response_time | response_accuracy | response_hand | sex | stopped | stop_failed |
| ----- | -------- | ---------- | ----------------- | ------------- | ----------------- | ------------- | --- | ---------- | ---------- |
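For orientation, the operation that produces these two factor columns has the shape sketched below. This is a hedged sketch in Python: the parameter names (*column_name*, *factor_values*, *factor_names*) follow the File Remodeling Tools documentation, and the `trial_type` value spellings are assumptions based on the sample remodel event file.

```python
# Hedged sketch of a remodel file containing one factor_column operation.
# Parameter names follow the File Remodeling Tools documentation; the
# trial_type value spellings are assumed to match the sample remodel event file.
import json

factor_op = {
    "operation": "factor_column",
    "description": "Factor the two stop outcomes into one-hot columns.",
    "parameters": {
        "column_name": "trial_type",
        "factor_values": ["succesful_stop", "unsuccesful_stop"],
        "factor_names": ["stopped", "stop_failed"],
    },
}

# A remodel file is a JSON list of operations.
print(json.dumps([factor_op], indent=4))
```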
140 changes: 72 additions & 68 deletions docs/source/HedSearchGuide.md

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions docs/source/WhatsNew.md
@@ -1,6 +1,11 @@
(whats-new-anchor)=
# What's new?

**June 10, 2024**: **HEDTools 0.5.0 released on PyPI.**
> Remodeling tool validation uses JSON schema.
> Supports `.tsv` format and HED ontology generation for HED schemas.
> Additional visualizations and summaries.
**June 10, 2024**: **HED standard schema v8.3.0 released.**
> The [**HED schema v8.3.0**](https://doi.org/10.5281/zenodo.7876037) has just
been released. This release introduces globally unique `hedId` identifiers for every HED element and enables mapping into a HED ontology.
2 changes: 1 addition & 1 deletion src/README.md
@@ -46,4 +46,4 @@ To install directly from the
pip install git+https://github.com/hed-standard/hed-python/@master
```

HEDTools require python 3.7 or greater.
HEDTools require python 3.8 or greater.
2 changes: 1 addition & 1 deletion src/jupyter_notebooks/README.md
@@ -14,4 +14,4 @@ To install directly from the
pip install git+https://github.com/hed-standard/hed-python/@master
```

HEDTools require python 3.7 or greater.
HEDTools require python 3.8 or greater.
17 changes: 6 additions & 11 deletions src/jupyter_notebooks/bids/README.md
@@ -18,23 +18,18 @@ validating, summarizing, and analyzing your BIDS datasets.

These notebooks require HEDTools, which can be installed using `pip` or directly.

**NOTE: These notebooks have been updated to use the HEDTOOLS version on the develop branch of the HedTools.
These tools must be installed directly from GitHub until the newest version of HEDTools is released.**

To install directly from the
[GitHub](https://github.com/hed-standard/hed-python) repository:
To use `pip` to install `hedtools` from PyPI:

```
pip install git+https://github.com/hed-standard/hed-python/@master
pip install hedtools
```


To use `pip` to install `hedtools` from PyPI:
To install directly from the
[GitHub](https://github.com/hed-standard/hed-python) repository:

```
pip install hedtools
pip install git+https://github.com/hed-standard/hed-python/@master
```


HEDTools require python 3.7 or greater.
HEDTools require python 3.8 or greater.
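A quick way to confirm an installation satisfies this requirement is a check along the following lines; the `__version__` attribute is probed defensively because its presence is an assumption, not something this README guarantees.

```python
# Sanity check: Python version and that the hedtools distribution imports.
import sys

assert sys.version_info >= (3, 8), "HEDTools requires Python 3.8 or greater"

import hed  # the hedtools distribution installs the `hed` package
print("hed imported from:", hed.__file__)
print("version:", getattr(hed, "__version__", "unknown"))  # attribute assumed; probed defensively
```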

64 changes: 32 additions & 32 deletions src/jupyter_notebooks/bids/extract_json_template.ipynb
@@ -34,7 +34,37 @@
},
{
"cell_type": "code",
"execution_count": 2,
"source": [
"import json\n",
"from hed.tools.analysis.tabular_summary import TabularSummary\n",
"from hed.tools.util.io_util import get_file_list\n",
"\n",
"dataset_root = '../../../datasets/eeg_ds003645s_hed'\n",
"exclude_dirs = ['stimuli', 'code', 'derivatives', 'sourcedata', 'phenotype']\n",
"skip_columns = [\"onset\", \"duration\", \"sample\"]\n",
"value_columns = [\"stim_file\", \"response_time\"]\n",
"output_path = None\n",
"\n",
"# Construct the event file dictionary for the BIDS event files\n",
"event_files = get_file_list(dataset_root, extensions=[\".tsv\"], name_suffix=\"_events\", exclude_dirs=exclude_dirs)\n",
"\n",
"# Construct the event file value summary and generate a sidecar template representing dataset\n",
"value_summary = TabularSummary(value_cols=value_columns, skip_cols=skip_columns, name=\"Wakeman-Hanson test data\")\n",
"value_summary.update(event_files)\n",
"sidecar_template = value_summary.extract_sidecar_template()\n",
"if output_path:\n",
" with open(output_path, \"w\") as f:\n",
" json.dump(sidecar_template, f, indent=4)\n",
"else:\n",
" print(json.dumps(sidecar_template, indent=4))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-15T15:54:13.163193Z",
"start_time": "2024-06-15T15:53:40.611422Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -297,37 +327,7 @@
]
}
],
"source": [
"import json\n",
"from hed.tools.analysis.tabular_summary import TabularSummary\n",
"from hed.tools.util.io_util import get_file_list\n",
"\n",
"dataset_root = '../../../datasets/eeg_ds003645s_hed'\n",
"exclude_dirs = ['stimuli', 'code', 'derivatives', 'sourcedata', 'phenotype']\n",
"skip_columns = [\"onset\", \"duration\", \"sample\"]\n",
"value_columns = [\"stim_file\", \"response_time\"]\n",
"output_path = None\n",
"\n",
"# Construct the event file dictionary for the BIDS event files\n",
"event_files = get_file_list(dataset_root, extensions=[\".tsv\"], name_suffix=\"_events\", exclude_dirs=exclude_dirs)\n",
"\n",
"# Construct the event file value summary and generate a sidecar template representing dataset\n",
"value_summary = TabularSummary(value_cols=value_columns, skip_cols=skip_columns, name=\"Wakeman-Hanson test data\")\n",
"value_summary.update(event_files)\n",
"sidecar_template = value_summary.extract_sidecar_template()\n",
"if output_path:\n",
" with open(output_path, \"w\") as f:\n",
" json.dump(sidecar_template, f, indent=4)\n",
"else:\n",
" print(json.dumps(sidecar_template, indent=4))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-09T22:02:52.047144900Z",
"start_time": "2024-01-09T22:02:51.951144900Z"
}
}
"execution_count": 1
}
],
"metadata": {
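Once the cell above has produced `sidecar_template`, a short follow-up like this sketch shows how each column was classified. It assumes only the standard BIDS sidecar shape: a categorical column carries a dict mapping each value to a HED annotation, while a value column carries a single annotation string containing the `#` placeholder.

```python
# Hedged follow-up to the cell above: report how each column was classified
# in the extracted sidecar template (assumes the standard BIDS sidecar shape).
for column, entry in sidecar_template.items():
    hed_part = entry.get("HED")
    if isinstance(hed_part, dict):
        print(f"{column}: categorical column with {len(hed_part)} values")
    elif isinstance(hed_part, str):
        print(f"{column}: value column (single '#' placeholder annotation)")
    else:
        print(f"{column}: no HED entry")
```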
90 changes: 40 additions & 50 deletions src/jupyter_notebooks/bids/find_event_combinations.ipynb
@@ -26,66 +26,24 @@
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sub-002_task-FaceRecognition_events.tsv\n",
"sub-003_task-FaceRecognition_events.tsv\n",
"sub-004_task-FaceRecognition_events.tsv\n",
"sub-005_task-FaceRecognition_events.tsv\n",
"sub-006_task-FaceRecognition_events.tsv\n",
"sub-007_task-FaceRecognition_events.tsv\n",
"sub-008_task-FaceRecognition_events.tsv\n",
"sub-009_task-FaceRecognition_events.tsv\n",
"sub-010_task-FaceRecognition_events.tsv\n",
"sub-011_task-FaceRecognition_events.tsv\n",
"sub-012_task-FaceRecognition_events.tsv\n",
"sub-013_task-FaceRecognition_events.tsv\n",
"sub-014_task-FaceRecognition_events.tsv\n",
"sub-015_task-FaceRecognition_events.tsv\n",
"sub-016_task-FaceRecognition_events.tsv\n",
"sub-017_task-FaceRecognition_events.tsv\n",
"sub-018_task-FaceRecognition_events.tsv\n",
"sub-019_task-FaceRecognition_events.tsv\n",
"The total count of the keys is:31448\n",
" key_counts trial_type value\n",
"0 90 boundary 0\n",
"1 2700 famous_new 5\n",
"2 1313 famous_second_early 6\n",
"3 1291 famous_second_late 7\n",
"4 3532 left_nonsym 256\n",
"5 3381 left_sym 256\n",
"6 3616 right_nonsym 4096\n",
"7 4900 right_sym 4096\n",
"8 2700 scrambled_new 17\n",
"9 1271 scrambled_second_early 18\n",
"10 1334 scrambled_second_late 19\n",
"11 2700 unfamiliar_new 13\n",
"12 1304 unfamiliar_second_early 14\n",
"13 1316 unfamiliar_second_late 15\n"
]
}
],
"source": [
"import os\n",
"from hed.tools.analysis.key_map import KeyMap\n",
"from hed.tools.util.data_util import get_new_dataframe\n",
"from hed.tools.util.io_util import get_file_list\n",
"\n",
"# Variables to set for the specific dataset\n",
"data_root = 'T:/summaryTests/ds002718-download'\n",
"dataset_root = '../../../datasets/eeg_ds002893s_hed_attention_shift'\n",
"exclude_dirs = ['stimuli', 'code', 'derivatives', 'sourcedata', 'phenotype']\n",
"output_path = ''\n",
"exclude_dirs = ['stimuli', 'derivatives', 'code', 'sourcedata']\n",
"exclude_dirs = ['trial', 'derivatives', 'code', 'sourcedata']\n",
"\n",
"# Construct the key map\n",
"key_columns = [ \"trial_type\", \"value\"]\n",
"key_columns = [\"focus_modality\", \"event_type\", \"attention_status\"]\n",
"key_map = KeyMap(key_columns)\n",
"\n",
"# Construct the unique combinations\n",
"event_files = get_file_list(data_root, extensions=[\".tsv\"], name_suffix=\"_events\", exclude_dirs=exclude_dirs)\n",
"event_files = get_file_list(dataset_root, extensions=[\".tsv\"], name_suffix=\"_events\", exclude_dirs=exclude_dirs)\n",
"for event_file in event_files:\n",
" print(f\"{os.path.basename(event_file)}\")\n",
" df = get_new_dataframe(event_file)\n",
@@ -103,10 +61,42 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-10-24T20:08:40.958637400Z",
"start_time": "2023-10-24T20:08:24.603887900Z"
"end_time": "2024-06-15T16:02:17.144301Z",
"start_time": "2024-06-15T16:02:14.364188Z"
}
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sub-001_task-AuditoryVisualShift_run-01_events.tsv\n",
"sub-002_task-AuditoryVisualShift_run-01_events.tsv\n",
"The total count of the keys is:11730\n",
" key_counts focus_modality event_type attention_status\n",
"0 2298 auditory low_tone attended\n",
"1 2292 visual dark_bar attended\n",
"2 1540 auditory dark_bar unattended\n",
"3 1538 visual low_tone unattended\n",
"4 585 auditory button_press nan\n",
"5 577 auditory high_tone attended\n",
"6 576 visual light_bar attended\n",
"7 572 visual button_press nan\n",
"8 384 auditory light_bar unattended\n",
"9 383 visual high_tone unattended\n",
"10 288 auditory hear_word attended\n",
"11 287 visual look_word attended\n",
"12 96 visual look_word unattended\n",
"13 96 auditory hear_word unattended\n",
"14 96 auditory look_word unattended\n",
"15 96 visual hear_word unattended\n",
"16 14 visual pause_recording nan\n",
"17 11 auditory pause_recording nan\n",
"18 1 nan pause_recording nan\n"
]
}
],
"execution_count": 3
}
],
"metadata": {
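For comparison, the core counting step that `KeyMap` performs can be reproduced on a single event file with plain pandas. The sketch below uses only names that appear in the notebook; `KeyMap` additionally merges counts across files and keeps a stable key ordering, so this is illustration, not a replacement.

```python
# Hedged pandas-only sketch of the per-file counting that KeyMap aggregates.
# Requires pandas >= 1.3 for DataFrame.value_counts(..., dropna=False).
from hed.tools.util.data_util import get_new_dataframe
from hed.tools.util.io_util import get_file_list

dataset_root = '../../../datasets/eeg_ds002893s_hed_attention_shift'
event_files = get_file_list(dataset_root, extensions=[".tsv"], name_suffix="_events",
                            exclude_dirs=['stimuli', 'derivatives', 'code', 'sourcedata'])

df = get_new_dataframe(event_files[0])  # one subject's events
combos = (df.value_counts(subset=["focus_modality", "event_type", "attention_status"], dropna=False)
            .reset_index(name="key_counts"))
print(combos)
```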
64 changes: 32 additions & 32 deletions src/jupyter_notebooks/bids/merge_spreadsheet_into_sidecar.ipynb
@@ -30,7 +30,37 @@
},
{
"cell_type": "code",
"execution_count": 1,
"source": [
"import os\n",
"import json\n",
"from hed.models import SpreadsheetInput\n",
"from hed.tools import df_to_hed, merge_hed_dict\n",
"\n",
"# Spreadsheet input\n",
"spreadsheet_path = os.path.realpath('../../../docs/source/_static/data/task-WorkingMemory_example_spreadsheet.tsv')\n",
"filename = os.path.basename(spreadsheet_path)\n",
"worksheet_name = None\n",
"spreadsheet = SpreadsheetInput(file=spreadsheet_path, worksheet_name=worksheet_name,\n",
" tag_columns=['HED'], has_column_names=True, name=filename)\n",
"\n",
"# Must convert the spreadsheet to a sidecar before merging\n",
"spreadsheet_sidecar = df_to_hed(spreadsheet.dataframe, description_tag=False)\n",
"\n",
"# Use an empty dict to merge into, but any valid dict read from JSON will work\n",
"target_sidecar_dict = {}\n",
"\n",
"# Do the merge\n",
"merge_hed_dict(target_sidecar_dict, spreadsheet_sidecar)\n",
"merged_json = json.dumps(target_sidecar_dict, indent=4)\n",
"print(merged_json)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-15T16:03:32.787320Z",
"start_time": "2024-06-15T16:03:32.760819Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -107,37 +137,7 @@
]
}
],
"source": [
"import os\n",
"import json\n",
"from hed.models import SpreadsheetInput\n",
"from hed.tools import df_to_hed, merge_hed_dict\n",
"\n",
"# Spreadsheet input\n",
"spreadsheet_path = os.path.realpath('../../../docs/source/_static/data/task-WorkingMemory_example_spreadsheet.tsv')\n",
"filename = os.path.basename(spreadsheet_path)\n",
"worksheet_name = None\n",
"spreadsheet = SpreadsheetInput(file=spreadsheet_path, worksheet_name=worksheet_name,\n",
" tag_columns=['HED'], has_column_names=True, name=filename)\n",
"\n",
"# Must convert the spreadsheet to a sidecar before merging\n",
"spreadsheet_sidecar = df_to_hed(spreadsheet.dataframe, description_tag=False)\n",
"\n",
"# Use an empty dict to merge into, but any valid dict read from JSON will work\n",
"target_sidecar_dict = {}\n",
"\n",
"# Do the merge\n",
"merge_hed_dict(target_sidecar_dict, spreadsheet_sidecar)\n",
"merged_json = json.dumps(target_sidecar_dict, indent=4)\n",
"print(merged_json)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-01-10T12:44:41.634832500Z",
"start_time": "2024-01-10T12:44:39.230433200Z"
}
}
"execution_count": 2
}
],
"metadata": {
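As the comment in the cell notes, any valid sidecar dict read from JSON can serve as the merge target, not just an empty one. Here is a hedged variant (the sidecar filename below is hypothetical):

```python
# Hedged variant of the cell above: merge into an existing sidecar on disk.
# `spreadsheet_sidecar` comes from the cell above; the filename is hypothetical.
import json
from hed.tools import merge_hed_dict

sidecar_path = "task-WorkingMemory_events.json"  # hypothetical existing sidecar
with open(sidecar_path, "r") as fp:
    target_sidecar_dict = json.load(fp)

merge_hed_dict(target_sidecar_dict, spreadsheet_sidecar)  # in-place merge

with open(sidecar_path, "w") as fp:
    json.dump(target_sidecar_dict, fp, indent=4)
```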