Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Empty tabular files are now allowed. #1033

Merged
merged 5 commits into from
Oct 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 46 additions & 24 deletions hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,32 +449,54 @@ def get_column_refs(self):
return []

def _open_dataframe_file(self, file, has_column_names, input_type):
pandas_header = 0
if not has_column_names:
pandas_header = None
""" Set the _dataframe property of BaseInput. """
pandas_header = 0 if has_column_names else None

# If file is already a DataFrame
if isinstance(file, pd.DataFrame):
self._dataframe = file.astype(str)
self._has_column_names = self._dataframe_has_names(self._dataframe)
elif not file:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file passed to BaseInput.", file)
elif input_type in self.TEXT_EXTENSION:
try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header,
dtype=str, keep_default_na=True, na_values=("", "null"))
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, str(e), self.name) from e
# Convert nan values to a known value
self._dataframe = self._dataframe.fillna("n/a")
elif input_type in self.EXCEL_EXTENSION:
try:
self._loaded_workbook = openpyxl.load_workbook(file)
loaded_worksheet = self.get_worksheet(self._worksheet_name)
self._dataframe = self._get_dataframe_from_worksheet(loaded_worksheet, has_column_names)
except Exception as e:
raise HedFileError(HedExceptions.GENERIC_ERROR, str(e), self.name) from e
else:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "", file)
return

if self._dataframe.size == 0:
raise HedFileError(HedExceptions.INVALID_DATAFRAME, "Invalid dataframe(malformed datafile, etc)", file)
# Check for empty file or None
if not file:
raise HedFileError(HedExceptions.FILE_NOT_FOUND, "Empty file specification passed to BaseInput.", file)

# Handle Excel file input
if input_type in self.EXCEL_EXTENSION:
self._load_excel_file(file, has_column_names)
return

# Handle unsupported file extensions
if input_type not in self.TEXT_EXTENSION:
raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unsupported file extension for text files.",
self.name)

# Handle text file input (CSV/TSV)
self._load_text_file(file, pandas_header)

def _load_excel_file(self, file, has_column_names):
    """ Read an Excel workbook and store the selected worksheet as the dataframe.

    Parameters:
        file: Workbook specification handed directly to openpyxl.load_workbook.
        has_column_names (bool): Forwarded unchanged to _get_dataframe_from_worksheet.

    :raises HedFileError:
        - Any exception raised while loading the workbook, selecting the worksheet,
          or converting it is re-raised as INVALID_FILE_FORMAT.

    """
    try:
        workbook = openpyxl.load_workbook(file)
        # Stored before the worksheet lookup, matching the original statement order.
        self._loaded_workbook = workbook
        worksheet = self.get_worksheet(self._worksheet_name)
        self._dataframe = self._get_dataframe_from_worksheet(worksheet, has_column_names)
    except Exception as e:
        message = f"Failed to load Excel file: {str(e)}"
        raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, message, self.name) from e

def _load_text_file(self, file, pandas_header):
""" Load an text file"""
if isinstance(file, str) and os.path.exists(file) and os.path.getsize(file) == 0:
self._dataframe = pd.DataFrame() # Handle empty file
return

try:
self._dataframe = pd.read_csv(file, delimiter='\t', header=pandas_header, skip_blank_lines=True,
dtype=str, keep_default_na=True, na_values=("", "null"))
# Replace NaN values with a known value
self._dataframe = self._dataframe.fillna("n/a")
except pd.errors.EmptyDataError:
self._dataframe = pd.DataFrame() # Handle case where file has no data
except Exception as e:
raise HedFileError(HedExceptions.INVALID_FILE_FORMAT, f"Failed to load text file: {str(e)}",
self.name) from e
6 changes: 6 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/CHANGES
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
1.0.0 2021-05-11
- First release
Revision history for Face Recognition experiment by Wakeman-Henson

version 1.0 - April 2021
- Initial release of EEG data in this experiment for HED education purposes
24 changes: 24 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
**Introduction:**
This dataset consists of the MEEG (sMRI+MEG+EEG) portion of the multi-subject, multi-modal face processing dataset (ds000117). This dataset was originally acquired and shared by Daniel Wakeman and Richard Henson (https://pubmed.ncbi.nlm.nih.gov/25977808/). The data has been repackaged in EEGLAB format and has undergone minimal preprocessing as well as reorganization and annotation of the dataset events.

**Overview of the experiment:**
Eighteen participants completed two recording sessions spaced three months apart – one session recorded fMRI and the other simultaneously recorded MEG and EEG data. During each session, participants performed the same simple perceptual task, responding to presented photographs of famous, unfamiliar, and scrambled faces by pressing one of two keyboard keys to indicate a subjective yes or no decision as to the relative spatial symmetry of the viewed face. Famous faces were feature-matched to unfamiliar faces; half the faces were female. The two sessions (MEEG, fMRI) had different organizations of event timing and presentation because of technological requirements of the respective imaging modalities. Each individual face was presented twice during the session. For half of the presented faces, the second presentation followed immediately after the first. For the other half, the second presentation was delayed by 5-15 face presentations.

**Preprocessing:**
The preprocessing, which was performed using the `wh_extracteeg_BIDS.m` script located in the code directory, includes the following steps:
* Ignore MRI data except for sMRI.
* Extract EEG channels out of the MEG/EEG fif data
* Add fiducials
* Rename EOG and EKG channels
* Extract events from event channel
* Remove spurious events 5, 6, 7, 13, 14, 15, 17, 18 and 19
* Remove spurious event 24 for subject 3 run 4
* Rename events taking into account button assigned to each subject
* Correct event latencies (events have a shift of 34 ms)
* Resample data to 250 Hz (this step is performed because this dataset is used in a tutorial for EEGLAB and needs to be lightweight)
* Remove event fields `urevent` and `duration`
* Save as EEGLAB .set format

**Data curators:**
Ramon Martinez, Dung Truong, Scott Makeig, Arnaud Delorme (UCSD, La Jolla, CA, USA), Kay Robbins (UTSA, San Antonio, TX, USA)

24 changes: 24 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/dataset_description.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"Name": "Face processing MEEG dataset with HED annotation",
"BIDSVersion": "1.9.0",
"HEDVersion": "8.2.0",
"License": "CC0",
"Authors": [
"Daniel G. Wakeman",
"Richard N Henson",
"Dung Truong (curation)",
"Kay Robbins (curation)",
"Scott Makeig (curation)",
"Arno Delorme (curation)"
],
"ReferencesAndLinks": [
"Wakeman, D., Henson, R. (2015). A multi-subject, multi-modal human neuroimaging dataset. Sci Data 2, 150001. https://doi.org/10.1038/sdata.2015.1",
"Robbins, K., Truong, D., Appelhoff, S., Delorme, A., & Makeig, S. (2021). Capturing the nature of events and event context using Hierarchical Event Descriptors (HED). In press for NeuroImage Special Issue Practice in MEEG. NeuroImage 245 (2021) 118766. Online: https://www.sciencedirect.com/science/article/pii/S1053811921010387.",
"Robbins, K., Truong, D., Jones, A., Callanan, I., & Makeig, S. (2021). Building FAIR functionality: Annotating events in time series data using Hierarchical Event Descriptors (HED). Neuroinformatics Special Issue Building the NeuroCommons. Neuroinformatics https://doi.org/10.1007/s12021-021-09537-4. Online: https://link.springer.com/article/10.1007/s12021-021-09537-4."
],
"Funding": [
"Experiment was supported by the UK Medical Research Council (MC_A060_5PR10) and Elekta Ltd.",
"Curation was supported by: Army Research Laboratory W911NF-10-2-0022, NIH R01 EB023297-03, NIH R01 NS047293-l4, and NIH R24 MH120037-01."
],
"DatasetDOI": "10.18112/openneuro.ds003645.v1.0.0"
}
17 changes: 17 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/participants.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"participant_id": {
"LongName": "Participant identifier",
"Description": "Unique subject identifier"
},
"gender": {
"Description": "Sex of the subject",
"Levels": {
"M": "male",
"F": "female"
}
},
"age": {
"Description": "Age of the subject",
"Units": "years"
}
}
3 changes: 3 additions & 0 deletions tests/data/bids_tests/eeg_ds003645s_empty/participants.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
participant_id age gender
sub-002 31 M
sub-003 25 M
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
onset duration sample event_type face_type rep_status trial rep_lag value stim_file
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"TaskName": "FacePerception",
"TaskDescription": "Subjects viewed stimuli on a screen during six, 7.5 minute runs. The stimuli were photographs of either a famous face (known to most of British or a scrambled face, and appeared for a random duration between 800 and 1,000 ms. Subjects were instructed to fixate centrally throughout the experiment. To ensure attention to each stimulus, participants were asked to press one of two keys with either their left or right index finger (assignment counter-balanced across participants). Their key-press was based on how symmetric they regarded each image: pressing one or the other key depending whether they thought the image was 'more' or 'less symmetric' than average.",
"InstitutionAddress": "15 Chaucer Road, Cambridge, UK",
"InstitutionName": "MRC Cognition & Brain Sciences Unit",
"EEGReference": "nose",
"EEGGround": "left collar bone",
"SamplingFrequency": 250,
"PowerLineFrequency": 50,
"SoftwareFilters": {
"LowPassFilter": {
"cutoff": "350 (Hz)"
}
},
"EEGPlacementScheme": "extended 10-10% system",
"CapManufacturer": "Easycap",
"EEGChannelCount": 70,
"EOGChannelCount": 2,
"RecordingType": "continuous",
"MiscChannelCount": 309,
"RecordingDuration": 494,
"ECGChannelCount": 0,
"EMGChannelCount": 0
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


Loading