diff --git a/README.md b/README.md index b7ff7401f..48d4074bd 100644 --- a/README.md +++ b/README.md @@ -188,12 +188,13 @@ TABLE BELOW IS GENERATED AUTOMATICALLY. DO NOT EDIT DIRECTLY. --> -| name | description | datatypes | suffixes | link to full data | maintained by | -|:------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:-----------------------------|:--------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------|:---------------------------------------------| -| [eeg_ds003645s_hed_demo](https://github.com/bids-standard/bids-examples/tree/master/eeg_ds003645s_hed_demo) | Shows usage of Hierarchical Event Descriptor (HED) in .tsv files | anat, beh, eeg, micr, motion | KSSSleep, SPIM, beh, channels, coordsystem, defacemask, eeg, electrodes, events, headshape, motion, photo, samples, scans | [link](https://openneuro.org/datasets/ds003645) | [@VisLab](https://github.com/VisLab) | -| [micr_SEM](https://github.com/bids-standard/bids-examples/tree/master/micr_SEM) | Example SEM dataset in PNG format with 1 sample imaged over 2 sessions | micr | SEM, photo, samples, sessions | [link](https://doi.org/10.5281/zenodo.5498378) | [@jcohenadad](https://github.com/jcohenadad) | -| [micr_SEMzarr](https://github.com/bids-standard/bids-examples/tree/master/micr_SEMzarr) | Example SEM dataset in PNG and OME-ZARR format with 1 sample imaged over 2 sessions | micr | SEM, SPIM, samples, sessions | n/a | [@TheChymera](https://github.com/TheChymera) | -| [micr_SPIM](https://github.com/bids-standard/bids-examples/tree/master/micr_SPIM) | Example SPIM dataset in OME-TIFF format with 2 samples from the same subject with 4 chunks each | micr | SPIM, photo, samples | [link](https://doi.org/10.5281/zenodo.5517223) | 
[@jcohenadad](https://github.com/jcohenadad) | +| name | description | datatypes | suffixes | link to full data | maintained by | +|:------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------|:-----------------------------|:--------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------|:---------------------------------------------| +| [eeg_ds003645s_hed_demo](https://github.com/bids-standard/bids-examples/tree/master/eeg_ds003645s_hed_demo) | Shows usage of Hierarchical Event Descriptor (HED) in .tsv files | anat, beh, eeg, micr, motion | KSSSleep, SPIM, beh, channels, coordsystem, defacemask, eeg, electrodes, events, headshape, motion, photo, samples, scans | [link](https://openneuro.org/datasets/ds003645) | [@VisLab](https://github.com/VisLab) | +| [micr_SEM](https://github.com/bids-standard/bids-examples/tree/master/micr_SEM) | Example SEM dataset in PNG format with 1 sample imaged over 2 sessions | micr | SEM, photo, samples, sessions | [link](https://doi.org/10.5281/zenodo.5498378) | [@jcohenadad](https://github.com/jcohenadad) | +| [micr_SEMzarr](https://github.com/bids-standard/bids-examples/tree/master/micr_SEMzarr) | Example SEM dataset in PNG and OME-ZARR format with 1 sample imaged over 2 sessions | micr | SEM, SPIM, samples, sessions | n/a | [@TheChymera](https://github.com/TheChymera) | +| [micr_SPIM](https://github.com/bids-standard/bids-examples/tree/master/micr_SPIM) | Example SPIM dataset in OME-TIFF format with 2 samples from the same subject with 4 chunks each | micr | SPIM, photo, samples | [link](https://doi.org/10.5281/zenodo.5517223) | [@jcohenadad](https://github.com/jcohenadad) | +| [micr_XPCTzarr](https://github.com/bids-standard/bids-examples/tree/master/micr_XPCTzarr) | 
Example XPCT dataset in OME-ZARR format with 1 sample imaged | micr | XPCT, photo, samples, sessions | [link](https://human-organ-atlas.esrf.eu/datasets/572252538) | [@chourroutm](https://github.com/chourroutm) | ### Motion diff --git a/dataset_listing.tsv b/dataset_listing.tsv index 8a512e609..a8ef30074 100644 --- a/dataset_listing.tsv +++ b/dataset_listing.tsv @@ -59,6 +59,7 @@ pet005 T1w, PET [@mnoergaard](https://github.com/mnoergaard) anat, pet T1w, eve micr_SEM Example SEM dataset in PNG format with 1 sample imaged over 2 sessions [link](https://doi.org/10.5281/zenodo.5498378) [@jcohenadad](https://github.com/jcohenadad) micr SEM, photo, samples, sessions micr_SEMzarr Example SEM dataset in PNG and OME-ZARR format with 1 sample imaged over 2 sessions [@TheChymera](https://github.com/TheChymera) micr SEM, SPIM, samples, sessions micr_SPIM Example SPIM dataset in OME-TIFF format with 2 samples from the same subject with 4 chunks each [link](https://doi.org/10.5281/zenodo.5517223) [@jcohenadad](https://github.com/jcohenadad) micr SPIM, photo, samples +micr_XPCTzarr Example XPCT dataset in OME-ZARR format with 1 sample imaged [link](https://human-organ-atlas.esrf.eu/datasets/572252538) [@chourroutm](https://github.com/chourroutm) micr XPCT, photo, samples, sessions fnirs_tapping Example fNIRS measurement with three conditions from five subjects [link](https://doi.org/10.5281/zenodo.5529797) [@rob-luke](https://github.com/rob-luke) nirs channels, coordsystem, events, nirs, optodes, scans fnirs_automaticity 24 subjects performing (non-)automatic finger tapping and foot stepping [link](https://doi.org/10.34973/vesb-mh30) [@robertoostenveld](https://github.com/robertoostenveld) nirs channels, coordsystem, events, nirs, optodes, practicelogbook, scans motion_systemvalidation Example dataset of two different motion captured system recorded almost simultaneously, but no brain data [link](https://doi.org/10.6084/m9.figshare.20238006.v2) 
[@JuliusWelzel](https://github.com/JuliusWelzel) motion channels, motion, scans
diff --git a/genetics_ukbb/.bids-validator-config.json b/genetics_ukbb/.bids-validator-config.json
deleted file mode 100644
index 4448b724a..000000000
--- a/genetics_ukbb/.bids-validator-config.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "ignore": [99, 15, 66]
-}
diff --git a/bidsconfig.json b/legacyconfig.json
similarity index 100%
rename from bidsconfig.json
rename to legacyconfig.json
diff --git a/micr_XPCTzarr/README.md b/micr_XPCTzarr/README.md
new file mode 100644
index 000000000..7f439742a
--- /dev/null
+++ b/micr_XPCTzarr/README.md
@@ -0,0 +1,3 @@
+1 human brain sample imaged over 1 session.
+Example dataset with empty images containing X-ray phase-contrast tomography (XPCT) data to illustrate BIDS convention.
+Using NGFF nested directory format example
diff --git a/micr_XPCTzarr/code/data_retrieval.py b/micr_XPCTzarr/code/data_retrieval.py
new file mode 100644
index 000000000..b3974a3d5
--- /dev/null
+++ b/micr_XPCTzarr/code/data_retrieval.py
@@ -0,0 +1,78 @@
+from pathlib import Path
+import zarr.convenience
+import dask.array
+import ome_zarr, ome_zarr.io, ome_zarr.writer
+from numcodecs import Blosc, Delta
+import time
+
+"""
+You may use the following command to prepare a Python 3.8+ environment for the download of the dataset: `pip install -r data_retrieval_reqs.txt`
+"""
+
+data_path = Path("../sub-01/ses-01/micr/") # path to session (relative path: run this script from the `code` directory)
+data_path.mkdir(parents=True, exist_ok=True) # create directory together with any missing parent directory
+
+# short utility function
+def zarr_array(url,selector):
+    """Open the N5 store at `url` and return the array stored under `selector` as a dask array."""
+    print('Retrieving data from', url)
+    n5_store = zarr.N5FSStore(url)
+    root = zarr.group(store=n5_store)
+    return dask.array.from_zarr(root[selector])
+
+dataset_full = zarr_array("gs://ucl-hip-ct-35a68e99feaae8932b1d44da0358940b/LADAF-2020-31/brain/25.08um_complete-organ_bm05/","s0") # get a dask.array.Array that points to the whole N5 dataset
+
+# default filters and compressors made the script crash; the ones below work:
+filters = [Delta(dtype='i4')]
+compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)
+
+path_roi = data_path / "sub-01_ses-01_sample-brain_XPCT.ome.zarr" # full name of the dataset following BIDS specification
+
+print('Writing in', str(path_roi))
+
+tic = time.time()
+
+store = ome_zarr.io.parse_url(path_roi,mode="a").store # NB: `mode="a"` should allow overwrite but it does not at the moment, see https://github.com/ome/ome-zarr-py/issues/376
+root = zarr.group(store=store)
+ome_zarr.writer.write_image(image=dataset_full,
+    group=root,
+    scaler=None,
+    axes=[
+        {
+            "name": "z",
+            "type": "space",
+            "units": "micrometer" # voxel size is an isotropic 25.08 um
+        },
+        {
+            "name": "y",
+            "type": "space",
+            "units": "micrometer" # voxel size is an isotropic 25.08 um
+        },
+        {
+            "name": "x",
+            "type": "space",
+            "units": "micrometer" # voxel size is an isotropic 25.08 um
+        }
+    ], # axis order of the dataset following BIDS specification
+    coordinate_transformations=[
+        [
+            {
+                "scale": [
+                    25.08,
+                    25.08,
+                    25.08
+                ], # voxel size is an isotropic 25.08 um
+                "type": "scale"
+            }
+        ]
+    ],
+    storage_options=dict(
+        chunks=(512,512,512), # this chunk size may be altered depending on someone's needs
+        filters=filters, # default filters made the script crash
+        compressor=compressor # default compressors made the script crash
+    )
+    )
+
+toc = time.time()
+
+print('Writing completed in', (toc - tic)/60, 'min!')
\ No newline at end of file
diff --git a/micr_XPCTzarr/code/data_retrieval_reqs.txt b/micr_XPCTzarr/code/data_retrieval_reqs.txt
new file mode 100644
index 000000000..cc664fe79
--- /dev/null
+++ b/micr_XPCTzarr/code/data_retrieval_reqs.txt
@@ -0,0 +1,5 @@
+dask[complete]
+zarr
+ome-zarr
+numcodecs
+gcsfs
\ No newline at end of file
diff --git a/micr_XPCTzarr/code/metadata_formatter.py b/micr_XPCTzarr/code/metadata_formatter.py
new file mode 100644
index 000000000..4406085eb
--- /dev/null
+++ b/micr_XPCTzarr/code/metadata_formatter.py
@@ -0,0 +1,39 @@
+import urllib.request
+import json
+
+"""
+Fetch the metadata text file of the dataset and print its `key=value` entries as one JSON object. You may use the following command to prepare a Python 3.8+ environment for this script: `pip install -r metadata_formatter_reqs.txt`
+"""
+
+# The following metadata file can be downloaded from https://human-organ-atlas.esrf.eu/datasets/572252538
+metadata_txt_file_uri = 'https://ids.esrf.fr/ids/getData?sessionId=182d0a3b-de3b-4602-8caf-9bd91dc5b0e5&datafileIds=572252539' # This URI expires quite often
+
+json_dict = {}
+
+with urllib.request.urlopen(metadata_txt_file_uri) as response: # open the URI once; the connection is closed when the block ends
+    for line in response:
+        text = line.decode('utf-8').strip()
+        if not text or text.startswith('#') or '=' not in text:
+            continue # skip blank lines, comment lines and anything that is not a `key=value` entry
+        text = text.replace('\t','').replace('"', '').replace('N.A.', 'n/a')
+        # Split on the first '=' only (a value may itself contain '=') and drop stray spaces around the key and the value
+        key, value = (part.strip() for part in text.split('=', 1))
+        # Store numeric values as numbers: try int first, then float, otherwise keep the string
+        try:
+            value = int(value)
+        except ValueError:
+            try:
+                value = float(value)
+            except ValueError:
+                pass
+        json_dict[key] = value
+json_obj = json.dumps(json_dict)
+print(json_obj)
+
+"""
+The metadata has been split into several files:
+    - micr_XPCTzarr/samples.json
+    - micr_XPCTzarr/sub-01/sub-01_sessions.tsv
+    - micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json
+Note that some fields are redundant.
+""" \ No newline at end of file diff --git a/micr_XPCTzarr/code/metadata_formatter_reqs.txt b/micr_XPCTzarr/code/metadata_formatter_reqs.txt new file mode 100644 index 000000000..81d8a95c0 --- /dev/null +++ b/micr_XPCTzarr/code/metadata_formatter_reqs.txt @@ -0,0 +1 @@ +urllib3 \ No newline at end of file diff --git a/micr_XPCTzarr/code/truncate_data.sh b/micr_XPCTzarr/code/truncate_data.sh new file mode 100644 index 000000000..f3a9b65e9 --- /dev/null +++ b/micr_XPCTzarr/code/truncate_data.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Bash script to truncate all the files containing the binary chunked data as per ../../CONTRIBUTING.md +# Modified from ../../CONTRIBUTING.md + +find ../sub-LADAF-2020-31/ses-01/micr/sub-LADAF-2020-31_ses-01_sample-brain_XPCT.ome.zarr/ -type f -regex ".*/[0-9]*" -exec truncate -s 0 {} + diff --git a/micr_XPCTzarr/dataset_description.json b/micr_XPCTzarr/dataset_description.json new file mode 100644 index 000000000..af2719573 --- /dev/null +++ b/micr_XPCTzarr/dataset_description.json @@ -0,0 +1,15 @@ +{ + "Name": "micr_XPCTzarr", + "Authors": [ + "Matthieu Chourrout", + "David Stansby", + "Guillaume Gaisne", + "Claire L. Walsh", + "Peter D. Lee" + ], + "BIDSVersion": "1.10.0", + "License": "CC-BY-4.0", + "DatasetDOI": "doi:10.15151/ESRF-DC-572252655", + "DatasetType": "raw", + "HowToAcknowledge": "Please cite this dataset as: Tafforeau, P., Walsh, C., Wagner, W. L., Daniyal J. Jafree, Bellier, A., Werlein, C., Kühnel, M. P., Boller, E., Walker-Samuel, S., Robertus, J. L., Long, D. A., Jacob, J., Marussi, S., Eeline Brown, Holroyd, N., Jonigk, D. D., Ackermann, M., & Lee, P. D. (2021). Complete brain from the body donor LADAF-2020-31 (Version 1) [dataset]. European Synchrotron Radiation Facility. 
https://doi.org/10.15151/ESRF-DC-572252655" +} diff --git a/micr_XPCTzarr/participants.json b/micr_XPCTzarr/participants.json new file mode 100644 index 000000000..7a0848c2a --- /dev/null +++ b/micr_XPCTzarr/participants.json @@ -0,0 +1,36 @@ +{ + "participant_id": { + "Description": "Unique alphanumeric participant ID starting with sub-" + }, + "participant_name": { + "Description": "full name or pseudo of the participant which can contain non-alphanumeric characters" + }, + "sex": { + "Description": "sex of the participant", + "Levels": { + "M": "male", + "F": "female" + } + }, + "age": { + "Description": "age of the participant", + "Units": "year" + }, + "weight": { + "Description": "weight of the participant", + "Units": "kg" + }, + "height": { + "Description": "height of the participant", + "Units": "cm" + }, + "species": { + "Description": "binomial species name from the NCBI Taxonomy (https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)" + }, + "medical_information": { + "Description": "medical information as provided by the biobank" + }, + "institute": { + "Description": "name of the biobank" + } +} diff --git a/micr_XPCTzarr/participants.tsv b/micr_XPCTzarr/participants.tsv new file mode 100644 index 000000000..14072b57f --- /dev/null +++ b/micr_XPCTzarr/participants.tsv @@ -0,0 +1,2 @@ +participant_id participant_name sex age species weight height institute medical_information +sub-01 LADAF-2020-31 F 69 homo sapiens 40 145 Laboratoire d'Anatomie des Alpes Françaises type 2 diabetes, pelvic radiation to treat cancer of the uterus, right colectomy (benign lesion on histopathology), bilateral nephrostomy for acute obstructive renal failure, cystectomy, omentectomy and peritoneal carcinoma with occlusive syndrome diff --git a/micr_XPCTzarr/samples.json b/micr_XPCTzarr/samples.json new file mode 100644 index 000000000..3da08f32e --- /dev/null +++ b/micr_XPCTzarr/samples.json @@ -0,0 +1,17 @@ +{ + "sample_id": { + "Description": "Sample ID" + }, + 
"participant_id": { + "Description": "Participant ID from whom tissue samples have been acquired" + }, + "sample_type": { + "Description": "Type of sample from ENCODE Biosample Type (https://www.encodeproject.org/profiles/biosample_type)" + }, + "sample_info": { + "Description": "One-line title of the sample" + }, + "sample_preparation": { + "Description": "Specific preparation of the sample" + } +} diff --git a/micr_XPCTzarr/samples.tsv b/micr_XPCTzarr/samples.tsv new file mode 100644 index 000000000..097b300ea --- /dev/null +++ b/micr_XPCTzarr/samples.tsv @@ -0,0 +1,2 @@ +sample_id participant_id sample_type sample_info sample_preparation +sample-brain sub-01 tissue complete brain from the body donor program of the Laboratoire d'Anatomie des Alpes Francaise (LADAF) formalin fixed, progressive transfer to ethanol 70% with gentle vacuum degassing at each step, mounted with mixed agar gel at 70% ethanol, n.b. some damages due to the too rapid vacuum degassing diff --git a/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json new file mode 100644 index 000000000..7f5456f4a --- /dev/null +++ b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.json @@ -0,0 +1,57 @@ +{ + "InstitutionName": "European Synchrotron Radiation Facility", + "StationName": "BM05 EBS dipole wiggler 0.85T", + "BodyPart": "BRAIN", + "SampleEnvironment": "ex vivo", + "SampleFixation": "formalin", + "SampleEmbedding": "mixed agar gel at 70% ethanol", + "PixelSize": [ + 25.08, + 25.08, + 25.08 + ], + "PixelSizeUnits": "um", + "AcquisitionParameters": { + "XStep": "n/a", + "XStages": 1, + "YStep": "n/a", + "YStages": 1, + "ZStep ": "2.2 mm", + "ZStages ": "2x79", + "Projections": 9990, + "RefN": "n/a", + "DarkN": 400, + "RefOn": "n/a", + "ScanningMode ": "continuous", + "ExposureTime": "0.036 s", + "AccExposureTime ": "0.006 s", + "AccFramesCount": 6, + "PropDistance": "3475 mm", + "Filters": "Mo 
0.1mm Al 2mm SiO2 bars 12*5mm diameter", + "DetAvgEnergy": "93 keV", + "ScanGeometry": "quarter-acquisition, one scan in half-acquisition plus one annular scan", + "ScanRange": "360 deg", + "SensorName": "sCMOS PCO edge 4.2 CLHS", + "SensorMode": "rolling shutter", + "SensorPixelSize": "6.5 um", + "Magnification": 0.24, + "XPixelNum": 2048, + "YPixelNum": 176, + "OpticsType": "dzoom optic from BM05 based on Hasselblad 120mm F/4 macro objective", + "Scintillator": "LuAG:Ce 2000 um", + "SurfDoseRate": "10.5 Gy/s", + "VoiDoseRate": "10.5 Gy/s", + "VoiIntegDose": "2.48 kGy", + "ScanTime": "7.88 min", + "SeriesTime": "22 h" + }, + "ProcessingParameters": { + "RefApproach": "reference jar with 70% ethanol, single reference", + "VolumeX": 5965, + "VolumeY": 5965, + "VolumeZ": 6990, + "32to16BitsMin": -0.04, + "32to16BitsMax": 0.1, + "Jp2ComprRatio": 10 + } +} diff --git a/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs new file mode 100644 index 000000000..915eace09 --- /dev/null +++ b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zattrs @@ -0,0 +1,40 @@ +{ + "multiscales": [ + { + "axes": [ + { + "name": "z", + "type": "space", + "units": "micrometer" + }, + { + "name": "y", + "type": "space", + "units": "micrometer" + }, + { + "name": "x", + "type": "space", + "units": "micrometer" + } + ], + "datasets": [ + { + "coordinateTransformations": [ + { + "scale": [ + 25.08, + 25.08, + 25.08 + ], + "type": "scale" + } + ], + "path": "0" + } + ], + "name": "/", + "version": "0.4" + } + ] +} \ No newline at end of file diff --git a/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zgroup b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zgroup new file mode 100644 index 000000000..3b7daf227 --- /dev/null +++ 
b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} \ No newline at end of file diff --git a/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/0/.zarray b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/0/.zarray new file mode 100644 index 000000000..730a837ac --- /dev/null +++ b/micr_XPCTzarr/sub-01/ses-01/micr/sub-01_ses-01_sample-brain_XPCT.ome.zarr/0/.zarray @@ -0,0 +1,25 @@ +{ + "chunks": [ + 512, + 512, + 512 + ], + "compressor": { + "blocksize": 0, + "clevel": 1, + "cname": "zstd", + "id": "blosc", + "shuffle": 1 + }, + "dimension_separator": "/", + "dtype": "/dev/null \ - | jq '(.issues | map(select(.severity == "error" and .key != "EMPTY_FILE"))) | map(.files_1 = (.files | if length > 0 then .[0:1] else empty end) | del(.files)) | if length > 0 then . else empty end' \ - ) - if [ -n "$errors" ]; then - echo -e "$errors" | sed -e 's,^, ,g' - failed+=" $i" - fi - fi + $CMD || failed+=" $i" done if [ -n "$failed" ]; then echo "Datasets failed validation: $failed" diff --git a/schemaconfig.json b/schemaconfig.json new file mode 100644 index 000000000..d20e3569d --- /dev/null +++ b/schemaconfig.json @@ -0,0 +1,5 @@ +{ + "ignore": [ + { "code": "EMPTY_FILE" } + ] +}