diff --git a/manifest.json b/manifest.json index d863b55..2f3c966 100644 --- a/manifest.json +++ b/manifest.json @@ -1,46 +1,50 @@ { - "name": "parrec-mr-classifier", - "label": "SciTran PAR/REC MR Classifier", - "description": "Extract metadata from PAR/REC MR data generated by Philips MR scanners.", - "maintainer": "Michael Perry ", - "author": "Michael Perry ", - "url": "https://scitran.github.io", - "source": "https://github.com/scitran-apps/parrec-mr-classifier", - "license": "Apache-2.0", - "flywheel": "0", - "version": "2.0.0", - "custom": { - "docker-image": "scitran/parrec-mr-classifier:2.0.0", - "flywheel": { - "suite": "SciTran" - } - }, - "config": { - "timezone": { - "description": "Time Zone to which all timestamps should be localized. This will set the default time zone in the Gear and thus localize the timestamps to that time zone. Examples: 'UTC', 'America/Los_Angeles', 'America/New_York'. [default = 'UTC'].", - "type": "string", - "default": "UTC" - } - }, - "inputs": { - "parrec": { - "base": "file", - "type": { - "enum": [ - "parrec" - ] - }, - "description": "PAR file, -OR- archive (.zip) containing both PAR/REC files. If this is an archive containing both files, the REC file need not be specified below." + "name": "parrec-mr-classifier", + "label": "SciTran PAR/REC MR Classifier", + "description": "Extract metadata from PAR/REC MR data generated by Philips MR scanners.", + "maintainer": "Michael Perry ", + "author": "Michael Perry ", + "url": "https://scitran.github.io", + "source": "https://github.com/scitran-apps/parrec-mr-classifier", + "license": "Apache-2.0", + "flywheel": "0", + "version": "2.0.1", + "custom": { + "docker-image": "scitran/parrec-mr-classifier:2.0.1", + "gear-builder": { + "category": "converter", + "image": "scitran/parrec-mr-classifier:2.0.1" + }, + "flywheel": { + "suite": "SciTran" + } + }, + "config": { + "timezone": { + "description": "Time Zone to which all timestamps should be localized. 
This will set the default time zone in the Gear and thus localize the timestamps to that time zone. Examples: 'UTC', 'America/Los_Angeles', 'America/New_York'. [default = 'UTC'].", + "type": "string", + "default": "UTC" + } }, - "rec": { - "base": "file", - "optional": true, - "type": { - "enum": [ - "parrec" - ] - }, - "description": "REC file. -OPTIONAL- This file should be provided -IF- an archive, consisting of both a PAR and REC file was not chosen as the parrec input above. -NOTE- If a REC file is not provided here the Gear will attempt to parse only the PAR file." + "inputs": { + "parrec": { + "base": "file", + "type": { + "enum": [ + "parrec" + ] + }, + "description": "PAR file, -OR- archive (.zip) containing both PAR/REC files. If this is an archive containing both files, the REC file need not be specified below." + }, + "rec": { + "base": "file", + "optional": true, + "type": { + "enum": [ + "parrec" + ] + }, + "description": "REC file. -OPTIONAL- This file should be provided -IF- an archive, consisting of both a PAR and REC file was not chosen as the parrec input above. -NOTE- If a REC file is not provided here the Gear will attempt to parse only the PAR file." + } } - } -} +} \ No newline at end of file diff --git a/parrec-mr-classifier.py b/parrec-mr-classifier.py index eae7ce3..e8939b9 100755 --- a/parrec-mr-classifier.py +++ b/parrec-mr-classifier.py @@ -1,36 +1,39 @@ #!/usr/bin/env python +import datetime +import json +import logging import os import re -import sys -import json -import pytz -import numpy import shutil import string -import logging +import sys import zipfile -import datetime from glob import glob from pprint import pprint as pp + +import numpy +import pytz from nibabel import parrec + import classification_from_label logging.basicConfig() log = logging.getLogger('parrec-mr-classifier') + def assign_type(s): """ Sets the type of a given input. 
""" if type(s) == list or type(s) == numpy.ndarray: try: - return [ int(x) for x in s ] + return [int(x) for x in s] except ValueError: try: - return [ float(x) for x in s ] + return [float(x) for x in s] except ValueError: - return [ format_string(x) for x in s if len(x) > 0 ] + return [format_string(x) for x in s if len(x) > 0] else: s = str(s) try: @@ -47,8 +50,10 @@ def format_string(in_string): Sanitize strings for input in the DB. """ # Remove non-ascii characters - formatted = re.sub(r'[^\x00-\x7f]',r'', str(in_string)) # Remove non-ascii characters + # Remove non-ascii characters + formatted = re.sub(r'[^\x00-\x7f]', r'', str(in_string)) formatted = filter(lambda x: x in string.printable, formatted) + formatted = ''.join([alpha_num for alpha_num in formatted]) if len(formatted) == 1 and formatted == '?': formatted = None return formatted @@ -80,18 +85,28 @@ def parrec_classify(input_file_path, output_dir, timezone): # rec_dir = '/flywheel/v0/input/rec' rec_file = glob('/flywheel/v0/input/rec/*REC') if len(rec_file) == 1 and os.path.isfile(rec_file[0]): - shutil.copyfile(rec_file[0], os.path.join(os.path.dirname(par_file), os.path.basename(rec_file[0]))) + shutil.copyfile(rec_file[0], os.path.join( + os.path.dirname(par_file), os.path.basename(rec_file[0]))) rec_file = glob(os.path.join(os.path.dirname(par_file), '*REC')) else: - log.warning('No REC corresponding REC file could be found! Attempting to continue!') + log.warning( + 'No REC corresponding REC file could be found! ' + 'Attempting to continue!' 
+ ) shutil.copyfile(par_file, par_file.replace('PAR', 'REC')) # Load the par file and parse the header if par_file: try: - par = parrec.load(par_file, permit_truncated=True) - par_file_header = par.get_header() - par_header_info = par_file_header.general_info + par_header_info, image_defs = parrec.parse_PAR_header( + open(par_file, 'r')) + par_file_header = parrec.PARRECHeader(par_header_info, image_defs) + # NOTE: The commented-out lines below fail when more than one image + # orientation is present, raising: + # nibabel.parrec.PARRECError: Varying slice orientation in image sequence ([2 2 2 3 3 3 1 1 1]). This is not suppported. + # par = parrec.load(par_file, permit_truncated=True) + # par_file_header = par.get_header() + # par_header_info = par_file_header.general_info except: log.error('Failed to load ' + os.path.basename(par_file)) sys.exit(1) @@ -104,7 +119,7 @@ def parrec_classify(input_file_path, output_dir, timezone): # Allowed types types = [list, float, int, str] - for k,v in par_header_info.iteritems(): + for k, v in par_header_info.items(): value = assign_type(v) if value and type(value) in types: # Put the value in the header @@ -113,14 +128,14 @@ def parrec_classify(input_file_path, output_dir, timezone): log.debug('Excluding ' + k) log.info('done') - ########################################################################### # Build metadata metadata = {} # Session metadata metadata['session'] = {} - metadata['session']['timestamp'] = pytz.timezone(args.timezone).localize(datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat() + metadata['session']['timestamp'] = pytz.timezone(args.timezone).localize( + datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat() # Subject Metadata metadata['session']['subject'] = {} @@ -136,23 +151,25 @@ def parrec_classify(input_file_path, output_dir, timezone): metadata['acquisition'] = {} metadata['acquisition']['instrument'] = 'MR' 
metadata['acquisition']['label'] = par_header_info['protocol_name'] - metadata['acquisition']['timestamp'] = pytz.timezone(args.timezone).localize(datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat() + metadata['acquisition']['timestamp'] = pytz.timezone(args.timezone).localize( + datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat() # File metadata parrec_file = {} parrec_file['name'] = os.path.basename(input_file_path) parrec_file['modality'] = 'MR' - parrec_file['classification'] = classification_from_label.infer_classification(par_header_info['protocol_name']) + parrec_file['classification'] = classification_from_label.infer_classification( + par_header_info['protocol_name']) parrec_file['info'] = {} # File metadata extracted from PAR file header if header: parrec_file['info'] = header # Check for diffusion measurement - if header.has_key('diffusion') and header['diffusion']: + if header.get('diffusion') and header['diffusion']: log.info('Detected diffusion data - overriding.') parrec_file['classification'] = {'Intent': ['Structural'], - 'Measurement': ['Diffusion']} + 'Measurement': ['Diffusion']} # Append the parrec_file to the files array metadata['acquisition']['files'] = [] @@ -169,7 +186,7 @@ def parrec_classify(input_file_path, output_dir, timezone): os.remove(rec_file[0]) # Write out the metadata to file (.metadata.json) - metafile_outname = os.path.join(output_dir,'.metadata.json') + metafile_outname = os.path.join(output_dir, '.metadata.json') with open(metafile_outname, 'w') as metafile: json.dump(metadata, metafile) pp(metadata) @@ -184,16 +201,19 @@ def parrec_classify(input_file_path, output_dir, timezone): import argparse ap = argparse.ArgumentParser() ap.add_argument('parrec_input_file', help='path to par file or parrec zip') - ap.add_argument('output_dir', default= '/flywheel/v0/output', help='output directory') + ap.add_argument('output_dir', 
default='/flywheel/v0/output', + help='output directory') ap.add_argument('--log_level', help='logging level', default='info') - ap.add_argument('-z', '--timezone', help='instrument timezone [system timezone]', default='UTC') + ap.add_argument('-z', '--timezone', + help='instrument timezone [system timezone]', default='UTC') args = ap.parse_args() log.setLevel(getattr(logging, args.log_level.upper())) logging.getLogger('parrec-mr-classifier').setLevel(logging.INFO) log.info('start: %s' % datetime.datetime.utcnow()) - metadatafile = parrec_classify(args.parrec_input_file, args.output_dir, args.timezone) + metadatafile = parrec_classify( + args.parrec_input_file, args.output_dir, args.timezone) if os.path.exists(metadatafile): log.info('generated %s' % metadatafile)