Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

address error, simple formatting #6

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 47 additions & 43 deletions manifest.json
Original file line number Diff line number Diff line change
@@ -1,46 +1,50 @@
{
"name": "parrec-mr-classifier",
"label": "SciTran PAR/REC MR Classifier",
"description": "Extract metadata from PAR/REC MR data generated by Philips MR scanners.",
"maintainer": "Michael Perry <[email protected]>",
"author": "Michael Perry <[email protected]>",
"url": "https://scitran.github.io",
"source": "https://github.com/scitran-apps/parrec-mr-classifier",
"license": "Apache-2.0",
"flywheel": "0",
"version": "2.0.0",
"custom": {
"docker-image": "scitran/parrec-mr-classifier:2.0.0",
"flywheel": {
"suite": "SciTran"
}
},
"config": {
"timezone": {
"description": "Time Zone to which all timestamps should be localized. This will set the default time zone in the Gear and thus localize the timestamps to that time zone. Examples: 'UTC', 'America/Los_Angeles', 'America/New_York'. [default = 'UTC'].",
"type": "string",
"default": "UTC"
}
},
"inputs": {
"parrec": {
"base": "file",
"type": {
"enum": [
"parrec"
]
},
"description": "PAR file, -OR- archive (.zip) containing both PAR/REC files. If this is an archive containing both files, the REC file need not be specified below."
"name": "parrec-mr-classifier",
"label": "SciTran PAR/REC MR Classifier",
"description": "Extract metadata from PAR/REC MR data generated by Philips MR scanners.",
"maintainer": "Michael Perry <[email protected]>",
"author": "Michael Perry <[email protected]>",
"url": "https://scitran.github.io",
"source": "https://github.com/scitran-apps/parrec-mr-classifier",
"license": "Apache-2.0",
"flywheel": "0",
"version": "2.0.1",
"custom": {
"docker-image": "scitran/parrec-mr-classifier:2.0.1",
"gear-builder": {
"category": "converter",
"image": "scitran/parrec-mr-classifier:2.0.1"
},
"flywheel": {
"suite": "SciTran"
}
},
"config": {
"timezone": {
"description": "Time Zone to which all timestamps should be localized. This will set the default time zone in the Gear and thus localize the timestamps to that time zone. Examples: 'UTC', 'America/Los_Angeles', 'America/New_York'. [default = 'UTC'].",
"type": "string",
"default": "UTC"
}
},
"rec": {
"base": "file",
"optional": true,
"type": {
"enum": [
"parrec"
]
},
"description": "REC file. -OPTIONAL- This file should be provided -IF- an archive, consisting of both a PAR and REC file was not chosen as the parrec input above. -NOTE- If a REC file is not provided here the Gear will attempt to parse only the PAR file."
"inputs": {
"parrec": {
"base": "file",
"type": {
"enum": [
"parrec"
]
},
"description": "PAR file, -OR- archive (.zip) containing both PAR/REC files. If this is an archive containing both files, the REC file need not be specified below."
},
"rec": {
"base": "file",
"optional": true,
"type": {
"enum": [
"parrec"
]
},
"description": "REC file. -OPTIONAL- This file should be provided -IF- an archive, consisting of both a PAR and REC file was not chosen as the parrec input above. -NOTE- If a REC file is not provided here the Gear will attempt to parse only the PAR file."
}
}
}
}
}
72 changes: 46 additions & 26 deletions parrec-mr-classifier.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,39 @@
#!/usr/bin/env python

import datetime
import json
import logging
import os
import re
import sys
import json
import pytz
import numpy
import shutil
import string
import logging
import sys
import zipfile
import datetime
from glob import glob
from pprint import pprint as pp

import numpy
import pytz
from nibabel import parrec

import classification_from_label

logging.basicConfig()
log = logging.getLogger('parrec-mr-classifier')


def assign_type(s):
"""
Sets the type of a given input.
"""
if type(s) == list or type(s) == numpy.ndarray:
try:
return [ int(x) for x in s ]
return [int(x) for x in s]
except ValueError:
try:
return [ float(x) for x in s ]
return [float(x) for x in s]
except ValueError:
return [ format_string(x) for x in s if len(x) > 0 ]
return [format_string(x) for x in s if len(x) > 0]
else:
s = str(s)
try:
Expand All @@ -47,8 +50,10 @@ def format_string(in_string):
Sanitize strings for input in the DB.
"""
# Remove non-ascii characters
formatted = re.sub(r'[^\x00-\x7f]',r'', str(in_string)) # Remove non-ascii characters
# Remove non-ascii characters
formatted = re.sub(r'[^\x00-\x7f]', r'', str(in_string))
formatted = filter(lambda x: x in string.printable, formatted)
formatted = ''.join([alpha_num for alpha_num in formatted])
if len(formatted) == 1 and formatted == '?':
formatted = None
return formatted
Expand Down Expand Up @@ -80,18 +85,28 @@ def parrec_classify(input_file_path, output_dir, timezone):
# rec_dir = '/flywheel/v0/input/rec'
rec_file = glob('/flywheel/v0/input/rec/*REC')
if len(rec_file) == 1 and os.path.isfile(rec_file[0]):
shutil.copyfile(rec_file[0], os.path.join(os.path.dirname(par_file), os.path.basename(rec_file[0])))
shutil.copyfile(rec_file[0], os.path.join(
os.path.dirname(par_file), os.path.basename(rec_file[0])))
rec_file = glob(os.path.join(os.path.dirname(par_file), '*REC'))
else:
log.warning('No REC corresponding REC file could be found! Attempting to continue!')
log.warning(
'No REC corresponding REC file could be found! '
'Attempting to continue!'
)
shutil.copyfile(par_file, par_file.replace('PAR', 'REC'))

# Load the par file and parse the header
if par_file:
try:
par = parrec.load(par_file, permit_truncated=True)
par_file_header = par.get_header()
par_header_info = par_file_header.general_info
par_header_info, image_defs = parrec.parse_PAR_header(
open(par_file, 'r'))
par_file_header = parrec.PARRECHeader(par_header_info, image_defs)
# NOTE: The commented-out lines below fail when more than one image
# orientation is present:
# nibabel.parrec.PARRECError: Varying slice orientation in image sequence ([2 2 2 3 3 3 1 1 1]). This is not suppported.
# par = parrec.load(par_file, permit_truncated=True)
# par_file_header = par.get_header()
# par_header_info = par_file_header.general_info
except:
log.error('Failed to load ' + os.path.basename(par_file))
sys.exit(1)
Expand All @@ -104,7 +119,7 @@ def parrec_classify(input_file_path, output_dir, timezone):

# Allowed types
types = [list, float, int, str]
for k,v in par_header_info.iteritems():
for k, v in par_header_info.items():
value = assign_type(v)
if value and type(value) in types:
# Put the value in the header
Expand All @@ -113,14 +128,14 @@ def parrec_classify(input_file_path, output_dir, timezone):
log.debug('Excluding ' + k)
log.info('done')


###########################################################################
# Build metadata
metadata = {}

# Session metadata
metadata['session'] = {}
metadata['session']['timestamp'] = pytz.timezone(args.timezone).localize(datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat()
metadata['session']['timestamp'] = pytz.timezone(args.timezone).localize(
datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat()

# Subject Metadata
metadata['session']['subject'] = {}
Expand All @@ -136,23 +151,25 @@ def parrec_classify(input_file_path, output_dir, timezone):
metadata['acquisition'] = {}
metadata['acquisition']['instrument'] = 'MR'
metadata['acquisition']['label'] = par_header_info['protocol_name']
metadata['acquisition']['timestamp'] = pytz.timezone(args.timezone).localize(datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat()
metadata['acquisition']['timestamp'] = pytz.timezone(args.timezone).localize(
datetime.datetime.strptime(par_header_info['exam_date'], '%Y.%m.%d / %H:%M:%S')).isoformat()

# File metadata
parrec_file = {}
parrec_file['name'] = os.path.basename(input_file_path)
parrec_file['modality'] = 'MR'
parrec_file['classification'] = classification_from_label.infer_classification(par_header_info['protocol_name'])
parrec_file['classification'] = classification_from_label.infer_classification(
par_header_info['protocol_name'])
parrec_file['info'] = {}

# File metadata extracted from PAR file header
if header:
parrec_file['info'] = header
# Check for diffusion measurement
if header.has_key('diffusion') and header['diffusion']:
if header.get('diffusion') and header['diffusion']:
log.info('Detected diffusion data - overriding.')
parrec_file['classification'] = {'Intent': ['Structural'],
'Measurement': ['Diffusion']}
'Measurement': ['Diffusion']}

# Append the parrec_file to the files array
metadata['acquisition']['files'] = []
Expand All @@ -169,7 +186,7 @@ def parrec_classify(input_file_path, output_dir, timezone):
os.remove(rec_file[0])

# Write out the metadata to file (.metadata.json)
metafile_outname = os.path.join(output_dir,'.metadata.json')
metafile_outname = os.path.join(output_dir, '.metadata.json')
with open(metafile_outname, 'w') as metafile:
json.dump(metadata, metafile)
pp(metadata)
Expand All @@ -184,16 +201,19 @@ def parrec_classify(input_file_path, output_dir, timezone):
import argparse
ap = argparse.ArgumentParser()
ap.add_argument('parrec_input_file', help='path to par file or parrec zip')
ap.add_argument('output_dir', default= '/flywheel/v0/output', help='output directory')
ap.add_argument('output_dir', default='/flywheel/v0/output',
help='output directory')
ap.add_argument('--log_level', help='logging level', default='info')
ap.add_argument('-z', '--timezone', help='instrument timezone [system timezone]', default='UTC')
ap.add_argument('-z', '--timezone',
help='instrument timezone [system timezone]', default='UTC')
args = ap.parse_args()

log.setLevel(getattr(logging, args.log_level.upper()))
logging.getLogger('parrec-mr-classifier').setLevel(logging.INFO)
log.info('start: %s' % datetime.datetime.utcnow())

metadatafile = parrec_classify(args.parrec_input_file, args.output_dir, args.timezone)
metadatafile = parrec_classify(
args.parrec_input_file, args.output_dir, args.timezone)

if os.path.exists(metadatafile):
log.info('generated %s' % metadatafile)
Expand Down