From 846e43f2569dfad15c630e56ab10d7817c5b824b Mon Sep 17 00:00:00 2001 From: abrooks Date: Fri, 2 Apr 2021 15:26:31 +0100 Subject: [PATCH 1/9] Add 64-bit library directory to PATH --- src/applications/SRAnonTool/CTP_SRAnonTool.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/applications/SRAnonTool/CTP_SRAnonTool.sh b/src/applications/SRAnonTool/CTP_SRAnonTool.sh index 5f929f5da..6ae594f64 100755 --- a/src/applications/SRAnonTool/CTP_SRAnonTool.sh +++ b/src/applications/SRAnonTool/CTP_SRAnonTool.sh @@ -58,7 +58,7 @@ tidy_exit() # Default executable PATHs and Python libraries export PATH=${PATH}:${SMI_ROOT}/bin:${SMI_ROOT}/scripts:${progdir} -export PYTHONPATH=${SMI_ROOT}/lib/python3:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib/python3.6/site-packages +export PYTHONPATH=${SMI_ROOT}/lib/python3:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib/python3.6/site-packages:${SMI_ROOT}/lib/python3/virtualenvs/semehr/$(hostname -s)/lib64/python3.6/site-packages # Command line arguments while getopts ${options} var; do From ebecb06323f29947af62798afae720b641c1adcb Mon Sep 17 00:00:00 2001 From: abrooks Date: Fri, 2 Apr 2021 15:27:41 +0100 Subject: [PATCH 2/9] Fix typos --- src/applications/SRAnonTool/CTP_DicomToText.py | 5 ++--- src/applications/SRAnonTool/CTP_XMLToDicom.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/applications/SRAnonTool/CTP_DicomToText.py b/src/applications/SRAnonTool/CTP_DicomToText.py index 1c3a88f44..50ac58db5 100755 --- a/src/applications/SRAnonTool/CTP_DicomToText.py +++ b/src/applications/SRAnonTool/CTP_DicomToText.py @@ -120,7 +120,6 @@ def extract_file(input, output): log_dir = cfg_dict['LoggingOptions']['LogsRoot'] root_dir = cfg_dict['FileSystemOptions']['FileSystemRoot'] - extract_dir = cfg_dict['FileSystemOptions']['ExtractRoot'] # --------------------------------------------------------------------- # Now we know the LogsRoot we can set up logging @@ -144,10 +143,10 @@ def extract_file(input, output): elif mongo_db != {}: # Only DicomFilePath and StudyDate are indexed in MongoDB. # Passing a SOPInstanceUID would be handy but no point if not indexed. - mongodb = Mongo.SmiPyMongoCollection(mongo_host, mongo_user, mongo_host) + mongodb = Mongo.SmiPyMongoCollection(mongo_host, mongo_user, mongo_pass) mongodb.setImageCollection('SR') # If it looks like a date YYYY/MM/DD or YYYYMMDD: - if re.match('^\\s*\\d+/\\d+/\\d+\\s*$|^\\s*\\d{6}\\s*$', args.input): + if re.match('^\\s*\\d+/\\d+/\\d+\\s*$|^\\s*\\d{8}\\s*$', args.input): for mongojson in mongodb.StudyDateToJSONList(args.input): extract_mongojson(mongojson, args.output_dir) # Otherwise assume a DICOM file path diff --git a/src/applications/SRAnonTool/CTP_XMLToDicom.py b/src/applications/SRAnonTool/CTP_XMLToDicom.py index 2300529d1..5339bbb6a 100755 --- a/src/applications/SRAnonTool/CTP_XMLToDicom.py +++ b/src/applications/SRAnonTool/CTP_XMLToDicom.py @@ -80,7 +80,7 @@ xmlroot = xml.etree.ElementTree.parse(args.input_xml).getroot() xmldictlist = Knowtator.annotation_xml_to_dict(xmlroot) if xmldictlist == []: - print('WARNING: empty document in {}'.format(xmlfilename)) + print('WARNING: empty document in {}'.format(args.input_xml)) #for annot in xmldictlist: # print('REMOVE {} from DICOM at {}'.format(annot['text'], annot['start_char'])) From 0a646a80638b7a8a3342f5aad6c5e2ebcef37cc0 Mon Sep 17 00:00:00 2001 From: abrooks Date: Fri, 2 Apr 2021 15:27:59 +0100 Subject: [PATCH 3/9] Remove unnecessary import --- src/applications/SRAnonTool/test/CTP_SRAnonTool_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/applications/SRAnonTool/test/CTP_SRAnonTool_test.py b/src/applications/SRAnonTool/test/CTP_SRAnonTool_test.py index 08528a741..b131ec234 100755 --- a/src/applications/SRAnonTool/test/CTP_SRAnonTool_test.py +++ b/src/applications/SRAnonTool/test/CTP_SRAnonTool_test.py @@ -15,7 +15,7 @@ import re import shutil import sys -from SmiServices import DicomText, Knowtator +from SmiServices import DicomText # Configurable: semehr_dir = '/opt/semehr' From b7749b080fd1dbfc780687cc1924bbf32cdf5de7 Mon Sep 17 00:00:00 2001 From: abrooks Date: Fri, 2 Apr 2021 16:56:07 +0100 Subject: [PATCH 4/9] Use correct decoder for IMAGE types, decode SourceImageSequence, silently ignore types known to contain no text, handle missing NumericValue --- .../Smi_Common_Python/SmiServices/Dicom.py | 24 ++++++++++++++++++- .../SmiServices/StructuredReport.py | 20 +++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/common/Smi_Common_Python/SmiServices/Dicom.py b/src/common/Smi_Common_Python/SmiServices/Dicom.py index fbcdfc17b..0c9e11c5b 100644 --- a/src/common/Smi_Common_Python/SmiServices/Dicom.py +++ b/src/common/Smi_Common_Python/SmiServices/Dicom.py @@ -171,6 +171,24 @@ def test_sr_decode_ConceptNameCodeSequence(): assert sr_decode_ConceptNameCodeSequence( [ { 'CodeMeaning': 'cm', 'CodeValue': 'cv' } ] ) == 'cm' +# --------------------------------------------------------------------- +# Decode a SourceImageSequence by returning a string consisting of the +# UID of the referenced image. +# XXX ignores the ReferencedSOPClassUID. +# XXX this is the same as ReferencedSOPSequence above. + +def sr_decode_SourceImageSequence(sis): + assert isinstance(sis, list) + for sis_item in sis: + if has_tag(sis_item, 'ReferencedSOPInstanceUID'): + return tag_val(sis_item, 'ReferencedSOPInstanceUID') + return '' + +def test_sr_decode_SourceImageSequence(): + assert sr_decode_SourceImageSequence([]) == '' + assert sr_decode_SourceImageSequence( [ {'ReferencedSOPClassUID':'1.2', 'ReferencedSOPInstanceUID':'1.2.3.4.5'} ] ) == '1.2.3.4.5' + + # --------------------------------------------------------------------- # Decode a MeasuredValueSequence by returning a string consisting of the # NumericValue inside, and having the short form of the units appended @@ -204,10 +222,14 @@ def sr_decode_MeasuredValueSequence(mvs): num_str = tag_val(mvs_item, 'NumericValue') if has_tag(mvs_item, 'MeasurementUnitsCodeSequence'): units_str = sr_decode_MeasurementUnitsCodeSequence(tag_val(mvs_item, 'MeasurementUnitsCodeSequence')) - return num_str+' '+units_str + # Have to use str() because sometimes the value is missing, i.e. None(!) + return str(num_str)+' '+str(units_str) def test_sr_decode_MeasuredValueSequence(): assert sr_decode_MeasuredValueSequence(None) == '' + assert sr_decode_MeasuredValueSequence( [ { 'NumericValue': None, 'MeasurementUnitsCodeSequence': [ { 'CodeMeaning': 'cm', 'CodeValue': 'mm' } ] } ] ) == 'None mm' + assert sr_decode_MeasuredValueSequence( [ { 'NumericValue': { 'vr':'DS' }, 'MeasurementUnitsCodeSequence': [ { 'CodeMeaning': 'cm', 'CodeValue': 'mm' } ] } ] ) == ' mm' + assert sr_decode_MeasuredValueSequence( [ { 'NumericValue': { 'vr':'DS', 'Value': '23' }, 'MeasurementUnitsCodeSequence': [ { 'CodeMeaning': 'cm', 'CodeValue': 'mm' } ] } ] ) == '23 mm' assert sr_decode_MeasuredValueSequence( [ { 'NumericValue': '23', 'MeasurementUnitsCodeSequence': [ { 'CodeMeaning': 'cm', 'CodeValue': 'mm' } ] } ] ) == '23 mm' diff --git a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py index 23d808fe3..d1b3c9e3f 100644 --- a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py +++ b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py @@ -256,6 +256,7 @@ def sr_key_can_be_ignored(keystr): if keystr == sr_extract_dict['tag']: return True # We cannot definitively decode private tags, BUT some contain information which is not anywhere else, even person names! + # XXX TODO: add the ones we know (study description, hospital name, etc) if '-PrivateCreator' in keystr: return True if '-Unknown' in keystr: @@ -305,6 +306,8 @@ def _SR_output_string(keystr, valstr, fp): def _SR_parse_key(json_dict, json_key, fp): if tag_is(json_key, 'ConceptNameCodeSequence'): _SR_output_string('', Dicom.sr_decode_ConceptNameCodeSequence(tag_val(json_dict, json_key)), fp) + elif tag_is(json_key, 'SourceImageSequence'): + _SR_output_string('SourceImage', Dicom.sr_decode_SourceImageSequence(tag_val(json_dict, json_key)), fp) elif tag_is(json_key, 'ContentSequence'): for cs_item in tag_val(json_dict, json_key): if has_tag(cs_item, 'RelationshipType') and has_tag(cs_item, 'ValueType'): @@ -326,19 +329,30 @@ def _SR_parse_key(json_dict, json_key, fp): elif value_type == 'UIDREF' or value_type == ['UIDREF']: _SR_output_string(Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ConceptNameCodeSequence')), tag_val(cs_item, 'UID'), fp) elif value_type == 'IMAGE' or value_type == ['IMAGE']: - _SR_output_string(Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ConceptNameCodeSequence')), Dicom.sr_decode_ReferencedSOPSequence(tag_val(cs_item, 'ReferencedSOPSequence')), fp) + _SR_output_string(Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ReferencedSOPSequence')), Dicom.sr_decode_ReferencedSOPSequence(tag_val(cs_item, 'ReferencedSOPSequence')), fp) elif value_type == 'CONTAINER' or value_type == ['CONTAINER']: # Sometimes it has no ContentSequence or is 'null' if has_tag(cs_item, 'ContentSequence') and tag_val(cs_item, 'ContentSequence') != None: _SR_output_string('', Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ConceptNameCodeSequence')), fp) _SR_parse_key(cs_item, 'ContentSequence', fp) + # explicitly ignore TIME, SCOORD, TCOORD, COMPOSITE, IMAGE, WAVEFORM + # as they have no useful text to return + elif value_type == 'SCOORD' or value_type == ['SCOORD']: + pass + elif value_type == 'TCOORD' or value_type == ['TCOORD']: + pass + elif value_type == 'COMPOSITE' or value_type == ['COMPOSITE']: + pass + elif value_type == 'TIME' or value_type == ['TIME']: + pass + elif value_type == 'WAVEFORM' or value_type == ['WAVEFORM']: + pass else: print('UNEXPECTED ITEM OF TYPE %s = %s' % (value_type, cs_item), file=sys.stderr) #print('ITEM %s' % cs_item) else: if not sr_key_can_be_ignored(json_key): - print('UNEXPECTED KEY %s' % json_key, file=sys.stderr) - print(json_dict[json_key]) + print('UNEXPECTED KEY %s = %s' % (json_key, json_dict[json_key]), file=sys.stderr) # --------------------------------------------------------------------- From 3a83ff1b5987adbf2aa952270db831ef949d724b Mon Sep 17 00:00:00 2001 From: abrooks Date: Sun, 4 Apr 2021 21:28:49 +0100 Subject: [PATCH 5/9] Handle missing ConceptNameCodeSequence --- src/common/Smi_Common_Python/SmiServices/StructuredReport.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py index d1b3c9e3f..3d451c2ce 100644 --- a/src/common/Smi_Common_Python/SmiServices/StructuredReport.py +++ b/src/common/Smi_Common_Python/SmiServices/StructuredReport.py @@ -333,7 +333,8 @@ def _SR_parse_key(json_dict, json_key, fp): elif value_type == 'CONTAINER' or value_type == ['CONTAINER']: # Sometimes it has no ContentSequence or is 'null' if has_tag(cs_item, 'ContentSequence') and tag_val(cs_item, 'ContentSequence') != None: - _SR_output_string('', Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ConceptNameCodeSequence')), fp) + if has_tag(cs_item, 'ConceptNameCodeSequence'): + _SR_output_string('', Dicom.sr_decode_ConceptNameCodeSequence(tag_val(cs_item, 'ConceptNameCodeSequence')), fp) _SR_parse_key(cs_item, 'ContentSequence', fp) # explicitly ignore TIME, SCOORD, TCOORD, COMPOSITE, IMAGE, WAVEFORM # as they have no useful text to return From a974ac650fb20872355d5510b9f6f8b566555725 Mon Sep 17 00:00:00 2001 From: abrooks Date: Sun, 4 Apr 2021 21:29:23 +0100 Subject: [PATCH 6/9] Added findSOPInstanceUID helper --- src/common/Smi_Common_Python/SmiServices/Mongo.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/common/Smi_Common_Python/SmiServices/Mongo.py b/src/common/Smi_Common_Python/SmiServices/Mongo.py index 763943a6d..3bb6a2cec 100644 --- a/src/common/Smi_Common_Python/SmiServices/Mongo.py +++ b/src/common/Smi_Common_Python/SmiServices/Mongo.py @@ -19,12 +19,16 @@ def __init__(self, hostname, username = None, password = None): self.mongoConnection = MongoClient(host=hostname) + def setSemEHRCollection(self, collection_name): + """ After initialisation set the desired collection using the two-letter modality, eg. SR selects dicom.image_SR """ + + self.mongoCollection = self.mongoConnection['semehr'][collection_name] + def setImageCollection(self, modality): """ After initialisation set the desired collection using the two-letter modality, eg. SR selects dicom.image_SR """ self.mongoCollection = self.mongoConnection['dicom']['image_'+modality] - def DicomFilePathToJSON(self, DicomFilePath): """ After setting a collection(modality) you can extract a document given a DICOM path (can be absolute, as everything upto PACS stripped off, or relative to root of collection)""" @@ -41,4 +45,11 @@ def StudyDateToJSONList(self, StudyDate): StudyDate = re.sub('[/ ]*', '', StudyDate) assert(len(StudyDate) == 8) - return self.mongoCollection.find( { "StudyDate" : StudyDate } ) \ No newline at end of file + return self.mongoCollection.find( { "StudyDate" : StudyDate } ) + + def findSOPInstanceUID(self, sopinstanceuid): + """ This is intended to check for the existance of a document having the + given SOPInstanceUID but since it also returns that document is can be + used as a query """ + + return self.mongoCollection.find_one( { 'SOPInstanceUID': sopinstanceuid } ) \ No newline at end of file From aac0fee9fda613d656fe88706ac531fb0dc34d5e Mon Sep 17 00:00:00 2001 From: abrooks Date: Sun, 4 Apr 2021 21:41:17 +0100 Subject: [PATCH 7/9] Add option to extract only if not already in semehr database --- .../SRAnonTool/CTP_DicomToText.py | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/applications/SRAnonTool/CTP_DicomToText.py b/src/applications/SRAnonTool/CTP_DicomToText.py index 50ac58db5..a3a1f9e98 100755 --- a/src/applications/SRAnonTool/CTP_DicomToText.py +++ b/src/applications/SRAnonTool/CTP_DicomToText.py @@ -18,11 +18,8 @@ import yaml import pydicom import re -import xml.etree.ElementTree # untangle and xmltodict not available in NSH from deepmerge import Merger # for deep merging dictionaries from SmiServices import Mongo -from SmiServices import Rabbit -from SmiServices import Dicom from SmiServices import DicomText from SmiServices import StructuredReport as SR @@ -37,6 +34,7 @@ def extract_mongojson(mongojson, output): if os.path.isdir(output): filename = mongojson['SOPInstanceUID'] + '.txt' output = os.path.join(output, filename) + logging.info('Parse %s' % mongojson.get('header',{}).get('DicomFilePath','')) with open(output, 'w') as fd: SR.SR_parse(mongojson, filename, fd) logging.info(f'Wrote {output}') @@ -98,6 +96,7 @@ def extract_file(input, output): parser.add_argument('-y', dest='yamlfile', action="append", help='path to yaml config file (can be used more than once)') parser.add_argument('-i', dest='input', action="store", help='SOPInstanceUID or path to raw DICOM file from which text will be redacted') parser.add_argument('-o', dest='output_dir', action="store", help='path to directory where extracted text will be written') + parser.add_argument('--semehr-unique', dest='semehr_unique', action="store_true", help='only extra from MongoDB/dicom if not already in MongoDB/semehr') args = parser.parse_args() if not args.input: parser.print_help() @@ -113,10 +112,15 @@ def extract_file(input, output): # Merge all the yaml dicts into one cfg_dict = Merger([(list, ["append"]),(dict, ["merge"])],["override"],["override"]).merge(cfg_dict, yaml.safe_load(fd)) - mongo_host = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('HostName',{}) - mongo_user = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('UserName',{}) - mongo_pass = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('Password',{}) - mongo_db = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('DatabaseName',{}) + mongo_dicom_host = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('HostName',{}) + mongo_dicom_user = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('UserName',{}) + mongo_dicom_pass = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('Password',{}) + mongo_dicom_db = cfg_dict.get('MongoDatabases', {}).get('DicomStoreOptions',{}).get('DatabaseName',{}) + + mongo_semehr_host = cfg_dict.get('MongoDatabases', {}).get('SemEHRStoreOptions',{}).get('HostName',{}) + mongo_semehr_user = cfg_dict.get('MongoDatabases', {}).get('SemEHRStoreOptions',{}).get('UserName',{}) + mongo_semehr_pass = cfg_dict.get('MongoDatabases', {}).get('SemEHRStoreOptions',{}).get('Password',{}) + mongo_semehr_db = cfg_dict.get('MongoDatabases', {}).get('SemEHRStoreOptions',{}).get('DatabaseName',{}) log_dir = cfg_dict['LoggingOptions']['LogsRoot'] root_dir = cfg_dict['FileSystemOptions']['FileSystemRoot'] @@ -140,18 +144,22 @@ def extract_file(input, output): for root, dirs, files in os.walk(args.input, topdown=False): for name in files: extract_file(os.path.join(root, name), args.output_dir) - elif mongo_db != {}: + elif mongo_dicom_db != {}: # Only DicomFilePath and StudyDate are indexed in MongoDB. # Passing a SOPInstanceUID would be handy but no point if not indexed. - mongodb = Mongo.SmiPyMongoCollection(mongo_host, mongo_user, mongo_pass) - mongodb.setImageCollection('SR') - # If it looks like a date YYYY/MM/DD or YYYYMMDD: + mongodb_in = Mongo.SmiPyMongoCollection(mongo_dicom_host, mongo_dicom_user, mongo_dicom_pass) + mongodb_in.setImageCollection('SR') + mongodb_out = Mongo.SmiPyMongoCollection(mongo_dicom_host, mongo_dicom_user, mongo_dicom_pass) + mongodb_out.setSemEHRCollection('semehr_results') + # If it looks like a date YYYY/MM/DD or YYYYMMDD extract all on that day: if re.match('^\\s*\\d+/\\d+/\\d+\\s*$|^\\s*\\d{8}\\s*$', args.input): - for mongojson in mongodb.StudyDateToJSONList(args.input): - extract_mongojson(mongojson, args.output_dir) - # Otherwise assume a DICOM file path + for mongojson in mongodb_in.StudyDateToJSONList(args.input): + # If it's already in the annotation database then don't bother extracting. + if not args.semehr_unique or not mongodb_out.findSOPInstanceUID(mongojson['SOPInstanceUID']): + extract_mongojson(mongojson, args.output_dir) + # Otherwise assume a DICOM file path which can be retrieved from MongoDB else: - mongojson = mongodb.DicomFilePathToJSON(args.input) + mongojson = mongodb_in.DicomFilePathToJSON(args.input) extract_mongojson(mongojson, args.output_dir) else: logging.error(f'Cannot find {args.input} as file and MongoDB not configured') From 86a079011b6547c9194e347a5af284374d149383 Mon Sep 17 00:00:00 2001 From: abrooks Date: Sun, 4 Apr 2021 21:45:06 +0100 Subject: [PATCH 8/9] Added news --- news/701-bugfix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/701-bugfix.md diff --git a/news/701-bugfix.md b/news/701-bugfix.md new file mode 100644 index 000000000..1d84e84b3 --- /dev/null +++ b/news/701-bugfix.md @@ -0,0 +1 @@ +Several improvements to Python code for handling unusually-formatted SR documents. From bde334eb56a2d17b23a1798b2b08bf75ed31ae4b Mon Sep 17 00:00:00 2001 From: abrooks Date: Tue, 6 Apr 2021 08:30:59 +0100 Subject: [PATCH 9/9] copy/paste error --- src/applications/SRAnonTool/CTP_DicomToText.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/applications/SRAnonTool/CTP_DicomToText.py b/src/applications/SRAnonTool/CTP_DicomToText.py index a3a1f9e98..a227d353b 100755 --- a/src/applications/SRAnonTool/CTP_DicomToText.py +++ b/src/applications/SRAnonTool/CTP_DicomToText.py @@ -149,7 +149,7 @@ def extract_file(input, output): # Passing a SOPInstanceUID would be handy but no point if not indexed. mongodb_in = Mongo.SmiPyMongoCollection(mongo_dicom_host, mongo_dicom_user, mongo_dicom_pass) mongodb_in.setImageCollection('SR') - mongodb_out = Mongo.SmiPyMongoCollection(mongo_dicom_host, mongo_dicom_user, mongo_dicom_pass) + mongodb_out = Mongo.SmiPyMongoCollection(mongo_semehr_host, mongo_semehr_user, mongo_semehr_pass) mongodb_out.setSemEHRCollection('semehr_results') # If it looks like a date YYYY/MM/DD or YYYYMMDD extract all on that day: if re.match('^\\s*\\d+/\\d+/\\d+\\s*$|^\\s*\\d{8}\\s*$', args.input):