Skip to content

Commit

Permalink
[BF]: escape special characters in solr search
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpron committed Nov 22, 2024
1 parent f05750d commit 364b005
Showing 1 changed file with 87 additions and 40 deletions.
127 changes: 87 additions & 40 deletions shanoir2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import os
from os.path import join as opj, splitext as ops, exists as ope, dirname as opd
import re
from glob import glob
import sys
from pathlib import Path
Expand Down Expand Up @@ -179,9 +180,9 @@ def read_json_config_file(json_file):


def generate_bids_heuristic_file(
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
) -> None:
"""Generate heudiconv heuristic.py file from shanoir2bids mapping dict
Parameters
Expand Down Expand Up @@ -427,31 +428,55 @@ def is_mapping_bids(self):
for i, far in enumerate(list_find_and_replace):
if i == 0:
normalised_subject = subject
normalised_subject = normalised_subject.replace(far["find"], far["replace"])
normalised_subject = normalised_subject.replace(
far["find"], far["replace"]
)
normalised_subjects.append(normalised_subject)
else:
normalised_subjects = subjects

sessions = list(set([d['bidsSession'] for d in self.shanoir2bids_dict if 'bidsSession' in d]))
extension = '.nii.gz'
sessions = list(
set(
[d["bidsSession"] for d in self.shanoir2bids_dict if "bidsSession" in d]
)
)
extension = ".nii.gz"

if not sessions:
paths = (
"/" + "sub-" + subject + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' +
map["bidsName"] + extension
"/"
+ "sub-"
+ subject
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
else:
paths = (
"/" + "sub-" + subject + '/' +
"ses-" + map['bidsSession'] + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' + "ses-" + map['bidsSession'] + '_' +
map["bidsName"] + extension

"/"
+ "sub-"
+ subject
+ "/"
+ "ses-"
+ map["bidsSession"]
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ "ses-"
+ map["bidsSession"]
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
Expand Down Expand Up @@ -493,7 +518,6 @@ def download_subject(self, subject_to_search):
for far in self.list_fars:
bids_subject_id.replace(far[K_FIND], far[K_REPLACE])


bids_seq_session = None

# Loop on each sequence defined in the dictionary
Expand Down Expand Up @@ -523,23 +547,44 @@ def download_subject(self, subject_to_search):
"[" + str(seq + 1) + "/" + str(self.n_seq) + "]",
)

request_terms = [
self.shanoir_study_id,
shanoir_seq_name,
subject_to_search,
self.shanoir_session_id,
self.date_from,
self.date_to,
]

def escape_solr_special_characters(s):
    """Backslash-escape Solr/Lucene query-syntax characters in ``s``.

    Every character that the Solr standard query parser treats as syntax
    is prefixed with a single backslash so that it is matched literally.

    Parameters
    ----------
    s : str
        Raw search term (study name, dataset name, subject name, ...).

    Returns
    -------
    str
        ``s`` with each special character preceded by a backslash.
        Characters outside the escaped set (including ``*`` and spaces)
        are returned unchanged.
    """
    # Characters escaped: + - ! ( ) { } [ ] ^ " ~ ? : backslash / & |
    #  - `/` delimits a regular-expression term in Solr >= 4, and `&` / `|`
    #    form the `&&` / `||` boolean operators, so they need escaping too.
    #  - `*` is deliberately NOT escaped so that callers can still use it
    #    as a wildcard in their search terms.
    # Add more if needed.
    special_characters = r'\+\-\!\(\)\{\}\[\]\^"~\?:\\/\&\|'
    escape_pattern = re.compile(r'([{}])'.format(special_characters))
    # The replacement template is: one literal backslash + the matched char.
    return escape_pattern.sub(r'\\\1', s)

escaped_request_terms = {s: escape_solr_special_characters(s) for s in request_terms}

# Initialize the parser
search_txt = (
"studyName:"
+ self.shanoir_study_id.replace(" ", "?")
+ " AND datasetName:"
+ shanoir_seq_name.replace(" ", "?")
+ " AND subjectName:"
+ subject_to_search.replace(" ", "?")
+ " AND examinationComment:"
+ self.shanoir_session_id.replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
"studyName:"
+ escaped_request_terms[self.shanoir_study_id].replace(" ", "?")
+ " AND datasetName:"
+ escaped_request_terms[shanoir_seq_name].replace(" ", "?")
+ " AND subjectName:"
+ escaped_request_terms[subject_to_search].replace(" ", "?")
+ " AND examinationComment:"
+ escaped_request_terms[self.shanoir_session_id].replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
)

print(search_txt)

args = self.parser.parse_args(
[
"-u",
Expand Down Expand Up @@ -647,19 +692,19 @@ def download_subject(self, subject_to_search):

# Launch DICOM to BIDS conversion using heudiconv + heuristic file + dcm2niix options
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
) as heuristic_file:
# Generate Heudiconv heuristic file from configuration.json mapping
generate_bids_heuristic_file(
bids_mapping, heuristic_file.name, output_type=self.output_file_type
)
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
) as dcm2niix_config_file:
self.export_dcm2niix_config_options(dcm2niix_config_file.name)
workflow_params = {
"files": glob(opj(tmp_dicom, "*", "*.dcm"), recursive=True),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(' ', ''),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(" ", ""),
"subjs": [bids_subject_id],
"converter": "dcm2niix",
"heuristic": heuristic_file.name,
Expand All @@ -678,12 +723,13 @@ def download_subject(self, subject_to_search):
try:
workflow(**workflow_params)
except AssertionError:
error = (f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n ")
error = (
f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n "
)
print(error)
fp.write(error)
finally:

if not self.debug_mode:
shutil.rmtree(tmp_archive, ignore_errors=True)
shutil.rmtree(tmp_dicom, ignore_errors=True)
Expand All @@ -708,9 +754,9 @@ def download(self):
dur_min = int((time() - t_start_subject) // 60)
dur_sec = int((time() - t_start_subject) % 60)
end_msg = (
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
)
banner_msg(end_msg)
else:
Expand Down Expand Up @@ -784,7 +830,8 @@ def main():
)
else:
if not stb.is_mapping_bids()[0]:
print(f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
print(
f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
)
stb.download()

Expand Down

0 comments on commit 364b005

Please sign in to comment.