Skip to content

Commit

Permalink
Move functions + todos
Browse files Browse the repository at this point in the history
  • Loading branch information
AliceJoubert committed Nov 22, 2024
1 parent cc5e9ba commit 2cfd11f
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 219 deletions.
211 changes: 1 addition & 210 deletions clinica/iotools/bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,158 +480,6 @@ def create_participants_df(
return participant_df


def create_scans_dict(
    clinical_data_dir: Path,
    study_name: StudyName,
    clinical_specifications_folder: Path,
    bids_ids: list[str],
    name_column_ids: str,
    name_column_ses: str,
    ses_dict: dict,
) -> dict:
    """Build a nested dictionary of scans metadata for every subject/session.

    The clinical specification file ``scans.tsv`` (inside
    ``clinical_specifications_folder``) maps study-specific column names to
    BIDS field names and to the clinical file/sheet where each value lives.
    For every such field, the matching clinical file is read and the value is
    looked up per subject and per session.

    Parameters
    ----------
    clinical_data_dir : Path
        The path to the directory where the clinical data are stored.

    study_name : StudyName
        The name of the study (Ex ADNI).

    clinical_specifications_folder : Path
        The path to the folder containing the clinical specification files.

    bids_ids : list of str
        A list of bids ids.

    name_column_ids : str
        The name of the column where the subject id is contained.

    name_column_ses : str
        The name of the column where the viscode of the session is contained.

    ses_dict : dict
        Links the session id to the viscode of the session.

    Returns
    -------
    dict :
        Nested mapping ``{bids_id: {session_id: {modality: {bids_field: value}}}}``
        with the scans information for all sessions of all participants.
        (Note: despite the previous annotation, this is a plain dict, not a
        DataFrame — see the ``return`` statement below.)
    """
    import datetime

    from clinica.utils.pet import Tracer
    from clinica.utils.stream import cprint

    scans_dict = {}
    # Cache of the last clinical file/sheet read, to avoid re-reading the
    # same (possibly large) Excel/CSV file for every field it provides.
    prev_file = ""
    prev_sheet = ""

    # Init the dictionary with the subject ids
    for bids_id in bids_ids:
        scans_dict[bids_id] = dict()
        for session_id in {"ses-" + key for key in ses_dict[bids_id].keys()}:
            # One sub-dict per modality group; PET tracers are keyed separately.
            scans_dict[bids_id][session_id] = {
                "T1/DWI/fMRI/FMAP": {},
                Tracer.PIB: {},
                Tracer.AV45: {},
                Tracer.FMM: {},
                Tracer.FDG: {},
            }

    scans_specs = pd.read_csv(clinical_specifications_folder / "scans.tsv", sep="\t")
    fields_dataset = []
    fields_location = []
    fields_bids = []
    fields_mod = []

    # Extract the fields available and the corresponding bids name, location and type
    for i in range(0, len(scans_specs[study_name.value])):
        field = scans_specs[study_name.value][i]
        if not pd.isnull(field):
            fields_dataset.append(field)
            fields_bids.append(scans_specs["BIDS CLINICA"][i])
            fields_location.append(scans_specs[f"{study_name.value} location"][i])
            fields_mod.append(scans_specs["Modalities related"][i])

    # For each field available extract the original name, extract from the file all the values and fill a data structure
    for i in range(0, len(fields_dataset)):
        # Location is composed by file/sheet
        location = fields_location[i].split("/")
        file_name = location[0]
        sheet = location[1] if len(location) > 1 else ""
        # Check if the file to read is already opened
        if file_name == prev_file and sheet == prev_sheet:
            pass
        else:
            file_ext = os.path.splitext(file_name)[1]
            # file_name may be a glob pattern; only the first match is used.
            files_to_read = [f for f in clinical_data_dir.glob(file_name)]
            if file_ext == ".xlsx":
                file_to_read = pd.read_excel(files_to_read[0], sheet_name=sheet)
            elif file_ext == ".csv":
                file_path = files_to_read[0]

                # Fix for malformed flutemeta file in AIBL (see #796).
                # Some flutemeta lines contain a non-coded string value at the second-to-last position. This value
                # contains a comma which adds an extra column and shifts the remaining values to the right. In this
                # case, we just remove the erroneous content and replace it with -4 which AIBL uses as n/a value.
                on_bad_lines = lambda x: "error"  # noqa
                if "flutemeta" in file_path.name and study_name == StudyName.AIBL:
                    on_bad_lines = lambda bad_line: bad_line[:-3] + [-4, bad_line[-1]]  # noqa
                file_to_read = pd.read_csv(
                    file_path,
                    sep=",",
                    engine="python",
                    on_bad_lines=on_bad_lines,
                )
            prev_file = file_name
            prev_sheet = sheet

        for bids_id in bids_ids:
            # Recover the study-specific id by stripping the bids prefix.
            original_id = bids_id.replace(f"sub-{study_name.value}", "")
            for session_name in {"ses-" + key for key in ses_dict[bids_id].keys()}:
                # When comparing sessions, remove the "-ses" prefix IF it exists
                # NOTE(review): the second operand of "&" below collapses the
                # whole viscode column to a single scalar via [0][0] before
                # comparing — presumably relying on the split's first element;
                # verify this matches each study's viscode format.
                row_to_extract = file_to_read[
                    (file_to_read[name_column_ids] == int(original_id))
                    & (
                        list(
                            filter(
                                None, file_to_read[name_column_ses].str.split("ses-")
                            )
                        )[0][0]
                        == ses_dict[bids_id][
                            list(filter(None, session_name.split("ses-")))[0]
                        ]
                    )
                ].index.tolist()
                if len(row_to_extract) > 0:
                    # Only the first matching row is used.
                    row_to_extract = row_to_extract[0]
                    # Fill the dictionary with all the information
                    value = file_to_read.iloc[row_to_extract][fields_dataset[i]]

                    if study_name == StudyName.AIBL:  # Deal with special format in AIBL
                        if value == "-4":
                            # AIBL uses -4 as its n/a marker.
                            value = "n/a"
                        elif fields_bids[i] == "acq_time":
                            # Normalize AIBL's mm/dd/YYYY dates to BIDS ISO 8601.
                            date_obj = datetime.datetime.strptime(value, "%m/%d/%Y")
                            value = date_obj.strftime("%Y-%m-%dT%H:%M:%S")

                    scans_dict[bids_id][session_name][fields_mod[i]][
                        fields_bids[i]
                    ] = value
                else:
                    cprint(
                        f"Scans information for {bids_id} {session_name} not found.",
                        lvl="info",
                    )
                    scans_dict[bids_id][session_name][fields_mod[i]][
                        fields_bids[i]
                    ] = "n/a"

    return scans_dict


def _write_bids_dataset_description(
study_name: StudyName,
bids_dir: Path,
Expand Down Expand Up @@ -709,6 +557,7 @@ def write_modality_agnostic_files(
_write_bidsignore(bids_dir)


# todo : test + open issue for usability across converters (though may not be useful if json from dicom)
def _get_pet_tracer_from_filename(filename: str) -> Tracer:
"""Return the PET tracer from the provided filename.
Expand Down Expand Up @@ -742,64 +591,6 @@ def _get_pet_tracer_from_filename(filename: str) -> Tracer:
return Tracer(tracer)


def write_scans_tsv(
    bids_dir: Path, participant_ids: List[str], scans_dict: dict
) -> None:
    """Write the scans dict into TSV files.

    For every participant and every ``ses-*`` folder, collect one row per
    imaging file found in the supported modality sub-folders and write them
    to ``<sub>_<ses>_scans.tsv`` inside the session folder.

    Parameters
    ----------
    bids_dir : Path
        The path to the BIDS directory.

    participant_ids : List[str]
        List of participant ids for which to write the scans TSV files.

    scans_dict : dict
        Dictionary containing scans metadata, keyed as
        ``scans_dict[sub][ses][modality_group][bids_field]``.

        .. note::
            This is the output of the function
            `clinica.iotools.bids_utils.create_scans_dict`.

    See also
    --------
    write_sessions_tsv
    """
    supported_modalities = ("anat", "dwi", "func", "pet")

    for sub in participant_ids:
        for session_path in (bids_dir / sub).glob("ses-*"):
            scans_df = pd.DataFrame()
            # session_path already is bids_dir / sub / <ses>; no need to rebuild it.
            tsv_file = session_path / f"{sub}_{session_path.name}_scans.tsv"
            # Remove any previous version of the file before rewriting it.
            tsv_file.unlink(missing_ok=True)

            for mod in session_path.glob("*"):
                if mod.name in supported_modalities:
                    # Fix: filter on the file's suffix, not the directory's.
                    # The previous `mod.suffix != ".json"` compared the modality
                    # folder's suffix (always ""), so JSON sidecars were wrongly
                    # listed in scans.tsv.
                    for file in [f for f in mod.iterdir() if f.suffix != ".json"]:
                        # anat/dwi/func share one metadata group; PET rows are
                        # keyed by the tracer parsed from the filename.
                        f_type = (
                            "T1/DWI/fMRI/FMAP"
                            if mod.name in ("anat", "dwi", "func")
                            else _get_pet_tracer_from_filename(file.name).value
                        )
                        row_to_append = pd.DataFrame(
                            scans_dict[sub][session_path.name][f_type], index=[0]
                        )
                        # BIDS requires the filename relative to the session folder.
                        row_to_append.insert(
                            0, "filename", str(Path(mod.name) / Path(file.name))
                        )
                        scans_df = pd.concat([scans_df, row_to_append])
            if scans_df.empty:
                # No scans found for this session: skip instead of crashing on
                # set_index (an empty frame has no "filename" column).
                continue
            scans_df = scans_df.set_index("filename").fillna("n/a")
            scans_df.to_csv(tsv_file, sep="\t", encoding="utf8")

def get_bids_subjs_list(bids_path: Path) -> List[str]:
"""Given a BIDS compliant dataset, return the list of all the subjects available.
Expand Down
1 change: 1 addition & 0 deletions clinica/iotools/converter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"replace_sequence_chars",
"write_longitudinal_analysis",
"write_statistics",
"viscode_to_session",
]


Expand Down
Loading

0 comments on commit 2cfd11f

Please sign in to comment.