Skip to content

Commit

Permalink
More testing - 2711
Browse files Browse the repository at this point in the history
  • Loading branch information
AliceJoubert committed Nov 27, 2024
1 parent b0ddb34 commit b064215
Show file tree
Hide file tree
Showing 2 changed files with 124 additions and 47 deletions.
94 changes: 51 additions & 43 deletions clinica/iotools/converters/aibl_to_bids/utils/clinical.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Iterator, List, Optional, Union
from typing import Callable, Iterator, List, Optional, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -149,6 +149,26 @@ def _load_specifications(
return pd.read_csv(specifications, sep="\t")


def _load_metadata_from_pattern(
    clinical_dir: Path,
    pattern: str,
    on_bad_lines: Optional[Union[str, Callable]] = "error",
) -> pd.DataFrame:
    """Load the first CSV file of a folder whose name matches a glob pattern.

    Parameters
    ----------
    clinical_dir : Path
        Folder in which the pattern is globbed.

    pattern : str
        Glob pattern identifying the clinical data file.

    on_bad_lines : str or Callable, optional
        Forwarded as-is to ``pandas.read_csv``. Default is "error".

    Returns
    -------
    pd.DataFrame :
        Content of the first matching file, parsed as comma-separated values
        with the python engine. A column named "text", if present, is read
        as string.

    Raises
    ------
    FileNotFoundError :
        If no file in ``clinical_dir`` matches ``pattern``.
    """
    # Resolve the file before parsing, so that only a missing file (and never
    # an error raised while reading the CSV) is reported as FileNotFoundError.
    csv_file = next(clinical_dir.glob(pattern), None)
    if csv_file is None:
        raise FileNotFoundError(
            f"Clinical data file corresponding to pattern {pattern} was not found in folder "
            f"{clinical_dir}"
        )
    return pd.read_csv(
        csv_file,
        dtype={"text": str},
        sep=",",
        engine="python",
        on_bad_lines=on_bad_lines,
    )


def _map_diagnosis(diagnosis: int) -> str:
if diagnosis == 1:
return "CN"
Expand Down Expand Up @@ -239,17 +259,9 @@ def create_sessions_tsv_file(
).set_index("session_id", drop=False)

for _, row in specifications.iterrows():
try:
df = pd.read_csv(
next(clinical_data_dir.glob(row[f"{study} location"])),
dtype={"text": str},
)
except StopIteration:
raise FileNotFoundError(
f"Clinical data file corresponding to pattern {row[f'{study} location']} was not found in folder "
f"{clinical_data_dir}"
)

df = _load_metadata_from_pattern(
clinical_data_dir, row[f"{study} location"]
)
data = _format_metadata_for_rid(
input_df=df,
source_id=rid,
Expand Down Expand Up @@ -399,6 +411,13 @@ def _init_scans_dict(bids_path: Path) -> dict:
return scans_dict


def _format_time(time: str) -> str:
    """Convert a ``MM/DD/YYYY`` date string to ``YYYY-MM-DDTHH:MM:SS``.

    Parameters
    ----------
    time : str
        Date written as month/day/year, e.g. "01/31/2001".

    Returns
    -------
    str :
        The same date in ``%Y-%m-%dT%H:%M:%S`` format. The input carries no
        time of day, so the time part is midnight.

    Raises
    ------
    ValueError :
        If ``time`` does not match the ``%m/%d/%Y`` format.
    """
    from datetime import datetime as _datetime

    return _datetime.strptime(time, "%m/%d/%Y").strftime("%Y-%m-%dT%H:%M:%S")


def create_scans_dict(
clinical_data_dir: Path,
clinical_specifications_folder: Path,
Expand All @@ -422,60 +441,49 @@ def create_scans_dict(
pd.DataFrame :
A pandas DataFrame that contains the scans information for all sessions of all participants.
"""
import datetime

from clinica.iotools.converter_utils import viscode_to_session

scans_dict = _init_scans_dict(bids_path)

study = StudyName.AIBL.value
scans_specs = pd.read_csv(clinical_specifications_folder / "scans.tsv", sep="\t")[

scans_specs = _load_specifications(clinical_specifications_folder, "scans.tsv")[
[study, f"{study} location", "BIDS CLINICA", "Modalities related"]
].dropna()

for _, row in scans_specs.iterrows():
file_name = row[f"{study} location"]
# todo : more robust
file_path = [f for f in clinical_data_dir.glob(file_name)][0]

on_bad_lines = lambda x: "error" # noqa
if "flutemeta" in file_path.name:
on_bad_lines = lambda bad_line: _flutemeta_badline(bad_line) # noqa
file_to_read = pd.read_csv(
file_path,
sep=",",
engine="python",
on_bad_lines=on_bad_lines,
on_bad_lines = (
_flutemeta_badline if "flutemeta" in row[f"{study} location"] else "error"
)
file_to_read["session_id"] = file_to_read["VISCODE"].apply(
lambda x: viscode_to_session(x)
file = _load_metadata_from_pattern(
clinical_data_dir, row[f"{study} location"], on_bad_lines
)
file["session_id"] = file["VISCODE"].apply(lambda x: viscode_to_session(x))

for bids_id in scans_dict.keys():
original_id = bids_id_factory(StudyName.AIBL)(
bids_id
).to_original_study_id()
for session in scans_dict[bids_id].keys():
values_to_extract = file_to_read[
(file_to_read["RID"] == int(original_id))
& (file_to_read["session_id"] == session)
][row[study]].tolist()

if values_to_extract:
value = values_to_extract[0]
try:
value = file[
(file["RID"] == int(original_id))
& (file["session_id"] == session)
][row[study]].item()
if value == "-4":
value = "n/a"
elif row["BIDS CLINICA"] == "acq_time":
date_obj = datetime.datetime.strptime(value, "%m/%d/%Y")
value = date_obj.strftime("%Y-%m-%dT%H:%M:%S")
else:
value = _format_time(value)
except ValueError:
value = "n/a"

scans_dict[bids_id][session][row["Modalities related"]][
row["BIDS CLINICA"]
] = value
modality = scans_dict[bids_id][session][row["Modalities related"]]

# todo : case where it gets written over ?
# Avoid writing over in case of modality "T1/..." because it is used twice
if (row["BIDS CLINICA"] not in modality) or (
modality[row["BIDS CLINICA"]] == "n/a"
):
modality[row["BIDS CLINICA"]] = value
return scans_dict


Expand Down
77 changes: 73 additions & 4 deletions test/unittests/iotools/converters/aibl_to_bids/test_aibl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,12 +515,81 @@ def test_create_sessions_tsv(tmp_path):
assert_frame_equal(result_sub109, expected_sub109, check_like=True)


def test_create_sessions_tsv_clinical_not_found(tmp_path):
def test_load_metadata_from_pattern_success(tmp_path):
from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
create_sessions_tsv_file,
_load_metadata_from_pattern,
)

result = _load_metadata_from_pattern(
build_clinical_data(tmp_path), pattern="aibl_neurobat_*.csv"
)

expected = pd.DataFrame(
{
"RID": [1, 2, 12, 100, 100, 109, 109],
"VISCODE": ["bl", "bl", "bl", "bl", "m12", "bl", "m06"],
"EXAMDATE": [
"01/01/2001",
"01/01/2002",
"01/01/2012",
"01/01/2100",
"12/01/2100",
"01/01/2109",
"-4",
],
}
)

assert_frame_equal(expected, result, check_like=True)


def test_load_metadata_from_pattern_not_found(tmp_path):
from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
_load_metadata_from_pattern,
)

with pytest.raises(FileNotFoundError, match="Clinical data"):
create_sessions_tsv_file(
build_bids_dir(tmp_path), tmp_path, build_sessions_spec(tmp_path)
_load_metadata_from_pattern(
clinical_dir=tmp_path,
pattern="aibl_neurobat_*.csv",
)

with pytest.raises(FileNotFoundError, match="Clinical data"):
_load_metadata_from_pattern(
clinical_dir=build_clinical_data(tmp_path),
pattern="foo",
)


def test_load_metadata_from_pattern_optional(tmp_path):
    """Check that a custom ``on_bad_lines`` handler is used when loading.

    The CSV written here has four columns, but its third data row holds five
    fields. With ``_flutemeta_badline`` passed as ``on_bad_lines``, the file
    is expected to load with that row kept and its ``col3`` value set to -4.
    """
    from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
        _flutemeta_badline,
        _load_metadata_from_pattern,
    )

    csv_content = (
        "col1,col2,col3,col4\n"
        "1,1,1,1\n"
        "1,1,1,1\n"
        # Malformed row: one field too many for the four-column header.
        "1,1,measured,AUSTIN AC CT Brain H19s,1\n"
        "1,1,1,1\n"
    )
    (tmp_path / "bad_line.csv").write_text(csv_content)

    result = _load_metadata_from_pattern(
        clinical_dir=tmp_path,
        pattern="bad_line.csv",
        on_bad_lines=_flutemeta_badline,
    )
    expected = pd.DataFrame(
        {
            "col1": [1, 1, 1, 1],
            "col2": [1, 1, 1, 1],
            "col3": [1, 1, -4, 1],
            "col4": [1, 1, 1, 1],
        }
    )

    assert_frame_equal(result, expected)

0 comments on commit b064215

Please sign in to comment.