Skip to content

Commit

Permalink
enable additional json (#54)
Browse files Browse the repository at this point in the history
* implement additional personal data

* enable additional json file

* Update documents.csv

* Update .test documents.csv

* error correction
  • Loading branch information
josefawelling authored Jun 22, 2021
1 parent eedbca4 commit 4a40f8a
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 10 deletions.
8 changes: 4 additions & 4 deletions .tests/config/pep/documents.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample_name,fhir_metadata,compressed_docs
1,data/1111111111.otto_normalverbraucher_jpgs.patient.json,data/1111111111.otto_normalverbraucher_jpgs.tar.lz4
2,data/2222222222.maxime_mustermann_jpgs.patient.json,data/2222222222.maxime_mustermann_jpgs.tar.lz4
3,data/v00076462.maxime_mustermann_tiff.patient.json,data/v00076462.maxime_mustermann_tiff.tar.lz4
sample_name,fhir_metadata,compressed_docs,additional_metadata
1,data/1111111111.otto_normalverbraucher_jpgs.patient.json,data/1111111111.otto_normalverbraucher_jpgs.tar.lz4,
2,data/2222222222.maxime_mustermann_jpgs.patient.json,data/2222222222.maxime_mustermann_jpgs.tar.lz4,
3,data/v00076462.maxime_mustermann_tiff.patient.json,data/v00076462.maxime_mustermann_tiff.tar.lz4,
4 changes: 2 additions & 2 deletions config/pep/documents.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
sample_name,fhir_metadata,compressed_docs
1,data/fhire_meta_data.json,data/path_to.lz4
sample_name,fhir_metadata,compressed_docs,additional_metadata
1,data/fhire_meta_data.json,data/path_to.lz4,data/path_to_additional_metadata.json
9 changes: 8 additions & 1 deletion workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,18 @@ def get_compressed_docs(wildcards):
def get_fhir_metadata(wildcards):
    """Return the ``fhir_metadata`` entry of the sample named by ``wildcards.id``.

    The sample is looked up by label in ``pep.sample_table`` and the
    ``fhir_metadata`` field is selected with a list-style selector, so the
    result is the selection object itself rather than a bare value.
    """
    sample_row = pep.sample_table.loc[wildcards.id]
    return sample_row[["fhir_metadata"]]

def get_additional_metadata(wildcards):
    """Return the optional additional-metadata input for ``wildcards.id``.

    The ``additional_metadata`` column of ``pep.sample_table`` is optional,
    and individual samples may leave it empty.

    Returns:
        The ``additional_metadata`` selection of the sample row when the
        column exists and holds a non-blank string for this sample;
        otherwise an empty list, so that no extra input is requested.
    """
    if "additional_metadata" not in pep.sample_table.columns:
        return []

    sample_row = pep.sample_table.loc[wildcards.id]
    value = sample_row["additional_metadata"]

    # isinstance() also accepts str subclasses (e.g. numpy.str_), which an
    # exact ``type(...) == str`` comparison would reject and silently drop.
    # Non-string cells (presumably NaN/None for empty CSV fields) and blank
    # strings both mean "no additional file configured".
    if not isinstance(value, str) or not value.strip():
        return []

    return sample_row[["additional_metadata"]]

def get_all_ids():
    """Return every entry of the ``sample_name`` column of ``pep.sample_table`` as a list."""
    sample_names = pep.sample_table["sample_name"]
    return sample_names.to_list()


def get_image_paths_for_id(wildcards):
with checkpoints.fix_file_ext.get(id=wildcards.id).output[0].open() as f:
paths = pd.read_csv(f, sep="\n", header=None, squeeze=True)
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/extract-data.smk
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Run the extract-personal-data script for one sample id and write its
# result to a temporary JSON file under results/{id}/tmp/.
rule extract_personal_data:
    input:
        # Both entries are input functions that are called with the wildcards.
        get_fhir_metadata,
        # get_additional_metadata returns an empty list when no additional
        # file is configured, so this entry may contribute no input at all.
        get_additional_metadata,
    output:
        # temp(): marks the JSON file as a temporary output.
        temp("results/{id}/tmp/personal-data.json"),
    log:
        "logs/{id}/extract_personal_data.log",
    script:
        "../scripts/extract-personal-data.py"


rule extract_lz4_docs:
input:
get_compressed_docs,
Expand Down
16 changes: 14 additions & 2 deletions workflow/scripts/extract-personal-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def parse_meta_data(json_path: str) -> defaultdict:
with open(json_path) as json_file:
data = json.load(json_file)

# select the patient reosurce from the bundel data export
# select the patient resource from the bundle data export
for ele in data.get("entry", {}):
# iterate over the entries
for key, value in ele.get("resource", {}).items():
Expand Down Expand Up @@ -141,6 +141,13 @@ def variate_personal_data(personal_data: dict, first_name_count: int) -> default

return personal_data

def add_additional_personal_data(add_json_path: str, personal_data: dict) -> dict:
    """Merge the content of an additional JSON file into the personal data.

    Args:
        add_json_path: Path to the JSON file holding the additional data.
        personal_data: Personal data collected so far. It is updated in
            place; keys that also occur in the additional file are
            overwritten by the values from that file.

    Returns:
        The same ``personal_data`` object, now including the additional data.
    """
    # JSON is exchanged as UTF-8; naming the encoding keeps non-ASCII values
    # (e.g. umlauts in names) intact regardless of the platform's locale.
    with open(add_json_path, encoding="utf-8") as json_file:
        additional_data = json.load(json_file)

    personal_data.update(additional_data)
    return personal_data

def save_personal_data(personal_data: dict, out_path: str):
"""Save the final dic with the personal data as json.
Expand Down Expand Up @@ -169,7 +176,12 @@ def save_personal_data(personal_data: dict, out_path: str):

# personal_data = {key: value.lower().strip() for key, value in personal_data.items()}
# Normalise every value: lower-case and strip surrounding whitespace.
var_data = {key: value.lower().strip() for key, value in var_data.items()}

# A second input is only present when an additional metadata JSON file was
# supplied for this sample; merge it into the personal data before saving.
if len(snakemake.input) > 1:
    var_data = add_additional_personal_data(snakemake.input[1], var_data)

# Write the output exactly once, after any additional data has been merged
# (previously an unconditional save ran before the branch and the file was
# then written a second time).
save_personal_data(var_data, snakemake.output[0])

# def enrich_personal_data(personal_data: dict) -> dict:
# personal_data["names_combined"] = ",".join(
Expand Down

0 comments on commit 4a40f8a

Please sign in to comment.