From a20adf7df1e499dfac4d74d5dbd31dc42784f702 Mon Sep 17 00:00:00 2001 From: Natasha Singh Date: Fri, 6 Oct 2023 21:22:33 -0400 Subject: [PATCH] :bug: Don't filter out authz = [] patches when testing After implementing new rules which set authz = [] some tests fail This is because values get filtered out of patches if they are equal to what is already in dataservice (avoids extra API calls). The authz field will always be = [] in dataservice during tests since it is not connected to indexd. That means any GF patches which include authz = [] will be filtered out causing tests to fail --- kf_update_dbgap_consent/sample_status.py | 27 ++- tests/data/phs999999_dataservice.json | 280 +++++++++++++---------- tests/data/phs999999_patches.json | 152 ++++++------ 3 files changed, 255 insertions(+), 204 deletions(-) diff --git a/kf_update_dbgap_consent/sample_status.py b/kf_update_dbgap_consent/sample_status.py index 26e672a..d11f3e7 100644 --- a/kf_update_dbgap_consent/sample_status.py +++ b/kf_update_dbgap_consent/sample_status.py @@ -53,6 +53,7 @@ """ +from pprint import pprint from collections import defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed @@ -63,6 +64,11 @@ from kf_utils.dbgap.release import get_latest_sample_status +def is_localhost(url): + hosts = {"localhost", "127.0.0.1"} + return any(hostname in url for hostname in hosts) + + class ConsentProcessor: def __init__(self, api_url, db_url=None): self.api_url = api_url @@ -96,7 +102,7 @@ def get_patches_for_study( print(f"Found accession ID: {study_phs}") open_acl = {"/open"} empty_acl = set() - default_acl = empty_acl + default_acl = [study_id] alerts = [] patches = defaultdict(lambda: defaultdict(dict)) @@ -170,6 +176,9 @@ def entities_dict(endpoint, filt): hidden_genomic_files = set( k for k, e in storage["genomic-files"].items() if not e["visible"] ) + print("**************") + for entity, entities in storage.items(): + print(f"*** {entity} count: {len(entities)}") """ Rule: For all samples in the sample status file which are not found in @@ -310,8 +319,18 @@ def entities_dict(endpoint, filt): ) # remove known unneeded patches + def cmp(a, b, field_name): + # Values get filtered out if they are equal to what + # is already in dataservice. + # This matters for the authz field bc it will always + # be equal to [] since local dataservice is not connected to + # indexd. Therefore when we try to patch a GF with + # authz = [], this will get filtered out and + # tests will fail + # So when testing with localhost we force a patch with authz + if field_name == "authz" and is_localhost(self.api_url): + return False - def cmp(a, b): if isinstance(a, list) and isinstance(b, list): return sorted(a) == sorted(b) else: @@ -326,7 +345,7 @@ def cmp(a, b): (endpoint in storage) and (kfid in storage[endpoint]) and (k in storage[endpoint][kfid]) - and (cmp(storage[endpoint][kfid][k], v)) + and cmp(storage[endpoint][kfid][k], v, k) ) } for kfid, patch in ep_patches.items() @@ -338,6 +357,8 @@ def cmp(a, b): for endpoint, ep_patches in patches.items() } patches = {k: v for k, v in patches.items() if v} + # from pprint import pprint # breakpoint() + return patches, alerts diff --git a/tests/data/phs999999_dataservice.json b/tests/data/phs999999_dataservice.json index 5ce5150..d12e98d 100644 --- a/tests/data/phs999999_dataservice.json +++ b/tests/data/phs999999_dataservice.json @@ -1,126 +1,160 @@ { - "studies": { - "SD_00000000": { - "data_access_authority": "dbGaP", - "external_id": "phs999999" - } - }, - "participants": { - "PT_11111111": { - "study_id": "SD_00000000", - "external_id": "test_subject_1" - }, - "PT_22222222": { - "study_id": "SD_00000000", - "external_id": "test_subject_2" - } - }, - "sequencing-centers": { - "SC_11111111": { - "name": "test_center" - } - }, - "biospecimens": { - "BS_11111111": { - "participant_id": "PT_11111111", - "external_sample_id": "test_sample_1", - "sequencing_center_id": "SC_11111111", - "analyte_type": "DNA" - }, - "BS_22222222": { - "participant_id": "PT_22222222", - "external_sample_id": "test_sample_2", - "sequencing_center_id": "SC_11111111", - "analyte_type": "DNA" - }, - "BS_33333333": { - "participant_id": "PT_22222222", - "external_sample_id": "test_sample_3", - "sequencing_center_id": "SC_11111111", - "analyte_type": "DNA" - } - }, - "diagnoses": { - "DG_11111111": {"participant_id": "PT_11111111"}, - "DG_22222222": {"participant_id": "PT_22222222"} - }, - "biospecimen-diagnoses": { - "BD_11111111": { - "biospecimen_id": "BS_11111111", - "diagnosis_id": "DG_11111111" - }, - "BD_22222222": { - "biospecimen_id": "BS_22222222", - "diagnosis_id": "DG_22222222" - }, - "BD_33333333": { - "biospecimen_id": "BS_33333333", - "diagnosis_id": "DG_22222222" - } - }, - "genomic-files": { - "GF_00000000": {"hashes": {}, "size": 1, "urls": [], "controlled_access": false}, - "GF_11111111": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true}, - "GF_22222222": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true}, - "GF_33333333": {"hashes": {}, "size": 1, "urls": [], "controlled_access": true}, - "GF_44444444": {"hashes": {}, "size": 1, "urls": [], "controlled_access": false} - }, - "biospecimen-genomic-files": { - "BG_00000000": { - "biospecimen_id": "BS_11111111", - "genomic_file_id": "GF_00000000" - }, - "BG_11111111": { - "biospecimen_id": "BS_11111111", - "genomic_file_id": "GF_11111111" - }, - "BG_22222222": { - "biospecimen_id": "BS_22222222", - "genomic_file_id": "GF_22222222" - }, - "BG_33333333": { - "biospecimen_id": "BS_33333333", - "genomic_file_id": "GF_33333333" - }, - "BG_44444444": { - "biospecimen_id": "BS_33333333", - "genomic_file_id": "GF_44444444" - } - }, - "sequencing-experiments": { - "SE_11111111": { - "external_id": "SE_11111111", "sequencing_center_id": "SC_11111111", - "is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable" - }, - "SE_22222222": { - "external_id": "SE_22222222", "sequencing_center_id": "SC_11111111", - "is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable" - }, - "SE_33333333": { - "external_id": "SE_33333333", "sequencing_center_id": "SC_11111111", - "is_paired_end": false, "experiment_strategy": "WGS", "platform": "Not Applicable" - } - }, - "sequencing-experiment-genomic-files": { - "SG_00000000": { - "sequencing_experiment_id": "SE_11111111", - "genomic_file_id": "GF_00000000" - }, - "SG_11111111": { - "sequencing_experiment_id": "SE_11111111", - "genomic_file_id": "GF_11111111" - }, - "SG_22222222": { - "sequencing_experiment_id": "SE_22222222", - "genomic_file_id": "GF_22222222" - }, - "SG_33333333": { - "sequencing_experiment_id": "SE_33333333", - "genomic_file_id": "GF_33333333" - }, - "SG_44444444": { - "sequencing_experiment_id": "SE_33333333", - "genomic_file_id": "GF_44444444" - } + "studies": { + "SD_00000000": { + "data_access_authority": "dbGaP", + "external_id": "phs999999" } + }, + "participants": { + "PT_11111111": { + "study_id": "SD_00000000", + "external_id": "test_subject_1" + }, + "PT_22222222": { + "study_id": "SD_00000000", + "external_id": "test_subject_2" + } + }, + "sequencing-centers": { + "SC_11111111": { + "name": "test_center" + } + }, + "biospecimens": { + "BS_11111111": { + "participant_id": "PT_11111111", + "external_sample_id": "test_sample_1", + "sequencing_center_id": "SC_11111111", + "analyte_type": "DNA" + }, + "BS_22222222": { + "participant_id": "PT_22222222", + "external_sample_id": "test_sample_2", + "sequencing_center_id": "SC_11111111", + "analyte_type": "DNA" + }, + "BS_33333333": { + "participant_id": "PT_22222222", + "external_sample_id": "test_sample_3", + "sequencing_center_id": "SC_11111111", + "analyte_type": "DNA" + } + }, + "diagnoses": { + "DG_11111111": { "participant_id": "PT_11111111" }, + "DG_22222222": { "participant_id": "PT_22222222" } + }, + "biospecimen-diagnoses": { + "BD_11111111": { + "biospecimen_id": "BS_11111111", + "diagnosis_id": "DG_11111111" + }, + "BD_22222222": { + "biospecimen_id": "BS_22222222", + "diagnosis_id": "DG_22222222" + }, + "BD_33333333": { + "biospecimen_id": "BS_33333333", + "diagnosis_id": "DG_22222222" + } + }, + "genomic-files": { + "GF_00000000": { + "hashes": {}, + "size": 1, + "urls": [], + "controlled_access": false + }, + "GF_11111111": { + "hashes": {}, + "size": 1, + "urls": [], + "controlled_access": true + }, + "GF_22222222": { + "hashes": {}, + "size": 1, + "urls": [], + "controlled_access": true + }, + "GF_33333333": { + "hashes": {}, + "size": 1, + "urls": [], + "controlled_access": true + }, + "GF_44444444": { + "hashes": {}, + "size": 1, + "urls": [], + "controlled_access": false + } + }, + "biospecimen-genomic-files": { + "BG_00000000": { + "biospecimen_id": "BS_11111111", + "genomic_file_id": "GF_00000000" + }, + "BG_11111111": { + "biospecimen_id": "BS_11111111", + "genomic_file_id": "GF_11111111" + }, + "BG_22222222": { + "biospecimen_id": "BS_22222222", + "genomic_file_id": "GF_22222222" + }, + "BG_33333333": { + "biospecimen_id": "BS_33333333", + "genomic_file_id": "GF_33333333" + }, + "BG_44444444": { + "biospecimen_id": "BS_33333333", + "genomic_file_id": "GF_44444444" + } + }, + "sequencing-experiments": { + "SE_11111111": { + "external_id": "SE_11111111", + "sequencing_center_id": "SC_11111111", + "is_paired_end": false, + "experiment_strategy": "WGS", + "platform": "Not Applicable" + }, + "SE_22222222": { + "external_id": "SE_22222222", + "sequencing_center_id": "SC_11111111", + "is_paired_end": false, + "experiment_strategy": "WGS", + "platform": "Not Applicable" + }, + "SE_33333333": { + "external_id": "SE_33333333", + "sequencing_center_id": "SC_11111111", + "is_paired_end": false, + "experiment_strategy": "WGS", + "platform": "Not Applicable" + } + }, + "sequencing-experiment-genomic-files": { + "SG_00000000": { + "sequencing_experiment_id": "SE_11111111", + "genomic_file_id": "GF_00000000" + }, + "SG_11111111": { + "sequencing_experiment_id": "SE_11111111", + "genomic_file_id": "GF_11111111" + }, + "SG_22222222": { + "sequencing_experiment_id": "SE_22222222", + "genomic_file_id": "GF_22222222" + }, + "SG_33333333": { + "sequencing_experiment_id": "SE_33333333", + "genomic_file_id": "GF_33333333" + }, + "SG_44444444": { + "sequencing_experiment_id": "SE_33333333", + "genomic_file_id": "GF_44444444" + } + } } diff --git a/tests/data/phs999999_patches.json b/tests/data/phs999999_patches.json index 25970d8..90aa6a8 100644 --- a/tests/data/phs999999_patches.json +++ b/tests/data/phs999999_patches.json @@ -1,87 +1,83 @@ { - "biospecimen-diagnoses": { - "BD_11111111": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - } + "biospecimen-diagnoses": { + "BD_11111111": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + } + }, + "biospecimen-genomic-files": { + "BG_00000000": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + }, + "BG_11111111": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + } + }, + "biospecimens": { + "BS_11111111": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + }, + "BS_22222222": { + "consent_type": "LOL", + "dbgap_consent_code": "phs999999.c1" }, - "biospecimen-genomic-files": { - "BG_00000000": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - }, - "BG_11111111": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - } + "BS_33333333": { + "consent_type": "HMB", + "dbgap_consent_code": "phs999999.c2" + } + }, + "genomic-files": { + "GF_00000000": { + "authz": [], + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" }, - "biospecimens": { - "BS_11111111": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - }, - "BS_22222222": { - "consent_type": "LOL", - "dbgap_consent_code": "phs999999.c1" - }, - "BS_33333333": { - "consent_type": "HMB", - "dbgap_consent_code": "phs999999.c2" - } + "GF_11111111": { + "authz": [], + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" }, - "genomic-files": { - "GF_00000000": { - "authz": [], - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - }, - "GF_11111111": { - "authz": [], - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - }, - "GF_22222222": { - "authz": [ - "/programs/phs999999.c1" - ] - }, - "GF_33333333": { - "authz": [ - "/programs/phs999999.c2" - ] - }, - "GF_44444444": { - "authz": ["/open"] - } + "GF_22222222": { + "authz": ["/programs/phs999999.c1"] }, - "sequencing-experiment-genomic-files": { - "SG_00000000": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - }, - "SG_11111111": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - } + "GF_33333333": { + "authz": ["/programs/phs999999.c2"] }, - "sequencing-experiments": { - "SE_11111111": { - "visible": false, - "visibility_reason": "Consent Hold", - "visibility_comment": "Sample is not registered in dbGaP" - } + "GF_44444444": { + "authz": ["/open"] + } + }, + "sequencing-experiment-genomic-files": { + "SG_00000000": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" }, - "studies": { - "SD_00000000": { - "version": "v1.p1" - } + "SG_11111111": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + } + }, + "sequencing-experiments": { + "SE_11111111": { + "visible": false, + "visibility_reason": "Consent Hold", + "visibility_comment": "Sample is not registered in dbGaP" + } + }, + "studies": { + "SD_00000000": { + "version": "v1.p1" } + } }