Skip to content

Commit

Permalink
refactoring join queries
Browse files Browse the repository at this point in the history
  • Loading branch information
costero-e committed Oct 30, 2024
1 parent ce63c94 commit 18b1f3d
Show file tree
Hide file tree
Showing 11 changed files with 638 additions and 356 deletions.
41 changes: 34 additions & 7 deletions beacon/connections/mongo/analyses.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

from beacon.request.parameters import RequestParams
from beacon.response.schemas import DefaultSchemas
import yaml
Expand All @@ -23,7 +24,7 @@ def get_analyses(self, entry_id: Optional[str], qparams: RequestParams, dataset:
elif query_parameters == {'$and': []}:# pragma: no cover
query_parameters = {}
query={}
query = apply_filters(self, query, qparams.query.filters, collection, query_parameters)
query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset)
schema = DefaultSchemas.ANALYSES
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -39,7 +40,7 @@ def get_analysis_with_id(self, entry_id: Optional[str], qparams: RequestParams,
collection = 'analyses'
idq="biosampleId"
mongo_collection = client.beacon.analyses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
schema = DefaultSchemas.ANALYSES
include = qparams.query.include_resultset_responses
Expand All @@ -55,17 +56,43 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara
collection = 'analyses'
mongo_collection = client.beacon.genomicVariations
query = {"$and": [{"id": entry_id}]}
query = apply_filters(self, query, qparams.query.filters, collection, {})
query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset)
analysis_ids = client.beacon.analyses \
.find_one(query, {"biosampleId": 1, "_id": 0})
query = {"caseLevelData.biosampleId": analysis_ids["biosampleId"]}
query = apply_filters(self, query, qparams.query.filters, collection, {})
targets = client.beacon.targets \
.find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0})
position=0
bioids=targets[0]["biosampleIds"]
for bioid in bioids:
if bioid == analysis_ids["biosampleId"]:
break
position+=1
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
listHGVS=[]
for HGVSId in HGVSIds:
justid=HGVSId["id"]
listHGVS.append(justid)
queryHGVS["$in"]=listHGVS
query["identifiers.genomicHGVSId"]=queryHGVS
query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.GENOMICVARIATIONS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
limit = 100# pragma: no cover
idq="caseLevelData.biosampleId"
count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq)
return schema, count, dataset_count, docs, dataset
count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq)
42 changes: 34 additions & 8 deletions beacon/connections/mongo/biosamples.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_biosamples(self, entry_id: Optional[str], qparams: RequestParams, datase
elif query_parameters == {'$and': []}:# pragma: no cover
query_parameters = {}
query={}
query = apply_filters(self, query, qparams.query.filters, collection, query_parameters)
query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset)
schema = DefaultSchemas.BIOSAMPLES
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -38,7 +38,7 @@ def get_biosamples(self, entry_id: Optional[str], qparams: RequestParams, datase
def get_biosample_with_id(self, entry_id: Optional[str], qparams: RequestParams, dataset: str):
collection = 'biosamples'
mongo_collection = client.beacon.biosamples
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
schema = DefaultSchemas.BIOSAMPLES
include = qparams.query.include_resultset_responses
Expand All @@ -54,8 +54,35 @@ def get_biosample_with_id(self, entry_id: Optional[str], qparams: RequestParams,
def get_variants_of_biosample(self, entry_id: Optional[str], qparams: RequestParams, dataset: str):
collection = 'g_variants'
mongo_collection = client.beacon.genomicVariations
query = {"caseLevelData.biosampleId": entry_id}
query = apply_filters(self, query, qparams.query.filters, collection, {})
targets = client.beacon.targets \
.find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0})
position=0
bioids=targets[0]["biosampleIds"]
for bioid in bioids:
if bioid == entry_id:
break
position+=1
position=str(position)
position1="^"+position+","
position2=","+position+","
position3=","+position+"$"
query_cl={ "$or": [
{"biosampleIds": {"$regex": position1}},
{"biosampleIds": {"$regex": position2}},
{"biosampleIds": {"$regex": position3}}
]}
string_of_ids = client.beacon.caseLevelData \
.find(query_cl, {"id": 1, "_id": 0})
HGVSIds=list(string_of_ids)
query={}
queryHGVS={}
listHGVS=[]
for HGVSId in HGVSIds:
justid=HGVSId["id"]
listHGVS.append(justid)
queryHGVS["$in"]=listHGVS
query["identifiers.genomicHGVSId"]=queryHGVS
query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.GENOMICVARIATIONS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -71,7 +98,7 @@ def get_analyses_of_biosample(self, entry_id: Optional[str], qparams: RequestPar
collection = 'biosamples'
mongo_collection = client.beacon.analyses
query = {"biosampleId": entry_id}
query = apply_filters(self, query, qparams.query.filters, collection, {})
query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.ANALYSES
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -87,13 +114,12 @@ def get_runs_of_biosample(self, entry_id: Optional[str], qparams: RequestParams,
collection = 'biosamples'
mongo_collection = client.beacon.runs
query = {"individualId": entry_id}
query = apply_filters(self, query, qparams.query.filters, collection, {})
query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.RUNS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
limit = 100# pragma: no cover
idq="biosampleId"
count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq)
return schema, count, dataset_count, docs, dataset
count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq)
24 changes: 12 additions & 12 deletions beacon/connections/mongo/cohorts.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def get_cohorts(self, entry_id: Optional[str], qparams: RequestParams):
collection = 'cohorts'
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, "a")
schema = DefaultSchemas.COHORTS
count = get_count(self, client.beacon.cohorts, query)
docs = get_documents(self,
Expand All @@ -31,7 +31,7 @@ def get_cohorts(self, entry_id: Optional[str], qparams: RequestParams):
def get_cohort_with_id(self, entry_id: Optional[str], qparams: RequestParams):
collection = 'cohorts'
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, "a")
query = query_id(self, query, entry_id)
schema = DefaultSchemas.COHORTS
count = get_count(self, client.beacon.cohorts, query)
Expand All @@ -53,12 +53,12 @@ def get_individuals_of_cohort(self, entry_id: Optional[str], qparams: RequestPar
dataset_count=0
limit = qparams.query.pagination.limit
include = qparams.query.include_resultset_responses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.cohorts, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)

schema = DefaultSchemas.INDIVIDUALS
skip = qparams.query.pagination.skip
Expand All @@ -75,12 +75,12 @@ def get_analyses_of_cohort(self, entry_id: Optional[str], qparams: RequestParams
dataset_count=0
limit = qparams.query.pagination.limit
include = qparams.query.include_resultset_responses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.cohorts, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.ANALYSES
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
Expand All @@ -96,7 +96,7 @@ def get_variants_of_cohort(self,entry_id: Optional[str], qparams: RequestParams,
dataset_count=0
limit = qparams.query.pagination.limit
include = qparams.query.include_resultset_responses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.cohorts, query)
query_count={}
Expand All @@ -117,7 +117,7 @@ def get_variants_of_cohort(self,entry_id: Optional[str], qparams: RequestParams,
else:
schema = DefaultSchemas.GENOMICVARIATIONS# pragma: no cover
return schema, 0, 0, None, dataset# pragma: no cover
query = apply_filters(self, query_count, qparams.query.filters, collection, {})
query = apply_filters(self, query_count, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.GENOMICVARIATIONS
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
Expand All @@ -133,12 +133,12 @@ def get_runs_of_cohort(self, entry_id: Optional[str], qparams: RequestParams, da
dataset_count=0
limit = qparams.query.pagination.limit
include = qparams.query.include_resultset_responses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.cohorts, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.RUNS
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
Expand All @@ -154,12 +154,12 @@ def get_biosamples_of_cohort(self, entry_id: Optional[str], qparams: RequestPara
dataset_count=0
limit = qparams.query.pagination.limit
include = qparams.query.include_resultset_responses
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.cohorts, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.BIOSAMPLES
skip = qparams.query.pagination.skip
if limit > 100 or limit == 0:
Expand Down
18 changes: 9 additions & 9 deletions beacon/connections/mongo/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_variants_of_dataset(self, entry_id: Optional[str], qparams: RequestParam
else:
schema = DefaultSchemas.GENOMICVARIATIONS# pragma: no cover
return schema, 0, 0, None, dataset# pragma: no cover
query = apply_filters(self, query_count, qparams.query.filters, collection, {})
query = apply_filters(self, query_count, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.GENOMICVARIATIONS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -107,12 +107,12 @@ def get_biosamples_of_dataset(self, entry_id: Optional[str], qparams: RequestPar
mongo_collection = client.beacon.biosamples
dataset_count=0
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.datasets, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.BIOSAMPLES
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -129,12 +129,12 @@ def get_individuals_of_dataset(self, entry_id: Optional[str], qparams: RequestPa
mongo_collection = client.beacon.individuals
dataset_count=0
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.datasets, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.INDIVIDUALS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -151,12 +151,12 @@ def get_runs_of_dataset(self, entry_id: Optional[str], qparams: RequestParams, d
mongo_collection = client.beacon.runs
dataset_count=0
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.datasets, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.RUNS
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand All @@ -177,12 +177,12 @@ def get_analyses_of_dataset(self, entry_id: Optional[str], qparams: RequestParam
mongo_collection = client.beacon.analyses
dataset_count=0
limit = qparams.query.pagination.limit
query = apply_filters(self, {}, qparams.query.filters, collection, {})
query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset)
query = query_id(self, query, entry_id)
count = get_count(self, client.beacon.datasets, query)
dict_in={}
dict_in['datasetId']=dataset
query = apply_filters(self, dict_in, qparams.query.filters, collection, {})
query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset)
schema = DefaultSchemas.ANALYSES
include = qparams.query.include_resultset_responses
limit = qparams.query.pagination.limit
Expand Down
2 changes: 1 addition & 1 deletion beacon/connections/mongo/extract_filtering_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def insert_all_ontology_terms_used():
collections.remove('filtering_terms')
print("Collections:", collections)
for c_name in collections:
if c_name not in ['counts', 'similarities', 'synonyms']:
if c_name not in ['counts', 'similarities', 'synonyms', 'caseLevelData', 'targets']:
terms_ids = find_ontology_terms_used(c_name)
terms = get_filtering_object(terms_ids, c_name)
if len(terms) > 0:
Expand Down
Loading

0 comments on commit 18b1f3d

Please sign in to comment.