Skip to content

Commit

Permalink
Merge pull request #88 from FHIR/vcf2json
Browse files Browse the repository at this point in the history
Molec Conseq API bug fixes
  • Loading branch information
rhdolin authored Jun 11, 2024
2 parents c4208e6 + acd9094 commit 8fb3420
Show file tree
Hide file tree
Showing 25 changed files with 3,188 additions and 2,806 deletions.
57 changes: 21 additions & 36 deletions app/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ def get_liftover(from_db, to_db):
}

SUPPORTED_GENE_SYSTEM_URLS = r'^https?:\/\/www\.genenames.org\/geneId$'
# SUPPORTED_FEATURE_CONSEQUENCE_SYSTEM_URLS = r'^http?:\/\/www\.sequenceontology.org\/$'
SUPPORTED_FEATURE_CONSEQUENCE_SYSTEM_URLS = 'http://sequenceontology.org'

SUPPORTED_DATE_FORMAT = '%Y-%m-%d'

Expand Down Expand Up @@ -234,6 +236,23 @@ def get_gene(gene):
return gene_return


def get_feature_consequence(feature_consequence):
    """Normalize a feature-consequence query value.

    The value is stripped of surrounding whitespace and then interpreted as
    either a bare value (no ``|``) or a single ``codesystem|code`` pair whose
    code system is exactly ``http://sequenceontology.org``.

    Args:
        feature_consequence: Raw string supplied by the caller.

    Returns:
        A dict with the keys ``isSystem``, ``feature_consequence`` and
        ``system``. For a bare value ``isSystem`` is ``False`` and ``system``
        is ``None``; for an accepted pair ``isSystem`` is ``True``,
        ``feature_consequence`` holds the code part and ``system`` holds the
        code-system URL.

    Any other value containing ``|`` (more than one separator, or a different
    code system) is rejected through ``abort(400, ...)``.
    NOTE(review): ``abort`` is defined outside this block - presumably the
    Flask/werkzeug helper that raises an HTTP 400 error; confirm.
    """
    feature_consequence = feature_consequence.strip()
    feature_consequence_return = {
        'isSystem': False,
        'feature_consequence': feature_consequence,
        'system': None,
    }

    # A bare value carries no code system; hand it back unchanged.
    if "|" not in feature_consequence:
        return feature_consequence_return

    # Split once on the first separator. Any further '|' ends up in `code`,
    # which is how inputs with more than one separator are detected.
    system_url, _, code = feature_consequence.partition("|")

    # Same literal as the module-level SUPPORTED_FEATURE_CONSEQUENCE_SYSTEM_URLS.
    if "|" not in code and system_url == 'http://sequenceontology.org':
        feature_consequence_return['isSystem'] = True
        feature_consequence_return['feature_consequence'] = code
        feature_consequence_return['system'] = system_url
    else:
        abort(400, f'feature_consequence ({feature_consequence}) is not in the correct format(codesystem|code)')

    return feature_consequence_return


def get_haplotype(haplotype):
haplotype = haplotype.strip()
haplotype_return = {'isSystem': False, 'haplotype': haplotype, 'system': None}
Expand Down Expand Up @@ -714,7 +733,7 @@ def create_molecular_consequence_profile(molecular_consequence, subject, vids):
if 'impact' in molecular_consequence:
resource["component"].append({"code": {"coding": [{"system": "http://hl7.org/fhir/uv/genomics-reporting/CodeSystem/tbd-codes-cs",
"code": "functional-effect",
"display": "Functional Effectt"}]},
"display": "Functional Effect"}]},
"valueCodeableConcept": {"text": f"{molecular_consequence['impact']}"}})

return resource
Expand Down Expand Up @@ -1859,8 +1878,7 @@ def query_molecular_consequences_by_variants(normalized_variant_list, feature_co
if "GRCh38" in item:
variant_list.append(item["GRCh38"])

pipeline_part = [{'$match': {'$expr': {'$and': [{'$or': [{'$eq': ['$variantID', '$$myvariant_id']}]}]}}},
{'$addFields': {}}]
pipeline_part = [{'$match': {'$expr': {'$and': [{'$or': [{'$eq': ['$variantID', '$$myvariant_id']}]}]}}}]

if feature_consequence_list != []:
pipeline_part.append({'$match': {'$or': []}})
Expand All @@ -1875,46 +1893,13 @@ def query_molecular_consequences_by_variants(normalized_variant_list, feature_co
{'featureConsequence.display': {'$regex': ".*"+str(feature_consequence['feature_consequence']).replace('*', r'\*')+".*"}}
]})
pipeline_part[-1]['$match']['$or'] = or_query
pipeline_part.append({"$unwind": "$featureConsequence"})
pipeline_part.append({'$match': {'$or': or_query}})
pipeline_part.append({"$group": {
"patientID": {
"$first": "$$ROOT.patientID"
},
"variantID": {
"$first": "$$ROOT.variantID"
},
"transcriptRefSeq": {
"$first": "$$ROOT.transcriptRefSeq"
},
"MANE": {
"$first": "$$ROOT.MANE"
},
"source": {
"$first": "$$ROOT.source"
},
"cHGVS": {
"$first": "$$ROOT.cHGVS"
},
"pHGVS": {
"$first": "$$ROOT.pHGVS"
},
"featureConsequence": {
"$push": "$$ROOT.featureConsequence"
},
"impact": {
"$first": "$$ROOT.impact"
}
}})

query['SPDI'] = {'$in': variant_list}

query_string = [{'$match': query},
{'$lookup': {'from': 'MolecConseq', 'let': {'myvariant_id': '$_id'}, 'pipeline': pipeline_part,
'as': 'molecularConsequenceMatches'}},
{'$addFields': {}},
{'$match': {'molecularConsequenceMatches': {'$exists': True, '$not': {'$size': 0}}}}]

try:
results = variants_db.aggregate(query_string)
results = list(results)
Expand Down
9 changes: 4 additions & 5 deletions app/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ def find_subject_dx_implications(


def find_subject_molecular_consequences(
subject, variants=None, ranges=None, featureConsequence=None, testIdentifiers=None, testDateRange=None,
subject, variants=None, ranges=None, featureConsequences=None, testIdentifiers=None, testDateRange=None,
specimenIdentifiers=None, genomicSourceClass=None):

# Parameters
Expand All @@ -1175,8 +1175,8 @@ def find_subject_molecular_consequences(
abort(400, "You must supply either 'variants' or 'ranges'.")

normalized_feature_consequence_list = []
if featureConsequence:
normalized_feature_consequence_list = list(map(common.get_feature_consequence, featureConsequence))
if featureConsequences:
normalized_feature_consequence_list = list(map(common.get_feature_consequence, featureConsequences))

# Query
query = {}
Expand Down Expand Up @@ -1238,13 +1238,12 @@ def find_subject_molecular_consequences(

for res in query_results:
if res["molecularConsequenceMatches"]:
result["parameter"].append([])
for molecular_consequence in res["molecularConsequenceMatches"]:
parameter = OrderedDict()
parameter["name"] = "consequence"
molecular_consequence_profile = common.create_molecular_consequence_profile(molecular_consequence, subject, [str(res['_id'])])
parameter["resource"] = molecular_consequence_profile
result["parameter"][0].append(parameter)
result["parameter"].append(parameter)
ref_seq = common.get_ref_seq_by_chrom_and_build(res['genomicBuild'], res['CHROM'])
resource = common.create_fhir_variant_resource(res, ref_seq, subject)
variant_param = {
Expand Down
4 changes: 2 additions & 2 deletions tests/expected_outputs/find_subject_dx_implications/1.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
},
"derivedFrom": [
{
"reference": "Observation/dv-47e768d210124003ac7d6eec303cbf3e"
"reference": "Observation/dv-aad8639bd558470a9ca8612eae011065"
}
],
"component": [
Expand Down Expand Up @@ -100,7 +100,7 @@
"name": "variant",
"resource": {
"resourceType": "Observation",
"id": "dv-47e768d210124003ac7d6eec303cbf3e",
"id": "dv-aad8639bd558470a9ca8612eae011065",
"meta": {
"profile": [
"http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/variant"
Expand Down
88 changes: 44 additions & 44 deletions tests/expected_outputs/find_subject_dx_implications/2.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"name": "implication",
"resource": {
"resourceType": "Observation",
"id": "dv-62fab6475932091e78bff0a1",
"id": "dv-62fab6f25932091e78c68d2f",
"meta": {
"profile": [
"http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/diagnostic-implication"
Expand Down Expand Up @@ -35,7 +35,7 @@
},
"derivedFrom": [
{
"reference": "Observation/dv-0a1fc0f05e9a49779ac7195931024f24"
"reference": "Observation/dv-aad8639bd558470a9ca8612eae011065"
}
],
"component": [
Expand All @@ -53,8 +53,8 @@
"coding": [
{
"system": "http://loinc.org",
"code": "LA6675-8",
"display": "Benign"
"code": "LA26333-7",
"display": "Uncertain significance"
}
]
}
Expand Down Expand Up @@ -100,7 +100,7 @@
"name": "variant",
"resource": {
"resourceType": "Observation",
"id": "dv-0a1fc0f05e9a49779ac7195931024f24",
"id": "dv-aad8639bd558470a9ca8612eae011065",
"meta": {
"profile": [
"http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/variant"
Expand Down Expand Up @@ -192,8 +192,8 @@
"coding": [
{
"system": "http://loinc.org",
"code": "LA6705-3",
"display": "homozygous"
"code": "LA6706-1",
"display": "heterozygous"
}
]
}
Expand All @@ -212,16 +212,16 @@
"coding": [
{
"system": "https://api.ncbi.nlm.nih.gov/variation/v0/",
"code": "NC_000001.10:17380496:G:T",
"display": "NC_000001.10:17380496:G:T"
"code": "NC_000001.10:161333381:C:T",
"display": "NC_000001.10:161333381:C:T"
},
{
"system": "http://www.ncbi.nlm.nih.gov/clinvar",
"code": "44641"
"code": "873683"
},
{
"system": "http://www.ncbi.nlm.nih.gov/clinvar/scv",
"code": "SCV000351450.3"
"code": "SCV001252233.1"
}
]
}
Expand All @@ -237,7 +237,7 @@
]
},
"valueQuantity": {
"value": 1.0,
"value": 0.5,
"unit": "relative frequency of a particular allele in the specimen",
"system": "http://unitsofmeasure.org",
"code": "1"
Expand All @@ -253,7 +253,7 @@
}
]
},
"valueString": "G"
"valueString": "C"
},
{
"code": {
Expand Down Expand Up @@ -299,25 +299,9 @@
},
"valueRange": {
"low": {
"value": 17380496
"value": 161333381
}
}
},
{
"code": {
"coding": [
{
"system": "http://loinc.org",
"code": "92821-8",
"display": "Population allele frequency"
}
]
},
"valueQuantity": {
"value": 0.973659,
"system": "http://unitsofmeasure.org",
"code": "1"
}
}
]
}
Expand All @@ -326,7 +310,7 @@
"name": "implication",
"resource": {
"resourceType": "Observation",
"id": "dv-62fab6f25932091e78c68d2f",
"id": "dv-62fab6475932091e78bff0a1",
"meta": {
"profile": [
"http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/diagnostic-implication"
Expand Down Expand Up @@ -356,7 +340,7 @@
},
"derivedFrom": [
{
"reference": "Observation/dv-47e768d210124003ac7d6eec303cbf3e"
"reference": "Observation/dv-d9c5ad61266c4f3c90e44199a8864662"
}
],
"component": [
Expand All @@ -374,8 +358,8 @@
"coding": [
{
"system": "http://loinc.org",
"code": "LA26333-7",
"display": "Uncertain significance"
"code": "LA6675-8",
"display": "Benign"
}
]
}
Expand Down Expand Up @@ -421,7 +405,7 @@
"name": "variant",
"resource": {
"resourceType": "Observation",
"id": "dv-47e768d210124003ac7d6eec303cbf3e",
"id": "dv-d9c5ad61266c4f3c90e44199a8864662",
"meta": {
"profile": [
"http://hl7.org/fhir/uv/genomics-reporting/StructureDefinition/variant"
Expand Down Expand Up @@ -513,8 +497,8 @@
"coding": [
{
"system": "http://loinc.org",
"code": "LA6706-1",
"display": "heterozygous"
"code": "LA6705-3",
"display": "homozygous"
}
]
}
Expand All @@ -533,16 +517,16 @@
"coding": [
{
"system": "https://api.ncbi.nlm.nih.gov/variation/v0/",
"code": "NC_000001.10:161333381:C:T",
"display": "NC_000001.10:161333381:C:T"
"code": "NC_000001.10:17380496:G:T",
"display": "NC_000001.10:17380496:G:T"
},
{
"system": "http://www.ncbi.nlm.nih.gov/clinvar",
"code": "873683"
"code": "44641"
},
{
"system": "http://www.ncbi.nlm.nih.gov/clinvar/scv",
"code": "SCV001252233.1"
"code": "SCV000351450.3"
}
]
}
Expand All @@ -558,7 +542,7 @@
]
},
"valueQuantity": {
"value": 0.5,
"value": 1.0,
"unit": "relative frequency of a particular allele in the specimen",
"system": "http://unitsofmeasure.org",
"code": "1"
Expand All @@ -574,7 +558,7 @@
}
]
},
"valueString": "C"
"valueString": "G"
},
{
"code": {
Expand Down Expand Up @@ -620,9 +604,25 @@
},
"valueRange": {
"low": {
"value": 161333381
"value": 17380496
}
}
},
{
"code": {
"coding": [
{
"system": "http://loinc.org",
"code": "92821-8",
"display": "Population allele frequency"
}
]
},
"valueQuantity": {
"value": 0.973659,
"system": "http://unitsofmeasure.org",
"code": "1"
}
}
]
}
Expand Down
Loading

0 comments on commit 8fb3420

Please sign in to comment.