Skip to content

Commit

Permalink
feat!: remove normalized_id and leverage `MappableConcept.primaryCode` (#216)
Browse files Browse the repository at this point in the history

close #213

* also updates mappings to include an `exactMatch` relation for the merged
concept identifier
  • Loading branch information
korikuzma authored Dec 31, 2024
1 parent 7ce49c1 commit a15eaeb
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 20 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ $ curl -s 'https://normalize.cancervariants.org/disease/normalize?q=liver%20canc
"query": "liver cancer",
"warnings": null,
"match_type": 80,
"normalized_id": "ncit:C34803",
"disease": {
"type": "Disease",
"conceptType": "Disease",
"primaryCode": "ncit:C34803",
"id": "normalize.disease.ncit:C34803",
"label": "Primary Malignant Liver Neoplasm",
# ...
Expand All @@ -55,7 +55,7 @@ Or utilize the [Python API](https://disease-normalizer.readthedocs.io/latest/ref
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("NSCLC")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'
```

Expand Down
8 changes: 4 additions & 4 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ A `public REST instance of the service <https://normalize.cancervariants.org/dis
>>> import requests
>>> result = requests.get("https://normalize.cancervariants.org/disease/normalize?q=nsclc").json()
>>> result["normalized_id"]
>>> result["disease"]["primaryCode"]
'ncit:C2926'
>>> result["disease"]["aliases"][:5]
>>> next(ext for ext in result["disease"]["extensions"] if ext["name"] == "aliases")["value"][:5]
['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung']
The Disease Normalizer can also be installed locally as a Python package for fast access:
Expand All @@ -37,9 +37,9 @@ The Disease Normalizer can also be installed locally as a Python package for fas
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("nsclc")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'
>>> result.disease.aliases[:5]
>>> next(ext for ext in result.disease.extensions if ext.name == "aliases").value[:5]
['Non-Small Cell Carcinoma of Lung', 'NSCLC - non-small cell lung cancer', 'Non-small cell lung cancer', 'Non-Small Cell Carcinoma of the Lung', 'non-small cell cancer of the lung']
The Disease Normalizer was created to support the `Knowledgebase Integration Project <https://cancervariants.org/projects/integration/>`_ of the `Variant Interpretation for Cancer Consortium (VICC) <https://cancervariants.org/>`_. It is developed primarily by the `Wagner Lab <https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab>`_. Full source code is available on `GitHub <https://github.com/cancervariants/disease-normalization>`_.
Expand Down
2 changes: 1 addition & 1 deletion docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ The beginning of the response to a GET request to http://localhost:5000/disease/
{
"query": "nsclc",
"match_type": 60,
"normalized_id": "ncit:C2926",
"disease": {
"id": "normalize.disease.ncit:C2926",
"primaryCode": "ncit:C2926",
"label": "Lung Non-Small Cell Carcinoma",
...
Expand Down
14 changes: 7 additions & 7 deletions src/disease/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,6 @@ def _add_merged_meta(self, response: dict) -> dict:
disease = response["disease"]

sources = []
concept_id_source = response["normalized_id"].split(":")[0]
if concept_id_source in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[concept_id_source])

for m in disease.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system, "").lower()
if ns in PREFIX_LOOKUP:
Expand Down Expand Up @@ -361,13 +357,18 @@ def _create_concept_mapping(

disease_obj = MappableConcept(
id=f"normalize.disease.{record['concept_id']}",
primaryCode=code(root=record["concept_id"]),
conceptType="Disease",
label=record["label"],
extensions=[],
)

# mappings
mappings = [
_create_concept_mapping(record["concept_id"], relation=Relation.EXACT_MATCH)
]
source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = [_create_concept_mapping(source_id) for source_id in source_ids]
mappings.extend(_create_concept_mapping(source_id) for source_id in source_ids)
if mappings:
disease_obj.mappings = mappings

Expand All @@ -378,7 +379,6 @@ def _create_concept_mapping(

response["match_type"] = match_type
response["disease"] = disease_obj
response["normalized_id"] = record["concept_id"]
response = self._add_merged_meta(response)
return NormalizationService(**response)

Expand Down Expand Up @@ -432,7 +432,7 @@ def normalize(self, query: str) -> NormalizationService:
>>> from disease.database import create_db
>>> q = QueryHandler(create_db())
>>> result = q.normalize("NSCLC")
>>> result.normalized_id
>>> result.disease.primaryCode.root
'ncit:C2926'
:param query: String to find normalized concept for
Expand Down
10 changes: 8 additions & 2 deletions src/disease/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,6 @@ class NormalizationService(BaseModel):
query: StrictStr
warnings: dict | None = None
match_type: MatchType
normalized_id: str | None = None
disease: MappableConcept | None = None
source_meta_: dict[SourceName, SourceMeta] | None = None
service_meta_: ServiceMeta
Expand All @@ -307,12 +306,19 @@ class NormalizationService(BaseModel):
"query": "childhood leukemia",
"warnings": None,
"match_type": 80,
"normalized_id": "ncit:C4989",
"disease": {
"id": "normalize.disease.ncit:C4989",
"primaryCode": "ncit:C4989",
"conceptType": "Disease",
"label": "Childhood Leukemia",
"mappings": [
{
"coding": {
"code": "ncit:C4989",
"system": "https://www.ebi.ac.uk/ols4/ontologies/ncit/classes?short_form=NCIT_",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "mondo:0004355",
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_normalize(api_client):
"""Test /normalize endpoint."""
response = api_client.get("/disease/normalize?q=neuroblastoma")
assert response.status_code == 200
assert response.json()["normalized_id"] == "ncit:C3270"
assert response.json()["disease"]["primaryCode"] == "ncit:C3270"

response = api_client.get("/disease/normalize")
assert response.status_code == 422
30 changes: 28 additions & 2 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime

import pytest
from ga4gh.core.models import Extension, MappableConcept
from ga4gh.core.models import Extension, MappableConcept, code

from disease.query import InvalidParameterException, QueryHandler
from disease.schemas import MatchType, SourceName
Expand All @@ -21,8 +21,16 @@ def neuroblastoma():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.ncit:C3270",
primaryCode=code(root="ncit:C3270"),
label="Neuroblastoma",
mappings=[
{
"coding": {
"code": "ncit:C3270",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "exactMatch",
},
{
"coding": {
"code": "mondo:0005072",
Expand Down Expand Up @@ -129,7 +137,17 @@ def skin_myo():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.ncit:C167370",
primaryCode=code(root="ncit:C167370"),
label="Skin Myoepithelioma",
mappings=[
{
"coding": {
"code": "ncit:C167370",
"system": "http://purl.obolibrary.org/obo/ncit.owl",
},
"relation": "exactMatch",
},
],
extensions=[Extension(name="aliases", value=["Cutaneous Myoepithelioma"])],
)

Expand All @@ -142,8 +160,16 @@ def mafd2():
return MappableConcept(
conceptType="Disease",
id="normalize.disease.mondo:0010648",
primaryCode=code(root="mondo:0010648"),
label="major affective disorder 2",
mappings=[
{
"coding": {
"code": "mondo:0010648",
"system": "http://purl.obolibrary.org/obo/mondo.owl",
},
"relation": "exactMatch",
},
{
"coding": {"code": "MIM:309200", "system": "https://www.omim.org"},
"relation": "relatedMatch",
Expand Down Expand Up @@ -196,7 +222,7 @@ def mafd2():

def compare_disease(actual, fixture):
"""Verify correctness of returned Disease core object against test fixture."""
assert actual.normalized_id == fixture.id.split("normalize.disease.")[-1]
assert actual.disease.primaryCode.root == fixture.id.split("normalize.disease.")[-1]
actual = actual.disease
actual_keys = actual.model_dump(exclude_none=True).keys()
fixture_keys = fixture.model_dump(exclude_none=True).keys()
Expand Down

0 comments on commit a15eaeb

Please sign in to comment.