Skip to content

Commit

Permalink
feat!: use preferred formats for MappableConcept.mappings (#215)
Browse files Browse the repository at this point in the history
close #212

* `system` will use the OBO Foundry persistent URL (PURL), the source homepage,
or the namespace prefix, in that order of preference, whichever is available first.
* `code` will use the `concept_id` as the CURIE
  • Loading branch information
korikuzma authored Dec 31, 2024
1 parent c666955 commit 7ce49c1
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 46 deletions.
66 changes: 46 additions & 20 deletions src/disease/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@
from disease import NAMESPACE_LOOKUP, PREFIX_LOOKUP, SOURCES_LOWER_LOOKUP, __version__
from disease.database.database import AbstractDatabase
from disease.schemas import (
NAMESPACE_TO_SYSTEM_URI,
SYSTEM_URI_TO_NAMESPACE,
Disease,
MatchType,
NamespacePrefix,
NormalizationService,
RefType,
SearchService,
Expand Down Expand Up @@ -299,19 +302,20 @@ def _add_merged_meta(self, response: dict) -> dict:
"""
sources_meta = {}
disease = response["disease"]
sources = [response["normalized_id"].split(":")[0]]
if disease.mappings:
sources += [m.coding.system for m in disease.mappings]

sources = []
concept_id_source = response["normalized_id"].split(":")[0]
if concept_id_source in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[concept_id_source])

for m in disease.mappings or []:
ns = SYSTEM_URI_TO_NAMESPACE.get(m.coding.system, "").lower()
if ns in PREFIX_LOOKUP:
sources.append(PREFIX_LOOKUP[ns])

for src in sources:
try:
src_name = PREFIX_LOOKUP[src]
except KeyError:
# not an imported source
continue
else:
if src_name not in sources_meta:
sources_meta[src_name] = self.db.get_source_metadata(src_name)
if src not in sources_meta:
sources_meta[src] = self.db.get_source_metadata(src)
response["source_meta_"] = sources_meta
return response

Expand All @@ -325,6 +329,36 @@ def _add_disease(
:param match_type: type of match achieved
:return: completed normalized response object ready to return to user
"""

def _create_concept_mapping(
    concept_id: str, relation: Relation = Relation.RELATED_MATCH
) -> ConceptMapping:
    """Create concept mapping for identifier

    ``system`` will use OBO Foundry persistent URL (PURL), source homepage, or
    namespace prefix, in that order of preference, if available.

    ``code`` is the full ``concept_id`` CURIE.

    :param concept_id: Concept identifier represented as a curie
    :param relation: SKOS mapping relationship, default is relatedMatch
    :return: Concept mapping for identifier
    :raise ValueError: if the CURIE prefix is not a supported namespace prefix,
        either as given or upper-cased
    """
    prefix = concept_id.split(":")[0]

    # Try the prefix as written first, then upper-cased, before giving up.
    try:
        namespace = NamespacePrefix(prefix)
    except ValueError:
        try:
            namespace = NamespacePrefix(prefix.upper())
        except ValueError as e:
            err_msg = f"Namespace prefix not supported: {prefix}"
            raise ValueError(err_msg) from e

    # Fall back to the prefix *string* (not the enum member) when no URI is
    # registered, so ``system`` is always a plain string.
    system = NAMESPACE_TO_SYSTEM_URI.get(namespace, namespace.value)

    return ConceptMapping(
        coding=Coding(code=code(concept_id), system=system), relation=relation
    )

disease_obj = MappableConcept(
id=f"normalize.disease.{record['concept_id']}",
conceptType="Disease",
Expand All @@ -333,15 +367,7 @@ def _add_disease(
)

source_ids = record.get("xrefs", []) + record.get("associated_with", [])
mappings = []
for source_id in source_ids:
system, source_code = source_id.split(":")
mappings.append(
ConceptMapping(
coding=Coding(code=code(source_code), system=system.lower()),
relation=Relation.RELATED_MATCH,
)
)
mappings = [_create_concept_mapping(source_id) for source_id in source_ids]
if mappings:
disease_obj.mappings = mappings

Expand Down
69 changes: 58 additions & 11 deletions src/disease/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,41 @@ class NamespacePrefix(Enum):
WIKIDATA = "wikidata"


# Namespace prefix -> ``system`` URI used when building concept mappings.
# Each value is either an OBO Foundry persistent URL (PURL, the
# ``purl.obolibrary.org`` entries) or the source's homepage.
# NOTE: the mapping is not one-to-one; DO/DOID share a URI, as do HP/HPO.
NAMESPACE_TO_SYSTEM_URI: dict[NamespacePrefix, str] = {
NamespacePrefix.NCIT: "http://purl.obolibrary.org/obo/ncit.owl",
NamespacePrefix.MONDO: "http://purl.obolibrary.org/obo/mondo.owl",
NamespacePrefix.DO: "http://purl.obolibrary.org/obo/doid.owl",
NamespacePrefix.DOID: "http://purl.obolibrary.org/obo/doid.owl",
NamespacePrefix.OMIM: "https://www.omim.org",
NamespacePrefix.ONCOTREE: "https://oncotree.mskcc.org",
NamespacePrefix.COHD: "https://cohd.io",
NamespacePrefix.DECIPHER: "https://www.deciphergenomics.org",
NamespacePrefix.EFO: "https://www.ebi.ac.uk/efo/",
NamespacePrefix.GARD: "https://rarediseases.info.nih.gov",
NamespacePrefix.HP: "http://purl.obolibrary.org/obo/hp.owl",
NamespacePrefix.HPO: "http://purl.obolibrary.org/obo/hp.owl",
NamespacePrefix.ICD11: "https://icd.who.int/en/",
NamespacePrefix.ICDO: "https://www.who.int/standards/classifications/other-classifications/international-classification-of-diseases-for-oncology/",
NamespacePrefix.KEGG: "https://www.genome.jp/kegg/disease/",
NamespacePrefix.MEDDRA: "https://www.meddra.org",
NamespacePrefix.MEDGEN: "https://www.ncbi.nlm.nih.gov/medgen/",
NamespacePrefix.MESH: "https://id.nlm.nih.gov/mesh/",
NamespacePrefix.MP: "http://purl.obolibrary.org/obo/mp.owl",
NamespacePrefix.OBI: "http://purl.obolibrary.org/obo/obi.owl",
NamespacePrefix.ORPHANET: "https://www.orpha.net",
NamespacePrefix.PATO: "http://purl.obolibrary.org/obo/pato.owl",
NamespacePrefix.UMLS: "https://www.nlm.nih.gov/research/umls/index.html",
NamespacePrefix.WIKIPEDIA: "https://en.wikipedia.org",
NamespacePrefix.WIKIDATA: "https://www.wikidata.org",
}

# Reverse lookup: system URI -> namespace prefix string (the enum member's value).
# Derived from NAMESPACE_TO_SYSTEM_URI; where several prefixes share one URI,
# the entry that comes last in that dict is the one retained.
SYSTEM_URI_TO_NAMESPACE = {
    uri: prefix.value for prefix, uri in NAMESPACE_TO_SYSTEM_URI.items()
}


class SourcePriority(IntEnum):
"""Define priorities for sources in building merged concepts."""

Expand Down Expand Up @@ -275,30 +310,42 @@ class NormalizationService(BaseModel):
"normalized_id": "ncit:C4989",
"disease": {
"id": "normalize.disease.ncit:C4989",
"type": "Disease",
"conceptType": "Disease",
"label": "Childhood Leukemia",
"aliases": [
"childhood leukemia (disease)",
"leukemia",
"pediatric leukemia (disease)",
"Leukemia",
"leukemia (disease) of childhood",
],
"mappings": [
{
"coding": {"code": "0004355", "system": "mondo"},
"coding": {
"code": "mondo:0004355",
"system": "http://purl.obolibrary.org/obo/mondo.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "7757", "system": "doid"},
"coding": {
"code": "DOID:7757",
"system": "http://purl.obolibrary.org/obo/doid.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C1332977", "system": "umls"},
"coding": {
"code": "umls:C1332977",
"system": "https://www.nlm.nih.gov/research/umls/index.html",
},
"relation": "relatedMatch",
},
],
"extensions": [
{
"name": "aliases",
"value": [
"childhood leukemia (disease)",
"leukemia",
"pediatric leukemia (disease)",
"Leukemia",
"leukemia (disease) of childhood",
],
},
{
"name": "pediatric_disease",
"value": True,
Expand Down
72 changes: 57 additions & 15 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,47 +24,80 @@ def neuroblastoma():
label="Neuroblastoma",
mappings=[
{
"coding": {"code": "0005072", "system": "mondo"},
"coding": {
"code": "mondo:0005072",
"system": "http://purl.obolibrary.org/obo/mondo.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "NBL", "system": "oncotree"},
"coding": {
"code": "oncotree:NBL",
"system": "https://oncotree.mskcc.org",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "769", "system": "doid"},
"coding": {
"code": "DOID:769",
"system": "http://purl.obolibrary.org/obo/doid.owl",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C0027819", "system": "umls"},
"coding": {
"code": "umls:C0027819",
"system": "https://www.nlm.nih.gov/research/umls/index.html",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "9500/3", "system": "icdo"},
"coding": {
"code": "icdo:9500/3",
"system": "https://www.who.int/standards/classifications/other-classifications/international-classification-of-diseases-for-oncology/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "0000621", "system": "efo"},
"coding": {
"code": "efo:0000621",
"system": "https://www.ebi.ac.uk/efo/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "7185", "system": "gard"},
"coding": {
"code": "gard:7185",
"system": "https://rarediseases.info.nih.gov",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "D009447", "system": "mesh"},
"coding": {
"code": "mesh:D009447",
"system": "https://id.nlm.nih.gov/mesh/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "635", "system": "orphanet"},
"coding": {
"code": "orphanet:635",
"system": "https://www.orpha.net",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C2751421", "system": "umls"},
"coding": {
"code": "umls:C2751421",
"system": "https://www.nlm.nih.gov/research/umls/index.html",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "18012", "system": "medgen"},
"coding": {
"code": "medgen:18012",
"system": "https://www.ncbi.nlm.nih.gov/medgen/",
},
"relation": "relatedMatch",
},
],
Expand Down Expand Up @@ -112,19 +145,28 @@ def mafd2():
label="major affective disorder 2",
mappings=[
{
"coding": {"code": "309200", "system": "mim"},
"coding": {"code": "MIM:309200", "system": "https://www.omim.org"},
"relation": "relatedMatch",
},
{
"coding": {"code": "C564108", "system": "mesh"},
"coding": {
"code": "mesh:C564108",
"system": "https://id.nlm.nih.gov/mesh/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "326975", "system": "medgen"},
"coding": {
"code": "medgen:326975",
"system": "https://www.ncbi.nlm.nih.gov/medgen/",
},
"relation": "relatedMatch",
},
{
"coding": {"code": "C1839839", "system": "umls"},
"coding": {
"code": "umls:C1839839",
"system": "https://www.nlm.nih.gov/research/umls/index.html",
},
"relation": "relatedMatch",
},
],
Expand Down

0 comments on commit 7ce49c1

Please sign in to comment.