From 0c9b35a6c823a7ac7472121f00c74605be92badf Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Wed, 26 Jul 2023 15:15:21 -0700 Subject: [PATCH 01/12] handing to handle more types of publications --- src/transformers/transformer.ts | 46 ++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index adb7fc3..ef7c269 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -54,20 +54,42 @@ export default class BaseTransformer { } _updatePublications(mappedResponse: any) { - if ("pubmed" in mappedResponse) { - mappedResponse.pubmed = toArray(mappedResponse.pubmed); - mappedResponse.publications = mappedResponse.pubmed.map(item => - typeof item === "string" && item.toUpperCase().startsWith("PMID:") ? item.toUpperCase() : "PMID:" + item, - ); - delete mappedResponse.pubmed; + if (!Array.isArray(mappedResponse.publications)) { + mappedResponse.publications = []; } - if ("pmc" in mappedResponse) { - mappedResponse.pmc = toArray(mappedResponse.pmc); - mappedResponse.publications = mappedResponse.pmc.map(item => - typeof item === "string" && item.toUpperCase().startsWith("PMC:") ? item.toUpperCase() : "PMC:" + item, - ); - delete mappedResponse.pmc; + + const publicationTypes = [ + {prop: "ref_pmid", prefix: "PMID:"}, + {prop: "ref_url", prefix: ""}, + {prop: "ref_pmcid", prefix: "PMCID:"}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:"}, + {prop: "ref_doi", prefix: "doi:"}, + {prop: "ref_isbn", prefix: "isbn:"} + ] + + for (let publicationType of publicationTypes) { + if (publicationType.prop in mappedResponse) { + for (let publication of toArray(mappedResponse[publicationType.prop])) { + if (typeof publication !== "string" || publication.length === 0) { + continue; + } + + if (publication.toUpperCase().startsWith(publicationType.prefix.toUpperCase())) { + mappedResponse.publications.push(publicationType.prefix + publication.slice(publicationType.prefix.length)); + } + else { + mappedResponse.publications.push(publicationType.prefix + publication); + } + } + + delete mappedResponse[publicationType.prop]; + } + } + + if (mappedResponse.publications.length === 0) { + delete mappedResponse.publications; } + return mappedResponse; } From 991683fda900d94a1e93a05cbae6953283b47523 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 13:54:00 -0700 Subject: [PATCH 02/12] url -> curie if applicable for publications --- src/transformers/transformer.ts | 49 +++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index ef7c269..101b156 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -59,14 +59,51 @@ export default class BaseTransformer { } const publicationTypes = [ - {prop: "ref_pmid", prefix: "PMID:"}, - {prop: "ref_url", prefix: ""}, - {prop: "ref_pmcid", prefix: "PMCID:"}, - {prop: "ref_clinicaltrials", prefix: "clinicaltrials:"}, - {prop: "ref_doi", prefix: "doi:"}, - {prop: "ref_isbn", prefix: "isbn:"} + {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/"]}, + {prop: "ref_pmcid", prefix: "PMCID:", urls: ["http://www.ncbi.nlm.nih.gov/pmc/articles/", "http://europepmc.org/articles/"]}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/"]}, + {prop: "ref_doi", prefix: "doi:", urls: ["https://doi.org/", "http://www.nejm.org/doi/full/", "https://www.tandfonline.com/doi/abs/", "http://onlinelibrary.wiley.com/doi/"]}, + {prop: "ref_isbn", prefix: "isbn:", urls: ["https://www.isbn-international.org/identifier/"]} ] + // handle URLs (which could be CURIEs) + if ("ref_url" in mappedResponse) { + for (let publication of toArray(mappedResponse.ref_url)) { + if (typeof publication !== "string" || publication.length === 0) { + continue; + } + + let isCurie = false; + for (let publicationType of publicationTypes) { + for (let url of publicationType.urls) { + if (publication.startsWith(url)) { + isCurie = true; + + if (!mappedResponse[publicationType.prop]) { + mappedResponse[publicationType.prop] = []; + } + else if (!Array.isArray(mappedResponse[publicationType.prop])) { + mappedResponse[publicationType.prop] = toArray(mappedResponse[publicationType.prop]); + } + + mappedResponse[publicationType.prop].push(publication.slice(url.length)); + + break; + } + } + + if (isCurie) { + break; + } + } + + if (!isCurie) { + mappedResponse.publications.push(publication); + } + } + } + delete mappedResponse.ref_url; + for (let publicationType of publicationTypes) { if (publicationType.prop in mappedResponse) { for (let publication of toArray(mappedResponse[publicationType.prop])) { From e041731ffc792f84326b9578342ba048cdebf6cf Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 13:54:21 -0700 Subject: [PATCH 03/12] Fix biolink transformer for publications --- src/transformers/biolink_transformer.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/transformers/biolink_transformer.ts b/src/transformers/biolink_transformer.ts index 29bef56..d1d6b12 100644 --- a/src/transformers/biolink_transformer.ts +++ b/src/transformers/biolink_transformer.ts @@ -15,12 +15,18 @@ export default class BiolinkTransformer extends BaseTransformer { rec['object'][prefix] = rec.object.id; } } - if (rec.publications === undefined || rec.publications.length === 0 || !(rec.publications[0]['id'].startsWith("PMID"))) { + if (rec.publications === undefined || rec.publications.length === 0) { delete rec.publications } else { - rec.publications = rec.publications.map(pub => { - return { "id": pub.id.split(':').slice(-1)[0] } - }) + const oldPublications = rec.publications; + rec.publications = []; + for (let oldPub of oldPublications) { + if (!oldPub?.id?.startsWith?.("PMID:")) { + continue; + } + + rec.publications.push({ id: oldPub.id.split(':').slice(-1)[0] }); + } } if (!("provided_by" in rec)) { delete rec.provided_by From f7b3961b48611951b04b0885358854d1f9c7bc61 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:04:10 -0700 Subject: [PATCH 04/12] update tests for new publication format --- __test__/base_transformer.test.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/__test__/base_transformer.test.ts b/__test__/base_transformer.test.ts index aa0bf0b..369f220 100644 --- a/__test__/base_transformer.test.ts +++ b/__test__/base_transformer.test.ts @@ -57,41 +57,41 @@ describe("test base transformer", () => { test("Test _updatePublications function if pubmed id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pubmed: "PMID:1233" + ref_pmid: "PMID:1233" } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pubmed'); + expect(res).not.toHaveProperty('ref_pmid'); expect(res.publications).toEqual(["PMID:1233"]); }) test("Test _updatePublications function if pubmed id is NOT prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pubmed: 1233 + ref_pmid: 1233 } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pubmed'); + expect(res).not.toHaveProperty('ref_pmid'); expect(res.publications).toEqual(["PMID:1233"]) }) test("Test _updatePublications function if pmc id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pmc: "PMC:1233" + ref_pmcid: "PMC:1233" } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pmc'); - expect(res.publications).toEqual(["PMC:1233"]); + expect(res).not.toHaveProperty('ref_pmcid'); + expect(res.publications).toEqual(["PMCID:1233"]); }) test("Test _updatePublications function if pmc id is NOT prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pmc: 123 + ref_pmcid: 123 } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pmc'); - expect(res.publications).toEqual(["PMC:123"]) + expect(res).not.toHaveProperty('ref_pmcid'); + expect(res.publications).toEqual(["PMCID:123"]) }) test("Test extractObjectIDs function if output id type not in result", () => { From 077c085216284e7d12ddf2bbbd6337e3f711bdc8 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:05:30 -0700 Subject: [PATCH 05/12] convert numbered publications to strings --- src/transformers/transformer.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index 101b156..3fdcac3 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -107,6 +107,11 @@ export default class BaseTransformer { for (let publicationType of publicationTypes) { if (publicationType.prop in mappedResponse) { for (let publication of toArray(mappedResponse[publicationType.prop])) { + // handle numbers + if (typeof publication === "number") { + publication = publication.toString(); + } + if (typeof publication !== "string" || publication.length === 0) { continue; } From 2851686b7f0c3aacc55342173734426482a77d38 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:11:48 -0700 Subject: [PATCH 06/12] update more tests for new publication system --- __test__/base_transformer.test.ts | 2 +- __test__/biothings_transformer.test.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/__test__/base_transformer.test.ts b/__test__/base_transformer.test.ts index 369f220..95172bd 100644 --- a/__test__/base_transformer.test.ts +++ b/__test__/base_transformer.test.ts @@ -77,7 +77,7 @@ describe("test base transformer", () => { test("Test _updatePublications function if pmc id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - ref_pmcid: "PMC:1233" + ref_pmcid: "PMCID:1233" } const res = tf._updatePublications(fake); expect(res).not.toHaveProperty('ref_pmcid'); diff --git a/__test__/biothings_transformer.test.ts b/__test__/biothings_transformer.test.ts index 6308e39..93821c4 100644 --- a/__test__/biothings_transformer.test.ts +++ b/__test__/biothings_transformer.test.ts @@ -58,7 +58,7 @@ describe("test biothings transformer", () => { let tf = new biothings_tf(input, {}); let res = await tf.transform(); expect(res).toHaveLength(27); - expect(res[0]).not.toHaveProperty('pubmed'); + expect(res[0]).not.toHaveProperty('ref_pmid'); expect(res[0]).toHaveProperty('publications', ["PMID:21873635"]); }) }) From 9ca7ed7d14fbbc660a0394dfc1e094567075c7eb Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 4 Aug 2023 15:08:17 -0700 Subject: [PATCH 07/12] handle missing id, multiple edges per result --- src/transformers/trapi_transformer.ts | 28 ++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/transformers/trapi_transformer.ts b/src/transformers/trapi_transformer.ts index cbce358..f1a3d95 100644 --- a/src/transformers/trapi_transformer.ts +++ b/src/transformers/trapi_transformer.ts @@ -11,19 +11,21 @@ export default class TRAPITransformer extends BaseTransformer { ) { this.data.response.message.results.forEach(result => { result.analyses.forEach(analysis => { - const edgeID = analysis.edge_bindings.e01[0].id; - const edge = - "message" in this.data.response ? this.data.response.message.knowledge_graph.edges[edgeID] : undefined; - const edgeHasSupportGraph = edge.attributes.some(attribute => { - if (attribute.attribute_type_id === "biolink:support_graphs" && attribute.value?.length) { - return true; - } - }); - if (edgeHasSupportGraph) return; - edges[edgeID] = { - subject: result.node_bindings.n0[0].id, - object: result.node_bindings.n1[0].id, - }; + analysis?.edge_bindings?.e01?.forEach(binding => { + const edgeID = binding?.id; + const edge = + "message" in this.data.response && edgeID ? this.data.response.message.knowledge_graph.edges[edgeID] : undefined; + const edgeHasSupportGraph = edge.attributes.some(attribute => { + if (attribute.attribute_type_id === "biolink:support_graphs" && attribute.value?.length) { + return true; + } + }); + if (edgeHasSupportGraph || !edgeID) return; + edges[edgeID] = { + subject: result.node_bindings.n0[0].id, + object: result.node_bindings.n1[0].id, + }; + }) }); }); } From 10a7083d9145386c3d3e22b421675b38c47cac46 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 4 Aug 2023 15:17:14 -0700 Subject: [PATCH 08/12] use kg to determine subject/object in trapi --- src/transformers/trapi_transformer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/trapi_transformer.ts b/src/transformers/trapi_transformer.ts index f1a3d95..350ca76 100644 --- a/src/transformers/trapi_transformer.ts +++ b/src/transformers/trapi_transformer.ts @@ -22,8 +22,8 @@ export default class TRAPITransformer extends BaseTransformer { }); if (edgeHasSupportGraph || !edgeID) return; edges[edgeID] = { - subject: result.node_bindings.n0[0].id, - object: result.node_bindings.n1[0].id, + subject: this.data.response.message.knowledge_graph.edges[edgeID].subject ?? result.node_bindings.n0[0].id, + object: this.data.response.message.knowledge_graph.edges[edgeID].object ?? result.node_bindings.n1[0].id, }; }) }); From b5bd2ce8808db8b837422b9dc0623067749b0699 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 4 Aug 2023 15:28:41 -0700 Subject: [PATCH 09/12] Fix types on trapi transfomre --- src/transformers/trapi_transformer.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transformers/trapi_transformer.ts b/src/transformers/trapi_transformer.ts index 350ca76..402f848 100644 --- a/src/transformers/trapi_transformer.ts +++ b/src/transformers/trapi_transformer.ts @@ -1,5 +1,6 @@ import BaseTransformer from "./transformer"; import { Record } from "../record"; +import { JSONDoc } from "../json_transform/types"; export default class TRAPITransformer extends BaseTransformer { _getUniqueEdges() { @@ -22,8 +23,8 @@ export default class TRAPITransformer extends BaseTransformer { }); if (edgeHasSupportGraph || !edgeID) return; edges[edgeID] = { - subject: this.data.response.message.knowledge_graph.edges[edgeID].subject ?? result.node_bindings.n0[0].id, - object: this.data.response.message.knowledge_graph.edges[edgeID].object ?? result.node_bindings.n1[0].id, + subject: (this.data.response as JSONDoc).message.knowledge_graph.edges[edgeID].subject ?? result.node_bindings.n0[0].id, + object: (this.data.response as JSONDoc).message.knowledge_graph.edges[edgeID].object ?? result.node_bindings.n1[0].id, }; }) }); From 38a24132734fb068b58a449d3671c9120eb8e894 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Thu, 10 Aug 2023 10:22:37 -0700 Subject: [PATCH 10/12] Add base urls --- src/transformers/transformer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index 3fdcac3..21e3dcb 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -59,9 +59,9 @@ export default class BaseTransformer { } const publicationTypes = [ - {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/"]}, + {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/", "https://www.ncbi.nlm.nih.gov/pubmed/"]}, {prop: "ref_pmcid", prefix: "PMCID:", urls: ["http://www.ncbi.nlm.nih.gov/pmc/articles/", "http://europepmc.org/articles/"]}, - {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/"]}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/", "https://www.clinicaltrials.gov/ct2/show/"]}, {prop: "ref_doi", prefix: "doi:", urls: ["https://doi.org/", "http://www.nejm.org/doi/full/", "https://www.tandfonline.com/doi/abs/", "http://onlinelibrary.wiley.com/doi/"]}, {prop: "ref_isbn", prefix: "isbn:", urls: ["https://www.isbn-international.org/identifier/"]} ] From 7fe1cba3461fd1b7673096b977aa3a1e7084548b Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:37:11 -0400 Subject: [PATCH 11/12] feat: use service provider as source where appropriate --- src/record.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/record.ts b/src/record.ts index 6568bb5..2b09610 100644 --- a/src/record.ts +++ b/src/record.ts @@ -436,7 +436,9 @@ export class Record { } } returnValue.push({ - resource_id: "infores:biothings-explorer", + resource_id: this.config.provenanceUsesServiceProvider + ? "infores:service-provider-trapi" + : "infores:biothings-explorer", resource_role: "aggregator_knowledge_source", upstream_resource_ids: [this.apiInforesCurie], }); From ae3a5f4161b0f10dcc07b35b4a8ed8e15e9a1391 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 11 Aug 2023 09:47:15 -0700 Subject: [PATCH 12/12] Fix CTD transformer id casing --- src/transformers/ctd_transformer.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/ctd_transformer.ts b/src/transformers/ctd_transformer.ts index 0b02ac1..e639e80 100644 --- a/src/transformers/ctd_transformer.ts +++ b/src/transformers/ctd_transformer.ts @@ -7,9 +7,15 @@ export default class CTDTransformer extends BaseTransformer { if (typeof item.PubMedIDs === "string") { item.PubMedIDs = item.PubMedIDs.split('|'); } + if (typeof item.PubMedIds === "string") { + item.PubMedIds = item.PubMedIds.split('|'); + } if (typeof item.DiseaseID === "string") { item.DiseaseID = item.DiseaseID.split(':').slice(-1)[0]; } + if (typeof item.DiseaseId === "string") { + item.DiseaseId = item.DiseaseId.split(':').slice(-1)[0]; + } return item; }); return { data: res };