From 0c9b35a6c823a7ac7472121f00c74605be92badf Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Wed, 26 Jul 2023 15:15:21 -0700 Subject: [PATCH 1/8] handing to handle more types of publications --- src/transformers/transformer.ts | 46 ++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index adb7fc3..ef7c269 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -54,20 +54,42 @@ export default class BaseTransformer { } _updatePublications(mappedResponse: any) { - if ("pubmed" in mappedResponse) { - mappedResponse.pubmed = toArray(mappedResponse.pubmed); - mappedResponse.publications = mappedResponse.pubmed.map(item => - typeof item === "string" && item.toUpperCase().startsWith("PMID:") ? item.toUpperCase() : "PMID:" + item, - ); - delete mappedResponse.pubmed; + if (!Array.isArray(mappedResponse.publications)) { + mappedResponse.publications = []; } - if ("pmc" in mappedResponse) { - mappedResponse.pmc = toArray(mappedResponse.pmc); - mappedResponse.publications = mappedResponse.pmc.map(item => - typeof item === "string" && item.toUpperCase().startsWith("PMC:") ? item.toUpperCase() : "PMC:" + item, - ); - delete mappedResponse.pmc; + + const publicationTypes = [ + {prop: "ref_pmid", prefix: "PMID:"}, + {prop: "ref_url", prefix: ""}, + {prop: "ref_pmcid", prefix: "PMCID:"}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:"}, + {prop: "ref_doi", prefix: "doi:"}, + {prop: "ref_isbn", prefix: "isbn:"} + ] + + for (let publicationType of publicationTypes) { + if (publicationType.prop in mappedResponse) { + for (let publication of toArray(mappedResponse[publicationType.prop])) { + if (typeof publication !== "string" || publication.length === 0) { + continue; + } + + if (publication.toUpperCase().startsWith(publicationType.prefix.toUpperCase())) { + mappedResponse.publications.push(publicationType.prefix + publication.slice(publicationType.prefix.length)); + } + else { + mappedResponse.publications.push(publicationType.prefix + publication); + } + } + + delete mappedResponse[publicationType.prop]; + } + } + + if (mappedResponse.publications.length === 0) { + delete mappedResponse.publications; } + return mappedResponse; } From 991683fda900d94a1e93a05cbae6953283b47523 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 13:54:00 -0700 Subject: [PATCH 2/8] url -> curie if applicable for publications --- src/transformers/transformer.ts | 49 +++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index ef7c269..101b156 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -59,14 +59,51 @@ export default class BaseTransformer { } const publicationTypes = [ - {prop: "ref_pmid", prefix: "PMID:"}, - {prop: "ref_url", prefix: ""}, - {prop: "ref_pmcid", prefix: "PMCID:"}, - {prop: "ref_clinicaltrials", prefix: "clinicaltrials:"}, - {prop: "ref_doi", prefix: "doi:"}, - {prop: "ref_isbn", prefix: "isbn:"} + {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/"]}, + {prop: "ref_pmcid", prefix: "PMCID:", urls: ["http://www.ncbi.nlm.nih.gov/pmc/articles/", "http://europepmc.org/articles/"]}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/"]}, + {prop: "ref_doi", prefix: "doi:", urls: ["https://doi.org/", "http://www.nejm.org/doi/full/", "https://www.tandfonline.com/doi/abs/", "http://onlinelibrary.wiley.com/doi/"]}, + {prop: "ref_isbn", prefix: "isbn:", urls: ["https://www.isbn-international.org/identifier/"]} ] + // handle URLs (which could be CURIEs) + if ("ref_url" in mappedResponse) { + for (let publication of toArray(mappedResponse.ref_url)) { + if (typeof publication !== "string" || publication.length === 0) { + continue; + } + + let isCurie = false; + for (let publicationType of publicationTypes) { + for (let url of publicationType.urls) { + if (publication.startsWith(url)) { + isCurie = true; + + if (!mappedResponse[publicationType.prop]) { + mappedResponse[publicationType.prop] = []; + } + else if (!Array.isArray(mappedResponse[publicationType.prop])) { + mappedResponse[publicationType.prop] = toArray(mappedResponse[publicationType.prop]); + } + + mappedResponse[publicationType.prop].push(publication.slice(url.length)); + + break; + } + } + + if (isCurie) { + break; + } + } + + if (!isCurie) { + mappedResponse.publications.push(publication); + } + } + } + delete mappedResponse.ref_url; + for (let publicationType of publicationTypes) { if (publicationType.prop in mappedResponse) { for (let publication of toArray(mappedResponse[publicationType.prop])) { From e041731ffc792f84326b9578342ba048cdebf6cf Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 13:54:21 -0700 Subject: [PATCH 3/8] Fix biolink transformer for publications --- src/transformers/biolink_transformer.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/transformers/biolink_transformer.ts b/src/transformers/biolink_transformer.ts index 29bef56..d1d6b12 100644 --- a/src/transformers/biolink_transformer.ts +++ b/src/transformers/biolink_transformer.ts @@ -15,12 +15,18 @@ export default class BiolinkTransformer extends BaseTransformer { rec['object'][prefix] = rec.object.id; } } - if (rec.publications === undefined || rec.publications.length === 0 || !(rec.publications[0]['id'].startsWith("PMID"))) { + if (rec.publications === undefined || rec.publications.length === 0) { delete rec.publications } else { - rec.publications = rec.publications.map(pub => { - return { "id": pub.id.split(':').slice(-1)[0] } - }) + const oldPublications = rec.publications; + rec.publications = []; + for (let oldPub of oldPublications) { + if (!oldPub?.id?.startsWith?.("PMID:")) { + continue; + } + + rec.publications.push({ id: oldPub.id.split(':').slice(-1)[0] }); + } } if (!("provided_by" in rec)) { delete rec.provided_by From f7b3961b48611951b04b0885358854d1f9c7bc61 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:04:10 -0700 Subject: [PATCH 4/8] update tests for new publication format --- __test__/base_transformer.test.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/__test__/base_transformer.test.ts b/__test__/base_transformer.test.ts index aa0bf0b..369f220 100644 --- a/__test__/base_transformer.test.ts +++ b/__test__/base_transformer.test.ts @@ -57,41 +57,41 @@ describe("test base transformer", () => { test("Test _updatePublications function if pubmed id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pubmed: "PMID:1233" + ref_pmid: "PMID:1233" } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pubmed'); + expect(res).not.toHaveProperty('ref_pmid'); expect(res.publications).toEqual(["PMID:1233"]); }) test("Test _updatePublications function if pubmed id is NOT prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pubmed: 1233 + ref_pmid: 1233 } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pubmed'); + expect(res).not.toHaveProperty('ref_pmid'); expect(res.publications).toEqual(["PMID:1233"]) }) test("Test _updatePublications function if pmc id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pmc: "PMC:1233" + ref_pmcid: "PMC:1233" } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pmc'); - expect(res.publications).toEqual(["PMC:1233"]); + expect(res).not.toHaveProperty('ref_pmcid'); + expect(res.publications).toEqual(["PMCID:1233"]); }) test("Test _updatePublications function if pmc id is NOT prefixed", () => { const tf = new base_tf(input, {}); const fake = { - pmc: 123 + ref_pmcid: 123 } const res = tf._updatePublications(fake); - expect(res).not.toHaveProperty('pmc'); - expect(res.publications).toEqual(["PMC:123"]) + expect(res).not.toHaveProperty('ref_pmcid'); + expect(res.publications).toEqual(["PMCID:123"]) }) test("Test extractObjectIDs function if output id type not in result", () => { From 077c085216284e7d12ddf2bbbd6337e3f711bdc8 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:05:30 -0700 Subject: [PATCH 5/8] convert numbered publications to strings --- src/transformers/transformer.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index 101b156..3fdcac3 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -107,6 +107,11 @@ export default class BaseTransformer { for (let publicationType of publicationTypes) { if (publicationType.prop in mappedResponse) { for (let publication of toArray(mappedResponse[publicationType.prop])) { + // handle numbers + if (typeof publication === "number") { + publication = publication.toString(); + } + if (typeof publication !== "string" || publication.length === 0) { continue; } From 2851686b7f0c3aacc55342173734426482a77d38 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 28 Jul 2023 14:11:48 -0700 Subject: [PATCH 6/8] update more tests for new publication system --- __test__/base_transformer.test.ts | 2 +- __test__/biothings_transformer.test.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/__test__/base_transformer.test.ts b/__test__/base_transformer.test.ts index 369f220..95172bd 100644 --- a/__test__/base_transformer.test.ts +++ b/__test__/base_transformer.test.ts @@ -77,7 +77,7 @@ describe("test base transformer", () => { test("Test _updatePublications function if pmc id is prefixed", () => { const tf = new base_tf(input, {}); const fake = { - ref_pmcid: "PMC:1233" + ref_pmcid: "PMCID:1233" } const res = tf._updatePublications(fake); expect(res).not.toHaveProperty('ref_pmcid'); diff --git a/__test__/biothings_transformer.test.ts b/__test__/biothings_transformer.test.ts index 6308e39..93821c4 100644 --- a/__test__/biothings_transformer.test.ts +++ b/__test__/biothings_transformer.test.ts @@ -58,7 +58,7 @@ describe("test biothings transformer", () => { let tf = new biothings_tf(input, {}); let res = await tf.transform(); expect(res).toHaveLength(27); - expect(res[0]).not.toHaveProperty('pubmed'); + expect(res[0]).not.toHaveProperty('ref_pmid'); expect(res[0]).toHaveProperty('publications', ["PMID:21873635"]); }) }) From 38a24132734fb068b58a449d3671c9120eb8e894 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Thu, 10 Aug 2023 10:22:37 -0700 Subject: [PATCH 7/8] Add base urls --- src/transformers/transformer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/transformer.ts b/src/transformers/transformer.ts index 3fdcac3..21e3dcb 100644 --- a/src/transformers/transformer.ts +++ b/src/transformers/transformer.ts @@ -59,9 +59,9 @@ export default class BaseTransformer { } const publicationTypes = [ - {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/"]}, + {prop: "ref_pmid", prefix: "PMID:", urls: ["http://www.ncbi.nlm.nih.gov/pubmed/", "http://europepmc.org/abstract/MED/", "https://www.ncbi.nlm.nih.gov/pubmed/"]}, {prop: "ref_pmcid", prefix: "PMCID:", urls: ["http://www.ncbi.nlm.nih.gov/pmc/articles/", "http://europepmc.org/articles/"]}, - {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/"]}, + {prop: "ref_clinicaltrials", prefix: "clinicaltrials:", urls: ["https://clinicaltrials.gov/ct2/show/", "https://www.clinicaltrials.gov/ct2/show/"]}, {prop: "ref_doi", prefix: "doi:", urls: ["https://doi.org/", "http://www.nejm.org/doi/full/", "https://www.tandfonline.com/doi/abs/", "http://onlinelibrary.wiley.com/doi/"]}, {prop: "ref_isbn", prefix: "isbn:", urls: ["https://www.isbn-international.org/identifier/"]} ] From ae3a5f4161b0f10dcc07b35b4a8ed8e15e9a1391 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Fri, 11 Aug 2023 09:47:15 -0700 Subject: [PATCH 8/8] Fix CTD transformer id casing --- src/transformers/ctd_transformer.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/ctd_transformer.ts b/src/transformers/ctd_transformer.ts index 0b02ac1..e639e80 100644 --- a/src/transformers/ctd_transformer.ts +++ b/src/transformers/ctd_transformer.ts @@ -7,9 +7,15 @@ export default class CTDTransformer extends BaseTransformer { if (typeof item.PubMedIDs === "string") { item.PubMedIDs = item.PubMedIDs.split('|'); } + if (typeof item.PubMedIds === "string") { + item.PubMedIds = item.PubMedIds.split('|'); + } if (typeof item.DiseaseID === "string") { item.DiseaseID = item.DiseaseID.split(':').slice(-1)[0]; } + if (typeof item.DiseaseId === "string") { + item.DiseaseId = item.DiseaseId.split(':').slice(-1)[0]; + } return item; }); return { data: res };