From e72edd0ce721fd029be8891de24ba45ce2660e1a Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Thu, 7 Mar 2024 18:23:38 -0800 Subject: [PATCH 01/32] receive metakg from main thread; receive specs from redis --- src/index.ts | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/index.ts b/src/index.ts index e0a029ad..9d5aebe8 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -import MetaKG from '@biothings-explorer/smartapi-kg'; +import MetaKG, { SmartAPIKGOperationObject } from '@biothings-explorer/smartapi-kg'; import path from 'path'; import QueryGraph from './query_graph'; import KnowledgeGraph from './graph/knowledge_graph'; @@ -28,6 +28,7 @@ import { } from './types'; import BTEGraph from './graph/graph'; import QEdge from './query_edge'; +import { redisClient } from './redis-client'; import { Telemetry } from '@biothings-explorer/utils'; // Exports for external availability @@ -51,6 +52,7 @@ export interface QueryHandlerOptions { resolveOutputIDs?: boolean; submitter?: string; caching?: boolean; // from request url query values + metakg?: SmartAPIKGOperationObject[]; // list of meta kg ops EDGE_ATTRIBUTES_USED_IN_RECORD_HASH?: string[]; } export default class TRAPIQueryHandler { @@ -87,14 +89,26 @@ export default class TRAPIQueryHandler { async findUnregisteredAPIs() { const configListAPIs = this.options.apiList['include']; - const smartapiRegistry = await fs.readFile(this.path, { encoding: 'utf8' }); + + let smartapiRegistry; + if (redisClient.clientEnabled) { + const redisData = await redisClient.client.getTimeout(`bte:smartapi:specs`) + if (redisData) { + smartapiRegistry = JSON.parse(redisData); + } + } + + if (!smartapiRegistry) { + const file = await fs.readFile(this.path, "utf-8"); + smartapiRegistry = JSON.parse(file); + } const smartapiIds: string[] = []; const inforesIds: string[] = []; const unregisteredAPIs: string[] = []; // TODO typing for smartapiRegistration - 
JSON.parse(smartapiRegistry).hits.forEach((smartapiRegistration) => { + smartapiRegistry.hits.forEach((smartapiRegistration) => { smartapiIds.push(smartapiRegistration._id); inforesIds.push(smartapiRegistration.info?.['x-translator']?.infores); }); @@ -110,14 +124,20 @@ export default class TRAPIQueryHandler { return unregisteredAPIs; } - _loadMetaKG(): MetaKG { - const metaKG = new MetaKG(this.path, this.predicatePath); + async _loadMetaKG(): Promise { debug( `Query options are: ${JSON.stringify({ ...this.options, schema: this.options.schema ? this.options.schema.info.version : 'not included', + metakg: "" })}`, ); + + if (this.options.metakg) { + return new MetaKG(undefined, undefined, (this.options as any).metakg); + } + + const metaKG = new MetaKG(this.path, this.predicatePath); debug(`SmartAPI Specs read from path: ${this.path}`); metaKG.constructMetaKGSync(this.includeReasoner, this.options); return metaKG; @@ -626,7 +646,7 @@ export default class TRAPIQueryHandler { const span1 = Telemetry.startSpan({ description: 'loadMetaKG' }); debug('Start to load metakg.'); - const metaKG = this._loadMetaKG(); + const metaKG = await this._loadMetaKG(); if (!metaKG.ops.length) { let error: string; if (this.options.smartAPIID) { From fa889e421b9dcb91dceb373ed5a90b251b749455 Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Mon, 11 Mar 2024 17:46:36 -0700 Subject: [PATCH 02/32] filter meta kg when it is sent via options --- src/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 9d5aebe8..346a4c79 100644 --- a/src/index.ts +++ b/src/index.ts @@ -134,7 +134,9 @@ export default class TRAPIQueryHandler { ); if (this.options.metakg) { - return new MetaKG(undefined, undefined, (this.options as any).metakg); + const metaKG = new MetaKG(undefined, undefined, (this.options as any).metakg); + metaKG.filterKG(this.options); + return metaKG; } const metaKG = new MetaKG(this.path, this.predicatePath); From 
62c6492d9b76d1765c4844dd2ec6cb06d4843f2d Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 2 May 2024 13:17:38 -0400 Subject: [PATCH 03/32] feat: handle max_research_phase --- src/graph/knowledge_graph.ts | 50 ++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/graph/knowledge_graph.ts b/src/graph/knowledge_graph.ts index 58b621cb..a05239b4 100644 --- a/src/graph/knowledge_graph.ts +++ b/src/graph/knowledge_graph.ts @@ -14,11 +14,43 @@ import KGNode from './kg_node'; import KGEdge from './kg_edge'; import { BTEGraphUpdate } from './graph'; import { APIDefinition } from '@biothings-explorer/types'; +import { Telemetry } from '@biothings-explorer/utils'; const debug = Debug('bte:biothings-explorer-trapi:KnowledgeGraph'); const NON_ARRAY_ATTRIBUTES = ['biolink:knowledge_level', 'biolink:agent_type']; +interface SpecialAttributeHandlers { + [attribute_type_id: string]: (value: Set, kgEdge: KGEdge) => TrapiAttribute['value']; +} + +const SPECIAL_ATTRIBUTE_HANDLERS: SpecialAttributeHandlers = { + 'biolink:max_research_phase': (value, kgEdge) => { + // Special handling for max research phase + const phase_map = { + '-1.0': 'not_provided', + '0.5': 'pre_clinical_research_phase', + '1.0': 'clinical_trial_phase_1', + '2.0': 'clinical_trial_phase_2', + '3.0': 'clinical_trial_phase_3', + '4.0': 'clinical_trial_phase_4', + }; + function map_phase(val: string) { + let new_val = phase_map[val]; + if (typeof new_val !== 'undefined') return new_val; + + const source = Object.values(kgEdge.sources).find((src) => typeof src.primary_knowledge_source !== 'undefined') + .primary_knowledge_source.resource_id; + const err = new Error( + `Unrecognized research phase (${val}) from ${source} ${kgEdge.subject} > ${kgEdge.predicate} > ${kgEdge.object}`, + ); + Telemetry.captureException(err); + return 'not_provided'; + } + return Array.from(value as Set).map(map_phase); + }, +}; + export default class 
KnowledgeGraph { nodes: { [nodePrimaryID: string]: TrapiKGNode; @@ -117,13 +149,21 @@ export default class KnowledgeGraph { Object.entries(kgEdge.attributes).forEach(([key, value]) => { if (key === 'edge-attributes') return; - // if (key == 'edge-attributes') return; + + let formatted_value: TrapiAttribute['value'] = NON_ARRAY_ATTRIBUTES.includes(key) + ? Array.from(value as Set).reduce((acc, val) => acc + val) + : Array.from(value as Set); + + if (key in SPECIAL_ATTRIBUTE_HANDLERS) { + formatted_value = SPECIAL_ATTRIBUTE_HANDLERS[key](value as Set, kgEdge); + } + attributes.push({ attribute_type_id: key, - value: // technically works for numbers as well - NON_ARRAY_ATTRIBUTES.includes(key) - ? [...(value as Set)].reduce((acc, val) => acc + val) - : Array.from(value as Set), + // technically works for numbers as well + value: NON_ARRAY_ATTRIBUTES.includes(key) + ? [...(value as Set)].reduce((acc, val) => acc + val) + : Array.from(value as Set), //value_type_id: 'bts:' + key, }); }); From 1f891ccea1f6e812ce797796991d10a94d029681 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 2 May 2024 13:29:26 -0400 Subject: [PATCH 04/32] fix: use formatted_value --- src/graph/knowledge_graph.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/graph/knowledge_graph.ts b/src/graph/knowledge_graph.ts index a05239b4..e4bfc9e1 100644 --- a/src/graph/knowledge_graph.ts +++ b/src/graph/knowledge_graph.ts @@ -161,9 +161,7 @@ export default class KnowledgeGraph { attributes.push({ attribute_type_id: key, // technically works for numbers as well - value: NON_ARRAY_ATTRIBUTES.includes(key) - ? 
[...(value as Set)].reduce((acc, val) => acc + val) - : Array.from(value as Set), + value: formatted_value, //value_type_id: 'bts:' + key, }); }); From b0fc94d762ad17d277bc6ddfa635ba60cc3e28aa Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 2 May 2024 14:14:46 -0400 Subject: [PATCH 05/32] feat: add evidence_count to summed attributes --- src/graph/knowledge_graph.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph/knowledge_graph.ts b/src/graph/knowledge_graph.ts index 58b621cb..706fe0f8 100644 --- a/src/graph/knowledge_graph.ts +++ b/src/graph/knowledge_graph.ts @@ -17,7 +17,7 @@ import { APIDefinition } from '@biothings-explorer/types'; const debug = Debug('bte:biothings-explorer-trapi:KnowledgeGraph'); -const NON_ARRAY_ATTRIBUTES = ['biolink:knowledge_level', 'biolink:agent_type']; +const NON_ARRAY_ATTRIBUTES = ['biolink:knowledge_level', 'biolink:agent_type', 'biolink:evidence_count']; export default class KnowledgeGraph { nodes: { From 91e398a4eac957cc77c907f1c6f3fa5d16f31b0d Mon Sep 17 00:00:00 2001 From: Rohan Juneja Date: Wed, 15 May 2024 17:58:05 -0700 Subject: [PATCH 06/32] pass specs through thread instead of redis --- src/index.ts | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/index.ts b/src/index.ts index 84747d1f..a692179f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -28,7 +28,6 @@ import { import { QueryHandlerOptions } from '@biothings-explorer/types'; import BTEGraph from './graph/graph'; import QEdge from './query_edge'; -import { redisClient } from '@biothings-explorer/utils'; import { Telemetry } from '@biothings-explorer/utils'; // Exports for external availability @@ -75,16 +74,11 @@ export default class TRAPIQueryHandler { const configListAPIs = this.options.apiList['include']; let smartapiRegistry; - if (redisClient.clientEnabled) { - const redisData = await redisClient.client.getTimeout(`bte:smartapi:specs`) - if (redisData) 
{ - smartapiRegistry = JSON.parse(redisData); - } - } - - if (!smartapiRegistry) { - const file = await fs.readFile(this.path, "utf-8"); - smartapiRegistry = JSON.parse(file); + if (this.options.smartapi) { + smartapiRegistry = this.options.smartapi; + } else { + const file = await fs.readFile(this.path, "utf-8"); + smartapiRegistry = JSON.parse(file); } const smartapiIds: string[] = []; @@ -113,7 +107,8 @@ export default class TRAPIQueryHandler { `Query options are: ${JSON.stringify({ ...this.options, schema: this.options.schema ? this.options.schema.info.version : 'not included', - metakg: "" + metakg: "", + smartapi: "" })}`, ); From 505922a880bddef46f3cbba503713ad622a1ba19 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 16 May 2024 11:42:00 -0400 Subject: [PATCH 07/32] chore: fix types after merge from main --- src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index a692179f..59401b36 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -import MetaKG from '@biothings-explorer/smartapi-kg'; +import MetaKG, { SmartAPIQueryResult } from '@biothings-explorer/smartapi-kg'; import path from 'path'; import QueryGraph from './query_graph'; import KnowledgeGraph from './graph/knowledge_graph'; @@ -73,7 +73,7 @@ export default class TRAPIQueryHandler { async findUnregisteredAPIs() { const configListAPIs = this.options.apiList['include']; - let smartapiRegistry; + let smartapiRegistry: SmartAPIQueryResult; if (this.options.smartapi) { smartapiRegistry = this.options.smartapi; } else { From 69258438fc3a09fa85cd3926a0d7ae8781242042 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Fri, 28 Jun 2024 16:38:17 -0400 Subject: [PATCH 08/32] feat: change scoring sigmoid function to arctan Should give more granularity at the high end of scores without significantly lowering mid- and lower-end scores --- src/results_assembly/score.ts 
| 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 5b55c4c6..fb4c1522 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -8,7 +8,7 @@ import _ from 'lodash'; import { ConsolidatedSolutionRecord, RecordsByQEdgeID } from './query_results'; import { Telemetry } from '@biothings-explorer/utils'; -const tuning_param = 2.0; +const tuning_param = 1.8; const record_weight = 1.0; const text_mined_record_weight = 0.5; @@ -109,8 +109,7 @@ export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise Date: Fri, 28 Jun 2024 17:01:30 -0400 Subject: [PATCH 09/32] fix: add inverse sigmoid function --- src/results_assembly/score.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index fb4c1522..d6fa0db5 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -113,7 +113,7 @@ export function scaled_sigmoid(input: number): number { } export function inverse_scaled_sigmoid(input: number): number { - return -tuning_param * Math.log(2 / (input + 1) - 1); + return tuning_param * Math.tan((2 / Math.PI) * input); } export function calculateScore( From 91abb51b66758a65b2d6170c37ce30f057d42e8a Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:22:13 -0400 Subject: [PATCH 10/32] fix: flipped fraction --- src/results_assembly/score.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index d6fa0db5..c7f824e2 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -113,7 +113,7 @@ export function scaled_sigmoid(input: number): number { } export function inverse_scaled_sigmoid(input: number): number { - return tuning_param * Math.tan((2 / Math.PI) * input); + return tuning_param * Math.tan((Math.PI / 2) 
* Math.min(input, 1)); } export function calculateScore( From 38a04ea48ce667f775430bccdd44844c0bc91cd0 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 8 Jul 2024 13:28:48 -0400 Subject: [PATCH 11/32] basic ngd timeout --- src/results_assembly/score.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index d6fa0db5..130fda77 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -26,6 +26,8 @@ export interface ScoreCombos { // create lookup table for ngd scores in the format: {inputUMLS-outputUMLS: ngd} async function query(queryPairs: string[][]): Promise { + const NGD_TIMEOUT = process.env.NGD_TIMEOUT_MS ? parseInt(process.env.NGD_TIMEOUT_MS) : 20 * 1000; + const url = { dev: 'https://biothings.ci.transltr.io/semmeddb/query/ngd', ci: 'https://biothings.ci.transltr.io/semmeddb/query/ngd', @@ -33,13 +35,17 @@ async function query(queryPairs: string[][]): Promise { prod: 'https://biothings.ncats.io/semmeddb/query/ngd', }[process.env.INSTANCE_ENV ?? 
'prod']; const batchSize = 250; - const concurrency_limit = os.cpus().length * 2; + const concurrency_limit = 100; // server handles ~100 requests per second debug('Querying', queryPairs.length, 'combos.'); const chunked_input = _.chunk(queryPairs, batchSize); + const start = Date.now(); + try { const response = await async.mapLimit(chunked_input, concurrency_limit, async (input) => { + if (Date.now() - start > NGD_TIMEOUT) return; + const span = Telemetry.startSpan({ description: 'NGDScoreRequest' }); const data = { umls: input, @@ -59,6 +65,7 @@ async function query(queryPairs: string[][]): Promise { }); //convert res array into single object with all curies const result = response + .filter(r => r != undefined) .map((r): ngdScoreCombo[] => r.data.filter((combo: ngdScoreCombo) => Number.isFinite(combo.ngd))) .flat(); // get numerical scores and flatten array return result.reduce((acc, cur) => ({ ...acc, [`${cur.umls[0]}-${cur.umls[1]}`]: cur.ngd }), {}); @@ -67,6 +74,17 @@ async function query(queryPairs: string[][]): Promise { } } +// edits array in place +function shuffle(array: T[]): T[] { + for (let i = array.length - 1; i > 0; i--) { + let j = Math.floor(Math.random() * (i + 1)); // random index from 0 to i + let t = array[i]; + array[i] = array[j]; + array[j] = t; + } + return array; +} + // retrieve all ngd scores at once export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise { const pairs: { [input_umls: string]: Set } = {}; @@ -100,7 +118,9 @@ export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise Date: Mon, 8 Jul 2024 14:41:28 -0400 Subject: [PATCH 12/32] better algorithm to allocate queries --- src/results_assembly/score.ts | 43 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 130fda77..86ab3fee 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -26,7 +26,7 @@ 
export interface ScoreCombos { // create lookup table for ngd scores in the format: {inputUMLS-outputUMLS: ngd} async function query(queryPairs: string[][]): Promise { - const NGD_TIMEOUT = process.env.NGD_TIMEOUT_MS ? parseInt(process.env.NGD_TIMEOUT_MS) : 20 * 1000; + const NGD_TIMEOUT = process.env.NGD_TIMEOUT_MS ? parseInt(process.env.NGD_TIMEOUT_MS) : 10 * 1000; const url = { dev: 'https://biothings.ci.transltr.io/semmeddb/query/ngd', @@ -74,20 +74,9 @@ async function query(queryPairs: string[][]): Promise { } } -// edits array in place -function shuffle(array: T[]): T[] { - for (let i = array.length - 1; i > 0; i--) { - let j = Math.floor(Math.random() * (i + 1)); // random index from 0 to i - let t = array[i]; - array[i] = array[j]; - array[j] = t; - } - return array; -} - // retrieve all ngd scores at once export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise { - const pairs: { [input_umls: string]: Set } = {}; + const pairsToAdd: { [recordHash: string]: string[] } = {}; let combosWithoutIDs = 0; @@ -95,13 +84,14 @@ export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise { const inputUMLS = record.subject.UMLS || []; const outputUMLS = record.object.UMLS || []; + const hash = record.recordHash; inputUMLS?.forEach((input_umls) => { - if (!(input_umls in pairs)) { - pairs[input_umls] = new Set(); + if (!(hash in pairsToAdd)) { + pairsToAdd[hash] = []; } outputUMLS?.forEach((output_umls) => { - pairs[input_umls].add(output_umls); + pairsToAdd[hash].push(`${input_umls}\n${output_umls}`); }); }); @@ -112,15 +102,20 @@ export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise { - return [...pairs[inputUMLS]].map((outputUMLS) => [inputUMLS, outputUMLS]); - }) - .flat(); + // organize queries to be distributed among different records + const pairs = new Set(); + let running = true; + while (running) { + running = false; + for (const hash in pairsToAdd) { + if (pairsToAdd[hash].length > 0) { + 
pairs.add(pairsToAdd[hash].pop()); + running = true; + } + } + } - // shuffle is used to ensure that queries are distributed amognst different records - // due to timeouts, it is more likely that earlier queries will be completed - const results = await query(shuffle(queries)); + const results = await query([...pairs].map(p => p.split('\n'))); debug('Combos no UMLS ID: ', combosWithoutIDs); return results || {}; // in case results is undefined, avoid TypeErrors From 27b6ec37bbb47a9d24042ca13f9704f9d6064b7b Mon Sep 17 00:00:00 2001 From: rjawesome Date: Tue, 9 Jul 2024 10:51:54 -0400 Subject: [PATCH 13/32] make umls pair ordering more efficient --- src/results_assembly/score.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 86ab3fee..aa10fe74 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -112,6 +112,9 @@ export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise Date: Tue, 9 Jul 2024 13:28:05 -0400 Subject: [PATCH 14/32] speed improvements for ngd --- src/results_assembly/score.ts | 80 +++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index aa10fe74..6b9540ba 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -44,7 +44,8 @@ async function query(queryPairs: string[][]): Promise { try { const response = await async.mapLimit(chunked_input, concurrency_limit, async (input) => { - if (Date.now() - start > NGD_TIMEOUT) return; + const timeRemaining = NGD_TIMEOUT - (Date.now() - start); + if (timeRemaining <= 0) return; const span = Telemetry.startSpan({ description: 'NGDScoreRequest' }); const data = { @@ -54,7 +55,7 @@ async function query(queryPairs: string[][]): Promise { span.setData('requestBody', data); try { // const start = performance.now(); - const response = await axios.post(url, data); + const 
response = await axios.post(url, data, { timeout: timeRemaining }); // const end = performance.now(); span.finish(); return response; @@ -64,11 +65,15 @@ async function query(queryPairs: string[][]): Promise { } }); //convert res array into single object with all curies - const result = response - .filter(r => r != undefined) - .map((r): ngdScoreCombo[] => r.data.filter((combo: ngdScoreCombo) => Number.isFinite(combo.ngd))) - .flat(); // get numerical scores and flatten array - return result.reduce((acc, cur) => ({ ...acc, [`${cur.umls[0]}-${cur.umls[1]}`]: cur.ngd }), {}); + const result = {}; + for (const res of response) { + if (res == undefined) continue; + for (const combo of res.data) { + if (!Number.isFinite(combo.ngd)) continue; + result[`${combo.umls[0]}-${combo.umls[1]}`] = combo.ngd; + } + } + return result; } catch (err) { debug('Failed to query for scores: ', err); } @@ -76,49 +81,52 @@ async function query(queryPairs: string[][]): Promise { // retrieve all ngd scores at once export async function getScores(recordsByQEdgeID: RecordsByQEdgeID): Promise { - const pairsToAdd: { [recordHash: string]: string[] } = {}; + const pairSet = new Set(); + // organize pairs in layers + // first from each record is first layer, second from each record is second layer, etc. 
+ // this makes it so more records are covered in earlier layers + const organizedPairs: string[][][] = []; + // this stores the "layer" number for each recordHash + const pairCounts: { [hash: string]: number } = {}; let combosWithoutIDs = 0; - Object.values(recordsByQEdgeID).forEach(({ records }) => { - records.forEach((record) => { + for (const { records } of Object.values(recordsByQEdgeID)) { + for (const record of records) { const inputUMLS = record.subject.UMLS || []; const outputUMLS = record.object.UMLS || []; const hash = record.recordHash; - inputUMLS?.forEach((input_umls) => { - if (!(hash in pairsToAdd)) { - pairsToAdd[hash] = []; - } - outputUMLS?.forEach((output_umls) => { - pairsToAdd[hash].push(`${input_umls}\n${output_umls}`); - }); - }); - if (inputUMLS.length == 0 || outputUMLS.length == 0) { // debug("NO RESULT", record.subject.curie, record.subject.UMLS, record.object.curie, record.object.UMLS) combosWithoutIDs++; + continue; } - }); - }); - - // organize queries to be distributed among different records - const pairs = new Set(); - let running = true; - while (running) { - running = false; - for (const hash in pairsToAdd) { - if (pairsToAdd[hash].length > 0) { - pairs.add(pairsToAdd[hash].pop()); - running = true; - } - if (pairsToAdd[hash].length == 0) { - delete pairsToAdd[hash]; - } + + for (const input_umls of inputUMLS) { + for (const output_umls of outputUMLS) { + const pairStr = `${input_umls}\n${output_umls}`; + if (pairSet.has(pairStr)) continue; + pairSet.add(pairStr); + if (pairCounts[hash] == undefined) pairCounts[hash] = 0; + if (organizedPairs.length <= pairCounts[hash]) organizedPairs.push([]); + organizedPairs[pairCounts[hash]].push([input_umls, output_umls]); + pairCounts[hash]++; + } + } + } + } + + const flatPairs = Array(pairSet.size).fill([]); + let i = 0; + for (const pairGroup of organizedPairs) { + for (const pair of pairGroup) { + flatPairs[i] = pair; + i++; } } - const results = await query([...pairs].map(p => 
p.split('\n'))); + const results = await query(flatPairs); debug('Combos no UMLS ID: ', combosWithoutIDs); return results || {}; // in case results is undefined, avoid TypeErrors From c858f5613a612d39b7df2bbbf066fb63edbc2041 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Fri, 12 Jul 2024 13:46:45 -0400 Subject: [PATCH 15/32] improve ngd logging --- src/results_assembly/score.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 6b9540ba..b78dea75 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -7,6 +7,7 @@ import async from 'async'; import _ from 'lodash'; import { ConsolidatedSolutionRecord, RecordsByQEdgeID } from './query_results'; import { Telemetry } from '@biothings-explorer/utils'; +import { AxiosError } from 'axios'; const tuning_param = 1.8; @@ -39,6 +40,9 @@ async function query(queryPairs: string[][]): Promise { debug('Querying', queryPairs.length, 'combos.'); + let successCount = 0; + let errCount = 0; + const chunked_input = _.chunk(queryPairs, batchSize); const start = Date.now(); @@ -58,9 +62,14 @@ async function query(queryPairs: string[][]): Promise { const response = await axios.post(url, data, { timeout: timeRemaining }); // const end = performance.now(); span.finish(); + successCount++; return response; } catch (err) { - debug(`NGD score query failed: ${err}`); + const timeoutError = err instanceof AxiosError && err.code === AxiosError.ECONNABORTED; + if (!timeoutError) { + errCount++; + debug(`NGD score query failed: ${err}`); + } span.finish(); } }); @@ -73,6 +82,7 @@ async function query(queryPairs: string[][]): Promise { result[`${combo.umls[0]}-${combo.umls[1]}`] = combo.ngd; } } + debug(`${successCount} successful queries, ${errCount} errored queries, ${queryPairs.length - successCount - errCount} timed out queries.`); return result; } catch (err) { debug('Failed to query for scores: ', err); From 
383cdc73a8c49aec0ff4b88bf89c65059d2de725 Mon Sep 17 00:00:00 2001 From: Colleen Xu Date: Thu, 18 Jul 2024 10:35:35 -0700 Subject: [PATCH 16/32] feat: add qualified predicate to MVP2 template matching --- data/templateGroups.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data/templateGroups.json b/data/templateGroups.json index 3df3fd2e..7b27afe1 100644 --- a/data/templateGroups.json +++ b/data/templateGroups.json @@ -21,6 +21,7 @@ ], "predicate": ["affects"], "qualifiers": { + "qualified_predicate": "causes", "object_aspect_qualifier": "activity_or_abundance", "object_direction_qualifier": "increased" }, @@ -40,6 +41,7 @@ ], "predicate": ["affects"], "qualifiers": { + "qualified_predicate": "causes", "object_aspect_qualifier": "activity_or_abundance", "object_direction_qualifier": "decreased" }, From afb6a8be66dfee2052aaab7bdcf47589a245cc90 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Thu, 18 Jul 2024 10:40:29 -0700 Subject: [PATCH 17/32] fix ngd log --- src/results_assembly/score.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index b78dea75..9a97dbff 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -40,12 +40,14 @@ async function query(queryPairs: string[][]): Promise { debug('Querying', queryPairs.length, 'combos.'); - let successCount = 0; - let errCount = 0; - const chunked_input = _.chunk(queryPairs, batchSize); const start = Date.now(); + let successCount = 0; + let successPairCount = 0; + let errCount = 0; + let errPairCount = 0; + try { const response = await async.mapLimit(chunked_input, concurrency_limit, async (input) => { const timeRemaining = NGD_TIMEOUT - (Date.now() - start); @@ -63,11 +65,13 @@ async function query(queryPairs: string[][]): Promise { // const end = performance.now(); span.finish(); successCount++; + successPairCount += input.length; return response; } catch (err) { const timeoutError = err 
instanceof AxiosError && err.code === AxiosError.ECONNABORTED; if (!timeoutError) { errCount++; + errPairCount += input.length; debug(`NGD score query failed: ${err}`); } span.finish(); @@ -82,7 +86,7 @@ async function query(queryPairs: string[][]): Promise { result[`${combo.umls[0]}-${combo.umls[1]}`] = combo.ngd; } } - debug(`${successCount} successful queries, ${errCount} errored queries, ${queryPairs.length - successCount - errCount} timed out queries.`); + debug(`${successCount} / ${errCount} / ${chunked_input.length - successCount - errCount} queries successful / errored / timed out, representing ${successPairCount} / ${errPairCount} / ${queryPairs.length - successPairCount - errPairCount} pairs`); return result; } catch (err) { debug('Failed to query for scores: ', err); From aaa903a5b499d7a91c6c4d40411af7c4cc887df4 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Fri, 19 Jul 2024 14:00:22 -0700 Subject: [PATCH 18/32] add qualifiers to creative edges --- src/inferred_mode/inferred_mode.ts | 14 +++++++++++--- src/inferred_mode/template_lookup.ts | 23 ++++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index bc6f3dd7..d4e842bf 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -13,6 +13,7 @@ import { TrapiKnowledgeGraph, TrapiQEdge, TrapiQNode, + TrapiQualifier, TrapiQueryGraph, TrapiResponse, TrapiResult, @@ -214,7 +215,7 @@ export default class InferredQueryHandler { async createQueries(qEdge: TrapiQEdge, qSubject: TrapiQNode, qObject: TrapiQNode): Promise { const templates = await this.findTemplates(qEdge, qSubject, qObject); // combine creative query with templates - const subQueries = templates.map(({ template, queryGraph }) => { + const subQueries = templates.map(({ template, queryGraph, qualifiers }) => { queryGraph.nodes.creativeQuerySubject.categories = [ ...new 
Set([...queryGraph.nodes.creativeQuerySubject.categories, ...qSubject.categories]), ]; @@ -244,7 +245,7 @@ export default class InferredQueryHandler { delete queryGraph.nodes.creativeQueryObject.ids; } - return { template, queryGraph }; + return { template, queryGraph, qualifiers }; }); return subQueries; @@ -256,6 +257,7 @@ export default class InferredQueryHandler { qEdgeID: string, qEdge: TrapiQEdge, combinedResponse: CombinedResponse, + qualifers?: CompactQualifiers ): CombinedResponseReport { const span = Telemetry.startSpan({ description: 'creativeCombineResponse' }); const newResponse = handler.getResponse(); @@ -339,6 +341,11 @@ export default class InferredQueryHandler { ], }; } + // Add qualifiers to edge + if (typeof qualifers == 'object' && Object.keys(qualifers).length > 0 && !combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers) { + combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifers).map(([qualifierType, qualifierValue]) => ({ qualifier_type_id: qualifierType, qualifier_value: qualifierValue })); + } + let auxGraphSuffix = 0; while ( Object.keys(combinedResponse.message.auxiliary_graphs).includes(`${inferredEdgeID}-support${auxGraphSuffix}`) @@ -523,7 +530,7 @@ export default class InferredQueryHandler { [resultID: string]: number; } = {}; - await async.eachOfSeries(subQueries, async ({ template, queryGraph }, i) => { + await async.eachOfSeries(subQueries, async ({ template, queryGraph, qualifiers }, i) => { const span = Telemetry.startSpan({ description: 'creativeTemplate' }); span.setData('template', (i as number) + 1); i = i as number; @@ -546,6 +553,7 @@ export default class InferredQueryHandler { qEdgeID, qEdge, combinedResponse, + qualifiers ); // update values used in logging successfulQueries += querySuccess; diff --git a/src/inferred_mode/template_lookup.ts b/src/inferred_mode/template_lookup.ts index e32973cb..70e45b88 100644 --- a/src/inferred_mode/template_lookup.ts +++ 
b/src/inferred_mode/template_lookup.ts @@ -9,14 +9,13 @@ export interface TemplateLookup { subject: string; object: string; predicate: string; - qualifiers: { - [qualifierType: string]: string; - }; + qualifiers: CompactQualifiers; } export interface MatchedTemplate { template: string; queryGraph: TrapiQueryGraph; + qualifiers: CompactQualifiers; } export interface TemplateGroup { @@ -35,6 +34,11 @@ export interface CompactEdge { qualifiers: CompactQualifiers; } +interface PathMatch { + path: string; + qualifiers: CompactQualifiers; +} + export async function getTemplates(lookups: TemplateLookup[]): Promise { async function getFiles(dir: string): Promise { const rootFiles = await fs.readdir(path.resolve(dir)); @@ -55,7 +59,7 @@ export async function getTemplates(lookups: TemplateLookup[]): Promise { + const matchingTemplatePaths: PathMatch[] = templateGroups.reduce((matches: PathMatch[], group: TemplateGroup) => { const lookupMatch = lookups.some((lookup) => { return ( group.subject.includes(lookup.subject) && @@ -69,17 +73,18 @@ export async function getTemplates(lookups: TemplateLookup[]): Promise { - if (!matches.includes(templatePaths[template])) { - matches.push(templatePaths[template]); + if (!matches.find(t => t.path === templatePaths[template])) { + matches.push({ path: templatePaths[template], qualifiers: group.qualifiers }); } }); } return matches; }, [] as string[]); - return await async.map(matchingTemplatePaths, async (templatePath: string) => { + return await async.map(matchingTemplatePaths, async (templatePathObj: PathMatch) => { return { - template: templatePath.substring(templatePath.lastIndexOf('/') + 1), - queryGraph: JSON.parse(await fs.readFile(templatePath, { encoding: 'utf8' })).message.query_graph, + template: templatePathObj.path.substring(templatePathObj.path.lastIndexOf('/') + 1), + queryGraph: JSON.parse(await fs.readFile(templatePathObj.path, { encoding: 'utf8' })).message.query_graph, + qualifiers: templatePathObj.qualifiers, }; }); } 
From 6bae9c7760aaa49a00c0af83575905ca491a6fac Mon Sep 17 00:00:00 2001 From: rjawesome Date: Tue, 30 Jul 2024 13:29:14 -0700 Subject: [PATCH 19/32] only include qualifiers that are specified in the query --- src/inferred_mode/template_lookup.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/inferred_mode/template_lookup.ts b/src/inferred_mode/template_lookup.ts index 70e45b88..118c4847 100644 --- a/src/inferred_mode/template_lookup.ts +++ b/src/inferred_mode/template_lookup.ts @@ -60,8 +60,9 @@ export async function getTemplates(lookups: TemplateLookup[]): Promise { + let matchingQualifers: CompactQualifiers; const lookupMatch = lookups.some((lookup) => { - return ( + const match = ( group.subject.includes(lookup.subject) && group.object.includes(lookup.object) && group.predicate.includes(lookup.predicate) && @@ -69,12 +70,14 @@ export async function getTemplates(lookups: TemplateLookup[]): Promise { if (!matches.find(t => t.path === templatePaths[template])) { - matches.push({ path: templatePaths[template], qualifiers: group.qualifiers }); + matches.push({ path: templatePaths[template], qualifiers: matchingQualifers }); } }); } From 5a55739896cd469338ac9cd2fcfbddee6f0f5a97 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:08:22 -0400 Subject: [PATCH 20/32] feat: PFOCR enrichment takes support graphs into account --- src/index.ts | 4 + src/results_assembly/pfocr.ts | 150 +++++++++++--------------- src/results_assembly/query_results.ts | 17 ++- 3 files changed, 74 insertions(+), 97 deletions(-) diff --git a/src/index.ts b/src/index.ts index 59401b36..95751de3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -29,6 +29,7 @@ import { QueryHandlerOptions } from '@biothings-explorer/types'; import BTEGraph from './graph/graph'; import QEdge from './query_edge'; import { Telemetry } from '@biothings-explorer/utils'; +import { enrichTrapiResultsWithPfocrFigures } from 
'./results_assembly/pfocr'; // Exports for external availability export * from './types'; @@ -703,6 +704,9 @@ export default class TRAPIQueryHandler { this.bteGraph.prune(this.finalizedResults, this.auxGraphs); this.bteGraph.notify(); + // Attempt to enrich results with PFOCR figures + this.logs = [...this.logs, ...(await enrichTrapiResultsWithPfocrFigures(this.getResponse()))]; + span3?.finish(); // check primary knowledge sources diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 7f68528c..557badff 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -4,12 +4,16 @@ const debug = Debug('bte:biothings-explorer-trapi:pfocr'); import { intersection } from '../utils'; import _ from 'lodash'; import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import { TrapiResult } from '@biothings-explorer/types'; +import { TrapiResult, TrapiKGNode, TrapiResponse, TrapiKGEdge } from '@biothings-explorer/types'; +import Graph from '../graph/graph'; // the minimum acceptable intersection size between the CURIEs // in a TRAPI result and in a PFOCR figure. const MATCH_COUNT_MIN = 2; const FIGURE_COUNT_MAX = 20; +const SUPPORTED_PREFIXES = { + NCBIGene: 'associatedWith.mentions.genes.ncbigene', +}; interface pfocrQueryBody { q: string[]; @@ -124,31 +128,32 @@ async function getPfocrFigures(qTerms: Set): Promise { - const matchableQNodeIDs: Set = new Set(); - - if (allTrapiResults.length === 0) { - return matchableQNodeIDs; - } - - // TODO: this will need to be updated to handle non-NCBIGene CURIEs as well - // as non-gene CURIEs once we support querying for chemicals and diseases. +function traverseResultForNodes(result: TrapiResult, response: TrapiResponse): Set { + const kg = response.message.knowledge_graph; + const nodes: Set = new Set(); + const edgeStack: TrapiKGEdge[] = []; + // TODO: get all nodes from a result, whether it be recursive or iterative. 
+ // First get all bound nodes and edges + Object.values(result.node_bindings).forEach((bindings) => + bindings.forEach((binding) => nodes.add(kg.nodes[binding.id])), + ); + Object.values(result.analyses[0].edge_bindings).forEach((bindings) => + bindings.forEach((binding) => edgeStack.push(kg.edges[binding.id])), + ); - const supportedPrefixes = new Set(['NCBIGene']); - for (const trapiResult of allTrapiResults) { - for (const [qNodeID, nodeBindingValues] of Object.entries(trapiResult.node_bindings)) { - for (const nodeBindingValue of nodeBindingValues) { - const prefix = nodeBindingValue.id.split(':')[0]; - if (supportedPrefixes.has(prefix)) { - matchableQNodeIDs.add(qNodeID); - break; - } - } + while (edgeStack.length > 0) { + const edge = edgeStack.pop(); + nodes.add(kg.nodes[edge.object]); + nodes.add(kg.nodes[edge.subject]); + const supportGraphs = edge.attributes.find((attribute) => attribute.attribute_type_id == 'biolink:support_graphs'); + if (supportGraphs) { + (supportGraphs.value as string[]).forEach((auxGraphID) => + response.message.auxiliary_graphs[auxGraphID].edges.forEach((edgeID) => edgeStack.push(kg.edges[edgeID])), + ); } } - debug(`QNode(s) having CURIEs that PFOCR could potentially match: ${[...matchableQNodeIDs]}`); - return matchableQNodeIDs; + return nodes; } /* time complexity: O(t*f) @@ -156,53 +161,54 @@ function getMatchableQNodeIDs(allTrapiResults: TrapiResult[]): Set { * t: trapiResults.length * f: figures.length */ -export async function enrichTrapiResultsWithPfocrFigures(allTrapiResults: TrapiResult[]): Promise { - const matchableQNodeIDs = getMatchableQNodeIDs(allTrapiResults); +export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse): Promise { + // NOTE: This function operates on the actual TRAPI information that will be returned + // to the client. Don't mutate what shouldn't be mutated! 
+ const supportedPrefixes = new Set(['NCBIGene']); + const results = response.message.results; const logs: StampedLog[] = []; let resultsWithTruncatedFigures = 0; const truncatedFigures: Set = new Set(); - if (matchableQNodeIDs.size < MATCH_COUNT_MIN) { + const curieCombosByResult: Map = new Map(); + const curieCombos: Set = results.reduce((combos: Set, result: TrapiResult) => { + const nodes: Set = traverseResultForNodes(result, response); + const combo: Set = new Set(); + let matchedNodes = 0; + Object.entries(nodes).forEach(([primaryCurie, node]) => { + let nodeMatched = false; + const equivalentCuries = node.attributes?.find((attribute) => attribute.attribute_type_id === 'biolink:xref') + .value as string[]; + [primaryCurie, ...equivalentCuries].forEach((curie) => { + if (supportedPrefixes.has(curie.split(':')[0])) { + combo.add(curie.split(':')[1]); + nodeMatched = true; + } + }); + if (nodeMatched) matchedNodes += 1; + }); + if (matchedNodes >= MATCH_COUNT_MIN) { + const comboString = [...combo].join(' '); + curieCombosByResult.set(result, comboString); + combos.add(comboString); + } + return combos; + }, new Set()); + + if (curieCombos.size < 1) { // No TRAPI result can satisfy MATCH_COUNT_MIN logs.push(new LogEntry('DEBUG', null, 'Query does not match criteria, skipping PFOCR figure enrichment.').getLog()); return logs; } - // TODO: currently just NCBIGene CURIEs. Expand to handle any CURIE in PFOCR. 
- const trapiResultToCurieSet: Map = new Map(); - const curieCombinations: Set = new Set( - allTrapiResults.reduce((arr: string[], res) => { - const resultCuries: Set = new Set(); - const matchedQNodes: Set = new Set(); - [...matchableQNodeIDs].forEach((qNodeID) => { - res.node_bindings[qNodeID] - .map((node_binding) => node_binding.id) - .filter((curie) => curie.startsWith('NCBIGene:')) - .forEach((curie) => { - resultCuries.add(curie); - matchedQNodes.add(qNodeID); - }); - }); - - const resultCuriesString = [...resultCuries].map((curie) => curie.replace('NCBIGene:', '')).join(' '); - - if (resultCuries.size >= MATCH_COUNT_MIN && matchedQNodes.size >= MATCH_COUNT_MIN) { - trapiResultToCurieSet.set(res, resultCuriesString); - arr.push(resultCuriesString); - } - - return arr; - }, []), - ); - - const figures = await getPfocrFigures(curieCombinations).catch((err) => { + const figures = await getPfocrFigures(curieCombos).catch((err) => { debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err); throw err; }); - debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} genes from any TRAPI result`); + debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} nodes from any TRAPI result`); const figuresByCuries: { [queryCuries: string]: DeDupedFigureResult[] } = {}; figures.forEach((figure) => { @@ -220,22 +226,11 @@ export async function enrichTrapiResultsWithPfocrFigures(allTrapiResults: TrapiR // return set; // }, new Set() as Set); - for (const trapiResult of allTrapiResults) { + for (const trapiResult of results) { // No figures match this result if (!figuresByCuries[trapiResultToCurieSet.get(trapiResult)]) continue; const resultCuries: Set = new Set(); - const resultMatchableQNodeIDs: Set = new Set(); - [...matchableQNodeIDs].forEach((qNodeID) => { - trapiResult.node_bindings[qNodeID] - .map((node_binding) => node_binding.id) - .filter((curie) => curie.startsWith('NCBIGene:')) - .forEach((curie) => { - 
resultCuries.add(curie.replace('NCBIGene:', '')); - resultMatchableQNodeIDs.add(qNodeID); - }); - }); - if (resultMatchableQNodeIDs.size < 2) continue; (figuresByCuries[trapiResultToCurieSet.get(trapiResult)] ?? []).forEach((figure) => { if (!('pfocr' in trapiResult)) { @@ -245,20 +240,6 @@ export async function enrichTrapiResultsWithPfocrFigures(allTrapiResults: TrapiR const figureCurieSet = new Set(figure.associatedWith.mentions.genes.ncbigene); const resultGenesInFigure = intersection(resultCuries, figureCurieSet); - const matchedQNodes = [...matchableQNodeIDs].filter((matchableQNodeID) => { - const currentQNodeCurieSet = new Set( - trapiResult.node_bindings[matchableQNodeID].map((node_binding) => node_binding.id), - ); - - return ( - intersection(currentQNodeCurieSet, new Set([...resultGenesInFigure].map((geneID) => `NCBIGene:${geneID}`))) - .size > 0 - ); - }); - - // If we've matched on 2 curies, but we haven't actually matched on multiple nodes - if (matchedQNodes.length < 2) return; - const otherGenesInFigure = figureCurieSet.size - resultGenesInFigure.size; const resultGenesInOtherFigures = [...resultCuries].filter((gene) => { @@ -278,13 +259,6 @@ export async function enrichTrapiResultsWithPfocrFigures(allTrapiResults: TrapiR //title: figure.associatedWith.title, matchedCuries: [...resultGenesInFigure].map((geneID) => `NCBIGene:${geneID}`), score: 2 * ((precision * recall) / (precision + recall)), - // 1 - - // parseFloat( - // Analyze([ - // [resultGenesInFigure.size, resultGenesInOtherFigures], - // [otherGenesInFigure, otherGenesInOtherFigures], - // ]).pValue, - // ), }); matchedTrapiResults.add(trapiResult); }); @@ -311,7 +285,7 @@ export async function enrichTrapiResultsWithPfocrFigures(allTrapiResults: TrapiR debug(message); logs.push(new LogEntry('DEBUG', null, message).getLog()); debug( - `${MATCH_COUNT_MIN}+ CURIE matches: ${matchedFigures.size} PFOCR figures and ${matchedTrapiResults.size} TRAPI results`, + `${MATCH_COUNT_MIN}+ node matches: 
${matchedFigures.size} PFOCR figures across ${matchedTrapiResults.size} TRAPI results`, ); logs.push( new LogEntry( diff --git a/src/results_assembly/query_results.ts b/src/results_assembly/query_results.ts index 51dff31c..9c444916 100644 --- a/src/results_assembly/query_results.ts +++ b/src/results_assembly/query_results.ts @@ -5,7 +5,6 @@ import { zip } from 'lodash'; const debug = Debug('bte:biothings-explorer-trapi:QueryResult'); import { getScores, calculateScore, ScoreCombos } from './score'; import { Record } from '@biothings-explorer/api-response-transform'; -import { enrichTrapiResultsWithPfocrFigures } from './pfocr'; import * as config from '../config'; export interface RecordsByQEdgeID { @@ -477,13 +476,13 @@ export default class TrapiResultsAssembler { .sort((result1, result2) => (result2.analyses[0].score ?? 0) - (result1.analyses[0].score ?? 0)); //sort by decreasing score if (shouldScore) { - try { - const pfocrEnrichmentLogs = await enrichTrapiResultsWithPfocrFigures(this._results); - this.logs.push(...pfocrEnrichmentLogs); - } catch (err) { - debug('Error enriching with PFOCR figures: ', err); - this.logs.push(new LogEntry('DEBUG', null, 'Error enriching with PFOCR figures: ', err).getLog()); - } + // try { + // const pfocrEnrichmentLogs = await enrichTrapiResultsWithPfocrFigures(this._results); + // this.logs.push(...pfocrEnrichmentLogs); + // } catch (err) { + // debug('Error enriching with PFOCR figures: ', err); + // this.logs.push(new LogEntry('DEBUG', null, 'Error enriching with PFOCR figures: ', err).getLog()); + // } debug(`Scored ${resultsWithScore} results with NGD score, scored ${resultsWithoutScore} results without NGD.`); this.logs.push( new LogEntry( @@ -503,7 +502,7 @@ export default class TrapiResultsAssembler { new LogEntry( 'DEBUG', null, - `Scoring/PFOCR figures disabled for KP endpoints; results not scored. 
Use ARA endpoints (/v1/query or /v1/asyncquery) for scoring/PFOCR figures.`, + `Scoring disabled for KP endpoints; results not scored. Use ARA endpoints (/v1/query or /v1/asyncquery) for scoring.`, { type: 'scoring', scored: resultsWithScore, From 7145a9e472d10c750aa981822d57b1905e2a615d Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:40:36 -0400 Subject: [PATCH 21/32] fix: node handling, curieCombosByResults --- src/results_assembly/pfocr.ts | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 557badff..5d752d04 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -132,8 +132,6 @@ function traverseResultForNodes(result: TrapiResult, response: TrapiResponse): S const kg = response.message.knowledge_graph; const nodes: Set = new Set(); const edgeStack: TrapiKGEdge[] = []; - // TODO: get all nodes from a result, whether it be recursive or iterative. - // First get all bound nodes and edges Object.values(result.node_bindings).forEach((bindings) => bindings.forEach((binding) => nodes.add(kg.nodes[binding.id])), ); @@ -164,7 +162,6 @@ function traverseResultForNodes(result: TrapiResult, response: TrapiResponse): S export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse): Promise { // NOTE: This function operates on the actual TRAPI information that will be returned // to the client. Don't mutate what shouldn't be mutated! 
- const supportedPrefixes = new Set(['NCBIGene']); const results = response.message.results; const logs: StampedLog[] = []; let resultsWithTruncatedFigures = 0; @@ -175,12 +172,12 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse const nodes: Set = traverseResultForNodes(result, response); const combo: Set = new Set(); let matchedNodes = 0; - Object.entries(nodes).forEach(([primaryCurie, node]) => { + [...nodes].forEach((node) => { let nodeMatched = false; const equivalentCuries = node.attributes?.find((attribute) => attribute.attribute_type_id === 'biolink:xref') .value as string[]; - [primaryCurie, ...equivalentCuries].forEach((curie) => { - if (supportedPrefixes.has(curie.split(':')[0])) { + equivalentCuries.forEach((curie) => { + if (Object.keys(SUPPORTED_PREFIXES).includes(curie.split(':')[0])) { combo.add(curie.split(':')[1]); nodeMatched = true; } @@ -201,8 +198,6 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse return logs; } - const trapiResultToCurieSet: Map = new Map(); - const figures = await getPfocrFigures(curieCombos).catch((err) => { debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err); throw err; @@ -228,11 +223,11 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse for (const trapiResult of results) { // No figures match this result - if (!figuresByCuries[trapiResultToCurieSet.get(trapiResult)]) continue; + if (!figuresByCuries[curieCombosByResult.get(trapiResult)]) continue; const resultCuries: Set = new Set(); - (figuresByCuries[trapiResultToCurieSet.get(trapiResult)] ?? []).forEach((figure) => { + (figuresByCuries[curieCombosByResult.get(trapiResult)] ?? 
[]).forEach((figure) => { if (!('pfocr' in trapiResult)) { trapiResult.pfocr = []; } From fedd52617d1f29e386aa445f0293d627dce933d2 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 1 Aug 2024 15:15:05 -0400 Subject: [PATCH 22/32] fix: add pfocrUrl, fix curie handling for score --- src/results_assembly/pfocr.ts | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 5d752d04..c122adb6 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -32,6 +32,7 @@ interface FigureResult { notfound?: boolean; associatedWith: { figureUrl: string; + pfocrUrl: string; pmc: string; mentions: { genes: { @@ -93,7 +94,13 @@ async function getPfocrFigures(qTerms: Set): Promise = new Set(); const curieCombosByResult: Map = new Map(); + const curiesByResult: Map> = new Map(); + const curieCombos: Set = results.reduce((combos: Set, result: TrapiResult) => { const nodes: Set = traverseResultForNodes(result, response); const combo: Set = new Set(); @@ -177,8 +186,10 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse const equivalentCuries = node.attributes?.find((attribute) => attribute.attribute_type_id === 'biolink:xref') .value as string[]; equivalentCuries.forEach((curie) => { - if (Object.keys(SUPPORTED_PREFIXES).includes(curie.split(':')[0])) { - combo.add(curie.split(':')[1]); + const prefix = curie.split(':')[0]; + const suffix = curie.replace(`${prefix}:`, ''); + if (Object.keys(SUPPORTED_PREFIXES).includes(prefix)) { + combo.add(suffix); nodeMatched = true; } }); @@ -188,6 +199,7 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse const comboString = [...combo].join(' '); curieCombosByResult.set(result, comboString); combos.add(comboString); + curiesByResult.set(result, combo); } return combos; }, new Set()); @@ -225,7 +237,7 @@ export async function 
enrichTrapiResultsWithPfocrFigures(response: TrapiResponse // No figures match this result if (!figuresByCuries[curieCombosByResult.get(trapiResult)]) continue; - const resultCuries: Set = new Set(); + const resultCuries = curiesByResult.get(trapiResult); (figuresByCuries[curieCombosByResult.get(trapiResult)] ?? []).forEach((figure) => { if (!('pfocr' in trapiResult)) { @@ -249,6 +261,7 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse trapiResult.pfocr.push({ figureUrl: figure.associatedWith.figureUrl, + pfocrUrl: figure.associatedWith.pfocrUrl, pmc: figure.associatedWith.pmc, // TODO: do we want to include figure title? Note: this would need to be added to queryBody. //title: figure.associatedWith.title, From 37f768005feafbd63d36cf8c30e72d4887ef6c58 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:05:08 -0400 Subject: [PATCH 23/32] fix: more specific direct edge criteria --- src/inferred_mode/inferred_mode.ts | 47 ++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index bc6f3dd7..0061edbb 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -309,8 +309,28 @@ export default class InferredQueryHandler { const resultID = `${resultCreativeSubjectID}-${resultCreativeObjectID}`; // Direct edge answers stand on their own, not as an inferred edge. 
- if (Object.keys(result.node_bindings).length == 2) { - const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id; + const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id; + const boundEdge = combinedResponse.message.knowledge_graph.edges[boundEdgeID]; + const specialHandling = [ + Object.keys(result.node_bindings).length === 2, // Direct edge + // Predicate matches or is descendant + qEdge.predicates.some( + (predicate) => + predicate === boundEdge.predicate || + biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate), + ), + // All query qualifiers (if any) are accounted for (more is fine) + qEdge.qualifier_constraints.every(({ qualifier_set }) => { + return qualifier_set.every((queryQualifier) => + boundEdge.qualifiers.some( + (qualifier) => + queryQualifier.qualifier_type_id === qualifier.qualifier_type_id && + queryQualifier.qualifier_value === qualifier.qualifier_value, + ), + ); + }), + ].every((test) => test); + if (specialHandling) { translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] }; } else { // Create an aux graph using the result and associate it with an inferred Edge @@ -334,8 +354,8 @@ export default class InferredQueryHandler { ], attributes: [ { attribute_type_id: 'biolink:support_graphs', value: [] }, - { attribute_type_id: 'biolink:knowledge_level', value: "prediction" }, - { attribute_type_id: 'biolink:agent_type', value: "computational_model" }, + { attribute_type_id: 'biolink:knowledge_level', value: 'prediction' }, + { attribute_type_id: 'biolink:agent_type', value: 'computational_model' }, ], }; } @@ -357,7 +377,7 @@ export default class InferredQueryHandler { }, [] as string[], ), - attributes: [] + attributes: [], }; } @@ -390,9 +410,9 @@ export default class InferredQueryHandler { if (typeof combinedResponse.message.results[resultID].analyses[0].score !== 'undefined') { combinedResponse.message.results[resultID].analyses[0].score = 
resScore ? scaled_sigmoid( - inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) + - inverse_scaled_sigmoid(resScore), - ) + inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) + + inverse_scaled_sigmoid(resScore), + ) : combinedResponse.message.results[resultID].analyses[0].score; } else { combinedResponse.message.results[resultID].analyses[0].score = resScore; @@ -560,11 +580,9 @@ export default class InferredQueryHandler { const message = [ `Addition of ${creativeLimitHit} results from Template ${i + 1}`, Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ', - `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${ - Object.keys(combinedResponse.message.results).length + `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${Object.keys(combinedResponse.message.results).length } merged). `, - `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${ - subQueries.length - (i + 1) + `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${subQueries.length - (i + 1) } `, subQueries.length - (i + 1) === 1 ? 
`template.` : `templates.`, ].join(''); @@ -589,9 +607,8 @@ export default class InferredQueryHandler { const total = Object.values(mergedResultsCount).reduce((sum, count) => sum + count, 0) + Object.keys(mergedResultsCount).length; - const message = `Merging Summary: (${total}) inferred-template results were merged into (${ - Object.keys(mergedResultsCount).length - }) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`; + const message = `Merging Summary: (${total}) inferred-template results were merged into (${Object.keys(mergedResultsCount).length + }) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`; debug(message); combinedResponse.logs.push(new LogEntry('INFO', null, message).getLog()); } From f4e63f7e46aeede6f17ee1d6acf51d6ca2fa4eb8 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 5 Aug 2024 15:09:56 -0400 Subject: [PATCH 24/32] fix: satisfy 1+ qualifier set, not all necessarily --- src/inferred_mode/inferred_mode.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 0061edbb..1c973fc6 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -320,7 +320,7 @@ export default class InferredQueryHandler { biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate), ), // All query qualifiers (if any) are accounted for (more is fine) - qEdge.qualifier_constraints.every(({ qualifier_set }) => { + qEdge.qualifier_constraints.some(({ qualifier_set }) => { return qualifier_set.every((queryQualifier) => boundEdge.qualifiers.some( (qualifier) => From 215a423147dd7868bfe1e838bc886ed32f698f87 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 5 Aug 2024 15:02:53 -0700 Subject: [PATCH 25/32] do not include records with no curies in qedge --- src/edge_manager.ts | 7 ++----- 1 file changed, 2 insertions(+), 
5 deletions(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 543a6ea5..dabacb86 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -207,11 +207,8 @@ export default class QueryEdgeManager { const objectIDs = [record.object.original, record.object.curie, ...record.object.equivalentCuries]; // there must be at least a minimal intersection - const subjectMatch = - subjectIDs.some((curie) => execSubjectCuries.includes(curie)) || execSubjectCuries.length === 0; - const objectMatch = objectIDs.some((curie) => execObjectCuries.includes(curie)) || execObjectCuries.length === 0; - - //if both ends match then keep record + const subjectMatch = subjectIDs.some((curie) => execSubjectCuries.includes(curie)); + const objectMatch = objectIDs.some((curie) => execObjectCuries.includes(curie)); // Don't keep self-edges const selfEdge = [...subjectIDs].some((curie) => objectIDs.includes(curie)); From f6eaa70444e76504bb161cb8d0ecfe061ea7a1f8 Mon Sep 17 00:00:00 2001 From: Anudeep Tubati <40491005+NeuralFlux@users.noreply.github.com> Date: Tue, 6 Aug 2024 12:06:00 -0400 Subject: [PATCH 26/32] fix: added original curies to results as `query_id` (#204) * fix: added original curies to results as `query_id` * fix: replaced redundant map with forEach --- src/graph/kg_node.ts | 5 +++++ src/index.ts | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/graph/kg_node.ts b/src/graph/kg_node.ts index 60dac19f..ff338c5d 100644 --- a/src/graph/kg_node.ts +++ b/src/graph/kg_node.ts @@ -8,12 +8,14 @@ export interface KGNodeInfo { curies: string[]; primaryCurie: string; qNodeID: string; + originalCurie?: string; } export default class KGNode { id: string; primaryCurie: string; qNodeID: string; + originalCurie: string; curies: string[]; names: string[]; semanticType: string[]; @@ -36,6 +38,9 @@ export default class KGNode { this.targetNodes = new Set(); this.sourceQNodeIDs = new Set(); this.targetQNodeIDs = new Set(); + + // store original curie to output 
`query_id bte#815` + this.originalCurie = info.originalCurie; } addSourceNode(kgNodeID: string): void { diff --git a/src/index.ts b/src/index.ts index 95751de3..54d752c1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -318,6 +318,18 @@ export default class TRAPIQueryHandler { this.finalizedResults = fixedResults; } + appendOriginalCuriesToResults(results: TrapiResult[]): void { + results.forEach(result => { + Object.entries(result.node_bindings).forEach(([_, bindings]) => { + bindings.forEach(binding => { + if (this.bteGraph.nodes[binding.id].originalCurie && this.bteGraph.nodes[binding.id].originalCurie !== binding.id) { + binding.query_id = this.bteGraph.nodes[binding.id].originalCurie; + } + }) + }) + }) + } + async addQueryNodes(): Promise { const qNodeIDsByOriginalID: Map = new Map(); const curiesToResolve = [ @@ -339,6 +351,7 @@ export default class TRAPIQueryHandler { this.bteGraph.nodes[resolvedEntity.primaryID] = new KGNode(resolvedEntity.primaryID, { primaryCurie: resolvedEntity.primaryID, qNodeID: qNodeIDsByOriginalID[originalCurie], + originalCurie: originalCurie, curies: resolvedEntity.equivalentIDs, names: resolvedEntity.labelAliases, semanticType: category ? 
[category] : ['biolink:NamedThing'], @@ -702,6 +715,8 @@ export default class TRAPIQueryHandler { this.createSubclassSupportGraphs(); // prune bteGraph this.bteGraph.prune(this.finalizedResults, this.auxGraphs); + // add original curies to results + this.appendOriginalCuriesToResults(this.finalizedResults); this.bteGraph.notify(); // Attempt to enrich results with PFOCR figures From 46a5e4f8b16434263259a6dc5b0ea7f916173a9f Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:52:52 -0400 Subject: [PATCH 27/32] fix: handle undefined cases --- src/inferred_mode/inferred_mode.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index e6bc1a8a..3054de0d 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -316,21 +316,21 @@ export default class InferredQueryHandler { const specialHandling = [ Object.keys(result.node_bindings).length === 2, // Direct edge // Predicate matches or is descendant - qEdge.predicates.some( + qEdge.predicates?.some( (predicate) => predicate === boundEdge.predicate || biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate), - ), + ) ?? false, // All query qualifiers (if any) are accounted for (more is fine) - qEdge.qualifier_constraints.some(({ qualifier_set }) => { + qEdge.qualifier_constraints?.some(({ qualifier_set }) => { return qualifier_set.every((queryQualifier) => - boundEdge.qualifiers.some( + boundEdge.qualifiers?.some( (qualifier) => queryQualifier.qualifier_type_id === qualifier.qualifier_type_id && queryQualifier.qualifier_value === qualifier.qualifier_value, - ), + ) ?? false, ); - }), + }) ?? 
false, ].every((test) => test); if (specialHandling) { translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] }; From 9fad32df5871197cd711f319051a400ecd758420 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 8 Aug 2024 16:29:20 -0400 Subject: [PATCH 28/32] fix: pfocr failure shouldn't fail whole query --- src/results_assembly/pfocr.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index c122adb6..870ddb2c 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -210,10 +210,14 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse return logs; } - const figures = await getPfocrFigures(curieCombos).catch((err) => { + let figures: DeDupedFigureResult[]; + try { + figures = await getPfocrFigures(curieCombos) + } catch (err) { debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err); - throw err; - }); + logs.push(new LogEntry('ERROR', null, 'Error getting PFOCR figures, results will not be enriched.').getLog()) + } + if (!figures) return logs; debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} nodes from any TRAPI result`); From 6a8d1e52cfc0b1b4ed659fda1dfabc468bf152ee Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:45:39 -0400 Subject: [PATCH 29/32] fix: preserve qualifier biolink prefix --- src/inferred_mode/inferred_mode.ts | 35 +++++++++++++++++----------- src/inferred_mode/template_lookup.ts | 12 ++++++---- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 3054de0d..57e2feae 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -313,25 +313,32 @@ export default class InferredQueryHandler { // Direct edge 
answers stand on their own, not as an inferred edge. const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id; const boundEdge = combinedResponse.message.knowledge_graph.edges[boundEdgeID]; - const specialHandling = [ - Object.keys(result.node_bindings).length === 2, // Direct edge - // Predicate matches or is descendant + const oneHop = Object.keys(result.node_bindings).length === 2; // Direct edge + // Predicate matches or is descendant + const predicateMatch = qEdge.predicates?.some( (predicate) => predicate === boundEdge.predicate || biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate), - ) ?? false, - // All query qualifiers (if any) are accounted for (more is fine) + ) ?? false; + // All query qualifiers (if any) are accounted for (more is fine) + const qualifierMatch = qEdge.qualifier_constraints?.some(({ qualifier_set }) => { - return qualifier_set.every((queryQualifier) => - boundEdge.qualifiers?.some( - (qualifier) => - queryQualifier.qualifier_type_id === qualifier.qualifier_type_id && - queryQualifier.qualifier_value === qualifier.qualifier_value, - ) ?? false, - ); - }) ?? false, - ].every((test) => test); + return qualifier_set.every((queryQualifier) => { + return ( + boundEdge.qualifiers?.some((qualifier) => { + const typeMatch = queryQualifier.qualifier_type_id === qualifier.qualifier_type_id; + const valueMatch = + queryQualifier.qualifier_value === qualifier.qualifier_value || + biolink + .getDescendantQualifiers(queryQualifier.qualifier_value as string) + .includes(qualifier.qualifier_value as string); + return typeMatch && valueMatch; + }) ?? false + ); + }); + }) ?? 
false; + const specialHandling = oneHop && predicateMatch && qualifierMatch; if (specialHandling) { translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] }; } else { diff --git a/src/inferred_mode/template_lookup.ts b/src/inferred_mode/template_lookup.ts index 118c4847..9c6f86d3 100644 --- a/src/inferred_mode/template_lookup.ts +++ b/src/inferred_mode/template_lookup.ts @@ -62,21 +62,23 @@ export async function getTemplates(lookups: TemplateLookup[]): Promise { let matchingQualifers: CompactQualifiers; const lookupMatch = lookups.some((lookup) => { - const match = ( + const match = group.subject.includes(lookup.subject) && group.object.includes(lookup.object) && group.predicate.includes(lookup.predicate) && Object.entries(lookup.qualifiers || {}).every(([qualifierType, qualifierValue]) => { - return (group.qualifiers || {})[qualifierType] && group.qualifiers[qualifierType] === qualifierValue; - }) - ); + return ( + (group.qualifiers || {})[qualifierType.replace('biolink:', '')] && + group.qualifiers[qualifierType.replace('biolink:', '')] === qualifierValue.replace('biolink:', '') + ); + }); if (match) matchingQualifers = lookup.qualifiers; return match; }); if (lookupMatch) { group.templates.forEach((template) => { - if (!matches.find(t => t.path === templatePaths[template])) { + if (!matches.find((t) => t.path === templatePaths[template])) { matches.push({ path: templatePaths[template], qualifiers: matchingQualifers }); } }); From 29bab451c2f4b19a75cb85fb9aeed709599d8291 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:46:17 -0400 Subject: [PATCH 30/32] fix: response merging special handling criteria --- src/inferred_mode/inferred_mode.ts | 39 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 57e2feae..92d9fbc6 100644 --- 
a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -169,10 +169,8 @@ export default class InferredQueryHandler { const qualifierConstraints = (qEdge.qualifier_constraints || []).map((qualifierSetObj) => { return Object.fromEntries( qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - qualifier_type_id.replace('biolink:', ''), - Array.isArray(qualifier_value) - ? qualifier_value.map((string) => string.replace('biolink:', '')) - : qualifier_value.replace('biolink:', ''), + qualifier_type_id, + qualifier_value, ]), ) as CompactQualifiers; }); @@ -257,7 +255,7 @@ export default class InferredQueryHandler { qEdgeID: string, qEdge: TrapiQEdge, combinedResponse: CombinedResponse, - qualifers?: CompactQualifiers + qualifiers?: CompactQualifiers, ): CombinedResponseReport { const span = Telemetry.startSpan({ description: 'creativeCombineResponse' }); const newResponse = handler.getResponse(); @@ -328,11 +326,17 @@ export default class InferredQueryHandler { return ( boundEdge.qualifiers?.some((qualifier) => { const typeMatch = queryQualifier.qualifier_type_id === qualifier.qualifier_type_id; - const valueMatch = - queryQualifier.qualifier_value === qualifier.qualifier_value || - biolink - .getDescendantQualifiers(queryQualifier.qualifier_value as string) - .includes(qualifier.qualifier_value as string); + let valueMatch: boolean; + try { + const descendants = queryQualifier.qualifier_value.includes('biolink:') + ? biolink.getDescendantPredicates(queryQualifier.qualifier_value as string) + : biolink.getDescendantQualifiers(queryQualifier.qualifier_value as string); + const valueMatch = + queryQualifier.qualifier_value === qualifier.qualifier_value || + descendants.includes(qualifier.qualifier_value as string); + } catch (err) { + valueMatch = queryQualifier.qualifier_value === qualifier.qualifier_value; + } return typeMatch && valueMatch; }) ?? 
false ); @@ -369,8 +373,17 @@ export default class InferredQueryHandler { }; } // Add qualifiers to edge - if (typeof qualifers == 'object' && Object.keys(qualifers).length > 0 && !combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers) { - combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifers).map(([qualifierType, qualifierValue]) => ({ qualifier_type_id: qualifierType, qualifier_value: qualifierValue })); + if ( + typeof qualifiers == 'object' && + Object.keys(qualifiers).length > 0 && + !combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers + ) { + combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifiers).map( + ([qualifierType, qualifierValue]) => ({ + qualifier_type_id: qualifierType, + qualifier_value: qualifierValue, + }), + ); } let auxGraphSuffix = 0; @@ -580,7 +593,7 @@ export default class InferredQueryHandler { qEdgeID, qEdge, combinedResponse, - qualifiers + qualifiers, ); // update values used in logging successfulQueries += querySuccess; From 2d754feb42610c980ca8f5ea819df6da586d66e8 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Fri, 9 Aug 2024 15:00:51 -0400 Subject: [PATCH 31/32] fix: pfocr server selection, timeout handling --- src/results_assembly/pfocr.ts | 38 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 870ddb2c..222c220c 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -64,14 +64,19 @@ async function getAllByScrolling( hits: RawFigureResult[] = [], ): Promise { queryBody.from = batchIndex; - const { data } = await axios.post(baseUrl, queryBody).catch((err) => { - debug('Error in scrolling request', err); - throw err; - }); + let data: { hits: RawFigureResult[]; max_total: number }; + try { + data = (await 
axios.post(baseUrl, queryBody, { timeout: 15000 })).data; + } catch (err) { + debug(`Error in scrolling request window ${batchIndex}-${batchIndex + 1000}, error is ${(err as Error).message}`); + } + + if (data) { + hits.push(...data.hits); + debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`); + } - hits.push(...data.hits); - debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`); - if (batchIndex + 1000 < data.max_total) { + if (data && batchIndex + 1000 < data.max_total) { return await getAllByScrolling(baseUrl, queryBody, batchIndex + 1000, hits); } else { return hits; @@ -82,7 +87,12 @@ async function getAllByScrolling( */ async function getPfocrFigures(qTerms: Set): Promise { debug(`Getting PFOCR figure data`); - const url = 'https://biothings.ncats.io/pfocr/query'; + const url = { + dev: 'https://biothings.ci.transltr.io/pfocr/query', + ci: 'https://biothings.ci.transltr.io/pfocr/query', + test: 'https://biothings.test.transltr.io/pfocr/query', + prod: 'https://biothings.ncats.io/pfocr/query', + }[process.env.INSTANCE_ENV ?? 'prod']; /* * We can now POST using minimum_should_match to bypass most set logic on our side * detailed here: https://github.com/biothings/pending.api/issues/88 @@ -212,10 +222,16 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse let figures: DeDupedFigureResult[]; try { - figures = await getPfocrFigures(curieCombos) + figures = await getPfocrFigures(curieCombos); } catch (err) { - debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err); - logs.push(new LogEntry('ERROR', null, 'Error getting PFOCR figures, results will not be enriched.').getLog()) + debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', (err as Error).message); + logs.push( + new LogEntry( + 'WARNING', + null, + `Error getting PFOCR figures, results will not be enriched. 
The error is ${err.message}`, + ).getLog(), + ); } if (!figures) return logs; From df2549422b91c086eaed27456dfa03fc89a62cff Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 12 Aug 2024 10:43:04 -0400 Subject: [PATCH 32/32] fix: qualifier logic, typo --- src/inferred_mode/inferred_mode.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 92d9fbc6..0574080b 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -321,6 +321,8 @@ export default class InferredQueryHandler { ) ?? false; // All query qualifiers (if any) are accounted for (more is fine) const qualifierMatch = + !qEdge.qualifier_constraints || + qEdge.qualifier_constraints.length === 0 || qEdge.qualifier_constraints?.some(({ qualifier_set }) => { return qualifier_set.every((queryQualifier) => { return ( @@ -331,7 +333,7 @@ export default class InferredQueryHandler { const descendants = queryQualifier.qualifier_value.includes('biolink:') ? biolink.getDescendantPredicates(queryQualifier.qualifier_value as string) : biolink.getDescendantQualifiers(queryQualifier.qualifier_value as string); - const valueMatch = + valueMatch = queryQualifier.qualifier_value === qualifier.qualifier_value || descendants.includes(qualifier.qualifier_value as string); } catch (err) { @@ -341,7 +343,7 @@ export default class InferredQueryHandler { }) ?? false ); }); - }) ?? false; + }); const specialHandling = oneHop && predicateMatch && qualifierMatch; if (specialHandling) { translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] };