From b84e33b3b11ee39e915c4969011d8cdb094a1219 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 9 Nov 2023 12:34:06 -0500 Subject: [PATCH 01/12] chore: migrate logEntry, sentry to utils --- package.json | 1 + src/batch_edge_query.ts | 8 ++-- src/cache_handler.ts | 2 +- src/edge_manager.ts | 12 ++---- src/graph/graph.ts | 2 +- src/index.ts | 24 ++++------- src/inferred_mode/inferred_mode.ts | 2 +- src/log_entry.ts | 58 --------------------------- src/qedge2apiedge.ts | 2 +- src/query_edge.ts | 2 +- src/query_graph.ts | 2 +- src/results_assembly/pfocr.ts | 2 +- src/results_assembly/query_results.ts | 2 +- src/types.ts | 6 ++- tsconfig.json | 6 ++- 15 files changed, 34 insertions(+), 97 deletions(-) delete mode 100644 src/log_entry.ts diff --git a/package.json b/package.json index 85313c19..4de1384d 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,7 @@ "@biothings-explorer/call-apis": "workspace:../call-apis", "@biothings-explorer/node-expansion": "workspace:../node-expansion", "@biothings-explorer/smartapi-kg": "workspace:../smartapi-kg", + "@biothings-explorer/utils": "workspace:../utils", "@sentry/node": "^7.74.1", "async": "^3.2.4", "biolink-model": "workspace:../biolink-model", diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index 17c2adfd..b9cb0b99 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -1,4 +1,4 @@ -import call_api from '@biothings-explorer/call-apis'; +import call_api, { RedisClient } from '@biothings-explorer/call-apis'; import QEdge2APIEdgeHandler, { APIEdge } from './qedge2apiedge'; import NodesUpdateHandler from './update_nodes'; import Debug from 'debug'; @@ -6,8 +6,8 @@ const debug = Debug('bte:biothings-explorer-trapi:batch_edge_query'); import CacheHandler from './cache_handler'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; -import { StampedLog } from './log_entry'; -import { QueryHandlerOptions } from '.'; +import { StampedLog } from '@biothings-explorer/utils'; +import { QueryHandlerOptions, redisClient } from '.'; import QEdge from './query_edge'; import { UnavailableAPITracker } from './types'; import { Record } from '@biothings-explorer/api-response-transform'; @@ -62,7 +62,7 @@ export default class BatchEdgeQueryHandler { * @private */ async _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { - const executor = new call_api(APIEdges, this.options); + const executor = new call_api(APIEdges, this.options, redisClient as RedisClient); const records: Record[] = await executor.query(this.resolveOutputIDs, unavailableAPIs); this.logs = [...this.logs, ...executor.logs]; return records; diff --git a/src/cache_handler.ts b/src/cache_handler.ts index 079a57e3..a42d0e23 100644 --- a/src/cache_handler.ts +++ b/src/cache_handler.ts @@ -1,7 +1,7 @@ import { redisClient } from './redis-client'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:cache_handler'); -import LogEntry, { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import async from 'async'; import helper from './helper'; import lz4 from 'lz4'; diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 798b2677..2b336565 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -1,11 +1,11 @@ import _ from 'lodash'; -import LogEntry, { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import BTEError from './exceptions/bte_error'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:edge-manager'); import * as config from './config'; import BatchEdgeQueryHandler, { BatchEdgeQueryOptions } from './batch_edge_query'; -import * as Sentry from '@sentry/node'; +import { Telemetry } from '@biothings-explorer/utils'; import QEdge from './query_edge'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { QueryHandlerOptions } from '.'; @@ -451,16 +451,12 @@ export default class QueryEdgeManager { // storing records will trigger a node entity count update currentQEdge.storeRecords(queryRecords); - const span1 = Sentry?.getCurrentHub()?.getScope()?.getTransaction()?.startChild({ - description: 'filteringRecords', - }); + const span1 = Telemetry.startSpan({ description: 'filteringRecords' }); // filter records this.updateEdgeRecords(currentQEdge); span1?.finish(); - const span2 = Sentry?.getCurrentHub()?.getScope()?.getTransaction()?.startChild({ - description: 'updatingRecordEdges', - }); + const span2 = Telemetry.startSpan({ description: 'updatingRecordEdges' }); // update and filter neighbors this.updateAllOtherEdges(currentQEdge); diff --git a/src/graph/graph.ts b/src/graph/graph.ts index 004a8192..018a096b 100644 --- a/src/graph/graph.ts +++ b/src/graph/graph.ts @@ -1,7 +1,7 @@ import kg_edge from './kg_edge'; import kg_node from './kg_node'; import Debug from 'debug'; -import LogEntry, { StampedLog } from '../log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import KGNode from './kg_node'; import KGEdge from './kg_edge'; import { Record } from '../../../api-response-transform/built'; diff --git a/src/index.ts b/src/index.ts index cfe26747..16a0d87a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,14 +10,13 @@ import Graph from './graph/graph'; import EdgeManager from './edge_manager'; import _ from 'lodash'; import QEdge2APIEdgeHandler from './qedge2apiedge'; -import LogEntry, { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import { promises as fs } from 'fs'; import { getDescendants } from '@biothings-explorer/node-expansion'; import { resolveSRI, SRINodeNormFailure } from 'biomedical_id_resolver'; import InferredQueryHandler from './inferred_mode/inferred_mode'; import KGNode from './graph/kg_node'; import KGEdge from './graph/kg_edge'; -import * as Sentry from '@sentry/node'; import { APIList, TrapiAuxGraphCollection, @@ -29,6 +28,7 @@ import { } from './types'; import BTEGraph from './graph/graph'; import QEdge from './query_edge'; +import { Telemetry } from '@biothings-explorer/utils'; // Exports for external availability export * from './types'; @@ -37,8 +37,6 @@ export { getTemplates, supportedLookups } from './inferred_mode/template_lookup' export { default as QEdge } from './query_edge'; export { default as QNode } from './query_node'; export { default as InvalidQueryGraphError } from './exceptions/invalid_query_graph_error'; -export * from './log_entry'; -export { default as LogEntry } from './log_entry'; export * from './qedge2apiedge'; export interface QueryHandlerOptions { @@ -315,14 +313,14 @@ export default class TRAPIQueryHandler { async addQueryNodes(): Promise { const qNodeIDsByOriginalID: Map = new Map(); const curiesToResolve = [ - ...Object.values(this.queryGraph.nodes).reduce((set, qNode) => { + ...Object.values(this.queryGraph.nodes).reduce((set: Set, qNode) => { qNode.ids?.forEach((id) => { set.add(id); qNodeIDsByOriginalID.set(id, qNode); }); return set; - }, new Set() as Set), - ]; + }, new Set()), + ] as string[]; const resolvedCuries = await resolveSRI({ unknown: curiesToResolve }); Object.entries(resolvedCuries).forEach(([originalCurie, resolvedEntity]) => { if (!this.bteGraph.nodes[resolvedEntity.primaryID]) { @@ -623,9 +621,7 @@ export default class TRAPIQueryHandler { this._initializeResponse(); await this.addQueryNodes(); - const span1 = Sentry?.getCurrentHub()?.getScope()?.getTransaction()?.startChild({ - description: 'loadMetaKG', - }); + const span1 = Telemetry.startSpan({ description: 'loadMetaKG' }); debug('Start to load metakg.'); const metaKG = this._loadMetaKG(); @@ -671,9 +667,7 @@ export default class TRAPIQueryHandler { debug(`(3) All edges created ${JSON.stringify(queryEdges)} `); if (this._queryUsesInferredMode()) { - const span2 = Sentry?.getCurrentHub()?.getScope()?.getTransaction()?.startChild({ - description: 'creativeExecution', - }); + const span2 = Telemetry.startSpan({ description: 'creativeExecution' }); await this._handleInferredEdges(); span2?.finish(); return; @@ -690,9 +684,7 @@ export default class TRAPIQueryHandler { return; } - const span3 = Sentry?.getCurrentHub()?.getScope()?.getTransaction()?.startChild({ - description: 'resultsAssembly', - }); + const span3 = Telemetry.startSpan({ description: 'resultsAssembly' }); // update query graph this.bteGraph.update(manager.getRecords()); diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 4734f070..bcf55e22 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -1,5 +1,5 @@ import Debug from 'debug'; -import LogEntry, { StampedLog } from '../log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import * as utils from '../utils'; import async from 'async'; import biolink from '../biolink'; diff --git a/src/log_entry.ts b/src/log_entry.ts deleted file mode 100644 index dc3c4e7a..00000000 --- a/src/log_entry.ts +++ /dev/null @@ -1,58 +0,0 @@ -import _ from 'lodash'; -import * as Sentry from '@sentry/node'; - -export interface TrapiLog { - timestamp: string; - level: string; - message: string; - code: string; -} - -export interface StampedLog extends TrapiLog { - data: any; - toJSON(): TrapiLog; -} - -export enum SentryLogSeverity { - ERROR = 'error', - WARNING = 'warning', - INFO = 'info', - DEBUG = 'debug', -} - -export default class LogEntry { - level: string; - message: string; - code: string; - data: any; - constructor(level = 'DEBUG', code = null, message = null, data = null) { - this.level = level; - this.message = message; - this.code = code; - this.data = data; - } - - getLog(): StampedLog { - const log = { - timestamp: new Date().toISOString(), - level: this.level, - message: this.message, - code: this.code, - }; - if (global.job) { - global.job.log(JSON.stringify(log, undefined, 2)); - } - Sentry.addBreadcrumb({ - category: 'log', - message: this.message, - level: SentryLogSeverity[this.level.toLowerCase()], - }); - return { - ...log, - data: this.data, - toJSON() { - return _.omit(this, ['data', 'toJSON']) as StampedLog; - }, - }; - } -} diff --git a/src/qedge2apiedge.ts b/src/qedge2apiedge.ts index bed42750..29e592da 100644 --- a/src/qedge2apiedge.ts +++ b/src/qedge2apiedge.ts @@ -1,5 +1,5 @@ import _ from 'lodash'; -import LogEntry, { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import * as config from './config'; const CURIE_WITH_PREFIXES = ['MONDO', 'DOID', 'UBERON', 'EFO', 'HP', 'CHEBI', 'CL', 'MGI', 'NCIT']; import Debug from 'debug'; diff --git a/src/query_edge.ts b/src/query_edge.ts index a60c9286..476fa527 100644 --- a/src/query_edge.ts +++ b/src/query_edge.ts @@ -5,7 +5,7 @@ import biolink from './biolink'; import { Record, RecordNode, FrozenRecord } from '@biothings-explorer/api-response-transform'; import QNode from './query_node'; import { QNodeInfo } from './query_node'; -import { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import { TrapiAttributeConstraint, TrapiQualifierConstraint } from './types'; const debug = Debug('bte:biothings-explorer-trapi:QEdge'); diff --git a/src/query_graph.ts b/src/query_graph.ts index 8721a8d1..2b78ab9d 100644 --- a/src/query_graph.ts +++ b/src/query_graph.ts @@ -1,6 +1,6 @@ import QEdge from './query_edge'; import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; -import LogEntry, { StampedLog } from './log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import Debug from 'debug'; import QNode from './query_node'; import biolink from './biolink'; diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index f84aab0b..1afd52b4 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -3,7 +3,7 @@ import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:pfocr'); import { intersection } from '../utils'; import _ from 'lodash'; -import LogEntry, { StampedLog } from '../log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import { TrapiResult } from '../types'; // the minimum acceptable intersection size between the CURIEs diff --git a/src/results_assembly/query_results.ts b/src/results_assembly/query_results.ts index 767244bb..026556fc 100644 --- a/src/results_assembly/query_results.ts +++ b/src/results_assembly/query_results.ts @@ -1,4 +1,4 @@ -import LogEntry, { StampedLog } from '../log_entry'; +import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import { TrapiResult } from '../types'; import Debug from 'debug'; import { zip } from 'lodash'; diff --git a/src/types.ts b/src/types.ts index 2aff4216..367271c9 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,6 +1,6 @@ // TODO change how types are exported on smartapi-kg so this is a normal package import import { SmartAPISpec } from '@biothings-explorer/smartapi-kg'; -import { TrapiLog } from './log_entry'; +import { TrapiLog } from '@biothings-explorer/utils'; declare global { var missingAPIs: SmartAPISpec[]; @@ -16,7 +16,9 @@ declare global { jobID?: string; callback_url?: string; }; - var job: any; // TODO type as Piscina job + var job: { + log: (logString: string) => void; + }; // TODO type as Piscina job } export interface TrapiQNode { diff --git a/tsconfig.json b/tsconfig.json index 4d739ea4..9a9535fe 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -8,7 +8,8 @@ "@biothings-explorer/call-apis": ["../call-apis"], "biolink-model": ["../biolink-model"], "biomedical_id_resolver": ["../biomedical_id_resolver"], - "@biothings-explorer/smartapi-kg": ["../smartapi-kg"] + "@biothings-explorer/smartapi-kg": ["../smartapi-kg"], + "@biothings-explorer/utils": ["../utils"] } }, "include": ["./src/**/*", "./src/biolink.json", "./src/smartapi_specs.json", "./src/predicates.json"], @@ -25,6 +26,9 @@ }, { "path": "../biolink-model" + }, + { + "path": "../utils" } ] } From 4c5efadf679fa9998d23d38667efb5b0cf88b20e Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 9 Nov 2023 16:13:49 -0500 Subject: [PATCH 02/12] test: fix tests --- __test__/unittest/LogEntry.test.ts | 27 ------------------------- __test__/unittest/inferred_mode.test.ts | 2 +- 2 files changed, 1 insertion(+), 28 deletions(-) delete mode 100644 __test__/unittest/LogEntry.test.ts diff --git a/__test__/unittest/LogEntry.test.ts b/__test__/unittest/LogEntry.test.ts deleted file mode 100644 index 3afa34da..00000000 --- a/__test__/unittest/LogEntry.test.ts +++ /dev/null @@ -1,27 +0,0 @@ -import logEntry from '../../src/log_entry'; - -describe('test log entry module', () => { - test('if no optional param passed, should return default value', () => { - const log = new logEntry().getLog(); - expect(log.code).toEqual(null); - expect(log.level).toEqual('DEBUG'); - expect(log.message).toEqual(null); - expect(log).toHaveProperty('timestamp'); - }); - - test('if code is correctly set', () => { - const log = new logEntry('DEBUG', 404).getLog(); - expect(log.code).toEqual(404); - expect(log.level).toEqual('DEBUG'); - expect(log.message).toEqual(null); - expect(log).toHaveProperty('timestamp'); - }); - - test('if message is correctly set', () => { - const log = new logEntry('DEBUG', null, 'yes').getLog(); - expect(log.code).toEqual(null); - expect(log.level).toEqual('DEBUG'); - expect(log.message).toEqual('yes'); - expect(log).toHaveProperty('timestamp'); - }); -}); diff --git a/__test__/unittest/inferred_mode.test.ts b/__test__/unittest/inferred_mode.test.ts index ee2a2c86..3aedd136 100644 --- a/__test__/unittest/inferred_mode.test.ts +++ b/__test__/unittest/inferred_mode.test.ts @@ -4,7 +4,7 @@ import fs from 'fs'; const smartAPIPAth = path.resolve(__dirname, '../../../bte-trapi/data/smartapi_specs.json'); const predicatesPath = path.resolve(__dirname, '../../../bte-trapi/data/predicates.json'); import _ from 'lodash'; -import { StampedLog, TrapiLog } from '../../src/log_entry'; +import { StampedLog, TrapiLog } from '@biothings-explorer/utils'; import InferredQueryHandler, { CombinedResponse } from '../../src/inferred_mode/inferred_mode'; import { MatchedTemplate } from '../../src/inferred_mode/template_lookup'; From 610313fae9a98f6e109ab3ab40d7985b802f753f Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 12:15:22 -0500 Subject: [PATCH 03/12] chore: add telemetry spans --- src/edge_manager.ts | 4 ++++ src/inferred_mode/inferred_mode.ts | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 2b336565..35b88ea9 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -397,6 +397,7 @@ export default class QueryEdgeManager { async executeEdges(): Promise { const unavailableAPIs: UnavailableAPITracker = {}; while (this.getEdgesNotExecuted()) { + const span = Telemetry.startSpan({ description: 'edgeExecution' }); //next available/most efficient edge const currentQEdge = this.getNext(); //crate queries from edge @@ -446,6 +447,7 @@ export default class QueryEdgeManager { `qEdge (${currentQEdge.getID()}) got 0 records. Your query terminates.`, ).getLog(), ); + span.finish(); return; } // storing records will trigger a node entity count update @@ -473,6 +475,7 @@ export default class QueryEdgeManager { `qEdge (${currentQEdge.getID()}) kept 0 records. Your query terminates.`, ).getLog(), ); + span.finish(); return; } // edge all done @@ -486,6 +489,7 @@ export default class QueryEdgeManager { if (process.env.DUMP_RECORDS) { await this.dumpRecords(this.getRecords()); } + span.finish(); return true; } } diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index bcf55e22..8f074b6d 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -1,5 +1,5 @@ import Debug from 'debug'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; +import { LogEntry, StampedLog, Telemetry } from '@biothings-explorer/utils'; import * as utils from '../utils'; import async from 'async'; import biolink from '../biolink'; @@ -254,6 +254,7 @@ export default class InferredQueryHandler { qEdge: TrapiQEdge, combinedResponse: CombinedResponse, ): CombinedResponseReport { + const span = Telemetry.startSpan({ description: 'creativeCombineResponse' }); const newResponse = handler.getResponse(); const report: CombinedResponseReport = { querySuccess: 0, @@ -422,6 +423,7 @@ export default class InferredQueryHandler { if (Object.keys(combinedResponse.message.results).length >= this.CREATIVE_LIMIT && !report.creativeLimitHit) { report.creativeLimitHit = Object.keys(newResponse.message.results).length; } + span.finish(); return report; } @@ -514,8 +516,11 @@ export default class InferredQueryHandler { } = {}; await async.eachOfSeries(subQueries, async ({ template, queryGraph }, i) => { + const span = Telemetry.startSpan({ description: 'creativeTemplate' }); + span.setData('template', i + 1); i = i as number; if (stop) { + span.finish(); return; } if (global.queryInformation?.queryGraph) { @@ -558,6 +563,7 @@ export default class InferredQueryHandler { debug(message); combinedResponse.logs.push(new LogEntry(`INFO`, null, message).getLog()); } + span.finish(); } catch (error) { handler.logs.forEach((log) => { combinedResponse.logs.push(log); @@ -565,6 +571,7 @@ export default class InferredQueryHandler { const message = `ERROR: Template-${i + 1} failed due to error ${error}`; debug(message); combinedResponse.logs.push(new LogEntry(`ERROR`, null, message).getLog()); + span.finish(); return; } }); From 318c1c466c4acc2114f624db9d831bac94bb46e6 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 12:15:56 -0500 Subject: [PATCH 04/12] fix: function not async --- src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index.ts b/src/index.ts index 16a0d87a..84e4a47d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -361,7 +361,7 @@ export default class TRAPIQueryHandler { * Set TRAPI Query Graph * @param { object } queryGraph - TRAPI Query Graph Object */ - async setQueryGraph(queryGraph: TrapiQueryGraph): Promise { + setQueryGraph(queryGraph: TrapiQueryGraph): void { this.originalQueryGraph = _.cloneDeep(queryGraph); this.queryGraph = queryGraph; for (const nodeId in queryGraph.nodes) { From c43726114cc0f6fcaf132936110028c9c9688bb5 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 12:16:14 -0500 Subject: [PATCH 05/12] perf: mild potential optimizations --- src/edge_manager.ts | 29 +++++++---------------------- src/query_node.ts | 2 +- src/results_assembly/score.ts | 2 +- 3 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 35b88ea9..91da45fe 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -203,29 +203,14 @@ export default class QueryEdgeManager { const execObjectCuries = qEdge.reverse ? subjectCuries : objectCuries; records.forEach((record) => { - //check sub curies against $input ids - const subjectIDs: Set = new Set(); - const objectIDs: Set = new Set(); - let objectMatch = 0; - let subjectMatch = 0; + // check against original, primaryID, and equivalent ids + const subjectIDs = [record.subject.original, record.subject.curie, ...record.subject.equivalentCuries]; + const objectIDs = [record.object.original, record.object.curie, ...record.object.equivalentCuries]; + + // there must be at least a minimal intersection + const subjectMatch = subjectIDs.some((curie) => execSubjectCuries.includes(curie)); + const objectMatch = objectIDs.some((curie) => execObjectCuries.includes(curie)); - //compare record I/O ids against edge node ids - // #1 check equivalent ids - record.subject.equivalentCuries.forEach((curie) => { - subjectIDs.add(curie); - }); - record.object.equivalentCuries.forEach((curie) => { - objectIDs.add(curie); - }); - // #2 ensure we have the primaryID - subjectIDs.add(record.subject.curie); - objectIDs.add(record.object.curie); - // #3 make sure we at least have the original - subjectIDs.add(record.subject.original); - objectIDs.add(record.object.original); - // check ids - subjectMatch = _.intersection([...subjectIDs], execSubjectCuries).length; - objectMatch = _.intersection([...objectIDs], execObjectCuries).length; //if both ends match then keep record if (subjectMatch && objectMatch) { keep.push(record); diff --git a/src/query_node.ts b/src/query_node.ts index dfacf099..de539a15 100644 --- a/src/query_node.ts +++ b/src/query_node.ts @@ -180,7 +180,7 @@ export default class QNode { const keep: { [mainID: string]: string[] } = {}; // If a new entity has any alias intersection with an existing entity, keep it Object.entries(newCuries).forEach(([newMainID, currentAliases]) => { - const someIntersection = Object.entries(this.expanded_curie).some(([, existingAliases]) => { + const someIntersection = Object.values(this.expanded_curie).some((existingAliases) => { return currentAliases.some((currentAlias) => existingAliases.some((existingAlias) => currentAlias.toLowerCase() === existingAlias.toLowerCase()), ); diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index e28ff983..6cc9e2b4 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -24,7 +24,7 @@ export interface ScoreCombos { // create lookup table for ngd scores in the format: {inputUMLS-outputUMLS: ngd} async function query(queryPairs: string[][]): Promise { const url = 'https://biothings.ncats.io/semmeddb/query/ngd'; - const batchSize = 1000; + const batchSize = 500; debug('Querying', queryPairs.length, 'combos.'); From 30a0adc11db3f3f031f1337513f5255153375be3 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 13:23:00 -0500 Subject: [PATCH 06/12] fix: span end location --- src/edge_manager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 91da45fe..3e57e1b6 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -466,6 +466,7 @@ export default class QueryEdgeManager { // edge all done currentQEdge.executed = true; debug(`(10) Edge successfully queried.`); + span.finish(); } this._logSkippedQueries(unavailableAPIs); // collect and organize records @@ -474,7 +475,6 @@ export default class QueryEdgeManager { if (process.env.DUMP_RECORDS) { await this.dumpRecords(this.getRecords()); } - span.finish(); return true; } } From 152a3c35b53007e98e8f4f560af92960c29a553f Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 14:40:57 -0500 Subject: [PATCH 07/12] perf: decrease batch size for semmed, text mining targetted --- src/config.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config.ts b/src/config.ts index f4efc733..d700a3b9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -3,12 +3,12 @@ const API_BATCH_SIZE = [ { id: '1d288b3a3caf75d541ffaae3aab386c8', name: 'BioThings SEMMEDDB API', - max: 100, + max: 50, }, { id: '978fe380a147a8641caf72320862697b', name: 'Text Mining Targeted Association API', - max: 100, + max: 50, }, { id: '02af7d098ab304e80d6f4806c3527027', From f998f5a82291798e0a37e1714a447e3c6da150ce Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Mon, 13 Nov 2023 15:00:22 -0500 Subject: [PATCH 08/12] perf: further lower ngd batch size --- src/results_assembly/score.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 6cc9e2b4..42e0f6f4 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -24,7 +24,7 @@ export interface ScoreCombos { // create lookup table for ngd scores in the format: {inputUMLS-outputUMLS: ngd} async function query(queryPairs: string[][]): Promise { const url = 'https://biothings.ncats.io/semmeddb/query/ngd'; - const batchSize = 500; + const batchSize = 250; debug('Querying', queryPairs.length, 'combos.'); From a783679e14e016c83d5fd1e63f4aa75f6b8e2ba5 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 16 Nov 2023 11:24:26 -0500 Subject: [PATCH 09/12] chore: add request body breadcrumb --- package.json | 1 + src/inferred_mode/inferred_mode.ts | 2 +- src/results_assembly/score.ts | 17 ++++++++++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/package.json b/package.json index 4de1384d..bf03aa25 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ }, "homepage": "https://github.com/biothings/bte_trapi_query_graph_handler#readme", "devDependencies": { + "@types/async": "^3.2.22", "@types/debug": "^4.1.10", "@types/jest": "^29.5.6", "@types/lodash": "^4.14.200", diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 8f074b6d..fde4f8a0 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -517,7 +517,7 @@ export default class InferredQueryHandler { await async.eachOfSeries(subQueries, async ({ template, queryGraph }, i) => { const span = Telemetry.startSpan({ description: 'creativeTemplate' }); - span.setData('template', i + 1); + span.setData('template', (i as number) + 1); i = i as number; if (stop) { span.finish(); diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 42e0f6f4..6fe7109f 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -1,9 +1,12 @@ import Debug from 'debug'; import axios from 'axios'; const debug = Debug('bte:biothings-explorer-trapi:score'); +import os from 'os'; +import async from 'async'; import _ from 'lodash'; import { ConsolidatedSolutionRecord, RecordsByQEdgeID } from './query_results'; +import { Telemetry } from '@biothings-explorer/utils'; const tuning_param = 2.0; @@ -25,19 +28,27 @@ export interface ScoreCombos { async function query(queryPairs: string[][]): Promise { const url = 'https://biothings.ncats.io/semmeddb/query/ngd'; const batchSize = 250; + const concurrency_limit = os.cpus().length * 2; debug('Querying', queryPairs.length, 'combos.'); const chunked_input = _.chunk(queryPairs, batchSize); try { - const axios_queries = chunked_input.map((input) => { - return axios.post(url, { + const response = await async.mapLimit(chunked_input, concurrency_limit, async (input) => { + const data = { umls: input, expand: 'both', + }; + Telemetry.addBreadcrumb({ + category: 'requestBody', + data: { data }, }); + const start = performance.now(); + const response = await axios.post(url, data); + const end = performance.now(); + return response; }); //convert res array into single object with all curies - const response = await Promise.all(axios_queries); const result = response .map((r): ngdScoreCombo[] => r.data.filter((combo: ngdScoreCombo) => Number.isFinite(combo.ngd))) .flat(); // get numerical scores and flatten array From e9b8a38ab279fd287f9a49652aa30c4774e3c246 Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Thu, 16 Nov 2023 12:06:02 -0500 Subject: [PATCH 10/12] chore: better span --- src/results_assembly/score.ts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/results_assembly/score.ts b/src/results_assembly/score.ts index 6fe7109f..1e552ac9 100644 --- a/src/results_assembly/score.ts +++ b/src/results_assembly/score.ts @@ -35,18 +35,22 @@ async function query(queryPairs: string[][]): Promise { const chunked_input = _.chunk(queryPairs, batchSize); try { const response = await async.mapLimit(chunked_input, concurrency_limit, async (input) => { + const span = Telemetry.startSpan({ description: 'NGDScoreRequest' }); const data = { umls: input, expand: 'both', }; - Telemetry.addBreadcrumb({ - category: 'requestBody', - data: { data }, - }); - const start = performance.now(); - const response = await axios.post(url, data); - const end = performance.now(); - return response; + span.setData('requestBody', data); + try { + // const start = performance.now(); + const response = await axios.post(url, data); + // const end = performance.now(); + span.finish(); + return response; + } catch (err) { + debug(`NGD score query failed: ${err}`); + span.finish(); + } }); //convert res array into single object with all curies const result = response From dcb6fc427fc8e36919928c44982d86d17b527e1f Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:24:43 -0500 Subject: [PATCH 11/12] fix: intersection test --- src/edge_manager.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 3e57e1b6..5655154a 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -208,8 +208,9 @@ export default class QueryEdgeManager { const objectIDs = [record.object.original, record.object.curie, ...record.object.equivalentCuries]; // there must be at least a minimal intersection - const subjectMatch = subjectIDs.some((curie) => execSubjectCuries.includes(curie)); - const objectMatch = objectIDs.some((curie) => execObjectCuries.includes(curie)); + const subjectMatch = + subjectIDs.some((curie) => execSubjectCuries.includes(curie)) || execSubjectCuries.length === 0; + const objectMatch = objectIDs.some((curie) => execObjectCuries.includes(curie)) || execObjectCuries.length === 0; //if both ends match then keep record if (subjectMatch && objectMatch) { From 0582a82f64e0631609838ada36077e77817963bd Mon Sep 17 00:00:00 2001 From: tokebe <43009413+tokebe@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:10:49 -0500 Subject: [PATCH 12/12] fix: type change from merge --- src/edge_manager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 6f7447a4..145e415c 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -215,7 +215,7 @@ export default class QueryEdgeManager { //if both ends match then keep record // Don't keep self-edges - const selfEdge = [...subjectIDs].some((curie) => objectIDs.has(curie)); + const selfEdge = [...subjectIDs].some((curie) => objectIDs.includes(curie)); if (subjectMatch && objectMatch && !selfEdge) { keep.push(record); }