From b86e66a9898586a74757dec3e752aed560647834 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 24 Jun 2024 15:11:28 -0700 Subject: [PATCH 01/18] allow queries to be aborted --- src/batch_edge_query.ts | 16 +++++++++------- src/cache_handler.ts | 5 ++++- src/edge_manager.ts | 6 ++++-- src/index.ts | 6 ++++-- src/update_nodes.ts | 8 ++++---- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index d5504b21..0d843985 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -61,9 +61,9 @@ export default class BatchEdgeQueryHandler { /** * @private */ - async _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { + async _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}, abortSignal?: AbortSignal): Promise { const executor = new call_api(APIEdges, this.options, redisClient); - const records: Record[] = await executor.query(this.resolveOutputIDs, unavailableAPIs); + const records: Record[] = await executor.query(this.resolveOutputIDs, unavailableAPIs, abortSignal); this.logs = [...this.logs, ...executor.logs]; return records; } @@ -123,18 +123,20 @@ export default class BatchEdgeQueryHandler { }); } - async query(qEdges: QEdge | QEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { + async query(qEdges: QEdge | QEdge[], unavailableAPIs: UnavailableAPITracker = {}, abortSignal?: AbortSignal): Promise { debug('Node Update Start'); // it's now a single edge but convert to arr to simplify refactoring qEdges = Array.isArray(qEdges) ? 
qEdges : [qEdges]; const nodeUpdate = new NodesUpdateHandler(qEdges); // difference is there is no previous edge info anymore - await nodeUpdate.setEquivalentIDs(qEdges); + await nodeUpdate.setEquivalentIDs(qEdges, abortSignal); await this._rmEquivalentDuplicates(qEdges); debug('Node Update Success'); + if (abortSignal?.aborted) return []; + const cacheHandler = new CacheHandler(this.caching, this.metaKG, this.options); - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); + const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges, abortSignal); this.logs = [...this.logs, ...cacheHandler.logs]; let queryRecords: Record[]; @@ -154,8 +156,8 @@ export default class BatchEdgeQueryHandler { } const expanded_APIEdges = this._expandAPIEdges(APIEdges); debug('Start to query APIEdges....'); - queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs); - if (queryRecords === undefined) return; + queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs, abortSignal); + if (queryRecords === undefined || abortSignal?.aborted) return; debug('APIEdges are successfully queried....'); queryRecords = await this._postQueryFilter(queryRecords); debug(`Total number of records is (${queryRecords.length})`); diff --git a/src/cache_handler.ts b/src/cache_handler.ts index a52c6d1f..f62f3a84 100644 --- a/src/cache_handler.ts +++ b/src/cache_handler.ts @@ -112,7 +112,7 @@ export default class CacheHandler { ); } - async categorizeEdges(qEdges: QEdge[]): Promise<{ cachedRecords: Record[]; nonCachedQEdges: QEdge[] }> { + async categorizeEdges(qEdges: QEdge[], abortSignal?: AbortSignal): Promise<{ cachedRecords: Record[]; nonCachedQEdges: QEdge[] }> { if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { return { cachedRecords: [], @@ -123,6 +123,7 @@ export default class CacheHandler { let cachedRecords: Record[] = []; debug('Begin edge cache lookup...'); await 
async.eachSeries(qEdges, async (qEdge) => { + if (abortSignal?.aborted) return; const qEdgeMetaKGHash = this._hashEdgeByMetaKG(qEdge.getHashedEdgeRepresentation()); const unpackedRecords: Record[] = await new Promise((resolve) => { const redisID = 'bte:edgeCache:' + qEdgeMetaKGHash; @@ -130,6 +131,8 @@ export default class CacheHandler { try { const compressedRecordPack = await redisClient.client.hgetallTimeout(redisID); + if (abortSignal?.aborted) resolve([]); + if (compressedRecordPack && Object.keys(compressedRecordPack).length) { const recordPack = []; diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 543a6ea5..a0db4dc2 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -384,9 +384,11 @@ export default class QueryEdgeManager { debug(logMessage); } - async executeEdges(): Promise { + async executeEdges(abortSignal?: AbortSignal): Promise { const unavailableAPIs: UnavailableAPITracker = {}; while (this.getEdgesNotExecuted()) { + if (abortSignal?.aborted) return false; + const span = Telemetry.startSpan({ description: 'edgeExecution' }); //next available/most efficient edge const currentQEdge = this.getNext(); @@ -402,7 +404,7 @@ export default class QueryEdgeManager { ); debug(`(5) Executing current edge >> "${currentQEdge.getID()}"`); //execute current edge query - const queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs); + const queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs, abortSignal); this.logs = [...this.logs, ...queryBatchHandler.logs]; if (queryRecords === undefined) return; // create an edge execution summary diff --git a/src/index.ts b/src/index.ts index 59401b36..ff860d0e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -620,7 +620,7 @@ export default class TRAPIQueryHandler { ]; }; - async query(): Promise { + async query(abortSignal?: AbortSignal): Promise { this._initializeResponse(); await this.addQueryNodes(); @@ -681,12 +681,14 @@ export default 
class TRAPIQueryHandler { } const manager = new EdgeManager(queryEdges, metaKG, this.options); - const executionSuccess = await manager.executeEdges(); + const executionSuccess = await manager.executeEdges(abortSignal); this.logs = [...this.logs, ...manager.logs]; if (!executionSuccess) { return; } + if (abortSignal?.aborted) return; + const span3 = Telemetry.startSpan({ description: 'resultsAssembly' }); // update query graph diff --git a/src/update_nodes.ts b/src/update_nodes.ts index c9c30f93..2b41d549 100644 --- a/src/update_nodes.ts +++ b/src/update_nodes.ts @@ -39,17 +39,17 @@ export default class NodesUpdateHandler { * Resolve input ids * @param {object} curies - each key represents the category, e.g. gene, value is an array of curies. */ - async _getEquivalentIDs(curies: ResolverInput): Promise { + async _getEquivalentIDs(curies: ResolverInput, abortSignal?: AbortSignal): Promise { // const resolver = new id_resolver.Resolver('biolink'); // const equivalentIDs = await resolver.resolve(curies); - return await resolveSRI(curies); + return await resolveSRI(curies, abortSignal); } - async setEquivalentIDs(qEdges: QEdge[]): Promise { + async setEquivalentIDs(qEdges: QEdge[], abortSignal?: AbortSignal): Promise { debug(`Getting equivalent IDs...`); const curies = this._getCuries(this.qEdges); debug(`curies: ${JSON.stringify(curies)}`); - const equivalentIDs = await this._getEquivalentIDs(curies); + const equivalentIDs = await this._getEquivalentIDs(curies, abortSignal); qEdges.map((qEdge) => { const edgeEquivalentIDs = Object.keys(equivalentIDs) .filter((key) => qEdge.getInputCurie().includes(key)) From 3ce0b79e43c06833031d0f30cba04295c7a96872 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Tue, 25 Jun 2024 14:51:15 -0700 Subject: [PATCH 02/18] run creative mode templates in parallel --- src/inferred_mode/inferred_mode.ts | 116 ++++++++++++++--------------- 1 file changed, 54 insertions(+), 62 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts 
b/src/inferred_mode/inferred_mode.ts index bc6f3dd7..bc297b73 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -1,5 +1,5 @@ import Debug from 'debug'; -import { LogEntry, StampedLog, Telemetry } from '@biothings-explorer/utils'; +import { LogEntry, StampedLog, Telemetry, timeoutPromise } from '@biothings-explorer/utils'; import * as utils from '../utils'; import async from 'async'; import biolink from '../biolink'; @@ -38,7 +38,6 @@ export interface CombinedResponseReport { querySuccess: number; queryHadResults: boolean; mergedResults: { [resultID: string]: number }; - creativeLimitHit: boolean | number; } // MatchedTemplate, but with IDs, etc. filled in @@ -263,7 +262,6 @@ export default class InferredQueryHandler { querySuccess: 0, queryHadResults: false, mergedResults: {}, - creativeLimitHit: false, }; let mergedThisTemplate = 0; const resultIDsFromPrevious = new Set(Object.keys(combinedResponse.message.results)); @@ -428,9 +426,6 @@ export default class InferredQueryHandler { } report.querySuccess = 1; - if (Object.keys(combinedResponse.message.results).length >= this.CREATIVE_LIMIT && !report.creativeLimitHit) { - report.creativeLimitHit = Object.keys(newResponse.message.results).length; - } span.finish(); return report; } @@ -523,66 +518,49 @@ export default class InferredQueryHandler { [resultID: string]: number; } = {}; - await async.eachOfSeries(subQueries, async ({ template, queryGraph }, i) => { - const span = Telemetry.startSpan({ description: 'creativeTemplate' }); - span.setData('template', (i as number) + 1); - i = i as number; - if (stop) { - span.finish(); - return; - } - if (global.queryInformation?.queryGraph) { - global.queryInformation.isCreativeMode = true; - global.queryInformation.creativeTemplate = template; - } - const handler = new TRAPIQueryHandler(this.options, this.path, this.predicatePath, this.includeReasoner); - try { - // make query and combine results/kg/logs/etc + const QUERY_TIMEOUT = 
4.8 * 60 * 1000; // 4.5 minutes + + const completedHandlers = await Promise.all( + subQueries.map(async ({ template, queryGraph }, i) => { + const span = Telemetry.startSpan({ description: 'creativeTemplate' }); + span.setData('template', i + 1); + const handler = new TRAPIQueryHandler(this.options, this.path, this.predicatePath, this.includeReasoner); handler.setQueryGraph(queryGraph); - await handler.query(); - const { querySuccess, queryHadResults, mergedResults, creativeLimitHit } = this.combineResponse( - i, - handler, - qEdgeID, - qEdge, - combinedResponse, - ); - // update values used in logging - successfulQueries += querySuccess; - if (queryHadResults) resultQueries.push(i); - Object.entries(mergedResults).forEach(([result, countMerged]) => { - mergedResultsCount[result] = - result in mergedResultsCount ? mergedResultsCount[result] + countMerged : countMerged; - }); - // log to user if we should stop - if (creativeLimitHit) { - stop = true; - const message = [ - `Addition of ${creativeLimitHit} results from Template ${i + 1}`, - Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ', - `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${ - Object.keys(combinedResponse.message.results).length - } merged). `, - `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${ - subQueries.length - (i + 1) - } `, - subQueries.length - (i + 1) === 1 ? 
`template.` : `templates.`, - ].join(''); + try { + await timeoutPromise(handler.query(AbortSignal.timeout(QUERY_TIMEOUT)), QUERY_TIMEOUT); + } catch (error) { + handler.logs.forEach((log) => { + combinedResponse.logs.push(log); + }); + const message = `ERROR: Template-${i + 1} failed due to error ${error}`; debug(message); - combinedResponse.logs.push(new LogEntry(`INFO`, null, message).getLog()); + combinedResponse.logs.push(new LogEntry(`ERROR`, null, message).getLog()); + span.finish(); + return undefined; } span.finish(); - } catch (error) { - handler.logs.forEach((log) => { - combinedResponse.logs.push(log); - }); - const message = `ERROR: Template-${i + 1} failed due to error ${error}`; - debug(message); - combinedResponse.logs.push(new LogEntry(`ERROR`, null, message).getLog()); - span.finish(); - return; - } - }); + return { i, handler }; + }) + ); + + for (const handlerInfo of completedHandlers) { + if (handlerInfo === undefined) continue; + const { i, handler } = handlerInfo; + const { querySuccess, queryHadResults, mergedResults } = this.combineResponse( + i, + handler, + qEdgeID, + qEdge, + combinedResponse, + ); + successfulQueries += querySuccess; + if (queryHadResults) resultQueries.push(i); + Object.entries(mergedResults).forEach(([result, countMerged]) => { + mergedResultsCount[result] = + result in mergedResultsCount ? mergedResultsCount[result] + countMerged : countMerged; + }); + } + // log about merged Results if (Object.keys(mergedResultsCount).length) { // Add 1 for first instance of result (not counted during merging) @@ -613,6 +591,20 @@ export default class InferredQueryHandler { response.message.results = Object.values(combinedResponse.message.results).sort((a, b) => { return b.analyses[0].score - a.analyses[0].score ? 
b.analyses[0].score - a.analyses[0].score : 0; }); + + // log about trimming results + if (response.message.results.length > this.CREATIVE_LIMIT) { + const message = [ + `Number of results exceeds`, + `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${ + Object.keys(response.message.results).length + } merged). `, + `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results.` + ].join(''); + debug(message); + combinedResponse.logs.push(new LogEntry(`INFO`, null, message).getLog()); + } + // trim extra results and prune kg response.message.results = response.message.results.slice(0, this.CREATIVE_LIMIT); response.description = `Query processed successfully, retrieved ${response.message.results.length} results.`; From 1203b2c3c429c5b90e20b4143d479c2d8f59ffab Mon Sep 17 00:00:00 2001 From: rjawesome Date: Wed, 26 Jun 2024 10:14:55 -0700 Subject: [PATCH 03/18] creative timeout environment variable --- src/inferred_mode/inferred_mode.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index bc297b73..d5ec7a8d 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -52,6 +52,7 @@ export default class InferredQueryHandler { predicatePath: string; includeReasoner: boolean; CREATIVE_LIMIT: number; + CREATIVE_TIMEOUT: number; constructor( parent: TRAPIQueryHandler, queryGraph: TrapiQueryGraph, @@ -69,6 +70,7 @@ export default class InferredQueryHandler { this.predicatePath = predicatePath; this.includeReasoner = includeReasoner; this.CREATIVE_LIMIT = process.env.CREATIVE_LIMIT ? parseInt(process.env.CREATIVE_LIMIT) : 500; + this.CREATIVE_TIMEOUT = process.env.CREATIVE_TIMEOUT_S ? 
parseInt(process.env.CREATIVE_TIMEOUT) * 1000 : 4.8 * 60 * 1000; } get queryIsValid(): boolean { @@ -518,8 +520,6 @@ export default class InferredQueryHandler { [resultID: string]: number; } = {}; - const QUERY_TIMEOUT = 4.8 * 60 * 1000; // 4.5 minutes - const completedHandlers = await Promise.all( subQueries.map(async ({ template, queryGraph }, i) => { const span = Telemetry.startSpan({ description: 'creativeTemplate' }); @@ -527,7 +527,7 @@ export default class InferredQueryHandler { const handler = new TRAPIQueryHandler(this.options, this.path, this.predicatePath, this.includeReasoner); handler.setQueryGraph(queryGraph); try { - await timeoutPromise(handler.query(AbortSignal.timeout(QUERY_TIMEOUT)), QUERY_TIMEOUT); + await timeoutPromise(handler.query(AbortSignal.timeout(this.CREATIVE_TIMEOUT)), this.CREATIVE_TIMEOUT); } catch (error) { handler.logs.forEach((log) => { combinedResponse.logs.push(log); From 0a9c6a112f43462b50dca871d4a37c85d4f76468 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Wed, 26 Jun 2024 11:17:25 -0700 Subject: [PATCH 04/18] debug cpu usage --- src/inferred_mode/inferred_mode.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index d5ec7a8d..81323096 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -70,7 +70,7 @@ export default class InferredQueryHandler { this.predicatePath = predicatePath; this.includeReasoner = includeReasoner; this.CREATIVE_LIMIT = process.env.CREATIVE_LIMIT ? parseInt(process.env.CREATIVE_LIMIT) : 500; - this.CREATIVE_TIMEOUT = process.env.CREATIVE_TIMEOUT_S ? parseInt(process.env.CREATIVE_TIMEOUT) * 1000 : 4.8 * 60 * 1000; + this.CREATIVE_TIMEOUT = process.env.CREATIVE_TIMEOUT_S ? 
parseInt(process.env.CREATIVE_TIMEOUT) * 1000 : 4.75 * 60 * 1000; } get queryIsValid(): boolean { @@ -520,6 +520,11 @@ export default class InferredQueryHandler { [resultID: string]: number; } = {}; + // perf debugging + const startUsage = process.cpuUsage(); + const startTime = new Date().getTime(); + const ncpu = require('os').cpus().length; + const completedHandlers = await Promise.all( subQueries.map(async ({ template, queryGraph }, i) => { const span = Telemetry.startSpan({ description: 'creativeTemplate' }); @@ -543,6 +548,12 @@ export default class InferredQueryHandler { }) ); + // perf debugging + const endTime = new Date().getTime(); + const timeDelta = (endTime - startTime) * 10 * ncpu; + const { user, system } = process.cpuUsage(startUsage); + debug(`Average CPU Usage: ${(system + user) / timeDelta}%`); + for (const handlerInfo of completedHandlers) { if (handlerInfo === undefined) continue; const { i, handler } = handlerInfo; From 76eaeace18ef00aedec8c2adc16d97f1804b77b1 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Wed, 26 Jun 2024 15:25:37 -0700 Subject: [PATCH 05/18] new threading messaging type --- src/batch_edge_query.ts | 4 ++-- src/cache_handler.ts | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index d5504b21..2cdfe3d9 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -8,7 +8,7 @@ import CacheHandler from './cache_handler'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { StampedLog } from '@biothings-explorer/utils'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; +import { QueryHandlerOptions, ThreadMessage } from '@biothings-explorer/types'; import QEdge from './query_edge'; import { UnavailableAPITracker } from './types'; import { Record } from '@biothings-explorer/api-response-transform'; @@ -141,7 +141,7 @@ export default class BatchEdgeQueryHandler { if 
(nonCachedQEdges.length === 0) { queryRecords = []; if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: true }); + global.parentPort.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); } } else { debug('Start to convert qEdges into APIEdges....'); diff --git a/src/cache_handler.ts b/src/cache_handler.ts index a52c6d1f..ea0fc10a 100644 --- a/src/cache_handler.ts +++ b/src/cache_handler.ts @@ -11,7 +11,7 @@ import { Record, RecordPackage } from '@biothings-explorer/api-response-transfor import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; import QEdge from './query_edge'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; +import { QueryHandlerOptions, ThreadMessage } from '@biothings-explorer/types'; export interface RecordPacksByQedgeMetaKGHash { [QEdgeHash: string]: RecordPackage; @@ -212,12 +212,12 @@ export default class CacheHandler { async cacheEdges(queryRecords: Record[]): Promise { if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: true }); + global.parentPort.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); } return; } if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheInProgress: 1 }); + global.parentPort.postMessage({ threadId, type: 'cacheInProgress', value: 1 } satisfies ThreadMessage); } debug('Start to cache query records.'); try { @@ -229,7 +229,7 @@ export default class CacheHandler { // lock to prevent caching to/reading from actively caching edge const redisID = 'bte:edgeCache:' + hash; if (global.parentPort) { - global.parentPort.postMessage({ threadId, addCacheKey: redisID }); + global.parentPort.postMessage({ threadId, type: 'addCacheKey', value: redisID } satisfies ThreadMessage); } await redisClient.client.usingLock([`redisLock:${redisID}`, 
'redisLock:EdgeCaching'], 600000, async () => { try { @@ -267,7 +267,7 @@ export default class CacheHandler { ); } finally { if (global.parentPort) { - global.parentPort.postMessage({ threadId, completeCacheKey: redisID }); + global.parentPort.postMessage({ threadId, type: 'completeCacheKey', value: redisID } satisfies ThreadMessage); } } }); @@ -284,7 +284,7 @@ export default class CacheHandler { debug(`Caching failed due to ${error}. This does not terminate the query.`); } finally { if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: 1 }); + global.parentPort.postMessage({ threadId, type: 'cacheDone', value: 1 } satisfies ThreadMessage); } } } From e6ab581a11e5ca89e54294f1fa1c1be243f5004c Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 1 Jul 2024 13:47:11 -0700 Subject: [PATCH 06/18] single subquery queue --- src/batch_edge_query.ts | 123 +++-- src/biolink.ts | 74 --- src/cache_handler.ts | 3 +- src/edge_manager.ts | 8 +- src/exceptions/bte_error.ts | 16 - src/exceptions/invalid_query_graph_error.ts | 16 - src/graph/graph.ts | 3 +- src/index.ts | 12 +- src/inferred_mode/inferred_mode.ts | 2 +- src/qedge2apiedge.ts | 38 +- src/query_edge.ts | 580 -------------------- src/query_graph.ts | 400 -------------- src/query_node.ts | 306 ----------- src/results_assembly/query_results.ts | 3 +- src/update_nodes.ts | 9 +- 15 files changed, 94 insertions(+), 1499 deletions(-) delete mode 100644 src/biolink.ts delete mode 100644 src/exceptions/bte_error.ts delete mode 100644 src/exceptions/invalid_query_graph_error.ts delete mode 100644 src/query_edge.ts delete mode 100644 src/query_graph.ts delete mode 100644 src/query_node.ts diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index 2cdfe3d9..a69f434f 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -1,6 +1,6 @@ -import call_api from '@biothings-explorer/call-apis'; -import { redisClient } from '@biothings-explorer/utils'; -import QEdge2APIEdgeHandler, { 
APIEdge } from './qedge2apiedge'; +import { LogEntry, SerializableLog, redisClient } from '@biothings-explorer/utils'; +import { APIEdge, Record, RecordPackage } from '@biothings-explorer/types'; +import QEdge2APIEdgeHandler from './qedge2apiedge'; import NodesUpdateHandler from './update_nodes'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:batch_edge_query'); @@ -8,10 +8,9 @@ import CacheHandler from './cache_handler'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { StampedLog } from '@biothings-explorer/utils'; -import { QueryHandlerOptions, ThreadMessage } from '@biothings-explorer/types'; -import QEdge from './query_edge'; +import { QueryHandlerOptions, ThreadMessage, QEdge } from '@biothings-explorer/types'; import { UnavailableAPITracker } from './types'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { constructQueries } from '@biothings-explorer/call-apis'; export interface BatchEdgeQueryOptions extends QueryHandlerOptions { recordHashEdgeAttributes: string[]; @@ -23,9 +22,8 @@ export default class BatchEdgeQueryHandler { logs: StampedLog[]; caching: boolean; options: QueryHandlerOptions; - resolveOutputIDs: boolean; qEdges: QEdge | QEdge[]; - constructor(metaKG: MetaKG, resolveOutputIDs = true, options?: BatchEdgeQueryOptions) { + constructor(metaKG: MetaKG, options?: BatchEdgeQueryOptions) { this.metaKG = metaKG; this.logs = []; this.caching = options && options.caching; @@ -33,7 +31,6 @@ export default class BatchEdgeQueryHandler { if (options && options.recordHashEdgeAttributes) { this.options.EDGE_ATTRIBUTES_USED_IN_RECORD_HASH = options.recordHashEdgeAttributes; } - this.resolveOutputIDs = resolveOutputIDs; } /** @@ -61,11 +58,62 @@ export default class BatchEdgeQueryHandler { /** * @private */ - async _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { - const executor = new 
call_api(APIEdges, this.options, redisClient); - const records: Record[] = await executor.query(this.resolveOutputIDs, unavailableAPIs); - this.logs = [...this.logs, ...executor.logs]; - return records; + _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { + // Skip queueing queries to unavailable APIs + const queries = constructQueries(APIEdges, this.options).filter((query) => { + if (unavailableAPIs[query.APIEdge.query_operation.server]?.skip === true) { + unavailableAPIs[query.APIEdge.query_operation.server].skippedQueries += 1; + return false; + } + return true; + }); + + const queriesByHash = Object.fromEntries(queries.map((query) => [query.hash, query])); + + const qEdge = APIEdges[0].reasoner_edge; + const message = `${queries.length} planned queries for edge ${qEdge.id}`; + debug(message); + this.logs.push(new LogEntry('INFO', null, message).getLog()); + let finishedCount = 0; + const completedLogs = this.logs; + const completedRecords: Record[] = []; + return new Promise((resolve) => { + function listener(msg: ThreadMessage) { + if (msg.type !== 'subQueryResult') return; + const { hash, records, logs, apiUnavailable } = msg.value as { + hash: string; + records: RecordPackage; + logs: SerializableLog[]; + apiUnavailable: boolean; + }; + completedLogs.push(...LogEntry.deserialize(logs)); + completedRecords.push(...Record.unpackRecords(records, qEdge)); + + // Update any APIs that were unavailable for this segment + const server = queriesByHash[hash].APIEdge.query_operation.server; + if (apiUnavailable) { + if (!unavailableAPIs[server]) { + unavailableAPIs[server] = { skip: true, skippedQueries: 0 }; + } + unavailableAPIs[server].skippedQueries += 1; + } + + finishedCount += 1; + if (finishedCount >= queries.length) { + debug(`Total number of records returned for qEdge ${qEdge.id} is ${completedRecords.length}`); + resolve(completedRecords); + global.parentPort.off('message', listener); // Clean up + } + } + 
global.parentPort.on('message', listener); + global.parentPort.postMessage({ + type: 'subqueryRequest', + value: { + queries: queries.map((query) => query.freeze()), + options: this.options, + }, + }); + }); } /** @@ -133,40 +181,25 @@ export default class BatchEdgeQueryHandler { await this._rmEquivalentDuplicates(qEdges); debug('Node Update Success'); - const cacheHandler = new CacheHandler(this.caching, this.metaKG, this.options); - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - this.logs = [...this.logs, ...cacheHandler.logs]; let queryRecords: Record[]; - if (nonCachedQEdges.length === 0) { - queryRecords = []; - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); - } - } else { - debug('Start to convert qEdges into APIEdges....'); - const edgeConverter = new QEdge2APIEdgeHandler(nonCachedQEdges, this.metaKG); - const APIEdges = await edgeConverter.convert(nonCachedQEdges); - debug(`qEdges are successfully converted into ${APIEdges.length} APIEdges....`); - this.logs = [...this.logs, ...edgeConverter.logs]; - if (APIEdges.length === 0 && cachedRecords.length === 0) { - return []; - } - const expanded_APIEdges = this._expandAPIEdges(APIEdges); - debug('Start to query APIEdges....'); - queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs); - if (queryRecords === undefined) return; - debug('APIEdges are successfully queried....'); - queryRecords = await this._postQueryFilter(queryRecords); - debug(`Total number of records is (${queryRecords.length})`); - const cacheTask = cacheHandler.cacheEdges(queryRecords); - if (!(process.env.USE_THREADING === 'false')) { - global.cachingTasks?.push(cacheTask); - } else { - await cacheTask; - } + debug('Start to convert qEdges into APIEdges....'); + const edgeConverter = new QEdge2APIEdgeHandler(qEdges, this.metaKG); + const APIEdges = await edgeConverter.convert(qEdges); + debug(`qEdges 
are successfully converted into ${APIEdges.length} APIEdges....`); + this.logs = [...this.logs, ...edgeConverter.logs]; + if (APIEdges.length === 0) { + return []; } - queryRecords = [...queryRecords, ...cachedRecords]; + + const expanded_APIEdges = this._expandAPIEdges(APIEdges); + debug('Start to query APIEdges....'); + queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs); + if (queryRecords === undefined) return; + debug('APIEdges are successfully queried....'); + queryRecords = await this._postQueryFilter(queryRecords); + debug(`Total number of records is (${queryRecords.length})`); + debug('Start to update nodes...'); nodeUpdate.update(queryRecords); debug('Update nodes completed!'); diff --git a/src/biolink.ts b/src/biolink.ts deleted file mode 100644 index e5bb659d..00000000 --- a/src/biolink.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { BioLink } from 'biolink-model'; -import Debug from 'debug'; -const debug = Debug('bte:biothings-explorer-trapi:EdgeReverse'); - -class BioLinkModel { - biolink: BioLink; - constructor() { - this.biolink = new BioLink(); - this.biolink.loadSync(); - } - - reverse(predicate: string) { - if (typeof predicate === 'string') { - if (predicate in this.biolink.slotTree.objects) { - if (this.biolink.slotTree.objects[predicate].symmetric === true) { - return predicate; - } - return this.biolink.slotTree.objects[predicate].inverse; - } - } - - return undefined; - } - - getAncestorClasses(className: string): string | string[] { - if (className in this.biolink.classTree.objects) { - const ancestors = this.biolink.classTree.getAncestors(className).map((entity) => entity.name); - return [...ancestors, ...[className]]; - } - return className; - } - - getAncestorPredicates(predicate: string): string | string[] { - if (predicate in this.biolink.slotTree.objects) { - const ancestors = this.biolink.slotTree.getAncestors(predicate).map((entity) => entity.name); - return [...ancestors, ...[predicate]]; - } - return 
predicate; - } - - getDescendantClasses(className: string): string | string[] { - if (className in this.biolink.classTree.objects) { - const descendants = this.biolink.classTree.getDescendants(className).map((entity) => entity.name); - return [...descendants, ...[className]]; - } - return className; - } - - getDescendantPredicates(predicate: string): string[] { - if (predicate in this.biolink.slotTree.objects) { - const descendants = this.biolink.slotTree.getDescendants(predicate).map((entity) => entity.name); - return [...descendants, ...[predicate]]; - } - return [predicate]; - } - - getDescendantQualifiers(qualifier: string): string[] { - try { - const descendants = this.biolink.enumTree.getDescendants(qualifier).map((entity) => entity.name); - return [...descendants, qualifier]; - } catch (e) { - console.log('qual error', e); - return [qualifier]; - } - } -} - -// Freeze an instance to avoid multiple reloads -const biolink = new BioLinkModel(); -Object.freeze(biolink); - -global.BIOLINK_VERSION = biolink.biolink.biolinkJSON.version; - -export default biolink; diff --git a/src/cache_handler.ts b/src/cache_handler.ts index ea0fc10a..3dbd80be 100644 --- a/src/cache_handler.ts +++ b/src/cache_handler.ts @@ -7,10 +7,9 @@ import helper from './helper'; import lz4 from 'lz4'; import chunker from 'stream-chunker'; import { Readable, Transform } from 'stream'; -import { Record, RecordPackage } from '@biothings-explorer/api-response-transform'; +import { Record, RecordPackage, QEdge } from '@biothings-explorer/types'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; -import QEdge from './query_edge'; import { QueryHandlerOptions, ThreadMessage } from '@biothings-explorer/types'; export interface RecordPacksByQedgeMetaKGHash { diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 543a6ea5..b0a46d4f 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -1,15 +1,13 @@ import _ from 'lodash'; import { LogEntry, 
StampedLog } from '@biothings-explorer/utils'; -import BTEError from './exceptions/bte_error'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:edge-manager'); import * as config from './config'; import BatchEdgeQueryHandler, { BatchEdgeQueryOptions } from './batch_edge_query'; import { Telemetry } from '@biothings-explorer/utils'; -import QEdge from './query_edge'; import MetaKG from '@biothings-explorer/smartapi-kg'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { QueryHandlerOptions, QEdge, BTEError } from '@biothings-explorer/types'; +import { Record } from '@biothings-explorer/types'; import { UnavailableAPITracker } from './types'; import { RecordsByQEdgeID } from './results_assembly/query_results'; import path from 'path'; @@ -354,7 +352,7 @@ export default class QueryEdgeManager { } _createBatchQueryHandler(qEdge: QEdge, metaKG: MetaKG): BatchEdgeQueryHandler { - const handler = new BatchEdgeQueryHandler(metaKG, this.options.resolveOutputIDs, { + const handler = new BatchEdgeQueryHandler(metaKG, { caching: this.options.caching, submitter: this.options.submitter, recordHashEdgeAttributes: config.EDGE_ATTRIBUTES_USED_IN_RECORD_HASH, diff --git a/src/exceptions/bte_error.ts b/src/exceptions/bte_error.ts deleted file mode 100644 index 986234b5..00000000 --- a/src/exceptions/bte_error.ts +++ /dev/null @@ -1,16 +0,0 @@ -export default class BTEError extends Error { - statusCode: string; - constructor(message = 'Query aborted', name = 'QueryAborted', code = '501', ...params: any) { - super(...params); - - if (Error.captureStackTrace) { - Error.captureStackTrace(this, BTEError); - } - - this.name = name; - this.message = message; - this.statusCode = code; - } -} - -module.exports = BTEError; diff --git a/src/exceptions/invalid_query_graph_error.ts b/src/exceptions/invalid_query_graph_error.ts deleted file mode 100644 index 9974b541..00000000 
--- a/src/exceptions/invalid_query_graph_error.ts +++ /dev/null @@ -1,16 +0,0 @@ -export default class InvalidQueryGraphError extends Error { - statusCode: number; - constructor(message = 'Your Input Query Graph is invalid.', ...params: string[]) { - super(...params); - - Object.setPrototypeOf(this, InvalidQueryGraphError.prototype); - - if (Error.captureStackTrace) { - Error.captureStackTrace(this, InvalidQueryGraphError); - } - - this.name = 'InvalidQueryGraphError'; - this.message = message; - this.statusCode = 400; - } -} diff --git a/src/graph/graph.ts b/src/graph/graph.ts index 5227594d..3530ab1c 100644 --- a/src/graph/graph.ts +++ b/src/graph/graph.ts @@ -4,8 +4,7 @@ import Debug from 'debug'; import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import KGNode from './kg_node'; import KGEdge from './kg_edge'; -import { Record } from '@biothings-explorer/api-response-transform'; -import { TrapiAuxiliaryGraph, TrapiResult } from '@biothings-explorer/types'; +import { TrapiAuxiliaryGraph, TrapiResult, Record } from '@biothings-explorer/types'; import KnowledgeGraph from './knowledge_graph'; const debug = Debug('bte:biothings-explorer-trapi:Graph'); diff --git a/src/index.ts b/src/index.ts index 59401b36..34dd0c31 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,9 +1,8 @@ import MetaKG, { SmartAPIQueryResult } from '@biothings-explorer/smartapi-kg'; import path from 'path'; -import QueryGraph from './query_graph'; import KnowledgeGraph from './graph/knowledge_graph'; import TrapiResultsAssembler from './results_assembly/query_results'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; +import { QueryGraph, InvalidQueryGraphError } from '@biothings-explorer/types'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:main'); import Graph from './graph/graph'; @@ -25,17 +24,13 @@ import { TrapiResponse, TrapiResult, } from '@biothings-explorer/types'; -import { QueryHandlerOptions } from 
'@biothings-explorer/types'; +import { QueryHandlerOptions, QEdge } from '@biothings-explorer/types'; import BTEGraph from './graph/graph'; -import QEdge from './query_edge'; import { Telemetry } from '@biothings-explorer/utils'; // Exports for external availability export * from './types'; export { getTemplates, supportedLookups } from './inferred_mode/template_lookup'; -export { default as QEdge } from './query_edge'; -export { default as QNode } from './query_node'; -export { default as InvalidQueryGraphError } from './exceptions/invalid_query_graph_error'; export * from './qedge2apiedge'; export default class TRAPIQueryHandler { @@ -108,7 +103,8 @@ export default class TRAPIQueryHandler { ...this.options, schema: this.options.schema ? this.options.schema.info.version : 'not included', metakg: "", - smartapi: "" + smartapi: "", + apiList: "[omitted]" })}`, ); diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index bc6f3dd7..7178ee2d 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -2,7 +2,7 @@ import Debug from 'debug'; import { LogEntry, StampedLog, Telemetry } from '@biothings-explorer/utils'; import * as utils from '../utils'; import async from 'async'; -import biolink from '../biolink'; +import { biolink } from '@biothings-explorer/utils'; import { getTemplates, MatchedTemplate, TemplateLookup } from './template_lookup'; import { scaled_sigmoid, inverse_scaled_sigmoid } from '../results_assembly/score'; import TRAPIQueryHandler from '../index'; diff --git a/src/qedge2apiedge.ts b/src/qedge2apiedge.ts index 29e592da..fbb8fd6e 100644 --- a/src/qedge2apiedge.ts +++ b/src/qedge2apiedge.ts @@ -3,47 +3,13 @@ import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import * as config from './config'; const CURIE_WITH_PREFIXES = ['MONDO', 'DOID', 'UBERON', 'EFO', 'HP', 'CHEBI', 'CL', 'MGI', 'NCIT']; import Debug from 'debug'; -import QEdge from './query_edge'; +import { QEdge } 
from '@biothings-explorer/types'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { SmartAPIKGOperationObject } from '@biothings-explorer/smartapi-kg'; import { SRIBioEntity } from 'biomedical_id_resolver'; +import { APIEdge, BatchAPIEdge, MetaXEdge, NonBatchAPIEdge, TemplateBatchAPIEdge, TemplateNonBatchAPIEdge } from '@biothings-explorer/types'; const debug = Debug('bte:biothings-explorer-trapi:qedge2btedge'); -export interface MetaXEdge extends SmartAPIKGOperationObject { - reasoner_edge: QEdge; -} - -export interface TemplatedInput { - queryInputs: string | string[]; - [additionalAttributes: string]: string | string[]; -} - -export interface APIEdge extends MetaXEdge { - input: string | string[] | TemplatedInput; - input_resolved_identifiers: { - [curie: string]: SRIBioEntity; - }; - original_input: { - [equivalentCurie: string]: string; - }; -} - -export interface NonBatchAPIEdge extends APIEdge { - input: string; -} - -export interface BatchAPIEdge extends APIEdge { - input: string[]; -} - -export interface TemplateNonBatchAPIEdge extends APIEdge { - input: TemplatedInput; -} - -export interface TemplateBatchAPIEdge extends APIEdge { - input: TemplatedInput; -} - export default class QEdge2APIEdgeHandler { qEdges: QEdge[]; metaKG: MetaKG; diff --git a/src/query_edge.ts b/src/query_edge.ts deleted file mode 100644 index a9513741..00000000 --- a/src/query_edge.ts +++ /dev/null @@ -1,580 +0,0 @@ -import helper from './helper'; -import Debug from 'debug'; -import * as utils from './utils'; -import biolink from './biolink'; -import { Record, RecordNode, FrozenRecord } from '@biothings-explorer/api-response-transform'; -import QNode from './query_node'; -import { QNodeInfo } from './query_node'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import { TrapiAttributeConstraint, TrapiQualifierConstraint } from '@biothings-explorer/types'; - -const debug = Debug('bte:biothings-explorer-trapi:QEdge'); - -interface ExpandedQualifier { - 
qualifier_type_id: string; - qualifier_value: string[]; -} - -interface ExpandedQEdgeQualifierConstraint { - qualifier_set: ExpandedQualifier[]; -} - -interface CompactQualifiers { - [qualfier_type_id: string]: string | string[]; -} - -interface QEdgeInfo { - id: string; - object: QNodeInfo | QNode; - subject: QNodeInfo | QNode; - records?: FrozenRecord[]; - logs?: StampedLog[]; - executed?: boolean; - reverse?: boolean; - qualifier_constraints?: TrapiQualifierConstraint[]; - frozen?: boolean; - predicates?: string[]; -} - -interface AliasesByPrimary { - [primaryClient: string]: string[]; -} - -interface AliasesByPrimaryByType { - [semanticType: string]: AliasesByPrimary; -} - -export default class QEdge { - id: string; - predicate: string[]; - subject: QNode; - object: QNode; - expanded_predicates: string[]; - qualifier_constraints: TrapiQualifierConstraint[]; - reverse: boolean; - executed: boolean; - logs: StampedLog[]; - records: Record[]; - filter?: any; - - constructor(info: QEdgeInfo, reverse?: boolean) { - this.id = info.id; - this.predicate = info.predicates; - this.subject = info.frozen === true ? new QNode(info.subject as QNodeInfo) : (info.subject as QNode); - this.object = info.frozen === true ? new QNode(info.object as QNodeInfo) : (info.object as QNode); - this.expanded_predicates = []; - this.qualifier_constraints = info.qualifier_constraints || []; - - this.reverse = this.subject?.getCurie?.() === undefined && this.object?.getCurie?.() !== undefined; - - this.reverse = info.reverse !== undefined ? info.reverse : this.reverse; - this.reverse = reverse !== undefined ? reverse : this.reverse; - - this.init(); - - // edge has been fully executed - this.executed = info.executed === undefined ? false : info.executed; - // run initial checks - this.logs = info.logs === undefined ? 
[] : info.logs; - - // this edges query response records - if (info.records && info.frozen === true) - this.records = info.records.map((recordJSON: FrozenRecord) => new Record(recordJSON)); - else this.records = []; - - debug(`(2) Created Edge` + ` ${JSON.stringify(this.getID())} Reverse = ${this.reverse}`); - } - - freeze(): QEdgeInfo { - return { - id: this.id, - predicates: this.predicate, - qualifier_constraints: this.qualifier_constraints, - executed: this.executed, - reverse: this.reverse, - logs: this.logs, - subject: this.subject.freeze(), - object: this.object.freeze(), - records: this.records.map((record) => record.freeze()), - frozen: true, - }; - } - - init(): void { - this.expanded_predicates = this.getPredicate(); - } - - getID(): string { - return this.id; - } - - getHashedEdgeRepresentation(): string { - // all values sorted so same qEdge with slightly different orders will hash the same - const qualifiersSorted = (this.getSimpleQualifierConstraints() || []) - .map((qualifierSet) => { - return Object.entries(qualifierSet) - .sort(([qTa], [qTb]) => qTa.localeCompare(qTb)) - .reduce((str, [qType, qVal]) => `${str}${qType}:${qVal};`, ''); - }) - .sort((setString1, setString2) => setString1.localeCompare(setString2)); - - const toBeHashed = - (this.getInputNode().getCategories() || []).sort().join(',') + - (this.getPredicate() || []).sort() + - (this.getOutputNode().getCategories() || []).sort().join(',') + - (this.getInputCurie() || []).sort() + - qualifiersSorted; - - return helper._generateHash(toBeHashed); - } - - expandPredicates(predicates: string[]): string[] { - return Array.from(new Set(predicates.reduce((acc, cur) => [...acc, ...biolink.getDescendantPredicates(cur)], []))); - } - - getPredicate(): string[] { - if (this.predicate === undefined || this.predicate === null) { - return undefined; - } - const predicates = utils.toArray(this.predicate).map((item) => utils.removeBioLinkPrefix(item)); - const expandedPredicates = 
this.expandPredicates(predicates); - debug(`Expanded edges: ${expandedPredicates}`); - return expandedPredicates - .map((predicate) => { - return this.isReversed() === true ? biolink.reverse(predicate) : predicate; - }) - .filter((item) => !(typeof item === 'undefined')); - } - - expandQualifierConstraints(constraints: TrapiQualifierConstraint[]): ExpandedQEdgeQualifierConstraint[] { - return constraints.map((qualifierSetObj) => { - return { - qualifier_set: qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => { - const new_qualifier_values = qualifier_type_id.includes('predicate') - ? Array.isArray(qualifier_value) - ? Array.from( - qualifier_value.reduce((set: Set, predicate: string) => { - biolink - .getDescendantPredicates(utils.removeBioLinkPrefix(predicate)) - .forEach((item) => set.add(`biolink:${utils.removeBioLinkPrefix(item)}`)); - return set; - }, new Set()), - ) - : Array.from( - new Set( - biolink - .getDescendantPredicates(utils.removeBioLinkPrefix(qualifier_value)) - .map((item) => `biolink:${utils.removeBioLinkPrefix(item)}`), - ), - ) - : Array.from( - new Set(biolink.getDescendantQualifiers(utils.removeBioLinkPrefix(qualifier_value as string))), - ); - - return { - qualifier_type_id, - qualifier_value: new_qualifier_values, - }; - }), - }; - }); - } - - getQualifierConstraints(): TrapiQualifierConstraint[] { - if (!this.qualifier_constraints) { - return []; - } - if (this.isReversed()) { - return this.qualifier_constraints.map((qualifierSetObj) => { - return { - qualifier_set: qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => { - let newQualifierType = qualifier_type_id; - let newQualifierValue = qualifier_value; - if (qualifier_type_id.includes('predicate')) { - if (Array.isArray(qualifier_value)) { - newQualifierValue = qualifier_value.map((str) => `biolink:${str.replace('biolink', '')}`); - } else { - newQualifierValue = `biolink:${qualifier_value.replace('biolink:', '')}`; - } - } - if 
(qualifier_type_id.includes('subject')) { - newQualifierType = qualifier_type_id.replace('subject', 'object'); - } - if (qualifier_type_id.includes('object')) { - newQualifierType = qualifier_type_id.replace('object', 'subject'); - } - return { - qualifier_type_id: newQualifierType, - qualifier_value: newQualifierValue, - }; - }), - }; - }); - } - return this.qualifier_constraints; - } - - getSimpleQualifierConstraints(): CompactQualifiers[] | undefined { - const constraints: CompactQualifiers[] = this.getQualifierConstraints().map((qualifierSetObj) => { - return Object.fromEntries( - qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - qualifier_type_id.replace('biolink:', ''), - Array.isArray(qualifier_value) - ? qualifier_value.map((string) => string.replace('biolink:', '')) - : qualifier_value.replace('biolink:', ''), - ]), - ); - }); - return constraints.length > 0 ? constraints : undefined; - } - - getSimpleExpandedQualifierConstraints(): CompactQualifiers[] | undefined { - const constraints = this.expandQualifierConstraints(this.getQualifierConstraints()).map( - (qualifierSetObj: ExpandedQEdgeQualifierConstraint) => { - return Object.fromEntries( - qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - utils.removeBioLinkPrefix(qualifier_type_id), - utils.toArray(qualifier_value).map((e) => utils.removeBioLinkPrefix(e)), - ]), - ); - }, - ); - return constraints.length > 0 ? constraints : undefined; - } - - chooseLowerEntityValue(): void { - // edge has both subject and object entity counts and must choose lower value - // to use in query. 
- debug(`(8) Choosing lower entity count in edge...`); - if (this.object.entity_count && this.subject.entity_count) { - if (this.object.entity_count == this.subject.entity_count) { - // // (#) ---> () - this.reverse = false; - this.object.holdCurie(); - debug(`(8) Sub - Obj were same but chose subject (${this.subject.entity_count})`); - } else if (this.object.entity_count > this.subject.entity_count) { - // (#) ---> () - this.reverse = false; - // tell node to hold curie in a temp field - this.object.holdCurie(); - debug(`(8) Chose lower entity value in subject (${this.subject.entity_count})`); - } else { - // () <--- (#) - this.reverse = true; - // tell node to hold curie in a temp field - this.subject.holdCurie(); - debug(`(8) Chose lower entity value in object (${this.object.entity_count})`); - } - } else { - debug(`(8) Error: Edge must have both object and subject entity values.`); - } - } - - extractCuriesFromRecords(records: Record[], isReversed: boolean): AliasesByPrimaryByType { - // will give you all curies found by semantic type, each type will have - // a main ID and all of it's aliases - debug(`(7) Updating Entities in "${this.getID()}"`); - const typesToInclude = isReversed ? 
this.subject.getCategories() : this.object.getCategories(); - debug(`(7) Collecting Types: "${JSON.stringify(typesToInclude)}"`); - const all: AliasesByPrimaryByType = {}; - records.forEach((record) => { - const subjectTypes = record.subject.semanticType.map((type) => type.replace('biolink:', '')); - const objectTypes = record.object.semanticType.map((type) => type.replace('biolink:', '')); - const nodeOriginals = { - subject: record.subject.original, - object: record.object.original, - }; - - Object.entries({ subject: subjectTypes, object: objectTypes }).forEach(([node, nodeTypes]) => { - nodeTypes.forEach((nodeType) => { - const nodeOriginal = nodeOriginals[node]; - - if (!typesToInclude.includes(nodeType) && !typesToInclude.includes('NamedThing')) { - return; - } - if (!all[nodeType]) { - all[nodeType] = {}; - } - const originalAliases: Set = new Set(); - (record[node] as RecordNode).equivalentCuries.forEach((curie) => { - originalAliases.add(curie); - }); - // check and add only unique - let wasFound = false; - originalAliases.forEach((alias) => { - if (all[nodeType][alias]) { - wasFound = true; - } - }); - if (!wasFound) { - all[nodeType][nodeOriginal] = [...originalAliases]; - } - - if (!all[nodeType][nodeOriginal] || all[nodeType][nodeOriginal].length === 0) { - if (record[node].curie.length > 0) { - // else #2 check curie - all[nodeType][nodeOriginal] = [record[node].curie]; - } else { - // #3 last resort check original - all[nodeType][nodeOriginal] = [nodeOriginal]; - } - } - }); - }); - }); - debug(`Collected entity ids in records: ${JSON.stringify(Object.keys(all))}`); - return all; - // {Gene:{'id': ['alias']}} - } - - _combineCuries(curies: AliasesByPrimaryByType): AliasesByPrimary { - // combine all curies in case there are - // multiple categories in this node since - // they are separated by type - const combined = {}; - for (const type in curies) { - for (const original in curies[type]) { - combined[original] = curies[type][original]; - } - } - 
return combined; - } - - updateNodesCuries(records: Record[]): void { - // update node queried (1) ---> (update) - const curies_by_semantic_type = this.extractCuriesFromRecords(records, this.reverse); - const combined_curies = this._combineCuries(curies_by_semantic_type); - this.reverse ? this.subject.updateCuries(combined_curies) : this.object.updateCuries(combined_curies); - // update node used as input (1 [update]) ---> () - const curies_by_semantic_type_2 = this.extractCuriesFromRecords(records, !this.reverse); - const combined_curies_2 = this._combineCuries(curies_by_semantic_type_2); - !this.reverse ? this.subject.updateCuries(combined_curies_2) : this.object.updateCuries(combined_curies_2); - } - - applyNodeConstraints(): void { - debug(`(6) Applying Node Constraints to ${this.records.length} records.`); - const kept = []; - let save_kept = false; - const sub_constraints = this.subject.constraints; - if (sub_constraints && sub_constraints.length) { - const from = this.reverse ? 'object' : 'subject'; - debug(`Node (subject) constraints: ${JSON.stringify(sub_constraints)}`); - save_kept = true; - for (let i = 0; i < this.records.length; i++) { - const res = this.records[i]; - let keep = true; - // apply constraints - for (let x = 0; x < sub_constraints.length; x++) { - const constraint = sub_constraints[x]; - keep = this.meetsConstraint(constraint, res, from); - } - // pass or not - if (keep) { - kept.push(res); - } - } - } - - const obj_constraints = this.object.constraints; - if (obj_constraints && obj_constraints.length) { - const from = this.reverse ? 
'subject' : 'object'; - debug(`Node (object) constraints: ${JSON.stringify(obj_constraints)}`); - save_kept = true; - for (let i = 0; i < this.records.length; i++) { - const res = this.records[i]; - let keep = true; - // apply constraints - for (let x = 0; x < obj_constraints.length; x++) { - const constraint = obj_constraints[x]; - keep = this.meetsConstraint(constraint, res, from); - } - // pass or not - if (keep) { - kept.push(res); - } - } - } - if (save_kept) { - // only override recordss if there was any filtering done. - this.records = kept; - debug(`(6) Reduced to (${this.records.length}) records.`); - } else { - debug(`(6) No constraints. Skipping...`); - } - } - - meetsConstraint(constraint: TrapiAttributeConstraint, record: Record, from: string): boolean { - // list of attribute ids in node - const available_attributes = [...new Set(Object.keys(record[from].attributes))]; - // debug(`ATTRS ${JSON.stringify(record[from].normalizedInfo[0]._leafSemanticType)}` + - // ` ${from} : ${JSON.stringify(available_attributes)}`); - // determine if node even contains right attributes - const filters_found = available_attributes.filter((attr) => attr == constraint.id); - if (!filters_found.length) { - // node doesn't have the attribute needed - return false; - } else { - // match attr by name, parse only attrs of interest - const node_attributes = {}; - filters_found.forEach((filter) => { - node_attributes[filter] = record[from].attributes[filter]; - }); - switch (constraint.operator) { - case '==': - for (const key in node_attributes) { - if (!isNaN(constraint.value as number)) { - if (Array.isArray(node_attributes[key])) { - if ( - node_attributes[key].includes(constraint.value) || - node_attributes[key].includes(constraint.value.toString()) - ) { - return true; - } - } else { - if ( - node_attributes[key] == constraint.value || - node_attributes[key] == constraint.value.toString() || - node_attributes[key] == parseInt(constraint.value as string) - ) { - return 
true; - } - } - } else { - if (Array.isArray(node_attributes[key])) { - if (node_attributes[key].includes(constraint.value)) { - return true; - } - } else { - if ( - node_attributes[key] == constraint.value || - node_attributes[key] == constraint.value.toString() || - node_attributes[key] == parseInt(constraint.value as string) - ) { - return true; - } - } - } - } - return false; - case '>': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) > parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) > parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '>=': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) >= parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) >= parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '<': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) > parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) < parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '<=': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) <= parseInt(constraint.value as string)) { - return true; - } - } - } 
else { - if (parseInt(node_attributes[key]) <= parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - default: - debug(`Node operator not handled ${constraint.operator}`); - return false; - } - } - } - - storeRecords(records: Record[]): void { - debug(`(6) Storing records...`); - // store new records in current edge - this.records = records; - // will update records if any constraints are found - this.applyNodeConstraints(); - debug(`(7) Updating nodes based on edge records...`); - this.updateNodesCuries(records); - } - - getInputNode(): QNode { - if (this.reverse) { - return this.object; - } - return this.subject; - } - - getOutputNode(): QNode { - if (this.reverse) { - return this.subject; - } - return this.object; - } - - isReversed(): boolean { - return this.reverse; - } - - getInputCurie(): string[] { - const curie = this.subject.getCurie() || this.object.getCurie(); - if (Array.isArray(curie)) { - return curie; - } - return [curie]; - } - - hasInputResolved(): boolean { - return this.getInputNode().hasEquivalentIDs(); - } - - hasInput(): boolean { - if (this.reverse) { - return this.object.hasInput(); - } - return this.subject.hasInput(); - } - - getReversedPredicate(predicate: string): string { - return predicate ? 
biolink.reverse(predicate) : undefined; - } -} diff --git a/src/query_graph.ts b/src/query_graph.ts deleted file mode 100644 index df95c906..00000000 --- a/src/query_graph.ts +++ /dev/null @@ -1,400 +0,0 @@ -import QEdge from './query_edge'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import Debug from 'debug'; -import QNode from './query_node'; -import biolink from './biolink'; -import { resolveSRI } from 'biomedical_id_resolver'; -import _ from 'lodash'; -import * as utils from './utils'; -import { TrapiQueryGraph } from '@biothings-explorer/types'; - -const debug = Debug('bte:biothings-explorer-trapi:query_graph'); - -export default class QueryGraph { - queryGraph: TrapiQueryGraph; - schema: any; - logs: StampedLog[]; - nodes: { [QNodeID: string]: QNode }; - edges: { [QEdgeID: string]: QEdge }; - constructor(queryGraph: TrapiQueryGraph, schema: any) { - this.queryGraph = queryGraph; - this.schema = schema; - this.logs = []; - } - - _validateEmptyNodes(queryGraph: TrapiQueryGraph): void { - if (Object.keys(queryGraph.nodes).length === 0) { - throw new InvalidQueryGraphError('Your Query Graph has no nodes defined.'); - } - } - - _validateOneNodeID(queryGraph: TrapiQueryGraph): void { - for (const nodeID in queryGraph.nodes) { - if (queryGraph.nodes[nodeID] && queryGraph.nodes[nodeID]?.ids?.length > 0) { - return; - } - } - throw new InvalidQueryGraphError( - 'body/message.query_graph.nodes should contain at least one node with at least one non-null id', - ); - } - - _validateEmptyEdges(queryGraph: TrapiQueryGraph): void { - if (Object.keys(queryGraph.edges).length === 0) { - throw new InvalidQueryGraphError('Your Query Graph has no edges defined.'); - } - } - - _validateNodeEdgeCorrespondence(queryGraph: TrapiQueryGraph): void { - for (const qEdgeID in queryGraph.edges) { - if (!(this.queryGraph.edges[qEdgeID].subject in queryGraph.nodes)) { - throw new 
InvalidQueryGraphError(`The subject of edge ${qEdgeID} is not defined in the query graph.`); - } - if (!(this.queryGraph.edges[qEdgeID].object in queryGraph.nodes)) { - throw new InvalidQueryGraphError(`The object of edge ${qEdgeID} is not defined in the query graph.`); - } - } - } - - _validateBatchSize(queryGraph: TrapiQueryGraph): void { - Object.entries(queryGraph.nodes).forEach(([nodeID, node]) => { - if (node.ids && node.ids.length > 150) { - throw new InvalidQueryGraphError(`Query node ${nodeID} exceeds batch size limit.`); - } - }); - } - - _validateDuplicateEdges(queryGraph: TrapiQueryGraph): void { - const edgeSet = new Set(); - for (const edgeID in queryGraph.edges) { - const subject = queryGraph.edges[edgeID].subject; - const object = queryGraph.edges[edgeID].object; - if (edgeSet.has(`${subject}-${object}`) || edgeSet.has(`${object}-${subject}`)) { - throw new InvalidQueryGraphError('Multiple edges between two nodes.'); - } - edgeSet.add(`${subject}-${object}`); - } - } - - _validateCycles(queryGraph: TrapiQueryGraph): void { - const nodes = {}; - for (const nodeID in queryGraph.nodes) { - nodes[nodeID] = { - connections: new Set(), - visited: false, - }; - } - - for (const edgeID in queryGraph.edges) { - const edge = queryGraph.edges[edgeID]; - nodes[edge.subject].connections.add(edge.object); - nodes[edge.object].connections.add(edge.subject); - } - - for (const firstNode in nodes) { - if (nodes[firstNode].visited === true) continue; - const stack: { curNode: string; parent: string | number }[] = [{ curNode: firstNode, parent: -1 }]; - nodes[firstNode].visited = true; - while (stack.length !== 0) { - const { curNode, parent } = stack.pop(); - for (const conNode of nodes[curNode].connections) { - if (conNode == parent) continue; - if (nodes[conNode].visited === true) { - throw new InvalidQueryGraphError('The query graph contains a cycle.'); - } - stack.push({ curNode: conNode, parent: curNode }); - nodes[conNode].visited = true; - } - } - } - } - - 
_validateNodeProperties(queryGraph: TrapiQueryGraph): void { - const schemProps = this.schema?.components?.schemas?.QNode?.properties - ? this.schema.components.schemas.QNode.properties - : {}; - const nodeProperties = new Set(Object.keys(schemProps)); - const badProperties = new Set(); - const badNodes = new Set(); - for (const nodeID in queryGraph.nodes) { - for (const property in queryGraph.nodes[nodeID]) { - if (!nodeProperties.has(property)) { - badProperties.add(property); - badNodes.add(nodeID); - } - } - } - - if (badProperties.size !== 0) { - this.logs.push( - new LogEntry( - 'WARNING', - null, - `Ignoring unrecognized properties (${[...badProperties].join(',')}) on nodes (${[...badNodes].join(',')}).`, - ).getLog(), - ); - } - } - - _validateEdgeProperties(queryGraph: TrapiQueryGraph): void { - const schemProps = this.schema?.components?.schemas?.QEdge?.properties - ? this.schema.components.schemas.QEdge.properties - : {}; - const edgeProperties = new Set(Object.keys(schemProps)); - const badProperties = new Set(); - const badEdges = new Set(); - for (const edgeID in queryGraph.edges) { - for (const property in queryGraph.edges[edgeID]) { - if (!edgeProperties.has(property)) { - badProperties.add(property); - badEdges.add(edgeID); - } - } - } - - if (badProperties.size !== 0) { - this.logs.push( - new LogEntry( - 'WARNING', - null, - `Ignoring unrecognized properties (${[...badProperties].join(',')}) on edges (${[...badEdges].join(',')}).`, - ).getLog(), - ); - } - } - - _validateNoDuplicateQualifierTypes(queryGraph: TrapiQueryGraph): void { - Object.entries(queryGraph.edges).forEach(([id, edge]) => { - if (edge.qualifier_constraints) { - edge.qualifier_constraints.forEach((qualifierSet, i) => { - const qualifierTypes = new Set(); - qualifierSet.qualifier_set.forEach(({ qualifier_type_id }) => { - if (qualifierTypes.has(qualifier_type_id)) { - throw new InvalidQueryGraphError( - `Query edge ${id} qualifier set ${i} contains duplicate qualifier_type_id 
${qualifier_type_id}`, - ); - } - qualifierTypes.add(qualifier_type_id); - }); - }); - } - }); - } - - _validate(queryGraph: TrapiQueryGraph): void { - this._validateEmptyEdges(queryGraph); - this._validateEmptyNodes(queryGraph); - this._validateOneNodeID(queryGraph); - this._validateNodeEdgeCorrespondence(queryGraph); - this._validateDuplicateEdges(queryGraph); - this._validateNodeProperties(queryGraph); - this._validateEdgeProperties(queryGraph); - this._validateBatchSize(queryGraph); - this._validateCycles(queryGraph); - this._validateNoDuplicateQualifierTypes(queryGraph); - } - - private async _findNodeCategories(curies: string[]): Promise { - const noMatchMessage = `No category match found for ${JSON.stringify(curies)}.`; - if (curies.length == 1) { - let matchedCategories: string[]; - const resolved = await resolveSRI({ - unknown: curies, - }); - debug(`Query node missing categories...Looking for match...`); - if (resolved[curies[0]] && resolved[curies[0]].primaryTypes) { - matchedCategories = resolved[curies[0]].primaryTypes; - return matchedCategories.filter((c) => c).map((c) => `biolink:${c}`); - } else { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - return []; - } - } else { - try { - let finalCategories: string[] = []; - const tree = biolink.biolink.classTree.objects; - - // get array of all unique categories for all curies - const allCategories = [ - ...Object.values(await resolveSRI({ unknown: curies })) - .map((resolvedCurie) => resolvedCurie.semanticTypes) - .filter((semanticTypes) => semanticTypes.some((item) => item !== null)) - .map((semanticTypes) => semanticTypes.map((t) => utils.removeBioLinkPrefix(t))) - .reduce((set: Set, arr: string[]): Set => new Set([...set, ...arr]), new Set()), - ]; - - if (allCategories.length) { - finalCategories.push(allCategories[0]); - } else { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - return []; - } - - 
allCategories.forEach((category, i) => { - const keepSet: Set = new Set(); - const rmSet: Set = new Set(); - // check against each currently selected category - finalCategories.forEach((selected) => { - if (tree[selected].is_mixin) { - rmSet.add(selected); - } - if (tree[category].is_mixin) { - rmSet.add(category); - } - if (category === selected) { - return keepSet.add(category); - } - - let parent = category; - while (parent) { - if (selected === parent || tree[selected].children.includes(parent)) { - rmSet.add(selected); - return keepSet.add(category); - } - parent = tree[parent].parent; - } - - parent = selected; - while (parent) { - if (category === parent || tree[category].children.includes(parent)) { - rmSet.add(category); - return keepSet.add(selected); - } - parent = tree[parent].parent; - } - // add both if neither is ancestor of the other - keepSet.add(category).add(selected); - }); - finalCategories = [...keepSet].filter((cat) => !rmSet.has(cat)); - // in event no categories are kept (due to mixin shenanigans/etc) - if (!finalCategories.length && i < allCategories.length - 1) { - finalCategories = [allCategories[i + 1]]; - } - }); - if (!finalCategories.length) { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - } - return [...finalCategories].map((cat) => 'biolink:' + cat); - } catch (error) { - const errorMessage = `Unable to retrieve categories due to error ${error}`; - debug(errorMessage); - this.logs.push(new LogEntry('ERROR', null, errorMessage).getLog()); - return []; - } - } - } - - private async _storeNodes(): Promise<{ [qNodeID: string]: QNode }> { - const nodes: { [qNodeID: string]: QNode } = {}; - for (const qNodeID in this.queryGraph.nodes) { - //if node has ID but no categories - if ( - (!this.queryGraph.nodes[qNodeID].categories && this.queryGraph.nodes[qNodeID].ids) || - (this.queryGraph.nodes[qNodeID].categories && - // this.queryGraph.nodes[qNodeID].categories.length == 0 && - 
this.queryGraph.nodes[qNodeID].ids) - ) { - let userAssignedCategories = this.queryGraph.nodes[qNodeID].categories; - let categories = await this._findNodeCategories(this.queryGraph.nodes[qNodeID].ids); - if (userAssignedCategories) { - userAssignedCategories = [...userAssignedCategories]; // new Array for accurate logging after node updated - categories = categories.filter((category) => !userAssignedCategories.includes(category)); - } - if (categories.length) { - if (!this.queryGraph.nodes[qNodeID].categories) { - this.queryGraph.nodes[qNodeID].categories = categories; - } else { - this.queryGraph.nodes[qNodeID].categories.push(...categories); - } - debug(`Node categories found. Assigning value: ${JSON.stringify(this.queryGraph.nodes[qNodeID])}`); - this.logs.push( - new LogEntry( - 'INFO', - null, - [ - `Node ${qNodeID} `, - `with id${this.queryGraph.nodes[qNodeID].ids.length > 1 ? 's' : ''} `, - `[${this.queryGraph.nodes[qNodeID].ids.join(', ')}] `, - `${ - userAssignedCategories && userAssignedCategories.length - ? `and categor${userAssignedCategories.length === 1 ? 'y' : 'ies'} [${userAssignedCategories.join( - ', ', - )}] augmented with` - : `assigned` - } `, - `categor${categories.length > 1 ? 'ies' : 'y'} `, - `[${categories.join(', ')}] inferred from `, - `id${this.queryGraph.nodes[qNodeID].ids.length > 1 ? 
's' : ''}.`, - ].join(''), - ).getLog(), - ); - } - nodes[qNodeID] = new QNode({ id: qNodeID, ...this.queryGraph.nodes[qNodeID] }); - } else { - debug(`Creating node...`); - nodes[qNodeID] = new QNode({ id: qNodeID, ...this.queryGraph.nodes[qNodeID] }); - } - - if (nodes[qNodeID].categories !== undefined) { - if ( - nodes[qNodeID].categories.includes('biolink:Disease') || - nodes[qNodeID].categories.includes('biolink:PhenotypicFeature') - ) { - nodes[qNodeID].categories = nodes[qNodeID].categories.filter( - (e) => e !== 'biolink:Disease' && e !== 'biolink:PhenotypicFeature', - ); - nodes[qNodeID].categories.push('biolink:DiseaseOrPhenotypicFeature'); - } - if ( - nodes[qNodeID].categories.includes('biolink:Protein') && - !nodes[qNodeID].categories.includes('biolink:Gene') - ) { - nodes[qNodeID].categories.push('biolink:Gene'); - } - } - } - this.logs.push( - new LogEntry('DEBUG', null, `BTE identified ${Object.keys(nodes).length} qNodes from your query graph`).getLog(), - ); - return nodes; - } - - async calculateEdges(): Promise { - this._validate(this.queryGraph); - //populate edge and node info - debug(`(1) Creating edges for manager...`); - if (this.nodes === undefined) { - this.nodes = await this._storeNodes(); - } - - const edges = {}; - Object.entries(this.queryGraph.edges).forEach(([qEdgeID, qEdge]) => { - const edge_info = { - ...qEdge, - ...{ - subject: this.nodes[qEdge.subject], - object: this.nodes[qEdge.object], - }, - }; - - //store in each node ids of edges connected to them - this.nodes[qEdge.subject].updateConnection(qEdgeID); - this.nodes[qEdge.object].updateConnection(qEdgeID); - - edges[qEdgeID] = new QEdge({ id: qEdgeID, ...edge_info }); - }); - this.edges = edges; - this.logs.push( - new LogEntry( - 'DEBUG', - null, - `BTE identified ${Object.keys(this.edges).length} qEdges from your query graph`, - ).getLog(), - ); - return Object.values(this.edges); - } -} diff --git a/src/query_node.ts b/src/query_node.ts deleted file mode 100644 index 
98160026..00000000 --- a/src/query_node.ts +++ /dev/null @@ -1,306 +0,0 @@ -/* eslint-disable @typescript-eslint/no-var-requires */ -import _ from 'lodash'; -import * as utils from './utils'; -import biolink from './biolink'; -import Debug from 'debug'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; -import { SRIBioEntity } from 'biomedical_id_resolver'; -const debug = Debug('bte:biothings-explorer-trapi:QNode'); - -export interface QNodeInfo { - id: string; - categories?: string[]; - ids?: string[]; - is_set?: boolean; - expanded_curie?: ExpandedCuries; - held_curie?: string[]; - held_expanded?: ExpandedCuries; - constraints?: any; - connected_to?: string[]; - equivalentIDs?: SRIResolvedSet; -} - -export interface SRIResolvedSet { - [originalCurie: string]: SRIBioEntity; -} - -export interface ExpandedCuries { - [originalCurie: string]: string[]; -} - -export default class QNode { - id: string; - categories: string[]; - equivalentIDs?: SRIResolvedSet; - expandedCategories: string[]; - equivalentIDsUpdated: boolean; - curie: string[]; - is_set: boolean; - expanded_curie: ExpandedCuries; - entity_count: number; - held_curie: string[]; - held_expanded: ExpandedCuries; - constraints: any; // TODO type - connected_to: Set; - - constructor(info: QNodeInfo) { - this.id = info.id; - this.categories = info.categories || ['NamedThing']; - this.expandedCategories = this.categories; - this.equivalentIDsUpdated = false; - // mainIDs - this.curie = info.ids; - //is_set - this.is_set = info.is_set; - //mainID : its equivalent ids - this.expanded_curie = info.expanded_curie !== undefined ? info.expanded_curie : {}; - this.entity_count = info.ids ? info.ids.length : 0; - debug(`(1) Node "${this.id}" has (${this.entity_count}) entities at start.`); - //when choosing a lower entity count a node with higher count - // might be told to store its curies temporarily - this.held_curie = info.held_curie !== undefined ? 
info.held_curie : []; - this.held_expanded = info.held_expanded !== undefined ? info.held_expanded : {}; - //node constraints - this.constraints = info.constraints; - //list of edge ids that are connected to this node - this.connected_to = info.connected_to !== undefined ? new Set(info.connected_to) : new Set(); - //object-ify array of initial curies - if (info.expanded_curie === undefined) this.expandCurie(); - this.validateConstraints(); - this.expandCategories(); - } - - freeze(): QNodeInfo { - return { - categories: this.categories, - connected_to: Array.from(this.connected_to), - constraints: this.constraints, - ids: this.curie, - equivalentIDs: this.equivalentIDs, - expanded_curie: this.expanded_curie, - held_curie: this.held_curie, - held_expanded: this.held_expanded, - id: this.id, - is_set: this.is_set, - }; - } - - isSet(): boolean { - //query node specified as set - return this.is_set ? true : false; - } - - validateConstraints(): void { - const required = ['id', 'operator', 'value']; - if (this.constraints && this.constraints.length) { - this.constraints.forEach((constraint: unknown) => { - const constraint_keys = Object.keys(constraint); - if (_.intersection(constraint_keys, required).length < 3) { - throw new InvalidQueryGraphError(`Invalid constraint specification must include (${required})`); - } - }); - } - } - - expandCurie(): void { - if (this.curie && this.curie.length) { - this.curie.forEach((id) => { - if (!Object.hasOwnProperty.call(id, this.expanded_curie)) { - this.expanded_curie[id] = [id]; - } - }); - debug(`(1) Node "${this.id}" expanded initial curie. 
${JSON.stringify(this.expanded_curie)}`); - } - } - - updateConnection(qEdgeID: string): void { - this.connected_to.add(qEdgeID); - debug(`"${this.id}" connected to "${[...this.connected_to]}"`); - } - - getConnections(): string[] { - return [...this.connected_to]; - } - - holdCurie(): void { - //hold curie aside temp - debug(`(8) Node "${this.id}" holding ${JSON.stringify(this.curie)} aside.`); - this.held_curie = this.curie; - this.held_expanded = this.expanded_curie; - this.curie = undefined; - this.expanded_curie = {}; - } - - updateCuries(curies: ExpandedCuries): void { - // {originalID : [aliases]} - if (!this.curie) { - this.curie = []; - } - //bring back held curie - if (this.held_curie.length) { - debug(`(8) Node "${this.id}" restored curie.`); - //restore - this.curie = this.held_curie; - this.expanded_curie = this.held_expanded; - //reset holds - this.held_curie = []; - this.held_expanded = {}; - } - if (!this.curie.length) { - debug(`Node "${this.id}" saving (${Object.keys(curies).length}) curies...`); - this.curie = Object.keys(curies); - this.expanded_curie = curies; - } else { - debug(`Node "${this.id}" intersecting (${this.curie.length})/(${Object.keys(curies).length}) curies...`); - // let intersection = this.intersectCuries(this.curie, curies); - // this.curie = intersection; - // debug(`Node "${this.id}" kept (${intersection.length}) curies...`); - this.intersectWithExpandedCuries(curies); - } - this.entity_count = this.curie.length; - } - - _combineCuriesIntoList(curies: ExpandedCuries): string[] { - // curies {originalID : ['aliasID']} - //combine all curies into single list for easy intersection - const combined: Set = new Set(); - Object.values(curies).forEach((expanded) => { - if (!Array.isArray(expanded)) { - combined.add(expanded); - } else { - expanded.forEach((curie) => { - combined.add(curie); - }); - } - }); - return [...combined]; - } - - - intersectWithExpandedCuries(newCuries: ExpandedCuries): void { - const keep: { [mainID: 
string]: string[] } = {}; - - const existingSet = new Set(); - for (const key in this.expanded_curie) { - for (const curie of this.expanded_curie[key]) { - existingSet.add(curie.toLowerCase()); - } - } - - // If a new entity has any alias intersection with an existing entity, keep it - for (const [newMainID, currentAliases] of Object.entries(newCuries)) { - let someIntersection = false; - for (const curie of currentAliases) { - if (existingSet.has(curie.toLowerCase())) { - someIntersection = true; - break; - } - } - - if (someIntersection) { - if (!keep[newMainID]) keep[newMainID] = currentAliases; - } - } - - //save expanded curies (main + aliases) - this.expanded_curie = keep; - //save curies (main ids) - this.curie = Object.keys(keep); - debug(`Node "${this.id}" kept (${Object.keys(keep).length}) curies...`); - } - - intersectCuries(curies: string[], newCuries: ExpandedCuries): string[] { - //curies is a list ['ID'] - // new curies {originalID : ['aliasID']} - const all_new_curies = this._combineCuriesIntoList(newCuries); - return _.intersection(curies, all_new_curies); - } - - getID(): string { - return this.id; - } - - getCurie(): string[] { - return this.curie; - } - - getEquivalentIDs(): SRIResolvedSet { - return this.equivalentIDs ?? 
{}; - } - - removeEquivalentID(id: string): void { - delete this.equivalentIDs[id]; - } - - getCategories(): string[] { - if (this.equivalentIDsUpdated) this.expandCategories(); - return this.expandedCategories; - } - - expandCategories(): void { - this.equivalentIDsUpdated = false; - if (this.hasEquivalentIDs() === false) { - const categories = utils.toArray(this.categories); - let expanded_categories = []; - categories.map((category) => { - expanded_categories = [ - ...expanded_categories, - ...(biolink.getDescendantClasses(utils.removeBioLinkPrefix(category)) || []), - ]; - }); - this.expandedCategories = utils.getUnique(expanded_categories); - return; - } - // let ancestors = new Set( - // utils - // .toArray(this.category) - // .map((category) => utils.removeBioLinkPrefix(category)) - // .reduce((arr, category) => [...arr, ...biolink.getAncestorClasses(category)], []) - // .filter((category) => !utils.toArray(this.category).includes(`biolink:${category}`)), - // ); - let categories = utils.toArray(this.categories).map((category) => utils.removeBioLinkPrefix(category)); - Object.values(this.equivalentIDs).map((entity) => { - categories = [...categories, ...entity.primaryTypes]; - }); - this.expandedCategories = utils.getUnique( - utils - .getUnique(categories) - .reduce((arr, category) => [...arr, ...(biolink.getDescendantClasses(category) || [])], []), - ); - // .filter(category => !ancestors.has(category)); - } - - getEntities(): SRIBioEntity[] { - return Object.values(this.equivalentIDs); - } - - getPrimaryIDs(): string[] { - return this.getEntities().map((entity) => entity.primaryID); - } - - setEquivalentIDs(equivalentIDs: SRIResolvedSet): void { - this.equivalentIDs = equivalentIDs; - this.equivalentIDsUpdated = true; - } - - updateEquivalentIDs(equivalentIDs: SRIResolvedSet): void { - if (this.equivalentIDs === undefined) { - this.equivalentIDs = equivalentIDs; - } else { - this.equivalentIDs = { ...this.equivalentIDs, ...equivalentIDs }; - } - 
this.equivalentIDsUpdated = true; - } - - hasInput(): boolean { - return !(this.curie === undefined || this.curie === null); - } - - hasEquivalentIDs(): boolean { - return !(typeof this.equivalentIDs === 'undefined'); - } - - getEntityCount(): number { - return this.curie ? this.curie.length : 0; - } -} diff --git a/src/results_assembly/query_results.ts b/src/results_assembly/query_results.ts index 51dff31c..73a4a67f 100644 --- a/src/results_assembly/query_results.ts +++ b/src/results_assembly/query_results.ts @@ -1,10 +1,9 @@ import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import { TrapiResult } from '@biothings-explorer/types'; +import { TrapiResult, Record } from '@biothings-explorer/types'; import Debug from 'debug'; import { zip } from 'lodash'; const debug = Debug('bte:biothings-explorer-trapi:QueryResult'); import { getScores, calculateScore, ScoreCombos } from './score'; -import { Record } from '@biothings-explorer/api-response-transform'; import { enrichTrapiResultsWithPfocrFigures } from './pfocr'; import * as config from '../config'; diff --git a/src/update_nodes.ts b/src/update_nodes.ts index c9c30f93..790f91fc 100644 --- a/src/update_nodes.ts +++ b/src/update_nodes.ts @@ -1,9 +1,6 @@ -import { resolveSRI } from 'biomedical_id_resolver'; import Debug from 'debug'; -import { ResolverInput, SRIResolverOutput } from 'biomedical_id_resolver'; -import { Record } from '@biothings-explorer/api-response-transform'; -import QEdge from './query_edge'; -import { NodeNormalizerResultObj } from '@biothings-explorer/api-response-transform'; +import { ResolverInput, SRIResolverOutput, SRIBioEntity, resolveSRI } from 'biomedical_id_resolver'; +import { Record, QEdge } from "@biothings-explorer/types"; const debug = Debug('bte:biothings-explorer-trapi:nodeUpdateHandler'); export interface CuriesByCategory { @@ -64,7 +61,7 @@ export default class NodesUpdateHandler { return; } - _createEquivalentIDsObject(record: Record): { [curie: string]: 
NodeNormalizerResultObj } { + _createEquivalentIDsObject(record: Record): { [curie: string]: SRIBioEntity } { if (record.object.normalizedInfo !== undefined) { return { [record.object.curie]: record.object.normalizedInfo, From 592759ebc2d57324dd098ed99ffda1e412976481 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Tue, 2 Jul 2024 15:21:04 -0700 Subject: [PATCH 07/18] update tests --- __test__/integration/KnowledgeGraph.test.ts | 2 +- .../integration/QEdge2BTEEdgeHandler.test.ts | 3 +- __test__/integration/QueryEdge.test.ts | 201 ------- .../integration/QueryGraphHandler.test.ts | 531 ------------------ __test__/integration/QueryNode.test.ts | 166 ------ __test__/integration/QueryResult.test.ts | 76 ++- .../integration/TRAPIQueryHandler.test.ts | 23 + __test__/integration/biolink.test.ts | 72 --- __test__/integration/graph/graph.test.ts | 2 +- __test__/unittest/QueryEdge.test.ts | 87 --- __test__/unittest/TRAPIQueryHandler.test.ts | 26 +- __test__/unittest/cacheHandler.test.ts | 3 +- __test__/unittest/inferred_mode.test.ts | 3 +- 13 files changed, 124 insertions(+), 1071 deletions(-) delete mode 100644 __test__/integration/QueryEdge.test.ts delete mode 100644 __test__/integration/QueryGraphHandler.test.ts delete mode 100644 __test__/integration/QueryNode.test.ts delete mode 100644 __test__/integration/biolink.test.ts delete mode 100644 __test__/unittest/QueryEdge.test.ts diff --git a/__test__/integration/KnowledgeGraph.test.ts b/__test__/integration/KnowledgeGraph.test.ts index c993c583..18c99a90 100644 --- a/__test__/integration/KnowledgeGraph.test.ts +++ b/__test__/integration/KnowledgeGraph.test.ts @@ -1,7 +1,7 @@ import KGEdge from '../../src/graph/kg_edge'; import KGNode from '../../src/graph/kg_node'; import KnowledgeGraph from '../../src/graph/knowledge_graph'; -import { TrapiAttribute } from '../../src/types'; +import { TrapiAttribute } from '@biothings-explorer/types'; describe('Testing KnowledgeGraph Module', () => { const nodeInput = new 
KGNode('PUBCHEM.COMPOUND:2662-n0', { diff --git a/__test__/integration/QEdge2BTEEdgeHandler.test.ts b/__test__/integration/QEdge2BTEEdgeHandler.test.ts index 9093fbce..6fa24ac6 100644 --- a/__test__/integration/QEdge2BTEEdgeHandler.test.ts +++ b/__test__/integration/QEdge2BTEEdgeHandler.test.ts @@ -1,8 +1,7 @@ jest.mock('axios'); import axios from 'axios'; -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; +import { QNode, QEdge } from '@biothings-explorer/types'; import NodeUpdateHandler from '../../src/update_nodes'; describe('Testing NodeUpdateHandler Module', () => { diff --git a/__test__/integration/QueryEdge.test.ts b/__test__/integration/QueryEdge.test.ts deleted file mode 100644 index ff2c3195..00000000 --- a/__test__/integration/QueryEdge.test.ts +++ /dev/null @@ -1,201 +0,0 @@ -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; - -describe('Testing QueryEdge Module', () => { - const gene_node1 = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - const type_node = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const disease1_node = new QNode({ id: 'n1', categories: ['Disease'], ids: ['MONDO:000123'] }); - const node1_equivalent_ids = { - 'NCBIGene:1017': { - db_ids: { - NCBIGene: ['1017'], - SYMBOL: ['CDK2'], - }, - }, - }; - - const gene_node2 = new QNode({ id: 'n2', categories: ['Gene'], ids: ['NCBIGene:1017', 'NCBIGene:1018'] }); - const gene_node1_with_id_annotated = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - //@ts-expect-error: partial data for specific test scope - gene_node1_with_id_annotated.setEquivalentIDs(node1_equivalent_ids); - const chemical_node1 = new QNode({ id: 'n3', categories: ['SmallMolecule'] }); - const edge1 = new QEdge({ id: 'e01', subject: gene_node1, object: chemical_node1 }); - const edge2 = new QEdge({ id: 'e02', subject: gene_node1_with_id_annotated, object: chemical_node1 }); - const edge3 = new 
QEdge({ id: 'e04', subject: gene_node2, object: chemical_node1 }); - const edge4 = new QEdge({ id: 'e05', object: gene_node2, subject: chemical_node1 }); - const edge5 = new QEdge({ id: 'e06', object: gene_node1_with_id_annotated, subject: chemical_node1 }); - - describe('Testing isReversed function', () => { - test('test if only the object of the edge has curie defined, should return true', () => { - const res = edge4.isReversed(); - expect(res).toBeTruthy(); - }); - - test('test if the subject of the edge has curie defined, should return false', () => { - const res = edge1.isReversed(); - expect(res).toBeFalsy(); - }); - - test('test if both subject and object curie not defined, should return false', () => { - const node1 = new QNode({ id: 'n1', categories: ['Gene'] }); - const node2 = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const edge = new QEdge({ id: 'e01', subject: node1, object: node2 }); - expect(edge.isReversed()).toBeFalsy(); - }); - }); - - describe('Testing getInputCurie function', () => { - test('test return an array of one curie if subject has only one curie specified', () => { - const res = edge1.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017']); - }); - - test('test return an array of two curie if subject has only an array of two curies specified', () => { - const res = edge3.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017', 'NCBIGene:1018']); - }); - - test('test return an array of two curies if edge is reversed and object has two curies specified', () => { - const res = edge4.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017', 'NCBIGene:1018']); - }); - }); - - describe('Testing hasInput function', () => { - test('test return true if subject has only one curie specified', () => { - const res = edge1.hasInput(); - expect(res).toBeTruthy(); - }); - - test('test return true if subject has only an array of two curies specified', () => { - const res = edge3.hasInput(); - expect(res).toBeTruthy(); - }); - - 
test('test return true if subject has no curies specified but object does', () => { - const res = edge4.hasInput(); - expect(res).toBeTruthy(); - }); - - test('test return false if both subject and object has no curies specified', () => { - const node1 = new QNode({ id: 'n1', categories: ['Gene'] }); - const node2 = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const edge = new QEdge({ id: 'e01', subject: node1, object: node2 }); - expect(edge.hasInput()).toBeFalsy(); - }); - }); - - // Removed because new QEdge has different implementation for hasInputResolved - // describe("Testing hasInputResolved function", () => { - // test("test return true if subject has input resolved", () => { - // const res = edge2.hasInputResolved(); - // expect(res).toBeTruthy(); - // }); - - // test("test return false if both subject and object do not have input resolved", () => { - // const res = edge1.hasInputResolved(); - // expect(res).toBeFalsy(); - // }); - - // test("test return true if subject doesn't have input resolved, but object does", () => { - // const res = edge5.hasInputResolved(); - // expect(res).toBeTruthy(); - // }); - - // }) - - describe('Testing getPredicate function', () => { - test('test get reverse predicate if query is reversed', () => { - const edge = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:treats'] }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - - test('test get reverse predicate if query is reversed and expanded', () => { - const edge = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:affects'] }); - const res = edge.getPredicate(); - expect(res).toContain('affected_by'); - expect(res).toContain('disrupted_by'); - }); - }); - - describe('Testing expandPredicates function', () => { - test('All predicates are correctly expanded if in biolink model', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - 
object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - }); - - test('Multiple predicates can be resolved', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to', 'ameliorates']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - expect(res).toContain('ameliorates'); - expect(res).toContain('treats'); - }); - - test('Predicates not in biolink model should return itself', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to', 'amelio']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - expect(res).toContain('amelio'); - }); - }); - - describe('chooseLowerEntityValue', () => { - test('Should reverse if subject has more curies', () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 2; - qEdgeClone.object.entity_count = 1; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeTruthy(); - }); - - test("Shouldn't reverse if object has more curies", () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 1; - qEdgeClone.object.entity_count = 2; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeFalsy(); - }); - - test("Shouldn't reverse if both have same number", () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 2; - qEdgeClone.object.entity_count = 2; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeFalsy(); - }); - }); - - test('getHashedEdgeRepresentation', () => { - const 
qEdge1 = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:treats'] }); - const qEdge2 = new QEdge(qEdge1.freeze(), true); - // NOTE: recently changed from not.toEqual, because an unfrozen edge *should* equal its original? - expect(qEdge1.getHashedEdgeRepresentation()).toEqual(qEdge2.getHashedEdgeRepresentation()); - }); -}); diff --git a/__test__/integration/QueryGraphHandler.test.ts b/__test__/integration/QueryGraphHandler.test.ts deleted file mode 100644 index d4f7278c..00000000 --- a/__test__/integration/QueryGraphHandler.test.ts +++ /dev/null @@ -1,531 +0,0 @@ -jest.mock('axios'); -import axios from 'axios'; - -import QueryGraphHandler from '../../src/query_graph'; -import QNode2 from '../../src/query_node'; -import QEdge from '../../src/query_edge'; -import InvalidQueryGraphError from '../../src/exceptions/invalid_query_graph_error'; - -describe('Testing QueryGraphHandler Module', () => { - const disease_entity_node = { - categories: ['biolink:Disease'], - ids: ['MONDO:0005737'], - }; - const gene_entity_node = { - categories: ['biolink:Gene'], - ids: ['NCBIGene:1017'], - }; - const gene_class_node = { - categories: ['biolink:Gene'], - }; - const chemical_class_node = { - categories: ['biolink:SmallMolecule'], - }; - const pathway_class_node = { - categories: ['biolink:Pathways'], - }; - const phenotype_class_node = { - categories: ['biolink:Phenotype'], - }; - const OneHopQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - }; - - const ThreeHopExplainQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: gene_entity_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - }, - }; - - const FourHopQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: 
chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - }, - }; - - const QueryWithCycle1 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - e05: { - subject: 'n4', - object: 'n1', - }, - }, - }; - - const QueryWithCycle2 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - e05: { - subject: 'n4', - object: 'n1', - }, - }, - }; - - const QueryWithDuplicateEdge1 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n0', - }, - }, - }; - - const QueryWithNullValues = { - nodes: { - n0: { - ...disease_entity_node, - categories: null, - }, - n1: { - ...gene_class_node, - ids: null, - }, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - }; - - const QueryWithNullPredicate = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - predicate: null, - }, - }, - }; - - const QueryWithNullIds = { - nodes: { - n0: { - ...disease_entity_node, - ids: [], - }, - n1: { - ...gene_class_node, - ids: null, - }, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - 
}; - - describe('test _storeNodes function', () => { - test('test if storeNodes with one hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(OneHopQuery, undefined); - //@ts-expect-error: explicitly testing private method - const nodes = await handler._storeNodes(); - expect(nodes).toHaveProperty('n0'); - expect(nodes).not.toHaveProperty('n2'); - expect(nodes.n0).toBeInstanceOf(QNode2); - }); - - test('test if storeNodes with multi hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 
'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(FourHopQuery, undefined); - //@ts-expect-error: explicitly testing private method - const nodes = await handler._storeNodes(); - expect(nodes).toHaveProperty('n0'); - expect(nodes).toHaveProperty('n3'); - expect(nodes.n0).toBeInstanceOf(QNode2); - expect(nodes.n3).toBeInstanceOf(QNode2); - }); - }); - - describe('test calculateEdges function', () => { - test('test storeEdges with one hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new 
QueryGraphHandler(OneHopQuery, undefined); - await handler.calculateEdges(); - expect(handler.edges).toHaveProperty('e01'); - expect(handler.edges).not.toHaveProperty('e02'); - expect(handler.edges.e01).toBeInstanceOf(QEdge); - expect(handler.edges.e01.getInputNode()).toBeInstanceOf(QNode2); - }); - }); - - describe('test _createQueryPaths function', () => { - test('test createQueryPaths with three hop explain query', async () => { - (axios.post as jest.Mock) - .mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }) - .mockResolvedValueOnce({ - data: { - 'NCBIGene:1017': { - id: { identifier: 'NCBIGene:1017', label: 'CDK2' }, - equivalent_identifiers: [ - { identifier: 'NCBIGene:1017', label: 'CDK2' }, - { identifier: 'ENSEMBL:ENSG00000123374' }, - { identifier: 'HGNC:1771', label: 'CDK2' }, - { identifier: 'OMIM:116953' }, - { identifier: 'UMLS:C1332733', label: 'CDK2 gene' }, - { - identifier: 'UniProtKB:A0A024RB10', - label: 'A0A024RB10_HUMAN Cyclin-dependent kinase 2, isoform CRA_a (trembl)', - }, - { - identifier: 'UniProtKB:A0A024RB77', - label: 
'A0A024RB77_HUMAN Cyclin-dependent kinase 2, isoform CRA_b (trembl)', - }, - { - identifier: 'UniProtKB:B4DDL9', - label: - 'B4DDL9_HUMAN cDNA FLJ54979, highly similar to Homo sapiens cyclin-dependent kinase 2 (CDK2), transcript variant 2, mRNA (trembl)', - }, - { identifier: 'UniProtKB:E7ESI2', label: 'E7ESI2_HUMAN Cyclin-dependent kinase 2 (trembl)' }, - { identifier: 'ENSEMBL:ENSP00000393605' }, - { identifier: 'UniProtKB:G3V5T9', label: 'G3V5T9_HUMAN Cyclin-dependent kinase 2 (trembl)' }, - { identifier: 'ENSEMBL:ENSP00000452514' }, - { identifier: 'UniProtKB:P24941', label: 'CDK2_HUMAN Cyclin-dependent kinase 2 (sprot)' }, - { identifier: 'PR:P24941', label: 'cyclin-dependent kinase 2 (human)' }, - { identifier: 'UMLS:C0108855', label: 'CDK2 protein, human' }, - ], - type: [ - 'biolink:Gene', - 'biolink:GeneOrGeneProduct', - 'biolink:GenomicEntity', - 'biolink:ChemicalEntityOrGeneOrGeneProduct', - 'biolink:PhysicalEssence', - 'biolink:OntologyClass', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - 'biolink:PhysicalEssenceOrOccurrent', - 'biolink:ThingWithTaxon', - 'biolink:MacromolecularMachineMixin', - 'biolink:Protein', - 'biolink:GeneProductMixin', - 'biolink:Polypeptide', - 'biolink:ChemicalEntityOrProteinOrPolypeptide', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(ThreeHopExplainQuery, undefined); - const edges = await handler.calculateEdges(); - expect(Object.keys(edges)).toHaveLength(3); - }); - }); - describe('test cycle/duplicate edge detection for query graphs', () => { - test('Duplicate Edge Graph #1', async () => { - const handler = new QueryGraphHandler(QueryWithDuplicateEdge1, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - test('Query Graph Cycle #1', async () => { - const handler = new QueryGraphHandler(QueryWithCycle1, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - 
test('Query Graph Cycle #2', async () => { - const handler = new QueryGraphHandler(QueryWithCycle2, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - }); - - describe('test chandling of null ids / categories / predicates', () => { - test('Null id/categories graph', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(QueryWithNullValues, undefined); - await expect(handler.calculateEdges()).resolves.not.toThrow(); - }); - test('Null predicate graph', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 
'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(QueryWithNullPredicate, undefined); - const edges = await handler.calculateEdges(); - // if this is undefined (not null) then smartapi-kg treats as if the field doesn't exist (desired behavior) - expect(edges[0].getPredicate()).toBe(undefined); - }); - test('Graph without any ids', async () => { - const handler = new QueryGraphHandler(QueryWithNullIds, undefined); - expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - }); -}); diff --git a/__test__/integration/QueryNode.test.ts b/__test__/integration/QueryNode.test.ts deleted file mode 100644 index 28ac0c13..00000000 --- a/__test__/integration/QueryNode.test.ts +++ /dev/null @@ -1,166 +0,0 @@ -import { SRIBioEntity } from 'biomedical_id_resolver/built/common/types'; -import QNode from '../../src/query_node'; - -describe('Testing QueryNode Module', () => { - const node1_equivalent_ids = { - 'NCBIGene:1017': { - primaryID: 'NCBIGene:1017', - equivalentIDs: ['NCBIGene:1017'], - label: 'CDK2', - labelAliases: ['CDK2'], - primaryTypes: ['Gene'], - semanticTypes: ['Gene'], - db_ids: { - NCBIGene: ['1017'], - SYMBOL: ['CDK2'], - }, - }, - }; - - describe('Testing hasInput function', () => { - test('test node without curies specified should return false', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - const res = gene_node.hasInput(); - expect(res).toBeFalsy(); - }); - - test('test node with 
curies specified should return true', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - const res = gene_node.hasInput(); - expect(res).toBeTruthy(); - }); - }); - - describe('Test hasEquivalentIDs function', () => { - test('test node with equivalent identifiers set should return true', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.setEquivalentIDs(node1_equivalent_ids); - const res = gene_node.hasEquivalentIDs(); - expect(res).toBeTruthy(); - }); - - test('test node with equivalent identifiers not set should return false', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - const res = gene_node.hasEquivalentIDs(); - expect(res).toBeFalsy(); - }); - }); - - describe('Test getEntities', () => { - test('If equivalent ids are empty, should return an empty array', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = {}; - expect(gene_node.getEntities()).toEqual([]); - }); - - test('If equivalent ids are not empty, should return an array of bioentities', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = { - //@ts-expect-error: partial data for specific test - A: { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryID: 'd', - equivalentIDs: ['e'], - }, - }; - expect(gene_node.getEntities()).toEqual([ - { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - { - primaryID: 'd', - equivalentIDs: ['e'], - }, - ]); - }); - }); - - describe('Test getPrimaryIDs', () => { - test('If equivalent ids are empty, should return an empty array', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = {}; - expect(gene_node.getPrimaryIDs()).toEqual([]); - }); - - test('If equivalent ids are not empty, should return an array of primaryIDs', () => { - 
const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = { - //@ts-expect-error: partial data for specific test - A: { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryID: 'd', - equivalentIDs: ['e'], - }, - }; - expect(gene_node.getPrimaryIDs()).toEqual(['a', 'd']); - }); - }); - - describe('Test updateEquivalentIDs', () => { - test('If equivalent ids does not exist, should set it with the input', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - //@ts-expect-error: partial data for specific test - gene_node.updateEquivalentIDs({ a: 'b' }); - expect(gene_node.equivalentIDs).toEqual({ a: 'b' }); - }); - - test('If equivalent ids are not empty, should update the equivalent ids', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - //@ts-expect-error: partial data for specific test - gene_node.equivalentIDs = { a: 'b', c: 'd' }; - //@ts-expect-error: partial data for specific test - gene_node.updateEquivalentIDs({ e: 'f' }); - expect(gene_node.getEquivalentIDs()).toEqual({ a: 'b', c: 'd', e: 'f' }); - }); - }); - - describe('Test getCategories function', () => { - test('If equivalent ids are empty, return itself and its descendants', () => { - const node = new QNode({ id: 'n1', categories: ['DiseaseOrPhenotypicFeature'] }); - expect(node.getCategories()).toContain('Disease'); - expect(node.getCategories()).toContain('PhenotypicFeature'); - expect(node.getCategories()).toContain('DiseaseOrPhenotypicFeature'); - }); - - test('If equivalent ids are empty, return itself and its descendants using NamedThing as example', () => { - const node = new QNode({ id: 'n1', categories: ['NamedThing'] }); - expect(node.getCategories()).toContain('Disease'); - expect(node.getCategories()).toContain('PhenotypicFeature'); - expect(node.getCategories()).toContain('DiseaseOrPhenotypicFeature'); - 
expect(node.getCategories()).toContain('Gene'); - expect(node.getCategories()).toContain('NamedThing'); - }); - - test('If equivalent ids are empty, return itself and its descendants using Gene as example', () => { - const node = new QNode({ id: 'n1', categories: ['Gene'] }); - expect(node.getCategories()).toEqual(['Gene']); - }); - - test('If equivalent ids are not empty, return all primary semantic types defined in equivalent entities', () => { - const node = new QNode({ id: 'n1', categories: ['Gene'] }); - node.setEquivalentIDs({ - //@ts-expect-error: partial data for specific test - A: { - primaryTypes: ['m', 'p'], - semanticTypes: ['m', 'n', 'p', 'q'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryTypes: ['x'], - semanticTypes: ['x', 'y'], - }, - }); - // console.log(node.getCategories()); - expect(node.getCategories()).toEqual(['Gene', 'm', 'p', 'x']); - }); - }); -}); diff --git a/__test__/integration/QueryResult.test.ts b/__test__/integration/QueryResult.test.ts index 2fb8cf0a..1390e0d6 100644 --- a/__test__/integration/QueryResult.test.ts +++ b/__test__/integration/QueryResult.test.ts @@ -1,8 +1,7 @@ import { cloneDeep, range } from 'lodash'; -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; +import { QNode, QEdge } from '@biothings-explorer/types'; import QueryResult from '../../src/results_assembly/query_results'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record } from '@biothings-explorer/types'; import { EDGE_ATTRIBUTES_USED_IN_RECORD_HASH } from '../../src/config'; describe('Testing QueryResults Module', () => { @@ -41,8 +40,11 @@ describe('Testing QueryResults Module', () => { EDGE_ATTRIBUTES_USED_IN_RECORD_HASH, { predicate: 'biolink:physically_interacts_with', - source: 'DGIdb', api_name: 'BioThings DGIDB API', + source: 'BioThings DGIDB API', + "x-translator": { + infores: "infores:dgidb", + }, }, edge1, ); @@ -98,6 +100,10 @@ describe('Testing 
QueryResults Module', () => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -128,6 +134,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -197,6 +207,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -227,6 +241,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -295,6 +313,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -325,6 +347,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -393,6 +419,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -425,6 +455,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -493,6 +527,10 @@ describe('Testing QueryResults Module', 
() => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -525,6 +563,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -594,6 +636,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:gene_associated_with_condition', api_name: 'Automat Pharos', + source: 'Automat Pharos', + "x-translator": { + infores: "infores:automat", + }, }, edge1, ); @@ -624,6 +670,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -654,6 +704,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -724,7 +778,7 @@ describe('Testing QueryResults Module', () => { const [source0, source1] = Array(2) .fill(0) .map((s, i) => { - return { source: `source${i}` }; + return { metaEdgeSource: `source${i}`, apiInforesCurie: `infores:source${i}` }; }); const [api0, api1] = Array(2) .fill(0) @@ -1868,6 +1922,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -1894,6 +1949,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -1976,6 +2032,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 
subject: { @@ -2002,6 +2059,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2031,6 +2089,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -2057,6 +2116,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2138,6 +2198,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -2164,6 +2225,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2975,6 +3037,8 @@ describe('Testing QueryResults Module', () => { ], gene_symbol: 'HRAS', }, + metaEdgeSource: 'bioplanet', + apiInforesCurie: 'infores:bioplanet' }); const record2 = new Record({ subject: { @@ -3009,6 +3073,8 @@ describe('Testing QueryResults Module', () => { ], gene_symbol: 'HRAS', }, + metaEdgeSource: 'bioplanet', + apiInforesCurie: 'infores:bioplanet' }); const queryResult = new QueryResult({ provenanceUsesServiceProvider: false }); await queryResult.update({ diff --git a/__test__/integration/TRAPIQueryHandler.test.ts b/__test__/integration/TRAPIQueryHandler.test.ts index 425fd3cc..0c29d247 100644 --- a/__test__/integration/TRAPIQueryHandler.test.ts +++ b/__test__/integration/TRAPIQueryHandler.test.ts @@ -4,6 +4,8 @@ const mockedAxios = axios as jest.Mocked; import TRAPIQueryHandler from '../../src/index'; import path from 'path'; +import { Subquery, SubqueryRelay } from '@biothings-explorer/call-apis'; +import { MessageChannel } from 'worker_threads'; describe('Testing TRAPIQueryHandler Module', () => { const disease_entity_node = { @@ -25,6 
+27,27 @@ describe('Testing TRAPIQueryHandler Module', () => { }, }, }; + + beforeAll(async () => { + const subqueryRelay = new SubqueryRelay(); + const { port1: toWorker, port2: fromWorker } = new MessageChannel(); + global.parentPort = toWorker; + fromWorker.on("message", async (msg: any) => { + const { queries, options } = msg.value + subqueryRelay.subscribe( + await Promise.all(queries.map(async query => await Subquery.unfreeze(query))), + options, + ({ hash, records, logs, apiUnavailable }) => { + fromWorker.postMessage({ + threadId: 0, + type: "subQueryResult", + value: { hash, records, logs, apiUnavailable }, + }); + }, + ); + }); + }); + describe('Testing query function', () => { test('test with one query edge', async () => { (mockedAxios.get as jest.Mock).mockResolvedValue({ diff --git a/__test__/integration/biolink.test.ts b/__test__/integration/biolink.test.ts deleted file mode 100644 index be391761..00000000 --- a/__test__/integration/biolink.test.ts +++ /dev/null @@ -1,72 +0,0 @@ -import biolink from '../../src/biolink'; - -describe('Test BioLinkModel class', () => { - test('test reverse with correct predicate', () => { - const res = biolink.reverse('treats'); - expect(res).toBe('treated_by'); - }); - - test('test reverse with correct predicate if it contains underscore', () => { - const res = biolink.reverse('treated_by'); - expect(res).toBe('treats'); - }); - - test('test reverse with predicate having symmetric equal to true', () => { - const res = biolink.reverse('correlated_with'); - expect(res).toBe('correlated_with'); - }); - - test('test predicate with no inverse property and symmetric not equal to true', () => { - const res = biolink.reverse('has_phenotype'); - expect(res).toBe('phenotype_of'); - }); - - test('test predicate not exist in biolink model', () => { - const res = biolink.reverse('haha'); - expect(res).toBeUndefined(); - }); - - test('if input not string, return undefined', () => { - //@ts-expect-error: Explicitly testing for wrong 
type - const res = biolink.reverse(['dd']); - expect(res).toBeUndefined(); - }); - - describe('Test getDescendants function', () => { - test('if input is in biolink model, return all its desendants and itself', () => { - const res = biolink.getDescendantClasses('MolecularEntity'); - expect(res).toContain('SmallMolecule'); - expect(res).toContain('NucleicAcidEntity'); - expect(res).toContain('MolecularEntity'); - }); - - test("if input is in biolink model but doesn't have descendants, return itself", () => { - const res = biolink.getDescendantClasses('Gene'); - expect(res).toEqual(['Gene']); - }); - - test('if input is not in biolink, return itself', () => { - const res = biolink.getDescendantClasses('Gene1'); - expect(res).toEqual('Gene1'); - }); - }); - - describe('Test getDescendantPredicates function', () => { - test('if input is in biolink model, return all its desendants and itself', () => { - const res = biolink.getDescendantPredicates('related_to'); - expect(res).toContain('subclass_of'); - expect(res).toContain('superclass_of'); - expect(res).toContain('related_to'); - }); - - test("if input is in biolink model but doesn't have descendants, return itself", () => { - const res = biolink.getDescendantPredicates('subclass_of'); - expect(res).toEqual(['subclass_of']); - }); - - test('if input is not in biolink, return itself', () => { - const res = biolink.getDescendantPredicates('Gene1'); - expect(res).toEqual(['Gene1']); - }); - }); -}); diff --git a/__test__/integration/graph/graph.test.ts b/__test__/integration/graph/graph.test.ts index fcdb2a4c..435f7596 100644 --- a/__test__/integration/graph/graph.test.ts +++ b/__test__/integration/graph/graph.test.ts @@ -1,5 +1,5 @@ import graph from '../../../src/graph/graph'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record } from '@biothings-explorer/types'; describe('Test graph class', () => { const qNode1 = { diff --git a/__test__/unittest/QueryEdge.test.ts 
b/__test__/unittest/QueryEdge.test.ts deleted file mode 100644 index b68c8648..00000000 --- a/__test__/unittest/QueryEdge.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -import QEdge from '../../src/query_edge'; -import QNode from '../../src/query_node'; - -function basicQEdge({ - predicates, - subjectIds, - objectIds, - reverse = false, -}: { predicates?: string[]; subjectIds?: string[]; objectIds?: string[]; reverse?: boolean } = {}): QEdge { - return new QEdge({ - id: 'e01', - predicates, - subject: new QNode({ - id: 'n01', - ids: subjectIds, - }), - object: new QNode({ - id: 'n02', - ids: objectIds, - }), - }); -} - -describe('Test QEdge class', () => { - describe('Test getPredicate function', () => { - test('Non reversed edge should return predicates itself', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treats'); - }); - - test('Undefined predicate should return itself', () => { - const edge = basicQEdge(); - const res = edge.getPredicate(); - expect(res).toBeUndefined; - }); - - test('An array of non-undefined predicates should return itself', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treats'); - expect(res).toContain('targets'); - }); - - test('An array of non-undefined predicates with reverse edge should exclude return value if undefined', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - - test('An array of non-undefined predicates with reverse edge should return reversed predicates if not undefined', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - }); - - 
describe('Test getOutputNode function', () => { - test('reversed edge should return the subject', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getOutputNode(); - expect(res.id).toEqual('n01'); - }); - - test('non reversed edge should return the object', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - subjectIds: ['yes'], - }); - const res = edge.getOutputNode(); - expect(res.id).toEqual('n02'); - }); - }); -}); diff --git a/__test__/unittest/TRAPIQueryHandler.test.ts b/__test__/unittest/TRAPIQueryHandler.test.ts index c042caf2..ec2630ec 100644 --- a/__test__/unittest/TRAPIQueryHandler.test.ts +++ b/__test__/unittest/TRAPIQueryHandler.test.ts @@ -1,11 +1,13 @@ -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record, InvalidQueryGraphError } from '@biothings-explorer/types'; import path from 'path'; import fs from 'fs'; import _ from 'lodash'; const AxiosActual = jest.requireActual('axios'); import InferredQueryHandler from '../../src/inferred_mode/inferred_mode'; -import TRAPIQueryHandler, { InvalidQueryGraphError } from '../../src/index'; +import TRAPIQueryHandler from '../../src/index'; import axios from 'axios'; +import { MessageChannel } from 'worker_threads'; +import { Subquery, SubqueryRelay } from '@biothings-explorer/call-apis'; jest.mock('../../src/inferred_mode/inferred_mode'); jest.mock('axios'); @@ -17,6 +19,26 @@ const records = Record.unfreezeRecords( describe('test TRAPIQueryHandler methods', () => { const OLD_ENV = process.env; + beforeAll(async () => { + const subqueryRelay = new SubqueryRelay(); + const { port1: toWorker, port2: fromWorker } = new MessageChannel(); + global.parentPort = toWorker; + fromWorker.on("message", async (msg: any) => { + const { queries, options } = msg.value + subqueryRelay.subscribe( + await Promise.all(queries.map(async query => await 
Subquery.unfreeze(query))), + options, + ({ hash, records, logs, apiUnavailable }) => { + fromWorker.postMessage({ + threadId: 0, + type: "subQueryResult", + value: { hash, records, logs, apiUnavailable }, + }); + }, + ); + }); + }); + beforeEach(() => { jest.resetAllMocks(); jest.resetModules(); diff --git a/__test__/unittest/cacheHandler.test.ts b/__test__/unittest/cacheHandler.test.ts index 621401ae..99f8caa2 100644 --- a/__test__/unittest/cacheHandler.test.ts +++ b/__test__/unittest/cacheHandler.test.ts @@ -1,11 +1,10 @@ import fs from 'fs'; import path from 'path'; import { Readable } from 'stream'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record, QEdge } from '@biothings-explorer/types'; import RedisMock from 'ioredis-mock'; import RealCacheHandler from '../../src/cache_handler'; import MetaKG from '@biothings-explorer/smartapi-kg'; -import QEdge from '../../src/query_edge'; const qEdges = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../data/qEdges.json'), { encoding: 'utf8' })); diff --git a/__test__/unittest/inferred_mode.test.ts b/__test__/unittest/inferred_mode.test.ts index 3aedd136..7f452e47 100644 --- a/__test__/unittest/inferred_mode.test.ts +++ b/__test__/unittest/inferred_mode.test.ts @@ -1,4 +1,5 @@ -import TRAPIQueryHandler, { TrapiQueryGraph, TrapiResponse, TrapiResult } from '../../src/index'; +import TRAPIQueryHandler from '../../src/index'; +import { TrapiQueryGraph, TrapiResponse, TrapiResult } from '@biothings-explorer/types'; import path from 'path'; import fs from 'fs'; const smartAPIPAth = path.resolve(__dirname, '../../../bte-trapi/data/smartapi_specs.json'); From 67a55df7d41b825d614fd3daae3f95a50f574215 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Thu, 18 Jul 2024 15:13:47 -0700 Subject: [PATCH 08/18] retry for pnpm install test --- .github/workflows/test_ws_codecov.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_ws_codecov.yml 
b/.github/workflows/test_ws_codecov.yml index 54c3c6a7..fbf7c9d9 100644 --- a/.github/workflows/test_ws_codecov.yml +++ b/.github/workflows/test_ws_codecov.yml @@ -32,7 +32,7 @@ jobs: run: | pnpm run clone pnpm run git checkout ${{ steps.branch-name.outputs.current_branch }} - pnpm i + pnpm i || pnpm i pnpm --filter query_graph_handler test-cov - name: Send coverage report to codecov for visualization From fe0fd8bc721d4b0858b350dba92139fb645e5ff6 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 29 Jul 2024 11:22:55 -0700 Subject: [PATCH 09/18] change to parentSide/workerSide --- .../integration/TRAPIQueryHandler.test.ts | 8 ++++---- __test__/unittest/TRAPIQueryHandler.test.ts | 8 ++++---- src/batch_edge_query.ts | 9 +++++---- src/cache_handler.ts | 20 +++++++++---------- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/__test__/integration/TRAPIQueryHandler.test.ts b/__test__/integration/TRAPIQueryHandler.test.ts index 0c29d247..32275e84 100644 --- a/__test__/integration/TRAPIQueryHandler.test.ts +++ b/__test__/integration/TRAPIQueryHandler.test.ts @@ -30,15 +30,15 @@ describe('Testing TRAPIQueryHandler Module', () => { beforeAll(async () => { const subqueryRelay = new SubqueryRelay(); - const { port1: toWorker, port2: fromWorker } = new MessageChannel(); - global.parentPort = toWorker; - fromWorker.on("message", async (msg: any) => { + const { port1: workerSide, port2: parentSide } = new MessageChannel(); + global.workerSide = workerSide; + parentSide.on("message", async (msg: any) => { const { queries, options } = msg.value subqueryRelay.subscribe( await Promise.all(queries.map(async query => await Subquery.unfreeze(query))), options, ({ hash, records, logs, apiUnavailable }) => { - fromWorker.postMessage({ + parentSide.postMessage({ threadId: 0, type: "subQueryResult", value: { hash, records, logs, apiUnavailable }, diff --git a/__test__/unittest/TRAPIQueryHandler.test.ts b/__test__/unittest/TRAPIQueryHandler.test.ts index ec2630ec..7cf2e8c1 
100644 --- a/__test__/unittest/TRAPIQueryHandler.test.ts +++ b/__test__/unittest/TRAPIQueryHandler.test.ts @@ -21,15 +21,15 @@ describe('test TRAPIQueryHandler methods', () => { beforeAll(async () => { const subqueryRelay = new SubqueryRelay(); - const { port1: toWorker, port2: fromWorker } = new MessageChannel(); - global.parentPort = toWorker; - fromWorker.on("message", async (msg: any) => { + const { port1: workerSide, port2: parentSide } = new MessageChannel(); + global.workerSide = workerSide; + parentSide.on("message", async (msg: any) => { const { queries, options } = msg.value subqueryRelay.subscribe( await Promise.all(queries.map(async query => await Subquery.unfreeze(query))), options, ({ hash, records, logs, apiUnavailable }) => { - fromWorker.postMessage({ + parentSide.postMessage({ threadId: 0, type: "subQueryResult", value: { hash, records, logs, apiUnavailable }, diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index a69f434f..1894bc43 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -102,17 +102,18 @@ export default class BatchEdgeQueryHandler { if (finishedCount >= queries.length) { debug(`Total number of records returned for qEdge ${qEdge.id} is ${completedRecords.length}`); resolve(completedRecords); - global.parentPort.off('message', listener); // Clean up + global.workerSide.off('message', listener); // Clean up } } - global.parentPort.on('message', listener); - global.parentPort.postMessage({ + global.workerSide.on('message', listener); + global.workerSide.postMessage({ + threadId, type: 'subqueryRequest', value: { queries: queries.map((query) => query.freeze()), options: this.options, }, - }); + } satisfies ThreadMessage); }); } diff --git a/src/cache_handler.ts b/src/cache_handler.ts index 3dbd80be..bec9fef8 100644 --- a/src/cache_handler.ts +++ b/src/cache_handler.ts @@ -210,13 +210,13 @@ export default class CacheHandler { async cacheEdges(queryRecords: Record[]): Promise { if (this.cacheEnabled === false 
|| process.env.INTERNAL_DISABLE_REDIS === 'true') { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); + if (global.workerSide) { + global.workerSide.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); } return; } - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'cacheInProgress', value: 1 } satisfies ThreadMessage); + if (global.workerSide) { + global.workerSide.postMessage({ threadId, type: 'cacheInProgress', value: 1 } satisfies ThreadMessage); } debug('Start to cache query records.'); try { @@ -227,8 +227,8 @@ export default class CacheHandler { await async.eachSeries(qEdgeHashes, async (hash) => { // lock to prevent caching to/reading from actively caching edge const redisID = 'bte:edgeCache:' + hash; - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'addCacheKey', value: redisID } satisfies ThreadMessage); + if (global.workerSide) { + global.workerSide.postMessage({ threadId, type: 'addCacheKey', value: redisID } satisfies ThreadMessage); } await redisClient.client.usingLock([`redisLock:${redisID}`, 'redisLock:EdgeCaching'], 600000, async () => { try { @@ -265,8 +265,8 @@ export default class CacheHandler { `Failed to cache qEdge ${hash} records due to error ${error}. This does not stop other edges from caching nor terminate the query.`, ); } finally { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'completeCacheKey', value: redisID } satisfies ThreadMessage); + if (global.workerSide) { + global.workerSide.postMessage({ threadId, type: 'completeCacheKey', value: redisID } satisfies ThreadMessage); } } }); @@ -282,8 +282,8 @@ export default class CacheHandler { } catch (error) { debug(`Caching failed due to ${error}. 
This does not terminate the query.`); } finally { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, type: 'cacheDone', value: 1 } satisfies ThreadMessage); + if (global.workerSide) { + global.workerSide.postMessage({ threadId, type: 'cacheDone', value: 1 } satisfies ThreadMessage); } } } From 44b22801e6ad6c91796ba0ffa793c2999437a3b9 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Thu, 1 Aug 2024 15:12:02 -0700 Subject: [PATCH 10/18] rm unused legacy cache_handler --- src/batch_edge_query.ts | 1 - src/cache_handler.ts | 290 ---------------------------------------- 2 files changed, 291 deletions(-) delete mode 100644 src/cache_handler.ts diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index 1894bc43..4d849102 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -4,7 +4,6 @@ import QEdge2APIEdgeHandler from './qedge2apiedge'; import NodesUpdateHandler from './update_nodes'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:batch_edge_query'); -import CacheHandler from './cache_handler'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { StampedLog } from '@biothings-explorer/utils'; diff --git a/src/cache_handler.ts b/src/cache_handler.ts deleted file mode 100644 index bec9fef8..00000000 --- a/src/cache_handler.ts +++ /dev/null @@ -1,290 +0,0 @@ -import { redisClient } from '@biothings-explorer/utils'; -import Debug from 'debug'; -const debug = Debug('bte:biothings-explorer-trapi:cache_handler'); -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import async from 'async'; -import helper from './helper'; -import lz4 from 'lz4'; -import chunker from 'stream-chunker'; -import { Readable, Transform } from 'stream'; -import { Record, RecordPackage, QEdge } from '@biothings-explorer/types'; -import { threadId } from 'worker_threads'; -import MetaKG from '@biothings-explorer/smartapi-kg'; -import { QueryHandlerOptions, 
ThreadMessage } from '@biothings-explorer/types'; - -export interface RecordPacksByQedgeMetaKGHash { - [QEdgeHash: string]: RecordPackage; -} - -class DelimitedChunksDecoder extends Transform { - private _buffer: string; - constructor() { - super({ - readableObjectMode: true, - readableHighWaterMark: 32, // limited output reduces RAM usage slightly - writableHighWaterMark: 100000, - }); - this._buffer = ''; - } - - _transform(chunk: string, encoding: string, callback: () => void): void { - this._buffer += chunk; - if (this._buffer.includes(',')) { - const parts = this._buffer.split(','); - this._buffer = parts.pop(); - parts.forEach((part) => { - const parsedPart = JSON.parse(lz4.decode(Buffer.from(part, 'base64url')).toString()); - if (Array.isArray(parsedPart)) { - parsedPart.forEach((obj) => this.push(obj)); - } else { - // backwards compatibility with previous implementation - this.push(parsedPart); - } - }); - } - callback(); // callback *no matter what* - } - - _flush(callback: (error?: Error | null | undefined, data?: unknown) => void): void { - try { - if (this._buffer.length) { - const final = JSON.parse(lz4.decode(Buffer.from(this._buffer, 'base64url')).toString()); - callback(null, final); - } - callback(); - } catch (error) { - callback(error); - } - } -} - -class DelimitedChunksEncoder extends Transform { - private _buffer: unknown[]; - constructor() { - super({ - writableObjectMode: true, - writableHighWaterMark: 128, - }); - this._buffer = []; - } - - _transform(obj: unknown, encoding: unknown, callback: () => void) { - this._buffer.push(obj); // stringify/compress 64 objects at a time limits compress calls - if (this._buffer.length === 64) { - const compressedPart = lz4.encode(JSON.stringify(this._buffer)).toString('base64url') + ','; - this.push(compressedPart); - this._buffer = []; - } - callback(); - } - - _flush(callback: (error?: Error | null | undefined, data?: unknown) => void) { - try { - if (this._buffer.length) { - callback(null, 
lz4.encode(JSON.stringify(this._buffer)).toString('base64url') + ','); - return; - } - callback(); - } catch (error) { - callback(error); - } - } -} - -export default class CacheHandler { - metaKG: MetaKG; - logs: StampedLog[]; - cacheEnabled: boolean; - recordConfig: QueryHandlerOptions; - constructor(caching: boolean, metaKG = undefined, recordConfig = {}, logs = []) { - this.metaKG = metaKG; - this.logs = logs; - this.cacheEnabled = - caching === false - ? false - : process.env.RESULT_CACHING !== 'false' - ? !(process.env.REDIS_HOST === undefined) && !(process.env.REDIS_PORT === undefined) - : false; - this.recordConfig = recordConfig; - this.logs.push( - new LogEntry('DEBUG', null, `REDIS cache is ${this.cacheEnabled === true ? '' : 'not'} enabled.`).getLog(), - ); - } - - async categorizeEdges(qEdges: QEdge[]): Promise<{ cachedRecords: Record[]; nonCachedQEdges: QEdge[] }> { - if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { - return { - cachedRecords: [], - nonCachedQEdges: qEdges, - }; - } - const nonCachedQEdges: QEdge[] = []; - let cachedRecords: Record[] = []; - debug('Begin edge cache lookup...'); - await async.eachSeries(qEdges, async (qEdge) => { - const qEdgeMetaKGHash = this._hashEdgeByMetaKG(qEdge.getHashedEdgeRepresentation()); - const unpackedRecords: Record[] = await new Promise((resolve) => { - const redisID = 'bte:edgeCache:' + qEdgeMetaKGHash; - redisClient.client.usingLock([`redisLock:${redisID}`], 600000, async () => { - try { - const compressedRecordPack = await redisClient.client.hgetallTimeout(redisID); - - if (compressedRecordPack && Object.keys(compressedRecordPack).length) { - const recordPack = []; - - const sortedPackParts = Object.entries(compressedRecordPack) - .sort(([key1], [key2]) => parseInt(key1) - parseInt(key2)) - .map(([, val]) => { - return val; - }); - - const recordStream = Readable.from(sortedPackParts); - recordStream - .pipe(this.createDecodeStream()) - .on('data', (obj) => 
recordPack.push(obj)) - .on('end', () => resolve(Record.unpackRecords(recordPack as RecordPackage, qEdge, this.recordConfig))); - } else { - resolve(null); - } - } catch (error) { - resolve(null); - debug(`Cache lookup/retrieval failed due to ${error}. Proceeding without cache.`); - } - }); - }); - - if (unpackedRecords) { - this.logs.push( - new LogEntry('DEBUG', null, `BTE finds cached records for ${qEdge.getID()}`, { - type: 'cacheHit', - qEdgeID: qEdge.getID(), - api_names: unpackedRecords.map((record) => record.association?.api_name), - }).getLog(), - ); - cachedRecords = [...cachedRecords, ...unpackedRecords]; - } else { - nonCachedQEdges.push(qEdge); - } - debug(`Found (${cachedRecords.length}) cached records.`); - }); - - return { cachedRecords, nonCachedQEdges }; - } - - _hashEdgeByMetaKG(qEdgeHash: string): string { - if (!this.metaKG) { - return qEdgeHash; - } - const len = String(this.metaKG.ops.length); - const allIDs = Array.from(new Set(this.metaKG.ops.map((op) => op.association.smartapi.id))).join(''); - return helper._generateHash(qEdgeHash + len + allIDs); - } - - _groupQueryRecordsByQEdgeHash(queryRecords: Record[]): RecordPacksByQedgeMetaKGHash { - const groupedRecords: { [qEdgeMetaKGHash: string]: Record[] } = {}; - queryRecords.map((record) => { - try { - const qEdgeMetaKGHash = this._hashEdgeByMetaKG(record.qEdge.getHashedEdgeRepresentation()); - if (!(qEdgeMetaKGHash in groupedRecords)) { - groupedRecords[qEdgeMetaKGHash] = []; - } - groupedRecords[qEdgeMetaKGHash].push(record); - } catch (e) { - debug('skipping malformed record'); - } - }); - return Object.fromEntries( - Object.entries(groupedRecords).map(([qEdgeMetaKGHash, records]) => { - return [qEdgeMetaKGHash, Record.packRecords(records)]; - }), - ); - } - - createEncodeStream(): DelimitedChunksEncoder { - return new DelimitedChunksEncoder(); - } - - createDecodeStream(): DelimitedChunksDecoder { - return new DelimitedChunksDecoder(); - } - - async cacheEdges(queryRecords: Record[]): 
Promise { - if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { - if (global.workerSide) { - global.workerSide.postMessage({ threadId, type: 'cacheDone', value: true } satisfies ThreadMessage); - } - return; - } - if (global.workerSide) { - global.workerSide.postMessage({ threadId, type: 'cacheInProgress', value: 1 } satisfies ThreadMessage); - } - debug('Start to cache query records.'); - try { - const groupedRecords = this._groupQueryRecordsByQEdgeHash(queryRecords); - const qEdgeHashes = Array.from(Object.keys(groupedRecords)); - debug(`Number of hashed edges: ${qEdgeHashes.length}`); - const failedHashes = []; - await async.eachSeries(qEdgeHashes, async (hash) => { - // lock to prevent caching to/reading from actively caching edge - const redisID = 'bte:edgeCache:' + hash; - if (global.workerSide) { - global.workerSide.postMessage({ threadId, type: 'addCacheKey', value: redisID } satisfies ThreadMessage); - } - await redisClient.client.usingLock([`redisLock:${redisID}`, 'redisLock:EdgeCaching'], 600000, async () => { - try { - await redisClient.client.delTimeout(redisID); // prevents weird overwrite edge cases - await new Promise((resolve, reject) => { - let i = 0; - Readable.from(groupedRecords[hash]) - .pipe(this.createEncodeStream()) - .pipe(chunker(100000, { flush: true })) - .on('data', async (chunk: string) => { - try { - await redisClient.client.hsetTimeout(redisID, String(i++), chunk); - } catch (error) { - reject(error); - try { - await redisClient.client.delTimeout(redisID); - } catch (e) { - debug( - `Unable to remove partial cache ${redisID} from redis during cache failure due to error ${error}. 
This may result in failed or improper cache retrieval of this qEdge.`, - ); - } - } - }) - .on('end', () => { - resolve(); - }); - }); - if (process.env.QEDGE_CACHE_TIME_S !== '0') { - await redisClient.client.expireTimeout(redisID, process.env.QEDGE_CACHE_TIME_S || 1800); - } - } catch (error) { - failedHashes.push(hash); - debug( - `Failed to cache qEdge ${hash} records due to error ${error}. This does not stop other edges from caching nor terminate the query.`, - ); - } finally { - if (global.workerSide) { - global.workerSide.postMessage({ threadId, type: 'completeCacheKey', value: redisID } satisfies ThreadMessage); - } - } - }); - }); - const successCount = Object.entries(groupedRecords).reduce((acc, [hash, records]) => { - return failedHashes.includes(hash) ? acc : acc + records.length; - }, 0); - if (successCount) { - debug(`Successfully cached (${successCount}) query records.`); - } else { - debug(`qEdge caching failed.`); - } - } catch (error) { - debug(`Caching failed due to ${error}. 
This does not terminate the query.`); - } finally { - if (global.workerSide) { - global.workerSide.postMessage({ threadId, type: 'cacheDone', value: 1 } satisfies ThreadMessage); - } - } - } -} From 3ea03a73c044468f4e4e53c0ef0574224f2d0dc1 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Thu, 1 Aug 2024 15:42:42 -0700 Subject: [PATCH 11/18] use graph classes defined in types package --- src/graph/graph.ts | 198 ----------------------------------- src/graph/kg_edge.ts | 137 ------------------------ src/graph/kg_node.ts | 56 ---------- src/graph/knowledge_graph.ts | 176 ------------------------------- src/index.ts | 9 +- 5 files changed, 2 insertions(+), 574 deletions(-) delete mode 100644 src/graph/graph.ts delete mode 100644 src/graph/kg_edge.ts delete mode 100644 src/graph/kg_node.ts delete mode 100644 src/graph/knowledge_graph.ts diff --git a/src/graph/graph.ts b/src/graph/graph.ts deleted file mode 100644 index 3530ab1c..00000000 --- a/src/graph/graph.ts +++ /dev/null @@ -1,198 +0,0 @@ -import kg_edge from './kg_edge'; -import kg_node from './kg_node'; -import Debug from 'debug'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import KGNode from './kg_node'; -import KGEdge from './kg_edge'; -import { TrapiAuxiliaryGraph, TrapiResult, Record } from '@biothings-explorer/types'; -import KnowledgeGraph from './knowledge_graph'; -const debug = Debug('bte:biothings-explorer-trapi:Graph'); - -export interface BTEGraphUpdate { - nodes: { - [nodeID: string]: KGNode; - }; - edges: { - [edgeID: string]: KGEdge; - }; -} - -export interface BTEGraphSubscriber { - update: (kg: BTEGraphUpdate) => void; -} - -export default class BTEGraph { - nodes: { - [nodeID: string]: KGNode; - }; - edges: { - [edgeID: string]: KGEdge; - }; - subscribers: BTEGraphSubscriber[]; - constructor() { - this.nodes = {}; - this.edges = {}; - this.subscribers = []; - } - - update(queryRecords: Record[]): void { - debug(`Updating BTE Graph now.`); - const bteAttributes = ['name', 
'label', 'id', 'api', 'provided_by', 'publications', 'trapi_sources']; - queryRecords.map((record) => { - if (record) { - const inputPrimaryCurie = record.subject.curie; - const inputQNodeID = record.subject.qNodeID; - const inputBTENodeID = inputPrimaryCurie; - // const inputBTENodeID = inputPrimaryCurie + '-' + inputQNodeID; - const outputPrimaryCurie = record.object.curie; - const outputQNodeID = record.object.qNodeID; - // const outputBTENodeID = outputPrimaryCurie + '-' + outputQNodeID; - const outputBTENodeID = outputPrimaryCurie; - const recordHash = record.recordHash; - - if (!(outputBTENodeID in this.nodes)) { - this.nodes[outputBTENodeID] = new kg_node(outputBTENodeID, { - primaryCurie: outputPrimaryCurie, - qNodeID: outputQNodeID, - curies: record.object.equivalentCuries, - names: record.object.names, - label: record.object.label, - semanticType: [record.object.semanticType[0]], - nodeAttributes: record.object.attributes, - }); - } else if ( - this.nodes[outputBTENodeID].label === undefined || - this.nodes[outputBTENodeID].label === this.nodes[outputBTENodeID].primaryCurie - ) { - this.nodes[outputBTENodeID].label = record.object.label; - } - - if (!(inputBTENodeID in this.nodes)) { - this.nodes[inputBTENodeID] = new kg_node(inputBTENodeID, { - primaryCurie: inputPrimaryCurie, - qNodeID: inputQNodeID, - curies: record.subject.equivalentCuries, - names: record.subject.names, - label: record.subject.label, - semanticType: [record.subject.semanticType[0]], - nodeAttributes: record.subject.attributes, - }); - } else if ( - this.nodes[inputBTENodeID].label === undefined || - this.nodes[inputBTENodeID].label === this.nodes[inputBTENodeID].primaryCurie - ) { - this.nodes[inputBTENodeID].label = record.subject.label; - } - - this.nodes[outputBTENodeID].addSourceNode(inputBTENodeID); - this.nodes[outputBTENodeID].addSourceQNodeID(inputQNodeID); - this.nodes[inputBTENodeID].addTargetNode(outputBTENodeID); - 
this.nodes[inputBTENodeID].addTargetQNodeID(outputQNodeID); - if (!(recordHash in this.edges)) { - this.edges[recordHash] = new kg_edge(recordHash, { - predicate: record.predicate, - subject: inputPrimaryCurie, - object: outputPrimaryCurie, - }); - } - this.edges[recordHash].addAPI(record.api); - this.edges[recordHash].addInforesCurie(record.apiInforesCurie); - this.edges[recordHash].addPublication(record.publications); - Object.keys(record.mappedResponse) - .filter((k) => !(bteAttributes.includes(k) || k.startsWith('$'))) - .map((item) => { - this.edges[recordHash].addAdditionalAttributes(item, record.mappedResponse[item]); - }); - if (record.knowledge_level) { - this.edges[recordHash].addAdditionalAttributes('biolink:knowledge_level', record.knowledge_level); - } - if (record.agent_type) { - this.edges[recordHash].addAdditionalAttributes('biolink:agent_type', record.agent_type); - } - this.edges[recordHash].addSource(record.provenanceChain); - Object.entries(record.qualifiers).forEach(([qualifierType, qualifier]) => { - this.edges[recordHash].addQualifier(qualifierType, qualifier); - }); - } - }); - } - - prune(results: TrapiResult[], auxGraphs: { [auxGraphID: string]: TrapiAuxiliaryGraph }): void { - debug('pruning BTEGraph nodes/edges...'); - const edgeBoundNodes: Set = new Set(); - const resultsBoundEdges: Set = new Set(); - - // Handle nodes and edges bound to results directly - results.forEach((result) => { - Object.entries(result.analyses[0].edge_bindings).forEach(([, bindings]) => { - bindings.forEach((binding) => resultsBoundEdges.add(binding.id)); - }); - }); - - // Handle edges bound via auxiliary graphs - // This will iterate over new edges as they're added - resultsBoundEdges.forEach((edgeID) => { - edgeBoundNodes.add(this.edges[edgeID].subject); - edgeBoundNodes.add(this.edges[edgeID].object); - const supportGraphs = [...(this.edges[edgeID].attributes['biolink:support_graphs'] ?? 
[])]; - supportGraphs.forEach((auxGraphID: string) => { - auxGraphs[auxGraphID].edges.forEach((auxGraphEdgeID) => { - edgeBoundNodes.add(this.edges[auxGraphEdgeID].subject); - edgeBoundNodes.add(this.edges[auxGraphEdgeID].object); - resultsBoundEdges.add(auxGraphEdgeID); - }); - }); - }); - - const nodesToDelete = Object.keys(this.nodes).filter((bteNodeID) => !edgeBoundNodes.has(bteNodeID)); - nodesToDelete.forEach((unusedBTENodeID) => delete this.nodes[unusedBTENodeID]); - const edgesToDelete = Object.keys(this.edges).filter((recordHash) => !resultsBoundEdges.has(recordHash)); - edgesToDelete.forEach((unusedRecordHash) => delete this.edges[unusedRecordHash]); - debug(`pruned ${nodesToDelete.length} nodes and ${edgesToDelete.length} edges from BTEGraph.`); - } - - checkPrimaryKnowledgeSources(knowledgeGraph: KnowledgeGraph): StampedLog[] { - const logs = []; - Object.entries(knowledgeGraph.edges).map(([edgeID, edge]) => { - const has_primary_knowledge_source = edge.sources.some( - (source) => source.resource_role === 'primary_knowledge_source' && source.resource_id, - ); - if (!has_primary_knowledge_source) { - const logMsg = `Edge ${edgeID} (APIs: ${Array.from(this.edges[edgeID].apis).join( - ', ', - )}) is missing a primary knowledge source`; - debug(logMsg); - logs.push(new LogEntry('WARNING', null, logMsg).getLog()); - } - }); - return logs; - } - - /** - * Register subscribers - */ - subscribe(subscriber: BTEGraphSubscriber): void { - this.subscribers.push(subscriber); - } - - /** - * Unsubscribe a listener - */ - unsubscribe(subscriber: BTEGraphSubscriber): void { - this.subscribers = this.subscribers.filter((fn) => { - if (fn != subscriber) return fn; - }); - } - - /** - * Nofity all listeners - */ - notify(): void { - this.subscribers.map((subscriber) => { - subscriber.update({ - nodes: this.nodes, - edges: this.edges, - }); - }); - } -} diff --git a/src/graph/kg_edge.ts b/src/graph/kg_edge.ts deleted file mode 100644 index 797c82f1..00000000 --- 
a/src/graph/kg_edge.ts +++ /dev/null @@ -1,137 +0,0 @@ -import { TrapiSource } from '@biothings-explorer/types'; -import { TrapiAttribute } from '@biothings-explorer/types'; - -export interface KGEdgeInfo { - object: string; - subject: string; - predicate: string; -} - -export default class KGEdge { - id: string; - predicate: string; - subject: string; - object: string; - apis: Set; - inforesCuries: Set; - sources: { - [resource_id: string]: { - [resource_role: string]: { - resource_id: string; - resource_role: string; - upstream_resource_ids?: Set; - source_record_urls?: Set; - }; - }; - }; - publications: Set; - qualifiers: { - [qualifier_type_id: string]: string | string[]; - }; - attributes: { - [attribute_type_id: string]: Set | TrapiAttribute[]; - 'edge-attributes'?: TrapiAttribute[]; - }; - constructor(id: string, info: KGEdgeInfo) { - this.id = id; - this.predicate = info.predicate; - this.subject = info.subject; - this.object = info.object; - this.apis = new Set(); - this.inforesCuries = new Set(); - this.sources = {}; - this.publications = new Set(); - this.qualifiers = {}; - this.attributes = {}; - } - - addAPI(api: string | string[]): void { - if (typeof api === 'undefined') { - return; - } - if (!Array.isArray(api)) { - api = [api]; - } - api.map((item) => { - this.apis.add(item); - }); - } - - addInforesCurie(inforesCurie: string | string[]): void { - if (typeof inforesCurie === 'undefined') { - return; - } - if (!Array.isArray(inforesCurie)) { - inforesCurie = [inforesCurie]; - } - inforesCurie.map((item) => { - this.inforesCuries.add(item); - }); - } - - addSource(source: TrapiSource | TrapiSource[]): void { - if (typeof source === 'undefined') { - return; - } - if (!Array.isArray(source)) { - source = [source]; - } - source.forEach((item) => { - if (!this.sources[item.resource_id]) this.sources[item.resource_id] = {}; - if (item.upstream_resource_ids && !Array.isArray(item.upstream_resource_ids)) { - item.upstream_resource_ids = 
[item.upstream_resource_ids]; - } - if (item.source_record_urls && !Array.isArray(item.source_record_urls)) { - item.source_record_urls = [item.source_record_urls]; - } - if (!this.sources[item.resource_id][item.resource_role]) { - this.sources[item.resource_id][item.resource_role] = { - resource_id: item.resource_id, - resource_role: item.resource_role, - upstream_resource_ids: item.upstream_resource_ids ? new Set(item.upstream_resource_ids) : undefined, - source_record_urls: item.source_record_urls ? new Set(item.source_record_urls) : undefined, - }; - } - item.upstream_resource_ids?.forEach((upstream) => - this.sources[item.resource_id][item.resource_role].upstream_resource_ids.add(upstream), - ); - item.source_record_urls?.forEach((url) => - this.sources[item.resource_id][item.resource_role].source_record_urls.add(url), - ); - }); - } - - addPublication(publication: string | string[]): void { - if (typeof publication === 'undefined') { - return; - } - if (!Array.isArray(publication)) { - publication = [publication]; - } - publication.map((item) => { - this.publications.add(item); - }); - } - - addQualifier(name: string, value: string | string[]): void { - this.qualifiers[name] = value; - } - - addAdditionalAttributes(name: string, value: string | string[] | TrapiAttribute[]): void { - // special handling for full edge attributes - if (name === 'edge-attributes') { - this.attributes[name] = value as TrapiAttribute[]; - return; - } - - if (!(name in this.attributes)) { - this.attributes[name] = new Set(); - } - if (!Array.isArray(value)) { - value = [value]; - } - (value as string[]).map((item) => { - (this.attributes[name] as Set).add(item); - }); - } -} diff --git a/src/graph/kg_node.ts b/src/graph/kg_node.ts deleted file mode 100644 index 60dac19f..00000000 --- a/src/graph/kg_node.ts +++ /dev/null @@ -1,56 +0,0 @@ -import { TrapiAttribute } from '@biothings-explorer/types'; - -export interface KGNodeInfo { - label: string; - nodeAttributes?: TrapiAttribute; - 
semanticType: string[]; - names: string[]; - curies: string[]; - primaryCurie: string; - qNodeID: string; -} - -export default class KGNode { - id: string; - primaryCurie: string; - qNodeID: string; - curies: string[]; - names: string[]; - semanticType: string[]; - nodeAttributes: TrapiAttribute; - label: string; - sourceNodes: Set; - targetNodes: Set; - sourceQNodeIDs: Set; - targetQNodeIDs: Set; - constructor(id: string, info: KGNodeInfo) { - this.id = id; - this.primaryCurie = info.primaryCurie; - this.qNodeID = info.qNodeID; - this.curies = info.curies; - this.names = info.names; - this.semanticType = info.semanticType; - this.nodeAttributes = info.nodeAttributes; - this.label = info.label; - this.sourceNodes = new Set(); - this.targetNodes = new Set(); - this.sourceQNodeIDs = new Set(); - this.targetQNodeIDs = new Set(); - } - - addSourceNode(kgNodeID: string): void { - this.sourceNodes.add(kgNodeID); - } - - addTargetNode(kgNodeID: string): void { - this.targetNodes.add(kgNodeID); - } - - addSourceQNodeID(qNodeID: string): void { - this.sourceQNodeIDs.add(qNodeID); - } - - addTargetQNodeID(qNodeID: string): void { - this.targetQNodeIDs.add(qNodeID); - } -} diff --git a/src/graph/knowledge_graph.ts b/src/graph/knowledge_graph.ts deleted file mode 100644 index 706fe0f8..00000000 --- a/src/graph/knowledge_graph.ts +++ /dev/null @@ -1,176 +0,0 @@ -import { toArray } from '../utils'; -import Debug from 'debug'; -import { - TrapiAttribute, - TrapiKnowledgeGraph, - TrapiKGEdge, - TrapiKGEdges, - TrapiKGNode, - TrapiKGNodes, - TrapiQualifier, - TrapiSource, -} from '@biothings-explorer/types'; -import KGNode from './kg_node'; -import KGEdge from './kg_edge'; -import { BTEGraphUpdate } from './graph'; -import { APIDefinition } from '@biothings-explorer/types'; - -const debug = Debug('bte:biothings-explorer-trapi:KnowledgeGraph'); - -const NON_ARRAY_ATTRIBUTES = ['biolink:knowledge_level', 'biolink:agent_type', 'biolink:evidence_count']; - -export default class 
KnowledgeGraph { - nodes: { - [nodePrimaryID: string]: TrapiKGNode; - }; - edges: { - [edgeID: string]: TrapiKGEdge; - }; - kg: TrapiKnowledgeGraph; - apiList?: APIDefinition[]; - constructor(apiList?: APIDefinition[]) { - this.nodes = {}; - this.edges = {}; - this.kg = { - nodes: this.nodes, - edges: this.edges, - }; - this.apiList = apiList; - } - - getNodes(): TrapiKGNodes { - return this.nodes; - } - - getEdges(): TrapiKGEdges { - return this.edges; - } - - _createNode(kgNode: KGNode): TrapiKGNode { - const node = { - categories: kgNode.semanticType, - name: Array.isArray(kgNode.label) ? kgNode.label[0] : kgNode.label, - attributes: [ - { - attribute_type_id: 'biolink:xref', - value: kgNode.curies, - }, - { - attribute_type_id: 'biolink:synonym', - value: kgNode.names.length ? kgNode.names : toArray(kgNode.label), - }, - // Currently unused - // { - // attribute_type_id: 'num_source_nodes', - // value: kgNode._sourceNodes.size, - // //value_type_id: 'bts:num_source_nodes', - // }, - // { - // attribute_type_id: 'num_target_nodes', - // value: kgNode._targetNodes.size, - // //value_type_id: 'bts:num_target_nodes', - // }, - // { - // attribute_type_id: 'source_qg_nodes', - // value: Array.from(kgNode._sourceQNodeIDs), - // //value_type_id: 'bts:source_qg_nodes', - // }, - // { - // attribute_type_id: 'target_qg_nodes', - // value: Array.from(kgNode._targetQNodeIDs), - // //value_type_id: 'bts:target_qg_nodes', - // }, - ], - }; - for (const key in kgNode.nodeAttributes) { - node.attributes.push({ - attribute_type_id: key, - value: kgNode.nodeAttributes[key] as string[], - //value_type_id: 'bts:' + key, - }); - } - return node; - } - - _createQualifiers(kgEdge: KGEdge): TrapiQualifier[] { - const qualifiers = Object.entries(kgEdge.qualifiers || {}).map(([qualifierType, qualifier]) => { - return { - qualifier_type_id: qualifierType, - qualifier_value: qualifier, - }; - }); - - return qualifiers.length ? 
qualifiers : undefined; - } - - _createAttributes(kgEdge: KGEdge): TrapiAttribute[] { - const attributes: TrapiAttribute[] = []; - - // publications - if (Array.from(kgEdge.publications).length) { - attributes.push({ - attribute_type_id: 'biolink:publications', - value: Array.from(kgEdge.publications), - value_type_id: 'linkml:Uriorcurie', - }); - } - - Object.entries(kgEdge.attributes).forEach(([key, value]) => { - if (key === 'edge-attributes') return; - // if (key == 'edge-attributes') return; - attributes.push({ - attribute_type_id: key, - value: // technically works for numbers as well - NON_ARRAY_ATTRIBUTES.includes(key) - ? [...(value as Set)].reduce((acc, val) => acc + val) - : Array.from(value as Set), - //value_type_id: 'bts:' + key, - }); - }); - - //handle TRAPI APIs (Situation A of https://github.com/biothings/BioThings_Explorer_TRAPI/issues/208) and APIs that define 'edge-atributes' in x-bte - kgEdge.attributes['edge-attributes']?.forEach((attribute) => { - attributes.push(attribute); - }); - return attributes; - } - - _createSources(kgEdge: KGEdge): TrapiSource[] { - const sources: TrapiSource[] = []; - Object.entries(kgEdge.sources).forEach(([, roles]) => { - Object.entries(roles).forEach(([, sourceObj]) => { - const trapiSource: TrapiSource = { - ...sourceObj, - upstream_resource_ids: sourceObj.upstream_resource_ids ? [...sourceObj.upstream_resource_ids] : undefined, - source_record_urls: sourceObj.source_record_urls ? 
[...sourceObj.source_record_urls] : undefined, - }; - sources.push(trapiSource); - }); - }); - return sources; - } - - _createEdge(kgEdge: KGEdge): TrapiKGEdge { - return { - predicate: kgEdge.predicate, - subject: kgEdge.subject, - object: kgEdge.object, - qualifiers: this._createQualifiers(kgEdge), - attributes: this._createAttributes(kgEdge), - sources: this._createSources(kgEdge), - }; - } - - update(bteGraph: BTEGraphUpdate): void { - Object.keys(bteGraph.nodes).map((node) => { - this.nodes[bteGraph.nodes[node].primaryCurie] = this._createNode(bteGraph.nodes[node]); - }); - Object.keys(bteGraph.edges).map((edge) => { - this.edges[edge] = this._createEdge(bteGraph.edges[edge]); - }); - this.kg = { - nodes: this.nodes, - edges: this.edges, - }; - } -} diff --git a/src/index.ts b/src/index.ts index 34dd0c31..0b4bee96 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,11 +1,9 @@ import MetaKG, { SmartAPIQueryResult } from '@biothings-explorer/smartapi-kg'; import path from 'path'; -import KnowledgeGraph from './graph/knowledge_graph'; import TrapiResultsAssembler from './results_assembly/query_results'; -import { QueryGraph, InvalidQueryGraphError } from '@biothings-explorer/types'; +import { QueryGraph, InvalidQueryGraphError, KGNode, KGEdge, BTEGraph, KnowledgeGraph } from '@biothings-explorer/types'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:main'); -import Graph from './graph/graph'; import EdgeManager from './edge_manager'; import _ from 'lodash'; import QEdge2APIEdgeHandler from './qedge2apiedge'; @@ -14,8 +12,6 @@ import { promises as fs } from 'fs'; import { getDescendants } from '@biothings-explorer/node-expansion'; import { resolveSRI, SRINodeNormFailure } from 'biomedical_id_resolver'; import InferredQueryHandler from './inferred_mode/inferred_mode'; -import KGNode from './graph/kg_node'; -import KGEdge from './graph/kg_edge'; import { TrapiAuxGraphCollection, TrapiAuxiliaryGraph, @@ -25,7 +21,6 @@ import { 
TrapiResult, } from '@biothings-explorer/types'; import { QueryHandlerOptions, QEdge } from '@biothings-explorer/types'; -import BTEGraph from './graph/graph'; import { Telemetry } from '@biothings-explorer/utils'; // Exports for external availability @@ -408,7 +403,7 @@ export default class TRAPIQueryHandler { _initializeResponse(): void { this.knowledgeGraph = new KnowledgeGraph(this.options?.apiList?.include); this.trapiResultsAssembler = new TrapiResultsAssembler(this.options); - this.bteGraph = new Graph(); + this.bteGraph = new BTEGraph(); this.bteGraph.subscribe(this.knowledgeGraph); } From 8920687a478c5dc6833771a661c5bc8516db3079 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Fri, 2 Aug 2024 10:58:56 -0700 Subject: [PATCH 12/18] max records per edge/query --- src/edge_manager.ts | 86 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/edge_manager.ts b/src/edge_manager.ts index b0a46d4f..1d27e17e 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -13,6 +13,9 @@ import { RecordsByQEdgeID } from './results_assembly/query_results'; import path from 'path'; import { promises as fs } from 'fs'; +const MAX_RECORDS_PER_EDGE = parseInt(process.env.MAX_RECORDS_PER_EDGE ?? '100000'); +const MAX_RECORDS_PER_QUERY = parseInt(process.env.MAX_RECORDS_TOTAL ?? 
'200000'); + export default class QueryEdgeManager { private _qEdges: QEdge[]; private _metaKG: MetaKG; @@ -299,45 +302,6 @@ export default class QueryEdgeManager { currentQEdge.storeRecords(filteredRecords); } - /** - * Unused - */ - // updateNeighborsEdgeRecords(currentQEdge) { - // //update and filter only immediate neighbors - // debug(`Updating neighbors...`); - // const currentQEdgeID = currentQEdge.getID(); - // //get neighbors of this edges subject that are not this edge - // let left_connections = currentQEdge.subject.getConnections(); - // left_connections = left_connections.filter((qEdgeID) => qEdgeID !== currentQEdgeID); - // //get neighbors of this edges object that are not this edge - // let right_connections = currentQEdge.object.getConnections(); - // right_connections = right_connections.filter((qEdgeID) => qEdgeID !== currentQEdgeID); - // debug(`(${left_connections})<--edge neighbors-->(${right_connections})`); - // if (left_connections.length) { - // //find edge by id - // left_connections.forEach((qEdgeID) => { - // const edge = this._qEdges.find((edge) => edge.getID() == qEdgeID); - // if (edge && edge.records.length) { - // debug(`Updating "${edge.getID()}" neighbor edge of ${currentQEdgeID}`); - // debug(`Updating neighbor (X)<----()`); - // this.updateEdgeRecords(edge); - // } - // }); - // } - // - // if (right_connections.length) { - // //find edge by id - // right_connections.forEach((neighbor_id) => { - // const edge = this._qEdges.find((edge) => edge.getID() == neighbor_id); - // if (edge && edge.records.length) { - // debug(`Updating "${edge.getID()}" neighbor edge of ${currentQEdgeID}`); - // debug(`Updating neighbor ()---->(X)`); - // this.updateEdgeRecords(edge); - // } - // }); - // } - // } - updateAllOtherEdges(currentQEdge: QEdge): void { //update and filter all other edges debug(`Updating all other edges...`); @@ -353,10 +317,8 @@ export default class QueryEdgeManager { _createBatchQueryHandler(qEdge: QEdge, metaKG: MetaKG): 
BatchEdgeQueryHandler { const handler = new BatchEdgeQueryHandler(metaKG, { - caching: this.options.caching, - submitter: this.options.submitter, + ...this.options, recordHashEdgeAttributes: config.EDGE_ATTRIBUTES_USED_IN_RECORD_HASH, - provenanceUsesServiceProvider: this.options.provenanceUsesServiceProvider, } as BatchEdgeQueryOptions); handler.setEdges(qEdge); return handler; @@ -400,7 +362,7 @@ export default class QueryEdgeManager { ); debug(`(5) Executing current edge >> "${currentQEdge.getID()}"`); //execute current edge query - const queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs); + let queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs); this.logs = [...this.logs, ...queryBatchHandler.logs]; if (queryRecords === undefined) return; // create an edge execution summary @@ -410,6 +372,7 @@ export default class QueryEdgeManager { const cached = this.logs.filter( ({ data }) => data?.qEdgeID === currentQEdge.id && data?.type === 'cacheHit', ).length; + total += cached; this.logs .filter(({ data }) => data?.qEdgeID === currentQEdge.id && data?.type === 'query') .forEach(({ data }) => { @@ -437,6 +400,43 @@ export default class QueryEdgeManager { span.finish(); return; } + + // Check if record count threatens stability + const totalRecords = + this._qEdges.reduce((total, qEdge) => { + return total + qEdge.records.length; + }, 0) + queryRecords.length; + let maxRecordsMessage = [ + `Qedge ${currentQEdge.id}`, + `obtained ${queryRecords.length} records,`, + queryRecords.length === MAX_RECORDS_PER_EDGE ? 
'meeting' : 'exceeding', + `maximum of ${MAX_RECORDS_PER_QUERY}`, + `Truncating records for this edge to ${MAX_RECORDS_PER_EDGE}.`, + `Your query may be too general?`, + ]; + if (totalRecords > MAX_RECORDS_PER_QUERY) { + maxRecordsMessage = maxRecordsMessage.slice(0, 2); + maxRecordsMessage.push( + ...[ + `totalling ${totalRecords} for this query.`, + `This exceeds the per-query maximum of ${MAX_RECORDS_PER_QUERY}.`, + `For stability purposes, this query is terminated.`, + `Please consider further refining your query.`, + ], + ); + debug(maxRecordsMessage.join(' ')); + this.logs.push(new LogEntry('WARNING', null, maxRecordsMessage.join(' ')).getLog()); + Telemetry.captureException(new Error(`Stopped on globalMaxRecords (exceeded ${MAX_RECORDS_PER_QUERY})`)); + return; + } + + if (queryRecords.length > MAX_RECORDS_PER_EDGE) { + debug(maxRecordsMessage.join(' ')); + this.logs.push(new LogEntry('WARNING', null, maxRecordsMessage.join(' ')).getLog()); + + queryRecords = queryRecords.slice(0, MAX_RECORDS_PER_EDGE); + } + // storing records will trigger a node entity count update currentQEdge.storeRecords(queryRecords); From 6012a901be811415e79f71bb8aeefac8120fd001 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Wed, 7 Aug 2024 16:40:18 -0700 Subject: [PATCH 13/18] remove tests for files moved to types --- __test__/integration/KnowledgeGraph.test.ts | 156 ---------- __test__/integration/graph/graph.test.ts | 195 ------------- __test__/unittest/cacheHandler.test.ts | 301 -------------------- 3 files changed, 652 deletions(-) delete mode 100644 __test__/integration/KnowledgeGraph.test.ts delete mode 100644 __test__/integration/graph/graph.test.ts delete mode 100644 __test__/unittest/cacheHandler.test.ts diff --git a/__test__/integration/KnowledgeGraph.test.ts b/__test__/integration/KnowledgeGraph.test.ts deleted file mode 100644 index 18c99a90..00000000 --- a/__test__/integration/KnowledgeGraph.test.ts +++ /dev/null @@ -1,156 +0,0 @@ -import KGEdge from 
'../../src/graph/kg_edge'; -import KGNode from '../../src/graph/kg_node'; -import KnowledgeGraph from '../../src/graph/knowledge_graph'; -import { TrapiAttribute } from '@biothings-explorer/types'; - -describe('Testing KnowledgeGraph Module', () => { - const nodeInput = new KGNode('PUBCHEM.COMPOUND:2662-n0', { - primaryCurie: 'PUBCHEM.COMPOUND:2662', - qNodeID: 'n0', - curies: [ - 'PUBCHEM.COMPOUND:2662', - 'CHEMBL.COMPOUND:CHEMBL118', - 'UNII:JCX84Q7J1L', - 'CHEBI:41423', - 'DRUGBANK:DB00482', - 'MESH:C105934', - 'MESH:D000068579', - 'CAS:169590-42-5', - 'CAS:184007-95-2', - 'CAS:194044-54-7', - 'DrugCentral:568', - 'GTOPDB:2892', - 'HMDB:HMDB0005014', - 'KEGG.COMPOUND:C07589', - 'INCHIKEY:RZEKVGVHFLEQIL-UHFFFAOYSA-N', - ], - names: ['Celecoxib', 'CELECOXIB', 'celecoxib', '[OBSOLETE] celecoxib'], - semanticType: ['biolink:SmallMolecule'], - label: 'Celecoxib', - }); - - const trapiEdgeInput = new KGEdge('PUBCHEM.COMPOUND:2662-biolink:activity_decreased_by-NCBIGene:771', { - predicate: 'biolink:activity_decreased_by', - subject: 'PUBCHEM.COMPOUND:2662', - object: 'NCBIGene:771', - }); - - trapiEdgeInput.addAdditionalAttributes('edge-attributes', [ - { - attribute_type_id: 'biolink:Attribute', - value: 'Ki', - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'affinity_parameter', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:knowledge_source', - value: ['PHAROS_1_norm_edges.jsonl'], - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:aggregator_knowledge_source', - value: ['infores:pharos'], - value_type_id: 'biolink:InformationResource', - original_attribute_name: 'biolink:aggregator_knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:Attribute', - value: 7.75, - value_type_id: 
'EDAM:data_0006', - original_attribute_name: 'affinity', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:publications', - value: [ - 'PMID:20605094', - 'PMID:21852133', - 'PMID:16290146', - 'PMID:23965175', - 'PMID:23965175', - 'PMID:24513184', - 'PMID:25766630', - 'PMID:23067387', - ], - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'publications', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:relation', - value: 'GAMMA:ki', - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'relation', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:aggregator_knowledge_source', - value: 'infores:automat.pharos', - value_type_id: 'biolink:InformationResource', - original_attribute_name: 'biolink:aggregator_knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - ]); - - describe('Testing _createNode function', () => { - test('test creating node', () => { - const kg = new KnowledgeGraph(); - const res = kg._createNode(nodeInput); - expect(res).toHaveProperty('name', 'Celecoxib'); - expect(res).toHaveProperty('categories'); - expect(res.categories[0]).toBe('biolink:SmallMolecule'); - expect(res).toHaveProperty('attributes'); - }); - }); - - describe('Testing _createAttributes function', () => { - test('test edge attributes', () => { - const kg = new KnowledgeGraph(); - const res = kg._createAttributes(trapiEdgeInput); - expect(res.length).toBeGreaterThan(0); - for (const res_obj of res) { - expect(res_obj).toHaveProperty('attribute_type_id'); - expect(res_obj).toHaveProperty('value'); - if (res_obj.attribute_type_id.includes('biolink:')) { - expect(res_obj).toHaveProperty('value_type_id'); - } - } - }); - }); - - describe('Testing _createEdge function', () => { - test('test creating edge', () => { - const kg = new KnowledgeGraph(); - const res = 
kg._createEdge(trapiEdgeInput); - expect(res).toHaveProperty('predicate', 'biolink:activity_decreased_by'); - expect(res).toHaveProperty('subject', 'PUBCHEM.COMPOUND:2662'); - expect(res).toHaveProperty('object', 'NCBIGene:771'); - expect(res).toHaveProperty('attributes'); - for (const res_obj of res.attributes as TrapiAttribute[]) { - expect(res_obj).toHaveProperty('attribute_type_id'); - expect(res_obj).toHaveProperty('value'); - if (res_obj.attribute_type_id.includes('biolink:')) { - expect(res_obj).toHaveProperty('value_type_id'); - } - } - }); - }); -}); diff --git a/__test__/integration/graph/graph.test.ts b/__test__/integration/graph/graph.test.ts deleted file mode 100644 index 435f7596..00000000 --- a/__test__/integration/graph/graph.test.ts +++ /dev/null @@ -1,195 +0,0 @@ -import graph from '../../../src/graph/graph'; -import { Record } from '@biothings-explorer/types'; - -describe('Test graph class', () => { - const qNode1 = { - getID() { - return 'qg1'; - }, - }; - const qNode2 = { - getID() { - return 'qg2'; - }, - }; - const record1 = new Record({ - api: 'API1', - metaEdgeSource: 'source1', - apiInforesCurie: 'infores:API1', - predicate: 'predicate1', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMID:1', 'PMID:2'], - mappedResponse: { - relation: 'relation1', - }, - }); - - const record2 = new Record({ - api: 'API2', - metaEdgeSource: 'source2', - apiInforesCurie: 'infores:API2', - predicate: 'predicate1', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:1', 'PMC:2'], - mappedResponse: { - relation: 'relation2', - }, - }); - - const record3 = new Record({ - api: 'API3', - metaEdgeSource: 'source3', - 
apiInforesCurie: 'infores:API3', - predicate: 'predicate2', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:3', 'PMC:4'], - mappedResponse: { - relation: 'relation3', - }, - }); - - const record3a = new Record({ - api: 'API3', - metaEdgeSource: 'source3', - apiInforesCurie: 'infores:API3', - predicate: 'predicate2', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:6', 'PMC:7'], - mappedResponse: { - relation: ['relation3a', 'relation3b'], - }, - }); - - test('A single query result is correctly updated.', () => { - const g = new graph(); - g.update([record1]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - expect(g.edges).toHaveProperty('95fe2a8089c0d79ea093b97c5991f7ba'); - expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].apis)).toEqual(['API1']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].sources).toHaveProperty('source1'); - 
expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].attributes).toHaveProperty('relation', new Set(['relation1'])); - }); - - test('Multiple query results are correctly updated for two edges having same input, predicate and output', () => { - const g = new graph(); - g.update([record1, record2]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - - expect(g.edges).toHaveProperty('95fe2a8089c0d79ea093b97c5991f7ba'); - expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].apis)).toEqual(['API1']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].sources).toHaveProperty('source1'); - expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].attributes).toHaveProperty('relation', new Set(['relation1'])); - - expect(g.edges).toHaveProperty('9d334cb674d5671364c45cc8403184c6'); - expect(Array.from(g.edges['9d334cb674d5671364c45cc8403184c6'].apis)).toEqual(['API2']); - expect(g.edges['9d334cb674d5671364c45cc8403184c6'].sources).toHaveProperty('source2'); - expect(Array.from(g.edges['9d334cb674d5671364c45cc8403184c6'].publications)).toEqual(['PMC:1', 
'PMC:2']); - expect(g.edges['9d334cb674d5671364c45cc8403184c6'].attributes).toHaveProperty('relation', new Set(['relation2'])); - }); - - test('Multiple query results for different edges are correctly updated', () => { - const g = new graph(); - g.update([record1, record2, record3]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - - expect(g.edges).toHaveProperty('95fe2a8089c0d79ea093b97c5991f7ba'); - expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].apis)).toEqual(['API1']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].sources).toHaveProperty('source1'); - expect(Array.from(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['95fe2a8089c0d79ea093b97c5991f7ba'].attributes).toHaveProperty('relation', new Set(['relation1'])); - - expect(g.edges).toHaveProperty('9d334cb674d5671364c45cc8403184c6'); - expect(Array.from(g.edges['9d334cb674d5671364c45cc8403184c6'].apis)).toEqual(['API2']); - expect(g.edges['9d334cb674d5671364c45cc8403184c6'].sources).toHaveProperty('source2'); - expect(Array.from(g.edges['9d334cb674d5671364c45cc8403184c6'].publications)).toEqual(['PMC:1', 'PMC:2']); - expect(g.edges['9d334cb674d5671364c45cc8403184c6'].attributes).toHaveProperty('relation', new 
Set(['relation2'])); - - expect(g.edges).toHaveProperty('4fe2d5d3e03e0f78f272745caf6b627d'); - expect(Array.from(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].apis)).toEqual(['API3']); - expect(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].sources).toHaveProperty('source3'); - expect(Array.from(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].publications)).toEqual(['PMC:3', 'PMC:4']); - expect(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].attributes).toHaveProperty('relation', new Set(['relation3'])); - }); - - test('Multiple attributes with the same name are merged', () => { - const g = new graph(); - g.update([record3, record3a]); - - expect(g.edges).toHaveProperty('4fe2d5d3e03e0f78f272745caf6b627d'); - expect(Array.from(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].publications)).toEqual([ - 'PMC:3', - 'PMC:4', - 'PMC:6', - 'PMC:7', - ]); - expect(g.edges['4fe2d5d3e03e0f78f272745caf6b627d'].attributes).toHaveProperty( - 'relation', - new Set(['relation3', 'relation3a', 'relation3b']), - ); - }); -}); diff --git a/__test__/unittest/cacheHandler.test.ts b/__test__/unittest/cacheHandler.test.ts deleted file mode 100644 index 99f8caa2..00000000 --- a/__test__/unittest/cacheHandler.test.ts +++ /dev/null @@ -1,301 +0,0 @@ -import fs from 'fs'; -import path from 'path'; -import { Readable } from 'stream'; -import { Record, QEdge } from '@biothings-explorer/types'; -import RedisMock from 'ioredis-mock'; -import RealCacheHandler from '../../src/cache_handler'; -import MetaKG from '@biothings-explorer/smartapi-kg'; - -const qEdges = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../data/qEdges.json'), { encoding: 'utf8' })); - -const records = Record.unfreezeRecords( - JSON.parse(fs.readFileSync(path.resolve(__dirname, '../data/queryRecords.json'), { encoding: 'utf8' })), -); - -describe('test cache handler', () => { - const OLD_ENV = process.env; - beforeEach(() => { - jest.resetModules(); // Most important - it clears the cache - jest.clearAllMocks(); - jest.mock('ioredis', () 
=> RedisMock); - process.env = { ...OLD_ENV }; // Make a copy - // new RedisMock().flushall(); - }); - - afterAll((done) => { - process.env = OLD_ENV; // Restore old environment - done(); - }); - - describe("ensure caching isn't used when it shouldn't be", () => { - test("don't use cache when explicitely disabled", async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(false); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when explicitely disabled by ENV", async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - process.env.RESULT_CACHING = 'false'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, 
nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when redis disabled", async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when redis specially disabled", async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - process.env.INTERNAL_DISABLE_REDIS = 'true'; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - 
const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - }); - - describe('test encoding/decoding', () => { - test('test encoder', async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const encoder = cacheHandler.createEncodeStream(); - - let encodedString = ''; - - await new Promise((resolve) => { - Readable.from(Record.freezeRecords(records)) - .pipe(encoder) - .on('data', async (chunk) => { - encodedString += chunk; - }) - .on('end', () => resolve()); - }); - - expect(encodedString).toBeTruthy(); - expect(encodedString.includes(',')).toBeTruthy(); - expect(encodedString.length).toBeLessThan(JSON.stringify(records).length); - }); - - test('test decoder', async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const encoder = cacheHandler.createEncodeStream(); - const decoder = cacheHandler.createDecodeStream(); - - let encodedString = ''; - - await new Promise((resolve) => { - Readable.from(Record.freezeRecords(records)) - .pipe(encoder) - .on('data', async (chunk) => { - encodedString += chunk; - }) - .on('end', () => resolve()); - }); - - const decodedObjs: unknown[] = []; - - await new Promise((resolve) => { - Readable.from(encodedString) - .pipe(decoder) - .on('data', async (obj) => { - decodedObjs.push(obj); - }) - .on('end', () => resolve()); - }); - - expect(decodedObjs).toStrictEqual(JSON.parse(JSON.stringify(Record.freezeRecords(records)))); - }); - }); - - describe('Test 
_hashEdgeByMetaKG', () => { - test('without metaKG', () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const hash = cacheHandler._hashEdgeByMetaKG('test'); - - expect(hash).toEqual('test'); - }); - - test('with metaKG', () => { - const fakeMetaKG1 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - { - association: { - smartapi: { - id: 'someFakeID2', - }, - }, - }, - ], - }; - const fakeMetaKG2 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - { - association: { - smartapi: { - id: 'someFakeID3', - }, - }, - }, - ], - }; - const fakeMetaKG3 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - ], - }; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler1 = new CacheHandler(true, fakeMetaKG1 as MetaKG); - const hash1 = cacheHandler1._hashEdgeByMetaKG('test'); - - const cacheHandler2 = new CacheHandler(true, fakeMetaKG2 as MetaKG); - const hash2 = cacheHandler2._hashEdgeByMetaKG('test'); - - const cacheHandler3 = new CacheHandler(true, fakeMetaKG3 as MetaKG); - const hash3 = cacheHandler3._hashEdgeByMetaKG('test'); - - expect(hash1 === 'test').toBeFalsy(); - expect(hash1 === hash2).toBeFalsy(); - expect(hash1 === hash3).toBeFalsy(); - expect(hash2 === hash3).toBeFalsy(); - }); - }); - - test('_groupQueryRecordsByQEdgeHash', () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const groups = cacheHandler._groupQueryRecordsByQEdgeHash(records); - - const numHashes = records.reduce((set, record) => { - set.add(record.qEdge.getHashedEdgeRepresentation()); - return set; - }, new Set()).size; - - 
expect(Object.keys(groups)).toHaveLength(numHashes); - expect( - Object.values(groups).reduce((arr, group) => { - arr = [...arr, ...group]; - return arr; - }, [] as unknown[]), - ).toHaveLength(records.length + numHashes); - }); - - test('caching and cache lookup', async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - - await cacheHandler.cacheEdges(records); - const qEdges: QEdge[] = Object.values( - records.reduce( - (obj, record) => { - if (!(record.qEdge.getHashedEdgeRepresentation() in obj)) { - obj[record.qEdge.getHashedEdgeRepresentation()] = record.qEdge as QEdge; - } - return obj; - }, - {} as { [qEdgeHash: string]: QEdge }, - ), - ); - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(nonCachedQEdges).toHaveLength(0); - expect(cachedRecords).toHaveLength(records.length); - // TODO get each record sorted by hash to compare individually - const originalRecordHashes = records.reduce((set, record) => { - set.add(record.recordHash); - return set; - }, new Set()); - const cachedRecordHashes = cachedRecords.reduce((set, record) => { - set.add(record.recordHash); - return set; - }, new Set()); - const setsMatch = [...originalRecordHashes].every((hash) => cachedRecordHashes.has(hash)); - expect(originalRecordHashes.size).toEqual(cachedRecordHashes.size); - expect(setsMatch).toBeTruthy(); - }); -}); From 06e2cc5c70448f0195d19a2d26af34e3b2a4f7df Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 12 Aug 2024 11:23:49 -0700 Subject: [PATCH 14/18] fix: pnpm uses packages in topological order --- .github/workflows/test_ws_codecov.yml | 2 +- package.json | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test_ws_codecov.yml b/.github/workflows/test_ws_codecov.yml index fbf7c9d9..54c3c6a7 100644 --- 
a/.github/workflows/test_ws_codecov.yml +++ b/.github/workflows/test_ws_codecov.yml @@ -32,7 +32,7 @@ jobs: run: | pnpm run clone pnpm run git checkout ${{ steps.branch-name.outputs.current_branch }} - pnpm i || pnpm i + pnpm i pnpm --filter query_graph_handler test-cov - name: Send coverage report to codecov for visualization diff --git a/package.json b/package.json index 6426eca2..3dd301c8 100644 --- a/package.json +++ b/package.json @@ -53,14 +53,14 @@ "typescript": "^5.2.2" }, "dependencies": { - "@biothings-explorer/api-response-transform": "workspace:../api-response-transform", - "@biothings-explorer/call-apis": "workspace:../call-apis", - "@biothings-explorer/node-expansion": "workspace:../node-expansion", - "@biothings-explorer/smartapi-kg": "workspace:../smartapi-kg", - "@biothings-explorer/utils": "workspace:../utils", - "@biothings-explorer/types": "workspace:../types", - "biolink-model": "workspace:../biolink-model", - "biomedical_id_resolver": "workspace:../biomedical_id_resolver", + "@biothings-explorer/api-response-transform": "workspace:*", + "@biothings-explorer/call-apis": "workspace:*", + "@biothings-explorer/node-expansion": "workspace:*", + "@biothings-explorer/smartapi-kg": "workspace:*", + "@biothings-explorer/utils": "workspace:*", + "@biothings-explorer/types": "workspace:*", + "biolink-model": "workspace:*", + "biomedical_id_resolver": "workspace:*", "@sentry/node": "^7.74.1", "async": "^3.2.4", "chi-square-p-value": "^1.0.5", From 1c1e8f9f320a742d99cfd8704fe6847f8d248e9f Mon Sep 17 00:00:00 2001 From: rjawesome Date: Mon, 12 Aug 2024 14:14:36 -0700 Subject: [PATCH 15/18] remove unneeded import --- src/results_assembly/pfocr.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 222c220c..40cdfe9d 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -5,7 +5,6 @@ import { intersection } from '../utils'; import _ from 'lodash'; import { LogEntry, 
StampedLog } from '@biothings-explorer/utils'; import { TrapiResult, TrapiKGNode, TrapiResponse, TrapiKGEdge } from '@biothings-explorer/types'; -import Graph from '../graph/graph'; // the minimum acceptable intersection size between the CURIEs // in a TRAPI result and in a PFOCR figure. From 8f9c3b5238a34849096d1795694bba4c0033c72d Mon Sep 17 00:00:00 2001 From: rjawesome Date: Tue, 13 Aug 2024 14:02:49 -0700 Subject: [PATCH 16/18] allow inferred mode test to run --- __test__/unittest/inferred_mode.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/__test__/unittest/inferred_mode.test.ts b/__test__/unittest/inferred_mode.test.ts index 7f452e47..a195b4de 100644 --- a/__test__/unittest/inferred_mode.test.ts +++ b/__test__/unittest/inferred_mode.test.ts @@ -202,6 +202,7 @@ describe('Test InferredQueryHandler', () => { }, }, template: 'Chem-treats-DoP.json', + qualifiers: {} }, ]; From bf42f1c23186db3675c163a4a3ffdb202132fa17 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Fri, 20 Sep 2024 10:45:42 -0700 Subject: [PATCH 17/18] minor import fixes --- src/inferred_mode/inferred_mode.ts | 1 + src/inferred_mode/pathfinder.ts | 2 +- src/inferred_mode/pf_template_generator.ts | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 41de5b59..b82f5063 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -680,6 +680,7 @@ export default class InferredQueryHandler { qEdgeID, qEdge, combinedResponse, + auxGraphSuffixes ); successfulQueries += querySuccess; if (queryHadResults) resultQueries.push(i); diff --git a/src/inferred_mode/pathfinder.ts b/src/inferred_mode/pathfinder.ts index f366d256..4f4c90f5 100644 --- a/src/inferred_mode/pathfinder.ts +++ b/src/inferred_mode/pathfinder.ts @@ -14,7 +14,7 @@ import { scaled_sigmoid, inverse_scaled_sigmoid } from '../results_assembly/scor import { LogEntry, StampedLog, Telemetry } from 
'@biothings-explorer/utils'; import Debug from 'debug'; import generateTemplates from './pf_template_generator'; -import biolink from '../biolink'; +import { biolink } from '@biothings-explorer/utils'; import { removeBioLinkPrefix } from '../utils'; const debug = Debug('bte:biothings-explorer-trapi:pathfinder'); diff --git a/src/inferred_mode/pf_template_generator.ts b/src/inferred_mode/pf_template_generator.ts index dfbb5f6f..24e6f1e1 100644 --- a/src/inferred_mode/pf_template_generator.ts +++ b/src/inferred_mode/pf_template_generator.ts @@ -1,7 +1,7 @@ import path from "path"; import fs from "fs/promises"; import yaml2json from "js-yaml"; -import biolink from "../biolink"; +import { biolink } from '@biothings-explorer/utils'; import { TrapiQNode, TrapiQueryGraph } from "@biothings-explorer/types"; interface CategoryTable { From 43747d57584c329c9cd5650ef3a7ba1aa2602f40 Mon Sep 17 00:00:00 2001 From: rjawesome Date: Fri, 20 Sep 2024 17:05:52 -0700 Subject: [PATCH 18/18] handle duplicate queries --- src/batch_edge_query.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index b4576545..e8d16213 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -68,9 +68,10 @@ export default class BatchEdgeQueryHandler { }); const queriesByHash = Object.fromEntries(queries.map((query) => [query.hash, query])); + const queryCount = Object.keys(queriesByHash).length; // some duplicates may have been removed const qEdge = APIEdges[0].reasoner_edge; - const message = `${queries.length} planned queries for edge ${qEdge.id}`; + const message = `${queryCount} planned queries for edge ${qEdge.id}`; debug(message); this.logs.push(new LogEntry('INFO', null, message).getLog()); let finishedCount = 0; @@ -106,7 +107,7 @@ export default class BatchEdgeQueryHandler { } finishedCount += 1; - if (finishedCount >= queries.length) { + if (finishedCount >= queryCount) { debug(`Total number of records returned 
for qEdge ${qEdge.id} is ${completedRecords.length}`); resolve(completedRecords); global.workerSide.off('message', listener); @@ -124,7 +125,7 @@ export default class BatchEdgeQueryHandler { threadId, type: 'subqueryRequest', value: { - queries: queries.map((query) => query.freeze()), + queries: Object.values(queriesByHash).map((query) => query.freeze()), // queriesByHash prevents duplicates options: this.options, }, } satisfies ThreadMessage);