diff --git a/src/edge_manager.ts b/src/edge_manager.ts index dabacb86..92a7a74b 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -10,7 +10,7 @@ import QEdge from './query_edge'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { QueryHandlerOptions } from '@biothings-explorer/types'; import { Record } from '@biothings-explorer/api-response-transform'; -import { UnavailableAPITracker } from './types'; +import { SubclassEdges, UnavailableAPITracker } from './types'; import { RecordsByQEdgeID } from './results_assembly/query_results'; import path from 'path'; import { promises as fs } from 'fs'; @@ -22,7 +22,8 @@ export default class QueryEdgeManager { private _records: Record[]; options: QueryHandlerOptions; private _organizedRecords: RecordsByQEdgeID; - constructor(edges: QEdge[], metaKG: MetaKG, options: QueryHandlerOptions) { + private _subclassEdges: SubclassEdges; + constructor(edges: QEdge[], metaKG: MetaKG, subclassEdges: SubclassEdges, options: QueryHandlerOptions) { // flatten list of all edges available this._qEdges = _.flatten(edges); this._metaKG = metaKG; @@ -31,6 +32,7 @@ export default class QueryEdgeManager { //organized by edge with refs to connected edges this._organizedRecords = {}; this.options = options; + this._subclassEdges = subclassEdges; this.init(); } @@ -203,8 +205,22 @@ export default class QueryEdgeManager { records.forEach((record) => { // check against original, primaryID, and equivalent ids - const subjectIDs = [record.subject.original, record.subject.curie, ...record.subject.equivalentCuries]; - const objectIDs = [record.object.original, record.object.curie, ...record.object.equivalentCuries]; + let subjectIDs = [record.subject.original, record.subject.curie, ...record.subject.equivalentCuries]; + let objectIDs = [record.object.original, record.object.curie, ...record.object.equivalentCuries]; + + // check if IDs will be resolved to a parent + subjectIDs = [...subjectIDs, ...subjectIDs.reduce((set, subjectID) => { + Object.entries(this._subclassEdges[subjectID] ?? {}).forEach(([id, qNodes]) => { + if (qNodes.includes(qEdge.reverse ? qEdge.object.id : qEdge.subject.id)) set.add(id); + }); + return set; + }, new Set())]; + objectIDs = [...objectIDs, ...objectIDs.reduce((set, objectID) => { + Object.entries(this._subclassEdges[objectID] ?? {}).forEach(([id, qNodes]) => { + if (qNodes.includes(qEdge.reverse ? qEdge.subject.id : qEdge.object.id)) set.add(id); + }); + return set; + }, new Set())]; // there must be at least a minimal intersection const subjectMatch = subjectIDs.some((curie) => execSubjectCuries.includes(curie)); diff --git a/src/index.ts b/src/index.ts index 09df0518..d2b5300f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -31,6 +31,7 @@ import BTEGraph from './graph/graph'; import QEdge from './query_edge'; import { Telemetry } from '@biothings-explorer/utils'; import { enrichTrapiResultsWithPfocrFigures } from './results_assembly/pfocr'; +import { SubclassEdges } from './types'; // Exports for external availability export * from './types'; @@ -46,7 +47,7 @@ export default class TRAPIQueryHandler { includeReasoner: boolean; path: string; predicatePath: string; - subclassEdges: { [expandedID: string]: string }; + subclassEdges: SubclassEdges; originalQueryGraph: TrapiQueryGraph; bteGraph: BTEGraph; knowledgeGraph: KnowledgeGraph; @@ -169,14 +170,12 @@ export default class TRAPIQueryHandler { }); // Create subclass edges for nodes that were expanded - const nodesToRebind: { [nodeID: string]: { newNode: string; subclassEdgeID: string } } = {}; + const nodesToRebind: { [nodeID: string]: { [qEdgeID: string]: { newNode: string; subclassEdgeID: string } } } = {}; Object.keys(this.bteGraph.nodes).forEach((nodeID) => { - const subclassCuries = [...(expandedIDsbyPrimaryID[nodeID] ?? [])]?.map((expandedID) => [ - this.subclassEdges[expandedID], - expandedID, - ]); + const subclassCuries = []; + expandedIDsbyPrimaryID[nodeID]?.forEach((expandedID) => Object.keys(this.subclassEdges[expandedID]).forEach((parentID) => subclassCuries.push({ original: parentID, expanded: expandedID }))); if (!subclassCuries.length) return; // Nothing to rebind - subclassCuries.forEach(([original, expanded]) => { + subclassCuries.forEach(({original, expanded}) => { const subject = nodeID; const object = primaryIDsByOriginalID[original]; // Don't keep self-subclass @@ -203,77 +202,101 @@ export default class TRAPIQueryHandler { }, ]); this.bteGraph.edges[subclassEdgeID] = subclassEdge; - nodesToRebind[subject] = { newNode: object, subclassEdgeID }; + if (!nodesToRebind[subject]) nodesToRebind[subject] = {}; + this.subclassEdges[expanded][original].forEach((qNodeID) => nodesToRebind[subject][qNodeID] = { newNode: object, subclassEdgeID }); }); }); // Create new constructed edges and aux graphs for edges that used subclass edges let auxGraphs: { [supportGraphID: string]: TrapiAuxiliaryGraph } = {}; - const edgesToRebind = {}; + const edgesToRebind: { [edgeID: string]: { [originalSubject: string]: { [originalObject: string]: string /* re-bound edge ID */ } } } = {}; const edgesIDsByAuxGraphID = {}; Object.entries(this.bteGraph.edges).forEach(([edgeID, bteEdge]) => { if (edgeID.includes('expanded')) return; - const supportGraph = [edgeID]; - const [subject, object] = [bteEdge.subject, bteEdge.object].map((edgeNodeID) => { - if (!nodesToRebind[edgeNodeID]) { - return edgeNodeID; // nothing to rebind + const combos: {subject: string, object: string, supportGraph: string[]}[] = []; + const subjectToSupportGraphs: {[sbj: string]: Set} = { + [bteEdge.subject]: new Set(), + ...Object.values(nodesToRebind[bteEdge.subject] ?? {}).reduce((acc, x) => { + x.newNode in acc ? acc[x.newNode].add(x.subclassEdgeID) : acc[x.newNode] = new Set([x.subclassEdgeID]) + return acc; + }, {}) + }; + const objectToSupportGraphs: {[obj: string]: Set} = { + [bteEdge.object]: new Set(), + ...Object.values(nodesToRebind[bteEdge.object] ?? {}).reduce((acc, x) => { + x.newNode in acc ? acc[x.newNode].add(x.subclassEdgeID) : acc[x.newNode] = new Set([x.subclassEdgeID]); + return acc; + }, {}) + }; + for (const subject in subjectToSupportGraphs) { + for (const object in objectToSupportGraphs) { + if (subject == bteEdge.subject && object == bteEdge.object) continue; // no nodes are rebound + combos.push({ subject, object, supportGraph: [...subjectToSupportGraphs[subject], ...objectToSupportGraphs[object], edgeID] }); } - supportGraph.push(nodesToRebind[edgeNodeID].subclassEdgeID); - return nodesToRebind[edgeNodeID].newNode; - }); - - if (supportGraph.length === 1) return; // no subclasses - const boundEdgeID = `${subject}-${bteEdge.predicate.replace('biolink:', '')}-${object}-via_subclass`; - let suffix = 0; - while (Object.keys(auxGraphs).includes(`support${suffix}-${boundEdgeID}`)) { - suffix += 1; - } - const supportGraphID = `support${suffix}-${boundEdgeID}`; - auxGraphs[supportGraphID] = { edges: supportGraph, attributes: [] }; - if (!edgesIDsByAuxGraphID[supportGraphID]) { - edgesIDsByAuxGraphID[supportGraphID] = new Set(); } - edgesIDsByAuxGraphID[supportGraphID].add(boundEdgeID); - if (!this.bteGraph.edges[boundEdgeID]) { - const boundEdge = new KGEdge(boundEdgeID, { - predicate: bteEdge.predicate, - subject: subject, - object: object, - }); - boundEdge.addAdditionalAttributes('biolink:support_graphs', [supportGraphID]); - boundEdge.addAdditionalAttributes('biolink:knowledge_level', 'logical_entailment') - boundEdge.addAdditionalAttributes('biolink:agent_type', 'automated_agent') - boundEdge.addSource([ - { - resource_id: this.options.provenanceUsesServiceProvider - ? 'infores:service-provider-trapi' - : 'infores:biothings-explorer', - resource_role: 'primary_knowledge_source', - }, - ]); - this.bteGraph.edges[boundEdgeID] = boundEdge; - } else { - (this.bteGraph.edges[boundEdgeID].attributes['biolink:support_graphs'] as Set).add(supportGraphID); - } - edgesToRebind[edgeID] = boundEdgeID; + + combos.forEach(({subject, object, supportGraph}) => { + const boundEdgeID = `${subject}-${bteEdge.predicate.replace('biolink:', '')}-${object}-via_subclass`; + let suffix = 0; + while (Object.keys(auxGraphs).includes(`support${suffix}-${boundEdgeID}`)) { + suffix += 1; + } + const supportGraphID = `support${suffix}-${boundEdgeID}`; + auxGraphs[supportGraphID] = { edges: supportGraph, attributes: [] }; + if (!edgesIDsByAuxGraphID[supportGraphID]) { + edgesIDsByAuxGraphID[supportGraphID] = new Set(); + } + edgesIDsByAuxGraphID[supportGraphID].add(boundEdgeID); + if (!this.bteGraph.edges[boundEdgeID]) { + const boundEdge = new KGEdge(boundEdgeID, { + predicate: bteEdge.predicate, + subject: subject, + object: object, + }); + boundEdge.addAdditionalAttributes('biolink:support_graphs', [supportGraphID]); + boundEdge.addAdditionalAttributes('biolink:knowledge_level', 'logical_entailment') + boundEdge.addAdditionalAttributes('biolink:agent_type', 'automated_agent') + boundEdge.addSource([ + { + resource_id: this.options.provenanceUsesServiceProvider + ? 'infores:service-provider-trapi' + : 'infores:biothings-explorer', + resource_role: 'primary_knowledge_source', + }, + ]); + this.bteGraph.edges[boundEdgeID] = boundEdge; + } else { + (this.bteGraph.edges[boundEdgeID].attributes['biolink:support_graphs'] as Set).add(supportGraphID); + } + if (!edgesToRebind[edgeID]) edgesToRebind[edgeID] = {}; + if (!edgesToRebind[edgeID][subject]) edgesToRebind[edgeID][subject] = {}; + edgesToRebind[edgeID][subject][object] = boundEdgeID; + }) }); const resultBoundEdgesWithAuxGraphs = new Set(); const fixedResults = this.trapiResultsAssembler.getResults().map((result) => { - result.node_bindings = Object.fromEntries( - Object.entries(result.node_bindings).map(([qNodeID, bindings]) => { + result.analyses[0].edge_bindings = Object.fromEntries( + Object.entries(result.analyses[0].edge_bindings).map(([qEdgeID, bindings]) => { + const subQNode = this.queryGraph.edges[qEdgeID].subject; + const objQNode = this.queryGraph.edges[qEdgeID].object; return [ - qNodeID, + qEdgeID, bindings.reduce( ({ boundIDs, newBindings }, binding) => { - if (!nodesToRebind[binding.id]) { + const originalSub = this.bteGraph.edges[binding.id].subject; + const originalObj = this.bteGraph.edges[binding.id].object; + const subId = nodesToRebind[originalSub]?.[subQNode]?.newNode ?? originalSub; + const objId = nodesToRebind[originalObj]?.[objQNode]?.newNode ?? originalObj; + if (!edgesToRebind[binding.id]?.[subId]?.[objId]) { if (!boundIDs.has(binding.id)) { newBindings.push(binding); boundIDs.add(binding.id); } - } else if (!boundIDs.has(nodesToRebind[binding.id].newNode)) { - newBindings.push({ id: nodesToRebind[binding.id].newNode, attributes: [] }); - boundIDs.add(nodesToRebind[binding.id].newNode); + } else if (!boundIDs.has(edgesToRebind[binding.id]?.[subId]?.[objId])) { + newBindings.push({ id: edgesToRebind[binding.id]?.[subId]?.[objId], attributes: [] }); + boundIDs.add(edgesToRebind[binding.id]?.[subId]?.[objId]); + resultBoundEdgesWithAuxGraphs.add(edgesToRebind[binding.id]?.[subId]?.[objId]); } return { boundIDs, newBindings }; }, @@ -282,21 +305,21 @@ export default class TRAPIQueryHandler { ]; }), ); - result.analyses[0].edge_bindings = Object.fromEntries( - Object.entries(result.analyses[0].edge_bindings).map(([qEdgeID, bindings]) => { + + result.node_bindings = Object.fromEntries( + Object.entries(result.node_bindings).map(([qNodeID, bindings]) => { return [ - qEdgeID, + qNodeID, bindings.reduce( ({ boundIDs, newBindings }, binding) => { - if (!edgesToRebind[binding.id]) { + if (!nodesToRebind[binding.id]?.[qNodeID]) { if (!boundIDs.has(binding.id)) { newBindings.push(binding); boundIDs.add(binding.id); } - } else if (!boundIDs.has(edgesToRebind[binding.id])) { - newBindings.push({ id: edgesToRebind[binding.id], attributes: [] }); - boundIDs.add(edgesToRebind[binding.id]); - resultBoundEdgesWithAuxGraphs.add(edgesToRebind[binding.id]); + } else if (!boundIDs.has(nodesToRebind[binding.id][qNodeID].newNode)) { + newBindings.push({ id: nodesToRebind[binding.id][qNodeID].newNode, attributes: [] }); + boundIDs.add(nodesToRebind[binding.id][qNodeID].newNode); } return { boundIDs, newBindings }; }, @@ -305,6 +328,7 @@ export default class TRAPIQueryHandler { ]; }), ); + return result; }); @@ -404,7 +428,9 @@ export default class TRAPIQueryHandler { Object.entries(descendantsByCurie).forEach(([curie, descendants]) => { descendants.forEach((descendant) => { if (queryGraph.nodes[nodeId].ids.includes(descendant)) return; - this.subclassEdges[descendant] = curie; + if (!this.subclassEdges[descendant]) this.subclassEdges[descendant] = {}; + if (!this.subclassEdges[descendant][curie]) this.subclassEdges[descendant][curie] = []; + this.subclassEdges[descendant][curie].push(nodeId); }); }); } @@ -441,7 +467,6 @@ export default class TRAPIQueryHandler { if (err instanceof InvalidQueryGraphError || err instanceof SRINodeNormFailure) { throw err; } else { - console.log(err.stack); throw new InvalidQueryGraphError(); } } @@ -456,7 +481,7 @@ export default class TRAPIQueryHandler { // _.cloneDeep() is resource-intensive but only runs once per query qEdges = _.cloneDeep(qEdges); - const manager = new EdgeManager(qEdges, metaKG, this.options); + const manager = new EdgeManager(qEdges, metaKG, this.subclassEdges, this.options); const qEdgesMissingOps: { [qEdgeID: string]: boolean } = {}; while (manager.getEdgesNotExecuted()) { const currentQEdge = manager.getNext(); @@ -718,7 +743,7 @@ export default class TRAPIQueryHandler { if (!(await this._edgesSupported(queryEdges, metaKG))) { return; } - const manager = new EdgeManager(queryEdges, metaKG, this.options); + const manager = new EdgeManager(queryEdges, metaKG, this.subclassEdges, this.options); const executionSuccess = await manager.executeEdges(); this.logs = [...this.logs, ...manager.logs]; diff --git a/src/types.ts b/src/types.ts index 21388766..d00065a5 100644 --- a/src/types.ts +++ b/src/types.ts @@ -7,3 +7,9 @@ export interface UnavailableAPITracker { export interface CompactQualifiers { [qualifier_type_id: string]: string; } + +export interface SubclassEdges { + [expandedID: string]: { + [parentID: string]: string[] /* QNode IDs */ + } +} \ No newline at end of file