diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index 7694b66f..c349704f 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -173,10 +173,8 @@ export default class InferredQueryHandler { const qualifierConstraints = (qEdge.qualifier_constraints || []).map((qualifierSetObj) => { return Object.fromEntries( qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - qualifier_type_id.replace('biolink:', ''), - Array.isArray(qualifier_value) - ? qualifier_value.map((string) => string.replace('biolink:', '')) - : qualifier_value.replace('biolink:', ''), + qualifier_type_id, + qualifier_value, ]), ) as CompactQualifiers; }); @@ -262,7 +260,7 @@ export default class InferredQueryHandler { qEdge: TrapiQEdge, combinedResponse: CombinedResponse, auxGraphSuffixes: { [inferredEdgeID: string]: number }, - qualifers?: CompactQualifiers, + qualifiers?: CompactQualifiers, ): CombinedResponseReport { const span = Telemetry.startSpan({ description: 'creativeCombineResponse' }); const newResponse = handler.getResponse(); @@ -331,37 +329,82 @@ export default class InferredQueryHandler { .join(','); const resultID = `${resultCreativeSubjectID}-${resultCreativeObjectID}`; - // Direct edge answers stand on their own, not as an inferred edge. - const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id; - const boundEdge = combinedResponse.message.knowledge_graph.edges[boundEdgeID]; - const specialHandling = [ - Object.keys(result.node_bindings).length === 2, // Direct edge - // Predicate matches or is descendant - qEdge.predicates?.some( - (predicate) => - predicate === boundEdge.predicate || - biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate), - ) ?? false, - // All query qualifiers (if any) are accounted for (more is fine) - qEdge.qualifier_constraints?.some(({ qualifier_set }) => { - return qualifier_set.every((queryQualifier) => - boundEdge.qualifiers?.some( - (qualifier) => - queryQualifier.qualifier_type_id === qualifier.qualifier_type_id && - queryQualifier.qualifier_value === qualifier.qualifier_value, - ) ?? false, - ); - }) ?? false, - ].every((test) => test); - if (specialHandling) { - translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] }; - } else { + // Direct edge answers stand on their own (assuming some match criteria), not as an inferred edge. + // A given one-hop result may bind both matching and non-matching edges + const oneHop = Object.keys(result.node_bindings).length === 2; + const resultEdgeID = Object.keys(result.analyses[0].edge_bindings)[0]; // Only useful if direct edge + const nonMatchingEdges = []; + let useInferredEdge = + !oneHop || + result.analyses[0].edge_bindings[resultEdgeID] + .map(({ id }) => { + // If an edge doesn't match, add it to nonMatchingEdges and return false + const boundEdge = combinedResponse.message.knowledge_graph.edges[id]; + // Predicate matches or is descendant + const predicateMatch = + qEdge.predicates?.some((predicate) => { + const descendantMatch = biolink + .getDescendantPredicates(utils.removeBioLinkPrefix(predicate)) + .includes(utils.removeBioLinkPrefix(boundEdge.predicate)); + return predicate === boundEdge.predicate || descendantMatch; + }) ?? false; + // All query qualifiers (if any) are accounted for (more is fine) + const qualifierMatch = + !qEdge.qualifier_constraints || + qEdge.qualifier_constraints.length === 0 || + qEdge.qualifier_constraints?.some(({ qualifier_set }) => { + return qualifier_set.every((queryQualifier) => { + return ( + boundEdge.qualifiers?.some((qualifier) => { + const typeMatch = queryQualifier.qualifier_type_id === qualifier.qualifier_type_id; + let valueMatch: boolean; + try { + const descendants = queryQualifier.qualifier_value.includes('biolink:') + ? biolink.getDescendantPredicates( + utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string), + ) + : biolink.getDescendantQualifiers( + utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string), + ); + valueMatch = + queryQualifier.qualifier_value === qualifier.qualifier_value || + descendants.includes(utils.removeBioLinkPrefix(qualifier.qualifier_value as string)); + } catch (err) { + valueMatch = queryQualifier.qualifier_value === qualifier.qualifier_value; + } + return typeMatch && valueMatch; + }) ?? false + ); + }); + }); + if (!(predicateMatch && qualifierMatch)) { + nonMatchingEdges.push(id); + return false; + } + if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) { + translatedResult.analyses[0].edge_bindings[qEdgeID] = []; + } + translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id, attributes: [] }); + return true; + }) + .includes(false); + + // If result was one-hop and some edges didn't match, pull them out to put in an inferred edge + if (oneHop && nonMatchingEdges.length > 0) { + result.analyses[0].edge_bindings[resultEdgeID] = result.analyses[0].edge_bindings[resultEdgeID].filter( + ({ id }) => nonMatchingEdges.includes(id), + ); + } + if (useInferredEdge) { // Create an aux graph using the result and associate it with an inferred Edge const inferredEdgeID = `inferred-${resultCreativeSubjectID}-${qEdge.predicates[0].replace( 'biolink:', '', )}-${resultCreativeObjectID}`; - translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: inferredEdgeID, attributes: [] }] }; + if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) { + translatedResult.analyses[0].edge_bindings[qEdgeID] = []; + } + translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id: inferredEdgeID, attributes: [] }); if (!combinedResponse.message.knowledge_graph.edges[inferredEdgeID]) { combinedResponse.message.knowledge_graph.edges[inferredEdgeID] = { subject: resultCreativeSubjectID, @@ -387,11 +430,11 @@ export default class InferredQueryHandler { auxGraphSuffixes[inferredEdgeID]++; // Add qualifiers to edge if ( - typeof qualifers == 'object' && - Object.keys(qualifers).length > 0 && + typeof qualifiers == 'object' && + Object.keys(qualifiers).length > 0 && !combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers ) { - combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifers).map( + combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifiers).map( ([qualifierType, qualifierValue]) => ({ qualifier_type_id: qualifierType, qualifier_value: qualifierValue, @@ -453,9 +496,9 @@ export default class InferredQueryHandler { if (typeof combinedResponse.message.results[resultID].analyses[0].score !== 'undefined') { combinedResponse.message.results[resultID].analyses[0].score = resScore ? scaled_sigmoid( - inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) + - inverse_scaled_sigmoid(resScore), - ) + inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) + + inverse_scaled_sigmoid(resScore), + ) : combinedResponse.message.results[resultID].analyses[0].score; } else { combinedResponse.message.results[resultID].analyses[0].score = resScore; @@ -627,9 +670,12 @@ export default class InferredQueryHandler { stop = true; const message = [ `Addition of ${creativeLimitHit} results from Template ${i + 1}`, - creativeLimitHit === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ', - `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${creativeLimitHit} merged). `, - `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${subQueries.length - (i + 1) + Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ', + `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${ + Object.keys(combinedResponse.message.results).length + } merged). `, + `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${ + subQueries.length - (i + 1) } `, subQueries.length - (i + 1) === 1 ? `template.` : `templates.`, ].join(''); @@ -654,8 +700,9 @@ export default class InferredQueryHandler { const total = Object.values(mergedResultsCount).reduce((sum, count) => sum + count, 0) + Object.keys(mergedResultsCount).length; - const message = `Merging Summary: (${total}) inferred-template results were merged into (${Object.keys(mergedResultsCount).length - }) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`; + const message = `Merging Summary: (${total}) inferred-template results were merged into (${ + Object.keys(mergedResultsCount).length + }) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`; debug(message); combinedResponse.logs.push(new LogEntry('INFO', null, message).getLog()); } diff --git a/src/inferred_mode/template_lookup.ts b/src/inferred_mode/template_lookup.ts index 929e8971..dbd2f19a 100644 --- a/src/inferred_mode/template_lookup.ts +++ b/src/inferred_mode/template_lookup.ts @@ -61,25 +61,27 @@ export async function getTemplates(lookups: TemplateLookup[], pathfinder = false await fs.readFile(path.resolve(__dirname, '../../data/templateGroups.json'), { encoding: 'utf8' }), ); const matchingTemplatePaths: PathMatch[] = templateGroups.reduce((matches: PathMatch[], group: TemplateGroup) => { - let matchingQualifers: CompactQualifiers; + let matchingQualifiers: CompactQualifiers; const lookupMatch = lookups.some((lookup) => { - const match = ( - (!!group.pathfinder === pathfinder) && + const match = + !!group.pathfinder === pathfinder && group.subject.includes(lookup.subject) && group.object.includes(lookup.object) && group.predicate.includes(lookup.predicate) && Object.entries(lookup.qualifiers || {}).every(([qualifierType, qualifierValue]) => { - return (group.qualifiers || {})[qualifierType] && group.qualifiers[qualifierType] === qualifierValue; - }) - ); - if (match) matchingQualifers = lookup.qualifiers; + return ( + (group.qualifiers || {})[qualifierType.replace('biolink:', '')] && + group.qualifiers[qualifierType.replace('biolink:', '')] === qualifierValue.replace('biolink:', '') + ); + }); + if (match) matchingQualifiers = lookup.qualifiers; return match; }); if (lookupMatch) { group.templates.forEach((template) => { - if (!matches.find(t => t.path === templatePaths[template])) { - matches.push({ path: templatePaths[template], qualifiers: matchingQualifers }); + if (!matches.find((t) => t.path === templatePaths[template])) { + matches.push({ path: templatePaths[template], qualifiers: matchingQualifiers }); } }); } diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index c122adb6..222c220c 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -64,14 +64,19 @@ async function getAllByScrolling( hits: RawFigureResult[] = [], ): Promise { queryBody.from = batchIndex; - const { data } = await axios.post(baseUrl, queryBody).catch((err) => { - debug('Error in scrolling request', err); - throw err; - }); + let data: { hits: RawFigureResult[]; max_total: number }; + try { + data = (await axios.post(baseUrl, queryBody, { timeout: 15000 })).data; + } catch (err) { + debug(`Error in scrolling request window ${batchIndex}-${batchIndex + 1000}, error is ${(err as Error).message}`); + } + + if (data) { + hits.push(...data.hits); + debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`); + } - hits.push(...data.hits); - debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`); - if (batchIndex + 1000 < data.max_total) { + if (data && batchIndex + 1000 < data.max_total) { return await getAllByScrolling(baseUrl, queryBody, batchIndex + 1000, hits); } else { return hits; @@ -82,7 +87,12 @@ async function getAllByScrolling( */ async function getPfocrFigures(qTerms: Set): Promise { debug(`Getting PFOCR figure data`); - const url = 'https://biothings.ncats.io/pfocr/query'; + const url = { + dev: 'https://biothings.ci.transltr.io/pfocr/query', + ci: 'https://biothings.ci.transltr.io/pfocr/query', + test: 'https://biothings.test.transltr.io/pfocr/query', + prod: 'https://biothings.ncats.io/pfocr/query', + }[process.env.INSTANCE_ENV ?? 'prod']; /* * We can now POST using minimum_should_match to bypass most set logic on our side * detailed here: https://github.com/biothings/pending.api/issues/88 @@ -210,10 +220,20 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse return logs; } - const figures = await getPfocrFigures(curieCombos).catch((err) => { - debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err); - throw err; - }); + let figures: DeDupedFigureResult[]; + try { + figures = await getPfocrFigures(curieCombos); + } catch (err) { + debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', (err as Error).message); + logs.push( + new LogEntry( + 'WARNING', + null, + `Error getting PFOCR figures, results will not be enriched. The error is ${err.message}`, + ).getLog(), + ); + } + if (!figures) return logs; debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} nodes from any TRAPI result`);