Commit a497bc0

tokebe committed Aug 19, 2024
2 parents 7d66134 + 5789f6c commit a497bc0
Showing 3 changed files with 132 additions and 63 deletions.
131 changes: 89 additions & 42 deletions src/inferred_mode/inferred_mode.ts
@@ -173,10 +173,8 @@ export default class InferredQueryHandler {
const qualifierConstraints = (qEdge.qualifier_constraints || []).map((qualifierSetObj) => {
return Object.fromEntries(
qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [
qualifier_type_id.replace('biolink:', ''),
Array.isArray(qualifier_value)
? qualifier_value.map((string) => string.replace('biolink:', ''))
: qualifier_value.replace('biolink:', ''),
qualifier_type_id,
qualifier_value,
]),
) as CompactQualifiers;
});
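
Editor's note: this first hunk stops stripping the `biolink:` prefix when a query edge's qualifier constraints are flattened into `CompactQualifiers`, so qualifier type IDs and values now pass through unchanged. A minimal sketch of the resulting mapping (types simplified and the example qualifier illustrative, not taken from the repo):

```ts
// Minimal sketch of the qualifier flattening after this change; CompactQualifiers is assumed
// to be a record of qualifier_type_id to qualifier_value (string or string[]).
type Qualifier = { qualifier_type_id: string; qualifier_value: string | string[] };
type CompactQualifiers = { [qualifier_type_id: string]: string | string[] };

function toCompactQualifiers(qualifier_set: Qualifier[]): CompactQualifiers {
  // 'biolink:' prefixes are preserved rather than stripped.
  return Object.fromEntries(
    qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [qualifier_type_id, qualifier_value]),
  ) as CompactQualifiers;
}

// Example (illustrative values): the prefixed type ID is kept as-is.
toCompactQualifiers([
  { qualifier_type_id: 'biolink:object_aspect_qualifier', qualifier_value: 'activity_or_abundance' },
]);
// => { 'biolink:object_aspect_qualifier': 'activity_or_abundance' }
```
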
@@ -262,7 +260,7 @@ export default class InferredQueryHandler {
qEdge: TrapiQEdge,
combinedResponse: CombinedResponse,
auxGraphSuffixes: { [inferredEdgeID: string]: number },
qualifers?: CompactQualifiers,
qualifiers?: CompactQualifiers,
): CombinedResponseReport {
const span = Telemetry.startSpan({ description: 'creativeCombineResponse' });
const newResponse = handler.getResponse();
@@ -331,37 +329,82 @@
.join(',');
const resultID = `${resultCreativeSubjectID}-${resultCreativeObjectID}`;

// Direct edge answers stand on their own, not as an inferred edge.
const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id;
const boundEdge = combinedResponse.message.knowledge_graph.edges[boundEdgeID];
const specialHandling = [
Object.keys(result.node_bindings).length === 2, // Direct edge
// Predicate matches or is descendant
qEdge.predicates?.some(
(predicate) =>
predicate === boundEdge.predicate ||
biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate),
) ?? false,
// All query qualifiers (if any) are accounted for (more is fine)
qEdge.qualifier_constraints?.some(({ qualifier_set }) => {
return qualifier_set.every((queryQualifier) =>
boundEdge.qualifiers?.some(
(qualifier) =>
queryQualifier.qualifier_type_id === qualifier.qualifier_type_id &&
queryQualifier.qualifier_value === qualifier.qualifier_value,
) ?? false,
);
}) ?? false,
].every((test) => test);
if (specialHandling) {
translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] };
} else {
// Direct edge answers stand on their own (assuming some match criteria), not as an inferred edge.
// A given one-hop result may bind both matching and non-matching edges
const oneHop = Object.keys(result.node_bindings).length === 2;
const resultEdgeID = Object.keys(result.analyses[0].edge_bindings)[0]; // Only useful if direct edge
const nonMatchingEdges = [];
let useInferredEdge =
!oneHop ||
result.analyses[0].edge_bindings[resultEdgeID]
.map(({ id }) => {
// If an edge doesn't match, add it to nonMatchingEdges and return false
const boundEdge = combinedResponse.message.knowledge_graph.edges[id];
// Predicate matches or is descendant
const predicateMatch =
qEdge.predicates?.some((predicate) => {
const descendantMatch = biolink
.getDescendantPredicates(utils.removeBioLinkPrefix(predicate))
.includes(utils.removeBioLinkPrefix(boundEdge.predicate));
return predicate === boundEdge.predicate || descendantMatch;
}) ?? false;
// All query qualifiers (if any) are accounted for (more is fine)
const qualifierMatch =
!qEdge.qualifier_constraints ||
qEdge.qualifier_constraints.length === 0 ||
qEdge.qualifier_constraints?.some(({ qualifier_set }) => {
return qualifier_set.every((queryQualifier) => {
return (
boundEdge.qualifiers?.some((qualifier) => {
const typeMatch = queryQualifier.qualifier_type_id === qualifier.qualifier_type_id;
let valueMatch: boolean;
try {
const descendants = queryQualifier.qualifier_value.includes('biolink:')
? biolink.getDescendantPredicates(
utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string),
)
: biolink.getDescendantQualifiers(
utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string),
);
valueMatch =
queryQualifier.qualifier_value === qualifier.qualifier_value ||
descendants.includes(utils.removeBioLinkPrefix(qualifier.qualifier_value as string));
} catch (err) {
valueMatch = queryQualifier.qualifier_value === qualifier.qualifier_value;
}
return typeMatch && valueMatch;
}) ?? false
);
});
});
if (!(predicateMatch && qualifierMatch)) {
nonMatchingEdges.push(id);
return false;
}
if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) {
translatedResult.analyses[0].edge_bindings[qEdgeID] = [];
}
translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id, attributes: [] });
return true;
})
.includes(false);

// If result was one-hop and some edges didn't match, pull them out to put in an inferred edge
if (oneHop && nonMatchingEdges.length > 0) {
result.analyses[0].edge_bindings[resultEdgeID] = result.analyses[0].edge_bindings[resultEdgeID].filter(
({ id }) => nonMatchingEdges.includes(id),
);
}
if (useInferredEdge) {
// Create an aux graph using the result and associate it with an inferred Edge
const inferredEdgeID = `inferred-${resultCreativeSubjectID}-${qEdge.predicates[0].replace(
'biolink:',
'',
)}-${resultCreativeObjectID}`;
translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: inferredEdgeID, attributes: [] }] };
if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) {
translatedResult.analyses[0].edge_bindings[qEdgeID] = [];
}
translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id: inferredEdgeID, attributes: [] });
if (!combinedResponse.message.knowledge_graph.edges[inferredEdgeID]) {
combinedResponse.message.knowledge_graph.edges[inferredEdgeID] = {
subject: resultCreativeSubjectID,
@@ -387,11 +430,11 @@
auxGraphSuffixes[inferredEdgeID]++;
// Add qualifiers to edge
if (
typeof qualifers == 'object' &&
Object.keys(qualifers).length > 0 &&
typeof qualifiers == 'object' &&
Object.keys(qualifiers).length > 0 &&
!combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers
) {
combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifers).map(
combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifiers).map(
([qualifierType, qualifierValue]) => ({
qualifier_type_id: qualifierType,
qualifier_value: qualifierValue,
@@ -453,9 +496,9 @@
if (typeof combinedResponse.message.results[resultID].analyses[0].score !== 'undefined') {
combinedResponse.message.results[resultID].analyses[0].score = resScore
? scaled_sigmoid(
inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) +
inverse_scaled_sigmoid(resScore),
)
inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) +
inverse_scaled_sigmoid(resScore),
)
: combinedResponse.message.results[resultID].analyses[0].score;
} else {
combinedResponse.message.results[resultID].analyses[0].score = resScore;
@@ -627,9 +670,12 @@
stop = true;
const message = [
`Addition of ${creativeLimitHit} results from Template ${i + 1}`,
creativeLimitHit === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ',
`creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${creativeLimitHit} merged). `,
`Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${subQueries.length - (i + 1)
Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ',
`creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${
Object.keys(combinedResponse.message.results).length
} merged). `,
`Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${
subQueries.length - (i + 1)
} `,
subQueries.length - (i + 1) === 1 ? `template.` : `templates.`,
].join('');
@@ -654,8 +700,9 @@
const total =
Object.values(mergedResultsCount).reduce((sum, count) => sum + count, 0) +
Object.keys(mergedResultsCount).length;
const message = `Merging Summary: (${total}) inferred-template results were merged into (${Object.keys(mergedResultsCount).length
}) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`;
const message = `Merging Summary: (${total}) inferred-template results were merged into (${
Object.keys(mergedResultsCount).length
}) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`;
debug(message);
combinedResponse.logs.push(new LogEntry('INFO', null, message).getLog());
}
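
Editor's note: the core change in this file is how one-hop (direct-edge) results are handled. Each bound edge is now tested individually against the creative query edge: the predicate must match exactly or be a Biolink descendant, and every qualifier in at least one qualifier constraint set must be present on the edge (exactly or as a descendant value, with extra edge qualifiers allowed). Edges that pass stay bound directly to the query edge; edges that fail are routed through an inferred edge supported by an auxiliary graph. Below is a minimal standalone sketch of that per-edge test, not the actual class method; the `biolink`/`utils` helper names follow the diff, and the surrounding types are simplified assumptions.

```ts
// Standalone sketch of the per-edge matching introduced above. Helper signatures are assumed.
declare const biolink: {
  getDescendantPredicates(predicate: string): string[];
  getDescendantQualifiers(qualifier: string): string[];
};
declare const utils: { removeBioLinkPrefix(value: string): string };

interface BoundEdge {
  predicate: string;
  qualifiers?: { qualifier_type_id: string; qualifier_value: string }[];
}
interface QueryEdge {
  predicates?: string[];
  qualifier_constraints?: { qualifier_set: { qualifier_type_id: string; qualifier_value: string }[] }[];
}

// Qualifier values that look like predicates ('biolink:'-prefixed) expand via predicate
// descendants; other values expand via qualifier descendants; if expansion fails, only an
// exact value match can succeed.
function descendantValues(value: string): string[] {
  try {
    return value.includes('biolink:')
      ? biolink.getDescendantPredicates(utils.removeBioLinkPrefix(value))
      : biolink.getDescendantQualifiers(utils.removeBioLinkPrefix(value));
  } catch {
    return [];
  }
}

function edgeMatchesQEdge(boundEdge: BoundEdge, qEdge: QueryEdge): boolean {
  // Predicate must equal a query predicate or be one of its Biolink descendants.
  const predicateMatch =
    qEdge.predicates?.some((predicate) => {
      const descendantMatch = biolink
        .getDescendantPredicates(utils.removeBioLinkPrefix(predicate))
        .includes(utils.removeBioLinkPrefix(boundEdge.predicate));
      return predicate === boundEdge.predicate || descendantMatch;
    }) ?? false;

  // Every qualifier in at least one constraint set must appear on the edge, exactly or as a
  // descendant value; extra qualifiers on the edge are fine. No constraints means no restriction.
  const qualifierMatch =
    !qEdge.qualifier_constraints ||
    qEdge.qualifier_constraints.length === 0 ||
    qEdge.qualifier_constraints.some(({ qualifier_set }) =>
      qualifier_set.every((queryQualifier) =>
        (boundEdge.qualifiers ?? []).some(
          (qualifier) =>
            queryQualifier.qualifier_type_id === qualifier.qualifier_type_id &&
            (queryQualifier.qualifier_value === qualifier.qualifier_value ||
              descendantValues(queryQualifier.qualifier_value).includes(
                utils.removeBioLinkPrefix(qualifier.qualifier_value),
              )),
        ),
      ),
    );

  return predicateMatch && qualifierMatch;
}

// In the handler above, edges passing this test are bound to the query edge as-is; a one-hop
// result whose edges all fail is instead represented by an inferred edge backed by an aux graph.
```
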
20 changes: 11 additions & 9 deletions src/inferred_mode/template_lookup.ts
@@ -61,25 +61,27 @@ export async function getTemplates(lookups: TemplateLookup[], pathfinder = false
await fs.readFile(path.resolve(__dirname, '../../data/templateGroups.json'), { encoding: 'utf8' }),
);
const matchingTemplatePaths: PathMatch[] = templateGroups.reduce((matches: PathMatch[], group: TemplateGroup) => {
let matchingQualifers: CompactQualifiers;
let matchingQualifiers: CompactQualifiers;
const lookupMatch = lookups.some((lookup) => {
const match = (
(!!group.pathfinder === pathfinder) &&
const match =
!!group.pathfinder === pathfinder &&
group.subject.includes(lookup.subject) &&
group.object.includes(lookup.object) &&
group.predicate.includes(lookup.predicate) &&
Object.entries(lookup.qualifiers || {}).every(([qualifierType, qualifierValue]) => {
return (group.qualifiers || {})[qualifierType] && group.qualifiers[qualifierType] === qualifierValue;
})
);
if (match) matchingQualifers = lookup.qualifiers;
return (
(group.qualifiers || {})[qualifierType.replace('biolink:', '')] &&
group.qualifiers[qualifierType.replace('biolink:', '')] === qualifierValue.replace('biolink:', '')
);
});
if (match) matchingQualifiers = lookup.qualifiers;
return match;
});

if (lookupMatch) {
group.templates.forEach((template) => {
if (!matches.find(t => t.path === templatePaths[template])) {
matches.push({ path: templatePaths[template], qualifiers: matchingQualifers });
if (!matches.find((t) => t.path === templatePaths[template])) {
matches.push({ path: templatePaths[template], qualifiers: matchingQualifiers });
}
});
}
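
Editor's note: template matching now normalizes the lookup's qualifier type IDs and values by dropping any `biolink:` prefix before comparing them against a template group's qualifiers (the pathfinder flag comparison was also simplified). A small sketch of the normalized comparison, using simplified types rather than the project's TemplateGroup/TemplateLookup interfaces and an illustrative qualifier:

```ts
// Sketch of the prefix-insensitive qualifier comparison; template groups are assumed to store
// unprefixed qualifier types and values.
type CompactQualifiers = { [qualifier_type_id: string]: string };

const removePrefix = (value: string): string => value.replace('biolink:', '');

function qualifiersSatisfied(
  lookupQualifiers: CompactQualifiers = {},
  groupQualifiers: CompactQualifiers = {},
): boolean {
  // Every lookup qualifier must be present in the group, with 'biolink:' prefixes dropped
  // from the lookup side before comparison.
  return Object.entries(lookupQualifiers).every(([qualifierType, qualifierValue]) => {
    const groupValue = groupQualifiers[removePrefix(qualifierType)];
    return groupValue !== undefined && groupValue === removePrefix(qualifierValue);
  });
}

// Example (illustrative values): a prefixed lookup still matches an unprefixed group entry.
qualifiersSatisfied(
  { 'biolink:object_aspect_qualifier': 'biolink:activity_or_abundance' },
  { object_aspect_qualifier: 'activity_or_abundance' },
); // => true
```
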
44 changes: 32 additions & 12 deletions src/results_assembly/pfocr.ts
@@ -64,14 +64,19 @@ async function getAllByScrolling(
hits: RawFigureResult[] = [],
): Promise<RawFigureResult[]> {
queryBody.from = batchIndex;
const { data } = await axios.post(baseUrl, queryBody).catch((err) => {
debug('Error in scrolling request', err);
throw err;
});
let data: { hits: RawFigureResult[]; max_total: number };
try {
data = (await axios.post(baseUrl, queryBody, { timeout: 15000 })).data;
} catch (err) {
debug(`Error in scrolling request window ${batchIndex}-${batchIndex + 1000}, error is ${(err as Error).message}`);
}

if (data) {
hits.push(...data.hits);
debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`);
}

hits.push(...data.hits);
debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`);
if (batchIndex + 1000 < data.max_total) {
if (data && batchIndex + 1000 < data.max_total) {
return await getAllByScrolling(baseUrl, queryBody, batchIndex + 1000, hits);
} else {
return hits;
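
Editor's note: getAllByScrolling pages through the PFOCR index in windows of 1,000 hits and now tolerates a failed window: the error is logged, hits gathered so far are kept, and scrolling stops instead of throwing. A condensed sketch of that pattern (the axios call, timeout, and the hits/max_total response shape follow the diff; other names are simplified assumptions):

```ts
// Condensed sketch of tolerant scrolling: fetch in 1,000-hit windows; a failed window ends the
// scroll but whatever was gathered so far is still returned.
import axios from 'axios';

interface ScrollPage<T> {
  hits: T[];
  max_total: number;
}

async function scrollAll<T>(
  baseUrl: string,
  queryBody: Record<string, unknown>,
  from = 0,
  hits: T[] = [],
): Promise<T[]> {
  let page: ScrollPage<T> | undefined;
  try {
    page = (await axios.post<ScrollPage<T>>(baseUrl, { ...queryBody, from }, { timeout: 15000 })).data;
  } catch (err) {
    // Log the failed window; page stays undefined, so recursion stops below.
    console.debug(`Error in scrolling request window ${from}-${from + 1000}: ${(err as Error).message}`);
  }
  if (page) hits.push(...page.hits);
  return page && from + 1000 < page.max_total ? scrollAll(baseUrl, queryBody, from + 1000, hits) : hits;
}
```
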
@@ -82,7 +87,12 @@
*/
async function getPfocrFigures(qTerms: Set<string>): Promise<DeDupedFigureResult[]> {
debug(`Getting PFOCR figure data`);
const url = 'https://biothings.ncats.io/pfocr/query';
const url = {
dev: 'https://biothings.ci.transltr.io/pfocr/query',
ci: 'https://biothings.ci.transltr.io/pfocr/query',
test: 'https://biothings.test.transltr.io/pfocr/query',
prod: 'https://biothings.ncats.io/pfocr/query',
}[process.env.INSTANCE_ENV ?? 'prod'];
/*
* We can now POST using minimum_should_match to bypass most set logic on our side
* detailed here: https://github.com/biothings/pending.api/issues/88
@@ -210,10 +220,20 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse
return logs;
}

const figures = await getPfocrFigures(curieCombos).catch((err) => {
debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err);
throw err;
});
let figures: DeDupedFigureResult[];
try {
figures = await getPfocrFigures(curieCombos);
} catch (err) {
debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', (err as Error).message);
logs.push(
new LogEntry(
'WARNING',
null,
`Error getting PFOCR figures, results will not be enriched. The error is ${err.message}`,
).getLog(),
);
}
if (!figures) return logs;

debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} nodes from any TRAPI result`);
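
Editor's note: getPfocrFigures now selects its endpoint from INSTANCE_ENV, and enrichTrapiResultsWithPfocrFigures treats figure retrieval as best-effort: a failure is logged as a WARNING and results are returned unenriched rather than failing the whole request. A rough sketch of that graceful-degradation pattern (URLs and environment keys follow the diff; the log shape and getPfocrFigures stub are simplified stand-ins for the project's LogEntry and query code, and a Node environment is assumed for process.env):

```ts
// Rough sketch only: endpoint selection by deployment environment plus optional enrichment.
declare function getPfocrFigures(curieCombos: Set<string>): Promise<unknown[]>;

const PFOCR_URLS: Record<string, string> = {
  dev: 'https://biothings.ci.transltr.io/pfocr/query',
  ci: 'https://biothings.ci.transltr.io/pfocr/query',
  test: 'https://biothings.test.transltr.io/pfocr/query',
  prod: 'https://biothings.ncats.io/pfocr/query',
};
const pfocrUrl = PFOCR_URLS[process.env.INSTANCE_ENV ?? 'prod'];

async function enrichOrSkip(curieCombos: Set<string>): Promise<{ level: string; message: string }[]> {
  const logs: { level: string; message: string }[] = [];
  let figures: unknown[] | undefined;
  try {
    figures = await getPfocrFigures(curieCombos); // assumed to query pfocrUrl
  } catch (err) {
    // Enrichment is optional: record a WARNING and return unenriched results.
    logs.push({
      level: 'WARNING',
      message: `Error getting PFOCR figures, results will not be enriched. The error is ${(err as Error).message}`,
    });
  }
  if (!figures) return logs;
  // ...figure-to-result matching would continue here...
  return logs;
}
```
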

