Commit a497bc0

tokebe committed Aug 19, 2024
2 parents 7d66134 + 5789f6c commit a497bc0
Showing 3 changed files with 132 additions and 63 deletions.
131 changes: 89 additions & 42 deletions src/inferred_mode/inferred_mode.ts
@@ -173,10 +173,8 @@ export default class InferredQueryHandler {
const qualifierConstraints = (qEdge.qualifier_constraints || []).map((qualifierSetObj) => {
return Object.fromEntries(
qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [
qualifier_type_id.replace('biolink:', ''),
Array.isArray(qualifier_value)
? qualifier_value.map((string) => string.replace('biolink:', ''))
: qualifier_value.replace('biolink:', ''),
qualifier_type_id,
qualifier_value,
]),
) as CompactQualifiers;
});
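
Editor's note: this first hunk stops stripping the `biolink:` prefix when a query edge's qualifier constraints are flattened into `CompactQualifiers`, so qualifier type IDs and values now pass through unchanged. A minimal sketch of the resulting mapping (types simplified and the example qualifier illustrative, not taken from the repo):

```ts
// Minimal sketch of the qualifier flattening after this change; CompactQualifiers is assumed
// to be a record of qualifier_type_id to qualifier_value (string or string[]).
type Qualifier = { qualifier_type_id: string; qualifier_value: string | string[] };
type CompactQualifiers = { [qualifier_type_id: string]: string | string[] };

function toCompactQualifiers(qualifier_set: Qualifier[]): CompactQualifiers {
  // 'biolink:' prefixes are preserved rather than stripped.
  return Object.fromEntries(
    qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [qualifier_type_id, qualifier_value]),
  ) as CompactQualifiers;
}

// Example (illustrative values): the prefixed type ID is kept as-is.
toCompactQualifiers([
  { qualifier_type_id: 'biolink:object_aspect_qualifier', qualifier_value: 'activity_or_abundance' },
]);
// => { 'biolink:object_aspect_qualifier': 'activity_or_abundance' }
```
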
@@ -262,7 +260,7 @@ export default class InferredQueryHandler {
qEdge: TrapiQEdge,
combinedResponse: CombinedResponse,
auxGraphSuffixes: { [inferredEdgeID: string]: number },
qualifers?: CompactQualifiers,
qualifiers?: CompactQualifiers,
): CombinedResponseReport {
const span = Telemetry.startSpan({ description: 'creativeCombineResponse' });
const newResponse = handler.getResponse();
@@ -331,37 +329,82 @@
.join(',');
const resultID = `${resultCreativeSubjectID}-${resultCreativeObjectID}`;

// Direct edge answers stand on their own, not as an inferred edge.
const boundEdgeID = Object.values(result.analyses[0].edge_bindings)[0][0].id;
const boundEdge = combinedResponse.message.knowledge_graph.edges[boundEdgeID];
const specialHandling = [
Object.keys(result.node_bindings).length === 2, // Direct edge
// Predicate matches or is descendant
qEdge.predicates?.some(
(predicate) =>
predicate === boundEdge.predicate ||
biolink.getDescendantPredicates(predicate).includes(boundEdge.predicate),
) ?? false,
// All query qualifiers (if any) are accounted for (more is fine)
qEdge.qualifier_constraints?.some(({ qualifier_set }) => {
return qualifier_set.every((queryQualifier) =>
boundEdge.qualifiers?.some(
(qualifier) =>
queryQualifier.qualifier_type_id === qualifier.qualifier_type_id &&
queryQualifier.qualifier_value === qualifier.qualifier_value,
) ?? false,
);
}) ?? false,
].every((test) => test);
if (specialHandling) {
translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: boundEdgeID, attributes: [] }] };
} else {
// Direct edge answers stand on their own (assuming some match criteria), not as an inferred edge.
// A given one-hop result may bind both matching and non-matching edges
const oneHop = Object.keys(result.node_bindings).length === 2;
const resultEdgeID = Object.keys(result.analyses[0].edge_bindings)[0]; // Only useful if direct edge
const nonMatchingEdges = [];
let useInferredEdge =
!oneHop ||
result.analyses[0].edge_bindings[resultEdgeID]
.map(({ id }) => {
// If an edge doesn't match, add it to nonMatchingEdges and return false
const boundEdge = combinedResponse.message.knowledge_graph.edges[id];
// Predicate matches or is descendant
const predicateMatch =
qEdge.predicates?.some((predicate) => {
const descendantMatch = biolink
.getDescendantPredicates(utils.removeBioLinkPrefix(predicate))
.includes(utils.removeBioLinkPrefix(boundEdge.predicate));
return predicate === boundEdge.predicate || descendantMatch;
}) ?? false;
// All query qualifiers (if any) are accounted for (more is fine)
const qualifierMatch =
!qEdge.qualifier_constraints ||
qEdge.qualifier_constraints.length === 0 ||
qEdge.qualifier_constraints?.some(({ qualifier_set }) => {
return qualifier_set.every((queryQualifier) => {
return (
boundEdge.qualifiers?.some((qualifier) => {
const typeMatch = queryQualifier.qualifier_type_id === qualifier.qualifier_type_id;
let valueMatch: boolean;
try {
const descendants = queryQualifier.qualifier_value.includes('biolink:')
? biolink.getDescendantPredicates(
utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string),
)
: biolink.getDescendantQualifiers(
utils.removeBioLinkPrefix(queryQualifier.qualifier_value as string),
);
valueMatch =
queryQualifier.qualifier_value === qualifier.qualifier_value ||
descendants.includes(utils.removeBioLinkPrefix(qualifier.qualifier_value as string));
} catch (err) {
valueMatch = queryQualifier.qualifier_value === qualifier.qualifier_value;
}
return typeMatch && valueMatch;
}) ?? false
);
});
});
if (!(predicateMatch && qualifierMatch)) {
nonMatchingEdges.push(id);
return false;
}
if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) {
translatedResult.analyses[0].edge_bindings[qEdgeID] = [];
}
translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id, attributes: [] });
return true;
})
.includes(false);

// If result was one-hop and some edges didn't match, pull them out to put in an inferred edge
if (oneHop && nonMatchingEdges.length > 0) {
result.analyses[0].edge_bindings[resultEdgeID] = result.analyses[0].edge_bindings[resultEdgeID].filter(
({ id }) => nonMatchingEdges.includes(id),
);
}
if (useInferredEdge) {
// Create an aux graph using the result and associate it with an inferred Edge
const inferredEdgeID = `inferred-${resultCreativeSubjectID}-${qEdge.predicates[0].replace(
'biolink:',
'',
)}-${resultCreativeObjectID}`;
translatedResult.analyses[0].edge_bindings = { [qEdgeID]: [{ id: inferredEdgeID, attributes: [] }] };
if (!translatedResult.analyses[0].edge_bindings[qEdgeID]) {
translatedResult.analyses[0].edge_bindings[qEdgeID] = [];
}
translatedResult.analyses[0].edge_bindings[qEdgeID].push({ id: inferredEdgeID, attributes: [] });
if (!combinedResponse.message.knowledge_graph.edges[inferredEdgeID]) {
combinedResponse.message.knowledge_graph.edges[inferredEdgeID] = {
subject: resultCreativeSubjectID,
@@ -387,11 +430,11 @@
auxGraphSuffixes[inferredEdgeID]++;
// Add qualifiers to edge
if (
typeof qualifers == 'object' &&
Object.keys(qualifers).length > 0 &&
typeof qualifiers == 'object' &&
Object.keys(qualifiers).length > 0 &&
!combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers
) {
combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifers).map(
combinedResponse.message.knowledge_graph.edges[inferredEdgeID].qualifiers = Object.entries(qualifiers).map(
([qualifierType, qualifierValue]) => ({
qualifier_type_id: qualifierType,
qualifier_value: qualifierValue,
@@ -453,9 +496,9 @@
if (typeof combinedResponse.message.results[resultID].analyses[0].score !== 'undefined') {
combinedResponse.message.results[resultID].analyses[0].score = resScore
? scaled_sigmoid(
inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) +
inverse_scaled_sigmoid(resScore),
)
inverse_scaled_sigmoid(combinedResponse.message.results[resultID].analyses[0].score) +
inverse_scaled_sigmoid(resScore),
)
: combinedResponse.message.results[resultID].analyses[0].score;
} else {
combinedResponse.message.results[resultID].analyses[0].score = resScore;
@@ -627,9 +670,12 @@
stop = true;
const message = [
`Addition of ${creativeLimitHit} results from Template ${i + 1}`,
creativeLimitHit === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ',
`creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${creativeLimitHit} merged). `,
`Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${subQueries.length - (i + 1)
Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ',
`creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${
Object.keys(combinedResponse.message.results).length
} merged). `,
`Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${
subQueries.length - (i + 1)
} `,
subQueries.length - (i + 1) === 1 ? `template.` : `templates.`,
].join('');
@@ -654,8 +700,9 @@
const total =
Object.values(mergedResultsCount).reduce((sum, count) => sum + count, 0) +
Object.keys(mergedResultsCount).length;
const message = `Merging Summary: (${total}) inferred-template results were merged into (${Object.keys(mergedResultsCount).length
}) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`;
const message = `Merging Summary: (${total}) inferred-template results were merged into (${
Object.keys(mergedResultsCount).length
}) final results, reducing result count by (${total - Object.keys(mergedResultsCount).length})`;
debug(message);
combinedResponse.logs.push(new LogEntry('INFO', null, message).getLog());
}
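
Editor's note: the core change in this file is how one-hop (direct-edge) results are handled. Each bound edge is now tested individually against the creative query edge: the predicate must match exactly or be a Biolink descendant, and every qualifier in at least one qualifier constraint set must be present on the edge (exactly or as a descendant value, with extra edge qualifiers allowed). Edges that pass stay bound directly to the query edge; edges that fail are routed through an inferred edge supported by an auxiliary graph. Below is a minimal standalone sketch of that per-edge test, not the actual class method; the `biolink`/`utils` helper names follow the diff, and the surrounding types are simplified assumptions.

```ts
// Standalone sketch of the per-edge matching introduced above. Helper signatures are assumed.
declare const biolink: {
  getDescendantPredicates(predicate: string): string[];
  getDescendantQualifiers(qualifier: string): string[];
};
declare const utils: { removeBioLinkPrefix(value: string): string };

interface BoundEdge {
  predicate: string;
  qualifiers?: { qualifier_type_id: string; qualifier_value: string }[];
}
interface QueryEdge {
  predicates?: string[];
  qualifier_constraints?: { qualifier_set: { qualifier_type_id: string; qualifier_value: string }[] }[];
}

// Qualifier values that look like predicates ('biolink:'-prefixed) expand via predicate
// descendants; other values expand via qualifier descendants; if expansion fails, only an
// exact value match can succeed.
function descendantValues(value: string): string[] {
  try {
    return value.includes('biolink:')
      ? biolink.getDescendantPredicates(utils.removeBioLinkPrefix(value))
      : biolink.getDescendantQualifiers(utils.removeBioLinkPrefix(value));
  } catch {
    return [];
  }
}

function edgeMatchesQEdge(boundEdge: BoundEdge, qEdge: QueryEdge): boolean {
  // Predicate must equal a query predicate or be one of its Biolink descendants.
  const predicateMatch =
    qEdge.predicates?.some((predicate) => {
      const descendantMatch = biolink
        .getDescendantPredicates(utils.removeBioLinkPrefix(predicate))
        .includes(utils.removeBioLinkPrefix(boundEdge.predicate));
      return predicate === boundEdge.predicate || descendantMatch;
    }) ?? false;

  // Every qualifier in at least one constraint set must appear on the edge, exactly or as a
  // descendant value; extra qualifiers on the edge are fine. No constraints means no restriction.
  const qualifierMatch =
    !qEdge.qualifier_constraints ||
    qEdge.qualifier_constraints.length === 0 ||
    qEdge.qualifier_constraints.some(({ qualifier_set }) =>
      qualifier_set.every((queryQualifier) =>
        (boundEdge.qualifiers ?? []).some(
          (qualifier) =>
            queryQualifier.qualifier_type_id === qualifier.qualifier_type_id &&
            (queryQualifier.qualifier_value === qualifier.qualifier_value ||
              descendantValues(queryQualifier.qualifier_value).includes(
                utils.removeBioLinkPrefix(qualifier.qualifier_value),
              )),
        ),
      ),
    );

  return predicateMatch && qualifierMatch;
}

// In the handler above, edges passing this test are bound to the query edge as-is; a one-hop
// result whose edges all fail is instead represented by an inferred edge backed by an aux graph.
```
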
20 changes: 11 additions & 9 deletions src/inferred_mode/template_lookup.ts
@@ -61,25 +61,27 @@ export async function getTemplates(lookups: TemplateLookup[], pathfinder = false
await fs.readFile(path.resolve(__dirname, '../../data/templateGroups.json'), { encoding: 'utf8' }),
);
const matchingTemplatePaths: PathMatch[] = templateGroups.reduce((matches: PathMatch[], group: TemplateGroup) => {
let matchingQualifers: CompactQualifiers;
let matchingQualifiers: CompactQualifiers;
const lookupMatch = lookups.some((lookup) => {
const match = (
(!!group.pathfinder === pathfinder) &&
const match =
!!group.pathfinder === pathfinder &&
group.subject.includes(lookup.subject) &&
group.object.includes(lookup.object) &&
group.predicate.includes(lookup.predicate) &&
Object.entries(lookup.qualifiers || {}).every(([qualifierType, qualifierValue]) => {
return (group.qualifiers || {})[qualifierType] && group.qualifiers[qualifierType] === qualifierValue;
})
);
if (match) matchingQualifers = lookup.qualifiers;
return (
(group.qualifiers || {})[qualifierType.replace('biolink:', '')] &&
group.qualifiers[qualifierType.replace('biolink:', '')] === qualifierValue.replace('biolink:', '')
);
});
if (match) matchingQualifiers = lookup.qualifiers;
return match;
});

if (lookupMatch) {
group.templates.forEach((template) => {
if (!matches.find(t => t.path === templatePaths[template])) {
matches.push({ path: templatePaths[template], qualifiers: matchingQualifers });
if (!matches.find((t) => t.path === templatePaths[template])) {
matches.push({ path: templatePaths[template], qualifiers: matchingQualifiers });
}
});
}
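
Editor's note: template matching now normalizes the lookup's qualifier type IDs and values by dropping any `biolink:` prefix before comparing them against a template group's qualifiers (the pathfinder flag comparison was also simplified). A small sketch of the normalized comparison, using simplified types rather than the project's TemplateGroup/TemplateLookup interfaces and an illustrative qualifier:

```ts
// Sketch of the prefix-insensitive qualifier comparison; template groups are assumed to store
// unprefixed qualifier types and values.
type CompactQualifiers = { [qualifier_type_id: string]: string };

const removePrefix = (value: string): string => value.replace('biolink:', '');

function qualifiersSatisfied(
  lookupQualifiers: CompactQualifiers = {},
  groupQualifiers: CompactQualifiers = {},
): boolean {
  // Every lookup qualifier must be present in the group, with 'biolink:' prefixes dropped
  // from the lookup side before comparison.
  return Object.entries(lookupQualifiers).every(([qualifierType, qualifierValue]) => {
    const groupValue = groupQualifiers[removePrefix(qualifierType)];
    return groupValue !== undefined && groupValue === removePrefix(qualifierValue);
  });
}

// Example (illustrative values): a prefixed lookup still matches an unprefixed group entry.
qualifiersSatisfied(
  { 'biolink:object_aspect_qualifier': 'biolink:activity_or_abundance' },
  { object_aspect_qualifier: 'activity_or_abundance' },
); // => true
```
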
44 changes: 32 additions & 12 deletions src/results_assembly/pfocr.ts
@@ -64,14 +64,19 @@ async function getAllByScrolling(
hits: RawFigureResult[] = [],
): Promise<RawFigureResult[]> {
queryBody.from = batchIndex;
const { data } = await axios.post(baseUrl, queryBody).catch((err) => {
debug('Error in scrolling request', err);
throw err;
});
let data: { hits: RawFigureResult[]; max_total: number };
try {
data = (await axios.post(baseUrl, queryBody, { timeout: 15000 })).data;
} catch (err) {
debug(`Error in scrolling request window ${batchIndex}-${batchIndex + 1000}, error is ${(err as Error).message}`);
}

if (data) {
hits.push(...data.hits);
debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`);
}

hits.push(...data.hits);
debug(`Batch window ${batchIndex}-${batchIndex + 1000}: ${data.hits.length} hits retrieved for PFOCR figure data`);
if (batchIndex + 1000 < data.max_total) {
if (data && batchIndex + 1000 < data.max_total) {
return await getAllByScrolling(baseUrl, queryBody, batchIndex + 1000, hits);
} else {
return hits;
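
Editor's note: getAllByScrolling pages through the PFOCR index in windows of 1,000 hits and now tolerates a failed window: the error is logged, hits gathered so far are kept, and scrolling stops instead of throwing. A condensed sketch of that pattern (the axios call, timeout, and the hits/max_total response shape follow the diff; other names are simplified assumptions):

```ts
// Condensed sketch of tolerant scrolling: fetch in 1,000-hit windows; a failed window ends the
// scroll but whatever was gathered so far is still returned.
import axios from 'axios';

interface ScrollPage<T> {
  hits: T[];
  max_total: number;
}

async function scrollAll<T>(
  baseUrl: string,
  queryBody: Record<string, unknown>,
  from = 0,
  hits: T[] = [],
): Promise<T[]> {
  let page: ScrollPage<T> | undefined;
  try {
    page = (await axios.post<ScrollPage<T>>(baseUrl, { ...queryBody, from }, { timeout: 15000 })).data;
  } catch (err) {
    // Log the failed window; page stays undefined, so recursion stops below.
    console.debug(`Error in scrolling request window ${from}-${from + 1000}: ${(err as Error).message}`);
  }
  if (page) hits.push(...page.hits);
  return page && from + 1000 < page.max_total ? scrollAll(baseUrl, queryBody, from + 1000, hits) : hits;
}
```
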
@@ -82,7 +87,12 @@
*/
async function getPfocrFigures(qTerms: Set<string>): Promise<DeDupedFigureResult[]> {
debug(`Getting PFOCR figure data`);
const url = 'https://biothings.ncats.io/pfocr/query';
const url = {
dev: 'https://biothings.ci.transltr.io/pfocr/query',
ci: 'https://biothings.ci.transltr.io/pfocr/query',
test: 'https://biothings.test.transltr.io/pfocr/query',
prod: 'https://biothings.ncats.io/pfocr/query',
}[process.env.INSTANCE_ENV ?? 'prod'];
/*
* We can now POST using minimum_should_match to bypass most set logic on our side
* detailed here: https://github.com/biothings/pending.api/issues/88
@@ -210,10 +220,20 @@ export async function enrichTrapiResultsWithPfocrFigures(response: TrapiResponse
return logs;
}

const figures = await getPfocrFigures(curieCombos).catch((err) => {
debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', err);
throw err;
});
let figures: DeDupedFigureResult[];
try {
figures = await getPfocrFigures(curieCombos);
} catch (err) {
debug('Error getting PFOCR figures (enrichTrapiResultsWithPfocrFigures)', (err as Error).message);
logs.push(
new LogEntry(
'WARNING',
null,
`Error getting PFOCR figures, results will not be enriched. The error is ${err.message}`,
).getLog(),
);
}
if (!figures) return logs;

debug(`${figures.length} PFOCR figures match at least ${MATCH_COUNT_MIN} nodes from any TRAPI result`);
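
Editor's note: getPfocrFigures now selects its endpoint from INSTANCE_ENV, and enrichTrapiResultsWithPfocrFigures treats figure retrieval as best-effort: a failure is logged as a WARNING and results are returned unenriched rather than failing the whole request. A rough sketch of that graceful-degradation pattern (URLs and environment keys follow the diff; the log shape and getPfocrFigures stub are simplified stand-ins for the project's LogEntry and query code, and a Node environment is assumed for process.env):

```ts
// Rough sketch only: endpoint selection by deployment environment plus optional enrichment.
declare function getPfocrFigures(curieCombos: Set<string>): Promise<unknown[]>;

const PFOCR_URLS: Record<string, string> = {
  dev: 'https://biothings.ci.transltr.io/pfocr/query',
  ci: 'https://biothings.ci.transltr.io/pfocr/query',
  test: 'https://biothings.test.transltr.io/pfocr/query',
  prod: 'https://biothings.ncats.io/pfocr/query',
};
const pfocrUrl = PFOCR_URLS[process.env.INSTANCE_ENV ?? 'prod'];

async function enrichOrSkip(curieCombos: Set<string>): Promise<{ level: string; message: string }[]> {
  const logs: { level: string; message: string }[] = [];
  let figures: unknown[] | undefined;
  try {
    figures = await getPfocrFigures(curieCombos); // assumed to query pfocrUrl
  } catch (err) {
    // Enrichment is optional: record a WARNING and return unenriched results.
    logs.push({
      level: 'WARNING',
      message: `Error getting PFOCR figures, results will not be enriched. The error is ${(err as Error).message}`,
    });
  }
  if (!figures) return logs;
  // ...figure-to-result matching would continue here...
  return logs;
}
```
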

