diff --git a/__test__/integration/KnowledgeGraph.test.ts b/__test__/integration/KnowledgeGraph.test.ts deleted file mode 100644 index 18c99a90..00000000 --- a/__test__/integration/KnowledgeGraph.test.ts +++ /dev/null @@ -1,156 +0,0 @@ -import KGEdge from '../../src/graph/kg_edge'; -import KGNode from '../../src/graph/kg_node'; -import KnowledgeGraph from '../../src/graph/knowledge_graph'; -import { TrapiAttribute } from '@biothings-explorer/types'; - -describe('Testing KnowledgeGraph Module', () => { - const nodeInput = new KGNode('PUBCHEM.COMPOUND:2662-n0', { - primaryCurie: 'PUBCHEM.COMPOUND:2662', - qNodeID: 'n0', - curies: [ - 'PUBCHEM.COMPOUND:2662', - 'CHEMBL.COMPOUND:CHEMBL118', - 'UNII:JCX84Q7J1L', - 'CHEBI:41423', - 'DRUGBANK:DB00482', - 'MESH:C105934', - 'MESH:D000068579', - 'CAS:169590-42-5', - 'CAS:184007-95-2', - 'CAS:194044-54-7', - 'DrugCentral:568', - 'GTOPDB:2892', - 'HMDB:HMDB0005014', - 'KEGG.COMPOUND:C07589', - 'INCHIKEY:RZEKVGVHFLEQIL-UHFFFAOYSA-N', - ], - names: ['Celecoxib', 'CELECOXIB', 'celecoxib', '[OBSOLETE] celecoxib'], - semanticType: ['biolink:SmallMolecule'], - label: 'Celecoxib', - }); - - const trapiEdgeInput = new KGEdge('PUBCHEM.COMPOUND:2662-biolink:activity_decreased_by-NCBIGene:771', { - predicate: 'biolink:activity_decreased_by', - subject: 'PUBCHEM.COMPOUND:2662', - object: 'NCBIGene:771', - }); - - trapiEdgeInput.addAdditionalAttributes('edge-attributes', [ - { - attribute_type_id: 'biolink:Attribute', - value: 'Ki', - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'affinity_parameter', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:knowledge_source', - value: ['PHAROS_1_norm_edges.jsonl'], - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:aggregator_knowledge_source', - value: ['infores:pharos'], - value_type_id: 'biolink:InformationResource', - original_attribute_name: 'biolink:aggregator_knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:Attribute', - value: 7.75, - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'affinity', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:publications', - value: [ - 'PMID:20605094', - 'PMID:21852133', - 'PMID:16290146', - 'PMID:23965175', - 'PMID:23965175', - 'PMID:24513184', - 'PMID:25766630', - 'PMID:23067387', - ], - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'publications', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:relation', - value: 'GAMMA:ki', - value_type_id: 'EDAM:data_0006', - original_attribute_name: 'relation', - value_url: null, - attribute_source: null, - description: null, - }, - { - attribute_type_id: 'biolink:aggregator_knowledge_source', - value: 'infores:automat.pharos', - value_type_id: 'biolink:InformationResource', - original_attribute_name: 'biolink:aggregator_knowledge_source', - value_url: null, - attribute_source: null, - description: null, - }, - ]); - - describe('Testing _createNode function', () => { - test('test creating node', () => { - const kg = new KnowledgeGraph(); - const res = kg._createNode(nodeInput); - expect(res).toHaveProperty('name', 'Celecoxib'); - expect(res).toHaveProperty('categories'); - expect(res.categories[0]).toBe('biolink:SmallMolecule'); - expect(res).toHaveProperty('attributes'); - }); - }); - - describe('Testing _createAttributes function', () => { - test('test edge attributes', () => { - const kg = new KnowledgeGraph(); - const res = kg._createAttributes(trapiEdgeInput); - expect(res.length).toBeGreaterThan(0); - for (const res_obj of res) { - expect(res_obj).toHaveProperty('attribute_type_id'); - expect(res_obj).toHaveProperty('value'); - if (res_obj.attribute_type_id.includes('biolink:')) { - expect(res_obj).toHaveProperty('value_type_id'); - } - } - }); - }); - - describe('Testing _createEdge function', () => { - test('test creating edge', () => { - const kg = new KnowledgeGraph(); - const res = kg._createEdge(trapiEdgeInput); - expect(res).toHaveProperty('predicate', 'biolink:activity_decreased_by'); - expect(res).toHaveProperty('subject', 'PUBCHEM.COMPOUND:2662'); - expect(res).toHaveProperty('object', 'NCBIGene:771'); - expect(res).toHaveProperty('attributes'); - for (const res_obj of res.attributes as TrapiAttribute[]) { - expect(res_obj).toHaveProperty('attribute_type_id'); - expect(res_obj).toHaveProperty('value'); - if (res_obj.attribute_type_id.includes('biolink:')) { - expect(res_obj).toHaveProperty('value_type_id'); - } - } - }); - }); -}); diff --git a/__test__/integration/QEdge2BTEEdgeHandler.test.ts b/__test__/integration/QEdge2BTEEdgeHandler.test.ts index 9093fbce..6fa24ac6 100644 --- a/__test__/integration/QEdge2BTEEdgeHandler.test.ts +++ b/__test__/integration/QEdge2BTEEdgeHandler.test.ts @@ -1,8 +1,7 @@ jest.mock('axios'); import axios from 'axios'; -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; +import { QNode, QEdge } from '@biothings-explorer/types'; import NodeUpdateHandler from '../../src/update_nodes'; describe('Testing NodeUpdateHandler Module', () => { diff --git a/__test__/integration/QueryEdge.test.ts b/__test__/integration/QueryEdge.test.ts deleted file mode 100644 index b195f01e..00000000 --- a/__test__/integration/QueryEdge.test.ts +++ /dev/null @@ -1,200 +0,0 @@ -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; - -describe('Testing QueryEdge Module', () => { - const gene_node1 = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - const type_node = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const disease1_node = new QNode({ id: 'n1', categories: ['Disease'], ids: ['MONDO:000123'] }); - const node1_equivalent_ids = { - 'NCBIGene:1017': { - db_ids: { - NCBIGene: ['1017'], - SYMBOL: ['CDK2'], - }, - }, - }; - - const gene_node2 = new QNode({ id: 'n2', categories: ['Gene'], ids: ['NCBIGene:1017', 'NCBIGene:1018'] }); - const gene_node1_with_id_annotated = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - //@ts-expect-error: partial data for specific test scope - gene_node1_with_id_annotated.setEquivalentIDs(node1_equivalent_ids); - const chemical_node1 = new QNode({ id: 'n3', categories: ['SmallMolecule'] }); - const edge1 = new QEdge({ id: 'e01', subject: gene_node1, object: chemical_node1 }); - const edge2 = new QEdge({ id: 'e02', subject: gene_node1_with_id_annotated, object: chemical_node1 }); - const edge3 = new QEdge({ id: 'e04', subject: gene_node2, object: chemical_node1 }); - const edge4 = new QEdge({ id: 'e05', object: gene_node2, subject: chemical_node1 }); - const edge5 = new QEdge({ id: 'e06', object: gene_node1_with_id_annotated, subject: chemical_node1 }); - - describe('Testing isReversed function', () => { - test('test if only the object of the edge has curie defined, should return true', () => { - const res = edge4.isReversed(); - expect(res).toBeTruthy(); - }); - - test('test if the subject of the edge has curie defined, should return false', () => { - const res = edge1.isReversed(); - expect(res).toBeFalsy(); - }); - - test('test if both subject and object curie not defined, should return false', () => { - const node1 = new QNode({ id: 'n1', categories: ['Gene'] }); - const node2 = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const edge = new QEdge({ id: 'e01', subject: node1, object: node2 }); - expect(edge.isReversed()).toBeFalsy(); - }); - }); - - describe('Testing getInputCurie function', () => { - test('test return an array of one curie if subject has only one curie specified', () => { - const res = edge1.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017']); - }); - - test('test return an array of two curie if subject has only an array of two curies specified', () => { - const res = edge3.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017', 'NCBIGene:1018']); - }); - - test('test return an array of two curies if edge is reversed and object has two curies specified', () => { - const res = edge4.getInputCurie(); - expect(res).toEqual(['NCBIGene:1017', 'NCBIGene:1018']); - }); - }); - - describe('Testing hasInput function', () => { - test('test return true if subject has only one curie specified', () => { - const res = edge1.hasInput(); - expect(res).toBeTruthy(); - }); - - test('test return true if subject has only an array of two curies specified', () => { - const res = edge3.hasInput(); - expect(res).toBeTruthy(); - }); - - test('test return true if subject has no curies specified but object does', () => { - const res = edge4.hasInput(); - expect(res).toBeTruthy(); - }); - - test('test return false if both subject and object has no curies specified', () => { - const node1 = new QNode({ id: 'n1', categories: ['Gene'] }); - const node2 = new QNode({ id: 'n2', categories: ['SmallMolecule'] }); - const edge = new QEdge({ id: 'e01', subject: node1, object: node2 }); - expect(edge.hasInput()).toBeFalsy(); - }); - }); - - // Removed because new QEdge has different implementation for hasInputResolved - // describe("Testing hasInputResolved function", () => { - // test("test return true if subject has input resolved", () => { - // const res = edge2.hasInputResolved(); - // expect(res).toBeTruthy(); - // }); - - // test("test return false if both subject and object do not have input resolved", () => { - // const res = edge1.hasInputResolved(); - // expect(res).toBeFalsy(); - // }); - - // test("test return true if subject doesn't have input resolved, but object does", () => { - // const res = edge5.hasInputResolved(); - // expect(res).toBeTruthy(); - // }); - - // }) - - describe('Testing getPredicate function', () => { - test('test get reverse predicate if query is reversed', () => { - const edge = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:treats'] }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - - test('test get reverse predicate if query is reversed and expanded', () => { - const edge = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:affects'] }); - const res = edge.getPredicate(); - expect(res).toContain('affected_by'); - expect(res).toContain('disrupted_by'); - }); - }); - - describe('Testing expandPredicates function', () => { - test('All predicates are correctly expanded if in biolink model', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - }); - - test('Multiple predicates can be resolved', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to', 'ameliorates']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - expect(res).toContain('ameliorates'); - }); - - test('Predicates not in biolink model should return itself', () => { - const edge = new QEdge({ - id: 'e01', - subject: type_node, - object: disease1_node, - predicates: ['biolink:contributes_to'], - }); - const res = edge.expandPredicates(['contributes_to', 'amelio']); - expect(res).toContain('contributes_to'); - expect(res).toContain('causes'); - expect(res).toContain('amelio'); - }); - }); - - describe('chooseLowerEntityValue', () => { - test('Should reverse if subject has more curies', () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 2; - qEdgeClone.object.entity_count = 1; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeTruthy(); - }); - - test("Shouldn't reverse if object has more curies", () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 1; - qEdgeClone.object.entity_count = 2; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeFalsy(); - }); - - test("Shouldn't reverse if both have same number", () => { - const qEdgeClone = new QEdge(edge1.freeze()); - qEdgeClone.subject.entity_count = 2; - qEdgeClone.object.entity_count = 2; - - qEdgeClone.chooseLowerEntityValue(); - - expect(qEdgeClone.isReversed()).toBeFalsy(); - }); - }); - - test('getHashedEdgeRepresentation', () => { - const qEdge1 = new QEdge({ id: 'e01', subject: type_node, object: disease1_node, predicates: ['biolink:treats'] }); - const qEdge2 = new QEdge(qEdge1.freeze(), true); - // NOTE: recently changed from not.toEqual, because an unfrozen edge *should* equal its original? - expect(qEdge1.getHashedEdgeRepresentation()).toEqual(qEdge2.getHashedEdgeRepresentation()); - }); -}); diff --git a/__test__/integration/QueryGraphHandler.test.ts b/__test__/integration/QueryGraphHandler.test.ts deleted file mode 100644 index d4f7278c..00000000 --- a/__test__/integration/QueryGraphHandler.test.ts +++ /dev/null @@ -1,531 +0,0 @@ -jest.mock('axios'); -import axios from 'axios'; - -import QueryGraphHandler from '../../src/query_graph'; -import QNode2 from '../../src/query_node'; -import QEdge from '../../src/query_edge'; -import InvalidQueryGraphError from '../../src/exceptions/invalid_query_graph_error'; - -describe('Testing QueryGraphHandler Module', () => { - const disease_entity_node = { - categories: ['biolink:Disease'], - ids: ['MONDO:0005737'], - }; - const gene_entity_node = { - categories: ['biolink:Gene'], - ids: ['NCBIGene:1017'], - }; - const gene_class_node = { - categories: ['biolink:Gene'], - }; - const chemical_class_node = { - categories: ['biolink:SmallMolecule'], - }; - const pathway_class_node = { - categories: ['biolink:Pathways'], - }; - const phenotype_class_node = { - categories: ['biolink:Phenotype'], - }; - const OneHopQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - }; - - const ThreeHopExplainQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: gene_entity_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - }, - }; - - const FourHopQuery = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - }, - }; - - const QueryWithCycle1 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - e05: { - subject: 'n4', - object: 'n1', - }, - }, - }; - - const QueryWithCycle2 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - n2: chemical_class_node, - n3: phenotype_class_node, - n4: pathway_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n2', - }, - e03: { - subject: 'n2', - object: 'n3', - }, - e04: { - subject: 'n3', - object: 'n4', - }, - e05: { - subject: 'n4', - object: 'n1', - }, - }, - }; - - const QueryWithDuplicateEdge1 = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - e02: { - subject: 'n1', - object: 'n0', - }, - }, - }; - - const QueryWithNullValues = { - nodes: { - n0: { - ...disease_entity_node, - categories: null, - }, - n1: { - ...gene_class_node, - ids: null, - }, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - }; - - const QueryWithNullPredicate = { - nodes: { - n0: disease_entity_node, - n1: gene_class_node, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - predicate: null, - }, - }, - }; - - const QueryWithNullIds = { - nodes: { - n0: { - ...disease_entity_node, - ids: [], - }, - n1: { - ...gene_class_node, - ids: null, - }, - }, - edges: { - e01: { - subject: 'n0', - object: 'n1', - }, - }, - }; - - describe('test _storeNodes function', () => { - test('test if storeNodes with one hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(OneHopQuery, undefined); - //@ts-expect-error: explicitly testing private method - const nodes = await handler._storeNodes(); - expect(nodes).toHaveProperty('n0'); - expect(nodes).not.toHaveProperty('n2'); - expect(nodes.n0).toBeInstanceOf(QNode2); - }); - - test('test if storeNodes with multi hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(FourHopQuery, undefined); - //@ts-expect-error: explicitly testing private method - const nodes = await handler._storeNodes(); - expect(nodes).toHaveProperty('n0'); - expect(nodes).toHaveProperty('n3'); - expect(nodes.n0).toBeInstanceOf(QNode2); - expect(nodes.n3).toBeInstanceOf(QNode2); - }); - }); - - describe('test calculateEdges function', () => { - test('test storeEdges with one hop query', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(OneHopQuery, undefined); - await handler.calculateEdges(); - expect(handler.edges).toHaveProperty('e01'); - expect(handler.edges).not.toHaveProperty('e02'); - expect(handler.edges.e01).toBeInstanceOf(QEdge); - expect(handler.edges.e01.getInputNode()).toBeInstanceOf(QNode2); - }); - }); - - describe('test _createQueryPaths function', () => { - test('test createQueryPaths with three hop explain query', async () => { - (axios.post as jest.Mock) - .mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }) - .mockResolvedValueOnce({ - data: { - 'NCBIGene:1017': { - id: { identifier: 'NCBIGene:1017', label: 'CDK2' }, - equivalent_identifiers: [ - { identifier: 'NCBIGene:1017', label: 'CDK2' }, - { identifier: 'ENSEMBL:ENSG00000123374' }, - { identifier: 'HGNC:1771', label: 'CDK2' }, - { identifier: 'OMIM:116953' }, - { identifier: 'UMLS:C1332733', label: 'CDK2 gene' }, - { - identifier: 'UniProtKB:A0A024RB10', - label: 'A0A024RB10_HUMAN Cyclin-dependent kinase 2, isoform CRA_a (trembl)', - }, - { - identifier: 'UniProtKB:A0A024RB77', - label: 'A0A024RB77_HUMAN Cyclin-dependent kinase 2, isoform CRA_b (trembl)', - }, - { - identifier: 'UniProtKB:B4DDL9', - label: - 'B4DDL9_HUMAN cDNA FLJ54979, highly similar to Homo sapiens cyclin-dependent kinase 2 (CDK2), transcript variant 2, mRNA (trembl)', - }, - { identifier: 'UniProtKB:E7ESI2', label: 'E7ESI2_HUMAN Cyclin-dependent kinase 2 (trembl)' }, - { identifier: 'ENSEMBL:ENSP00000393605' }, - { identifier: 'UniProtKB:G3V5T9', label: 'G3V5T9_HUMAN Cyclin-dependent kinase 2 (trembl)' }, - { identifier: 'ENSEMBL:ENSP00000452514' }, - { identifier: 'UniProtKB:P24941', label: 'CDK2_HUMAN Cyclin-dependent kinase 2 (sprot)' }, - { identifier: 'PR:P24941', label: 'cyclin-dependent kinase 2 (human)' }, - { identifier: 'UMLS:C0108855', label: 'CDK2 protein, human' }, - ], - type: [ - 'biolink:Gene', - 'biolink:GeneOrGeneProduct', - 'biolink:GenomicEntity', - 'biolink:ChemicalEntityOrGeneOrGeneProduct', - 'biolink:PhysicalEssence', - 'biolink:OntologyClass', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - 'biolink:PhysicalEssenceOrOccurrent', - 'biolink:ThingWithTaxon', - 'biolink:MacromolecularMachineMixin', - 'biolink:Protein', - 'biolink:GeneProductMixin', - 'biolink:Polypeptide', - 'biolink:ChemicalEntityOrProteinOrPolypeptide', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(ThreeHopExplainQuery, undefined); - const edges = await handler.calculateEdges(); - expect(Object.keys(edges)).toHaveLength(3); - }); - }); - describe('test cycle/duplicate edge detection for query graphs', () => { - test('Duplicate Edge Graph #1', async () => { - const handler = new QueryGraphHandler(QueryWithDuplicateEdge1, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - test('Query Graph Cycle #1', async () => { - const handler = new QueryGraphHandler(QueryWithCycle1, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - test('Query Graph Cycle #2', async () => { - const handler = new QueryGraphHandler(QueryWithCycle2, undefined); - await expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - }); - - describe('test chandling of null ids / categories / predicates', () => { - test('Null id/categories graph', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(QueryWithNullValues, undefined); - await expect(handler.calculateEdges()).resolves.not.toThrow(); - }); - test('Null predicate graph', async () => { - (axios.post as jest.Mock).mockResolvedValueOnce({ - data: { - 'MONDO:0005737': { - id: { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - equivalent_identifiers: [ - { identifier: 'MONDO:0005737', label: 'Ebola hemorrhagic fever' }, - { identifier: 'DOID:4325', label: 'Ebola hemorrhagic fever' }, - { identifier: 'ORPHANET:319218' }, - { identifier: 'UMLS:C0282687', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MESH:D019142', label: 'Hemorrhagic Fever, Ebola' }, - { identifier: 'MEDDRA:10014071' }, - { identifier: 'MEDDRA:10014072' }, - { identifier: 'MEDDRA:10014074' }, - { identifier: 'MEDDRA:10055245' }, - { identifier: 'NCIT:C36171', label: 'Ebola Hemorrhagic Fever' }, - { identifier: 'SNOMEDCT:37109004' }, - { identifier: 'ICD10:A98.4' }, - ], - type: [ - 'biolink:Disease', - 'biolink:DiseaseOrPhenotypicFeature', - 'biolink:ThingWithTaxon', - 'biolink:BiologicalEntity', - 'biolink:NamedThing', - 'biolink:Entity', - ], - information_content: 100, - }, - }, - }); - const handler = new QueryGraphHandler(QueryWithNullPredicate, undefined); - const edges = await handler.calculateEdges(); - // if this is undefined (not null) then smartapi-kg treats as if the field doesn't exist (desired behavior) - expect(edges[0].getPredicate()).toBe(undefined); - }); - test('Graph without any ids', async () => { - const handler = new QueryGraphHandler(QueryWithNullIds, undefined); - expect(handler.calculateEdges()).rejects.toThrow(InvalidQueryGraphError); - }); - }); -}); diff --git a/__test__/integration/QueryNode.test.ts b/__test__/integration/QueryNode.test.ts deleted file mode 100644 index 28ac0c13..00000000 --- a/__test__/integration/QueryNode.test.ts +++ /dev/null @@ -1,166 +0,0 @@ -import { SRIBioEntity } from 'biomedical_id_resolver/built/common/types'; -import QNode from '../../src/query_node'; - -describe('Testing QueryNode Module', () => { - const node1_equivalent_ids = { - 'NCBIGene:1017': { - primaryID: 'NCBIGene:1017', - equivalentIDs: ['NCBIGene:1017'], - label: 'CDK2', - labelAliases: ['CDK2'], - primaryTypes: ['Gene'], - semanticTypes: ['Gene'], - db_ids: { - NCBIGene: ['1017'], - SYMBOL: ['CDK2'], - }, - }, - }; - - describe('Testing hasInput function', () => { - test('test node without curies specified should return false', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - const res = gene_node.hasInput(); - expect(res).toBeFalsy(); - }); - - test('test node with curies specified should return true', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'], ids: ['NCBIGene:1017'] }); - const res = gene_node.hasInput(); - expect(res).toBeTruthy(); - }); - }); - - describe('Test hasEquivalentIDs function', () => { - test('test node with equivalent identifiers set should return true', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.setEquivalentIDs(node1_equivalent_ids); - const res = gene_node.hasEquivalentIDs(); - expect(res).toBeTruthy(); - }); - - test('test node with equivalent identifiers not set should return false', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - const res = gene_node.hasEquivalentIDs(); - expect(res).toBeFalsy(); - }); - }); - - describe('Test getEntities', () => { - test('If equivalent ids are empty, should return an empty array', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = {}; - expect(gene_node.getEntities()).toEqual([]); - }); - - test('If equivalent ids are not empty, should return an array of bioentities', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = { - //@ts-expect-error: partial data for specific test - A: { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryID: 'd', - equivalentIDs: ['e'], - }, - }; - expect(gene_node.getEntities()).toEqual([ - { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - { - primaryID: 'd', - equivalentIDs: ['e'], - }, - ]); - }); - }); - - describe('Test getPrimaryIDs', () => { - test('If equivalent ids are empty, should return an empty array', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = {}; - expect(gene_node.getPrimaryIDs()).toEqual([]); - }); - - test('If equivalent ids are not empty, should return an array of primaryIDs', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - gene_node.equivalentIDs = { - //@ts-expect-error: partial data for specific test - A: { - primaryID: 'a', - equivalentIDs: ['b', 'c'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryID: 'd', - equivalentIDs: ['e'], - }, - }; - expect(gene_node.getPrimaryIDs()).toEqual(['a', 'd']); - }); - }); - - describe('Test updateEquivalentIDs', () => { - test('If equivalent ids does not exist, should set it with the input', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - //@ts-expect-error: partial data for specific test - gene_node.updateEquivalentIDs({ a: 'b' }); - expect(gene_node.equivalentIDs).toEqual({ a: 'b' }); - }); - - test('If equivalent ids are not empty, should update the equivalent ids', () => { - const gene_node = new QNode({ id: 'n1', categories: ['Gene'] }); - //@ts-expect-error: partial data for specific test - gene_node.equivalentIDs = { a: 'b', c: 'd' }; - //@ts-expect-error: partial data for specific test - gene_node.updateEquivalentIDs({ e: 'f' }); - expect(gene_node.getEquivalentIDs()).toEqual({ a: 'b', c: 'd', e: 'f' }); - }); - }); - - describe('Test getCategories function', () => { - test('If equivalent ids are empty, return itself and its descendants', () => { - const node = new QNode({ id: 'n1', categories: ['DiseaseOrPhenotypicFeature'] }); - expect(node.getCategories()).toContain('Disease'); - expect(node.getCategories()).toContain('PhenotypicFeature'); - expect(node.getCategories()).toContain('DiseaseOrPhenotypicFeature'); - }); - - test('If equivalent ids are empty, return itself and its descendants using NamedThing as example', () => { - const node = new QNode({ id: 'n1', categories: ['NamedThing'] }); - expect(node.getCategories()).toContain('Disease'); - expect(node.getCategories()).toContain('PhenotypicFeature'); - expect(node.getCategories()).toContain('DiseaseOrPhenotypicFeature'); - expect(node.getCategories()).toContain('Gene'); - expect(node.getCategories()).toContain('NamedThing'); - }); - - test('If equivalent ids are empty, return itself and its descendants using Gene as example', () => { - const node = new QNode({ id: 'n1', categories: ['Gene'] }); - expect(node.getCategories()).toEqual(['Gene']); - }); - - test('If equivalent ids are not empty, return all primary semantic types defined in equivalent entities', () => { - const node = new QNode({ id: 'n1', categories: ['Gene'] }); - node.setEquivalentIDs({ - //@ts-expect-error: partial data for specific test - A: { - primaryTypes: ['m', 'p'], - semanticTypes: ['m', 'n', 'p', 'q'], - }, - //@ts-expect-error: partial data for specific test - B: { - primaryTypes: ['x'], - semanticTypes: ['x', 'y'], - }, - }); - // console.log(node.getCategories()); - expect(node.getCategories()).toEqual(['Gene', 'm', 'p', 'x']); - }); - }); -}); diff --git a/__test__/integration/QueryResult.test.ts b/__test__/integration/QueryResult.test.ts index b2dce3b1..7b176330 100644 --- a/__test__/integration/QueryResult.test.ts +++ b/__test__/integration/QueryResult.test.ts @@ -1,8 +1,7 @@ import { cloneDeep, range } from 'lodash'; -import QNode from '../../src/query_node'; -import QEdge from '../../src/query_edge'; +import { QNode, QEdge } from '@biothings-explorer/types'; import QueryResult from '../../src/results_assembly/query_results'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record } from '@biothings-explorer/types'; import { EDGE_ATTRIBUTES_USED_IN_RECORD_HASH } from '../../src/config'; describe('Testing QueryResults Module', () => { @@ -41,7 +40,6 @@ describe('Testing QueryResults Module', () => { EDGE_ATTRIBUTES_USED_IN_RECORD_HASH, { predicate: 'biolink:physically_interacts_with', - source: 'DGIdb', api_name: 'BioThings DGIDB API', "x-translator": { infores: "infores:biothings-dgidb", @@ -138,6 +136,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -242,6 +244,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -345,6 +351,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -450,6 +460,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -555,6 +569,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -659,6 +677,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -689,6 +711,10 @@ describe('Testing QueryResults Module', () => { { predicate: 'biolink:condition_associated_with_gene', api_name: 'Automat Hetio', + source: 'Automat Hetio', + "x-translator": { + infores: "infores:automat", + }, }, edge2, ); @@ -759,7 +785,7 @@ describe('Testing QueryResults Module', () => { const [source0, source1] = Array(2) .fill(0) .map((s, i) => { - return { source: `source${i}` }; + return { metaEdgeSource: `source${i}`, apiInforesCurie: `infores:source${i}` }; }); const [api0, api1] = Array(2) .fill(0) @@ -1903,6 +1929,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -1929,6 +1956,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2011,6 +2039,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -2037,6 +2066,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2066,6 +2096,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -2092,6 +2123,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -2173,6 +2205,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record0_pred0', source: 'source0', api_name: 'api0', + source: 'api0', }, // n0 subject: { @@ -2199,6 +2232,7 @@ describe('Testing QueryResults Module', () => { predicate: 'biolink:record1_pred0', source: 'source1', api_name: 'api1', + source: 'api1', }, // n1 subject: { @@ -3010,6 +3044,8 @@ describe('Testing QueryResults Module', () => { ], gene_symbol: 'HRAS', }, + metaEdgeSource: 'bioplanet', + apiInforesCurie: 'infores:bioplanet' }); const record2 = new Record({ subject: { @@ -3044,6 +3080,8 @@ describe('Testing QueryResults Module', () => { ], gene_symbol: 'HRAS', }, + metaEdgeSource: 'bioplanet', + apiInforesCurie: 'infores:bioplanet' }); const queryResult = new QueryResult({ provenanceUsesServiceProvider: false }); await queryResult.update({ diff --git a/__test__/integration/TRAPIQueryHandler.test.ts b/__test__/integration/TRAPIQueryHandler.test.ts index 7f9d6e21..1bab043f 100644 --- a/__test__/integration/TRAPIQueryHandler.test.ts +++ b/__test__/integration/TRAPIQueryHandler.test.ts @@ -4,6 +4,8 @@ const mockedAxios = axios as jest.Mocked; import TRAPIQueryHandler from '../../src/index'; import path from 'path'; +import { Subquery, SubqueryRelay } from '@biothings-explorer/call-apis'; +import { MessageChannel } from 'worker_threads'; describe('Testing TRAPIQueryHandler Module', () => { const disease_entity_node = { @@ -25,6 +27,27 @@ describe('Testing TRAPIQueryHandler Module', () => { }, }, }; + + beforeAll(async () => { + const subqueryRelay = new SubqueryRelay(); + const { port1: workerSide, port2: parentSide } = new MessageChannel(); + global.workerSide = workerSide; + parentSide.on("message", async (msg: any) => { + const { queries, options } = msg.value + subqueryRelay.subscribe( + await Promise.all(queries.map(async query => await Subquery.unfreeze(query))), + options, + ({ hash, records, logs, apiUnavailable }) => { + parentSide.postMessage({ + threadId: 0, + type: "subQueryResult", + value: { hash, records, logs, apiUnavailable }, + }); + }, + ); + }); + }); + describe('Testing query function', () => { test.skip('test with one query edge', async () => { (mockedAxios.get as jest.Mock).mockResolvedValue({ diff --git a/__test__/integration/biolink.test.ts b/__test__/integration/biolink.test.ts deleted file mode 100644 index be391761..00000000 --- a/__test__/integration/biolink.test.ts +++ /dev/null @@ -1,72 +0,0 @@ -import biolink from '../../src/biolink'; - -describe('Test BioLinkModel class', () => { - test('test reverse with correct predicate', () => { - const res = biolink.reverse('treats'); - expect(res).toBe('treated_by'); - }); - - test('test reverse with correct predicate if it contains underscore', () => { - const res = biolink.reverse('treated_by'); - expect(res).toBe('treats'); - }); - - test('test reverse with predicate having symmetric equal to true', () => { - const res = biolink.reverse('correlated_with'); - expect(res).toBe('correlated_with'); - }); - - test('test predicate with no inverse property and symmetric not equal to true', () => { - const res = biolink.reverse('has_phenotype'); - expect(res).toBe('phenotype_of'); - }); - - test('test predicate not exist in biolink model', () => { - const res = biolink.reverse('haha'); - expect(res).toBeUndefined(); - }); - - test('if input not string, return undefined', () => { - //@ts-expect-error: Explicitly testing for wrong type - const res = biolink.reverse(['dd']); - expect(res).toBeUndefined(); - }); - - describe('Test getDescendants function', () => { - test('if input is in biolink model, return all its desendants and itself', () => { - const res = biolink.getDescendantClasses('MolecularEntity'); - expect(res).toContain('SmallMolecule'); - expect(res).toContain('NucleicAcidEntity'); - expect(res).toContain('MolecularEntity'); - }); - - test("if input is in biolink model but doesn't have descendants, return itself", () => { - const res = biolink.getDescendantClasses('Gene'); - expect(res).toEqual(['Gene']); - }); - - test('if input is not in biolink, return itself', () => { - const res = biolink.getDescendantClasses('Gene1'); - expect(res).toEqual('Gene1'); - }); - }); - - describe('Test getDescendantPredicates function', () => { - test('if input is in biolink model, return all its desendants and itself', () => { - const res = biolink.getDescendantPredicates('related_to'); - expect(res).toContain('subclass_of'); - expect(res).toContain('superclass_of'); - expect(res).toContain('related_to'); - }); - - test("if input is in biolink model but doesn't have descendants, return itself", () => { - const res = biolink.getDescendantPredicates('subclass_of'); - expect(res).toEqual(['subclass_of']); - }); - - test('if input is not in biolink, return itself', () => { - const res = biolink.getDescendantPredicates('Gene1'); - expect(res).toEqual(['Gene1']); - }); - }); -}); diff --git a/__test__/integration/graph/graph.test.ts b/__test__/integration/graph/graph.test.ts deleted file mode 100644 index 0d650621..00000000 --- a/__test__/integration/graph/graph.test.ts +++ /dev/null @@ -1,195 +0,0 @@ -import graph from '../../../src/graph/graph'; -import { Record } from '@biothings-explorer/api-response-transform'; - -describe('Test graph class', () => { - const qNode1 = { - getID() { - return 'qg1'; - }, - }; - const qNode2 = { - getID() { - return 'qg2'; - }, - }; - const record1 = new Record({ - api: 'API1', - metaEdgeSource: 'source1', - apiInforesCurie: 'infores:API1', - predicate: 'predicate1', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMID:1', 'PMID:2'], - mappedResponse: { - relation: 'relation1', - }, - }); - - const record2 = new Record({ - api: 'API2', - metaEdgeSource: 'source2', - apiInforesCurie: 'infores:API2', - predicate: 'predicate1', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:1', 'PMC:2'], - mappedResponse: { - relation: 'relation2', - }, - }); - - const record3 = new Record({ - api: 'API3', - metaEdgeSource: 'source3', - apiInforesCurie: 'infores:API3', - predicate: 'predicate2', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:3', 'PMC:4'], - mappedResponse: { - relation: 'relation3', - }, - }); - - const record3a = new Record({ - api: 'API3', - metaEdgeSource: 'source3', - apiInforesCurie: 'infores:API3', - predicate: 'predicate2', - object: { - qNodeID: 'qg2', - curie: 'outputPrimaryCurie', - original: 'outputPrimaryCurie', - }, - subject: { - qNodeID: 'qg1', - curie: 'inputPrimaryCurie', - original: 'inputPrimaryCurie', - }, - publications: ['PMC:6', 'PMC:7'], - mappedResponse: { - relation: ['relation3a', 'relation3b'], - }, - }); - - test('A single query result is correctly updated.', () => { - const g = new graph(); - g.update([record1]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - expect(g.edges).toHaveProperty('2c826c3663b91f65a1cba70f06c7fc65'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].apis)).toEqual(['API1']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].sources).toHaveProperty('source1'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].attributes).toHaveProperty('relation', new Set(['relation1'])); - }); - - test('Multiple query results are correctly updated for two edges having same input, predicate and output', () => { - const g = new graph(); - g.update([record1, record2]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - - expect(g.edges).toHaveProperty('2c826c3663b91f65a1cba70f06c7fc65'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].apis)).toEqual(['API1']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].sources).toHaveProperty('source1'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].attributes).toHaveProperty('relation', new Set(['relation1'])); - - expect(g.edges).toHaveProperty('827c366e2e3088b3f4a90dd88a524f15'); - expect(Array.from(g.edges['827c366e2e3088b3f4a90dd88a524f15'].apis)).toEqual(['API2']); - expect(g.edges['827c366e2e3088b3f4a90dd88a524f15'].sources).toHaveProperty('source2'); - expect(Array.from(g.edges['827c366e2e3088b3f4a90dd88a524f15'].publications)).toEqual(['PMC:1', 'PMC:2']); - expect(g.edges['827c366e2e3088b3f4a90dd88a524f15'].attributes).toHaveProperty('relation', new Set(['relation2'])); - }); - - test('Multiple query results for different edges are correctly updated', () => { - const g = new graph(); - g.update([record1, record2, record3]); - expect(g.nodes).toHaveProperty('outputPrimaryCurie'); - expect(g.nodes).toHaveProperty('inputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].primaryCurie).toEqual('outputPrimaryCurie'); - expect(g.nodes['outputPrimaryCurie'].qNodeID).toEqual('qg2'); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceNodes)).toEqual(['inputPrimaryCurie']); - expect(Array.from(g.nodes['outputPrimaryCurie'].sourceQNodeIDs)).toEqual(['qg1']); - expect(g.nodes['inputPrimaryCurie'].primaryCurie).toEqual('inputPrimaryCurie'); - expect(g.nodes['inputPrimaryCurie'].qNodeID).toEqual('qg1'); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetNodes)).toEqual(['outputPrimaryCurie']); - expect(Array.from(g.nodes['inputPrimaryCurie'].targetQNodeIDs)).toEqual(['qg2']); - - expect(g.edges).toHaveProperty('2c826c3663b91f65a1cba70f06c7fc65'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].apis)).toEqual(['API1']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].sources).toHaveProperty('source1'); - expect(Array.from(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].publications)).toEqual(['PMID:1', 'PMID:2']); - expect(g.edges['2c826c3663b91f65a1cba70f06c7fc65'].attributes).toHaveProperty('relation', new Set(['relation1'])); - - expect(g.edges).toHaveProperty('827c366e2e3088b3f4a90dd88a524f15'); - expect(Array.from(g.edges['827c366e2e3088b3f4a90dd88a524f15'].apis)).toEqual(['API2']); - expect(g.edges['827c366e2e3088b3f4a90dd88a524f15'].sources).toHaveProperty('source2'); - expect(Array.from(g.edges['827c366e2e3088b3f4a90dd88a524f15'].publications)).toEqual(['PMC:1', 'PMC:2']); - expect(g.edges['827c366e2e3088b3f4a90dd88a524f15'].attributes).toHaveProperty('relation', new Set(['relation2'])); - - expect(g.edges).toHaveProperty('3138ca0afca791770ed38c243dea2116'); - expect(Array.from(g.edges['3138ca0afca791770ed38c243dea2116'].apis)).toEqual(['API3']); - expect(g.edges['3138ca0afca791770ed38c243dea2116'].sources).toHaveProperty('source3'); - expect(Array.from(g.edges['3138ca0afca791770ed38c243dea2116'].publications)).toEqual(['PMC:3', 'PMC:4']); - expect(g.edges['3138ca0afca791770ed38c243dea2116'].attributes).toHaveProperty('relation', new Set(['relation3'])); - }); - - test('Multiple attributes with the same name are merged', () => { - const g = new graph(); - g.update([record3, record3a]); - - expect(g.edges).toHaveProperty('3138ca0afca791770ed38c243dea2116'); - expect(Array.from(g.edges['3138ca0afca791770ed38c243dea2116'].publications)).toEqual([ - 'PMC:3', - 'PMC:4', - 'PMC:6', - 'PMC:7', - ]); - expect(g.edges['3138ca0afca791770ed38c243dea2116'].attributes).toHaveProperty( - 'relation', - new Set(['relation3', 'relation3a', 'relation3b']), - ); - }); -}); diff --git a/__test__/unittest/QueryEdge.test.ts b/__test__/unittest/QueryEdge.test.ts deleted file mode 100644 index b68c8648..00000000 --- a/__test__/unittest/QueryEdge.test.ts +++ /dev/null @@ -1,87 +0,0 @@ -import QEdge from '../../src/query_edge'; -import QNode from '../../src/query_node'; - -function basicQEdge({ - predicates, - subjectIds, - objectIds, - reverse = false, -}: { predicates?: string[]; subjectIds?: string[]; objectIds?: string[]; reverse?: boolean } = {}): QEdge { - return new QEdge({ - id: 'e01', - predicates, - subject: new QNode({ - id: 'n01', - ids: subjectIds, - }), - object: new QNode({ - id: 'n02', - ids: objectIds, - }), - }); -} - -describe('Test QEdge class', () => { - describe('Test getPredicate function', () => { - test('Non reversed edge should return predicates itself', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treats'); - }); - - test('Undefined predicate should return itself', () => { - const edge = basicQEdge(); - const res = edge.getPredicate(); - expect(res).toBeUndefined; - }); - - test('An array of non-undefined predicates should return itself', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treats'); - expect(res).toContain('targets'); - }); - - test('An array of non-undefined predicates with reverse edge should exclude return value if undefined', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - - test('An array of non-undefined predicates with reverse edge should return reversed predicates if not undefined', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getPredicate(); - expect(res).toContain('treated_by'); - }); - }); - - describe('Test getOutputNode function', () => { - test('reversed edge should return the subject', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - objectIds: ['yes'], - }); - const res = edge.getOutputNode(); - expect(res.id).toEqual('n01'); - }); - - test('non reversed edge should return the object', () => { - const edge = basicQEdge({ - predicates: ['biolink:treats', 'biolink:targets'], - subjectIds: ['yes'], - }); - const res = edge.getOutputNode(); - expect(res.id).toEqual('n02'); - }); - }); -}); diff --git a/__test__/unittest/TRAPIQueryHandler.test.ts b/__test__/unittest/TRAPIQueryHandler.test.ts index 7ea16825..39412542 100644 --- a/__test__/unittest/TRAPIQueryHandler.test.ts +++ b/__test__/unittest/TRAPIQueryHandler.test.ts @@ -1,11 +1,13 @@ -import { Record } from '@biothings-explorer/api-response-transform'; +import { Record, InvalidQueryGraphError } from '@biothings-explorer/types'; import path from 'path'; import fs from 'fs'; import _ from 'lodash'; const AxiosActual = jest.requireActual('axios'); import InferredQueryHandler from '../../src/inferred_mode/inferred_mode'; -import TRAPIQueryHandler, { InvalidQueryGraphError } from '../../src/index'; +import TRAPIQueryHandler from '../../src/index'; import axios from 'axios'; +import { MessageChannel } from 'worker_threads'; +import { Subquery, SubqueryRelay } from '@biothings-explorer/call-apis'; jest.mock('../../src/inferred_mode/inferred_mode'); jest.mock('axios'); @@ -17,6 +19,26 @@ const records = Record.unfreezeRecords( describe('test TRAPIQueryHandler methods', () => { const OLD_ENV = process.env; + beforeAll(async () => { + const subqueryRelay = new SubqueryRelay(); + const { port1: workerSide, port2: parentSide } = new MessageChannel(); + global.workerSide = workerSide; + parentSide.on("message", async (msg: any) => { + const { queries, options } = msg.value + subqueryRelay.subscribe( + await Promise.all(queries.map(async query => await Subquery.unfreeze(query))), + options, + ({ hash, records, logs, apiUnavailable }) => { + parentSide.postMessage({ + threadId: 0, + type: "subQueryResult", + value: { hash, records, logs, apiUnavailable }, + }); + }, + ); + }); + }); + beforeEach(() => { jest.resetAllMocks(); jest.resetModules(); diff --git a/__test__/unittest/cacheHandler.test.ts b/__test__/unittest/cacheHandler.test.ts deleted file mode 100644 index 621401ae..00000000 --- a/__test__/unittest/cacheHandler.test.ts +++ /dev/null @@ -1,302 +0,0 @@ -import fs from 'fs'; -import path from 'path'; -import { Readable } from 'stream'; -import { Record } from '@biothings-explorer/api-response-transform'; -import RedisMock from 'ioredis-mock'; -import RealCacheHandler from '../../src/cache_handler'; -import MetaKG from '@biothings-explorer/smartapi-kg'; -import QEdge from '../../src/query_edge'; - -const qEdges = JSON.parse(fs.readFileSync(path.resolve(__dirname, '../data/qEdges.json'), { encoding: 'utf8' })); - -const records = Record.unfreezeRecords( - JSON.parse(fs.readFileSync(path.resolve(__dirname, '../data/queryRecords.json'), { encoding: 'utf8' })), -); - -describe('test cache handler', () => { - const OLD_ENV = process.env; - beforeEach(() => { - jest.resetModules(); // Most important - it clears the cache - jest.clearAllMocks(); - jest.mock('ioredis', () => RedisMock); - process.env = { ...OLD_ENV }; // Make a copy - // new RedisMock().flushall(); - }); - - afterAll((done) => { - process.env = OLD_ENV; // Restore old environment - done(); - }); - - describe("ensure caching isn't used when it shouldn't be", () => { - test("don't use cache when explicitely disabled", async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(false); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when explicitely disabled by ENV", async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - process.env.RESULT_CACHING = 'false'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when redis disabled", async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - expect(cacheHandler.cacheEnabled).toBeFalsy(); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - - test("don't use cache when redis specially disabled", async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - process.env.INTERNAL_DISABLE_REDIS = 'true'; - const cacheHandler = new CacheHandler(true); - const categorizeEdges = jest.spyOn(CacheHandler.prototype, 'categorizeEdges'); - const _hashEdgeByMetaKG = jest.spyOn(CacheHandler.prototype, '_hashEdgeByMetaKG'); - const _groupQueryRecordsByQEdgeHash = jest.spyOn(CacheHandler.prototype, '_groupQueryRecordsByQEdgeHash'); - - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(categorizeEdges).toHaveBeenCalledTimes(1); - expect(_hashEdgeByMetaKG).toHaveBeenCalledTimes(0); - expect(cachedRecords).toHaveLength(0); - expect(nonCachedQEdges).toHaveLength(1); - expect(nonCachedQEdges).toEqual(qEdges); - - await cacheHandler.cacheEdges(records); - expect(_groupQueryRecordsByQEdgeHash).toHaveBeenCalledTimes(0); - }); - }); - - describe('test encoding/decoding', () => { - test('test encoder', async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const encoder = cacheHandler.createEncodeStream(); - - let encodedString = ''; - - await new Promise((resolve) => { - Readable.from(Record.freezeRecords(records)) - .pipe(encoder) - .on('data', async (chunk) => { - encodedString += chunk; - }) - .on('end', () => resolve()); - }); - - expect(encodedString).toBeTruthy(); - expect(encodedString.includes(',')).toBeTruthy(); - expect(encodedString.length).toBeLessThan(JSON.stringify(records).length); - }); - - test('test decoder', async () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const encoder = cacheHandler.createEncodeStream(); - const decoder = cacheHandler.createDecodeStream(); - - let encodedString = ''; - - await new Promise((resolve) => { - Readable.from(Record.freezeRecords(records)) - .pipe(encoder) - .on('data', async (chunk) => { - encodedString += chunk; - }) - .on('end', () => resolve()); - }); - - const decodedObjs: unknown[] = []; - - await new Promise((resolve) => { - Readable.from(encodedString) - .pipe(decoder) - .on('data', async (obj) => { - decodedObjs.push(obj); - }) - .on('end', () => resolve()); - }); - - expect(decodedObjs).toStrictEqual(JSON.parse(JSON.stringify(Record.freezeRecords(records)))); - }); - }); - - describe('Test _hashEdgeByMetaKG', () => { - test('without metaKG', () => { - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const hash = cacheHandler._hashEdgeByMetaKG('test'); - - expect(hash).toEqual('test'); - }); - - test('with metaKG', () => { - const fakeMetaKG1 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - { - association: { - smartapi: { - id: 'someFakeID2', - }, - }, - }, - ], - }; - const fakeMetaKG2 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - { - association: { - smartapi: { - id: 'someFakeID3', - }, - }, - }, - ], - }; - const fakeMetaKG3 = { - ops: [ - { - association: { - smartapi: { - id: 'someFakeID1', - }, - }, - }, - ], - }; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler1 = new CacheHandler(true, fakeMetaKG1 as MetaKG); - const hash1 = cacheHandler1._hashEdgeByMetaKG('test'); - - const cacheHandler2 = new CacheHandler(true, fakeMetaKG2 as MetaKG); - const hash2 = cacheHandler2._hashEdgeByMetaKG('test'); - - const cacheHandler3 = new CacheHandler(true, fakeMetaKG3 as MetaKG); - const hash3 = cacheHandler3._hashEdgeByMetaKG('test'); - - expect(hash1 === 'test').toBeFalsy(); - expect(hash1 === hash2).toBeFalsy(); - expect(hash1 === hash3).toBeFalsy(); - expect(hash2 === hash3).toBeFalsy(); - }); - }); - - test('_groupQueryRecordsByQEdgeHash', () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - const groups = cacheHandler._groupQueryRecordsByQEdgeHash(records); - - const numHashes = records.reduce((set, record) => { - set.add(record.qEdge.getHashedEdgeRepresentation()); - return set; - }, new Set()).size; - - expect(Object.keys(groups)).toHaveLength(numHashes); - expect( - Object.values(groups).reduce((arr, group) => { - arr = [...arr, ...group]; - return arr; - }, [] as unknown[]), - ).toHaveLength(records.length + numHashes); - }); - - test('caching and cache lookup', async () => { - process.env.REDIS_HOST = 'mocked'; - process.env.REDIS_PORT = 'mocked'; - const CacheHandler = require('../../src/cache_handler').default as typeof RealCacheHandler; - const cacheHandler = new CacheHandler(true); - - await cacheHandler.cacheEdges(records); - const qEdges: QEdge[] = Object.values( - records.reduce( - (obj, record) => { - if (!(record.qEdge.getHashedEdgeRepresentation() in obj)) { - obj[record.qEdge.getHashedEdgeRepresentation()] = record.qEdge as QEdge; - } - return obj; - }, - {} as { [qEdgeHash: string]: QEdge }, - ), - ); - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - expect(nonCachedQEdges).toHaveLength(0); - expect(cachedRecords).toHaveLength(records.length); - // TODO get each record sorted by hash to compare individually - const originalRecordHashes = records.reduce((set, record) => { - set.add(record.recordHash); - return set; - }, new Set()); - const cachedRecordHashes = cachedRecords.reduce((set, record) => { - set.add(record.recordHash); - return set; - }, new Set()); - const setsMatch = [...originalRecordHashes].every((hash) => cachedRecordHashes.has(hash)); - expect(originalRecordHashes.size).toEqual(cachedRecordHashes.size); - expect(setsMatch).toBeTruthy(); - }); -}); diff --git a/package.json b/package.json index 6426eca2..3dd301c8 100644 --- a/package.json +++ b/package.json @@ -53,14 +53,14 @@ "typescript": "^5.2.2" }, "dependencies": { - "@biothings-explorer/api-response-transform": "workspace:../api-response-transform", - "@biothings-explorer/call-apis": "workspace:../call-apis", - "@biothings-explorer/node-expansion": "workspace:../node-expansion", - "@biothings-explorer/smartapi-kg": "workspace:../smartapi-kg", - "@biothings-explorer/utils": "workspace:../utils", - "@biothings-explorer/types": "workspace:../types", - "biolink-model": "workspace:../biolink-model", - "biomedical_id_resolver": "workspace:../biomedical_id_resolver", + "@biothings-explorer/api-response-transform": "workspace:*", + "@biothings-explorer/call-apis": "workspace:*", + "@biothings-explorer/node-expansion": "workspace:*", + "@biothings-explorer/smartapi-kg": "workspace:*", + "@biothings-explorer/utils": "workspace:*", + "@biothings-explorer/types": "workspace:*", + "biolink-model": "workspace:*", + "biomedical_id_resolver": "workspace:*", "@sentry/node": "^7.74.1", "async": "^3.2.4", "chi-square-p-value": "^1.0.5", diff --git a/src/batch_edge_query.ts b/src/batch_edge_query.ts index d5504b21..e8d16213 100644 --- a/src/batch_edge_query.ts +++ b/src/batch_edge_query.ts @@ -1,17 +1,15 @@ -import call_api from '@biothings-explorer/call-apis'; -import { redisClient } from '@biothings-explorer/utils'; -import QEdge2APIEdgeHandler, { APIEdge } from './qedge2apiedge'; +import { LogEntry, SerializableLog, redisClient } from '@biothings-explorer/utils'; +import { APIEdge, Record, RecordPackage } from '@biothings-explorer/types'; +import QEdge2APIEdgeHandler from './qedge2apiedge'; import NodesUpdateHandler from './update_nodes'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:batch_edge_query'); -import CacheHandler from './cache_handler'; import { threadId } from 'worker_threads'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { StampedLog } from '@biothings-explorer/utils'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; -import QEdge from './query_edge'; +import { QueryHandlerOptions, ThreadMessage, QEdge } from '@biothings-explorer/types'; import { UnavailableAPITracker } from './types'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { constructQueries } from '@biothings-explorer/call-apis'; export interface BatchEdgeQueryOptions extends QueryHandlerOptions { recordHashEdgeAttributes: string[]; @@ -23,9 +21,8 @@ export default class BatchEdgeQueryHandler { logs: StampedLog[]; caching: boolean; options: QueryHandlerOptions; - resolveOutputIDs: boolean; qEdges: QEdge | QEdge[]; - constructor(metaKG: MetaKG, resolveOutputIDs = true, options?: BatchEdgeQueryOptions) { + constructor(metaKG: MetaKG, options?: BatchEdgeQueryOptions) { this.metaKG = metaKG; this.logs = []; this.caching = options && options.caching; @@ -33,7 +30,6 @@ export default class BatchEdgeQueryHandler { if (options && options.recordHashEdgeAttributes) { this.options.EDGE_ATTRIBUTES_USED_IN_RECORD_HASH = options.recordHashEdgeAttributes; } - this.resolveOutputIDs = resolveOutputIDs; } /** @@ -61,11 +57,79 @@ export default class BatchEdgeQueryHandler { /** * @private */ - async _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { - const executor = new call_api(APIEdges, this.options, redisClient); - const records: Record[] = await executor.query(this.resolveOutputIDs, unavailableAPIs); - this.logs = [...this.logs, ...executor.logs]; - return records; + _queryAPIEdges(APIEdges: APIEdge[], unavailableAPIs: UnavailableAPITracker = {}, abortSignal?: AbortSignal): Promise { + // Skip queueing queries to unavailable APIs + const queries = constructQueries(APIEdges, this.options).filter((query) => { + if (unavailableAPIs[query.APIEdge.query_operation.server]?.skip === true) { + unavailableAPIs[query.APIEdge.query_operation.server].skippedQueries += 1; + return false; + } + return true; + }); + + const queriesByHash = Object.fromEntries(queries.map((query) => [query.hash, query])); + const queryCount = Object.keys(queriesByHash).length; // some duplicates may have been removed + + const qEdge = APIEdges[0].reasoner_edge; + const message = `${queryCount} planned queries for edge ${qEdge.id}`; + debug(message); + this.logs.push(new LogEntry('INFO', null, message).getLog()); + let finishedCount = 0; + let processedHashes = new Set(); + const completedLogs = this.logs; + const completedRecords: Record[] = []; + return new Promise((resolve) => { + function listener(msg: ThreadMessage) { + if (msg.type !== 'subQueryResult') return; + const { hash, records, logs, apiUnavailable } = msg.value as { + hash: string; + records: RecordPackage; + logs: SerializableLog[]; + apiUnavailable: boolean; + }; + + // check if this query is applicable + if (!queriesByHash[hash] || processedHashes.has(hash)) { + return; + } + processedHashes.add(hash); + + completedLogs.push(...LogEntry.deserialize(logs)); + completedRecords.push(...Record.unpackRecords(records, qEdge)); + + // Update any APIs that were unavailable for this segment + const server = queriesByHash[hash].APIEdge.query_operation.server; + if (apiUnavailable) { + if (!unavailableAPIs[server]) { + unavailableAPIs[server] = { skip: true, skippedQueries: 0 }; + } + unavailableAPIs[server].skippedQueries += 1; + } + + finishedCount += 1; + if (finishedCount >= queryCount) { + debug(`Total number of records returned for qEdge ${qEdge.id} is ${completedRecords.length}`); + resolve(completedRecords); + global.workerSide.off('message', listener); + abortSignal?.removeEventListener('abort', abort); // Clean up + } + } + function abort() { + global.workerSide.off('message', listener); + abortSignal?.removeEventListener('abort', abort); // Clean up + resolve([]); + } + global.workerSide.on('message', listener); + abortSignal?.addEventListener('abort', abort); + global.workerSide.postMessage({ + threadId, + type: 'subqueryRequest', + value: { + queries: Object.values(queriesByHash).map((query) => query.freeze()), // queriesByHash prevents duplicates + options: this.options, + }, + } satisfies ThreadMessage); + }); } /** @@ -123,50 +187,35 @@ export default class BatchEdgeQueryHandler { }); } - async query(qEdges: QEdge | QEdge[], unavailableAPIs: UnavailableAPITracker = {}): Promise { + async query(qEdges: QEdge | QEdge[], unavailableAPIs: UnavailableAPITracker = {}, abortSignal?: AbortSignal): Promise { debug('Node Update Start'); // it's now a single edge but convert to arr to simplify refactoring qEdges = Array.isArray(qEdges) ? qEdges : [qEdges]; const nodeUpdate = new NodesUpdateHandler(qEdges); // difference is there is no previous edge info anymore - await nodeUpdate.setEquivalentIDs(qEdges); + await nodeUpdate.setEquivalentIDs(qEdges, abortSignal); await this._rmEquivalentDuplicates(qEdges); debug('Node Update Success'); - const cacheHandler = new CacheHandler(this.caching, this.metaKG, this.options); - const { cachedRecords, nonCachedQEdges } = await cacheHandler.categorizeEdges(qEdges); - this.logs = [...this.logs, ...cacheHandler.logs]; let queryRecords: Record[]; - if (nonCachedQEdges.length === 0) { - queryRecords = []; - if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: true }); - } - } else { - debug('Start to convert qEdges into APIEdges....'); - const edgeConverter = new QEdge2APIEdgeHandler(nonCachedQEdges, this.metaKG); - const APIEdges = await edgeConverter.convert(nonCachedQEdges); - debug(`qEdges are successfully converted into ${APIEdges.length} APIEdges....`); - this.logs = [...this.logs, ...edgeConverter.logs]; - if (APIEdges.length === 0 && cachedRecords.length === 0) { - return []; - } - const expanded_APIEdges = this._expandAPIEdges(APIEdges); - debug('Start to query APIEdges....'); - queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs); - if (queryRecords === undefined) return; - debug('APIEdges are successfully queried....'); - queryRecords = await this._postQueryFilter(queryRecords); - debug(`Total number of records is (${queryRecords.length})`); - const cacheTask = cacheHandler.cacheEdges(queryRecords); - if (!(process.env.USE_THREADING === 'false')) { - global.cachingTasks?.push(cacheTask); - } else { - await cacheTask; - } + debug('Start to convert qEdges into APIEdges....'); + const edgeConverter = new QEdge2APIEdgeHandler(qEdges, this.metaKG); + const APIEdges = await edgeConverter.convert(qEdges); + debug(`qEdges are successfully converted into ${APIEdges.length} APIEdges....`); + this.logs = [...this.logs, ...edgeConverter.logs]; + if (APIEdges.length === 0) { + return []; } - queryRecords = [...queryRecords, ...cachedRecords]; + + const expanded_APIEdges = this._expandAPIEdges(APIEdges); + debug('Start to query APIEdges....'); + queryRecords = await this._queryAPIEdges(expanded_APIEdges, unavailableAPIs, abortSignal); + if (queryRecords === undefined) return; + debug('APIEdges are successfully queried....'); + queryRecords = await this._postQueryFilter(queryRecords); + debug(`Total number of records is (${queryRecords.length})`); + debug('Start to update nodes...'); nodeUpdate.update(queryRecords); debug('Update nodes completed!'); diff --git a/src/biolink.ts b/src/biolink.ts deleted file mode 100644 index e5bb659d..00000000 --- a/src/biolink.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { BioLink } from 'biolink-model'; -import Debug from 'debug'; -const debug = Debug('bte:biothings-explorer-trapi:EdgeReverse'); - -class BioLinkModel { - biolink: BioLink; - constructor() { - this.biolink = new BioLink(); - this.biolink.loadSync(); - } - - reverse(predicate: string) { - if (typeof predicate === 'string') { - if (predicate in this.biolink.slotTree.objects) { - if (this.biolink.slotTree.objects[predicate].symmetric === true) { - return predicate; - } - return this.biolink.slotTree.objects[predicate].inverse; - } - } - - return undefined; - } - - getAncestorClasses(className: string): string | string[] { - if (className in this.biolink.classTree.objects) { - const ancestors = this.biolink.classTree.getAncestors(className).map((entity) => entity.name); - return [...ancestors, ...[className]]; - } - return className; - } - - getAncestorPredicates(predicate: string): string | string[] { - if (predicate in this.biolink.slotTree.objects) { - const ancestors = this.biolink.slotTree.getAncestors(predicate).map((entity) => entity.name); - return [...ancestors, ...[predicate]]; - } - return predicate; - } - - getDescendantClasses(className: string): string | string[] { - if (className in this.biolink.classTree.objects) { - const descendants = this.biolink.classTree.getDescendants(className).map((entity) => entity.name); - return [...descendants, ...[className]]; - } - return className; - } - - getDescendantPredicates(predicate: string): string[] { - if (predicate in this.biolink.slotTree.objects) { - const descendants = this.biolink.slotTree.getDescendants(predicate).map((entity) => entity.name); - return [...descendants, ...[predicate]]; - } - return [predicate]; - } - - getDescendantQualifiers(qualifier: string): string[] { - try { - const descendants = this.biolink.enumTree.getDescendants(qualifier).map((entity) => entity.name); - return [...descendants, qualifier]; - } catch (e) { - console.log('qual error', e); - return [qualifier]; - } - } -} - -// Freeze an instance to avoid multiple reloads -const biolink = new BioLinkModel(); -Object.freeze(biolink); - -global.BIOLINK_VERSION = biolink.biolink.biolinkJSON.version; - -export default biolink; diff --git a/src/cache_handler.ts b/src/cache_handler.ts deleted file mode 100644 index a52c6d1f..00000000 --- a/src/cache_handler.ts +++ /dev/null @@ -1,291 +0,0 @@ -import { redisClient } from '@biothings-explorer/utils'; -import Debug from 'debug'; -const debug = Debug('bte:biothings-explorer-trapi:cache_handler'); -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import async from 'async'; -import helper from './helper'; -import lz4 from 'lz4'; -import chunker from 'stream-chunker'; -import { Readable, Transform } from 'stream'; -import { Record, RecordPackage } from '@biothings-explorer/api-response-transform'; -import { threadId } from 'worker_threads'; -import MetaKG from '@biothings-explorer/smartapi-kg'; -import QEdge from './query_edge'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; - -export interface RecordPacksByQedgeMetaKGHash { - [QEdgeHash: string]: RecordPackage; -} - -class DelimitedChunksDecoder extends Transform { - private _buffer: string; - constructor() { - super({ - readableObjectMode: true, - readableHighWaterMark: 32, // limited output reduces RAM usage slightly - writableHighWaterMark: 100000, - }); - this._buffer = ''; - } - - _transform(chunk: string, encoding: string, callback: () => void): void { - this._buffer += chunk; - if (this._buffer.includes(',')) { - const parts = this._buffer.split(','); - this._buffer = parts.pop(); - parts.forEach((part) => { - const parsedPart = JSON.parse(lz4.decode(Buffer.from(part, 'base64url')).toString()); - if (Array.isArray(parsedPart)) { - parsedPart.forEach((obj) => this.push(obj)); - } else { - // backwards compatibility with previous implementation - this.push(parsedPart); - } - }); - } - callback(); // callback *no matter what* - } - - _flush(callback: (error?: Error | null | undefined, data?: unknown) => void): void { - try { - if (this._buffer.length) { - const final = JSON.parse(lz4.decode(Buffer.from(this._buffer, 'base64url')).toString()); - callback(null, final); - } - callback(); - } catch (error) { - callback(error); - } - } -} - -class DelimitedChunksEncoder extends Transform { - private _buffer: unknown[]; - constructor() { - super({ - writableObjectMode: true, - writableHighWaterMark: 128, - }); - this._buffer = []; - } - - _transform(obj: unknown, encoding: unknown, callback: () => void) { - this._buffer.push(obj); // stringify/compress 64 objects at a time limits compress calls - if (this._buffer.length === 64) { - const compressedPart = lz4.encode(JSON.stringify(this._buffer)).toString('base64url') + ','; - this.push(compressedPart); - this._buffer = []; - } - callback(); - } - - _flush(callback: (error?: Error | null | undefined, data?: unknown) => void) { - try { - if (this._buffer.length) { - callback(null, lz4.encode(JSON.stringify(this._buffer)).toString('base64url') + ','); - return; - } - callback(); - } catch (error) { - callback(error); - } - } -} - -export default class CacheHandler { - metaKG: MetaKG; - logs: StampedLog[]; - cacheEnabled: boolean; - recordConfig: QueryHandlerOptions; - constructor(caching: boolean, metaKG = undefined, recordConfig = {}, logs = []) { - this.metaKG = metaKG; - this.logs = logs; - this.cacheEnabled = - caching === false - ? false - : process.env.RESULT_CACHING !== 'false' - ? !(process.env.REDIS_HOST === undefined) && !(process.env.REDIS_PORT === undefined) - : false; - this.recordConfig = recordConfig; - this.logs.push( - new LogEntry('DEBUG', null, `REDIS cache is ${this.cacheEnabled === true ? '' : 'not'} enabled.`).getLog(), - ); - } - - async categorizeEdges(qEdges: QEdge[]): Promise<{ cachedRecords: Record[]; nonCachedQEdges: QEdge[] }> { - if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { - return { - cachedRecords: [], - nonCachedQEdges: qEdges, - }; - } - const nonCachedQEdges: QEdge[] = []; - let cachedRecords: Record[] = []; - debug('Begin edge cache lookup...'); - await async.eachSeries(qEdges, async (qEdge) => { - const qEdgeMetaKGHash = this._hashEdgeByMetaKG(qEdge.getHashedEdgeRepresentation()); - const unpackedRecords: Record[] = await new Promise((resolve) => { - const redisID = 'bte:edgeCache:' + qEdgeMetaKGHash; - redisClient.client.usingLock([`redisLock:${redisID}`], 600000, async () => { - try { - const compressedRecordPack = await redisClient.client.hgetallTimeout(redisID); - - if (compressedRecordPack && Object.keys(compressedRecordPack).length) { - const recordPack = []; - - const sortedPackParts = Object.entries(compressedRecordPack) - .sort(([key1], [key2]) => parseInt(key1) - parseInt(key2)) - .map(([, val]) => { - return val; - }); - - const recordStream = Readable.from(sortedPackParts); - recordStream - .pipe(this.createDecodeStream()) - .on('data', (obj) => recordPack.push(obj)) - .on('end', () => resolve(Record.unpackRecords(recordPack as RecordPackage, qEdge, this.recordConfig))); - } else { - resolve(null); - } - } catch (error) { - resolve(null); - debug(`Cache lookup/retrieval failed due to ${error}. Proceeding without cache.`); - } - }); - }); - - if (unpackedRecords) { - this.logs.push( - new LogEntry('DEBUG', null, `BTE finds cached records for ${qEdge.getID()}`, { - type: 'cacheHit', - qEdgeID: qEdge.getID(), - api_names: unpackedRecords.map((record) => record.association?.api_name), - }).getLog(), - ); - cachedRecords = [...cachedRecords, ...unpackedRecords]; - } else { - nonCachedQEdges.push(qEdge); - } - debug(`Found (${cachedRecords.length}) cached records.`); - }); - - return { cachedRecords, nonCachedQEdges }; - } - - _hashEdgeByMetaKG(qEdgeHash: string): string { - if (!this.metaKG) { - return qEdgeHash; - } - const len = String(this.metaKG.ops.length); - const allIDs = Array.from(new Set(this.metaKG.ops.map((op) => op.association.smartapi.id))).join(''); - return helper._generateHash(qEdgeHash + len + allIDs); - } - - _groupQueryRecordsByQEdgeHash(queryRecords: Record[]): RecordPacksByQedgeMetaKGHash { - const groupedRecords: { [qEdgeMetaKGHash: string]: Record[] } = {}; - queryRecords.map((record) => { - try { - const qEdgeMetaKGHash = this._hashEdgeByMetaKG(record.qEdge.getHashedEdgeRepresentation()); - if (!(qEdgeMetaKGHash in groupedRecords)) { - groupedRecords[qEdgeMetaKGHash] = []; - } - groupedRecords[qEdgeMetaKGHash].push(record); - } catch (e) { - debug('skipping malformed record'); - } - }); - return Object.fromEntries( - Object.entries(groupedRecords).map(([qEdgeMetaKGHash, records]) => { - return [qEdgeMetaKGHash, Record.packRecords(records)]; - }), - ); - } - - createEncodeStream(): DelimitedChunksEncoder { - return new DelimitedChunksEncoder(); - } - - createDecodeStream(): DelimitedChunksDecoder { - return new DelimitedChunksDecoder(); - } - - async cacheEdges(queryRecords: Record[]): Promise { - if (this.cacheEnabled === false || process.env.INTERNAL_DISABLE_REDIS === 'true') { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: true }); - } - return; - } - if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheInProgress: 1 }); - } - debug('Start to cache query records.'); - try { - const groupedRecords = this._groupQueryRecordsByQEdgeHash(queryRecords); - const qEdgeHashes = Array.from(Object.keys(groupedRecords)); - debug(`Number of hashed edges: ${qEdgeHashes.length}`); - const failedHashes = []; - await async.eachSeries(qEdgeHashes, async (hash) => { - // lock to prevent caching to/reading from actively caching edge - const redisID = 'bte:edgeCache:' + hash; - if (global.parentPort) { - global.parentPort.postMessage({ threadId, addCacheKey: redisID }); - } - await redisClient.client.usingLock([`redisLock:${redisID}`, 'redisLock:EdgeCaching'], 600000, async () => { - try { - await redisClient.client.delTimeout(redisID); // prevents weird overwrite edge cases - await new Promise((resolve, reject) => { - let i = 0; - Readable.from(groupedRecords[hash]) - .pipe(this.createEncodeStream()) - .pipe(chunker(100000, { flush: true })) - .on('data', async (chunk: string) => { - try { - await redisClient.client.hsetTimeout(redisID, String(i++), chunk); - } catch (error) { - reject(error); - try { - await redisClient.client.delTimeout(redisID); - } catch (e) { - debug( - `Unable to remove partial cache ${redisID} from redis during cache failure due to error ${error}. This may result in failed or improper cache retrieval of this qEdge.`, - ); - } - } - }) - .on('end', () => { - resolve(); - }); - }); - if (process.env.QEDGE_CACHE_TIME_S !== '0') { - await redisClient.client.expireTimeout(redisID, process.env.QEDGE_CACHE_TIME_S || 1800); - } - } catch (error) { - failedHashes.push(hash); - debug( - `Failed to cache qEdge ${hash} records due to error ${error}. This does not stop other edges from caching nor terminate the query.`, - ); - } finally { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, completeCacheKey: redisID }); - } - } - }); - }); - const successCount = Object.entries(groupedRecords).reduce((acc, [hash, records]) => { - return failedHashes.includes(hash) ? acc : acc + records.length; - }, 0); - if (successCount) { - debug(`Successfully cached (${successCount}) query records.`); - } else { - debug(`qEdge caching failed.`); - } - } catch (error) { - debug(`Caching failed due to ${error}. This does not terminate the query.`); - } finally { - if (global.parentPort) { - global.parentPort.postMessage({ threadId, cacheDone: 1 }); - } - } - } -} diff --git a/src/edge_manager.ts b/src/edge_manager.ts index 79f00dcf..5505a619 100644 --- a/src/edge_manager.ts +++ b/src/edge_manager.ts @@ -1,20 +1,21 @@ import _ from 'lodash'; import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import BTEError from './exceptions/bte_error'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:edge-manager'); import * as config from './config'; import BatchEdgeQueryHandler, { BatchEdgeQueryOptions } from './batch_edge_query'; import { Telemetry } from '@biothings-explorer/utils'; -import QEdge from './query_edge'; import MetaKG from '@biothings-explorer/smartapi-kg'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; -import { Record } from '@biothings-explorer/api-response-transform'; +import { QueryHandlerOptions, QEdge, BTEError } from '@biothings-explorer/types'; +import { Record } from '@biothings-explorer/types'; import { SubclassEdges, UnavailableAPITracker } from './types'; import { RecordsByQEdgeID } from './results_assembly/query_results'; import path from 'path'; import { promises as fs } from 'fs'; +const MAX_RECORDS_PER_EDGE = parseInt(process.env.MAX_RECORDS_PER_EDGE ?? '100000'); +const MAX_RECORDS_PER_QUERY = parseInt(process.env.MAX_RECORDS_TOTAL ?? '200000'); + export default class QueryEdgeManager { private _qEdges: QEdge[]; private _metaKG: MetaKG; @@ -321,45 +322,6 @@ export default class QueryEdgeManager { currentQEdge.storeRecords(filteredRecords); } - /** - * Unused - */ - // updateNeighborsEdgeRecords(currentQEdge) { - // //update and filter only immediate neighbors - // debug(`Updating neighbors...`); - // const currentQEdgeID = currentQEdge.getID(); - // //get neighbors of this edges subject that are not this edge - // let left_connections = currentQEdge.subject.getConnections(); - // left_connections = left_connections.filter((qEdgeID) => qEdgeID !== currentQEdgeID); - // //get neighbors of this edges object that are not this edge - // let right_connections = currentQEdge.object.getConnections(); - // right_connections = right_connections.filter((qEdgeID) => qEdgeID !== currentQEdgeID); - // debug(`(${left_connections})<--edge neighbors-->(${right_connections})`); - // if (left_connections.length) { - // //find edge by id - // left_connections.forEach((qEdgeID) => { - // const edge = this._qEdges.find((edge) => edge.getID() == qEdgeID); - // if (edge && edge.records.length) { - // debug(`Updating "${edge.getID()}" neighbor edge of ${currentQEdgeID}`); - // debug(`Updating neighbor (X)<----()`); - // this.updateEdgeRecords(edge); - // } - // }); - // } - // - // if (right_connections.length) { - // //find edge by id - // right_connections.forEach((neighbor_id) => { - // const edge = this._qEdges.find((edge) => edge.getID() == neighbor_id); - // if (edge && edge.records.length) { - // debug(`Updating "${edge.getID()}" neighbor edge of ${currentQEdgeID}`); - // debug(`Updating neighbor ()---->(X)`); - // this.updateEdgeRecords(edge); - // } - // }); - // } - // } - updateAllOtherEdges(currentQEdge: QEdge): void { //update and filter all other edges debug(`Updating all other edges...`); @@ -374,11 +336,9 @@ export default class QueryEdgeManager { } _createBatchQueryHandler(qEdge: QEdge, metaKG: MetaKG): BatchEdgeQueryHandler { - const handler = new BatchEdgeQueryHandler(metaKG, this.options.resolveOutputIDs, { - caching: this.options.caching, - submitter: this.options.submitter, + const handler = new BatchEdgeQueryHandler(metaKG, { + ...this.options, recordHashEdgeAttributes: config.EDGE_ATTRIBUTES_USED_IN_RECORD_HASH, - provenanceUsesServiceProvider: this.options.provenanceUsesServiceProvider, } as BatchEdgeQueryOptions); handler.setEdges(qEdge); return handler; @@ -404,9 +364,11 @@ export default class QueryEdgeManager { debug(logMessage); } - async executeEdges(): Promise { + async executeEdges(abortSignal?: AbortSignal): Promise { const unavailableAPIs: UnavailableAPITracker = {}; while (this.getEdgesNotExecuted()) { + if (abortSignal?.aborted) return false; + const span = Telemetry.startSpan({ description: 'edgeExecution' }); //next available/most efficient edge const currentQEdge = this.getNext(); @@ -423,7 +385,7 @@ export default class QueryEdgeManager { ); debug(`(5) Executing current edge >> "${currentQEdge.getID()}"`); //execute current edge query - const queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs); + let queryRecords = await queryBatchHandler.query(queryBatchHandler.qEdges, unavailableAPIs, abortSignal); this.logs = [...this.logs, ...queryBatchHandler.logs]; if (queryRecords === undefined) return; // create an edge execution summary @@ -433,6 +395,7 @@ export default class QueryEdgeManager { const cached = this.logs.filter( ({ data }) => data?.qEdgeID === currentQEdge.id && data?.type === 'cacheHit', ).length; + total += cached; this.logs .filter(({ data }) => data?.qEdgeID === currentQEdge.id && data?.type === 'query') .forEach(({ data }) => { @@ -460,6 +423,43 @@ export default class QueryEdgeManager { span.finish(); return; } + + // Check if record count threatens stability + const totalRecords = + this._qEdges.reduce((total, qEdge) => { + return total + qEdge.records.length; + }, 0) + queryRecords.length; + let maxRecordsMessage = [ + `Qedge ${currentQEdge.id}`, + `obtained ${queryRecords.length} records,`, + queryRecords.length === MAX_RECORDS_PER_EDGE ? 'meeting' : 'exceeding', + `maximum of ${MAX_RECORDS_PER_QUERY}`, + `Truncating records for this edge to ${MAX_RECORDS_PER_EDGE}.`, + `Your query may be too general?`, + ]; + if (totalRecords > MAX_RECORDS_PER_QUERY) { + maxRecordsMessage = maxRecordsMessage.slice(0, 2); + maxRecordsMessage.push( + ...[ + `totalling ${totalRecords} for this query.`, + `This exceeds the per-query maximum of ${MAX_RECORDS_PER_QUERY}.`, + `For stability purposes, this query is terminated.`, + `Please consider further refining your query.`, + ], + ); + debug(maxRecordsMessage.join(' ')); + this.logs.push(new LogEntry('WARNING', null, maxRecordsMessage.join(' ')).getLog()); + Telemetry.captureException(new Error(`Stopped on globalMaxRecords (exceeded ${MAX_RECORDS_PER_QUERY})`)); + return; + } + + if (queryRecords.length > MAX_RECORDS_PER_EDGE) { + debug(maxRecordsMessage.join(' ')); + this.logs.push(new LogEntry('WARNING', null, maxRecordsMessage.join(' ')).getLog()); + + queryRecords = queryRecords.slice(0, MAX_RECORDS_PER_EDGE); + } + // storing records will trigger a node entity count update currentQEdge.storeRecords(queryRecords); diff --git a/src/exceptions/bte_error.ts b/src/exceptions/bte_error.ts deleted file mode 100644 index 986234b5..00000000 --- a/src/exceptions/bte_error.ts +++ /dev/null @@ -1,16 +0,0 @@ -export default class BTEError extends Error { - statusCode: string; - constructor(message = 'Query aborted', name = 'QueryAborted', code = '501', ...params: any) { - super(...params); - - if (Error.captureStackTrace) { - Error.captureStackTrace(this, BTEError); - } - - this.name = name; - this.message = message; - this.statusCode = code; - } -} - -module.exports = BTEError; diff --git a/src/exceptions/invalid_query_graph_error.ts b/src/exceptions/invalid_query_graph_error.ts deleted file mode 100644 index 9974b541..00000000 --- a/src/exceptions/invalid_query_graph_error.ts +++ /dev/null @@ -1,16 +0,0 @@ -export default class InvalidQueryGraphError extends Error { - statusCode: number; - constructor(message = 'Your Input Query Graph is invalid.', ...params: string[]) { - super(...params); - - Object.setPrototypeOf(this, InvalidQueryGraphError.prototype); - - if (Error.captureStackTrace) { - Error.captureStackTrace(this, InvalidQueryGraphError); - } - - this.name = 'InvalidQueryGraphError'; - this.message = message; - this.statusCode = 400; - } -} diff --git a/src/exceptions/not_implemented_error.ts b/src/exceptions/not_implemented_error.ts deleted file mode 100644 index 91bd824e..00000000 --- a/src/exceptions/not_implemented_error.ts +++ /dev/null @@ -1,16 +0,0 @@ -export default class NotImplementedError extends Error { - statusCode: number; - constructor(message = 'Feature not implemented', ...params: string[]) { - super(...params); - - Object.setPrototypeOf(this, NotImplementedError.prototype); - - if (Error.captureStackTrace) { - Error.captureStackTrace(this, NotImplementedError); - } - - this.name = 'NotImplementedError'; - this.message = message; - this.statusCode = 501; - } -} diff --git a/src/graph/graph.ts b/src/graph/graph.ts deleted file mode 100644 index 5227594d..00000000 --- a/src/graph/graph.ts +++ /dev/null @@ -1,199 +0,0 @@ -import kg_edge from './kg_edge'; -import kg_node from './kg_node'; -import Debug from 'debug'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import KGNode from './kg_node'; -import KGEdge from './kg_edge'; -import { Record } from '@biothings-explorer/api-response-transform'; -import { TrapiAuxiliaryGraph, TrapiResult } from '@biothings-explorer/types'; -import KnowledgeGraph from './knowledge_graph'; -const debug = Debug('bte:biothings-explorer-trapi:Graph'); - -export interface BTEGraphUpdate { - nodes: { - [nodeID: string]: KGNode; - }; - edges: { - [edgeID: string]: KGEdge; - }; -} - -export interface BTEGraphSubscriber { - update: (kg: BTEGraphUpdate) => void; -} - -export default class BTEGraph { - nodes: { - [nodeID: string]: KGNode; - }; - edges: { - [edgeID: string]: KGEdge; - }; - subscribers: BTEGraphSubscriber[]; - constructor() { - this.nodes = {}; - this.edges = {}; - this.subscribers = []; - } - - update(queryRecords: Record[]): void { - debug(`Updating BTE Graph now.`); - const bteAttributes = ['name', 'label', 'id', 'api', 'provided_by', 'publications', 'trapi_sources']; - queryRecords.map((record) => { - if (record) { - const inputPrimaryCurie = record.subject.curie; - const inputQNodeID = record.subject.qNodeID; - const inputBTENodeID = inputPrimaryCurie; - // const inputBTENodeID = inputPrimaryCurie + '-' + inputQNodeID; - const outputPrimaryCurie = record.object.curie; - const outputQNodeID = record.object.qNodeID; - // const outputBTENodeID = outputPrimaryCurie + '-' + outputQNodeID; - const outputBTENodeID = outputPrimaryCurie; - const recordHash = record.recordHash; - - if (!(outputBTENodeID in this.nodes)) { - this.nodes[outputBTENodeID] = new kg_node(outputBTENodeID, { - primaryCurie: outputPrimaryCurie, - qNodeID: outputQNodeID, - curies: record.object.equivalentCuries, - names: record.object.names, - label: record.object.label, - semanticType: [record.object.semanticType[0]], - nodeAttributes: record.object.attributes, - }); - } else if ( - this.nodes[outputBTENodeID].label === undefined || - this.nodes[outputBTENodeID].label === this.nodes[outputBTENodeID].primaryCurie - ) { - this.nodes[outputBTENodeID].label = record.object.label; - } - - if (!(inputBTENodeID in this.nodes)) { - this.nodes[inputBTENodeID] = new kg_node(inputBTENodeID, { - primaryCurie: inputPrimaryCurie, - qNodeID: inputQNodeID, - curies: record.subject.equivalentCuries, - names: record.subject.names, - label: record.subject.label, - semanticType: [record.subject.semanticType[0]], - nodeAttributes: record.subject.attributes, - }); - } else if ( - this.nodes[inputBTENodeID].label === undefined || - this.nodes[inputBTENodeID].label === this.nodes[inputBTENodeID].primaryCurie - ) { - this.nodes[inputBTENodeID].label = record.subject.label; - } - - this.nodes[outputBTENodeID].addSourceNode(inputBTENodeID); - this.nodes[outputBTENodeID].addSourceQNodeID(inputQNodeID); - this.nodes[inputBTENodeID].addTargetNode(outputBTENodeID); - this.nodes[inputBTENodeID].addTargetQNodeID(outputQNodeID); - if (!(recordHash in this.edges)) { - this.edges[recordHash] = new kg_edge(recordHash, { - predicate: record.predicate, - subject: inputPrimaryCurie, - object: outputPrimaryCurie, - }); - } - this.edges[recordHash].addAPI(record.api); - this.edges[recordHash].addInforesCurie(record.apiInforesCurie); - this.edges[recordHash].addPublication(record.publications); - Object.keys(record.mappedResponse) - .filter((k) => !(bteAttributes.includes(k) || k.startsWith('$'))) - .map((item) => { - this.edges[recordHash].addAdditionalAttributes(item, record.mappedResponse[item]); - }); - if (record.knowledge_level) { - this.edges[recordHash].addAdditionalAttributes('biolink:knowledge_level', record.knowledge_level); - } - if (record.agent_type) { - this.edges[recordHash].addAdditionalAttributes('biolink:agent_type', record.agent_type); - } - this.edges[recordHash].addSource(record.provenanceChain); - Object.entries(record.qualifiers).forEach(([qualifierType, qualifier]) => { - this.edges[recordHash].addQualifier(qualifierType, qualifier); - }); - } - }); - } - - prune(results: TrapiResult[], auxGraphs: { [auxGraphID: string]: TrapiAuxiliaryGraph }): void { - debug('pruning BTEGraph nodes/edges...'); - const edgeBoundNodes: Set = new Set(); - const resultsBoundEdges: Set = new Set(); - - // Handle nodes and edges bound to results directly - results.forEach((result) => { - Object.entries(result.analyses[0].edge_bindings).forEach(([, bindings]) => { - bindings.forEach((binding) => resultsBoundEdges.add(binding.id)); - }); - }); - - // Handle edges bound via auxiliary graphs - // This will iterate over new edges as they're added - resultsBoundEdges.forEach((edgeID) => { - edgeBoundNodes.add(this.edges[edgeID].subject); - edgeBoundNodes.add(this.edges[edgeID].object); - const supportGraphs = [...(this.edges[edgeID].attributes['biolink:support_graphs'] ?? [])]; - supportGraphs.forEach((auxGraphID: string) => { - auxGraphs[auxGraphID].edges.forEach((auxGraphEdgeID) => { - edgeBoundNodes.add(this.edges[auxGraphEdgeID].subject); - edgeBoundNodes.add(this.edges[auxGraphEdgeID].object); - resultsBoundEdges.add(auxGraphEdgeID); - }); - }); - }); - - const nodesToDelete = Object.keys(this.nodes).filter((bteNodeID) => !edgeBoundNodes.has(bteNodeID)); - nodesToDelete.forEach((unusedBTENodeID) => delete this.nodes[unusedBTENodeID]); - const edgesToDelete = Object.keys(this.edges).filter((recordHash) => !resultsBoundEdges.has(recordHash)); - edgesToDelete.forEach((unusedRecordHash) => delete this.edges[unusedRecordHash]); - debug(`pruned ${nodesToDelete.length} nodes and ${edgesToDelete.length} edges from BTEGraph.`); - } - - checkPrimaryKnowledgeSources(knowledgeGraph: KnowledgeGraph): StampedLog[] { - const logs = []; - Object.entries(knowledgeGraph.edges).map(([edgeID, edge]) => { - const has_primary_knowledge_source = edge.sources.some( - (source) => source.resource_role === 'primary_knowledge_source' && source.resource_id, - ); - if (!has_primary_knowledge_source) { - const logMsg = `Edge ${edgeID} (APIs: ${Array.from(this.edges[edgeID].apis).join( - ', ', - )}) is missing a primary knowledge source`; - debug(logMsg); - logs.push(new LogEntry('WARNING', null, logMsg).getLog()); - } - }); - return logs; - } - - /** - * Register subscribers - */ - subscribe(subscriber: BTEGraphSubscriber): void { - this.subscribers.push(subscriber); - } - - /** - * Unsubscribe a listener - */ - unsubscribe(subscriber: BTEGraphSubscriber): void { - this.subscribers = this.subscribers.filter((fn) => { - if (fn != subscriber) return fn; - }); - } - - /** - * Nofity all listeners - */ - notify(): void { - this.subscribers.map((subscriber) => { - subscriber.update({ - nodes: this.nodes, - edges: this.edges, - }); - }); - } -} diff --git a/src/graph/kg_edge.ts b/src/graph/kg_edge.ts deleted file mode 100644 index 797c82f1..00000000 --- a/src/graph/kg_edge.ts +++ /dev/null @@ -1,137 +0,0 @@ -import { TrapiSource } from '@biothings-explorer/types'; -import { TrapiAttribute } from '@biothings-explorer/types'; - -export interface KGEdgeInfo { - object: string; - subject: string; - predicate: string; -} - -export default class KGEdge { - id: string; - predicate: string; - subject: string; - object: string; - apis: Set; - inforesCuries: Set; - sources: { - [resource_id: string]: { - [resource_role: string]: { - resource_id: string; - resource_role: string; - upstream_resource_ids?: Set; - source_record_urls?: Set; - }; - }; - }; - publications: Set; - qualifiers: { - [qualifier_type_id: string]: string | string[]; - }; - attributes: { - [attribute_type_id: string]: Set | TrapiAttribute[]; - 'edge-attributes'?: TrapiAttribute[]; - }; - constructor(id: string, info: KGEdgeInfo) { - this.id = id; - this.predicate = info.predicate; - this.subject = info.subject; - this.object = info.object; - this.apis = new Set(); - this.inforesCuries = new Set(); - this.sources = {}; - this.publications = new Set(); - this.qualifiers = {}; - this.attributes = {}; - } - - addAPI(api: string | string[]): void { - if (typeof api === 'undefined') { - return; - } - if (!Array.isArray(api)) { - api = [api]; - } - api.map((item) => { - this.apis.add(item); - }); - } - - addInforesCurie(inforesCurie: string | string[]): void { - if (typeof inforesCurie === 'undefined') { - return; - } - if (!Array.isArray(inforesCurie)) { - inforesCurie = [inforesCurie]; - } - inforesCurie.map((item) => { - this.inforesCuries.add(item); - }); - } - - addSource(source: TrapiSource | TrapiSource[]): void { - if (typeof source === 'undefined') { - return; - } - if (!Array.isArray(source)) { - source = [source]; - } - source.forEach((item) => { - if (!this.sources[item.resource_id]) this.sources[item.resource_id] = {}; - if (item.upstream_resource_ids && !Array.isArray(item.upstream_resource_ids)) { - item.upstream_resource_ids = [item.upstream_resource_ids]; - } - if (item.source_record_urls && !Array.isArray(item.source_record_urls)) { - item.source_record_urls = [item.source_record_urls]; - } - if (!this.sources[item.resource_id][item.resource_role]) { - this.sources[item.resource_id][item.resource_role] = { - resource_id: item.resource_id, - resource_role: item.resource_role, - upstream_resource_ids: item.upstream_resource_ids ? new Set(item.upstream_resource_ids) : undefined, - source_record_urls: item.source_record_urls ? new Set(item.source_record_urls) : undefined, - }; - } - item.upstream_resource_ids?.forEach((upstream) => - this.sources[item.resource_id][item.resource_role].upstream_resource_ids.add(upstream), - ); - item.source_record_urls?.forEach((url) => - this.sources[item.resource_id][item.resource_role].source_record_urls.add(url), - ); - }); - } - - addPublication(publication: string | string[]): void { - if (typeof publication === 'undefined') { - return; - } - if (!Array.isArray(publication)) { - publication = [publication]; - } - publication.map((item) => { - this.publications.add(item); - }); - } - - addQualifier(name: string, value: string | string[]): void { - this.qualifiers[name] = value; - } - - addAdditionalAttributes(name: string, value: string | string[] | TrapiAttribute[]): void { - // special handling for full edge attributes - if (name === 'edge-attributes') { - this.attributes[name] = value as TrapiAttribute[]; - return; - } - - if (!(name in this.attributes)) { - this.attributes[name] = new Set(); - } - if (!Array.isArray(value)) { - value = [value]; - } - (value as string[]).map((item) => { - (this.attributes[name] as Set).add(item); - }); - } -} diff --git a/src/graph/kg_node.ts b/src/graph/kg_node.ts deleted file mode 100644 index ff338c5d..00000000 --- a/src/graph/kg_node.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { TrapiAttribute } from '@biothings-explorer/types'; - -export interface KGNodeInfo { - label: string; - nodeAttributes?: TrapiAttribute; - semanticType: string[]; - names: string[]; - curies: string[]; - primaryCurie: string; - qNodeID: string; - originalCurie?: string; -} - -export default class KGNode { - id: string; - primaryCurie: string; - qNodeID: string; - originalCurie: string; - curies: string[]; - names: string[]; - semanticType: string[]; - nodeAttributes: TrapiAttribute; - label: string; - sourceNodes: Set; - targetNodes: Set; - sourceQNodeIDs: Set; - targetQNodeIDs: Set; - constructor(id: string, info: KGNodeInfo) { - this.id = id; - this.primaryCurie = info.primaryCurie; - this.qNodeID = info.qNodeID; - this.curies = info.curies; - this.names = info.names; - this.semanticType = info.semanticType; - this.nodeAttributes = info.nodeAttributes; - this.label = info.label; - this.sourceNodes = new Set(); - this.targetNodes = new Set(); - this.sourceQNodeIDs = new Set(); - this.targetQNodeIDs = new Set(); - - // store original curie to output `query_id bte#815` - this.originalCurie = info.originalCurie; - } - - addSourceNode(kgNodeID: string): void { - this.sourceNodes.add(kgNodeID); - } - - addTargetNode(kgNodeID: string): void { - this.targetNodes.add(kgNodeID); - } - - addSourceQNodeID(qNodeID: string): void { - this.sourceQNodeIDs.add(qNodeID); - } - - addTargetQNodeID(qNodeID: string): void { - this.targetQNodeIDs.add(qNodeID); - } -} diff --git a/src/graph/knowledge_graph.ts b/src/graph/knowledge_graph.ts deleted file mode 100644 index 1a28e9ca..00000000 --- a/src/graph/knowledge_graph.ts +++ /dev/null @@ -1,214 +0,0 @@ -import { toArray } from '../utils'; -import Debug from 'debug'; -import { - TrapiAttribute, - TrapiKnowledgeGraph, - TrapiKGEdge, - TrapiKGEdges, - TrapiKGNode, - TrapiKGNodes, - TrapiQualifier, - TrapiSource, -} from '@biothings-explorer/types'; -import KGNode from './kg_node'; -import KGEdge from './kg_edge'; -import { BTEGraphUpdate } from './graph'; -import { APIDefinition } from '@biothings-explorer/types'; -import { Telemetry } from '@biothings-explorer/utils'; - -const debug = Debug('bte:biothings-explorer-trapi:KnowledgeGraph'); - -const NON_ARRAY_ATTRIBUTES = ['biolink:knowledge_level', 'biolink:agent_type', 'biolink:evidence_count']; - -interface SpecialAttributeHandlers { - [attribute_type_id: string]: (value: Set, kgEdge: KGEdge) => TrapiAttribute['value']; -} - -const SPECIAL_ATTRIBUTE_HANDLERS: SpecialAttributeHandlers = { - 'biolink:max_research_phase': (value, kgEdge) => { - // Special handling for max research phase - const phase_map = { - '-1.0': 'not_provided', - '0.5': 'pre_clinical_research_phase', - '1.0': 'clinical_trial_phase_1', - '2.0': 'clinical_trial_phase_2', - '3.0': 'clinical_trial_phase_3', - '4.0': 'clinical_trial_phase_4', - }; - function map_phase(val: string) { - let new_val = phase_map[val]; - if (typeof new_val !== 'undefined') return new_val; - - const source = Object.values(kgEdge.sources).find((src) => typeof src.primary_knowledge_source !== 'undefined') - .primary_knowledge_source.resource_id; - const err = new Error( - `Unrecognized research phase (${val}) from ${source} ${kgEdge.subject} > ${kgEdge.predicate} > ${kgEdge.object}`, - ); - Telemetry.captureException(err); - return 'not_provided'; - } - return Array.from(value as Set).map(map_phase); - }, -}; - -export default class KnowledgeGraph { - nodes: { - [nodePrimaryID: string]: TrapiKGNode; - }; - edges: { - [edgeID: string]: TrapiKGEdge; - }; - kg: TrapiKnowledgeGraph; - apiList?: APIDefinition[]; - constructor(apiList?: APIDefinition[]) { - this.nodes = {}; - this.edges = {}; - this.kg = { - nodes: this.nodes, - edges: this.edges, - }; - this.apiList = apiList; - } - - getNodes(): TrapiKGNodes { - return this.nodes; - } - - getEdges(): TrapiKGEdges { - return this.edges; - } - - _createNode(kgNode: KGNode): TrapiKGNode { - const node = { - categories: kgNode.semanticType, - name: Array.isArray(kgNode.label) ? kgNode.label[0] : kgNode.label, - attributes: [ - { - attribute_type_id: 'biolink:xref', - value: kgNode.curies, - }, - { - attribute_type_id: 'biolink:synonym', - value: kgNode.names.length ? kgNode.names : toArray(kgNode.label), - }, - // Currently unused - // { - // attribute_type_id: 'num_source_nodes', - // value: kgNode._sourceNodes.size, - // //value_type_id: 'bts:num_source_nodes', - // }, - // { - // attribute_type_id: 'num_target_nodes', - // value: kgNode._targetNodes.size, - // //value_type_id: 'bts:num_target_nodes', - // }, - // { - // attribute_type_id: 'source_qg_nodes', - // value: Array.from(kgNode._sourceQNodeIDs), - // //value_type_id: 'bts:source_qg_nodes', - // }, - // { - // attribute_type_id: 'target_qg_nodes', - // value: Array.from(kgNode._targetQNodeIDs), - // //value_type_id: 'bts:target_qg_nodes', - // }, - ], - }; - for (const key in kgNode.nodeAttributes) { - node.attributes.push({ - attribute_type_id: key, - value: kgNode.nodeAttributes[key] as string[], - //value_type_id: 'bts:' + key, - }); - } - return node; - } - - _createQualifiers(kgEdge: KGEdge): TrapiQualifier[] { - const qualifiers = Object.entries(kgEdge.qualifiers || {}).map(([qualifierType, qualifier]) => { - return { - qualifier_type_id: qualifierType, - qualifier_value: qualifier, - }; - }); - - return qualifiers.length ? qualifiers : undefined; - } - - _createAttributes(kgEdge: KGEdge): TrapiAttribute[] { - const attributes: TrapiAttribute[] = []; - - // publications - if (Array.from(kgEdge.publications).length) { - attributes.push({ - attribute_type_id: 'biolink:publications', - value: Array.from(kgEdge.publications), - value_type_id: 'linkml:Uriorcurie', - }); - } - - Object.entries(kgEdge.attributes).forEach(([key, value]) => { - if (key === 'edge-attributes') return; - - let formatted_value: TrapiAttribute['value'] = NON_ARRAY_ATTRIBUTES.includes(key) - ? Array.from(value as Set).reduce((acc, val) => acc + val) - : Array.from(value as Set); - - if (key in SPECIAL_ATTRIBUTE_HANDLERS) { - formatted_value = SPECIAL_ATTRIBUTE_HANDLERS[key](value as Set, kgEdge); - } - - attributes.push({ - attribute_type_id: key, - // technically works for numbers as well - value: formatted_value, - //value_type_id: 'bts:' + key, - }); - }); - - //handle TRAPI APIs (Situation A of https://github.com/biothings/BioThings_Explorer_TRAPI/issues/208) and APIs that define 'edge-atributes' in x-bte - kgEdge.attributes['edge-attributes']?.forEach((attribute) => { - attributes.push(attribute); - }); - return attributes; - } - - _createSources(kgEdge: KGEdge): TrapiSource[] { - const sources: TrapiSource[] = []; - Object.entries(kgEdge.sources).forEach(([, roles]) => { - Object.entries(roles).forEach(([, sourceObj]) => { - const trapiSource: TrapiSource = { - ...sourceObj, - upstream_resource_ids: sourceObj.upstream_resource_ids ? [...sourceObj.upstream_resource_ids] : undefined, - source_record_urls: sourceObj.source_record_urls ? [...sourceObj.source_record_urls] : undefined, - }; - sources.push(trapiSource); - }); - }); - return sources; - } - - _createEdge(kgEdge: KGEdge): TrapiKGEdge { - return { - predicate: kgEdge.predicate, - subject: kgEdge.subject, - object: kgEdge.object, - qualifiers: this._createQualifiers(kgEdge), - attributes: this._createAttributes(kgEdge), - sources: this._createSources(kgEdge), - }; - } - - update(bteGraph: BTEGraphUpdate): void { - Object.keys(bteGraph.nodes).map((node) => { - this.nodes[bteGraph.nodes[node].primaryCurie] = this._createNode(bteGraph.nodes[node]); - }); - Object.keys(bteGraph.edges).map((edge) => { - this.edges[edge] = this._createEdge(bteGraph.edges[edge]); - }); - this.kg = { - nodes: this.nodes, - edges: this.edges, - }; - } -} diff --git a/src/index.ts b/src/index.ts index 8221e1d0..2037213b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,12 +1,9 @@ import MetaKG, { SmartAPIQueryResult } from '@biothings-explorer/smartapi-kg'; import path from 'path'; -import QueryGraph from './query_graph'; -import KnowledgeGraph from './graph/knowledge_graph'; import TrapiResultsAssembler from './results_assembly/query_results'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; +import { QueryGraph, InvalidQueryGraphError, KGNode, KGEdge, BTEGraph, KnowledgeGraph } from '@biothings-explorer/types'; import Debug from 'debug'; const debug = Debug('bte:biothings-explorer-trapi:main'); -import Graph from './graph/graph'; import EdgeManager from './edge_manager'; import _ from 'lodash'; import QEdge2APIEdgeHandler from './qedge2apiedge'; @@ -16,8 +13,6 @@ import { getDescendants } from '@biothings-explorer/node-expansion'; import { resolveSRI, SRINodeNormFailure } from 'biomedical_id_resolver'; import InferredQueryHandler from './inferred_mode/inferred_mode'; import PathfinderQueryHandler from './inferred_mode/pathfinder'; -import KGNode from './graph/kg_node'; -import KGEdge from './graph/kg_edge'; import { TrapiAuxGraphCollection, TrapiAuxiliaryGraph, @@ -26,9 +21,7 @@ import { TrapiResponse, TrapiResult, } from '@biothings-explorer/types'; -import { QueryHandlerOptions } from '@biothings-explorer/types'; -import BTEGraph from './graph/graph'; -import QEdge from './query_edge'; +import { QueryHandlerOptions, QEdge } from '@biothings-explorer/types'; import { Telemetry } from '@biothings-explorer/utils'; import { enrichTrapiResultsWithPfocrFigures } from './results_assembly/pfocr'; import { SubclassEdges } from './types'; @@ -36,10 +29,6 @@ import { SubclassEdges } from './types'; // Exports for external availability export * from './types'; export { getTemplates, supportedLookups } from './inferred_mode/template_lookup'; -export { default as QEdge } from './query_edge'; -export { default as QNode } from './query_node'; -export { default as InvalidQueryGraphError } from './exceptions/invalid_query_graph_error'; -export { default as NotImplementedError } from './exceptions/not_implemented_error'; export * from './qedge2apiedge'; export default class TRAPIQueryHandler { @@ -111,8 +100,9 @@ export default class TRAPIQueryHandler { `Query options are: ${JSON.stringify({ ...this.options, schema: this.options.schema ? this.options.schema.info.version : 'not included', - metakg: '', - smartapi: '', + metakg: "", + smartapi: "", + apiList: "[omitted]" })}`, ); @@ -469,7 +459,7 @@ export default class TRAPIQueryHandler { _initializeResponse(): void { this.knowledgeGraph = new KnowledgeGraph(this.options?.apiList?.include); this.trapiResultsAssembler = new TrapiResultsAssembler(this.options); - this.bteGraph = new Graph(); + this.bteGraph = new BTEGraph(); this.bteGraph.subscribe(this.knowledgeGraph); } @@ -689,7 +679,7 @@ return queryEdges; ]; }; - async query(): Promise { + async query(abortSignal?: AbortSignal): Promise { this._initializeResponse(); await this.addQueryNodes(); @@ -757,12 +747,14 @@ return queryEdges; } const manager = new EdgeManager(queryEdges, metaKG, this.subclassEdges, this.options); - const executionSuccess = await manager.executeEdges(); + const executionSuccess = await manager.executeEdges(abortSignal); this.logs = [...this.logs, ...manager.logs]; if (!executionSuccess) { return; } + if (abortSignal?.aborted) return; + const span3 = Telemetry.startSpan({ description: 'resultsAssembly' }); // update query graph diff --git a/src/inferred_mode/inferred_mode.ts b/src/inferred_mode/inferred_mode.ts index edf9bc79..b82f5063 100644 --- a/src/inferred_mode/inferred_mode.ts +++ b/src/inferred_mode/inferred_mode.ts @@ -1,8 +1,8 @@ import Debug from 'debug'; -import { LogEntry, StampedLog, Telemetry } from '@biothings-explorer/utils'; +import { LogEntry, StampedLog, Telemetry, timeoutPromise } from '@biothings-explorer/utils'; import * as utils from '../utils'; import async from 'async'; -import biolink from '../biolink'; +import { biolink } from '@biothings-explorer/utils'; import { getTemplates, MatchedTemplate, TemplateLookup } from './template_lookup'; import { scaled_sigmoid, inverse_scaled_sigmoid } from '../results_assembly/score'; import TRAPIQueryHandler from '../index'; @@ -44,7 +44,6 @@ export interface CombinedResponseReport { querySuccess: number; queryHadResults: boolean; mergedResults: { [resultID: string]: number }; - creativeLimitHit: boolean | number; } // MatchedTemplate, but with IDs, etc. filled in @@ -60,6 +59,7 @@ export default class InferredQueryHandler { includeReasoner: boolean; pathfinder: boolean; CREATIVE_LIMIT: number; + CREATIVE_TIMEOUT: number; constructor( parent: TRAPIQueryHandler, queryGraph: TrapiQueryGraph, @@ -79,6 +79,7 @@ export default class InferredQueryHandler { this.includeReasoner = includeReasoner; this.pathfinder = pathfinder; this.CREATIVE_LIMIT = process.env.CREATIVE_LIMIT ? parseInt(process.env.CREATIVE_LIMIT) : 500; + this.CREATIVE_TIMEOUT = process.env.CREATIVE_TIMEOUT_S ? parseInt(process.env.CREATIVE_TIMEOUT) * 1000 : 4.75 * 60 * 1000; } get queryIsValid(): boolean { @@ -270,7 +271,6 @@ export default class InferredQueryHandler { querySuccess: 0, queryHadResults: false, mergedResults: {}, - creativeLimitHit: false, }; let mergedThisTemplate = 0; const resultIDsFromPrevious = new Set(Object.keys(combinedResponse.message.results)); @@ -541,10 +541,6 @@ export default class InferredQueryHandler { } report.querySuccess = 1; - const resSize = this.pathfinder ? pfIntermediateSet.size : Object.keys(combinedResponse.message.results).length; - if (resSize >= this.CREATIVE_LIMIT && !report.creativeLimitHit) { - report.creativeLimitHit = resSize; - } span.finish(); return report; } @@ -641,68 +637,59 @@ export default class InferredQueryHandler { } = {}; const auxGraphSuffixes: {[inferredEdgeID: string]: number} = {}; - await async.eachOfSeries(subQueries, async ({ template, queryGraph, qualifiers }, i) => { - const span = Telemetry.startSpan({ description: 'creativeTemplate' }); - span.setData('template', (i as number) + 1); - i = i as number; - if (stop) { - span.finish(); - return; - } - if (global.queryInformation?.queryGraph) { - global.queryInformation.isCreativeMode = true; - global.queryInformation.creativeTemplate = template; - } - if (global.queryInformation != null) global.queryInformation.totalRecords = 0; // Reset between templates - - const handler = new TRAPIQueryHandler(this.options, this.path, this.predicatePath, this.includeReasoner); - try { - // make query and combine results/kg/logs/etc + // perf debugging + const startUsage = process.cpuUsage(); + const startTime = new Date().getTime(); + const ncpu = require('os').cpus().length; + + const completedHandlers = await Promise.all( + subQueries.map(async ({ template, queryGraph }, i) => { + const span = Telemetry.startSpan({ description: 'creativeTemplate' }); + span.setData('template', i + 1); + const handler = new TRAPIQueryHandler(this.options, this.path, this.predicatePath, this.includeReasoner); handler.setQueryGraph(queryGraph); - await handler.query(); - const { querySuccess, queryHadResults, mergedResults, creativeLimitHit } = this.combineResponse( - i, - handler, - qEdgeID, - qEdge, - combinedResponse, - auxGraphSuffixes, - qualifiers, - ); - // update values used in logging - successfulQueries += querySuccess; - if (queryHadResults) resultQueries.push(i); - Object.entries(mergedResults).forEach(([result, countMerged]) => { - mergedResultsCount[result] = - result in mergedResultsCount ? mergedResultsCount[result] + countMerged : countMerged; - }); - // log to user if we should stop - if (creativeLimitHit) { - stop = true; - const message = [ - `Addition of ${creativeLimitHit} results from Template ${i + 1}`, - Object.keys(combinedResponse.message.results).length === this.CREATIVE_LIMIT ? ' meets ' : ' exceeds ', - `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${Object.keys(combinedResponse.message.results).length - } merged). `, - `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results. Skipping remaining ${subQueries.length - (i + 1) - } `, - subQueries.length - (i + 1) === 1 ? `template.` : `templates.`, - ].join(''); + try { + await timeoutPromise(handler.query(AbortSignal.timeout(this.CREATIVE_TIMEOUT)), this.CREATIVE_TIMEOUT); + } catch (error) { + handler.logs.forEach((log) => { + combinedResponse.logs.push(log); + }); + const message = `ERROR: Template-${i + 1} failed due to error ${error}`; debug(message); - combinedResponse.logs.push(new LogEntry(`INFO`, null, message).getLog()); + combinedResponse.logs.push(new LogEntry(`ERROR`, null, message).getLog()); + span.finish(); + return undefined; } span.finish(); - } catch (error) { - handler.logs.forEach((log) => { - combinedResponse.logs.push(log); - }); - const message = `ERROR: Template-${i + 1} failed due to error ${error}`; - debug(message); - combinedResponse.logs.push(new LogEntry(`ERROR`, null, message).getLog()); - span.finish(); - return; - } - }); + return { i, handler }; + }) + ); + + // perf debugging + const endTime = new Date().getTime(); + const timeDelta = (endTime - startTime) * 10 * ncpu; + const { user, system } = process.cpuUsage(startUsage); + debug(`Average CPU Usage: ${(system + user) / timeDelta}%`); + + for (const handlerInfo of completedHandlers) { + if (handlerInfo === undefined) continue; + const { i, handler } = handlerInfo; + const { querySuccess, queryHadResults, mergedResults } = this.combineResponse( + i, + handler, + qEdgeID, + qEdge, + combinedResponse, + auxGraphSuffixes + ); + successfulQueries += querySuccess; + if (queryHadResults) resultQueries.push(i); + Object.entries(mergedResults).forEach(([result, countMerged]) => { + mergedResultsCount[result] = + result in mergedResultsCount ? mergedResultsCount[result] + countMerged : countMerged; + }); + } + // log about merged Results if (Object.keys(mergedResultsCount).length) { // Add 1 for first instance of result (not counted during merging) @@ -732,6 +719,20 @@ export default class InferredQueryHandler { response.message.results = Object.values(combinedResponse.message.results).sort((a, b) => { return b.analyses[0].score - a.analyses[0].score ? b.analyses[0].score - a.analyses[0].score : 0; }); + + // log about trimming results + if (response.message.results.length > this.CREATIVE_LIMIT) { + const message = [ + `Number of results exceeds`, + `creative result maximum of ${this.CREATIVE_LIMIT} (reaching ${ + Object.keys(response.message.results).length + } merged). `, + `Response will be truncated to top-scoring ${this.CREATIVE_LIMIT} results.` + ].join(''); + debug(message); + combinedResponse.logs.push(new LogEntry(`INFO`, null, message).getLog()); + } + // trim extra results and prune kg response.message.results = response.message.results.slice(0, this.CREATIVE_LIMIT); response.description = `Query processed successfully, retrieved ${response.message.results.length} results.`; diff --git a/src/inferred_mode/pathfinder.ts b/src/inferred_mode/pathfinder.ts index f366d256..4f4c90f5 100644 --- a/src/inferred_mode/pathfinder.ts +++ b/src/inferred_mode/pathfinder.ts @@ -14,7 +14,7 @@ import { scaled_sigmoid, inverse_scaled_sigmoid } from '../results_assembly/scor import { LogEntry, StampedLog, Telemetry } from '@biothings-explorer/utils'; import Debug from 'debug'; import generateTemplates from './pf_template_generator'; -import biolink from '../biolink'; +import { biolink } from '@biothings-explorer/utils'; import { removeBioLinkPrefix } from '../utils'; const debug = Debug('bte:biothings-explorer-trapi:pathfinder'); diff --git a/src/inferred_mode/pf_template_generator.ts b/src/inferred_mode/pf_template_generator.ts index dfbb5f6f..24e6f1e1 100644 --- a/src/inferred_mode/pf_template_generator.ts +++ b/src/inferred_mode/pf_template_generator.ts @@ -1,7 +1,7 @@ import path from "path"; import fs from "fs/promises"; import yaml2json from "js-yaml"; -import biolink from "../biolink"; +import { biolink } from '@biothings-explorer/utils'; import { TrapiQNode, TrapiQueryGraph } from "@biothings-explorer/types"; interface CategoryTable { diff --git a/src/qedge2apiedge.ts b/src/qedge2apiedge.ts index 29e592da..fbb8fd6e 100644 --- a/src/qedge2apiedge.ts +++ b/src/qedge2apiedge.ts @@ -3,47 +3,13 @@ import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import * as config from './config'; const CURIE_WITH_PREFIXES = ['MONDO', 'DOID', 'UBERON', 'EFO', 'HP', 'CHEBI', 'CL', 'MGI', 'NCIT']; import Debug from 'debug'; -import QEdge from './query_edge'; +import { QEdge } from '@biothings-explorer/types'; import MetaKG from '@biothings-explorer/smartapi-kg'; import { SmartAPIKGOperationObject } from '@biothings-explorer/smartapi-kg'; import { SRIBioEntity } from 'biomedical_id_resolver'; +import { APIEdge, BatchAPIEdge, MetaXEdge, NonBatchAPIEdge, TemplateBatchAPIEdge, TemplateNonBatchAPIEdge } from '@biothings-explorer/types'; const debug = Debug('bte:biothings-explorer-trapi:qedge2btedge'); -export interface MetaXEdge extends SmartAPIKGOperationObject { - reasoner_edge: QEdge; -} - -export interface TemplatedInput { - queryInputs: string | string[]; - [additionalAttributes: string]: string | string[]; -} - -export interface APIEdge extends MetaXEdge { - input: string | string[] | TemplatedInput; - input_resolved_identifiers: { - [curie: string]: SRIBioEntity; - }; - original_input: { - [equivalentCurie: string]: string; - }; -} - -export interface NonBatchAPIEdge extends APIEdge { - input: string; -} - -export interface BatchAPIEdge extends APIEdge { - input: string[]; -} - -export interface TemplateNonBatchAPIEdge extends APIEdge { - input: TemplatedInput; -} - -export interface TemplateBatchAPIEdge extends APIEdge { - input: TemplatedInput; -} - export default class QEdge2APIEdgeHandler { qEdges: QEdge[]; metaKG: MetaKG; diff --git a/src/query_edge.ts b/src/query_edge.ts deleted file mode 100644 index a9513741..00000000 --- a/src/query_edge.ts +++ /dev/null @@ -1,580 +0,0 @@ -import helper from './helper'; -import Debug from 'debug'; -import * as utils from './utils'; -import biolink from './biolink'; -import { Record, RecordNode, FrozenRecord } from '@biothings-explorer/api-response-transform'; -import QNode from './query_node'; -import { QNodeInfo } from './query_node'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import { TrapiAttributeConstraint, TrapiQualifierConstraint } from '@biothings-explorer/types'; - -const debug = Debug('bte:biothings-explorer-trapi:QEdge'); - -interface ExpandedQualifier { - qualifier_type_id: string; - qualifier_value: string[]; -} - -interface ExpandedQEdgeQualifierConstraint { - qualifier_set: ExpandedQualifier[]; -} - -interface CompactQualifiers { - [qualfier_type_id: string]: string | string[]; -} - -interface QEdgeInfo { - id: string; - object: QNodeInfo | QNode; - subject: QNodeInfo | QNode; - records?: FrozenRecord[]; - logs?: StampedLog[]; - executed?: boolean; - reverse?: boolean; - qualifier_constraints?: TrapiQualifierConstraint[]; - frozen?: boolean; - predicates?: string[]; -} - -interface AliasesByPrimary { - [primaryClient: string]: string[]; -} - -interface AliasesByPrimaryByType { - [semanticType: string]: AliasesByPrimary; -} - -export default class QEdge { - id: string; - predicate: string[]; - subject: QNode; - object: QNode; - expanded_predicates: string[]; - qualifier_constraints: TrapiQualifierConstraint[]; - reverse: boolean; - executed: boolean; - logs: StampedLog[]; - records: Record[]; - filter?: any; - - constructor(info: QEdgeInfo, reverse?: boolean) { - this.id = info.id; - this.predicate = info.predicates; - this.subject = info.frozen === true ? new QNode(info.subject as QNodeInfo) : (info.subject as QNode); - this.object = info.frozen === true ? new QNode(info.object as QNodeInfo) : (info.object as QNode); - this.expanded_predicates = []; - this.qualifier_constraints = info.qualifier_constraints || []; - - this.reverse = this.subject?.getCurie?.() === undefined && this.object?.getCurie?.() !== undefined; - - this.reverse = info.reverse !== undefined ? info.reverse : this.reverse; - this.reverse = reverse !== undefined ? reverse : this.reverse; - - this.init(); - - // edge has been fully executed - this.executed = info.executed === undefined ? false : info.executed; - // run initial checks - this.logs = info.logs === undefined ? [] : info.logs; - - // this edges query response records - if (info.records && info.frozen === true) - this.records = info.records.map((recordJSON: FrozenRecord) => new Record(recordJSON)); - else this.records = []; - - debug(`(2) Created Edge` + ` ${JSON.stringify(this.getID())} Reverse = ${this.reverse}`); - } - - freeze(): QEdgeInfo { - return { - id: this.id, - predicates: this.predicate, - qualifier_constraints: this.qualifier_constraints, - executed: this.executed, - reverse: this.reverse, - logs: this.logs, - subject: this.subject.freeze(), - object: this.object.freeze(), - records: this.records.map((record) => record.freeze()), - frozen: true, - }; - } - - init(): void { - this.expanded_predicates = this.getPredicate(); - } - - getID(): string { - return this.id; - } - - getHashedEdgeRepresentation(): string { - // all values sorted so same qEdge with slightly different orders will hash the same - const qualifiersSorted = (this.getSimpleQualifierConstraints() || []) - .map((qualifierSet) => { - return Object.entries(qualifierSet) - .sort(([qTa], [qTb]) => qTa.localeCompare(qTb)) - .reduce((str, [qType, qVal]) => `${str}${qType}:${qVal};`, ''); - }) - .sort((setString1, setString2) => setString1.localeCompare(setString2)); - - const toBeHashed = - (this.getInputNode().getCategories() || []).sort().join(',') + - (this.getPredicate() || []).sort() + - (this.getOutputNode().getCategories() || []).sort().join(',') + - (this.getInputCurie() || []).sort() + - qualifiersSorted; - - return helper._generateHash(toBeHashed); - } - - expandPredicates(predicates: string[]): string[] { - return Array.from(new Set(predicates.reduce((acc, cur) => [...acc, ...biolink.getDescendantPredicates(cur)], []))); - } - - getPredicate(): string[] { - if (this.predicate === undefined || this.predicate === null) { - return undefined; - } - const predicates = utils.toArray(this.predicate).map((item) => utils.removeBioLinkPrefix(item)); - const expandedPredicates = this.expandPredicates(predicates); - debug(`Expanded edges: ${expandedPredicates}`); - return expandedPredicates - .map((predicate) => { - return this.isReversed() === true ? biolink.reverse(predicate) : predicate; - }) - .filter((item) => !(typeof item === 'undefined')); - } - - expandQualifierConstraints(constraints: TrapiQualifierConstraint[]): ExpandedQEdgeQualifierConstraint[] { - return constraints.map((qualifierSetObj) => { - return { - qualifier_set: qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => { - const new_qualifier_values = qualifier_type_id.includes('predicate') - ? Array.isArray(qualifier_value) - ? Array.from( - qualifier_value.reduce((set: Set, predicate: string) => { - biolink - .getDescendantPredicates(utils.removeBioLinkPrefix(predicate)) - .forEach((item) => set.add(`biolink:${utils.removeBioLinkPrefix(item)}`)); - return set; - }, new Set()), - ) - : Array.from( - new Set( - biolink - .getDescendantPredicates(utils.removeBioLinkPrefix(qualifier_value)) - .map((item) => `biolink:${utils.removeBioLinkPrefix(item)}`), - ), - ) - : Array.from( - new Set(biolink.getDescendantQualifiers(utils.removeBioLinkPrefix(qualifier_value as string))), - ); - - return { - qualifier_type_id, - qualifier_value: new_qualifier_values, - }; - }), - }; - }); - } - - getQualifierConstraints(): TrapiQualifierConstraint[] { - if (!this.qualifier_constraints) { - return []; - } - if (this.isReversed()) { - return this.qualifier_constraints.map((qualifierSetObj) => { - return { - qualifier_set: qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => { - let newQualifierType = qualifier_type_id; - let newQualifierValue = qualifier_value; - if (qualifier_type_id.includes('predicate')) { - if (Array.isArray(qualifier_value)) { - newQualifierValue = qualifier_value.map((str) => `biolink:${str.replace('biolink', '')}`); - } else { - newQualifierValue = `biolink:${qualifier_value.replace('biolink:', '')}`; - } - } - if (qualifier_type_id.includes('subject')) { - newQualifierType = qualifier_type_id.replace('subject', 'object'); - } - if (qualifier_type_id.includes('object')) { - newQualifierType = qualifier_type_id.replace('object', 'subject'); - } - return { - qualifier_type_id: newQualifierType, - qualifier_value: newQualifierValue, - }; - }), - }; - }); - } - return this.qualifier_constraints; - } - - getSimpleQualifierConstraints(): CompactQualifiers[] | undefined { - const constraints: CompactQualifiers[] = this.getQualifierConstraints().map((qualifierSetObj) => { - return Object.fromEntries( - qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - qualifier_type_id.replace('biolink:', ''), - Array.isArray(qualifier_value) - ? qualifier_value.map((string) => string.replace('biolink:', '')) - : qualifier_value.replace('biolink:', ''), - ]), - ); - }); - return constraints.length > 0 ? constraints : undefined; - } - - getSimpleExpandedQualifierConstraints(): CompactQualifiers[] | undefined { - const constraints = this.expandQualifierConstraints(this.getQualifierConstraints()).map( - (qualifierSetObj: ExpandedQEdgeQualifierConstraint) => { - return Object.fromEntries( - qualifierSetObj.qualifier_set.map(({ qualifier_type_id, qualifier_value }) => [ - utils.removeBioLinkPrefix(qualifier_type_id), - utils.toArray(qualifier_value).map((e) => utils.removeBioLinkPrefix(e)), - ]), - ); - }, - ); - return constraints.length > 0 ? constraints : undefined; - } - - chooseLowerEntityValue(): void { - // edge has both subject and object entity counts and must choose lower value - // to use in query. - debug(`(8) Choosing lower entity count in edge...`); - if (this.object.entity_count && this.subject.entity_count) { - if (this.object.entity_count == this.subject.entity_count) { - // // (#) ---> () - this.reverse = false; - this.object.holdCurie(); - debug(`(8) Sub - Obj were same but chose subject (${this.subject.entity_count})`); - } else if (this.object.entity_count > this.subject.entity_count) { - // (#) ---> () - this.reverse = false; - // tell node to hold curie in a temp field - this.object.holdCurie(); - debug(`(8) Chose lower entity value in subject (${this.subject.entity_count})`); - } else { - // () <--- (#) - this.reverse = true; - // tell node to hold curie in a temp field - this.subject.holdCurie(); - debug(`(8) Chose lower entity value in object (${this.object.entity_count})`); - } - } else { - debug(`(8) Error: Edge must have both object and subject entity values.`); - } - } - - extractCuriesFromRecords(records: Record[], isReversed: boolean): AliasesByPrimaryByType { - // will give you all curies found by semantic type, each type will have - // a main ID and all of it's aliases - debug(`(7) Updating Entities in "${this.getID()}"`); - const typesToInclude = isReversed ? this.subject.getCategories() : this.object.getCategories(); - debug(`(7) Collecting Types: "${JSON.stringify(typesToInclude)}"`); - const all: AliasesByPrimaryByType = {}; - records.forEach((record) => { - const subjectTypes = record.subject.semanticType.map((type) => type.replace('biolink:', '')); - const objectTypes = record.object.semanticType.map((type) => type.replace('biolink:', '')); - const nodeOriginals = { - subject: record.subject.original, - object: record.object.original, - }; - - Object.entries({ subject: subjectTypes, object: objectTypes }).forEach(([node, nodeTypes]) => { - nodeTypes.forEach((nodeType) => { - const nodeOriginal = nodeOriginals[node]; - - if (!typesToInclude.includes(nodeType) && !typesToInclude.includes('NamedThing')) { - return; - } - if (!all[nodeType]) { - all[nodeType] = {}; - } - const originalAliases: Set = new Set(); - (record[node] as RecordNode).equivalentCuries.forEach((curie) => { - originalAliases.add(curie); - }); - // check and add only unique - let wasFound = false; - originalAliases.forEach((alias) => { - if (all[nodeType][alias]) { - wasFound = true; - } - }); - if (!wasFound) { - all[nodeType][nodeOriginal] = [...originalAliases]; - } - - if (!all[nodeType][nodeOriginal] || all[nodeType][nodeOriginal].length === 0) { - if (record[node].curie.length > 0) { - // else #2 check curie - all[nodeType][nodeOriginal] = [record[node].curie]; - } else { - // #3 last resort check original - all[nodeType][nodeOriginal] = [nodeOriginal]; - } - } - }); - }); - }); - debug(`Collected entity ids in records: ${JSON.stringify(Object.keys(all))}`); - return all; - // {Gene:{'id': ['alias']}} - } - - _combineCuries(curies: AliasesByPrimaryByType): AliasesByPrimary { - // combine all curies in case there are - // multiple categories in this node since - // they are separated by type - const combined = {}; - for (const type in curies) { - for (const original in curies[type]) { - combined[original] = curies[type][original]; - } - } - return combined; - } - - updateNodesCuries(records: Record[]): void { - // update node queried (1) ---> (update) - const curies_by_semantic_type = this.extractCuriesFromRecords(records, this.reverse); - const combined_curies = this._combineCuries(curies_by_semantic_type); - this.reverse ? this.subject.updateCuries(combined_curies) : this.object.updateCuries(combined_curies); - // update node used as input (1 [update]) ---> () - const curies_by_semantic_type_2 = this.extractCuriesFromRecords(records, !this.reverse); - const combined_curies_2 = this._combineCuries(curies_by_semantic_type_2); - !this.reverse ? this.subject.updateCuries(combined_curies_2) : this.object.updateCuries(combined_curies_2); - } - - applyNodeConstraints(): void { - debug(`(6) Applying Node Constraints to ${this.records.length} records.`); - const kept = []; - let save_kept = false; - const sub_constraints = this.subject.constraints; - if (sub_constraints && sub_constraints.length) { - const from = this.reverse ? 'object' : 'subject'; - debug(`Node (subject) constraints: ${JSON.stringify(sub_constraints)}`); - save_kept = true; - for (let i = 0; i < this.records.length; i++) { - const res = this.records[i]; - let keep = true; - // apply constraints - for (let x = 0; x < sub_constraints.length; x++) { - const constraint = sub_constraints[x]; - keep = this.meetsConstraint(constraint, res, from); - } - // pass or not - if (keep) { - kept.push(res); - } - } - } - - const obj_constraints = this.object.constraints; - if (obj_constraints && obj_constraints.length) { - const from = this.reverse ? 'subject' : 'object'; - debug(`Node (object) constraints: ${JSON.stringify(obj_constraints)}`); - save_kept = true; - for (let i = 0; i < this.records.length; i++) { - const res = this.records[i]; - let keep = true; - // apply constraints - for (let x = 0; x < obj_constraints.length; x++) { - const constraint = obj_constraints[x]; - keep = this.meetsConstraint(constraint, res, from); - } - // pass or not - if (keep) { - kept.push(res); - } - } - } - if (save_kept) { - // only override recordss if there was any filtering done. - this.records = kept; - debug(`(6) Reduced to (${this.records.length}) records.`); - } else { - debug(`(6) No constraints. Skipping...`); - } - } - - meetsConstraint(constraint: TrapiAttributeConstraint, record: Record, from: string): boolean { - // list of attribute ids in node - const available_attributes = [...new Set(Object.keys(record[from].attributes))]; - // debug(`ATTRS ${JSON.stringify(record[from].normalizedInfo[0]._leafSemanticType)}` + - // ` ${from} : ${JSON.stringify(available_attributes)}`); - // determine if node even contains right attributes - const filters_found = available_attributes.filter((attr) => attr == constraint.id); - if (!filters_found.length) { - // node doesn't have the attribute needed - return false; - } else { - // match attr by name, parse only attrs of interest - const node_attributes = {}; - filters_found.forEach((filter) => { - node_attributes[filter] = record[from].attributes[filter]; - }); - switch (constraint.operator) { - case '==': - for (const key in node_attributes) { - if (!isNaN(constraint.value as number)) { - if (Array.isArray(node_attributes[key])) { - if ( - node_attributes[key].includes(constraint.value) || - node_attributes[key].includes(constraint.value.toString()) - ) { - return true; - } - } else { - if ( - node_attributes[key] == constraint.value || - node_attributes[key] == constraint.value.toString() || - node_attributes[key] == parseInt(constraint.value as string) - ) { - return true; - } - } - } else { - if (Array.isArray(node_attributes[key])) { - if (node_attributes[key].includes(constraint.value)) { - return true; - } - } else { - if ( - node_attributes[key] == constraint.value || - node_attributes[key] == constraint.value.toString() || - node_attributes[key] == parseInt(constraint.value as string) - ) { - return true; - } - } - } - } - return false; - case '>': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) > parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) > parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '>=': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) >= parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) >= parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '<': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) > parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) < parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - case '<=': - for (const key in node_attributes) { - if (Array.isArray(node_attributes[key])) { - for (let index = 0; index < node_attributes[key].length; index++) { - const element = node_attributes[key][index]; - if (parseInt(element) <= parseInt(constraint.value as string)) { - return true; - } - } - } else { - if (parseInt(node_attributes[key]) <= parseInt(constraint.value as string)) { - return true; - } - } - } - return false; - default: - debug(`Node operator not handled ${constraint.operator}`); - return false; - } - } - } - - storeRecords(records: Record[]): void { - debug(`(6) Storing records...`); - // store new records in current edge - this.records = records; - // will update records if any constraints are found - this.applyNodeConstraints(); - debug(`(7) Updating nodes based on edge records...`); - this.updateNodesCuries(records); - } - - getInputNode(): QNode { - if (this.reverse) { - return this.object; - } - return this.subject; - } - - getOutputNode(): QNode { - if (this.reverse) { - return this.subject; - } - return this.object; - } - - isReversed(): boolean { - return this.reverse; - } - - getInputCurie(): string[] { - const curie = this.subject.getCurie() || this.object.getCurie(); - if (Array.isArray(curie)) { - return curie; - } - return [curie]; - } - - hasInputResolved(): boolean { - return this.getInputNode().hasEquivalentIDs(); - } - - hasInput(): boolean { - if (this.reverse) { - return this.object.hasInput(); - } - return this.subject.hasInput(); - } - - getReversedPredicate(predicate: string): string { - return predicate ? biolink.reverse(predicate) : undefined; - } -} diff --git a/src/query_graph.ts b/src/query_graph.ts deleted file mode 100644 index 5b0f2b68..00000000 --- a/src/query_graph.ts +++ /dev/null @@ -1,420 +0,0 @@ -import QEdge from './query_edge'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; -import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import Debug from 'debug'; -import QNode from './query_node'; -import biolink from './biolink'; -import { resolveSRI } from 'biomedical_id_resolver'; -import _ from 'lodash'; -import * as utils from './utils'; -import { TrapiQueryGraph } from '@biothings-explorer/types'; -import NotImplementedError from './exceptions/not_implemented_error'; - -const debug = Debug('bte:biothings-explorer-trapi:query_graph'); - -export default class QueryGraph { - queryGraph: TrapiQueryGraph; - schema: any; - logs: StampedLog[]; - skipCycleDetection: boolean; - nodes: { [QNodeID: string]: QNode }; - edges: { [QEdgeID: string]: QEdge }; - constructor(queryGraph: TrapiQueryGraph, schema: any, skipCycleDetection = false) { - this.queryGraph = queryGraph; - this.schema = schema; - this.skipCycleDetection = skipCycleDetection; - this.logs = []; - } - - _validateEmptyNodes(queryGraph: TrapiQueryGraph): void { - if (Object.keys(queryGraph.nodes).length === 0) { - throw new InvalidQueryGraphError('Your Query Graph has no nodes defined.'); - } - } - - _validateOneNodeID(queryGraph: TrapiQueryGraph): void { - for (const nodeID in queryGraph.nodes) { - if (queryGraph.nodes[nodeID] && queryGraph.nodes[nodeID]?.ids?.length > 0) { - return; - } - } - throw new InvalidQueryGraphError( - 'body/message.query_graph.nodes should contain at least one node with at least one non-null id', - ); - } - - _validateEmptyEdges(queryGraph: TrapiQueryGraph): void { - if (Object.keys(queryGraph.edges).length === 0) { - throw new InvalidQueryGraphError('Your Query Graph has no edges defined.'); - } - } - - _validateNodeEdgeCorrespondence(queryGraph: TrapiQueryGraph): void { - for (const qEdgeID in queryGraph.edges) { - if (!(this.queryGraph.edges[qEdgeID].subject in queryGraph.nodes)) { - throw new InvalidQueryGraphError(`The subject of edge ${qEdgeID} is not defined in the query graph.`); - } - if (!(this.queryGraph.edges[qEdgeID].object in queryGraph.nodes)) { - throw new InvalidQueryGraphError(`The object of edge ${qEdgeID} is not defined in the query graph.`); - } - } - } - - _validateBatchSize(queryGraph: TrapiQueryGraph): void { - Object.entries(queryGraph.nodes).forEach(([nodeID, node]) => { - if (node.ids && node.ids.length > 150) { - throw new InvalidQueryGraphError(`Query node ${nodeID} exceeds batch size limit.`); - } - }); - } - - _validateDuplicateEdges(queryGraph: TrapiQueryGraph): void { - const edgeSet = new Set(); - for (const edgeID in queryGraph.edges) { - const subject = queryGraph.edges[edgeID].subject; - const object = queryGraph.edges[edgeID].object; - if (edgeSet.has(`${subject}-${object}`) || edgeSet.has(`${object}-${subject}`)) { - throw new InvalidQueryGraphError('Multiple edges between two nodes.'); - } - edgeSet.add(`${subject}-${object}`); - } - } - - _validateCycles(queryGraph: TrapiQueryGraph): void { - const nodes = {}; - for (const nodeID in queryGraph.nodes) { - nodes[nodeID] = { - connections: new Set(), - visited: false, - }; - } - - for (const edgeID in queryGraph.edges) { - const edge = queryGraph.edges[edgeID]; - nodes[edge.subject].connections.add(edge.object); - nodes[edge.object].connections.add(edge.subject); - } - - for (const firstNode in nodes) { - if (nodes[firstNode].visited == true) continue; - const stack: { curNode: string; parent: string | number }[] = [{ curNode: firstNode, parent: -1 }]; - nodes[firstNode].visited = true; - while (stack.length !== 0) { - const { curNode, parent } = stack.pop(); - for (const conNode of nodes[curNode].connections) { - if (conNode == parent) continue; - if (nodes[conNode].visited === true) { - throw new InvalidQueryGraphError('The query graph contains a cycle.'); - } - stack.push({ curNode: conNode, parent: curNode }); - nodes[conNode].visited = true; - } - } - } - } - - _validateNodeProperties(queryGraph: TrapiQueryGraph): void { - const schemProps = this.schema?.components?.schemas?.QNode?.properties - ? this.schema.components.schemas.QNode.properties - : {}; - const nodeProperties = new Set(Object.keys(schemProps)); - const badProperties = new Set(); - const badNodes = new Set(); - for (const nodeID in queryGraph.nodes) { - for (const property in queryGraph.nodes[nodeID]) { - if (!nodeProperties.has(property)) { - badProperties.add(property); - badNodes.add(nodeID); - } - } - } - - if (badProperties.size !== 0) { - this.logs.push( - new LogEntry( - 'WARNING', - null, - `Ignoring unrecognized properties (${[...badProperties].join(',')}) on nodes (${[...badNodes].join(',')}).`, - ).getLog(), - ); - } - } - - _validateEdgeProperties(queryGraph: TrapiQueryGraph): void { - const schemProps = this.schema?.components?.schemas?.QEdge?.properties - ? this.schema.components.schemas.QEdge.properties - : {}; - const edgeProperties = new Set(Object.keys(schemProps)); - const badProperties = new Set(); - const badEdges = new Set(); - for (const edgeID in queryGraph.edges) { - for (const property in queryGraph.edges[edgeID]) { - if (!edgeProperties.has(property)) { - badProperties.add(property); - badEdges.add(edgeID); - } - } - } - - if (badProperties.size !== 0) { - this.logs.push( - new LogEntry( - 'WARNING', - null, - `Ignoring unrecognized properties (${[...badProperties].join(',')}) on edges (${[...badEdges].join(',')}).`, - ).getLog(), - ); - } - } - - _validateNoDuplicateQualifierTypes(queryGraph: TrapiQueryGraph): void { - Object.entries(queryGraph.edges).forEach(([id, edge]) => { - if (edge.qualifier_constraints) { - edge.qualifier_constraints.forEach((qualifierSet, i) => { - const qualifierTypes = new Set(); - qualifierSet.qualifier_set.forEach(({ qualifier_type_id }) => { - if (qualifierTypes.has(qualifier_type_id)) { - throw new InvalidQueryGraphError( - `Query edge ${id} qualifier set ${i} contains duplicate qualifier_type_id ${qualifier_type_id}`, - ); - } - qualifierTypes.add(qualifier_type_id); - }); - }); - } - }); - } - - _validateNoMCQ(queryGraph: TrapiQueryGraph): boolean { - return Object.values(queryGraph.nodes).some((node) => { - if (node.set_interpretation && node.set_interpretation.toLowerCase() === 'many') { - throw new NotImplementedError('NotImplementedError', 'Set interpretation is not yet implemented.') - } - }) - } - - _validate(queryGraph: TrapiQueryGraph): void { - this._validateEmptyEdges(queryGraph); - this._validateEmptyNodes(queryGraph); - this._validateOneNodeID(queryGraph); - this._validateNodeEdgeCorrespondence(queryGraph); - this._validateDuplicateEdges(queryGraph); - this._validateNodeProperties(queryGraph); - this._validateEdgeProperties(queryGraph); - this._validateBatchSize(queryGraph); - !this.skipCycleDetection && this._validateCycles(queryGraph); - this._validateNoDuplicateQualifierTypes(queryGraph); - this._validateNoMCQ(queryGraph); - } - - private async _findNodeCategories(curies: string[]): Promise { - const noMatchMessage = `No category match found for ${JSON.stringify(curies)}.`; - if (curies.length == 1) { - let matchedCategories: string[]; - const resolved = await resolveSRI({ - unknown: curies, - }); - debug(`Query node missing categories...Looking for match...`); - if (resolved[curies[0]] && resolved[curies[0]].primaryTypes) { - matchedCategories = resolved[curies[0]].primaryTypes; - return matchedCategories.filter((c) => c).map((c) => `biolink:${c}`); - } else { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - return []; - } - } else { - try { - let finalCategories: string[] = []; - const tree = biolink.biolink.classTree.objects; - - // get array of all unique categories for all curies - const allCategories = [ - ...Object.values(await resolveSRI({ unknown: curies })) - .map((resolvedCurie) => resolvedCurie.semanticTypes) - .filter((semanticTypes) => semanticTypes.some((item) => item !== null)) - .map((semanticTypes) => semanticTypes.map((t) => utils.removeBioLinkPrefix(t))) - .reduce((set: Set, arr: string[]): Set => new Set([...set, ...arr]), new Set()), - ]; - - if (allCategories.length) { - finalCategories.push(allCategories[0]); - } else { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - return []; - } - - allCategories.forEach((category, i) => { - const keepSet: Set = new Set(); - const rmSet: Set = new Set(); - // check against each currently selected category - finalCategories.forEach((selected) => { - if (tree[selected].is_mixin) { - rmSet.add(selected); - } - if (tree[category].is_mixin) { - rmSet.add(category); - } - if (category === selected) { - return keepSet.add(category); - } - - let parent = category; - while (parent) { - if (selected === parent || tree[selected].children.includes(parent)) { - rmSet.add(selected); - return keepSet.add(category); - } - parent = tree[parent].parent; - } - - parent = selected; - while (parent) { - if (category === parent || tree[category].children.includes(parent)) { - rmSet.add(category); - return keepSet.add(selected); - } - parent = tree[parent].parent; - } - // add both if neither is ancestor of the other - keepSet.add(category).add(selected); - }); - finalCategories = [...keepSet].filter((cat) => !rmSet.has(cat)); - // in event no categories are kept (due to mixin shenanigans/etc) - if (!finalCategories.length && i < allCategories.length - 1) { - finalCategories = [allCategories[i + 1]]; - } - }); - if (!finalCategories.length) { - debug(noMatchMessage); - this.logs.push(new LogEntry('ERROR', null, noMatchMessage).getLog()); - } - return [...finalCategories].map((cat) => 'biolink:' + cat); - } catch (error) { - const errorMessage = `Unable to retrieve categories due to error ${error}`; - debug(errorMessage); - this.logs.push(new LogEntry('ERROR', null, errorMessage).getLog()); - return []; - } - } - } - - private async _storeNodes(): Promise<{ [qNodeID: string]: QNode }> { - const nodes: { [qNodeID: string]: QNode } = {}; - for (const qNodeID in this.queryGraph.nodes) { - //if node has ID but no categories - if ( - (!this.queryGraph.nodes[qNodeID].categories && this.queryGraph.nodes[qNodeID].ids) || - (this.queryGraph.nodes[qNodeID].categories && - // this.queryGraph.nodes[qNodeID].categories.length == 0 && - this.queryGraph.nodes[qNodeID].ids) - ) { - let userAssignedCategories = this.queryGraph.nodes[qNodeID].categories; - let categories = await this._findNodeCategories(this.queryGraph.nodes[qNodeID].ids); - if (userAssignedCategories) { - userAssignedCategories = [...userAssignedCategories]; // new Array for accurate logging after node updated - categories = categories.filter((category) => !userAssignedCategories.includes(category)); - } - if (categories.length) { - if (!this.queryGraph.nodes[qNodeID].categories) { - this.queryGraph.nodes[qNodeID].categories = categories; - } else { - this.queryGraph.nodes[qNodeID].categories.push(...categories); - } - debug(`Node categories found. Assigning value: ${JSON.stringify(this.queryGraph.nodes[qNodeID])}`); - this.logs.push( - new LogEntry( - 'INFO', - null, - [ - `Node ${qNodeID} `, - `with id${this.queryGraph.nodes[qNodeID].ids.length > 1 ? 's' : ''} `, - `[${this.queryGraph.nodes[qNodeID].ids.join(', ')}] `, - `${userAssignedCategories && userAssignedCategories.length - ? `and categor${userAssignedCategories.length === 1 ? 'y' : 'ies'} [${userAssignedCategories.join( - ', ', - )}] augmented with` - : `assigned` - } `, - `categor${categories.length > 1 ? 'ies' : 'y'} `, - `[${categories.join(', ')}] inferred from `, - `id${this.queryGraph.nodes[qNodeID].ids.length > 1 ? 's' : ''}.`, - ].join(''), - ).getLog(), - ); - } - nodes[qNodeID] = new QNode({ id: qNodeID, ...this.queryGraph.nodes[qNodeID] }); - } else { - debug(`Creating node...`); - nodes[qNodeID] = new QNode({ id: qNodeID, ...this.queryGraph.nodes[qNodeID] }); - } - - if (nodes[qNodeID].categories !== undefined) { - // Do some type cleanup/conflation - if ( - nodes[qNodeID].categories.includes('biolink:Disease') || - nodes[qNodeID].categories.includes('biolink:PhenotypicFeature') - ) { - nodes[qNodeID].categories = nodes[qNodeID].categories.filter( - (e) => e !== 'biolink:Disease' && e !== 'biolink:PhenotypicFeature', - ); - nodes[qNodeID].categories.push('biolink:DiseaseOrPhenotypicFeature'); - } - if ( - nodes[qNodeID].categories.includes('biolink:Protein') && - !nodes[qNodeID].categories.includes('biolink:Gene') - ) { - nodes[qNodeID].categories.push('biolink:Gene'); - } - if ( - nodes[qNodeID].categories.includes('biolink:Gene') && - !nodes[qNodeID].categories.includes('biolink:Protein') - ) { - nodes[qNodeID].categories.push('biolink:Protein'); - } - // Ensure categories are rolled into expandedCategories - nodes[qNodeID].expandCategories() - } - } - this.logs.push( - new LogEntry('DEBUG', null, `BTE identified ${Object.keys(nodes).length} qNodes from your query graph`).getLog(), - ); - return nodes; - } - - async calculateEdges(): Promise { - this._validate(this.queryGraph); - //populate edge and node info - debug(`(1) Creating edges for manager...`); - if (this.nodes === undefined) { - this.nodes = await this._storeNodes(); - } - - const edges = {}; - Object.entries(this.queryGraph.edges).forEach(([qEdgeID, qEdge]) => { - const edge_info = { - ...qEdge, - ...{ - subject: this.nodes[qEdge.subject], - object: this.nodes[qEdge.object], - }, - }; - - //store in each node ids of edges connected to them - this.nodes[qEdge.subject].updateConnection(qEdgeID); - this.nodes[qEdge.object].updateConnection(qEdgeID); - - edges[qEdgeID] = new QEdge({ id: qEdgeID, ...edge_info }); - }); - this.edges = edges; - this.logs.push( - new LogEntry( - 'DEBUG', - null, - `BTE identified ${Object.keys(this.edges).length} qEdges from your query graph`, - ).getLog(), - ); - return Object.values(this.edges); - } -} diff --git a/src/query_node.ts b/src/query_node.ts deleted file mode 100644 index 98160026..00000000 --- a/src/query_node.ts +++ /dev/null @@ -1,306 +0,0 @@ -/* eslint-disable @typescript-eslint/no-var-requires */ -import _ from 'lodash'; -import * as utils from './utils'; -import biolink from './biolink'; -import Debug from 'debug'; -import InvalidQueryGraphError from './exceptions/invalid_query_graph_error'; -import { SRIBioEntity } from 'biomedical_id_resolver'; -const debug = Debug('bte:biothings-explorer-trapi:QNode'); - -export interface QNodeInfo { - id: string; - categories?: string[]; - ids?: string[]; - is_set?: boolean; - expanded_curie?: ExpandedCuries; - held_curie?: string[]; - held_expanded?: ExpandedCuries; - constraints?: any; - connected_to?: string[]; - equivalentIDs?: SRIResolvedSet; -} - -export interface SRIResolvedSet { - [originalCurie: string]: SRIBioEntity; -} - -export interface ExpandedCuries { - [originalCurie: string]: string[]; -} - -export default class QNode { - id: string; - categories: string[]; - equivalentIDs?: SRIResolvedSet; - expandedCategories: string[]; - equivalentIDsUpdated: boolean; - curie: string[]; - is_set: boolean; - expanded_curie: ExpandedCuries; - entity_count: number; - held_curie: string[]; - held_expanded: ExpandedCuries; - constraints: any; // TODO type - connected_to: Set; - - constructor(info: QNodeInfo) { - this.id = info.id; - this.categories = info.categories || ['NamedThing']; - this.expandedCategories = this.categories; - this.equivalentIDsUpdated = false; - // mainIDs - this.curie = info.ids; - //is_set - this.is_set = info.is_set; - //mainID : its equivalent ids - this.expanded_curie = info.expanded_curie !== undefined ? info.expanded_curie : {}; - this.entity_count = info.ids ? info.ids.length : 0; - debug(`(1) Node "${this.id}" has (${this.entity_count}) entities at start.`); - //when choosing a lower entity count a node with higher count - // might be told to store its curies temporarily - this.held_curie = info.held_curie !== undefined ? info.held_curie : []; - this.held_expanded = info.held_expanded !== undefined ? info.held_expanded : {}; - //node constraints - this.constraints = info.constraints; - //list of edge ids that are connected to this node - this.connected_to = info.connected_to !== undefined ? new Set(info.connected_to) : new Set(); - //object-ify array of initial curies - if (info.expanded_curie === undefined) this.expandCurie(); - this.validateConstraints(); - this.expandCategories(); - } - - freeze(): QNodeInfo { - return { - categories: this.categories, - connected_to: Array.from(this.connected_to), - constraints: this.constraints, - ids: this.curie, - equivalentIDs: this.equivalentIDs, - expanded_curie: this.expanded_curie, - held_curie: this.held_curie, - held_expanded: this.held_expanded, - id: this.id, - is_set: this.is_set, - }; - } - - isSet(): boolean { - //query node specified as set - return this.is_set ? true : false; - } - - validateConstraints(): void { - const required = ['id', 'operator', 'value']; - if (this.constraints && this.constraints.length) { - this.constraints.forEach((constraint: unknown) => { - const constraint_keys = Object.keys(constraint); - if (_.intersection(constraint_keys, required).length < 3) { - throw new InvalidQueryGraphError(`Invalid constraint specification must include (${required})`); - } - }); - } - } - - expandCurie(): void { - if (this.curie && this.curie.length) { - this.curie.forEach((id) => { - if (!Object.hasOwnProperty.call(id, this.expanded_curie)) { - this.expanded_curie[id] = [id]; - } - }); - debug(`(1) Node "${this.id}" expanded initial curie. ${JSON.stringify(this.expanded_curie)}`); - } - } - - updateConnection(qEdgeID: string): void { - this.connected_to.add(qEdgeID); - debug(`"${this.id}" connected to "${[...this.connected_to]}"`); - } - - getConnections(): string[] { - return [...this.connected_to]; - } - - holdCurie(): void { - //hold curie aside temp - debug(`(8) Node "${this.id}" holding ${JSON.stringify(this.curie)} aside.`); - this.held_curie = this.curie; - this.held_expanded = this.expanded_curie; - this.curie = undefined; - this.expanded_curie = {}; - } - - updateCuries(curies: ExpandedCuries): void { - // {originalID : [aliases]} - if (!this.curie) { - this.curie = []; - } - //bring back held curie - if (this.held_curie.length) { - debug(`(8) Node "${this.id}" restored curie.`); - //restore - this.curie = this.held_curie; - this.expanded_curie = this.held_expanded; - //reset holds - this.held_curie = []; - this.held_expanded = {}; - } - if (!this.curie.length) { - debug(`Node "${this.id}" saving (${Object.keys(curies).length}) curies...`); - this.curie = Object.keys(curies); - this.expanded_curie = curies; - } else { - debug(`Node "${this.id}" intersecting (${this.curie.length})/(${Object.keys(curies).length}) curies...`); - // let intersection = this.intersectCuries(this.curie, curies); - // this.curie = intersection; - // debug(`Node "${this.id}" kept (${intersection.length}) curies...`); - this.intersectWithExpandedCuries(curies); - } - this.entity_count = this.curie.length; - } - - _combineCuriesIntoList(curies: ExpandedCuries): string[] { - // curies {originalID : ['aliasID']} - //combine all curies into single list for easy intersection - const combined: Set = new Set(); - Object.values(curies).forEach((expanded) => { - if (!Array.isArray(expanded)) { - combined.add(expanded); - } else { - expanded.forEach((curie) => { - combined.add(curie); - }); - } - }); - return [...combined]; - } - - - intersectWithExpandedCuries(newCuries: ExpandedCuries): void { - const keep: { [mainID: string]: string[] } = {}; - - const existingSet = new Set(); - for (const key in this.expanded_curie) { - for (const curie of this.expanded_curie[key]) { - existingSet.add(curie.toLowerCase()); - } - } - - // If a new entity has any alias intersection with an existing entity, keep it - for (const [newMainID, currentAliases] of Object.entries(newCuries)) { - let someIntersection = false; - for (const curie of currentAliases) { - if (existingSet.has(curie.toLowerCase())) { - someIntersection = true; - break; - } - } - - if (someIntersection) { - if (!keep[newMainID]) keep[newMainID] = currentAliases; - } - } - - //save expanded curies (main + aliases) - this.expanded_curie = keep; - //save curies (main ids) - this.curie = Object.keys(keep); - debug(`Node "${this.id}" kept (${Object.keys(keep).length}) curies...`); - } - - intersectCuries(curies: string[], newCuries: ExpandedCuries): string[] { - //curies is a list ['ID'] - // new curies {originalID : ['aliasID']} - const all_new_curies = this._combineCuriesIntoList(newCuries); - return _.intersection(curies, all_new_curies); - } - - getID(): string { - return this.id; - } - - getCurie(): string[] { - return this.curie; - } - - getEquivalentIDs(): SRIResolvedSet { - return this.equivalentIDs ?? {}; - } - - removeEquivalentID(id: string): void { - delete this.equivalentIDs[id]; - } - - getCategories(): string[] { - if (this.equivalentIDsUpdated) this.expandCategories(); - return this.expandedCategories; - } - - expandCategories(): void { - this.equivalentIDsUpdated = false; - if (this.hasEquivalentIDs() === false) { - const categories = utils.toArray(this.categories); - let expanded_categories = []; - categories.map((category) => { - expanded_categories = [ - ...expanded_categories, - ...(biolink.getDescendantClasses(utils.removeBioLinkPrefix(category)) || []), - ]; - }); - this.expandedCategories = utils.getUnique(expanded_categories); - return; - } - // let ancestors = new Set( - // utils - // .toArray(this.category) - // .map((category) => utils.removeBioLinkPrefix(category)) - // .reduce((arr, category) => [...arr, ...biolink.getAncestorClasses(category)], []) - // .filter((category) => !utils.toArray(this.category).includes(`biolink:${category}`)), - // ); - let categories = utils.toArray(this.categories).map((category) => utils.removeBioLinkPrefix(category)); - Object.values(this.equivalentIDs).map((entity) => { - categories = [...categories, ...entity.primaryTypes]; - }); - this.expandedCategories = utils.getUnique( - utils - .getUnique(categories) - .reduce((arr, category) => [...arr, ...(biolink.getDescendantClasses(category) || [])], []), - ); - // .filter(category => !ancestors.has(category)); - } - - getEntities(): SRIBioEntity[] { - return Object.values(this.equivalentIDs); - } - - getPrimaryIDs(): string[] { - return this.getEntities().map((entity) => entity.primaryID); - } - - setEquivalentIDs(equivalentIDs: SRIResolvedSet): void { - this.equivalentIDs = equivalentIDs; - this.equivalentIDsUpdated = true; - } - - updateEquivalentIDs(equivalentIDs: SRIResolvedSet): void { - if (this.equivalentIDs === undefined) { - this.equivalentIDs = equivalentIDs; - } else { - this.equivalentIDs = { ...this.equivalentIDs, ...equivalentIDs }; - } - this.equivalentIDsUpdated = true; - } - - hasInput(): boolean { - return !(this.curie === undefined || this.curie === null); - } - - hasEquivalentIDs(): boolean { - return !(typeof this.equivalentIDs === 'undefined'); - } - - getEntityCount(): number { - return this.curie ? this.curie.length : 0; - } -} diff --git a/src/results_assembly/pfocr.ts b/src/results_assembly/pfocr.ts index 5fab0980..5754c4bb 100644 --- a/src/results_assembly/pfocr.ts +++ b/src/results_assembly/pfocr.ts @@ -5,7 +5,6 @@ import { intersection } from '../utils'; import _ from 'lodash'; import { LogEntry, StampedLog } from '@biothings-explorer/utils'; import { TrapiResult, TrapiKGNode, TrapiResponse, TrapiKGEdge } from '@biothings-explorer/types'; -import Graph from '../graph/graph'; // the minimum acceptable intersection size between the CURIEs // in a TRAPI result and in a PFOCR figure. diff --git a/src/results_assembly/query_results.ts b/src/results_assembly/query_results.ts index 9c444916..d88658a3 100644 --- a/src/results_assembly/query_results.ts +++ b/src/results_assembly/query_results.ts @@ -1,10 +1,9 @@ import { LogEntry, StampedLog } from '@biothings-explorer/utils'; -import { TrapiResult } from '@biothings-explorer/types'; +import { TrapiResult, Record } from '@biothings-explorer/types'; import Debug from 'debug'; import { zip } from 'lodash'; const debug = Debug('bte:biothings-explorer-trapi:QueryResult'); import { getScores, calculateScore, ScoreCombos } from './score'; -import { Record } from '@biothings-explorer/api-response-transform'; import * as config from '../config'; export interface RecordsByQEdgeID { diff --git a/src/update_nodes.ts b/src/update_nodes.ts index c9c30f93..700dd47f 100644 --- a/src/update_nodes.ts +++ b/src/update_nodes.ts @@ -1,9 +1,6 @@ -import { resolveSRI } from 'biomedical_id_resolver'; import Debug from 'debug'; -import { ResolverInput, SRIResolverOutput } from 'biomedical_id_resolver'; -import { Record } from '@biothings-explorer/api-response-transform'; -import QEdge from './query_edge'; -import { NodeNormalizerResultObj } from '@biothings-explorer/api-response-transform'; +import { ResolverInput, SRIResolverOutput, SRIBioEntity, resolveSRI } from 'biomedical_id_resolver'; +import { Record, QEdge } from "@biothings-explorer/types"; const debug = Debug('bte:biothings-explorer-trapi:nodeUpdateHandler'); export interface CuriesByCategory { @@ -39,17 +36,17 @@ export default class NodesUpdateHandler { * Resolve input ids * @param {object} curies - each key represents the category, e.g. gene, value is an array of curies. */ - async _getEquivalentIDs(curies: ResolverInput): Promise { + async _getEquivalentIDs(curies: ResolverInput, abortSignal?: AbortSignal): Promise { // const resolver = new id_resolver.Resolver('biolink'); // const equivalentIDs = await resolver.resolve(curies); - return await resolveSRI(curies); + return await resolveSRI(curies, abortSignal); } - async setEquivalentIDs(qEdges: QEdge[]): Promise { + async setEquivalentIDs(qEdges: QEdge[], abortSignal?: AbortSignal): Promise { debug(`Getting equivalent IDs...`); const curies = this._getCuries(this.qEdges); debug(`curies: ${JSON.stringify(curies)}`); - const equivalentIDs = await this._getEquivalentIDs(curies); + const equivalentIDs = await this._getEquivalentIDs(curies, abortSignal); qEdges.map((qEdge) => { const edgeEquivalentIDs = Object.keys(equivalentIDs) .filter((key) => qEdge.getInputCurie().includes(key)) @@ -64,7 +61,7 @@ export default class NodesUpdateHandler { return; } - _createEquivalentIDsObject(record: Record): { [curie: string]: NodeNormalizerResultObj } { + _createEquivalentIDsObject(record: Record): { [curie: string]: SRIBioEntity } { if (record.object.normalizedInfo !== undefined) { return { [record.object.curie]: record.object.normalizedInfo,