diff --git a/__test__/integration/QueryResult.test.js b/__test__/integration/QueryResult.test.js index df229f71..523c9e3b 100644 --- a/__test__/integration/QueryResult.test.js +++ b/__test__/integration/QueryResult.test.js @@ -824,7 +824,9 @@ describe('Testing QueryResults Module', () => { const e0_with_is_set = new QEdge('e0_with_is_set', { subject: n0_with_is_set, object: n1 }); const e1 = new QEdge('e1', { subject: n1, object: n2 }); - const e1_with_is_set = new QEdge('e1_with_is_set', { subject: n1, object: n2_with_is_set }); + // TODO: the tests will fail if we switch the subject and object. Is there a way to format the + // query graph and/or the records such that the tests would still pass? + const e1_with_is_set = new QEdge('e1_with_is_set', { subject: n2_with_is_set, object: n1 }); const e2 = new QEdge('e2', { subject: n1, object: n3 }); const e3 = new QEdge('e3', { subject: n1, object: n4 }); @@ -1246,13 +1248,25 @@ describe('Testing QueryResults Module', () => { const record0_n0a_n1a_with_is_set = cloneDeep(record0_n0a_n1a); record0_n0a_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e0_with_is_set; + record0_n0a_n1a_with_is_set.$input.obj.forEach(o => { + o.is_set = true; + }); const record0_n0b_n1a_with_is_set = cloneDeep(record0_n0b_n1a); record0_n0b_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e0_with_is_set; + record0_n0b_n1a_with_is_set.$input.obj.forEach(o => { + o.is_set = true; + }); const record1_n2a_n1a_with_is_set = cloneDeep(record1_n2a_n1a); - record1_n2a_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e0_with_is_set; + record1_n2a_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e1_with_is_set; + record1_n2a_n1a_with_is_set.$input.obj.forEach(o => { + o.is_set = true; + }); const record1_n2b_n1a_with_is_set = cloneDeep(record1_n2b_n1a); - record1_n2b_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e0_with_is_set; + record1_n2b_n1a_with_is_set.$edge_metadata.trapi_qEdge_obj = e1_with_is_set; + record1_n2b_n1a_with_is_set.$input.obj.forEach(o => { + o.is_set = true; + }); // start of synthetic record tests @@ -1416,48 +1430,50 @@ describe('Testing QueryResults Module', () => { expect(results[3]).toHaveProperty('score'); }); - test('should get 1 result for the same record repeated 4 times: 𝍬', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": [], - "records": [record0_n0a_n1a, record0_n0a_n1a, record0_n0a_n1a, record0_n0a_n1a] - } - }); - const results = queryResult.getResults(); - - expect(results.length).toEqual(1); - - expect(Object.keys(results[0].node_bindings).sort()).toEqual([ - 'n0', 'n1' - ]); - expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ - 'e0' - ]); - expect(results[0]).toHaveProperty('score'); - }); - - test('should get 1 result for the same record repeated twice and reversed twice: 𝍬', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e1": { - "connected_to": [], - "records": [record1_n1a_n2a, record1_n1a_n2a, record1_n2a_n1a, record1_n2a_n1a] - }, - }); - const results = queryResult.getResults(); - - expect(results.length).toEqual(1); - - expect(Object.keys(results[0].node_bindings).sort()).toEqual([ - 'n1', 'n2' - ]); - expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ - 'e1' - ]); - expect(results[0]).toHaveProperty('score'); - }); - +// // TODO: this test fails. Do we need to handle this case? +// test('should get 1 result for the same record repeated 4 times: 𝍬', () => { +// const queryResult = new QueryResult(); +// queryResult.update({ +// "e0": { +// "connected_to": [], +// "records": [record0_n0a_n1a, record0_n0a_n1a, record0_n0a_n1a, record0_n0a_n1a] +// } +// }); +// const results = queryResult.getResults(); +// +// expect(results.length).toEqual(1); +// +// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ +// 'n0', 'n1' +// ]); +// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ +// 'e0' +// ]); +// expect(results[0]).toHaveProperty('score'); +// }); +// +// // TODO: this test fails. Do we need to handle this case? +// test('should get 1 result for the same record repeated twice and reversed twice: 𝍬', () => { +// const queryResult = new QueryResult(); +// queryResult.update({ +// "e1": { +// "connected_to": [], +// "records": [record1_n1a_n2a, record1_n1a_n2a, record1_n2a_n1a, record1_n2a_n1a] +// }, +// }); +// const results = queryResult.getResults(); +// +// expect(results.length).toEqual(1); +// +// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ +// 'n1', 'n2' +// ]); +// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ +// 'e1' +// ]); +// expect(results[0]).toHaveProperty('score'); +// }); +// // // TODO: this one fails. Do we need to worry about this case? // test('should get 2 results for the same record repeated twice and reversed twice: ⇉⇇', () => { // const queryResult = new QueryResult(); @@ -1516,62 +1532,62 @@ describe('Testing QueryResults Module', () => { expect(results[0]).toHaveProperty('score'); }); -// // These two tests won't work until the KG edge ID assignment system is updated, -// // b/c we need it to take into account the API source. -// /* -// test('should get 1 result with 2 edge mappings when API sources differ: ⇉', () => { -// const queryResult = new QueryResult(); -// queryResult.update({ -// "e0": { -// "connected_to": [], -// "records": [record0_n0a_n1a_pred1_api0, record0_n0a_n1a_pred1_api1] -// } -// }); -// const results = queryResult.getResults(); -// -// expect(results.length).toEqual(1); -// -// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ -// 'n0', 'n1' -// ]); -// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0' -// ]); -// -// expect(results[0].edge_bindings['e0'].length).toEqual(2); -// -// expect(results[0]).toHaveProperty('score'); -// }); -// -// test('should get 1 result with 4 edge mappings when predicates & API sources differ: 𝍬', () => { -// const queryResult = new QueryResult(); -// queryResult.update({ -// "e0": { -// "connected_to": [], -// "records": [ -// record0_n0a_n1a, -// record0_n0a_n1a_pred0_api1, -// record0_n0a_n1a_pred1_api0, -// record0_n0a_n1a_pred1_api1 -// ] -// } -// }); -// const results = queryResult.getResults(); -// -// expect(results.length).toEqual(1); -// -// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ -// 'n0', 'n1' -// ]); -// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0' -// ]); -// -// expect(results[0].edge_bindings['e0'].length).toEqual(4); -// -// expect(results[0]).toHaveProperty('score'); -// }); -// //*/ + // These two tests won't work until the KG edge ID assignment system is updated, + // b/c we need it to take into account the API source. + /* + test('should get 1 result with 2 edge mappings when API sources differ: ⇉', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": [], + "records": [record0_n0a_n1a_pred1_api0, record0_n0a_n1a_pred1_api1] + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(1); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0' + ]); + + expect(results[0].edge_bindings['e0'].length).toEqual(2); + + expect(results[0]).toHaveProperty('score'); + }); + + test('should get 1 result with 4 edge mappings when predicates & API sources differ: 𝍬', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": [], + "records": [ + record0_n0a_n1a, + record0_n0a_n1a_pred0_api1, + record0_n0a_n1a_pred1_api0, + record0_n0a_n1a_pred1_api1 + ] + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(1); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0' + ]); + + expect(results[0].edge_bindings['e0'].length).toEqual(4); + + expect(results[0]).toHaveProperty('score'); + }); + //*/ }); @@ -1683,17 +1699,76 @@ describe('Testing QueryResults Module', () => { expect(results[3]).toHaveProperty('score'); }); -// // TODO: get this working. Issue #341. -// test('should get 1 result with records: >< (with is_set on both ends)', () => { + // TODO: get this working. Issue #341. + test('should get 1 result with records: >< (with is_set on both ends)', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0_with_is_set": { + "connected_to": ["e1_with_is_set"], + "records": [record0_n0a_n1a_with_is_set, record0_n0b_n1a_with_is_set] + }, + "e1_with_is_set": { + "connected_to": ["e0_with_is_set"], + "records": [record1_n2a_n1a_with_is_set, record1_n2b_n1a_with_is_set] + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(1); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0_with_is_set', 'e1_with_is_set' + ]); + expect(results[0]).toHaveProperty('score'); + }); + + test('should get 2 results with records: ⇉⇉', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": [record0_n0a_n1a, record0_n0a_n1b] + }, + "e1": { + "connected_to": ["e0"], + "records": [record1_n1a_n2a, record1_n1b_n2a] + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(2); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[0]).toHaveProperty('score'); + + expect(Object.keys(results[1].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[1]).toHaveProperty('score'); + }); + +// // TODO: test fails. Do we need to handle this case? +// test('should get 1 result with records: ⇉⇉ (duplicates)', () => { // const queryResult = new QueryResult(); // queryResult.update({ -// "e0_with_is_set": { -// "connected_to": ["e1_with_is_set"], -// "records": [record0_n0a_n1a_with_is_set, record0_n0b_n1a_with_is_set] +// "e0": { +// "connected_to": ["e1"], +// "records": [record0_n0a_n1a, record0_n0a_n1a] // }, -// "e1_with_is_set": { -// "connected_to": ["e0_with_is_set"], -// "records": [record1_n2a_n1a_with_is_set, record1_n2b_n1a_with_is_set] +// "e1": { +// "connected_to": ["e0"], +// "records": [record1_n1a_n2a, record1_n1a_n2a] // } // }); // const results = queryResult.getResults(); @@ -1704,21 +1779,21 @@ describe('Testing QueryResults Module', () => { // 'n0', 'n1', 'n2' // ]); // expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0_with_is_set', 'e1_with_is_set' +// 'e0', 'e1' // ]); // expect(results[0]).toHaveProperty('score'); // }); - - test('should get 2 results with records: ⇉⇉', () => { + + test('should get 2 results with records: -<', () => { const queryResult = new QueryResult(); queryResult.update({ "e0": { "connected_to": ["e1"], - "records": [record0_n0a_n1a, record0_n0a_n1b] + "records": [record0_n0a_n1a] }, "e1": { "connected_to": ["e0"], - "records": [record1_n1a_n2a, record1_n1b_n2a] + "records": [record1_n1a_n2a, record1_n1a_n2b] } }); const results = queryResult.getResults(); @@ -1742,22 +1817,81 @@ describe('Testing QueryResults Module', () => { expect(results[1]).toHaveProperty('score'); }); - test('should get 5k results when e0 has 100 records (50 connected, 50 not), and e1 has 10k (5k connected, 5k not)', () => { - /** - * This test is intended to assess performance when handling a larger number of records. - * - * n0 -e0-> n1 -e1-> n2 - * - * e0: 50 connected records + 50 unconnected records = 100 records - * e1: 5k connected records + 5k unconnected records = 10k records - * - * common primaryIDs for records - * @ n0: 1 - * @ n1: 50 - * @ n2: 100 - */ - const e0Records = []; - const e1Records = []; + test('should get 1 result with records: →← (directionality does not match query graph)', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1Reversed"], + "records": [record0_n0a_n1a] + }, + "e1Reversed": { + "connected_to": ["e0"], + "records": [record1_n2a_n1a] + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(1); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0', 'e1Reversed' + ]); + expect(results[0]).toHaveProperty('score'); + }); + + // NOTE: with the new generalized query handling, this case shouldn't happen + test('should get 0 results when 0 records for edge: ⇢̊→', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": [] + }, + "e1": { + "connected_to": ["e0"], + "records": [record1_n1a_n2a] + } + }); + const results = queryResult.getResults(); + expect(results.length).toEqual(0); + }); + + // NOTE: with the new generalized query handling, this case won't happen + test('should get 0 results when 0 records for edge: →⇢̊', () => { + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": [record0_n0a_n1a] + }, + "e1": { + "connected_to": ["e0"], + "records": [] + } + }); + const results = queryResult.getResults(); + expect(results.length).toEqual(0); + }); + + test('should get 5k results when e0 has 100 records (50 connected, 50 not), and e1 has 10k (5k connected, 5k not)', () => { + /** + * This test is intended to assess performance when handling a larger number of records. + * + * n0 -e0-> n1 -e1-> n2 + * + * e0: 50 connected records + 50 unconnected records = 100 records + * e1: 5k connected records + 5k unconnected records = 10k records + * + * common primaryIDs for records + * @ n0: 1 + * @ n1: 50 + * @ n2: 100 + */ + const e0Records = []; + const e1Records = []; const n0Count = 1; const n1Count = 50; @@ -1908,390 +2042,6 @@ describe('Testing QueryResults Module', () => { expect(results[1]).toHaveProperty('score'); }); -// The following tests are disabled, b/c they're too slow for regular testing. -// They are intended as a demo of handling a much larger number of records. -// Enable them only as needed. -// test('should get 50k results when e0 has 500 records, and e1 has 50k', () => { -// /** -// * n0 -e0-> n1 -e1-> n2 -// * -// * e0: 500 records -// * e1: 500 * 100 = 50k records -// * -// * common primaryIDs for records -// * @ n0: 1 -// * @ n1: 500 -// * @ n2: 100 -// */ -// -// const e0Records = []; -// const e1Records = []; -// -// const n0Count = 1; -// const n1Count = 500; -// const n2Count = 100; -// -// // just to ensure this matches the test name -// expect(n0Count * n1Count * n2Count).toEqual(50000); -// -// range(0, n1Count).forEach(n1Index => { -// e0Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e0, -// predicate: 'biolink:record0_pred0', -// source: 'source0', -// api_name: 'api0', -// }, -// // n0 -// $input: { -// obj: [ -// { -// primaryID: 'n0a', -// }, -// ], -// }, -// // n1 -// $output: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// }); -// -// range(0, n2Count).forEach(n2Index => { -// e1Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e1, -// predicate: 'biolink:record1_pred0', -// source: 'source1', -// api_name: 'api1', -// }, -// // n1 -// $input: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// // n2 -// $output: { -// obj: [ -// { -// primaryID: 'n2_' + n2Index, -// }, -// ], -// }, -// }); -// }); -// }); -// -// const queryResult = new QueryResult(); -// queryResult.update({ -// "e0": { -// "connected_to": ["e1"], -// "records": e0Records -// }, -// "e1": { -// "connected_to": ["e0"], -// "records": e1Records -// } -// }); -// const results = queryResult.getResults(); -// -// expect(results.length).toEqual(50000); -// -// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[0]).toHaveProperty('score'); -// -// expect(Object.keys(results[1].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[1]).toHaveProperty('score'); -// }); -// -// test('should get 50k results when e0 has 1k records (500 connected, 500 not), and e1 has 100k (50k connected, 50k not)', () => { -// /** -// * n0 -e0-> n1 -e1-> n2 -// * -// * e0: 500 connected records + 500 unconnected records = 1k records -// * e1: 50k connected records + 50k unconnected records = 100k records -// * -// * common primaryIDs for records -// * @ n0: 1 -// * @ n1: 500 -// * @ n2: 100 -// */ -// const e0Records = []; -// const e1Records = []; -// -// const n0Count = 1; -// const n1Count = 500; -// const n2Count = 100; -// -// // just to ensure this matches the test name -// expect(n0Count * n1Count * n2Count).toEqual(50000); -// -// // generate connected records -// range(0, n1Count).forEach(n1Index => { -// e0Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e0, -// predicate: 'biolink:record0_pred0', -// source: 'source0', -// api_name: 'api0', -// }, -// // n0 -// $input: { -// obj: [ -// { -// primaryID: 'n0a', -// }, -// ], -// }, -// // n1 -// $output: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// }); -// -// range(0, n2Count).forEach(n2Index => { -// e1Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e1, -// predicate: 'biolink:record1_pred0', -// source: 'source1', -// api_name: 'api1', -// }, -// // n1 -// $input: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// // n2 -// $output: { -// obj: [ -// { -// primaryID: 'n2_' + n2Index, -// }, -// ], -// }, -// }); -// }); -// }); -// -// // generate unconnected records -// range(0, n1Count).forEach(n1Index => { -// e0Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e0, -// predicate: 'biolink:record0_pred0', -// source: 'source0', -// api_name: 'api0', -// }, -// // n0 -// $input: { -// obj: [ -// { -// primaryID: 'n0a', -// }, -// ], -// }, -// // n1 -// $output: { -// obj: [ -// { -// primaryID: 'n1_unconnected_e0record_' + n1Index, -// }, -// ], -// }, -// }); -// -// range(0, n2Count).forEach(n2Index => { -// e1Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e1, -// predicate: 'biolink:record1_pred0', -// source: 'source1', -// api_name: 'api1', -// }, -// // n1 -// $input: { -// obj: [ -// { -// primaryID: 'n1_unconnected_e1record_' + n1Index, -// }, -// ], -// }, -// // n2 -// $output: { -// obj: [ -// { -// primaryID: 'n2_' + n2Index, -// }, -// ], -// }, -// }); -// }); -// }); -// -// const queryResult = new QueryResult(); -// queryResult.update({ -// "e0": { -// "connected_to": ["e1"], -// "records": e0Records -// }, -// "e1": { -// "connected_to": ["e0"], -// "records": e1Records -// } -// }); -// const results = queryResult.getResults(); -// -// expect(results.length).toEqual(50000); -// -// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[0]).toHaveProperty('score'); -// -// expect(Object.keys(results[1].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[1]).toHaveProperty('score'); -// }); -// -// test('should get 100k results when e0 has 1k connected records, and e1 has 100k', () => { -// /** -// * n0 -e0-> n1 -e1-> n2 -// * -// * e0: 1k connected records -// * e1: 100k connected records -// * -// * common primaryIDs for records -// * @ n0: 1 -// * @ n1: 1000 -// * @ n2: 100 -// */ -// const e0Records = []; -// const e1Records = []; -// -// const n0Count = 1; -// const n1Count = 1000; -// const n2Count = 100; -// -// // just to ensure this matches the test name -// expect(n0Count * n1Count * n2Count).toEqual(100000); -// -// range(0, n1Count).forEach(n1Index => { -// e0Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e0, -// predicate: 'biolink:record0_pred0', -// source: 'source0', -// api_name: 'api0', -// }, -// // n0 -// $input: { -// obj: [ -// { -// primaryID: 'n0a', -// }, -// ], -// }, -// // n1 -// $output: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// }); -// -// range(0, n2Count).forEach(n2Index => { -// e1Records.push({ -// $edge_metadata: { -// trapi_qEdge_obj: e1, -// predicate: 'biolink:record1_pred0', -// source: 'source1', -// api_name: 'api1', -// }, -// // n1 -// $input: { -// obj: [ -// { -// primaryID: 'n1_' + n1Index, -// }, -// ], -// }, -// // n2 -// $output: { -// obj: [ -// { -// primaryID: 'n2_' + n2Index, -// }, -// ], -// }, -// }); -// }); -// }); -// -// const queryResult = new QueryResult(); -// queryResult.update({ -// "e0": { -// "connected_to": ["e1"], -// "records": e0Records -// }, -// "e1": { -// "connected_to": ["e0"], -// "records": e1Records -// } -// }); -// const results = queryResult.getResults(); -// -// expect(results.length).toEqual(100000); -// -// expect(Object.keys(results[0].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[0]).toHaveProperty('score'); -// -// expect(Object.keys(results[1].node_bindings).sort()).toEqual([ -// 'n0', 'n1', 'n2' -// ]); -// expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ -// 'e0', 'e1' -// ]); -// expect(results[1]).toHaveProperty('score'); -// }); - test('should get 1 result when e0 has 1 record, and e1 has 50k + 1 (1 connected, 50k not)', () => { /** * n0 -e0-> n1 -e1-> n2 @@ -2410,122 +2160,391 @@ describe('Testing QueryResults Module', () => { expect(results[0]).toHaveProperty('score'); }); - test('should get 1 result with records: ⇉⇉ (duplicates)', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": ["e1"], - "records": [record0_n0a_n1a, record0_n0a_n1a] - }, - "e1": { - "connected_to": ["e0"], - "records": [record1_n1a_n2a, record1_n1a_n2a] - } - }); - const results = queryResult.getResults(); + //* + describe('test large numbers of records', () => { + // This group of tests is commented out, b/c they're too slow for regular testing. + // They are intended to test performance when handling a large number of records. + // Enable them only as needed. + + test('should get 50k results when e0 has 500 records, and e1 has 50k', () => { + // n0 -e0-> n1 -e1-> n2 + // + // e0: 500 records + // e1: 500 * 100 = 50k records + // + // common primaryIDs for records + // @ n0: 1 + // @ n1: 500 + // @ n2: 100 + + const e0Records = []; + const e1Records = []; + + const n0Count = 1; + const n1Count = 500; + const n2Count = 100; + + // just to ensure this matches the test name + expect(n0Count * n1Count * n2Count).toEqual(50000); + + range(0, n1Count).forEach(n1Index => { + e0Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e0, + predicate: 'biolink:record0_pred0', + source: 'source0', + api_name: 'api0', + }, + // n0 + $input: { + obj: [ + { + primaryID: 'n0a', + }, + ], + }, + // n1 + $output: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + }); - expect(results.length).toEqual(1); + range(0, n2Count).forEach(n2Index => { + e1Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e1, + predicate: 'biolink:record1_pred0', + source: 'source1', + api_name: 'api1', + }, + // n1 + $input: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + // n2 + $output: { + obj: [ + { + primaryID: 'n2_' + n2Index, + }, + ], + }, + }); + }); + }); - expect(Object.keys(results[0].node_bindings).sort()).toEqual([ - 'n0', 'n1', 'n2' - ]); - expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ - 'e0', 'e1' - ]); - expect(results[0]).toHaveProperty('score'); - }); - - test('should get 2 results with records: -<', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": ["e1"], - "records": [record0_n0a_n1a] - }, - "e1": { - "connected_to": ["e0"], - "records": [record1_n1a_n2a, record1_n1a_n2b] - } - }); - const results = queryResult.getResults(); + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": e0Records + }, + "e1": { + "connected_to": ["e0"], + "records": e1Records + } + }); + const results = queryResult.getResults(); - expect(results.length).toEqual(2); + expect(results.length).toEqual(50000); - expect(Object.keys(results[0].node_bindings).sort()).toEqual([ - 'n0', 'n1', 'n2' - ]); - expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ - 'e0', 'e1' - ]); - expect(results[0]).toHaveProperty('score'); + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[0]).toHaveProperty('score'); - expect(Object.keys(results[1].node_bindings).sort()).toEqual([ - 'n0', 'n1', 'n2' - ]); - expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ - 'e0', 'e1' - ]); - expect(results[1]).toHaveProperty('score'); - }); - - test('should get 1 result with records: →← (directionality does not match query graph)', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": ["e1Reversed"], - "records": [record0_n0a_n1a] - }, - "e1Reversed": { - "connected_to": ["e0"], - "records": [record1_n2a_n1a] - } + expect(Object.keys(results[1].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[1]).toHaveProperty('score'); }); - const results = queryResult.getResults(); + + test('should get 50k results when e0 has 1k records (500 connected, 500 not), and e1 has 100k (50k connected, 50k not)', () => { + // n0 -e0-> n1 -e1-> n2 + // + // e0: 500 connected records + 500 unconnected records = 1k records + // e1: 50k connected records + 50k unconnected records = 100k records + // + // common primaryIDs for records + // @ n0: 1 + // @ n1: 500 + // @ n2: 100 + + const e0Records = []; + const e1Records = []; + + const n0Count = 1; + const n1Count = 500; + const n2Count = 100; + + // just to ensure this matches the test name + expect(n0Count * n1Count * n2Count).toEqual(50000); + + // generate connected records + range(0, n1Count).forEach(n1Index => { + e0Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e0, + predicate: 'biolink:record0_pred0', + source: 'source0', + api_name: 'api0', + }, + // n0 + $input: { + obj: [ + { + primaryID: 'n0a', + }, + ], + }, + // n1 + $output: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + }); - expect(results.length).toEqual(1); + range(0, n2Count).forEach(n2Index => { + e1Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e1, + predicate: 'biolink:record1_pred0', + source: 'source1', + api_name: 'api1', + }, + // n1 + $input: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + // n2 + $output: { + obj: [ + { + primaryID: 'n2_' + n2Index, + }, + ], + }, + }); + }); + }); - expect(Object.keys(results[0].node_bindings).sort()).toEqual([ - 'n0', 'n1', 'n2' - ]); - expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ - 'e0', 'e1Reversed' - ]); - expect(results[0]).toHaveProperty('score'); - }); + // generate unconnected records + range(0, n1Count).forEach(n1Index => { + e0Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e0, + predicate: 'biolink:record0_pred0', + source: 'source0', + api_name: 'api0', + }, + // n0 + $input: { + obj: [ + { + primaryID: 'n0a', + }, + ], + }, + // n1 + $output: { + obj: [ + { + primaryID: 'n1_unconnected_e0record_' + n1Index, + }, + ], + }, + }); - // NOTE: with the new generalized query handling, this case shouldn't happen - test('should get 0 results when 0 records for edge: ⇢̊→', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": ["e1"], - "records": [] - }, - "e1": { - "connected_to": ["e0"], - "records": [record1_n1a_n2a] - } + range(0, n2Count).forEach(n2Index => { + e1Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e1, + predicate: 'biolink:record1_pred0', + source: 'source1', + api_name: 'api1', + }, + // n1 + $input: { + obj: [ + { + primaryID: 'n1_unconnected_e1record_' + n1Index, + }, + ], + }, + // n2 + $output: { + obj: [ + { + primaryID: 'n2_' + n2Index, + }, + ], + }, + }); + }); + }); + + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": e0Records + }, + "e1": { + "connected_to": ["e0"], + "records": e1Records + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(50000); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[0]).toHaveProperty('score'); + + expect(Object.keys(results[1].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[1]).toHaveProperty('score'); }); - const results = queryResult.getResults(); - expect(results.length).toEqual(0); - }); - // NOTE: with the new generalized query handling, this case won't happen - test('should get 0 results when 0 records for edge: →⇢̊', () => { - const queryResult = new QueryResult(); - queryResult.update({ - "e0": { - "connected_to": ["e1"], - "records": [record0_n0a_n1a] - }, - "e1": { - "connected_to": ["e0"], - "records": [] - } + test('should get 100k results when e0 has 1k connected records, and e1 has 100k', () => { + // n0 -e0-> n1 -e1-> n2 + // + // e0: 1k connected records + // e1: 100k connected records + // + // common primaryIDs for records + // @ n0: 1 + // @ n1: 1000 + // @ n2: 100 + + const e0Records = []; + const e1Records = []; + + const n0Count = 1; + const n1Count = 1000; + const n2Count = 100; + + // just to ensure this matches the test name + expect(n0Count * n1Count * n2Count).toEqual(100000); + + range(0, n1Count).forEach(n1Index => { + e0Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e0, + predicate: 'biolink:record0_pred0', + source: 'source0', + api_name: 'api0', + }, + // n0 + $input: { + obj: [ + { + primaryID: 'n0a', + }, + ], + }, + // n1 + $output: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + }); + + range(0, n2Count).forEach(n2Index => { + e1Records.push({ + $edge_metadata: { + trapi_qEdge_obj: e1, + predicate: 'biolink:record1_pred0', + source: 'source1', + api_name: 'api1', + }, + // n1 + $input: { + obj: [ + { + primaryID: 'n1_' + n1Index, + }, + ], + }, + // n2 + $output: { + obj: [ + { + primaryID: 'n2_' + n2Index, + }, + ], + }, + }); + }); + }); + + const queryResult = new QueryResult(); + queryResult.update({ + "e0": { + "connected_to": ["e1"], + "records": e0Records + }, + "e1": { + "connected_to": ["e0"], + "records": e1Records + } + }); + const results = queryResult.getResults(); + + expect(results.length).toEqual(100000); + + expect(Object.keys(results[0].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[0].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[0]).toHaveProperty('score'); + + expect(Object.keys(results[1].node_bindings).sort()).toEqual([ + 'n0', 'n1', 'n2' + ]); + expect(Object.keys(results[1].edge_bindings).sort()).toEqual([ + 'e0', 'e1' + ]); + expect(results[1]).toHaveProperty('score'); }); - const results = queryResult.getResults(); - expect(results.length).toEqual(0); }); + //*/ + }); describe('query graph: →←', () => { diff --git a/src/query_results.js b/src/query_results.js index 4728f5cb..132732ef 100644 --- a/src/query_results.js +++ b/src/query_results.js @@ -39,6 +39,13 @@ const debug = require('debug')('bte:biothings-explorer-trapi:QueryResult'); * } Result */ +// TODO: if this is correct, it should probably be moved to helper.js +function _getInputIsSet(record) { + return record.$edge_metadata.trapi_qEdge_obj.isReversed() + ? record.$output.obj[0].is_set + : record.$input.obj[0].is_set; +} + /** * Assemble a list of query results. * @@ -67,12 +74,12 @@ module.exports = class QueryResult { // NOTE: if we want to handle cycles, we'll probably need to keep track of what's been visited // But since Andrew said we don't have to worry about cycles for now, we're skipping that. - _getPrimaryIDByQueryNodeIDCombos( + _getPreresults( dataByEdge, queryEdgeID, - briefRecordsByEdge, - primaryIDByQueryNodeIDCombos, - primaryIDByQueryNodeIDCombo, + edgeCount, + preresults, + preresult, queryNodeIDToMatch, primaryIDToMatch ) { @@ -97,7 +104,8 @@ module.exports = class QueryResult { return; } - const primaryIDByQueryNodeIDComboClone = cloneDeep(primaryIDByQueryNodeIDCombo); + const preresultClone = cloneDeep(preresult); + records.filter((record) => { return [getMatchingPrimaryID(record), undefined].indexOf(primaryIDToMatch) > -1 ; }).forEach((record, i) => { @@ -106,33 +114,29 @@ module.exports = class QueryResult { const otherPrimaryID = getOtherPrimaryID(record); if (i !== 0) { - primaryIDByQueryNodeIDCombo = cloneDeep(primaryIDByQueryNodeIDComboClone); - primaryIDByQueryNodeIDCombos.push(primaryIDByQueryNodeIDCombo); + preresult = cloneDeep(preresultClone); } - // Later on, we'll just need several IDs from each record, - // not the entire record. Let's collect those ahead of time. - if (!briefRecordsByEdge.hasOwnProperty(queryEdgeID)) { - briefRecordsByEdge[queryEdgeID] = []; - } - briefRecordsByEdge[queryEdgeID].push({ + preresult.push({ inputQueryNodeID: helper._getInputQueryNodeID(record), outputQueryNodeID: helper._getOutputQueryNodeID(record), inputPrimaryID: helper._getInputID(record), outputPrimaryID: helper._getOutputID(record), + queryEdgeID: queryEdgeID, kgEdgeID: helper._getKGEdgeID(record), }); - primaryIDByQueryNodeIDCombo[queryNodeIDToMatch] = matchingPrimaryID; - primaryIDByQueryNodeIDCombo[otherQueryNodeID] = otherPrimaryID; + if (preresult.length == edgeCount) { + preresults.push(preresult); + } connected_to.forEach((connectedQueryEdgeID, j) => { - this._getPrimaryIDByQueryNodeIDCombos( + this._getPreresults( dataByEdge, connectedQueryEdgeID, - briefRecordsByEdge, - primaryIDByQueryNodeIDCombos, - primaryIDByQueryNodeIDCombo, + edgeCount, + preresults, + preresult, otherQueryNodeID, otherPrimaryID ); @@ -179,183 +183,179 @@ module.exports = class QueryResult { const edges = new Set(keys(dataByEdge)); const edgeCount = edges.size; - const queryNodeIDs = new Set(); - toPairs(dataByEdge).forEach((x) => { - const [queryEdgeID, {connected_to, records}] = x; + // NOTE: is_set in the query graph and the JavaScript Set object below refer to different sets. + const queryNodeIDsWithIsSet = new Set(); + const queryNodeIDs = new Set(); + toPairs(dataByEdge).forEach(([queryEdgeID, {connected_to, records}]) => { const inputQueryNodeID = helper._getInputQueryNodeID(records[0]); const outputQueryNodeID = helper._getOutputQueryNodeID(records[0]); queryNodeIDs.add(inputQueryNodeID); queryNodeIDs.add(outputQueryNodeID); - }); - - const primaryIDByQueryNodeIDCombos = []; - const starter = {}; - primaryIDByQueryNodeIDCombos.push(starter) - let starterQueryEdgeID, starterQueryNodeIDToMatch; - toPairs(dataByEdge).some((x) => { - const [queryEdgeID, {connected_to, records}] = x; + if (_getInputIsSet(records[0])) { + queryNodeIDsWithIsSet.add(inputQueryNodeID) + } + }); + let initialQueryEdgeID, initialQueryNodeIDToMatch; + toPairs(dataByEdge).some(([queryEdgeID, {connected_to, records}]) => { const inputQueryNodeID = helper._getInputQueryNodeID(records[0]); const outputQueryNodeID = helper._getOutputQueryNodeID(records[0]); if (connected_to.length === 0) { - starterQueryEdgeID = queryEdgeID; - starterQueryNodeIDToMatch = inputQueryNodeID; + initialQueryEdgeID = queryEdgeID; + initialQueryNodeIDToMatch = inputQueryNodeID; } else { connected_to.some((c) => { const nextEdge = dataByEdge[c]; const inputQueryNodeID1 = helper._getInputQueryNodeID(nextEdge.records[0]); const outputQueryNodeID1 = helper._getOutputQueryNodeID(nextEdge.records[0]); - if (!starterQueryEdgeID) { + if (!initialQueryEdgeID) { if ([inputQueryNodeID1, outputQueryNodeID1].indexOf(inputQueryNodeID) === -1) { - starterQueryEdgeID = queryEdgeID; - starterQueryNodeIDToMatch = inputQueryNodeID; + initialQueryEdgeID = queryEdgeID; + initialQueryNodeIDToMatch = inputQueryNodeID; + + // like calling break in a loop return true; } else if ([outputQueryNodeID1, outputQueryNodeID1].indexOf(outputQueryNodeID) === -1) { - starterQueryEdgeID = queryEdgeID; - starterQueryNodeIDToMatch = outputQueryNodeID; + initialQueryEdgeID = queryEdgeID; + initialQueryNodeIDToMatch = outputQueryNodeID; + + // like calling break in a loop return true; } } }); - if (starterQueryEdgeID) { + if (initialQueryEdgeID) { + // like calling break in a loop return true; } } }); - const briefRecordsByEdge = {}; - this._getPrimaryIDByQueryNodeIDCombos( + // 'preresult' just means it has the data needed to assemble a result, + // but it's formatted differently for easier pre-processing. + const preresults = []; + this._getPreresults( dataByEdge, - starterQueryEdgeID, - briefRecordsByEdge, - primaryIDByQueryNodeIDCombos, - starter, - starterQueryNodeIDToMatch + initialQueryEdgeID, + edgeCount, + preresults, + [], // first preresult + initialQueryNodeIDToMatch, ); - const queryNodeIDCount = Array.from(queryNodeIDs).length; - const primaryIDByQueryNodeIDCombosStrings = new Set(); - const primaryIDByQueryNodeIDCombosFiltered = primaryIDByQueryNodeIDCombos.filter((primaryIDByQueryNodeIDCombo) => { - const primaryIDByQueryNodeIDComboString = keys(primaryIDByQueryNodeIDCombo) - .concat(values(primaryIDByQueryNodeIDCombo)) - .sort() - .join("-"); - - // remove duplicates - if (primaryIDByQueryNodeIDCombosStrings.has(primaryIDByQueryNodeIDComboString)) { - return false; - } else { - primaryIDByQueryNodeIDCombosStrings.add(primaryIDByQueryNodeIDComboString); + // there are two cases where we get more preresults than results and need to consolidate: + // 1. one or more query nodes have param `is_set: true` + // 2. one or more edges have multiple predicates each + const consolidatedPreresults = []; + const inputPrimaryIDsByInputQueryNodeID = {}; + const kgEdgeIDsByQueryEdgeID = {}; + let kgEdgeIDsByPreresultRecordID = {}; + + preresults.forEach((preresult) => { + let consolidatedPreresult = []; + + // a preresultRecord is basically the information from a record, + // but formatted differently for purposes of assembling results. + let preresultRecord = { + inputPrimaryIDs: new Set(), + outputPrimaryIDs: new Set(), + kgEdgeIDs: new Set(), + }; + + const preresultRecordClone = cloneDeep(preresultRecord); + + if (preresult.length > 1) { + kgEdgeIDsByPreresultRecordID = {}; } - // remove incomplete combos - return keys(primaryIDByQueryNodeIDCombo).length === queryNodeIDCount; - }); - - this._results = primaryIDByQueryNodeIDCombosFiltered - // We've now identified all the valid combinations of primary IDs. - // Next, let's go through the records again to start assembling results. - .map((primaryIDByQueryNodeID) => { - return toPairs(briefRecordsByEdge) - .reduce((acc, [queryEdgeID, briefRecords]) => { - const compatibleBriefRecords = briefRecords.filter(({ - inputQueryNodeID, outputQueryNodeID, - inputPrimaryID, outputPrimaryID, - }) => { - return (primaryIDByQueryNodeID[inputQueryNodeID] == inputPrimaryID) && - (primaryIDByQueryNodeID[outputQueryNodeID] == outputPrimaryID); - }); - - // Because of the filter step above, every compatibleBriefRecord - // in this batch will have the same values for: - // inputQueryNodeID, outputQueryNodeID, inputPrimaryID, outputPrimaryID - // - // However, it is possible to have different values for kgEdgeID, so - // let's put all of those into a set. - const kgEdgeIDs = compatibleBriefRecords.reduce((acc, {kgEdgeID}) => { - acc.add(kgEdgeID); - return acc; - }, new Set()); - - acc[queryEdgeID] = { - inputQueryNodeID: compatibleBriefRecords[0].inputQueryNodeID, - outputQueryNodeID: compatibleBriefRecords[0].outputQueryNodeID, - inputPrimaryID: compatibleBriefRecords[0].inputPrimaryID, - outputPrimaryID: compatibleBriefRecords[0].outputPrimaryID, - kgEdgeIDs - }; - - return acc; - }, {}) - }) - /** - * Assemble each query result. - * - * infoByEdgeForOneCombo represents one compatible combination of records. - * This means a collection of records, one per query graph edge, all fit - * together with each other with inputs and outputs connected - * as specified by the query graph. But for convenience, instead of full - * records, we're actually just working with the IDs we need, as collected - * earlier. - * - * @param {Object. - * }>} infoByEdgeForOneCombo - * @return {Result} - */ - .map(infoByEdgeForOneCombo => { - // default score issue #200 - TODO: turn to evaluating module eventually - const result = {node_bindings: {}, edge_bindings: {}, score: 1.0}; - - toPairs(infoByEdgeForOneCombo).forEach(([queryEdgeID, { - inputQueryNodeID, outputQueryNodeID, - inputPrimaryID, outputPrimaryID, - kgEdgeIDs - }], i) => { - - // NOTE: either or both of the following could have been set already - // when we processed records for another query edge, but that's OK. - // - // When two records are linked, the outputPrimaryID for one record - // will be the same as the inputPrimaryID for the other. Because of - // that, whichever record was processed here first will have already - // set the value for result.node_bindings[queryNodeID]. Because every - // record in infoByEdgeForOneCombo uses the same mappings - // from queryNodeID to primaryID, there is no conflict. The same logic - // is also valid for the case of more than two linked records. - if (!result.node_bindings.hasOwnProperty(inputQueryNodeID)) { - result.node_bindings[inputQueryNodeID] = [ - { - id: inputPrimaryID - }, - ]; + preresult.forEach(({ + inputQueryNodeID, outputQueryNodeID, + inputPrimaryID, outputPrimaryID, + queryEdgeID, kgEdgeID + }) => { + + // this is a unique identifier to represent a record, but + // ignores details like predicates. + const preresultRecordID = [ + inputQueryNodeID, + inputPrimaryID, + outputQueryNodeID, + outputPrimaryID + ].join("-"); + + if (queryNodeIDsWithIsSet.has(inputQueryNodeID)) { + if (!inputPrimaryIDsByInputQueryNodeID.hasOwnProperty(inputQueryNodeID)) { + preresultRecord = cloneDeep(preresultRecordClone); + consolidatedPreresult.push(preresultRecord); + kgEdgeIDsByQueryEdgeID[queryEdgeID] = new Set(); + preresultRecord.kgEdgeIDs = kgEdgeIDsByQueryEdgeID[queryEdgeID]; + + inputPrimaryIDsByInputQueryNodeID[inputQueryNodeID] = new Set(); + preresultRecord.inputPrimaryIDs = inputPrimaryIDsByInputQueryNodeID[inputQueryNodeID]; } - if (!result.node_bindings.hasOwnProperty(outputQueryNodeID)) { - result.node_bindings[outputQueryNodeID] = [ - { - id: outputPrimaryID - }, - ]; + + inputPrimaryIDsByInputQueryNodeID[inputQueryNodeID].add(inputPrimaryID); + kgEdgeIDsByQueryEdgeID[queryEdgeID].add(kgEdgeID); + } else { + if (kgEdgeIDsByPreresultRecordID.hasOwnProperty(preresultRecordID)) { + kgEdgeIDsByPreresultRecordID[preresultRecordID].add(kgEdgeID); + } else { + kgEdgeIDsByPreresultRecordID[preresultRecordID] = new Set([kgEdgeID]); + preresultRecord = cloneDeep(preresultRecordClone); + consolidatedPreresult.push(preresultRecord); + preresultRecord.kgEdgeIDs = kgEdgeIDsByPreresultRecordID[preresultRecordID]; + preresultRecord.inputPrimaryIDs.add(inputPrimaryID); } + } + + preresultRecord.outputPrimaryIDs.add(outputPrimaryID); + + preresultRecord.inputQueryNodeID = inputQueryNodeID; + preresultRecord.outputQueryNodeID = outputQueryNodeID; + preresultRecord.queryEdgeID = queryEdgeID; + }); + + if (consolidatedPreresult.length === edgeCount) { + consolidatedPreresults.push(consolidatedPreresult); + consolidatedPreresult = []; + } + }); - const edge_bindings = result.edge_bindings[queryEdgeID] = []; - kgEdgeIDs.forEach((kgEdgeID) => { - edge_bindings.push({ - id: kgEdgeID - }); - }); + this._results = consolidatedPreresults.map((consolidatedPreresult) => { + + // TODO: calculate an actual score + const result = {node_bindings: {}, edge_bindings: {}, score: 1.0}; + + consolidatedPreresult.forEach(({ + inputQueryNodeID, outputQueryNodeID, + inputPrimaryIDs, outputPrimaryIDs, + queryEdgeID, kgEdgeIDs + }) => { + result.node_bindings[inputQueryNodeID] = Array.from(inputPrimaryIDs).map(inputPrimaryID => { + return { + id: inputPrimaryID + }; + }); + + result.node_bindings[outputQueryNodeID] = Array.from(outputPrimaryIDs).map(outputPrimaryID => { + return { + id: outputPrimaryID + }; }); - return result; + const edge_bindings = result.edge_bindings[queryEdgeID] = Array.from(kgEdgeIDs).map((kgEdgeID) => { + return { + id: kgEdgeID + }; + }); }); + + return result; + }); } };