-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
79 lines (68 loc) · 2.56 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
const { has } = require('lodash');
const EventEmitter = require('events');
const { deduplicate: { target }, currentSessionName } = require('@istex/config-component').get(module);
const { search, updateDuplicatesGraph, searchIgnoredBySourceUid } = require('./src/documentsManager');
const { buildQuery } = require('./src/deduplicateQueryBuilder');
/**
 * Event-emitting business module whose single job is to deduplicate a document.
 */
class Business extends EventEmitter {
  /**
   * Runs the deduplication of `docObject`, in callback or promise style.
   *
   * When the deduplication fails, the failure is first recorded on
   * `docObject.error` (see `_setDocObjectError`) and then reported to the caller.
   *
   * @param {object} docObject - Document to deduplicate; mutated in place.
   * @param {Function} [cb] - Optional Node-style callback. Called with no
   *   argument on success, or with the failure reason on error.
   * @returns {Promise<*>|undefined} Without a callback, a promise that follows
   *   the deduplication and rejects with the original reason; with a callback,
   *   `undefined`.
   */
  doTheJob (docObject, cb) {
    if (typeof cb !== 'function') {
      return deduplicate(docObject).catch((reason) => {
        _setDocObjectError(docObject, reason);
        throw reason;
      });
    }

    // Two-argument `then` on purpose: the rejection handler must only see
    // failures of `deduplicate`. With `.then(...).catch(...)`, an exception
    // thrown by `cb` itself would be recorded as a document error and `cb`
    // would be invoked a second time.
    deduplicate(docObject).then(
      () => cb(),
      (reason) => {
        _setDocObjectError(docObject, reason);
        return cb(reason);
      },
    );
  }
}
// Single Business instance exported as the module's public interface.
// `deduplicate` emits its 'info' events on this same instance.
const business = new Business();
module.exports = business;
/**
 * Searches for duplicates of a document and records the outcome through
 * `updateDuplicatesGraph`.
 *
 * The document must expose `technical.internalId` and `business.duplicateGenre`,
 * otherwise the returned promise rejects. `business.isDeduplicable` is set on
 * the document depending on whether the built query contains at least one
 * `should` clause.
 *
 * @param {object} docObject - Document to deduplicate; mutated in place.
 * @returns {Promise<*>} Promise following the result of `updateDuplicatesGraph`.
 */
function deduplicate (docObject) {
  const runDeduplication = async () => {
    const hasInternalId = has(docObject, 'technical.internalId');
    if (!hasInternalId) {
      throw new Error('Expected Object N/A to have property technical.internalId');
    }

    // A missing duplicateGenre is currently fatal; it may become a plain warning.
    const hasDuplicateGenre = has(docObject, 'business.duplicateGenre');
    if (!hasDuplicateGenre) {
      throw new Error(`Expected Object ${docObject.technical.internalId} to have property business.duplicateGenre`);
    }

    const ignoredFields = await searchIgnoredBySourceUid(docObject.sourceUid);
    const request = buildQuery(docObject, ignoredFields);
    const shouldClauses = request.query.bool.filter.bool.should;

    // Without any `should` clause the search is skipped and the graph is
    // updated without a hit list.
    if (shouldClauses.length === 0) {
      business.emit('info', `Not deduplicable {docObject}, internalId: ${docObject.technical.internalId}`);
      docObject.business.isDeduplicable = false;
      return updateDuplicatesGraph(docObject, currentSessionName);
    }

    docObject.business.isDeduplicable = true;

    const searchParams = {
      index: target,
      body: request,
      size: 1000, // At most 1000 duplicates are retrieved; hopefully that is enough.
    };
    const result = await search(searchParams);
    const hits = result.body.hits;

    if (hits.total.value === 0) {
      business.emit(
        'info',
        `No duplicates found for {docObject}, internalId: ${docObject.technical.internalId}`,
      );
    }

    return updateDuplicatesGraph(docObject, currentSessionName, hits.hits);
  };

  // Start from a resolved promise so that validation errors surface as
  // rejections and the work begins asynchronously.
  return Promise.resolve().then(runDeduplication);
}
/**
 * Records a failure on the document as a plain `error` object.
 *
 * The `code` field is the error's own `code`, falling back to
 * `meta.statusCode` when `code` is null or undefined. Any field missing from
 * `error` (or every field, when `error` itself is nullish) ends up `undefined`.
 *
 * @param {object} docObject - Document to annotate; mutated in place.
 * @param {*} error - Failure reason, usually an Error instance.
 * @returns {object} The same `docObject`.
 */
function _setDocObjectError (docObject, error) {
  // Reading from an empty object when `error` is nullish yields undefined fields.
  const details = error ?? {};
  const { message, stack, failuresList, failuresTypes } = details;

  docObject.error = {
    code: details.code ?? details.meta?.statusCode,
    message,
    stack,
    failuresList,
    failuresTypes,
  };

  return docObject;
}