-
Notifications
You must be signed in to change notification settings - Fork 4
/
gene.js
137 lines (121 loc) · 3.58 KB
/
gene.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/**
* Loader module for the entrez gene utility
* @module importer/entrez/gene
*/
const Ajv = require('ajv');
const { checkSpec } = require('../util');
const util = require('./util');
const { entrezGene: SOURCE_DEFN } = require('../sources');
// Validator instance used below to compile the schema for incoming records.
const ajv = new Ajv();
// Record cache handed to the ./util helpers as `cache`; shared by every loader in this module.
// NOTE(review): key/value layout is defined in ./util and is not visible here.
const CACHE = {};
// Memoized results of fetchAndLoadBySearchTerm, keyed by a string built from its search arguments.
const SEARCH_CACHE = {};
// Passed to the ./util fetch helpers as `dbName` (presumably the Entrez database to query — confirm in ./util).
const DB_NAME = 'gene';
// Base URL used to build the `url` of a parsed record: `${LINK_URL}/${uid}`.
const LINK_URL = 'https://www.ncbi.nlm.nih.gov/gene';
// Passed to the ./util fetch helpers as `MAX_CONSEC`.
// NOTE(review): presumably a per-request batch limit — confirm in ./util.
const MAX_CONSEC = 100;
// Compiled JSON schema for a gene record as consumed by parseRecord:
// an object that must carry `uid` and `name`; `description` and `summary`
// are optional but must be strings when present.
const recordSpec = ajv.compile({
properties: {
description: { type: 'string' },
name: { type: 'string' },
summary: { type: 'string' },
// uid must be a string made up of digits only
uid: { pattern: '^\\d+$', type: 'string' },
},
required: ['uid', 'name'],
type: 'object',
});
/**
 * Convert a gene record retrieved from Entrez into its equivalent GraphKB
 * representation.
 *
 * The record is handed to checkSpec together with recordSpec before any of
 * its fields are read.
 *
 * @param {object} record the Entrez gene record
 * @param {string} record.uid becomes `sourceId` and the last segment of `url`
 * @param {string} record.name becomes both `name` and `displayName`
 * @param {string} [record.description] becomes `longName`
 * @param {string} [record.summary] becomes `description`
 * @returns {object} the GraphKB record content (`biotype` is always 'gene')
 */
const parseRecord = (record) => {
    checkSpec(recordSpec, record);

    const {
        description,
        name,
        summary,
        uid,
    } = record;

    // Note the field shift: Entrez "summary" is the GraphKB description and
    // Entrez "description" is the GraphKB long name.
    return {
        biotype: 'gene',
        description: summary,
        displayName: name,
        longName: description,
        name,
        sourceId: uid,
        url: `${LINK_URL}/${uid}`,
    };
};
/**
 * Fetch gene records by ID and load them into GraphKB.
 *
 * Thin wrapper around util.fetchAndLoadByIds that supplies the gene-specific
 * settings of this module (cache, database name, parser, source definition
 * and the 'Feature' target).
 *
 * @param {ApiConnection} api connection to GraphKB
 * @param {Array.<string>} idListIn list of gene IDs
 * @param {object} [opt={}] optional overrides
 * @param {boolean} [opt.fetchFirst] override util.uploadRecord() fetchFirst
 * @param {boolean} [opt.upsert] override util.uploadRecord() upsert
 * @returns {Promise} resolves to whatever util.fetchAndLoadByIds resolves to
 */
const fetchAndLoadGeneByIds = async (api, idListIn, opt = {}) => {
    // For record update, set fetchFirst to false & upsert to true.
    const loaderOptions = {
        MAX_CONSEC,
        cache: CACHE,
        dbName: DB_NAME,
        fetchFirst: opt.fetchFirst,
        parser: parseRecord,
        sourceDefn: SOURCE_DEFN,
        target: 'Feature',
        upsert: opt.upsert,
    };

    return util.fetchAndLoadByIds(api, idListIn, loaderOptions);
};
/**
 * Search the genes for a term and upload the resulting records to GraphKB.
 *
 * The query sent to util.fetchAndLoadBySearchTerm is
 * `<term>[<field>] AND human[ORGN] AND alive[prop]`. The search runs first
 * with `termType` as the field; if that yields no records and a
 * `fallbackTermType` was given, it is repeated with the fallback field.
 *
 * Results are memoized in SEARCH_CACHE per (termType, fallbackTermType, term),
 * so repeating the same call does not search again.
 *
 * @param {ApiConnection} api connection to GraphKB
 * @param {string} term the value to search for (e.g. a gene symbol)
 * @param {string} [termType='Preferred Symbol'] search field the term is matched against
 * @param {?string} [fallbackTermType=null] search field to retry with when the first search returns nothing
 * @returns {Promise.<Array>} the records returned by util.fetchAndLoadBySearchTerm
 */
const fetchAndLoadBySearchTerm = async (api, term, termType = 'Preferred Symbol', fallbackTermType = null) => {
    // The fallback field is part of the key. Keying on termType and term alone
    // let an empty result cached by a call without a fallback suppress the
    // fallback search of a later call (and let a fallback result leak into a
    // call that did not ask for one).
    const cacheKey = JSON.stringify([termType, fallbackTermType || null, term]);
    const cached = SEARCH_CACHE[cacheKey];

    if (cached) {
        return cached;
    }

    // Run one search against the given field with this module's gene settings.
    const searchByField = async field => util.fetchAndLoadBySearchTerm(
        api,
        `${term}[${field}] AND human[ORGN] AND alive[prop]`,
        {
            MAX_CONSEC,
            cache: CACHE,
            dbName: DB_NAME,
            parser: parseRecord,
            sourceDefn: SOURCE_DEFN,
            target: 'Feature',
        },
    );

    let result = await searchByField(termType);

    // Nothing found under the primary field: retry with the fallback field.
    if (result.length === 0 && fallbackTermType) {
        result = await searchByField(fallbackTermType);
    }

    SEARCH_CACHE[cacheKey] = result;
    return result;
};
/**
 * Delegate to util.preLoadCache with this module's gene cache, source
 * definition and the 'Feature' target.
 * NOTE(review): presumably this fills CACHE from records already in GraphKB —
 * confirm against ./util.
 *
 * @param {ApiConnection} api connection to GraphKB
 * @returns {Promise} resolves to whatever util.preLoadCache resolves to
 */
const preLoadCache = async (api) => {
    const settings = {
        cache: CACHE,
        sourceDefn: SOURCE_DEFN,
        target: 'Feature',
    };

    return util.preLoadCache(api, settings);
};
/**
 * Search for a gene by symbol and upload the resulting records to GraphKB.
 * Calls fetchAndLoadBySearchTerm with 'Preferred Symbol' as the search field
 * and 'Gene Name' as the fallback field.
 *
 * @param {ApiConnection} api connection to GraphKB
 * @param {string} term the gene symbol
 * @returns {Promise} resolves to the result of fetchAndLoadBySearchTerm
 */
const fetchAndLoadBySymbol = async (api, term) => {
    const primaryField = 'Preferred Symbol';
    const fallbackField = 'Gene Name';

    return fetchAndLoadBySearchTerm(api, term, primaryField, fallbackField);
};
// Public interface of the gene loader.
module.exports = {
SOURCE_DEFN,
// exported under the generic name; the local function is fetchAndLoadGeneByIds
fetchAndLoadByIds: fetchAndLoadGeneByIds,
fetchAndLoadBySearchTerm,
fetchAndLoadBySymbol,
parseRecord,
preLoadCache,
};