Merge pull request #63 from bcgsc/feature/node-16
Feature/node 16
creisle authored Jan 25, 2022
2 parents 2c0339a + b2bbb40 commit 38bf3ef
Showing 30 changed files with 1,470 additions and 626 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/npm-test.yml
@@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- node: ['12', '14']
+ node: ['12', '14', '16']
name: node-${{ matrix.node }}
steps:
- uses: actions/checkout@v2
@@ -25,7 +25,7 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
files: coverage/junit.xml
- if: matrix.node == 10
+ if: matrix.node == 14
docker:
runs-on: ubuntu-latest
name: docker build
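
The matrix now tests Node 12, 14 and 16, and the coverage upload is gated on a version that actually exists in the matrix (the old matrix.node == 10 guard could never match, so that step was silently skipped). A rough way to mirror the matrix locally, assuming nvm is installed and the tests run via npm test:

    # run the suite under each Node version in the CI matrix
    for v in 12 14 16; do
        nvm install "$v" && nvm use "$v"
        npm ci       # clean install against the lockfile
        npm test
    done
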
78 changes: 42 additions & 36 deletions Snakefile
@@ -1,7 +1,6 @@
import os
from textwrap import dedent


DATA_DIR = 'snakemake_data'
LOGS_DIR = 'snakemake_logs'

@@ -18,6 +17,7 @@ COSMIC_EMAIL = config.get('cosmic_email')
COSMIC_PASSWORD = config.get('cosmic_password')
USE_COSMIC = COSMIC_EMAIL or COSMIC_PASSWORD
BACKFILL_TRIALS = config.get('trials')
+ USE_FDA_UNII = config.get('fda') # due to the non-scriptable download, making FDA optional
GITHUB_DATA = 'https://raw.githubusercontent.com/bcgsc/pori_graphkb_loader/develop/data'
CONTAINER = 'docker://bcgsc/pori-graphkb-loader:latest'
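
These flags arrive through Snakemake's --config mechanism, so the optional loaders can be toggled from the command line. A sketch of plausible invocations (the key names follow the config.get() calls above; the values are illustrative):

    # include the optional FDA-UNII download and load rules
    snakemake --config fda=1

    # backfill trials and enable the COSMIC loaders (credentials illustrative)
    snakemake --config trials=1 cosmic_email=user@example.com cosmic_password=secret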

@@ -28,10 +28,10 @@ rule all:
f'{DATA_DIR}/PMC4468049.COMPLETE',
f'{DATA_DIR}/PMC4232638.COMPLETE',
f'{DATA_DIR}/uberon.COMPLETE',
- f'{DATA_DIR}/ncitFdaXref.COMPLETE',
f'{DATA_DIR}/fdaApprovals.COMPLETE',
f'{DATA_DIR}/cancerhotspots.COMPLETE',
f'{DATA_DIR}/moa.COMPLETE',
+ *([f'{DATA_DIR}/ncitFdaXref.COMPLETE'] if USE_FDA_UNII else []),
*([f'{DATA_DIR}/clinicaltrialsgov.COMPLETE'] if BACKFILL_TRIALS else []),
*([f'{DATA_DIR}/cosmic_resistance.COMPLETE', f'{DATA_DIR}/cosmic_fusions.COMPLETE'] if USE_COSMIC else [])

@@ -46,11 +46,12 @@ rule download_ncit:
rm -rf __MACOSX''')


- rule download_ncit_fda:
-     output: f'{DATA_DIR}/ncit/FDA-UNII_NCIt_Subsets.txt'
-     shell: dedent(f'''\
-         cd {DATA_DIR}/ncit
-         wget https://evs.nci.nih.gov/ftp1/FDA/UNII/FDA-UNII_NCIt_Subsets.txt''')
+ if USE_FDA_UNII:
+     rule download_ncit_fda:
+         output: f'{DATA_DIR}/ncit/FDA-UNII_NCIt_Subsets.txt'
+         shell: dedent(f'''\
+             cd {DATA_DIR}/ncit
+             wget https://evs.nci.nih.gov/ftp1/FDA/UNII/FDA-UNII_NCIt_Subsets.txt''')
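
A Snakefile is evaluated as Python, so a module-level if statement decides whether a rule is registered at all; any rules.<name> reference elsewhere then needs the same guard (as done for load_drugbank further down). A minimal sketch of the idiom, with hypothetical rule names:

    ENABLE_EXTRA = config.get('extra')

    if ENABLE_EXTRA:
        rule extra_step:
            output: 'extra.txt'
            shell: 'touch {output}'

    rule final:
        # rules.extra_step only exists when the rule above was defined
        input: rules.extra_step.output if ENABLE_EXTRA else []
        output: 'final.txt'
        shell: 'touch {output}'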


rule download_ensembl:
@@ -62,16 +63,17 @@ rule download_ensembl:
''')


- rule download_fda_srs:
-     output: f'{DATA_DIR}/fda/UNII_Records.txt'
-     shell: dedent(f'''\
-         cd {DATA_DIR}/fda
-         wget https://fdasis.nlm.nih.gov/srs/download/srs/UNII_Data.zip
-         unzip UNII_Data.zip
-         rm UNII_Data.zip
-         mv UNII*.txt UNII_Records.txt
-         ''')
+ if USE_FDA_UNII:
+     rule download_fda_srs:
+         output: f'{DATA_DIR}/fda/UNII_Records.txt'
+         shell: dedent(f'''\
+             cd {DATA_DIR}/fda
+             wget https://fdasis.nlm.nih.gov/srs/download/srs/UNII_Data.zip
+             unzip UNII_Data.zip
+             rm UNII_Data.zip
+             mv UNII*.txt UNII_Records.txt
+             ''')


rule download_refseq:
@@ -135,8 +137,8 @@ rule download_cgi:
output: f'{DATA_DIR}/cgi/cgi_biomarkers_per_variant.tsv'
shell: dedent(f'''\
cd {DATA_DIR}/cgi
- wget https://www.cancergenomeinterpreter.org/data/cgi_biomarkers_latest.zip
- unzip cgi_biomarkers_latest.zip
+ wget https://www.cancergenomeinterpreter.org/data/cgi_biomarkers_20180117.zip
+ unzip cgi_biomarkers_20180117.zip
''')


@@ -217,23 +219,24 @@ rule load_ncit:
shell: 'node bin/load.js file ncit {input.data} &> {log}; cp {log} {output}'


- rule load_fda_srs:
-     input: expand(rules.load_local.output, local=['vocab']),
-         data=rules.download_fda_srs.output
-     container: CONTAINER
-     log: f'{LOGS_DIR}/fdaSrs.logs.txt'
-     output: f'{DATA_DIR}/fdaSrs.COMPLETE'
-     shell: 'node bin/load.js file fdaSrs {input.data} &> {log}; cp {log} {output}'
+ if USE_FDA_UNII:
+     rule load_fda_srs:
+         input: expand(rules.load_local.output, local=['vocab']),
+             data=f'{DATA_DIR}/fda/UNII_Records.txt'
+         container: CONTAINER
+         log: f'{LOGS_DIR}/fdaSrs.logs.txt'
+         output: f'{DATA_DIR}/fdaSrs.COMPLETE'
+         shell: 'node bin/load.js file fdaSrs {input.data} &> {log}; cp {log} {output}'


- rule load_ncit_fda:
-     input: rules.load_ncit.output,
-         rules.load_fda_srs.output,
-         data=rules.download_ncit_fda.output
-     container: CONTAINER
-     log: f'{LOGS_DIR}/ncitFdaXref.logs.txt'
-     output: f'{DATA_DIR}/ncitFdaXref.COMPLETE'
-     shell: 'node bin/load.js file ncitFdaXref {input.data} &> {log}; cp {log} {output}'
+     rule load_ncit_fda:
+         input: rules.load_ncit.output,
+             rules.load_fda_srs.output,
+             data=rules.download_ncit_fda.output
+         container: CONTAINER
+         log: f'{LOGS_DIR}/ncitFdaXref.logs.txt'
+         output: f'{DATA_DIR}/ncitFdaXref.COMPLETE'
+         shell: 'node bin/load.js file ncitFdaXref {input.data} &> {log}; cp {log} {output}'


rule load_refseq:
@@ -273,7 +276,7 @@ rule load_uberon:


rule load_drugbank:
- input: rules.load_fda_srs.output,
+ input: rules.load_fda_srs.output if USE_FDA_UNII else [],
data=rules.download_drugbank.output
container: CONTAINER
log: f'{LOGS_DIR}/drugbank.logs.txt'
@@ -298,7 +301,9 @@ rule load_dgidb:


def get_drug_inputs(wildcards):
- inputs = [*rules.load_fda_srs.output, *rules.load_ncit.output]
+ inputs = [*rules.load_ncit.output]
+ if USE_FDA_UNII:
+     inputs.extend(rules.load_fda_srs.output)
if USE_DRUGBANK:
inputs.append(*rules.load_drugbank.output)
@@ -423,7 +428,8 @@ rule load_cosmic_fusions:


rule load_moa:
- input: rules.load_oncotree.output
+ input: rules.load_oncotree.output,
+     expand(rules.load_local.output, local=['vocab', 'signatures', 'chromosomes', 'evidenceLevels', 'aacr', 'asco'])
container: CONTAINER
log: f'{LOGS_DIR}/load_moa.logs.txt'
output: f'{DATA_DIR}/moa.COMPLETE'
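
Most loader rules in this Snakefile share the same sentinel-file convention: the load command writes a log, then copies it to a .COMPLETE marker that rule all (and downstream rules) depend on. A minimal sketch of the pattern, with a hypothetical loader name:

    rule load_example:  # hypothetical, not part of the diff
        input: f'{DATA_DIR}/example/data.txt'
        container: CONTAINER
        log: f'{LOGS_DIR}/example.logs.txt'
        output: f'{DATA_DIR}/example.COMPLETE'
        shell: 'node bin/load.js file example {input} &> {log}; cp {log} {output}'
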
2 changes: 1 addition & 1 deletion bin/load.js
@@ -141,7 +141,7 @@ if (input) {
loaderOptions.filename = input;

if (options.module === 'cosmic') {
- loaderOptions.mappingFilename = options.mappingFilename;
+ loaderOptions.classification = options.classification;
}
}
}
26 changes: 3 additions & 23 deletions src/asco/index.js
@@ -9,34 +9,14 @@ const { requestWithRetry, checkSpec } = require('../util');
const { asco: SOURCE_DEFN } = require('../sources');
const { logger } = require('../logging');
const { rid } = require('../graphkb');
+ const { main: mainSpec, details: detailsSpec } = require('./specs.json');

const CACHE = {};


const ajv = new Ajv();

- const validateMainSpec = ajv.compile({
-     properties: {
-         AbstID: { type: 'string' },
-         AuthorString: { type: 'string' },
-         Meeting: { type: 'string' },
-         Title: { type: 'string' },
-         Year: { pattern: '\\d+', type: 'string' },
-         id: { type: 'string' },
-         url: { format: 'url', type: 'string' },
-     },
-     required: ['AbstID', 'Title', 'Meeting', 'url', 'id'],
-     type: 'object',
- });
-
-
- const validateDetailsSpec = ajv.compile({
-     properties: {
-         DOI: { type: 'string' },
-         SiteCitation: { type: 'string' },
-     },
-     type: 'object',
- });
+ const validateMainSpec = ajv.compile(mainSpec);
+ const validateDetailsSpec = ajv.compile(detailsSpec);


/**
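
The change above moves the inline schemas into src/asco/specs.json (next file) without changing validation behaviour: Ajv compiles a plain schema object the same way whether it is written inline or required from a JSON file. A minimal sketch of the resulting pattern, assuming the Ajv 6-style API already used in this module; the sample record is hypothetical:

    const Ajv = require('ajv');

    const { main: mainSpec } = require('./specs.json');

    const ajv = new Ajv();
    const validateMainSpec = ajv.compile(mainSpec);

    // hypothetical abstract record, shaped to satisfy the schema's required fields
    const record = {
        AbstID: '12345',
        Meeting: 'ASCO Annual Meeting',
        Title: 'An example abstract title',
        Year: '2021',
        id: 'abc123',
        url: 'https://example.com/abstracts/12345',
    };

    if (!validateMainSpec(record)) {
        console.error(validateMainSpec.errors); // Ajv attaches failures to the validator
    }
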
48 changes: 48 additions & 0 deletions src/asco/specs.json
@@ -0,0 +1,48 @@
{
    "details": {
        "properties": {
            "DOI": {
                "type": "string"
            },
            "SiteCitation": {
                "type": "string"
            }
        },
        "type": "object"
    },
    "main": {
        "properties": {
            "AbstID": {
                "type": "string"
            },
            "AuthorString": {
                "type": "string"
            },
            "Meeting": {
                "type": "string"
            },
            "Title": {
                "type": "string"
            },
            "Year": {
                "pattern": "\\d+",
                "type": "string"
            },
            "id": {
                "type": "string"
            },
            "url": {
                "format": "url",
                "type": "string"
            }
        },
        "required": [
            "AbstID",
            "Title",
            "Meeting",
            "url",
            "id"
        ],
        "type": "object"
    }
}
8 changes: 7 additions & 1 deletion src/cancergenomeinterpreter/index.js
@@ -480,7 +480,9 @@ const processRow = async ({ row, source, conn }) => {
};


- const uploadFile = async ({ conn, filename, errorLogPrefix }) => {
+ const uploadFile = async ({
+     conn, filename, errorLogPrefix, maxRecords,
+ }) => {
const rows = await loadDelimToJson(filename);
logger.info('creating the source record');
const source = rid(await conn.addSource(SOURCE_DEFN));
@@ -496,6 +498,10 @@ const uploadFile = async ({ conn, filename, errorLogPrefix }) => {
logger.info(`loading ${rows.length} rows`);

for (let index = 0; index < rows.length; index++) {
+ if (maxRecords && index > maxRecords) {
+     logger.warn(`not loading all content due to max records limit (${maxRecords})`);
+     break;
+ }
const rawRow = rows[index];
const sourceId = hashRecordToId(rawRow);
logger.info(`processing: ${sourceId} (${index} / ${rows.length})`);
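
The new maxRecords option lets callers cap a load for quick smoke tests. As written, the guard breaks once index exceeds maxRecords, so up to maxRecords + 1 rows are processed before the warning fires. A sketch of a capped invocation; the argument values are hypothetical:

    // conn is assumed to be an authenticated GraphKB API connection
    await uploadFile({
        conn,
        errorLogPrefix: 'errorLog',
        filename: 'cgi_biomarkers_per_variant.tsv',
        maxRecords: 100, // stop early instead of loading every row
    });
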
19 changes: 2 additions & 17 deletions src/chembl/index.js
@@ -11,26 +11,11 @@ const {
} = require('../graphkb');
const { logger } = require('../logging');
const { chembl: SOURCE_DEFN } = require('../sources');
+ const spec = require('./spec.json');

const ajv = new Ajv();

- const recordSpec = ajv.compile({
-     properties: {
-         molecule_chembl_id: { pattern: '^CHEMBL\\d+$', type: 'string' },
-         molecule_properties: {
-             oneOf: [{
-                 properties: {
-                     full_molformula: { type: 'string' },
-                 },
-                 type: 'object',
-             }, { type: 'null' }],
-         },
-         pref_name: { type: ['string', 'null'] },
-         usan_stem_definition: { type: ['string', 'null'] },
-     },
-     required: ['molecule_chembl_id'],
-     type: 'object',
- });
+ const recordSpec = ajv.compile(spec);


const API = 'https://www.ebi.ac.uk/chembl/api/data/molecule';
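
As with the ASCO loader, the ChEMBL schema now lives in a standalone JSON file (spec.json, below) and is compiled once at module load. A quick sketch of what the compiled validator accepts; the record is illustrative, using the real ChEMBL identifier for aspirin:

    // minimal record satisfying spec.json: only molecule_chembl_id is required
    const record = {
        molecule_chembl_id: 'CHEMBL25', // aspirin
        molecule_properties: { full_molformula: 'C9H8O4' },
        pref_name: 'ASPIRIN',
        usan_stem_definition: null,
    };
    recordSpec(record); // true

    recordSpec({ molecule_chembl_id: 'aspirin' }); // false: fails the ^CHEMBL\d+$ pattern
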
39 changes: 39 additions & 0 deletions src/chembl/spec.json
@@ -0,0 +1,39 @@
{
    "properties": {
        "molecule_chembl_id": {
            "pattern": "^CHEMBL\\d+$",
            "type": "string"
        },
        "molecule_properties": {
            "oneOf": [
                {
                    "properties": {
                        "full_molformula": {
                            "type": "string"
                        }
                    },
                    "type": "object"
                },
                {
                    "type": "null"
                }
            ]
        },
        "pref_name": {
            "type": [
                "string",
                "null"
            ]
        },
        "usan_stem_definition": {
            "type": [
                "string",
                "null"
            ]
        }
    },
    "required": [
        "molecule_chembl_id"
    ],
    "type": "object"
}
