Skip to content

Commit

Permalink
Merge branch 'master' into vcep_trackhub
Browse files Browse the repository at this point in the history
  • Loading branch information
melissacline authored Aug 31, 2024
2 parents 36497d8 + 0cffa22 commit f9e9712
Show file tree
Hide file tree
Showing 32 changed files with 981 additions and 93 deletions.
5 changes: 3 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
version: 2
jobs:
buildweb:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
- image: circleci/postgres:9.6.2
Expand Down Expand Up @@ -38,6 +39,7 @@ jobs:
- store_test_results:
path: ~/test_reports
deploy-dev:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
steps:
Expand All @@ -60,6 +62,7 @@ jobs:
name: deploying to dev machine
command: ~/project/deployment/deploy-dev
deploy-beta:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
steps:
Expand Down Expand Up @@ -94,8 +97,6 @@ jobs:
command: |
apk add --no-cache \
py-pip=9.0.0-r1
pip install \
docker-compose==1.12.0
- restore_cache:
keys:
- v1-{{ .Branch }}
Expand Down
5 changes: 3 additions & 2 deletions deployment/deploy-dev
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ cd ${WEBSITE}
npm run build:prod

# deploy (not preserving owner/group)
rsync -rlptD --delete --rsync-path='rsync' build/ ${USER}@${HOST}:/var/www/html/beta
rsync -rlptD --delete --exclude="/uploads" --exclude="/downloads/*" --rsync-path='rsync' django/ ${USER}@${HOST}:/var/www/backend/beta/django
# Force rsync's ssh transport to use IPv4 to prevent the "Cannot assign requested address" error
rsync -rlptD -e 'ssh -4' --delete --rsync-path='rsync' build/ ${USER}@${HOST}:/var/www/html/beta
rsync -rlptD -e 'ssh -4' --delete --exclude="/uploads" --exclude="/downloads/*" --rsync-path='rsync' django/ ${USER}@${HOST}:/var/www/backend/beta/django

requirements=$(cat requirements.txt)
requirements=$(echo ${requirements}) # drop carriage returns
Expand Down
2 changes: 1 addition & 1 deletion deployment/site_settings/config.beta.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
baseurl: '/',
captcha_key: '', /* reCAPTCHA API key */
maps_key: '', /* Google maps javascript API key */
backend_url: 'http://brcaexchange-prod.gi.ucsc.edu/backend',
backend_url: 'https://brcaexchange-prod.gi.ucsc.edu/backend',
analytics: null,
environment: 'beta'
};
Expand Down
5 changes: 2 additions & 3 deletions pipeline/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ endif

COMMON_DOCKER_ARGS = --rm -u `id -u ${USER}`:$(DOCKER_GRP) \
-e "DATA_DATE=$(DATA_DATE)" \
-e "UTA_DB_URL=postgresql://[email protected]:$(UTA_PORT)/uta/uta_$(UTA_RELEASE_DATE)" \
-e "HGVS_SEQREPO_DIR=$(SEQ_REPO_DIR_DOCKER)/latest" \
-e "PYTHONPATH=/opt/brca-exchange/pipeline" \
--network host \
Expand Down Expand Up @@ -142,7 +141,7 @@ test-coverage: ## Running pipeline unit tests with coverage information
docker run $(COMMON_DOCKER_ARGS) $(PIPELINE_IMAGE) bash -c 'cd /opt/brca-exchange/pipeline/data && bash getdata && cd /opt/brca-exchange/pipeline && pytest --cov --ignore=splicing/ && coverage html --include="/opt/brca-exchange/pipeline/*" --omit="*/test_*"'


build-release: start-local-uta checkout build-docker setup-files setup-lovd download-resources download-seqrepo start-seqrepo-rest-service run-pipeline variants-by-source ## create new data release
build-release: checkout build-docker setup-files setup-lovd download-resources download-seqrepo start-seqrepo-rest-service run-pipeline variants-by-source ## create new data release

variants-by-source: ## postprocessing: compute statistics for changes with respect to the last release
docker run $(COMMON_DOCKER_ARGS) $(PIPELINE_IMAGE) python /opt/brca-exchange/pipeline/utilities/variantsBySource.py -i /files/data/output/release/built_with_change_types.tsv -c true
Expand Down Expand Up @@ -179,7 +178,7 @@ post-release-cmds: cleanup-failed include-release-notes push-docker tag-release
setup-data-from-latest-release-tar: setup-files ## sets up brca output dir with data contained in release archive from last release (only data from variant merging onwards)
tar -C $(OUT_DIR) -zxf $(PREVIOUS_RELEASE_PATH)

setup-dev-env: start-local-uta build-docker setup-files download-resources ## setup development environment
setup-dev-env: build-docker setup-files download-resources ## setup development environment

clean-pyc: ## remove Python file artifacts
find . -name '*.pyc' -exec rm -f {} +
Expand Down
204 changes: 204 additions & 0 deletions pipeline/analysis/add_bioinfo_pred.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env python

import argparse
import csv
import re
import sys

# Names of the two columns this script adds to the output file
# (see initialize_output_file() and main()).
BIOINFO_CODE_ID = "Provisional_Evidence_Code_Bioinfo"
BIOINFO_CODE_DESCR = "Provisional_Evidence_Description_Bioinfo"


# Evidence-code values returned by estimate_bioinfo_code() and
# apply_pvs1_code().  NO_CODE is the starting value both functions return
# when none of their rules assigns a code.
NO_CODE = "NO_CODE"
PP3 = "PP3"
BP4_BP7 = "BP4,BP7"
BP4 = "BP4"
BP1_STRONG = "BP1_STRONG"
PVS1_CODE = "PVS1"

def parse_args():
    """
    Parse the command-line options of this script.

    Options:
      -i / --input   path of the tab-delimited variant file to read
                     (default: built_final.tsv)
      -o / --output  path of the copy of the input with the new columns added
                     (default: built_with_bioinfo.tsv)
      -d / --debug   flag requesting debugging output (default: off)

    Returns:
        argparse.Namespace with the attributes ``input``, ``output`` and
        ``debug``.
    """
    parser = argparse.ArgumentParser()
    # The default previously read "build_final.tsv", which does not match the
    # "built_final" file named in the help text.
    parser.add_argument("-i", "--input", default="built_final.tsv",
                        help="built_final")
    parser.add_argument("-o", "--output", default="built_with_bioinfo.tsv",
                        help="version of input file with new columns added")
    parser.add_argument("-d", "--debug", action="store_true", default=False,
                        help="Print debugging info")
    return parser.parse_args()


def initialize_output_file(input_file, output_filename):
    """
    Create the output file, write its header row and return a writer for it.

    The output header is the input header plus the two new columns
    (BIOINFO_CODE_ID, BIOINFO_CODE_DESCR).  The new columns are inserted
    immediately before "change_type" when the input has that column and are
    appended at the end otherwise.  A new column that is already present in
    the input header is not added a second time.

    Args:
        input_file: csv.DictReader over the input; only its ``fieldnames``
            attribute is read, and it is not modified.
        output_filename: path of the tab-delimited file to create (an
            existing file is overwritten).

    Returns:
        csv.DictWriter for the output file, with the header already written.
        The underlying file handle is kept open for the writer and is not
        reachable by the caller, so it is never closed explicitly.
    """
    # fieldnames is None when the input file is empty; treat as "no columns".
    input_header_row = list(input_file.fieldnames or [])
    new_columns = [column for column in (BIOINFO_CODE_ID, BIOINFO_CODE_DESCR)
                   if column not in input_header_row]
    if "change_type" in input_header_row:
        idx = input_header_row.index("change_type")
        output_header_row = (input_header_row[:idx] + new_columns
                             + input_header_row[idx:])
    else:
        output_header_row = input_header_row + new_columns
    # newline="" lets the csv module control line endings itself (required by
    # the csv documentation; avoids "\r\r\n" on platforms that translate "\n").
    output_fp = open(output_filename, "w", newline="")
    output_file = csv.DictWriter(output_fp,
                                 fieldnames=output_header_row,
                                 delimiter='\t')
    output_file.writeheader()
    # The caller cannot reach the handle, so push the header to disk now.
    output_fp.flush()
    return output_file


def extract_protein_coordinate(variant, debug=False):
    """
    Return the first run of digits in the variant's Protein_Change as an int.

    Args:
        variant: mapping with a "Protein_Change" entry (for example "A123T").
        debug: when true, print the value the coordinate was derived from.

    Returns:
        The integer position, or None when Protein_Change is empty/None or
        contains no digits.  (Previously this case raised UnboundLocalError,
        although the caller tests the result for truthiness.)
    """
    protein_change = variant["Protein_Change"] or ""
    hit = re.search("[0-9]+", protein_change)
    if hit is None:
        return None
    pos = int(hit.group())
    if debug:
        print("from", protein_change, "derived", pos)
    return pos

def inside_functional_domain(variant):
    """
    Report whether the variant's protein position lies in a listed domain.

    The position comes from extract_protein_coordinate(variant) and the gene
    from variant["Gene_Symbol"].  Each gene has a list of inclusive
    (first, last) position ranges; the result is True when the position falls
    in any of them.  A missing (falsy) position or a gene other than BRCA1 /
    BRCA2 gives False.
    """
    domain_ranges = {
        "BRCA1": (
            (2, 99), (503, 508), (607, 614), (651, 656),
            (1391, 1424), (1650, 1863),
        ),
        "BRCA2": (
            (10, 40), (1002, 1036), (1212, 1246), (1422, 1453),
            (1518, 1549), (1665, 1696), (1837, 1871), (1971, 2005),
            (2051, 2085), (2481, 3186), (3263, 3269), (3265, 3330),
            (3381, 3385),
        ),
    }
    position = extract_protein_coordinate(variant)
    if not position:
        return False
    gene_ranges = domain_ranges.get(variant["Gene_Symbol"], ())
    return any(first <= position <= last for first, last in gene_ranges)



# Per-gene BayesDel (noAF) cut-offs as (upper, lower): a score above `upper`
# counts as a predicted protein effect, a score below `lower` as a predicted
# absence of protein effect; anything in between is undecided.
_BAYESDEL_CUTOFFS = {
    "BRCA1": (0.28, 0.15),
    "BRCA2": (0.30, 0.18),
}

# SpliceAI cut-offs: above the first a splicing effect is predicted, below the
# second no splicing effect is predicted; in between is undecided.
_SPLICEAI_EFFECT_ABOVE = 0.2
_SPLICEAI_NO_EFFECT_BELOW = 0.1


def _parse_score(raw_value):
    """Return a score column as a float, or None when it is missing ("-", empty or None)."""
    if raw_value is None:
        return None
    text = str(raw_value).strip()
    if text in ("", "-"):
        return None
    return float(text)


def estimate_bioinfo_code(variant, debug=False):
    """
    Derive the bioinformatics evidence code for one variant.

    The variant is first classified as synonymous, missense or intronic from
    its pyhgvs_Protein / Protein_Change / pyhgvs_cDNA fields.  The code is
    then chosen from the SpliceAI score (result_spliceai), the BayesDel score
    (BayesDel_nsfp33a_noAF, with gene-specific cut-offs) and whether the
    protein position lies in a functional domain.

    Args:
        variant: mapping with the keys "pyhgvs_Protein", "Protein_Change",
            "pyhgvs_cDNA", "result_spliceai", "Gene_Symbol" and, for BRCA1 /
            BRCA2, "BayesDel_nsfp33a_noAF".
        debug: when true, print the intermediate classification.

    Returns:
        One of PP3, BP4, BP4_BP7, BP1_STRONG, or NO_CODE when no rule applies
        (including variants whose effect is not recognised).

    Raises:
        ValueError: if a score is present but is not a number.
    """
    protein_hgvs = variant["pyhgvs_Protein"]
    protein_change = variant["Protein_Change"]
    cdna_hgvs = variant["pyhgvs_cDNA"]

    # Raw strings: the original patterns relied on invalid escape sequences.
    effect = "unknown"
    if re.search(r"=\)$", protein_hgvs):
        effect = "synonymous_variant"
    elif re.search(r"[A-Z]+[0-9]+[A-Z]+", protein_change):
        effect = "missense_variant"
    elif re.search(r"c\.[0-9]+[+-]", cdna_hgvs):
        effect = "intron_variant"
    if debug:
        print("variant", cdna_hgvs, "protein change", protein_change,
              protein_hgvs, "effect", effect)

    # A missing SpliceAI score counts as "no splicing effect predicted".
    spliceai = _parse_score(variant["result_spliceai"])
    splicing_effect = spliceai is not None and spliceai > _SPLICEAI_EFFECT_ABOVE
    no_splicing_effect = spliceai is None or spliceai < _SPLICEAI_NO_EFFECT_BELOW

    # Both flags stay False for genes without cut-offs, so they are always
    # defined by the time the rules below read them.
    protein_effect = False
    no_protein_effect = False
    cutoffs = _BAYESDEL_CUTOFFS.get(variant["Gene_Symbol"])
    if cutoffs is not None:
        upper, lower = cutoffs
        bayesdel = _parse_score(variant["BayesDel_nsfp33a_noAF"])
        if bayesdel is None:
            # A missing BayesDel score counts as "no protein effect predicted".
            no_protein_effect = True
        else:
            protein_effect = bayesdel > upper
            no_protein_effect = bayesdel < lower

    inside_domain = inside_functional_domain(variant)
    if debug:
        print("effect", effect, "splicing effect", splicing_effect,
              "inside domain", inside_domain)

    bioinfo_code = NO_CODE
    if effect == "synonymous_variant":
        if splicing_effect:
            bioinfo_code = PP3
        elif inside_domain:
            bioinfo_code = BP4_BP7
        else:
            bioinfo_code = BP1_STRONG
    elif effect == "intron_variant":
        bioinfo_code = PP3 if splicing_effect else BP4
    elif effect == "missense_variant":
        if splicing_effect:
            bioinfo_code = PP3
        elif no_splicing_effect:
            if not inside_domain:
                bioinfo_code = BP1_STRONG
            elif protein_effect:
                bioinfo_code = PP3
            elif no_protein_effect:
                bioinfo_code = BP4
        elif inside_domain and protein_effect:
            # SpliceAI score between the two cut-offs (undecided splicing).
            bioinfo_code = PP3
    return bioinfo_code


def apply_pvs1_code(variant):
    """
    Return PVS1_CODE when the variant's pyhgvs_Protein contains "Ter",
    and NO_CODE otherwise.

    NOTE(review): any occurrence of "Ter" matches, wherever it appears in the
    protein HGVS string - confirm that this is the intended rule.
    """
    has_stop = re.search("Ter", variant["pyhgvs_Protein"]) is not None
    return PVS1_CODE if has_stop else NO_CODE


def main():
    """
    Copy the input TSV to the output TSV with the two new columns added.

    Reads the file named by --input row by row, sets BIOINFO_CODE_ID and
    BIOINFO_CODE_DESCR on every row and writes the row to the file named by
    --output.  Both columns are currently written as empty strings.
    """
    # Raise the csv module's per-field size limit to the largest value sys
    # exposes (presumably because some input fields are very long - confirm).
    csv.field_size_limit(sys.maxsize)
    args = parse_args()
    # newline="" is what the csv documentation requires for files handed to
    # csv readers, so quoted fields with embedded line breaks are kept intact.
    with open(args.input, 'r', newline="") as input_fp:
        input_reader = csv.DictReader(input_fp, delimiter="\t")
        writer = initialize_output_file(input_reader, args.output)
        for variant in input_reader:
            # TODO: the columns are placeholders for now.  The computation
            # below is not wired in yet:
            #   variant[BIOINFO_CODE_ID] = estimate_bioinfo_code(variant, debug=args.debug)
            #   pvs1_code = apply_pvs1_code(variant)
            variant[BIOINFO_CODE_ID] = ""
            variant[BIOINFO_CODE_DESCR] = ""
            writer.writerow(variant)


if __name__ == "__main__":
    main()
Loading

0 comments on commit f9e9712

Please sign in to comment.