Skip to content

Commit

Permalink
Merge branch 'master' into cleanup_discard_list
Browse files Browse the repository at this point in the history
  • Loading branch information
melissacline authored Oct 24, 2024
2 parents bf4e91c + 7b009b3 commit 8fff8bd
Show file tree
Hide file tree
Showing 32 changed files with 717 additions and 229 deletions.
5 changes: 3 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
version: 2
jobs:
buildweb:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
- image: circleci/postgres:9.6.2
Expand Down Expand Up @@ -38,6 +39,7 @@ jobs:
- store_test_results:
path: ~/test_reports
deploy-dev:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
steps:
Expand All @@ -60,6 +62,7 @@ jobs:
name: deploying to dev machine
command: ~/project/deployment/deploy-dev
deploy-beta:
circleci_ip_ranges: true
docker:
- image: circleci/node:16
steps:
Expand Down Expand Up @@ -94,8 +97,6 @@ jobs:
command: |
apk add --no-cache \
py-pip=9.0.0-r1
pip install \
docker-compose==1.29.2
- restore_cache:
keys:
- v1-{{ .Branch }}
Expand Down
5 changes: 3 additions & 2 deletions deployment/deploy-dev
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ cd ${WEBSITE}
npm run build:prod

# deploy (not preserving owner/group)
rsync -rlptD --delete --rsync-path='rsync' build/ ${USER}@${HOST}:/var/www/html/beta
rsync -rlptD --delete --exclude="/uploads" --exclude="/downloads/*" --rsync-path='rsync' django/ ${USER}@${HOST}:/var/www/backend/beta/django
# Force rsync's ssh to use ipv4 to prevent "Cannot assign requested address" error
rsync -rlptD -e 'ssh -4' --delete --rsync-path='rsync' build/ ${USER}@${HOST}:/var/www/html/beta
rsync -rlptD -e 'ssh -4' --delete --exclude="/uploads" --exclude="/downloads/*" --rsync-path='rsync' django/ ${USER}@${HOST}:/var/www/backend/beta/django

requirements=$(cat requirements.txt)
requirements=$(echo ${requirements}) # drop carriage returns
Expand Down
2 changes: 1 addition & 1 deletion deployment/site_settings/config.beta.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
baseurl: '/',
captcha_key: '', /* reCAPTCHA API key */
maps_key: '', /* Google maps javascript API key */
backend_url: 'http://brcaexchange-prod.gi.ucsc.edu/backend',
backend_url: 'https://brcaexchange-prod.gi.ucsc.edu/backend',
analytics: null,
environment: 'beta'
};
Expand Down
2 changes: 1 addition & 1 deletion pipeline/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ COPY pipeline/requirements.txt .
COPY test-requirements.txt .

# pip 20.3+ uses a strict dependency resolver that causes biocommons/bioutils and hgvs/ipython errors
RUN pip install pip==20.2
RUN pip install pip==24.0

# install numpy first to avoid issues with bio python and bx-python (see also https://github.com/LUMC/vep2lovd/issues/1)
RUN pip install $(grep numpy requirements.txt)
Expand Down
38 changes: 38 additions & 0 deletions pipeline/genomeBrowserTrack/bigBedFromBed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Generates an output BigBed file from a single input BED file,
# correcting for errors. bedToBigBed errors on variants > 255 characters.
# This script removes those problem variants until bedToBigBed succeeds.
#
# Usage: bigBedFromBed.sh INPUT_BED AUTOSQL_FILE CHROM_SIZES OUTPUT_BIGBED
#
# NOTE: INPUT_BED is sorted and edited in place (offending lines are deleted).
#
set -o errexit

if [ "$#" -lt 4 ]; then
    echo "usage: $0 INPUT_BED AUTOSQL_FILE CHROM_SIZES OUTPUT_BIGBED" >&2
    exit 1
fi

INPUT_BED=$1
AS=$2
CHROM_SIZES=$3
OUTPUT_BIGBED=$4

# Runs the conversion once; output and exit status are left to the caller.
run_bed_to_big_bed() {
    bedToBigBed -type=bed9+ -as="${AS}" -tab "${INPUT_BED}" "${CHROM_SIZES}" "${OUTPUT_BIGBED}"
}

# Prints the token that follows the last "line " in the given message (the
# line number named in a bedToBigBed error), or nothing when there is none.
# Newlines are flattened first so the whole message is parsed as one line.
error_line_of() {
    printf '%s' "$1" | tr '\n' ' ' | sed -n -e 's/^.*line //p' | sed 's/[[:space:]].*$//'
}

# bedToBigBed expects input sorted by chromosome, then numerically by start.
LC_COLLATE=C sort -k1,1 -k2,2n "${INPUT_BED}" -o "${INPUT_BED}"

# captures the first error message if present ("|| true" keeps errexit from
# aborting on the expected failure)
ERROR=$(run_bed_to_big_bed 2>&1) || true

# parses line number of the first problem variant from the error message
ERRORLINE=$(error_line_of "${ERROR}")

while [ -n "${ERRORLINE}" ]
do
    # Anything other than a plain line number cannot be deleted; stop here and
    # let the final run below report the failure instead of looping forever.
    case "${ERRORLINE}" in
        *[!0-9]*) break ;;
    esac

    # removes error variant
    sed -i "${ERRORLINE}d" "${INPUT_BED}"
    echo "${ERROR}"

    # captures and parses the line number of the next problem variant
    ERROR=$(run_bed_to_big_bed 2>&1) || true
    ERRORLINE=$(error_line_of "${ERROR}")
    echo "${ERRORLINE}"
done

# Final run under errexit: produces the output, or surfaces any remaining
# error and exits non-zero.
run_bed_to_big_bed
99 changes: 99 additions & 0 deletions pipeline/genomeBrowserTrack/brcaPopfreqToBed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python
from collections import namedtuple, OrderedDict
import html
import genomeBrowserUtils




def _write_auto_sql_file(as_path):
    """Write the fixed AutoSQL (.as) field definitions to ``as_path``.

    After writing, the name of the written file is reported on stdout.
    """
    header_rows = (
        "table brcaExchanges",
        '" These data are in BigBed bed9 format, and include selected fields from https://brcaexchange.org"',
        "(",
    )
    field_rows = (
        'string chrom; "Chromosome (or contig, scaffold, etc.)"',
        'uint chromStart; "Start position in chromosome"',
        'uint chromEnd; "End position in chromosome"',
        'string name; "Name of item"',
        'uint score; "Score from 0-1000"',
        'char[1] strand; "+ or -"',
        'uint thickStart; "Start of where display should be thick (start codon)"',
        'uint thickEnd; "End of where display should be thick (stop codon)"',
        'uint reserved; "Used as itemRgb as of 2004-11-22"',
        'string outlink; "Link to the variant in BRCA Exchange"',
        'string symbol; "Gene Symbol"',
        'string cdna_hgvs; "Variant ID in cDNA HGVS nomenclature"',
        'string protein_hgvs; "Variant ID in protein HGVS nomenclature"',
        'string CA_ID; "ClinGen Allele Registry ID"',
        'string provisional_evidence_code; "Provisional ACMG code"',
        'string provional_code_description; "Accompanying description"',
        'string _mouseOver; "mouse over field hidden"',
    )
    # The trailing empty element makes the text end with a newline after ")".
    definition = "\n".join(header_rows + field_rows + (")", ""))

    with open(as_path, "w") as handle:
        handle.write(definition)
        print("wrote as file to {}".format(handle.name))


def write_track_item(rec, start, end, output_fp):
    """Write one tab-separated track row for ``rec`` to ``output_fp``.

    Args:
        rec: Row record exposing ``Chr``, ``pyhgvs_cDNA``, ``pyhgvs_Protein``,
            ``CA_ID``, ``Gene_Symbol`` and
            ``Provisional_Evidence_Code_Popfreq`` attributes.
        start: Start coordinate to emit; also written as thickStart.
        end: End coordinate to emit; also written as thickEnd.
        output_fp: Writable text file object that receives the row.

    Raises:
        ValueError: If the (truncated) cDNA HGVS name is exactly "?".
    """
    chrom = "chr" + rec.Chr
    score = 0
    strand = "."
    name = rec.pyhgvs_cDNA[0:254]
    if name == "?":
        # Raised explicitly: an assert would be stripped when running with -O.
        raise ValueError("variant has no usable cDNA HGVS name ('?')")
    thickStart = start
    thickEnd = end
    acmgCode = rec.Provisional_Evidence_Code_Popfreq
    color = genomeBrowserUtils.acmgCodeToColor(acmgCode)
    out_url = "https://brcaexchange.org/variant/" + rec.CA_ID
    #
    # The formatted mouseOver text is cut to 245 characters before the closing
    # "<br>" is appended, so as not to overwhelm the browser's internal limit
    # of 255 characters.
    description = "Click on the track item for more details"
    mouseOver = (
        "<b>Provisional ACMG Evidence Code:</b> %s<br><b>Details:</b> %s"
        % (acmgCode, description)
    )[:245] + "<br>"
    outRow = [
        chrom, start, end, name, score, strand, thickStart, thickEnd, color,
        out_url,
        rec.Gene_Symbol,
        genomeBrowserUtils.displayString(rec.pyhgvs_cDNA[0:254]),
        genomeBrowserUtils.displayString(rec.pyhgvs_Protein[0:254]),
        genomeBrowserUtils.displayString(rec.CA_ID),
        acmgCode, description[:254],
        mouseOver,
    ]
    output_fp.write("\t".join(str(x) for x in outRow) + "\n")


def main():
    """Convert the input TSV into four track files (hg19/hg38 x var/sv).

    Arguments come from ``genomeBrowserUtils._get_args()``. The AutoSQL file
    is written to ``args.auto_sql_file``. Each data row is written once with
    its hg19 coordinates and once with its hg38 coordinates: rows whose hg38
    span (``Hg38_End - Hg38_Start``) is below ``args.length_threshold`` go to
    the ``*_var`` outputs, all other rows go to the ``*_sv`` outputs.
    """
    args = genomeBrowserUtils._get_args()

    # All handles are managed by the with-statement so the outputs are
    # flushed and closed even if a row fails to parse.
    with open(args.input, 'r') as ifh, \
            open(args.output_hg19_var, 'w') as ofhg19v, \
            open(args.output_hg38_var, 'w') as ofhg38v, \
            open(args.output_hg19_sv, 'w') as ofhg19sv, \
            open(args.output_hg38_sv, 'w') as ofhg38sv:
        print("Reading %s..." % ifh.name)

        headers = ifh.readline().rstrip("\n").rstrip("\r").strip().split("\t")
        rowRec = namedtuple("rec", headers)

        _write_auto_sql_file(args.auto_sql_file)

        for line in ifh:
            row = line.rstrip("\n").rstrip("\r").split("\t")
            rec = rowRec(*row)
            if int(rec.Hg38_End) - int(rec.Hg38_Start) < args.length_threshold:
                out_hg19, out_hg38 = ofhg19v, ofhg38v
            else:
                out_hg19, out_hg38 = ofhg19sv, ofhg38sv
            # The start coordinate is written as (start - 1); the end is
            # passed through unchanged.
            write_track_item(rec, str(int(rec.pyhgvs_Hg37_Start) - 1),
                             rec.pyhgvs_Hg37_End, out_hg19)
            write_track_item(rec, str(int(rec.Hg38_Start) - 1),
                             rec.Hg38_End, out_hg38)

    print("wrote to %s, %s, %s and %s"
          % (ofhg19v.name, ofhg38v.name, ofhg19sv.name, ofhg38sv.name))


if __name__ == '__main__':
    main()
145 changes: 49 additions & 96 deletions pipeline/genomeBrowserTrack/brcaToBed.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,6 @@
#!/usr/bin/env python
from collections import namedtuple, OrderedDict
import argparse


def _add_urls(s, url=None):
    """Transform a comma-separated list of URLs/IDs into HTML links.

    Args:
        s: Comma-separated string of items; empty items are dropped.
        url: Optional prefix. When given, an item that does not start with
            "http" is linked to ``url + item``; when omitted, such an item is
            kept as plain text.

    Returns:
        The transformed items joined with ", ". An item starting with "http"
        is linked using its last path component as the label (or, if that
        component contains "=", the text after the last "=").
    """
    lines = []
    for part in s.split(","):
        part = part.strip()
        if part == "":
            continue
        if part.startswith("http"):
            label = part.split("/")[-1]
            if "=" in label:
                label = label.split("=")[-1]
            part = "<a href='%s'>%s</a>" % (part, label)
        elif url is not None:
            part = "<a href='%s%s'>%s</a>" % (url, part, part)
        lines.append(part)

    return ", ".join(lines)


def _get_parser():
    """Build the argument parser: input TSV, two BED outputs, AutoSQL file."""
    # (flags, help text, default) for every supported option, in order.
    option_specs = (
        (("-i", "--input"),
         "Path to built_with_change_types.tsv file",
         "output/release/built_with_change_types.tsv"),
        (("-o19", "--output-hg19"),
         "Output BED file with hg19",
         "brcaExchange.hg19.bed"),
        (("-o38", "--output-hg38"),
         "Output BED file with hg38",
         "brcaExchange.hg38.bed"),
        (("-a", "--auto-sql-file"),
         "Field definitions in AutoSQL format",
         "brcaExchange.as"),
    )

    cli = argparse.ArgumentParser()
    for flags, help_text, default_value in option_specs:
        cli.add_argument(*flags, help=help_text, default=default_value)
    return cli
import genomeBrowserUtils


def _write_auto_sql_file(as_path):
Expand All @@ -60,6 +21,7 @@ def _write_auto_sql_file(as_path):
string outlink; "Link to the variant in BRCA Exchange"
string symbol; "Gene Symbol"
string cdna_hgvs; "Variant ID in cDNA HGVS nomenclature"
string protein_hgvs; "Variant ID in protein HGVS nomenclature"
string CA_ID; "ClinGen Allele Registry ID"
string Clinical_significance_ENIGMA; "Clinical Significance as curated by the ENIGMA VCEP"
string _mouseOver; "mouse over field hidden"
Expand All @@ -71,18 +33,52 @@ def _write_auto_sql_file(as_path):

print("wrote as file to {}".format(asFh.name))

def write_track_item(rec, start, end, output_fp):
    """Write one tab-separated track row for ``rec`` to ``output_fp``.

    Args:
        rec: Row record exposing ``Chr``, ``pyhgvs_cDNA``, ``pyhgvs_Protein``,
            ``CA_ID``, ``Gene_Symbol`` and ``Clinical_significance_ENIGMA``
            attributes.
        start: Start coordinate to emit; also written as thickStart.
        end: End coordinate to emit; also written as thickEnd.
        output_fp: Writable text file object that receives the row.

    Raises:
        ValueError: If the (truncated) cDNA HGVS name is exactly "?".
    """
    chrom = "chr" + rec.Chr
    score = 0
    strand = "."
    name = rec.pyhgvs_cDNA[0:254]
    if name == "?":
        # Raised explicitly: an assert would be stripped when running with -O.
        raise ValueError("variant has no usable cDNA HGVS name ('?')")
    thickStart = start
    thickEnd = end
    color = genomeBrowserUtils.pathogenicityToColor(rec.Clinical_significance_ENIGMA)
    out_url = "https://brcaexchange.org/variant/" + rec.CA_ID
    #
    # When generating the mouseOver, truncate the cDNA and protein HGVS strings
    # to 25 characters each, to not overwhelm the browser's internal limit of
    # 255 characters.
    mouseOver = (
        "<b>Gene:</b> %s<br>"
        "<b>HGVS cDNA:</b> %s<br>"
        "<b>HGVS Protein:</b> %s<br>"
        "<b>VCEP Curation:</b> %s<br>"
        "<b>URL:</b> %s<br>"
    ) % (
        rec.Gene_Symbol,
        rec.pyhgvs_cDNA[0:25],
        rec.pyhgvs_Protein[0:25],
        rec.Clinical_significance_ENIGMA,
        out_url,
    )
    outRow = [
        chrom, start, end, name, score, strand, thickStart, thickEnd, color,
        out_url,
        rec.Gene_Symbol,
        genomeBrowserUtils.displayString(rec.pyhgvs_cDNA[0:254]),
        genomeBrowserUtils.displayString(rec.pyhgvs_Protein[0:254]),
        genomeBrowserUtils.displayString(rec.CA_ID),
        rec.Clinical_significance_ENIGMA, mouseOver,
    ]
    output_fp.write("\t".join(str(x) for x in outRow) + "\n")




def main():
parser = _get_parser()
args = genomeBrowserUtils._get_args()

args = parser.parse_args()

with open(args.input, 'r') as ifh, open(args.output_hg19, 'w') as ofh19, open(args.output_hg38, 'w') as ofh38:
with open(args.input, 'r') as ifh:
ofhg19v = open(args.output_hg19_var, 'w')
ofhg38v = open(args.output_hg38_var, 'w')
ofhg19sv = open(args.output_hg19_sv, 'w')
ofhg38sv = open(args.output_hg38_sv, 'w')
print("Reading %s..." % ifh.name)

headers = ifh.readline().rstrip("\n").rstrip("\r").strip().split("\t")
rowRec = namedtuple("rec", headers)
include_cols = ["Chr", "Pos", "pyhgvs_Hg37_Start", "pyhgvs_Hg37_End"]

_write_auto_sql_file(args.auto_sql_file)

Expand All @@ -91,58 +87,15 @@ def main():
rec = rowRec(*row)
rd = OrderedDict(zip(headers, row)) # row as dict

pat = rec.Clinical_significance_ENIGMA.lower()
if "pathogen" in pat:
color = "255,0,0"
elif "benign" in pat:
color = "0,255,0"
elif "uncertain" in pat:
color = "100,100,100"
if int(rec.Hg38_End) - int(rec.Hg38_Start) < args.length_threshold:
write_track_item(rec, str(int(rec.pyhgvs_Hg37_Start)-1), rec.pyhgvs_Hg37_End, ofhg19v)
write_track_item(rec, str(int(rec.Hg38_Start)-1), rec.Hg38_End, ofhg38v)
else:
color = "0,0,0"
out_url = "https://brcaexchange.org/variant/" + rec.CA_ID

chrom = "chr"+rec.Chr
score = 0
strand = "."
name = rec.pyhgvs_cDNA[0:254]
if name == "?":
assert(False)
#
# When generating the mouseOver, truncate the HGVS string to 100 characters, to not overwhelm
# the browser's internal limit of 255 characters.
mouseOver = ("<b>Variant ID:</b> %s %s<br>" + \
"<b>ENIGMA VCEP Clinical Significance:</b> %s<br>" + \
"<b>Variant URL:</b> %s<br>") \
% (rec.Gene_Symbol, rec.pyhgvs_cDNA[0:100], rec.Clinical_significance_ENIGMA,
out_url)

#Start with the hg19 version
start = str(int(rec.pyhgvs_Hg37_Start)-1)
end = rec.pyhgvs_Hg37_End
thickStart = start
thickEnd = end
outRow = [chrom, start, end, name, score, strand, thickStart, thickEnd, color, out_url,
rec.Gene_Symbol, rec.pyhgvs_cDNA[0:254], rec.CA_ID,
rec.Clinical_significance_ENIGMA, mouseOver]

outRow = [str(x) for x in outRow]
ofh19.write("\t".join(outRow)+"\n")

# Repeat with the hg38 version
ftLen = int(end)-int(start)
start = str(int(rec.Hg38_Start)-1)
end = str(int(start)+ftLen)
thickStart = start
thickEnd = end
outRow = [chrom, start, end, name, score, strand, thickStart, thickEnd, color, out_url,
rec.Gene_Symbol, rec.pyhgvs_cDNA[0:254], rec.CA_ID,
rec.Clinical_significance_ENIGMA, mouseOver]

outRow = [str(x) for x in outRow]
ofh38.write("\t".join(outRow)+"\n")

print("wrote to %s and %s" % (ofh19.name, ofh38.name))
write_track_item(rec, str(int(rec.pyhgvs_Hg37_Start)-1), rec.pyhgvs_Hg37_End, ofhg19sv)
write_track_item(rec, str(int(rec.Hg38_Start)-1), rec.Hg38_End, ofhg38sv)


print("wrote to %s, %s, %s and %s" % (ofhg19v.name, ofhg38v.name, ofhg19sv.name, ofhg38sv.name))


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 8fff8bd

Please sign in to comment.