Skip to content

Commit

Permalink
Updated download/install helper scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
Johannes Linder committed Oct 1, 2024
1 parent 64b2418 commit a99272a
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 1 deletion.
97 changes: 97 additions & 0 deletions data/training/download_dependencies.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/bin/bash
#
# Download the auxiliary files, pre-compiled umap bed files and ml genomes
# used by the training-data Makefile into the borzoi data folders.
#
# Required environment variables:
#   BORZOI_HG38 - root of the hg38 data folder
#   BORZOI_MM10 - root of the mm10 data folder

# Abort early if a data root is unset or empty; otherwise the mkdir calls
# below would silently target paths such as "/assembly/ucsc" at the
# filesystem root.
: "${BORZOI_HG38:?BORZOI_HG38 must be set to the hg38 data folder}"
: "${BORZOI_MM10:?BORZOI_MM10 must be set to the mm10 data folder}"

# create additional folders in borzoi data folders (hg38)
for subdir in assembly/ucsc assembly/gnomad mappability blacklist align; do
  mkdir -p "$BORZOI_HG38/$subdir" || exit 1
done

# create additional folders in borzoi data folders (mm10)
for subdir in assembly/ucsc mappability blacklist; do
  mkdir -p "$BORZOI_MM10/$subdir" || exit 1
done


# Download (and, where needed, uncompress / index) every dependency:
#   - auxiliary files required for the Makefile (hg38 and mm10)
#   - pre-compiled umap bed files
#   - hg38 / hg38 (gnomad major alleles) / mm10 ml genomes
#
# A file that already exists is skipped. Each download is first written to
# "<dest>.part" and only renamed to its final name once wget (and gunzip)
# succeeded, so a failed or interrupted transfer can never leave behind an
# empty/truncated file that a later run would mistake for "already exists".

# Abort if a data root is unset or empty, so no path below collapses to
# something like "/examples/umap_human.bed".
: "${BORZOI_HG38:?BORZOI_HG38 must be set to the hg38 data folder}"
: "${BORZOI_MM10:?BORZOI_MM10 must be set to the mm10 data folder}"
: "${BORZOI_DIR:?BORZOI_DIR must be set to the borzoi repository folder}"

deps_url="https://storage.googleapis.com/seqnn-share/helper/dependencies"
status=0 # becomes 1 as soon as any download or indexing step fails

#######################################
# Download a URL to a destination path without ever exposing a partial file.
# Arguments:
#   $1 - source URL
#   $2 - destination path
#   $3 - "gunzip" (default) to decompress the stream, "raw" to store as-is
# Returns:
#   0 on success; 1 on failure (nothing is left at the destination)
#######################################
fetch() {
  local url=$1 dest=$2 mode=${3:-gunzip}
  local partial="${dest}.part"
  local rc=0

  mkdir -p "$(dirname "$dest")" || return 1

  if [[ "$mode" == "raw" ]]; then
    wget -O "$partial" "$url" || rc=$?
  else
    # pipefail is scoped to the subshell: without it, a failed wget would be
    # masked by gunzip's exit status.
    (set -o pipefail; wget -O - "$url" | gunzip -c > "$partial") || rc=$?
  fi

  if ((rc != 0)); then
    echo "ERROR: failed to download $url" >&2
    rm -f -- "$partial"
    return 1
  fi
  mv -f -- "$partial" "$dest"
}

#######################################
# Download a file unless it is already present.
# Arguments:
#   $1 - label printed in the "already exists" message
#   $2 - source URL
#   $3 - destination path
#   $4 - optional fetch mode ("gunzip" by default, or "raw")
# Returns:
#   0 if the file exists or was downloaded; 1 if the download failed
#######################################
ensure_file() {
  local label=$1 url=$2 dest=$3 mode=${4:-gunzip}

  if [[ -f "$dest" ]]; then
    echo "$label already exists."
    return 0
  fi
  fetch "$url" "$dest" "$mode"
}

#######################################
# Download and index a genome fasta unless it is already present.
# Arguments:
#   $1 - label printed in the "already exists" message
#   $2 - source URL (gzip-compressed fasta)
#   $3 - destination fasta path
# Returns:
#   0 if the fasta exists or was downloaded and indexed; non-zero otherwise
#######################################
ensure_genome() {
  local label=$1 url=$2 dest=$3

  if [[ -f "$dest" ]]; then
    echo "$label already exists."
    return 0
  fi
  # Only index when the download actually succeeded.
  fetch "$url" "$dest" && idx_genome.py "$dest"
}

# download and uncompress auxiliary files required for Makefile (hg38)
ensure_file "hg38_gaps.bed" \
  "$deps_url/hg38_gaps.bed.gz" \
  "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" || status=1

ensure_file "umap_k36_t10_l32.bed (hg38)" \
  "$deps_url/umap_k36_t10_l32_hg38.bed.gz" \
  "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" || status=1

ensure_file "blacklist_hg38_all.bed" \
  "$deps_url/blacklist_hg38_all.bed.gz" \
  "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" || status=1

# The alignment net is kept gzip-compressed, hence "raw".
ensure_file "hg38.mm10.syn.net.gz" \
  "$deps_url/hg38.mm10.syn.net.gz" \
  "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" raw || status=1

# download and uncompress auxiliary files required for Makefile (mm10)
ensure_file "mm10_gaps.bed" \
  "$deps_url/mm10_gaps.bed.gz" \
  "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" || status=1

ensure_file "umap_k36_t10_l32.bed (mm10)" \
  "$deps_url/umap_k36_t10_l32_mm10.bed.gz" \
  "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" || status=1

ensure_file "blacklist_mm10_all.bed" \
  "$deps_url/blacklist_mm10_all.bed.gz" \
  "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" || status=1

# download and uncompress pre-compiled umap bed files
ensure_file "umap_human.bed" \
  "$deps_url/umap_human.bed.gz" \
  "$BORZOI_DIR/examples/umap_human.bed" || status=1

ensure_file "umap_mouse.bed" \
  "$deps_url/umap_mouse.bed.gz" \
  "$BORZOI_DIR/examples/umap_mouse.bed" || status=1

# download and index hg38 ml genome
ensure_genome "hg38.ml.fa" \
  "$deps_url/hg38.ml.fa.gz" \
  "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" || status=1

# download and index hg38 ml genome (gnomad major alleles)
ensure_genome "hg38.ml.fa (gnomad)" \
  "$deps_url/hg38_gnomad.ml.fa.gz" \
  "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" || status=1

# download and index mm10 ml genome
ensure_genome "mm10.ml.fa" \
  "$deps_url/mm10.ml.fa.gz" \
  "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" || status=1

# Report overall success/failure to the caller.
exit "$status"
2 changes: 1 addition & 1 deletion model/train.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/sh

# Runs westminster_train_folds.py with params.json and the data/hg38 and
# data/mm10 directories as positional arguments.
#
# NOTE(review): the two commands below are the before/after sides of a
# one-line diff (1 addition & 1 deletion) and differ only in the --rc flag.
# Presumably only one of them belongs in the real script; confirm which
# before running, otherwise the command is launched twice.
westminster_train_folds.py -e borzoi_py310 --f_list 3 -c 4 --identical_crosses -q rtx4090 --rc -o saved_models params.json data/hg38 data/mm10
westminster_train_folds.py -e borzoi_py310 --f_list 3 -c 4 --identical_crosses -q rtx4090 -o saved_models params.json data/hg38 data/mm10

0 comments on commit a99272a

Please sign in to comment.