
Commit

Merge pull request #72 from PNNL-CompBio/expressionAdd
added expression code and docker
sgosline authored May 19, 2023
2 parents 08743c9 + 22e7aba commit 418ad83
Showing 12 changed files with 2,904 additions and 100 deletions.
130 changes: 66 additions & 64 deletions .github/workflows/docker-build.yml
@@ -1,64 +1,66 @@
name: Docker-build
defaults:
  run:
    working-directory: ./
on:
  push:
    branches:
      - "*"

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push bmd
        uses: docker/build-push-action@v2
        with:
          context: zfBmd
          push: true
          tags: sgosline/srp-zfbmd:latest
      - name: Build and push bmd2Samps
        uses: docker/build-push-action@v2
        with:
          context: bmd2Samps
          push: true
          tags: sgosline/srp-bmd2samps:v1
      - name: Build and push version 2 bmd2Samps
        uses: docker/build-push-action@v2
        with:
          context: bmd2Samps_v2
          push: true
          tags: sgosline/srp-bmd2samps:latest
      - name: Build and push exposome
        uses: docker/build-push-action@v2
        with:
          context: exposome
          push: true
          tags: sgosline/srp-exposome:latest
      - name: Build and push dbSchema
        uses: docker/build-push-action@v2
        with:
          context: dbSchema
          push: true
          tags: sgosline/srp-dbschema
      #- name: docker pull
      #  run: "docker pull sgosline/srp-analytics"
      #- name: test LPR
      #  run: "docker run -v $PWD:/tmp sgosline/srp-analytics --test-lpr"
      #- name: test morpho
      #  run: "docker run -v $PWD:/tmp sgosline/srp-analytics --test-morpho"
      #- name: build db
      #  run: "docker run -v $PWD:/tmp sgosline/srp-analytics"
      #- name: validate schemas
      #  run: "docker run -v $PWD:/tmp sgosline/srp-analytics --validate"
name: Docker-build
defaults:
  run:
    working-directory: ./
on:
  push:
    branches:
      - "*"

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}
      - name: Build and push bmd
        uses: docker/build-push-action@v3
        with:
          context: zfBmd
          push: true
          tags: sgosline/srp-zfbmd:latest
          platforms: linux/amd64,linux/arm64
      - name: Build and push bmd2Samps
        uses: docker/build-push-action@v3
        with:
          context: bmd2Samps
          push: true
          tags: sgosline/srp-bmd2samps:v1
          platforms: linux/amd64,linux/arm64
      - name: Build and push version 2 bmd2Samps
        uses: docker/build-push-action@v3
        with:
          context: bmd2Samps_v2
          push: true
          tags: sgosline/srp-bmd2samps:latest
          platforms: linux/amd64,linux/arm64
      - name: Build and push exposome
        uses: docker/build-push-action@v3
        with:
          context: exposome
          push: true
          tags: sgosline/srp-exposome:latest
          platforms: linux/amd64,linux/arm64
      # - name: Build and push dbSchema
      #   uses: docker/build-push-action@v3
      #   with:
      #     context: dbSchema
      #     push: true
      #     tags: sgosline/srp-dbschema
      #     platforms: linux/amd64,linux/arm64
      - name: Build and push expression
        uses: docker/build-push-action@v3
        with:
          context: zfExp
          push: true
          tags: sgosline/srp-zfexp
          platforms: linux/amd64,linux/arm64
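For local testing, the same multi-platform build of the new expression image can be reproduced with Docker Buildx. This is a minimal sketch mirroring the workflow step above; the builder name is arbitrary, and it assumes QEMU emulation and a DockerHub login are already set up:

# Sketch: reproduce the CI multi-arch build of the zfExp image locally.
docker buildx create --use --name srp-builder   # hypothetical builder name
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --tag sgosline/srp-zfexp \
  --push \
  zfExp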
2 changes: 1 addition & 1 deletion bmd2Samps/Dockerfile
@@ -1,4 +1,4 @@
FROM rocker/tidyverse
FROM bioconductor/bioconductor_docker
# To setup other dependencies

#COPY setup.sh /setup.sh
2 changes: 1 addition & 1 deletion bmd2Samps_v2/Dockerfile
@@ -1,4 +1,4 @@
FROM rocker/tidyverse
FROM bioconductor/bioconductor_docker
# To setup other dependencies

#COPY setup.sh /setup.sh
31 changes: 0 additions & 31 deletions bmd2Samps_v3/parseGexData.R

This file was deleted.

160 changes: 160 additions & 0 deletions buildV3db.sh
@@ -0,0 +1,160 @@
#!/bin/bash
# V3 of the database includes the zebrafish data

##make a folder if one does not exist
if test -d temp; then
echo "temp directory exists"
else
mkdir temp
fi

##catcmd appends the contents of the first file onto the end of the second
catcmd() {
cat "$1" >> "$2"
}

#################################################### phase I,II data
##Here we have the phase I/II data from Doo Nam/Lisa; we need to copy it into the temp directory
#phase12 data
p12_morph='raw_files/zf_morphology_data_335_chemicals_2020DEC16_fixed.csv'
p12_lpr_1='344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged.csv'
#this second file was just a duplicate with data from chemicals that only had 15 time points or so
#p12_lpr_2='344_zf_LPR_data_phase_1_2_2020JUNE25_updated_plate_id_for_TX_tall_fixed_merged_full_240_timepoints.csv'
orig_lpr='raw_files/p12_lpr.tar.gz'


cmd='tar -xvzf '$orig_lpr
echo $cmd
$cmd

cp $p12_morph temp/morph0.csv
cp $p12_lpr_1 temp/lpr0_1.csv
#cp $p12_lpr_2 temp/lpr0_2.csv

#################################################### phase III data
##Phase 3 data is on Dropbox; we need to pull it into the temp directory
#https://www.dropbox.com/sh/zg0q6wl13a3uo99/AAA0cdAK_fJwkJqpvF_HH6DWa?dl=0/
p3_morph='wget https://www.dropbox.com/sh/zg0q6wl13a3uo99/AACFZprOKkbvydjfoDI3oZo-a/Tanguay%20Phase%203%20zf%20morphology%20data%20PNNL%202021MAR23.csv -O temp/morph1.csv'
p3_lpr='wget https://www.dropbox.com/sh/zg0q6wl13a3uo99/AADd1QBStMguW9qYgzH2eatJa/Tanguay%20Phase%203%20zf%20LPR%20data%20PNNL%202021MAR23.csv -O temp/lpr1.csv'

echo $p3_morph
$p3_morph
echo $p3_lpr
$p3_lpr

#################################################### PFAS data
##PFAS data is also on Dropbox; we need to pull it
##downloading the PFAS data files from Lisa's Dropbox
#epr_cmd="wget https://www.dropbox.com/sh/69ootcq7yyvvx2h/AABgnmHtboM4LevxK1yxPIK-a/zf%20EPA%20PFAS%20EPR_PNNL_05-28-2021.csv -O temp/epr.csv"
#lpr_cmd='wget https://www.dropbox.com/sh/69ootcq7yyvvx2h/AABgzjaRPteU1EZIhnW9zv2Ka/zf%20EPA%20PFAS%20LPR_PNNL_05-28-2021.csv -O temp/lpr2.csv'
#mor_cmd='wget https://www.dropbox.com/sh/69ootcq7yyvvx2h/AABxsOLgwlv7-_HTZ0xaAIlNa/zf%20EPA%20PFAS%20morphology_PNNL_05-28-2021.csv -O temp/morph2.csv'

lpr_cmd="wget https://www.dropbox.com/s/ha7jduok03j82mf/zf%20EPA%20PFAS%20LPR_PNNL_05-28-2021.csv?dl=0 -O temp/lpr2.csv"
echo $lpr_cmd
$lpr_cmd

mor_cmd="wget https://www.dropbox.com/s/jma3b9al3u8hcny/zf%20EPA%20PFAS%20morphology_PNNL_05-28-2021.csv?dl=0 -O temp/morph2.csv"
echo $mor_cmd
$mor_cmd

################################################## run pipeline
#we have 3 pairs of files to run

dpath='/tmp/' ##path to files in docker images

all_lpr=$dpath"temp/lpr0_1.csv "$dpath"temp/lpr1.csv "$dpath"temp/lpr2.csv"
all_morph=$dpath"temp/morph0.csv "$dpath"temp/morph1.csv "$dpath"temp/morph2.csv"

##first we run validation on each
#docker pull sgosline/srp-dbschema
#docker run -v $PWD:/tmp sgosline/srp-dbschema $all_lpr

##then we get the gene data
gpull="docker pull sgosline/srp-exposome"
echo $gpull
$gpull

grun="docker run -v "$PWD":/tmp sgosline/srp-exposome"
echo $grun
$grun


##get the zf expression data
gpull='docker pull sgosline/srp-zfexp'
echo $gpull
$gpull

grun='docker run -v '$PWD':/tmp sgosline/srp-zfexp'
echo $grun
$grun

##then we have to concatenate the two gene summary tables
catg='catcmd srpDEGstats.csv sigGeneStats.csv'
echo $catg
$catg

##then we run morph
dpull="docker pull sgosline/srp-zfbmd"
echo $dpull
$dpull

drun="docker run -v "$PWD":/tmp sgosline/srp-zfbmd --output=/tmp --morpho "$all_morph
echo $drun
$drun

##now rename these files
cpcmdb='mv new_bmds.csv new_bmds1.csv'
echo $cpcmdb
$cpcmdb

cpcmdf='mv new_fits.csv new_fits1.csv'
echo $cpcmdf
$cpcmdf

cpcmdd='mv new_dose.csv new_dose1.csv'
echo $cpcmdd
$cpcmdd

##then we run LPR and concatenate the outputs with the morphology-only results

drun="docker run -v "$PWD":/tmp sgosline/srp-zfbmd --output /tmp --morpho "$all_morph" --LPR "$all_lpr
echo $drun
$drun

echo "BMDs\n"
cc='wc -l new_bmds.csv'
$cc

echo "Dose response\n"
wc -l new_dose.csv
echo "New Fits"
wc -l new_fits.csv



catcmdf='catcmd new_fits1.csv new_fits.csv'
echo $catcmdf
$catcmdf

catcmdd='catcmd new_dose1.csv new_dose.csv'
echo $catcmdd
$catcmdd

catcmdb='catcmd new_bmds1.csv new_bmds.csv'
echo $catcmdb
$catcmdb

##then we use the output to build the database
dpull="docker pull sgosline/srp-bmd2samps:latest"
echo $dpull
$dpull

##now build the database files
drun="docker run -v"$PWD":/tmp sgosline/srp-bmd2samps:latest --chemicals=/tmp/new_bmds.csv,/tmp/new_fits.csv,/tmp/new_dose.csv"
echo $drun
$drun
##then validate again and add to db

trun='tar -cvzf srpCompendiumV2.tar.gz sigGeneStats.csv chemicals.csv samples.csv sampleToChemicals.csv zebrafishSampBMDs.csv zebrafishChemBMDs.csv zebrafishSampXYCoords.csv zebrafishChemXYCoords.csv zebrafishChemDoseResponse.csv zebrafishSampDoseResponse.csv'
echo $trun
$trun
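With the images in place, the script can be run end to end from the repository root; a minimal sketch, assuming the Dropbox links above are still live and Docker is installed:

# Sketch: run the full V3 pipeline, then list what ended up in the compendium tarball.
bash buildV3db.sh
tar -tzf srpCompendiumV2.tar.gz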
2 changes: 1 addition & 1 deletion dbSchema/Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.7
FROM python:3.9-alpine3.18
# To setup other dependencies
RUN apt-get update -qq && apt-get install -y net-tools \
curl \
2 changes: 1 addition & 1 deletion exposome/Dockerfile
@@ -1,4 +1,4 @@
FROM rocker/tidyverse
FROM bioconductor/bioconductor_docker
# To setup other dependencies

#COPY setup.sh /setup.sh
11 changes: 10 additions & 1 deletion exposome/exposome_summary_stats.R
@@ -11,6 +11,10 @@ stop_for_status(res)

projects <- fromJSON(fromJSON(rawToChar(res$content)))

##create a mapping to friendly names for projects
project_map<-data.frame(Project=c('ADIPO','HEPG2','MCF10A','TF-GATEs'),
friendlyName=c('Human adipocyte cell lines','Human Hepg2 cell lines','Human MCF10A cell lines','Human TF-GATEs'))

print(paste('We now have data from',length(projects),'projects'))

#portal_name= 'https://montilab.bu.edu/Xposome-API/portals'
@@ -134,7 +138,12 @@ map <-all.chems%>%
sg.stats <- sig.genes%>%
group_by(Project,cas_number,Conc,Link)%>%
summarize(nGenes=n_distinct(Gene))%>%
left_join(map)
left_join(map)|>
left_join(project_map)|>
ungroup()|>
dplyr::select(-Project)|>
dplyr::rename(Project=friendlyName)|>
dplyr::select(Project,cas_number,Conc,Link,nGenes,Chemical_ID)

write.table(sg.stats,file='/tmp/sigGeneStats.csv',sep=',',row.names=F)
##not using this for now:
14 changes: 14 additions & 0 deletions zfExp/Dockerfile
@@ -0,0 +1,14 @@
FROM bioconductor/bioconductor_docker
# To setup other dependencies

#COPY setup.sh /setup.sh
RUN Rscript -e "install.packages('readxl',dependencies=TRUE, repos='http://cran.rstudio.com')"
RUN Rscript -e "install.packages('dplyr',dependencies=TRUE, repos='http://cran.rstudio.com')"
RUN Rscript -e "install.packages('tidyr',dependencies=TRUE, repos='http://cran.rstudio.com')"

COPY . zfExp
COPY chemicals.csv zfExp
WORKDIR zfExp

ENTRYPOINT ["Rscript","parseGexData.R"]
VOLUME ["/tmp"]
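Once built, the expression image is invoked the same way buildV3db.sh calls it, mounting the working directory as /tmp so parseGexData.R can read and write files there; a minimal sketch:

# Sketch: pull and run the new expression container as buildV3db.sh does.
docker pull sgosline/srp-zfexp
docker run -v "$PWD":/tmp sgosline/srp-zfexp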
File renamed without changes.