Skip to content

Commit

Permalink
feat: iss#118
Browse files Browse the repository at this point in the history
  • Loading branch information
slsevilla committed Feb 14, 2024
1 parent 24e2c2c commit fc76592
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 160 deletions.
7 changes: 6 additions & 1 deletion workflow/rules/annotations.smk
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,10 @@ if config["run_contrasts"] == "Y":
params:
rscript_wrapper=join(SCRIPTSDIR,"_go_enrichment_wrapper.R"),
rmd=join(SCRIPTSDIR,"_go_enrichment.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rscript_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
output_dir = join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment"),
species = config["genome"],
Expand All @@ -323,7 +327,8 @@ if config["run_contrasts"] == "Y":
# rum script
Rscript {params.rscript_wrapper} \\
--rmd {params.rmd} \\
--sourcefile {params.rscript_functions} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--output_dir {params.output_dir} \\
--report {output.html} \\
--peak_list "$clean_sample_list" \\
Expand Down
11 changes: 10 additions & 1 deletion workflow/rules/diff.smk
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,11 @@ rule DESeq:
elbowlimits_frag=join(RESULTSDIR,"peaks","{qthresholds}","contrasts","{contrast_list}.{dupstatus}","{contrast_list}.{dupstatus}.{peak_caller_type}.fragmentsbased_diffanalysis_elbowlimits.yaml"),
pdf=join(RESULTSDIR,"peaks","{qthresholds}","contrasts","{contrast_list}.{dupstatus}","{contrast_list}.{dupstatus}.{peak_caller_type}.venn.pdf")
params:
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rmd=join(SCRIPTSDIR,"_diff_markdown.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rscript_venn=join(SCRIPTSDIR,"_plot_results_venn.R"),
contrast_list="{contrast_list}",
dupstatus = "{dupstatus}",
Expand Down Expand Up @@ -211,6 +214,9 @@ rule DESeq:
else
Rscript {params.rscript_diff} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--countsmatrix {input.cm_auc} \\
--sampleinfo {input.si} \\
--dupstatus {params.dupstatus} \\
Expand Down Expand Up @@ -242,6 +248,9 @@ rule DESeq:
# Do not use --rawcountsprescaled as these counts are not prescaled!
Rscript {params.rscript_diff} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--countsmatrix {input.cm_frag} \\
--sampleinfo {input.si} \\
--dupstatus {params.dupstatus} \\
Expand Down
10 changes: 8 additions & 2 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,14 @@ rule spikein_assessment:
params:
rscript_wrapper=join(SCRIPTSDIR,"_generate_spikein_wrapper.R"),
rmd=join(SCRIPTSDIR,"_generate_spikein_plots.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rscript_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
spikein=config["spikein_genome"],
envmodules:
TOOLS['R'],
TOOLS["R"],
output:
html=join(RESULTSDIR,'qc',"spikein_qc_report.html"),
shell:
Expand All @@ -112,7 +116,9 @@ rule spikein_assessment:
# rum script
Rscript {params.rscript_wrapper} \\
--rmd {params.rmd} \\
--sourcefile {params.rscript_functions} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--report {output.html} \\
--bam_list "$clean_sample_list" \\
--spikein_control $species_name
Expand Down
24 changes: 24 additions & 0 deletions workflow/scripts/_carlisle_functions.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
########################################################################
# LIBRARY
########################################################################
carlisle_handle_packages<-function(pkg_df){
for (rowid in rownames(pkg_df)){
pkg=pkg_df[rowid,"package"]
source=pkg_df[rowid,"source"]
version=pkg_df[rowid,"version"]
gh_name=pkg_df[rowid,"gh_name"]

need_install <- pkg[!(pkg %in% installed.packages()[,"Package"])]
if (length(need_install)!=0){
print(paste0("Installing: ", pkg))
if (source=="bc") BiocManager::install(pkg,ask=FALSE,update=FALSE)
if (source=="cr") install.packages(pkg,version=version,repos = "http://cran.us.r-project.org",
local = FALSE,ask=FALSE,update=FALSE)
if (source=="gh") remotes::install_github(gh_name,version=version,local = FALSE,update=FALSE)
}

print(paste0("Loading: ",pkg))
invisible(lapply(pkg, library, character.only = TRUE))
}
}

########################################################################
# SPIKE IN PLOT
########################################################################
Expand Down
76 changes: 24 additions & 52 deletions workflow/scripts/_diff_markdown.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ title: "DifferentialCutAndRun"
output:
html_document:
params:
carlisle_functions: "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R"
Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_carlisle/"
Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/conf/Rpack.config"
rawcountsmatrix: "~/../../../Volumes/ccbr1155/CS030666/analysis/results/peaks/contrasts/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed_countsmatrix.txt"
coldata: "~/../../../Volumes/ccbr1155/CS030666/analysis/results/peaks/contrasts/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed_sampleinfo.txt"
dupstatus: "dedup" # dedup or no_dedup
Expand All @@ -26,64 +29,33 @@ editor_options:

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# list packages
list.of.packages=c( "DT",
"RColorBrewer",
"dplyr",
"ggfortify",
"ggplot2",
"ggrepel",
"pander",
"plotly",
"reshape2",
"stringr",
"tidyverse",
"yaml")
list.of.bioconductor.packages=c("BiocManager",
"ChIPseeker",
"DESeq2",
"ELBOW",
"EnhancedVolcano",
"GenomicFeatures",
"HTSFilter",
"TxDb.Hsapiens.UCSC.hg19.knownGene",
"TxDb.Hsapiens.UCSC.hg38.knownGene",
"TxDb.Mmusculus.UCSC.mm10.knownGene",
"edgeR",
"org.Hs.eg.db",
"org.Mm.eg.db",
"rtracklayer")
## add extra packages for HS1
if (params$species=="hs1"){
print("going to install")
#https://bioconductor.org/packages/release/data/annotation/html/BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0.html
list.of.bioconductor.packages=c(list.of.bioconductor.packages,"BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0")
}
#install as needed
new.packages <- list.of.bioconductor.packages[!(list.of.bioconductor.packages %in% installed.packages()[,"Package"])]
# source needed functions
carlisle_functions=params$carlisle_functions
# set library dir, load this and remove any other dirs to avoid confusion
# between personally created pkgs and the pipeline package
## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library"
Rlib_dir=params$Rlib_dir
Rpkg_config=params$Rpkg_config
print(paste0("Using the lib.loc location: ",Rlib_dir))
assign(".lib.loc", Rlib_dir, envir = environment(.libPaths))
.libPaths()
# read in package info
pkg_df=read.csv(Rpkg_config)
pkg_df=subset(pkg_df,diff=="Y")
pkg_df
# for each package check installation, if present then load library
carlisle_handle_packages(pkg_df)
##handle ELBOW separately - it requires an older version of bioconductor / R
if("ELBOW" %in% new.packages){
install.packages("/data/CCBR_Pipeliner/Pipelines/CARLISLE_dev/resources/other/ELBOW_1.10.0.tar.gz",
install.packages(paste0(Rlib_dir,"ELBOW_1.10.0.tar.gz"),
repos = NULL, type="source", INSTALL_opts = '--no-lock')
new.packages=new.packages[names(new.packages) != "ELBOW"]
}
##handle all other pkgs
if(length(new.packages)) {
print(new.packages)
BiocManager::install(new.packages, INSTALL_opts = '--no-lock')
}
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) {
print(new.packages)
install.packages(new.packages, INSTALL_opts = '--no-lock')
}
# load packages
invisible(lapply(list.of.packages, library, character.only = TRUE))
invisible(lapply(list.of.bioconductor.packages, library, character.only = TRUE))
```

## Loading SampleInfo and Counts
Expand Down
88 changes: 52 additions & 36 deletions workflow/scripts/_diff_markdown_wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,47 +10,56 @@ path="/data/CCBR/projects/ccbr1155/CS030586_CARAP/diff"
# create parser object
parser <- ArgumentParser()
parser$add_argument("--rmd", type="character", required=TRUE,
help="path to rmd")
help="path to rmd")
parser$add_argument("--carlisle_functions", type="character", required=TRUE,
help="path to carlisle functions file")
parser$add_argument("--Rlib_dir", type="character", required=TRUE,
help="path to R lib directory")
parser$add_argument("--Rpkg_config", type="character", required=TRUE,
help="path to package config")
parser$add_argument("--spiked", type="character", required=TRUE,
help="type of normalization used")
help="type of normalization used")
parser$add_argument("--rawcountsprescaled", action='store_true',
help="if counts are scaled by spike-in already ... Y (for AUC-based method) or N (for fragments-based method)")
help="if counts are scaled by spike-in already ... Y (for AUC-based method) or N (for fragments-based method)")
parser$add_argument("--scalesfbymean", action='store_true',
help="DESeq2 scaling factors are around 1. To ensure that spike-in scaling factors are also around 1 divide each scaling factor by mean of all scaling factors.")
help="DESeq2 scaling factors are around 1. To ensure that spike-in scaling factors are also around 1 divide each scaling factor by mean of all scaling factors.")
parser$add_argument("--htsfilter", action='store_true',
help="Use HTSFilter")
help="Use HTSFilter")
parser$add_argument("--contrast_data", type="character", required=FALSE, default=NULL,
help="contrast_data inputs file")
help="contrast_data inputs file")
parser$add_argument("--countsmatrix", type="character", required=TRUE,
help="countsmatrix as TSV")
help="countsmatrix as TSV")
parser$add_argument("--sampleinfo", type="character", required=TRUE,
help="sample info as TSV")
help="sample info as TSV")
parser$add_argument("--dupstatus", type="character", required=TRUE,
help="either dedup or no_dedup")
help="either dedup or no_dedup")
parser$add_argument("--fdr_cutoff", type="double", default=0.05, required=FALSE,
help="FDR cutoff [default %(default)s]")
help="FDR cutoff [default %(default)s]")
parser$add_argument("--log2fc_cutoff", type="double", default=0.59, required=FALSE,
help="log2foldchange cutoff [default %(default)s]")
help="log2foldchange cutoff [default %(default)s]")
parser$add_argument("--condition1", type="character", required=TRUE,
help = "condition1")
help = "condition1")
parser$add_argument("--condition2", type="character", required=TRUE,
help = "condition2")
help = "condition2")
parser$add_argument("--results", type="character", required=TRUE,
help = "path to results TSV")
help = "path to results TSV")
parser$add_argument("--report", type="character", required=TRUE,
help = "HTML report")
help = "HTML report")
parser$add_argument("--elbowlimits", type="character", required=TRUE,
help = "YAML ELBOW limits")
help = "YAML ELBOW limits")
parser$add_argument("--tmpdir", type="character", required=FALSE, default="/tmp",
help = "tmpdir")
help = "tmpdir")
parser$add_argument("--species", type="character", required=TRUE,
help = "species")
help = "species")
parser$add_argument("--gtf", type="character", required=FALSE,
help = "gtf path - needed for HS1")
help = "gtf path - needed for HS1")
args <- parser$parse_args()

gtf <- args$gtf
if (debug){
carlisle_functions="/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R"
Rlib_dir="/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_carlisle/"
Rpkg_config="/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/conf/Rpack.config"
rawcountsmatrix="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/counts_matrix.txt"
coldata="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/sample_info.txt"
dupstatus="dedup"
Expand All @@ -69,6 +78,9 @@ if (debug){
tmpdir="/dev/shm"
gtf="~/../../Volumes/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf"
} else {
carlisle_functions=args$carlisle_functions,
Rlib_dir=args$Rlib_dir,
Rpkg_config=args$Rpkg_config,
rawcountsmatrix=args$countsmatrix
coldata=args$sampleinfo
dupstatus=args$dupstatus
Expand All @@ -90,23 +102,27 @@ if (debug){
gtf=args$gtf
}

parameters=list(rawcountsmatrix=rawcountsmatrix,
coldata=coldata,
spiked=spiked,
rawcountsprescaled=rawcountsprescaled,
scalesfbymean=scalesfbymean,
contrast_data=contrast_data,
dupstatus=dupstatus,
condition1=condition1,
condition2=condition2,
indexcols=indexcols,
htsfilter=htsfilter,
fdr_cutoff=fdr_cutoff,
log2fc_cutoff=log2fc_cutoff,
results=results,
elbowlimits=elbowlimits,
species=species,
gtf=gtf)
parameters=list(
carlisle_functions=carlisle_functions,
Rlib_dir=Rlib_dir,
Rpkg_config=Rpkg_config,
rawcountsmatrix=rawcountsmatrix,
coldata=coldata,
spiked=spiked,
rawcountsprescaled=rawcountsprescaled,
scalesfbymean=scalesfbymean,
contrast_data=contrast_data,
dupstatus=dupstatus,
condition1=condition1,
condition2=condition2,
indexcols=indexcols,
htsfilter=htsfilter,
fdr_cutoff=fdr_cutoff,
log2fc_cutoff=log2fc_cutoff,
results=results,
elbowlimits=elbowlimits,
species=species,
gtf=gtf)

rmarkdown::render(args$rmd,
params=parameters,
Expand Down
Loading

0 comments on commit fc76592

Please sign in to comment.