From 78269119c724cbe1b5f9bdb56003df3a8fb2ceb8 Mon Sep 17 00:00:00 2001 From: Armin Toepfer Date: Wed, 19 Sep 2018 15:25:16 +0200 Subject: [PATCH] R Scripts --- README.md | 22 ++++------------- scripts/r/counts_lowplex.R | 49 -------------------------------------- scripts/r/report_detail.R | 22 ++++++++++++++++- scripts/r/report_summary.R | 2 +- 4 files changed, 26 insertions(+), 69 deletions(-) delete mode 100644 scripts/r/counts_lowplex.R diff --git a/README.md b/README.md index a27b3a1..e423cb1 100644 --- a/README.md +++ b/README.md @@ -555,9 +555,9 @@ as time to results is lower. Do not produce any reports. Useful if only the demultiplexed BAM is of interest. ## Quality Control -For quality control, we offer three R scripts to help you troubleshoot your -data. The first two are for low multiplex data. -The third is for high plex data, easily showing 384 barcodes. +For quality control, we offer two R scripts to help you troubleshoot your +data. The first is for low multiplex data. +The second is for high plex data, easily showing 384 barcodes. ### `report_detail.R` @@ -647,22 +647,8 @@ Number of adapters: -### `counts_detail.R` - -The second is for multiple `counts` files: `scripts/r/counts_detail.R`. -This script cannot be called from the CLI yet, to be implemented. - -#### ZMW Counts -Single plot: - - - -Grouped by barcode into facets: - - - ### `report_summary.R` -The third script is for high-plex data in one `lima.report` file: +The second script is for high-plex data in one `lima.report` file: `scripts/r/report_summary.R`. Example: diff --git a/scripts/r/counts_lowplex.R b/scripts/r/counts_lowplex.R deleted file mode 100644 index 3f62474..0000000 --- a/scripts/r/counts_lowplex.R +++ /dev/null @@ -1,49 +0,0 @@ -library(ggplot2) -library(Biostrings) -library(dplyr) -library(tidyr) -library(viridis) -library(data.table) - -plot.counts = function(counts, barcodePath, grouped = FALSE) { - ds = data.frame() - i = 0 - for (l in counts) { - d = as.data.frame(fread(l,stringsAsFactors=FALSE)) - d = cbind(d,l) - i = i + 1 - ds = rbind(ds,d) - } - colnames(ds) = c("IdxFirst", "IdxCombined", "Counts", "Run") - barcodes = readDNAStringSet(barcodePath) - bc_names = names(barcodes) - ds$NameLeading = bc_names[ds$IdxFirst+1] - ds$NameTrailing = bc_names[ds$IdxCombined+1] - ds$BarcodePair = paste(bc_names[ds$IdxFirst+1],bc_names[ds$IdxCombined+1],sep="--") - - g = ggplot(data=ds, aes(x=BarcodePair, y=Counts, fill=Run)) +facet_wrap(~BarcodePair,scales = "free_x")+ - geom_bar(stat="identity", position=position_dodge(width=1))+ - geom_text(aes(label=Counts,y=mean(range(Counts))), color="black", - position = position_dodge(1), size=3.5,angle = 90)+ - scale_color_brewer(palette = "Set1")+ - theme(legend.position="top", legend.direction = "vertical") + - coord_cartesian(ylim = c(0, max(ds$Counts)*1.1)) - ggsave("counts_group.png",g,width=36,height=24,dpi=100,units="cm") - - g = ggplot(data=ds, aes(x=BarcodePair, y=Counts, fill=Run)) + - geom_bar(stat="identity", position=position_dodge(width=1))+ - geom_text(aes(label=Counts), vjust=.4, hjust=-.1, color="black", - position = position_dodge(0.9), size=3.5,angle = 90)+ - scale_color_brewer(palette = "Set1")+ - theme_minimal() + - theme(axis.text.x = element_text(angle = 90, hjust = 0))+ - theme(legend.position="top", legend.direction = "vertical") + - coord_cartesian(ylim = c(0, max(ds$Counts)*1.1)) - ggsave("counts_nogroup.png",g,width=36,height=24,dpi=100,units="cm") -} - -plot.counts(c("m54007_170701_183412.subreadset.demux.counts", - "m54007_170702_064558.subreadset.demux.counts", - "m54200_170625_190247.subreadset.demux.counts", - "m54200_170626_051342.subreadset.demux.counts"), - "Sequel_RSII_16_barcodes_v1.fasta") diff --git a/scripts/r/report_detail.R b/scripts/r/report_detail.R index 04b269d..202b7ce 100644 --- a/scripts/r/report_detail.R +++ b/scripts/r/report_detail.R @@ -41,6 +41,8 @@ count_labeller <- function(value){ sapply(value,function(x) { paste(x, " / ZMWs ", reportCounts[reportCounts$BarcodePair==x,]$n,sep="")}) } +tryCatch({report$ScoresCombined = sapply(report$ScoresCombined,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){}) +names(report$ScoresCombined) = c() tryCatch({report$ReadLengths = sapply(report$ReadLengths,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){}) names(report$ReadLengths) = c() report$HQLength = sapply(report$ReadLengths,sum) @@ -77,6 +79,8 @@ titration_pass = report %>% filter(Barcoded, PassedFilters) %>% filter(BarcodePa titration_pass$Filter = "PASS" readLengthsUnnested = report %>% select(ReadLengths, Barcoded) %>% unnest(ReadLengths) +scoresCombinedUnnested = report %>% select(ScoresCombined, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS")) %>% unnest(ScoresCombined) +refSpans = report %>% select(NumBarcodedRegionsPassed, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS")) dpi = 150 facetHeight = max(nrow(unique_bps)+1,4)/4*5+1 @@ -170,6 +174,22 @@ g = ggplot(reportFilteredADP, geom='none', aes(group = BarcodePair)) + ylab("Number of ZMWs") + xlab("Mean Score") ggsave(paste0("detail_scores_per_adapter.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE) +g = ggplot(scoresCombinedUnnested, aes(group = BarcodePair)) + + facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+ + coord_cartesian(xlim=c(0,100))+ + geom_histogram(binwidth=5,aes(ScoresCombined,group=Filter,fill=Filter),color="grey",alpha=.15)+ + theme_light() + + ylab("Number of ZMWs") + xlab("Individual Scores") +ggsave(paste0("detail_individual_scores.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE) + +g = ggplot(refSpans, aes(group = BarcodePair)) + + facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+ + # coord_cartesian(xlim=c(0,100))+ + geom_histogram(binwidth=1,aes(NumBarcodedRegionsPassed,group=Filter,fill=Filter),color="grey",alpha=.15)+ + theme_light() + + ylab("Number of ZMWs") + xlab("Number of scoring barcode regions") +ggsave(paste0("detail_scoring_regions.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE) + q = quantile(numadapters$NumAdapters,0.999) g = ggplot(numadapters, aes(group = BarcodePair, x = NumAdapters)) + facet_wrap(~BarcodePair, scales = "free_y",ncol=4)+ @@ -220,4 +240,4 @@ g = ggplot(y, aes(group = BarcodePair, HQLength, color = BarcodePair)) + coord_cartesian(xlim = c(0, quantile(y$HQLength,0.999))) + geom_freqpoly(binwidth = 2000)+ theme_minimal()+ylab("Number of ZMWs")+xlab("HQ Length") -ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE) \ No newline at end of file +ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE) diff --git a/scripts/r/report_summary.R b/scripts/r/report_summary.R index 9e17a77..0e81bea 100644 --- a/scripts/r/report_summary.R +++ b/scripts/r/report_summary.R @@ -98,4 +98,4 @@ g = ggplot(report %>% filter(Barcoded) %>% filter(IdxFirst == IdxCombined)) + theme(axis.text.x=element_blank(),axis.ticks.x=element_blank(),plot.title = element_text(hjust = 0.5))+ xlab("Barcoded Samples") + ylab("HQ Length") -ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm") \ No newline at end of file +ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm")