diff --git a/README.md b/README.md
index a27b3a1..e423cb1 100644
--- a/README.md
+++ b/README.md
@@ -555,9 +555,9 @@ as time to results is lower.
Do not produce any reports. Useful if only the demultiplexed BAM is of interest.
## Quality Control
-For quality control, we offer three R scripts to help you troubleshoot your
-data. The first two are for low multiplex data.
-The third is for high plex data, easily showing 384 barcodes.
+For quality control, we offer two R scripts to help you troubleshoot your
+data. The first is for low multiplex data.
+The second is for high plex data, easily showing 384 barcodes.
### `report_detail.R`
@@ -647,22 +647,8 @@ Number of adapters:
-### `counts_detail.R`
-
-The second is for multiple `counts` files: `scripts/r/counts_detail.R`.
-This script cannot be called from the CLI yet, to be implemented.
-
-#### ZMW Counts
-Single plot:
-
-
-
-Grouped by barcode into facets:
-
-
-
### `report_summary.R`
-The third script is for high-plex data in one `lima.report` file:
+The second script is for high-plex data in one `lima.report` file:
`scripts/r/report_summary.R`.
Example:
diff --git a/scripts/r/counts_lowplex.R b/scripts/r/counts_lowplex.R
deleted file mode 100644
index 3f62474..0000000
--- a/scripts/r/counts_lowplex.R
+++ /dev/null
@@ -1,49 +0,0 @@
-library(ggplot2)
-library(Biostrings)
-library(dplyr)
-library(tidyr)
-library(viridis)
-library(data.table)
-
-plot.counts = function(counts, barcodePath, grouped = FALSE) {
- ds = data.frame()
- i = 0
- for (l in counts) {
- d = as.data.frame(fread(l,stringsAsFactors=FALSE))
- d = cbind(d,l)
- i = i + 1
- ds = rbind(ds,d)
- }
- colnames(ds) = c("IdxFirst", "IdxCombined", "Counts", "Run")
- barcodes = readDNAStringSet(barcodePath)
- bc_names = names(barcodes)
- ds$NameLeading = bc_names[ds$IdxFirst+1]
- ds$NameTrailing = bc_names[ds$IdxCombined+1]
- ds$BarcodePair = paste(bc_names[ds$IdxFirst+1],bc_names[ds$IdxCombined+1],sep="--")
-
- g = ggplot(data=ds, aes(x=BarcodePair, y=Counts, fill=Run)) +facet_wrap(~BarcodePair,scales = "free_x")+
- geom_bar(stat="identity", position=position_dodge(width=1))+
- geom_text(aes(label=Counts,y=mean(range(Counts))), color="black",
- position = position_dodge(1), size=3.5,angle = 90)+
- scale_color_brewer(palette = "Set1")+
- theme(legend.position="top", legend.direction = "vertical") +
- coord_cartesian(ylim = c(0, max(ds$Counts)*1.1))
- ggsave("counts_group.png",g,width=36,height=24,dpi=100,units="cm")
-
- g = ggplot(data=ds, aes(x=BarcodePair, y=Counts, fill=Run)) +
- geom_bar(stat="identity", position=position_dodge(width=1))+
- geom_text(aes(label=Counts), vjust=.4, hjust=-.1, color="black",
- position = position_dodge(0.9), size=3.5,angle = 90)+
- scale_color_brewer(palette = "Set1")+
- theme_minimal() +
- theme(axis.text.x = element_text(angle = 90, hjust = 0))+
- theme(legend.position="top", legend.direction = "vertical") +
- coord_cartesian(ylim = c(0, max(ds$Counts)*1.1))
- ggsave("counts_nogroup.png",g,width=36,height=24,dpi=100,units="cm")
-}
-
-plot.counts(c("m54007_170701_183412.subreadset.demux.counts",
- "m54007_170702_064558.subreadset.demux.counts",
- "m54200_170625_190247.subreadset.demux.counts",
- "m54200_170626_051342.subreadset.demux.counts"),
- "Sequel_RSII_16_barcodes_v1.fasta")
diff --git a/scripts/r/report_detail.R b/scripts/r/report_detail.R
index 04b269d..202b7ce 100644
--- a/scripts/r/report_detail.R
+++ b/scripts/r/report_detail.R
@@ -41,6 +41,8 @@ count_labeller <- function(value){
sapply(value,function(x) { paste(x, " / ZMWs ", reportCounts[reportCounts$BarcodePair==x,]$n,sep="")})
}
+tryCatch({report$ScoresCombined = sapply(report$ScoresCombined,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){})
+names(report$ScoresCombined) = c()
tryCatch({report$ReadLengths = sapply(report$ReadLengths,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){})
names(report$ReadLengths) = c()
report$HQLength = sapply(report$ReadLengths,sum)
@@ -77,6 +79,8 @@ titration_pass = report %>% filter(Barcoded, PassedFilters) %>% filter(BarcodePa
titration_pass$Filter = "PASS"
readLengthsUnnested = report %>% select(ReadLengths, Barcoded) %>% unnest(ReadLengths)
+scoresCombinedUnnested = report %>% select(ScoresCombined, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS")) %>% unnest(ScoresCombined)
+refSpans = report %>% select(NumBarcodedRegionsPassed, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS"))
dpi = 150
facetHeight = max(nrow(unique_bps)+1,4)/4*5+1
@@ -170,6 +174,22 @@ g = ggplot(reportFilteredADP, geom='none', aes(group = BarcodePair)) +
ylab("Number of ZMWs") + xlab("Mean Score")
ggsave(paste0("detail_scores_per_adapter.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)
+g = ggplot(scoresCombinedUnnested, aes(group = BarcodePair)) +
+ facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+
+ coord_cartesian(xlim=c(0,100))+
+ geom_histogram(binwidth=5,aes(ScoresCombined,group=Filter,fill=Filter),color="grey",alpha=.15)+
+ theme_light() +
+ ylab("Number of ZMWs") + xlab("Individual Scores")
+ggsave(paste0("detail_individual_scores.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)
+
+g = ggplot(refSpans, aes(group = BarcodePair)) +
+ facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+
+ # coord_cartesian(xlim=c(0,100))+
+ geom_histogram(binwidth=1,aes(NumBarcodedRegionsPassed,group=Filter,fill=Filter),color="grey",alpha=.15)+
+ theme_light() +
+ ylab("Number of ZMWs") + xlab("Number of scoring barcode regions")
+ggsave(paste0("detail_scoring_regions.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)
+
q = quantile(numadapters$NumAdapters,0.999)
g = ggplot(numadapters, aes(group = BarcodePair, x = NumAdapters)) +
facet_wrap(~BarcodePair, scales = "free_y",ncol=4)+
@@ -220,4 +240,4 @@ g = ggplot(y, aes(group = BarcodePair, HQLength, color = BarcodePair)) +
coord_cartesian(xlim = c(0, quantile(y$HQLength,0.999))) +
geom_freqpoly(binwidth = 2000)+
theme_minimal()+ylab("Number of ZMWs")+xlab("HQ Length")
-ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE)
\ No newline at end of file
+ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE)
diff --git a/scripts/r/report_summary.R b/scripts/r/report_summary.R
index 9e17a77..0e81bea 100644
--- a/scripts/r/report_summary.R
+++ b/scripts/r/report_summary.R
@@ -98,4 +98,4 @@ g = ggplot(report %>% filter(Barcoded) %>% filter(IdxFirst == IdxCombined)) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank(),plot.title = element_text(hjust = 0.5))+
xlab("Barcoded Samples") +
ylab("HQ Length")
-ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm")
\ No newline at end of file
+ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm")