Skip to content

Commit

Permalink
R Scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
armintoepfer committed Sep 19, 2018
1 parent 20ae151 commit 7826911
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 69 deletions.
22 changes: 4 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -555,9 +555,9 @@ as time to results is lower.
Do not produce any reports. Useful if only the demultiplexed BAM is of interest.

## Quality Control
For quality control, we offer three R scripts to help you troubleshoot your
data. The first two are for low multiplex data.
The third is for high plex data, easily showing 384 barcodes.
For quality control, we offer two R scripts to help you troubleshoot your
data. The first is for low multiplex data.
The second is for high plex data, easily showing 384 barcodes.

### `report_detail.R`

Expand Down Expand Up @@ -647,22 +647,8 @@ Number of adapters:

<img src="img/plots/detail_num_adapters.png" width="886px">

### `counts_detail.R`

The second is for multiple `counts` files: `scripts/r/counts_detail.R`.
This script cannot be called from the CLI yet, to be implemented.

#### ZMW Counts
Single plot:

<img src="img/plots/counts_nogroup.png" width="886px">

Grouped by barcode into facets:

<img src="img/plots/counts_group.png" width="886px">

### `report_summary.R`
The third script is for high-plex data in one `lima.report` file:
The second script is for high-plex data in one `lima.report` file:
`scripts/r/report_summary.R`.

Example:
Expand Down
49 changes: 0 additions & 49 deletions scripts/r/counts_lowplex.R

This file was deleted.

22 changes: 21 additions & 1 deletion scripts/r/report_detail.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ count_labeller <- function(value){
sapply(value,function(x) { paste(x, " / ZMWs ", reportCounts[reportCounts$BarcodePair==x,]$n,sep="")})
}

tryCatch({report$ScoresCombined = sapply(report$ScoresCombined,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){})
names(report$ScoresCombined) = c()
tryCatch({report$ReadLengths = sapply(report$ReadLengths,function(x) list(as.numeric(unlist(strsplit(x,",")))))},error=function(e){})
names(report$ReadLengths) = c()
report$HQLength = sapply(report$ReadLengths,sum)
Expand Down Expand Up @@ -77,6 +79,8 @@ titration_pass = report %>% filter(Barcoded, PassedFilters) %>% filter(BarcodePa
titration_pass$Filter = "PASS"

readLengthsUnnested = report %>% select(ReadLengths, Barcoded) %>% unnest(ReadLengths)
scoresCombinedUnnested = report %>% select(ScoresCombined, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS")) %>% unnest(ScoresCombined)
refSpans = report %>% select(NumBarcodedRegionsPassed, PassedFilters, BarcodePair) %>% filter(BarcodePair %in% unique_bps$BarcodePair) %>% mutate(Filter = PassedFilters) %>% mutate(Filter=ifelse(Filter==0,"NONE","PASS"))

dpi = 150
facetHeight = max(nrow(unique_bps)+1,4)/4*5+1
Expand Down Expand Up @@ -170,6 +174,22 @@ g = ggplot(reportFilteredADP, geom='none', aes(group = BarcodePair)) +
ylab("Number of ZMWs") + xlab("Mean Score")
ggsave(paste0("detail_scores_per_adapter.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)

g = ggplot(scoresCombinedUnnested, aes(group = BarcodePair)) +
facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+
coord_cartesian(xlim=c(0,100))+
geom_histogram(binwidth=5,aes(ScoresCombined,group=Filter,fill=Filter),color="grey",alpha=.15)+
theme_light() +
ylab("Number of ZMWs") + xlab("Individual Scores")
ggsave(paste0("detail_individual_scores.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)

g = ggplot(refSpans, aes(group = BarcodePair)) +
facet_wrap(~BarcodePair,scales = "free_y",ncol=4)+
# coord_cartesian(xlim=c(0,100))+
geom_histogram(binwidth=1,aes(NumBarcodedRegionsPassed,group=Filter,fill=Filter),color="grey",alpha=.15)+
theme_light() +
ylab("Number of ZMWs") + xlab("Number of scoring barcode regions")
ggsave(paste0("detail_scoring_regions.",output),g,width=facetWidth,height=facetHeight,dpi=dpi,units="cm",limitsize = FALSE)

q = quantile(numadapters$NumAdapters,0.999)
g = ggplot(numadapters, aes(group = BarcodePair, x = NumAdapters)) +
facet_wrap(~BarcodePair, scales = "free_y",ncol=4)+
Expand Down Expand Up @@ -220,4 +240,4 @@ g = ggplot(y, aes(group = BarcodePair, HQLength, color = BarcodePair)) +
coord_cartesian(xlim = c(0, quantile(y$HQLength,0.999))) +
geom_freqpoly(binwidth = 2000)+
theme_minimal()+ylab("Number of ZMWs")+xlab("HQ Length")
ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE)
ggsave(paste0("detail_hq_length_linehist_nogroup.",output),g,width=25,height=15,dpi=dpi,units="cm",limitsize = FALSE)
2 changes: 1 addition & 1 deletion scripts/r/report_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,4 @@ g = ggplot(report %>% filter(Barcoded) %>% filter(IdxFirst == IdxCombined)) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank(),plot.title = element_text(hjust = 0.5))+
xlab("Barcoded Samples") +
ylab("HQ Length")
ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm")
ggsave("summary_hq_length_hist_2d.png",width=50,height=15,dpi=150,units="cm")

0 comments on commit 7826911

Please sign in to comment.