From 54bbb82e761691209d72042f3a536fb96b5951f4 Mon Sep 17 00:00:00 2001 From: salexander4 Date: Wed, 24 Nov 2021 13:16:48 -0500 Subject: [PATCH 1/2] dada2 r script first write --- microbiomes/qc_amplicons/01_init_QC.sh | 5 ++- microbiomes/qc_amplicons/02_dada2.r | 54 +++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) mode change 100644 => 100755 microbiomes/qc_amplicons/01_init_QC.sh diff --git a/microbiomes/qc_amplicons/01_init_QC.sh b/microbiomes/qc_amplicons/01_init_QC.sh old mode 100644 new mode 100755 index 19ab28d..76e3e8a --- a/microbiomes/qc_amplicons/01_init_QC.sh +++ b/microbiomes/qc_amplicons/01_init_QC.sh @@ -1,3 +1,4 @@ + # get local variables source local.env @@ -63,5 +64,7 @@ done EOF if $autorun; then - sbatch ${outdir}/01_init_QC/01_init_QC.sbatch + +sbatch ${outdir}/01_init_QC/01_init_QC.sbatch + fi diff --git a/microbiomes/qc_amplicons/02_dada2.r b/microbiomes/qc_amplicons/02_dada2.r index d6e6b79..4e602de 100644 --- a/microbiomes/qc_amplicons/02_dada2.r +++ b/microbiomes/qc_amplicons/02_dada2.r @@ -1,3 +1,55 @@ library(dada2) -#do dada2 stuff +#some people run packageVersion(dada2) after to see current version, however it's not helpful + +path <- "path1" + +#need to change "path1" to where the data can be found + +forward_reads <- sort(list.files(path, pattern="R1.fastq", full.names=T)) + +reverse_reads <- sort(list.files(path, pattern="R2.fastq", full.names=T)) + +#I believe that dada2 might want us to run its own filter and trim function, however we might get away without using it since prevous cutting will be done + +#The following is to assign the filtered reads a file + +filtered_forward_reads <- file.path(path, "filtered", paste0(sample.names, "_forward_filtered.fastq.gz")) + +filtered_reverse_reads <- file.path(path, "filtered", paste0(sample.names, "_reverse_filtered.fastq.gz")) + +out <- filteredAndTrim(forward_reads, filtered_forward_reads, reverse_reads, filtered_reverse_reads, truncLen=c(240,160), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=T, compress=T, multithread=T) + +#learning errors is a key component, must be done with filtered reads + +err_forward_reads <- learnErrors(filtered_forward_reads, multithread=T) + +err_reverse_reads <- learnErrors(filtered_reverse_reads, multithread=T) + +#There is a bult-in function that allows the user to plot errors, just allows user to visualize the estimated error rates +#If decided to run this plot- + +plotErrors(err_forward_reads, nominalQ=T) + +plotErrors(err_reverse_reads, nominalQ=T) + +#after dada2 has learned errors, user can denoise + +dada_forward <- dada(filtered_forward_reads, err=err_forward_reads, multithread=T) + +dada_reverse <- dada(Filtered_reverse_reads, err-err_reverse_reads, multithread=T) + +#Inspect the denoising results, if wanted by "dada_forward[[1]] + +#After denoising, we can merge the paired reads + +mergers <- mergePairs(dada_forward, filtered_forward_reads, dada_reverse, filtered_reverse_reads, verbose=T) + +#Again, if wanted to inspect mergers run head(mergers[[1]]) + +#Construct ASV + +seqtab <- makeSequenceTable(mergers) +dim(seqtab) + + From 41efff2ec8778cf49e093d20a8eef91fba9197e9 Mon Sep 17 00:00:00 2001 From: salexander4 Date: Wed, 1 Dec 2021 12:36:55 -0500 Subject: [PATCH 2/2] updated r script --- microbiomes/qc_amplicons/01_init_QC.sh | 4 ++-- microbiomes/qc_amplicons/02_dada2.r | 29 +++++++++++++++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/microbiomes/qc_amplicons/01_init_QC.sh b/microbiomes/qc_amplicons/01_init_QC.sh index 76e3e8a..8fe812b 100755 --- a/microbiomes/qc_amplicons/01_init_QC.sh +++ b/microbiomes/qc_amplicons/01_init_QC.sh @@ -15,8 +15,8 @@ cat < ${outdir}/01_init_QC/01_init_QC.sbatch #SBATCH --ntasks=${nthreads} #SBATCH --nodes=1 #SBATCH --cpus-per-task=1 -#SBATCH --mem=20 -#SBATCH --time=01:00:00 +#SBATCH --mem=20G +#SBATCH --time=24:00:00 # get local variables source local.env diff --git a/microbiomes/qc_amplicons/02_dada2.r b/microbiomes/qc_amplicons/02_dada2.r index 4e602de..72d08f4 100644 --- a/microbiomes/qc_amplicons/02_dada2.r +++ b/microbiomes/qc_amplicons/02_dada2.r @@ -6,7 +6,7 @@ path <- "path1" #need to change "path1" to where the data can be found -forward_reads <- sort(list.files(path, pattern="R1.fastq", full.names=T)) +merge_reads <- sort(list.files(path, pattern="R1.fastq", full.names=T)) reverse_reads <- sort(list.files(path, pattern="R2.fastq", full.names=T)) @@ -14,36 +14,39 @@ reverse_reads <- sort(list.files(path, pattern="R2.fastq", full.names=T)) #The following is to assign the filtered reads a file -filtered_forward_reads <- file.path(path, "filtered", paste0(sample.names, "_forward_filtered.fastq.gz")) +#filtered_merged_reads <- file.path(path, "filtered", paste0(sample.names, "_merged_filtered.fastq.gz")) -filtered_reverse_reads <- file.path(path, "filtered", paste0(sample.names, "_reverse_filtered.fastq.gz")) - -out <- filteredAndTrim(forward_reads, filtered_forward_reads, reverse_reads, filtered_reverse_reads, truncLen=c(240,160), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=T, compress=T, multithread=T) +#filtered_reverse_reads <- file.path(path, "filtered", paste0(sample.names, "_reverse_filtered.fastq.gz")) +#move line ahead +out <- filteredAndTrim(forward_reads, filtered_forward_reads, maxN=0, maxEE=2, truncQ=0, compress=T, multithread=T)#provide number for multireads in local.env #learning errors is a key component, must be done with filtered reads -err_forward_reads <- learnErrors(filtered_forward_reads, multithread=T) +err_merged_reads <- learnErrors(filtered_merged_reads, multithread=T) -err_reverse_reads <- learnErrors(filtered_reverse_reads, multithread=T) +#err_reverse_reads <- learnErrors(filtered_reverse_reads, multithread=T) #There is a bult-in function that allows the user to plot errors, just allows user to visualize the estimated error rates #If decided to run this plot- -plotErrors(err_forward_reads, nominalQ=T) +pdf(path) +plotErrors(err_merged_reads, nominalQ=T) +dev.off() -plotErrors(err_reverse_reads, nominalQ=T) +#plotErrors(err_reverse_reads, nominalQ=T) #after dada2 has learned errors, user can denoise +derepFastq #output will be for dada -dada_forward <- dada(filtered_forward_reads, err=err_forward_reads, multithread=T) +dada_merged <- dada(filtered_merged_reads, err=err_merged_reads, multithread=T) -dada_reverse <- dada(Filtered_reverse_reads, err-err_reverse_reads, multithread=T) +#dada_reverse <- dada(Filtered_reverse_reads, err-err_reverse_reads, multithread=T) #Inspect the denoising results, if wanted by "dada_forward[[1]] #After denoising, we can merge the paired reads -mergers <- mergePairs(dada_forward, filtered_forward_reads, dada_reverse, filtered_reverse_reads, verbose=T) +#mergers <- mergePairs(dada_forward, filtered_forward_reads, dada_reverse, filtered_reverse_reads, verbose=T) #Again, if wanted to inspect mergers run head(mergers[[1]]) @@ -52,4 +55,6 @@ mergers <- mergePairs(dada_forward, filtered_forward_reads, dada_reverse, filter seqtab <- makeSequenceTable(mergers) dim(seqtab) +write.table(seqtab, path, sep='\t') +save.image(path)