Skip to content

Commit

Permalink
Merge pull request #2 from McMinds-Lab/main
Browse files Browse the repository at this point in the history
start fresh for soil edna starting files
  • Loading branch information
AlexanderShane authored Nov 17, 2021
2 parents 946f89c + 7ae9005 commit 9292d55
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 174 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
*.env
*DS_Store
.Rapp.history
*.hpp
49 changes: 0 additions & 49 deletions microbiomes/qc_16S/1_init_QC.sh

This file was deleted.

53 changes: 0 additions & 53 deletions microbiomes/qc_16S/2_cluster.sh

This file was deleted.

19 changes: 0 additions & 19 deletions microbiomes/qc_16S/3_create_otu_table.sh

This file was deleted.

53 changes: 0 additions & 53 deletions microbiomes/qc_16S/4_MED.sh

This file was deleted.

67 changes: 67 additions & 0 deletions microbiomes/qc_amplicons/01_init_QC.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/usr/bin/env bash
# 01_init_QC.sh
#
# Writes a Slurm batch script that
#   1. demultiplexes paired-end reads and trims barcodes/primers with cutadapt,
#   2. merges every demultiplexed read pair with vsearch,
# and submits that batch script when autorun=true in local.env.
#
# Usage: change into this directory, then run 'bash 01_init_QC.sh'.
# Variables read from local.env: outdir, partition, qos, email, nthreads,
#   maxram, barcodes_fwd, barcodes_rev, in_fwd, in_rev, autorun.

# get local variables
if [[ ! -r ./local.env ]]; then
  echo "error: local.env not found in $(pwd); copy local.env.txt to local.env and fill it in" >&2
  exit 1
fi
source ./local.env

# Stop early on blank settings: an empty outdir would otherwise make the
# mkdir below target /01_init_QC, and empty inputs only fail once the job runs.
for required in outdir nthreads barcodes_fwd barcodes_rev in_fwd in_rev; do
  if [[ -z "${!required:-}" ]]; then
    echo "error: '${required}' is not set in local.env" >&2
    exit 1
  fi
done

stage_dir="${outdir}/01_init_QC"
mkdir -p "${stage_dir}" || exit 1

# The heredoc delimiter is unquoted on purpose: \${var} references are filled in
# now from local.env, while anything written as \\\$ is left for the job itself
# to expand at run time.
# --mem takes its value from maxram because Slurm reads a bare number as
# megabytes, so a literal 20 would request only 20 MB.
cat <<EOF > "${stage_dir}/01_init_QC.sbatch"
#!/bin/bash
#SBATCH --job-name=01_init_QC
#SBATCH --partition=${partition}
#SBATCH --qos=${qos}
#SBATCH --mail-user=${email}
#SBATCH --mail-type=END,FAIL
#SBATCH --output=${stage_dir}/01_init_QC.log
#SBATCH --ntasks=${nthreads}
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=${maxram:-20G}
#SBATCH --time=01:00:00

# get local variables
source local.env

mkdir -p "${stage_dir}/demultiplexed"
mkdir -p "${stage_dir}/merged"

# trim indices and primers from sequences, demultiplex, and discard any sequences that don't contain both full barcodes and primers
module purge
module load hub.apps/anaconda3
source activate cutadapt
cutadapt \
  --no-indels \
  --discard-untrimmed \
  --pair-filter=any \
  -g "file:${barcodes_fwd}" \
  -G "file:${barcodes_rev}" \
  -o "${stage_dir}/demultiplexed/{name1}-{name2}_R1.fastq" \
  -p "${stage_dir}/demultiplexed/{name1}-{name2}_R2.fastq" \
  "${in_fwd}" \
  "${in_rev}"

# switch from the cutadapt environment to vsearch once, before the loop
conda deactivate
module purge
module load apps/vsearch

# double check that reads are oriented consistently (does the above need to be re-run with the forward and reverse indices and or primers switched?)
for file in "${stage_dir}/demultiplexed/"*_R1.fastq; do
  # an unmatched glob stays as the literal pattern; skip it
  [[ -e "\${file}" ]] || continue

  # keep only the part of the filename before the first '-' so that Sample IDs match the mapping file
  filename=\$(basename "\${file}")
  sampleid=\${filename%%-*} ## double check that all files have matching name1 and name2 or else this could overwrite a good file with a bad one

  # merge paired-end reads such that short reads, where the read is longer than the insertion (such as mitochondria), are not discarded, and nucleotides are trimmed that extend past the beginning of the paired read (which are just adaptor sequences)
  # the R2 path is derived from the filename suffix only, so an "R1" elsewhere in the path is left untouched
  vsearch \
    --fastq_mergepairs "\${file}" \
    --reverse "\${file%_R1.fastq}_R2.fastq" \
    --fastq_allowmergestagger \
    --fasta_width 0 \
    --threads ${nthreads} \
    --fastqout "${stage_dir}/merged/\${sampleid}.fastq"
done
EOF

# Submit only on an explicit autorun=true; an unset or empty value must not submit.
if [[ "${autorun:-false}" == "true" ]]; then
  sbatch "${stage_dir}/01_init_QC.sbatch"
fi
3 changes: 3 additions & 0 deletions microbiomes/qc_amplicons/02_dada2.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# attach the dada2 package
library(dada2)

# TODO: placeholder - no dada2 processing steps are implemented in this script yet
27 changes: 27 additions & 0 deletions microbiomes/qc_amplicons/02_dada2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
# 02_dada2.sh
#
# Writes a Slurm batch script that loads R and runs 02_dada2.r, and submits
# that batch script when autorun=true in local.env.
#
# Usage: change into this directory, then run 'bash 02_dada2.sh'.
# Variables read from local.env: outdir, partition, qos, email, nthreads,
#   maxram, autorun.

# get local variables
if [[ ! -r ./local.env ]]; then
  echo "error: local.env not found in $(pwd); copy local.env.txt to local.env and fill it in" >&2
  exit 1
fi
source ./local.env

# An empty outdir would make the mkdir below target /02_dada2.
if [[ -z "${outdir:-}" ]]; then
  echo "error: 'outdir' is not set in local.env" >&2
  exit 1
fi

stage_dir="${outdir}/02_dada2"
mkdir -p "${stage_dir}" || exit 1

# The heredoc delimiter is unquoted so the settings from local.env are filled
# in while the batch script is written.
# --mem takes its value from maxram because Slurm reads a bare number as
# megabytes, so a literal 20 would request only 20 MB.
cat <<EOF > "${stage_dir}/02_dada2.sbatch"
#!/bin/bash
#SBATCH --job-name=02_dada2
#SBATCH --partition=${partition}
#SBATCH --qos=${qos}
#SBATCH --mail-user=${email}
#SBATCH --mail-type=END,FAIL
#SBATCH --output=${stage_dir}/02_dada2.log
#SBATCH --ntasks=${nthreads}
#SBATCH --nodes=1
#SBATCH --cpus-per-task=1
#SBATCH --mem=${maxram:-20G}
#SBATCH --time=01:00:00

module load apps/R
Rscript 02_dada2.r
EOF

# Submit the batch script written above (this stage's own file, not stage 01's),
# and only on an explicit autorun=true.
if [[ "${autorun:-false}" == "true" ]]; then
  sbatch "${stage_dir}/02_dada2.sbatch"
fi
3 changes: 3 additions & 0 deletions microbiomes/qc_amplicons/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Copy local.env.txt to local.env and fill in the blanks.

Run each script by changing into this directory and then running 'bash scriptname.sh'. Each script writes an sbatch file into the output directory; if 'autorun=true' is set in local.env, the script also submits that file with sbatch.
22 changes: 22 additions & 0 deletions microbiomes/qc_amplicons/local.env.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## address used for the job's --mail-user setting (replace the placeholder)
email=your.name@example.com

## how many processors do you want to use
nthreads=20

## max ram
maxram=20G

## what slurm partition and qos do you want to use
partition=rra
qos=rra

## input read files (forward and reverse) and the barcode fasta files
in_fwd=
in_rev=
barcodes_fwd=barcodes_fwd.fasta
barcodes_rev=barcodes_rev.fasta
outdir=/path/to/output/directory

autorun=true

## this tells the scripts where to find this file
## (BASH_SOURCE[0] names this file even while it is being sourced, whereas
## \$0 would name whichever process sourced it)
scriptdir=$(dirname "$(realpath -s "${BASH_SOURCE[0]}")")

0 comments on commit 9292d55

Please sign in to comment.