Skip to content

Latest commit

 

History

History
205 lines (175 loc) · 6.7 KB

build_notes.md

File metadata and controls

205 lines (175 loc) · 6.7 KB

<<<<<<< HEAD

Miniconda

wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
sh Miniconda3-latest-Linux-x86_64.sh
#restart shell
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge

Environment setup

conda create -n placenta
conda activate placenta
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda install -y mamba
mamba install -y snakemake pysradb bioconductor-dada2 r-dplyr parallel-fastq-dump

Fetch sequence data and metadata

conda activate placenta
mkdir metadata
pysradb metadata --db  --detailed --expand --saveto metadata/SRP141397.metadata

#this is reentrant - very cool
mkdir -p raw
pysradb download  --out-dir ./raw -p SRP141397

sra2fastq

parallel-fastq-dump --threads 4 --outdir intermediates/fastq --split-files --tmpdir /tmp --gzip -s `find raw/ -name "*sra"`

(Need 12GB)

Fetch supplemental tables

curl https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-018-0575-4/MediaObjects/40168_2018_575_MOESM1_ESM.xls > metadata/table1.xls
curl https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-018-0575-4/MediaObjects/40168_2018_575_MOESM2_ESM.pdf > metadata/table2.pdf

Make qiime environment and install qiime-related stuff

conda create -n qiime1env python=2.7
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda config --add channels anaconda
conda activate qiime1env
conda install -y qiime matplotlib=1.4.3 mock nose
conda install r-optparse bioconductor-metagenomeseq  r-biom r-plyr r-RJSONIO bioconductor-rhdf5 bioconductor-biomformat
conda install gxx_linux-64
pip install --upgrade cython
pip install biom-format==2.1.4
wget https://github.com/biocore/qiime/archive/1.9.1.tar.gz
gunzip 1.9.1.tar.gz && tar -xvf 1.9.1.tar
cd qiime-1.9.1 && python setup.py install

Kraken index stuff

mamba install kraken=1.1-0 #1.0 has a bug related to nucl_est.accession2taxid.gz
git clone https://github.com/zhaoc1/sunbeam_databases
#altered ./build_krakendb
cd sunbeam_databases
mkdir nt_20180816
#2 hours?
update_blastdb.pl --passive --decompress nt
cd sunbeam_databases 
#8 hours!
./build_krakendb.sh
conda activate placenta
echo "source('~/Documents/dev/placenta/runDada.R')" | R --no-save --vanilla

https://benjjneb.github.io/dada2/tutorial.html

Taxonomic assignments were generated by comparison to the Silva reference database [43,44,45]. The version is not given, but given the paper's date I chose Silva_128 (06-Feb-2017).

wget https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_119_release.zip
unzip Silva_119_release.zip

=======

15258cfa3e7c71e3ec036fcb0782811c588d7b36 Miniconda

wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
sh Miniconda3-latest-Linux-x86_64.sh
#restart shell
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge

Environment setup

conda create -n placenta
conda activate placenta
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda install -y mamba
mamba install -y snakemake pysradb bioconductor-dada2 r-dplyr parallel-fastq-dump

Fetch sequence data and metadata

conda activate placenta
mkdir metadata
pysradb metadata --db  --detailed --expand --saveto metadata/SRP141397.metadata

#this is reentrant - very cool
mkdir -p raw
pysradb download  --out-dir ./raw -p SRP141397

sra2fastq

parallel-fastq-dump --threads 4 --outdir intermediates/fastq --split-files --tmpdir /tmp --gzip -s `find raw/ -name "*sra"`

(Need 12GB)

Fetch supplemental tables

curl https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-018-0575-4/MediaObjects/40168_2018_575_MOESM1_ESM.xls > metadata/table1.xls
curl https://static-content.springer.com/esm/art%3A10.1186%2Fs40168-018-0575-4/MediaObjects/40168_2018_575_MOESM2_ESM.pdf > metadata/table2.pdf

Make qiime environment and install qiime-related stuff

conda create -n qiime1env python=2.7
conda config --add channels defaults
conda config --add channels bioconda
conda config --add channels conda-forge
conda config --add channels anaconda
conda activate qiime1env
conda install -y qiime matplotlib=1.4.3 mock nose
conda install r-optparse bioconductor-metagenomeseq  r-biom r-plyr r-RJSONIO bioconductor-rhdf5 bioconductor-biomformat
conda install gxx_linux-64
pip install --upgrade cython
pip install biom-format==2.1.4
wget https://github.com/biocore/qiime/archive/1.9.1.tar.gz
gunzip 1.9.1.tar.gz && tar -xvf 1.9.1.tar
cd qiime-1.9.1 && python setup.py install

Kraken index stuff

mamba install kraken=1.1-0 #1.0 has a bug related to nucl_est.accession2taxid.gz
git clone https://github.com/zhaoc1/sunbeam_databases
#altered ./build_krakendb
cd sunbeam_databases
mkdir nt_20180816
#2 hours?
update_blastdb.pl --passive --decompress nt
cd sunbeam_databases 
#4 hours
./build_krakendb.sh
conda activate placenta
echo "source('~/Documents/dev/placenta/runDada.R')" | R --no-save --vanilla

https://benjjneb.github.io/dada2/tutorial.html

Taxonomic assignments were generated by comparison to the Silva reference database [43,44,45]. The version is not given, but given the paper's date I chose Silva_128 (06-Feb-2017).

I followed these crazy instructions to set up the config: http://www.metagenomics.wiki/tools/16s/qiime/otu-clustering/silva

wget https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_119_release.zip
unzip Silva_119_release.zip

#Dada used either Silva 123, 128, or 132. Links for 123: http://dx.doi.org/10.5281/zenodo.158958 https://zenodo.org/record/158958/files/silva_nr_v123_train_set.fa.gz?download=1 https://zenodo.org/record/158958/files/silva_species_assignment_v123.fa.gz?download=1

echo "pick_otus:enable_rev_strand_match True" > otu_SILVA_settings.txt
echo "align_seqs.py:template_fp $PWD/SILVA_119_QIIME_release/core_alignment/core_Silva119_alignment.fna" >> otu_SILVA_settings.txt
echo "filter_alignment:allowed_gap_frac 0.80" >> otu_SILVA_settings.txt
echo "filter_alignment:entropy_threshold 0.10" >> otu_SILVA_settings.txt
echo "filter_alignment:suppress_lane_mask_filter True" >> otu_SILVA_settings.txt
echo "assign_taxonomy:reference_seqs_fp $PWD/SILVA_119_QIIME_release/rep_set/97/Silva_119_rep_set97.fna" >> otu_SILVA_settings.txt
echo "assign_taxonomy:id_to_taxonomy_fp $PWD/SILVA_119_QIIME_release/taxonomy/97/taxonomy_97_7_levels.txt" >> otu_SILVA_settings.txt

assign_taxonomy.py -i fasta/AS06_16S_F_filt.fasta -r SILVA_119_QIIME_release/rep_set/97/Silva_119_rep_set97.fna -t SILVA_119_QIIME_release/taxonomy/97/taxonomy_97_7_levels.txt -o Silva_tax_assign/