From acc3cf249313338319c872b135c5baf84efb735f Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 11:45:17 -0800
Subject: [PATCH 1/8] Create srr.md

---
 docs/srr.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 docs/srr.md

diff --git a/docs/srr.md b/docs/srr.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/docs/srr.md
@@ -0,0 +1 @@
+

From af4b00e76c9ac6259ab8d06da9ded6d370d387a4 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 11:55:30 -0800
Subject: [PATCH 2/8] Rename srr.md to running_bioflows.md

---
 docs/{srr.md => running_bioflows.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/{srr.md => running_bioflows.md} (100%)

diff --git a/docs/srr.md b/docs/running_bioflows.md
similarity index 100%
rename from docs/srr.md
rename to docs/running_bioflows.md

From bef24a9406207f17c33a68c88d37ce7bb27e9daf Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 11:57:01 -0800
Subject: [PATCH 3/8] Update running_bioflows.md

---
 docs/running_bioflows.md | 124 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/docs/running_bioflows.md b/docs/running_bioflows.md
index 8b13789..94c9998 100644
--- a/docs/running_bioflows.md
+++ b/docs/running_bioflows.md
@@ -1 +1,125 @@
 
+# Running bioflows
+
+QCDB is currently built to use the outputs generated by Bioflows \([https://compbiocore.github.io/bioflows/](https://compbiocore.github.io/bioflows/)\), a package designed to automate running bioinformatics workflows. To run bioflows:
+
+* Connect to Oscar: `ssh user@ssh.ccv.brown.edu` and enter password when prompted
+* Switch to the cbcollab group: `newgrp cbcollab`
+* Create a yaml file to download reads from SRA and run them in bioflows -- here is an example that  downloads human transcriptome data, runs some QC on the reads, aligns them to the genome, and runs some alignment QC. 
+  * This example has dummy values in place for `sample_manifest:sra:entrez_email` and `run_parms:ssh_user` -- these should be edited before attempting to use this example.
+  * To run bioflows on a different SRA sample, edit the fields for `bioproject:`and  `sample_manifest:sra:id` . You may also need to edit `run_parms:gtf_file:` and`run_parms:reference_fasta_path` to reflect the appropriate reference and annotations, as well as `workflow_sequence:gsnap` if gsnap is not the appropriate alignment tool for your data.
+
+```text
+bioproject: PRJNA240916
+experiment: sra_test
+sample_manifest:
+  sra:  
+      id: SRS594907
+      entrez_email: your_email@email.com
+      downloads: True
+run_parms:
+  conda_command: source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda
+  work_dir: /gpfs/data/cbc/qcdb_populate
+  log_dir: logs
+  paired_end: False
+  local_targets: False
+  saga_host: localhost
+  ssh_user: username
+  saga_scheduler: slurm
+  gtf_file: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.87.gtf
+  reference_fasta_path: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.dna.primary_assembly.fa
+workflow_sequence:
+  - fastqc: default
+  - gsnap:
+      options:
+        -d: Ensembl_Homo_sapiens_GRCh37
+        -s: /gpfs/data/cbc/cbcollab/cbc_ref/gmapdb_2017.01.14/Ensembl_Homo_sapiens_GRCh37/Ensembl_Homo_sapiens_GRCh37.maps/Ensembl_Homo_sapiens.GRCh37.87.splicesites.iit
+      job_params:
+        ncpus: 42
+        mem: 128000
+        time: 1400
+      suffix:
+        output: ".sam"
+  - samtools:
+      subcommand: view
+      suffix:
+        input: ".sam"
+        output: ".bam"
+      options:
+        -Sbh:
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 128000
+  - samtools:
+      subcommand: view
+      suffix:
+        input: ".bam"
+        output: ".mapped.bam"
+      options:
+        -bh:
+        -F: "0x4"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000
+  - samtools:
+      subcommand: sort
+      suffix:
+        input: ".mapped.bam"
+        output: ".srtd.bam"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 175000
+  - samtools:
+      subcommand: index
+      suffix:
+        input: ".srtd.bam"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000
+  - bammarkduplicates2:
+      suffix:
+        input: ".srtd.bam"
+        output: ".dup.srtd.bam" 
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000 
+  - picard:
+      subcommand: AddOrReplaceReadGroups
+  - picard:
+      subcommand: CollectAlignmentSummaryMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+  - picard:
+      subcommand: CollectInsertSizeMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+  - picard:
+      subcommand: CollectGcBiasMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+```
+
+* After saving your yaml file, start a screen session -- here is an example of how to start a screen session named `bioflows_qcdb`:
+
+`screen -S bioflows_qcdb`
+
+* Activate the conda environment:
+
+`source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda`
+
+* Run bioflows using the yaml you just created -- in this example, we are assuming the yaml is called `bioflows_qcdb.yaml`:
+
+`bioflows-run bioflows_qcdb.yaml`
+
+* Bioflows will create a series of folders inside the working directory you specified in your yaml file. The outputs that will be useful for qcdb will be located in the directory called `qc`

From f35f6204d4b650a3435c87b0f1ae44889acb51f6 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 11:59:05 -0800
Subject: [PATCH 4/8] Update running_bioflows.md

---
 docs/running_bioflows.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running_bioflows.md b/docs/running_bioflows.md
index 94c9998..b3888b4 100644
--- a/docs/running_bioflows.md
+++ b/docs/running_bioflows.md
@@ -16,7 +16,7 @@ sample_manifest:
   sra:  
       id: SRS594907
       entrez_email: your_email@email.com
-      downloads: True
+      downloads: False
 run_parms:
   conda_command: source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda
   work_dir: /gpfs/data/cbc/qcdb_populate

From e8ba9a89ad04dd288505ba8e64dbd6ec19ae42e8 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 12:06:11 -0800
Subject: [PATCH 5/8] Delete running_bioflows.md

---
 docs/running_bioflows.md | 125 ---------------------------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 docs/running_bioflows.md

diff --git a/docs/running_bioflows.md b/docs/running_bioflows.md
deleted file mode 100644
index b3888b4..0000000
--- a/docs/running_bioflows.md
+++ /dev/null
@@ -1,125 +0,0 @@
-
-# Running bioflows
-
-QCDB is currently built to use the outputs generated by Bioflows \([https://compbiocore.github.io/bioflows/](https://compbiocore.github.io/bioflows/)\), a package designed to automate running bioinformatics workflows. To run bioflows:
-
-* Connect to Oscar: `ssh user@ssh.ccv.brown.edu` and enter password when prompted
-* Switch to the cbcollab group: `newgrp cbcollab`
-* Create a yaml file to download reads from SRA and run them in bioflows -- here is an example that  downloads human transcriptome data, runs some QC on the reads, aligns them to the genome, and runs some alignment QC. 
-  * This example has dummy values in place for `sample_manifest:sra:entrez_email` and `run_parms:ssh_user` -- these should be edited before attempting to use this example.
-  * To run bioflows on a different SRA sample, edit the fields for `bioproject:`and  `sample_manifest:sra:id` . You may also need to edit `run_parms:gtf_file:` and`run_parms:reference_fasta_path` to reflect the appropriate reference and annotations, as well as `workflow_sequence:gsnap` if gsnap is not the appropriate alignment tool for your data.
-
-```text
-bioproject: PRJNA240916
-experiment: sra_test
-sample_manifest:
-  sra:  
-      id: SRS594907
-      entrez_email: your_email@email.com
-      downloads: False
-run_parms:
-  conda_command: source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda
-  work_dir: /gpfs/data/cbc/qcdb_populate
-  log_dir: logs
-  paired_end: False
-  local_targets: False
-  saga_host: localhost
-  ssh_user: username
-  saga_scheduler: slurm
-  gtf_file: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.87.gtf
-  reference_fasta_path: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.dna.primary_assembly.fa
-workflow_sequence:
-  - fastqc: default
-  - gsnap:
-      options:
-        -d: Ensembl_Homo_sapiens_GRCh37
-        -s: /gpfs/data/cbc/cbcollab/cbc_ref/gmapdb_2017.01.14/Ensembl_Homo_sapiens_GRCh37/Ensembl_Homo_sapiens_GRCh37.maps/Ensembl_Homo_sapiens.GRCh37.87.splicesites.iit
-      job_params:
-        ncpus: 42
-        mem: 128000
-        time: 1400
-      suffix:
-        output: ".sam"
-  - samtools:
-      subcommand: view
-      suffix:
-        input: ".sam"
-        output: ".bam"
-      options:
-        -Sbh:
-      job_params:
-        time: 2000
-        ncpus: 8
-        mem: 128000
-  - samtools:
-      subcommand: view
-      suffix:
-        input: ".bam"
-        output: ".mapped.bam"
-      options:
-        -bh:
-        -F: "0x4"
-      job_params:
-        time: 2000
-        ncpus: 8
-        mem: 65000
-  - samtools:
-      subcommand: sort
-      suffix:
-        input: ".mapped.bam"
-        output: ".srtd.bam"
-      job_params:
-        time: 2000
-        ncpus: 8
-        mem: 175000
-  - samtools:
-      subcommand: index
-      suffix:
-        input: ".srtd.bam"
-      job_params:
-        time: 2000
-        ncpus: 8
-        mem: 65000
-  - bammarkduplicates2:
-      suffix:
-        input: ".srtd.bam"
-        output: ".dup.srtd.bam" 
-      job_params:
-        time: 2000
-        ncpus: 8
-        mem: 65000 
-  - picard:
-      subcommand: AddOrReplaceReadGroups
-  - picard:
-      subcommand: CollectAlignmentSummaryMetrics
-      suffix:
-        input: ".rg.srtd.bam"
-      options:
-        VALIDATION_STRINGENCY=LENIENT:
-  - picard:
-      subcommand: CollectInsertSizeMetrics
-      suffix:
-        input: ".rg.srtd.bam"
-      options:
-        VALIDATION_STRINGENCY=LENIENT:
-  - picard:
-      subcommand: CollectGcBiasMetrics
-      suffix:
-        input: ".rg.srtd.bam"
-      options:
-        VALIDATION_STRINGENCY=LENIENT:
-```
-
-* After saving your yaml file, start a screen session -- here is an example of how to start a screen session named `bioflows_qcdb`:
-
-`screen -S bioflows_qcdb`
-
-* Activate the conda environment:
-
-`source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda`
-
-* Run bioflows using the yaml you just created -- in this example, we are assuming the yaml is called `bioflows_qcdb.yaml`:
-
-`bioflows-run bioflows_qcdb.yaml`
-
-* Bioflows will create a series of folders inside the working directory you specified in your yaml file. The outputs that will be useful for qcdb will be located in the directory called `qc`

From a9a16c0f4b1f301715b499f480c4ed32ad8d1187 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 12:13:29 -0800
Subject: [PATCH 6/8] Create download_srrs

---
 docs/tutorials/download_srrs | 124 +++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 docs/tutorials/download_srrs

diff --git a/docs/tutorials/download_srrs b/docs/tutorials/download_srrs
new file mode 100644
index 0000000..0d2181d
--- /dev/null
+++ b/docs/tutorials/download_srrs
@@ -0,0 +1,124 @@
+# Running bioflows (Downloading SRA Files)
+
+QCDB is currently built to use the outputs generated by Bioflows \([https://compbiocore.github.io/bioflows/](https://compbiocore.github.io/bioflows/)\), a package designed to automate running bioinformatics workflows. To run bioflows:
+
+* Connect to Oscar: `ssh user@ssh.ccv.brown.edu` and enter password when prompted
+* Switch to the cbcollab group: `newgrp cbcollab`
+* Create a yaml file to download reads from SRA and run them in bioflows -- here is an example that downloads human transcriptome data, runs some QC on the reads, aligns them to the genome, and runs some alignment QC.
+  * This example has dummy values in place for `sample_manifest:sra:entrez_email` and `run_parms:ssh_user` -- these should be edited before attempting to use this example.
+  * To run bioflows on a different SRA sample, edit the fields for `bioproject` and `sample_manifest:sra:id`. You may also need to edit `run_parms:gtf_file` and `run_parms:reference_fasta_path` to reflect the appropriate reference and annotations, as well as `workflow_sequence:gsnap` if gsnap is not the appropriate alignment tool for your data.
+
+```text
+bioproject: PRJNA240916
+experiment: sra_test
+sample_manifest:
+  sra:  
+      id: SRS594907
+      entrez_email: your_email@email.com
+      downloads: False
+run_parms:
+  conda_command: source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda
+  work_dir: /gpfs/data/cbc/qcdb_populate
+  log_dir: logs
+  paired_end: False
+  local_targets: False
+  saga_host: localhost
+  ssh_user: username
+  saga_scheduler: slurm
+  gtf_file: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.87.gtf
+  reference_fasta_path: /gpfs/data/cbc/cbcollab/ref_tools/Ensembl_hg_GRCh37_rel87/Ensembl_Homo_sapiens.GRCh37.dna.primary_assembly.fa
+workflow_sequence:
+  - fastqc: default
+  - gsnap:
+      options:
+        -d: Ensembl_Homo_sapiens_GRCh37
+        -s: /gpfs/data/cbc/cbcollab/cbc_ref/gmapdb_2017.01.14/Ensembl_Homo_sapiens_GRCh37/Ensembl_Homo_sapiens_GRCh37.maps/Ensembl_Homo_sapiens.GRCh37.87.splicesites.iit
+      job_params:
+        ncpus: 42
+        mem: 128000
+        time: 1400
+      suffix:
+        output: ".sam"
+  - samtools:
+      subcommand: view
+      suffix:
+        input: ".sam"
+        output: ".bam"
+      options:
+        -Sbh:
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 128000
+  - samtools:
+      subcommand: view
+      suffix:
+        input: ".bam"
+        output: ".mapped.bam"
+      options:
+        -bh:
+        -F: "0x4"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000
+  - samtools:
+      subcommand: sort
+      suffix:
+        input: ".mapped.bam"
+        output: ".srtd.bam"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 175000
+  - samtools:
+      subcommand: index
+      suffix:
+        input: ".srtd.bam"
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000
+  - bammarkduplicates2:
+      suffix:
+        input: ".srtd.bam"
+        output: ".dup.srtd.bam" 
+      job_params:
+        time: 2000
+        ncpus: 8
+        mem: 65000 
+  - picard:
+      subcommand: AddOrReplaceReadGroups
+  - picard:
+      subcommand: CollectAlignmentSummaryMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+  - picard:
+      subcommand: CollectInsertSizeMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+  - picard:
+      subcommand: CollectGcBiasMetrics
+      suffix:
+        input: ".rg.srtd.bam"
+      options:
+        VALIDATION_STRINGENCY=LENIENT:
+```
+
+* After saving your yaml file, start a screen session -- here is an example of how to start a screen session named `bioflows_qcdb`:
+
+`screen -S bioflows_qcdb`
+
+* Activate the conda environment:
+
+`source /gpfs/runtime/cbc_conda/bin/activate_cbc_conda`
+
+* Run bioflows using the yaml you just created -- in this example, we are assuming the yaml is called `bioflows_qcdb.yaml`:
+
+`bioflows-run bioflows_qcdb.yaml`
+
+* Bioflows will create a series of folders inside the working directory you specified in your yaml file. The outputs that will be useful for qcdb will be located in the directory called `qc`.

From 30ae82725f8e4dbb11f374c2f7d70b379052d514 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 12:13:56 -0800
Subject: [PATCH 7/8] Rename download_srrs to download_sra_doc.md

---
 docs/tutorials/{download_srrs => download_sra_doc.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/tutorials/{download_srrs => download_sra_doc.md} (100%)

diff --git a/docs/tutorials/download_srrs b/docs/tutorials/download_sra_doc.md
similarity index 100%
rename from docs/tutorials/download_srrs
rename to docs/tutorials/download_sra_doc.md

From 7aef6c46ac9ad309f0030f6e5c1263bca6baace1 Mon Sep 17 00:00:00 2001
From: Nando1014 <66292536+Nando1014@users.noreply.github.com>
Date: Fri, 22 Jan 2021 12:16:41 -0800
Subject: [PATCH 8/8] Update download_sra_doc.md

---
 docs/tutorials/download_sra_doc.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/download_sra_doc.md b/docs/tutorials/download_sra_doc.md
index 0d2181d..a292b2e 100644
--- a/docs/tutorials/download_sra_doc.md
+++ b/docs/tutorials/download_sra_doc.md
@@ -1,4 +1,4 @@
-# Running bioflows (Downloading SRA Files)
+# Downloading SRA Files
 
 QCDB is currently built to use the outputs generated by Bioflows \([https://compbiocore.github.io/bioflows/](https://compbiocore.github.io/bioflows/)\), a package designed to automate running bioinformatics workflows. To run bioflows: