diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index f5f7702..db10044 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,9 +1,9 @@ --- name: Bug report about: Create a report to help us improve -title: '' -labels: '' -assignees: 'slsevilla' +title: "" +labels: "" +assignees: "slsevilla" --- **Describe the bug** @@ -11,10 +11,11 @@ A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior; if applicable, add screenshots to help explain your problem: + 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** -A clear and concise description of what you expected to happen. \ No newline at end of file +A clear and concise description of what you expected to happen. diff --git a/.github/ISSUE_TEMPLATE/enchancement_request.md b/.github/ISSUE_TEMPLATE/enchancement_request.md index e9a608c..36a0fa7 100644 --- a/.github/ISSUE_TEMPLATE/enchancement_request.md +++ b/.github/ISSUE_TEMPLATE/enchancement_request.md @@ -1,14 +1,13 @@ --- name: Enhancement request about: Suggest an additional component to an already existing feature -title: '' -labels: '' -assignees: 'slsevilla' - +title: "" +labels: "" +assignees: "slsevilla" --- **Describe what enhancement you'd like to see.** A clear and concise description of what you'd like to add, specifically to what already existing feature. Ex. I am using a new reference, and I'd love to see this reference included [...] **Additional context** -Add any other context or screenshots about the feature request here. \ No newline at end of file +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index e4f6e3f..69ef716 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,13 +1,13 @@ --- name: Feature request about: Suggest an idea for this project -title: '' -labels: '' -assignees: 'slsevilla' +title: "" +labels: "" +assignees: "slsevilla" --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** -A clear and concise description of what you want to happen. If there are alternative solutions or features you've considered, please include these! \ No newline at end of file +A clear and concise description of what you want to happen. If there are alternative solutions or features you've considered, please include these! 
diff --git a/.github/workflows/build_mkdocs.yaml b/.github/workflows/build_mkdocs.yaml index c01e496..e41aadf 100644 --- a/.github/workflows/build_mkdocs.yaml +++ b/.github/workflows/build_mkdocs.yaml @@ -3,7 +3,7 @@ on: workflow_dispatch: push: paths: - - 'docs/**' + - "docs/**" jobs: deploy: runs-on: ubuntu-latest @@ -14,4 +14,4 @@ jobs: python-version: 3.9 - run: pip install --upgrade pip - run: pip install -r docs/requirements.txt - - run: mkdocs gh-deploy --force \ No newline at end of file + - run: mkdocs gh-deploy --force diff --git a/.github/workflows/lintr.yaml b/.github/workflows/lintr.yaml index 77fe300..ac0b525 100644 --- a/.github/workflows/lintr.yaml +++ b/.github/workflows/lintr.yaml @@ -13,18 +13,18 @@ jobs: Lintr: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: docker://snakemake/snakemake:v7.19.1 - - name: Lint Workflow - continue-on-error: true - run: | - docker run -v $PWD:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \ - "mkdir -p /opt2/output_carlisle/config /opt2/output_carlisle/annotation && \ - cp -r /opt2/workflow/scripts/ /opt2/output_carlisle/ && \ - cp /opt2/resources/cluster_biowulf.yaml /opt2/output_carlisle/config/cluster.yaml && \ - cp /opt2/resources/tools_biowulf.yaml /opt2/output_carlisle/config/tools.yaml && \ - cd /opt2/output_carlisle/annotation && \ - touch hg38.fa genes.gtf hg38.bed hg38.tss.bed hg38_refseq.ucsc Ecoli_GCF_000005845.2_ASM584v2_genomic.fna adapters.fa && \ - snakemake --lint -s /opt2/workflow/Snakefile \ - -d /opt2/output_carlisle --configfile /opt2/.test/config_lint.yaml || \ - echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'" \ No newline at end of file + - uses: actions/checkout@v2 + - uses: docker://snakemake/snakemake:v7.19.1 + - name: Lint Workflow + continue-on-error: true + run: | + docker run -v $PWD:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \ + "mkdir -p /opt2/output_carlisle/config /opt2/output_carlisle/annotation && \ + cp -r /opt2/workflow/scripts/ /opt2/output_carlisle/ && \ + cp /opt2/resources/cluster_biowulf.yaml /opt2/output_carlisle/config/cluster.yaml && \ + cp /opt2/resources/tools_biowulf.yaml /opt2/output_carlisle/config/tools.yaml && \ + cd /opt2/output_carlisle/annotation && \ + touch hg38.fa genes.gtf hg38.bed hg38.tss.bed hg38_refseq.ucsc Ecoli_GCF_000005845.2_ASM584v2_genomic.fna adapters.fa && \ + snakemake --lint -s /opt2/workflow/Snakefile \ + -d /opt2/output_carlisle --configfile /opt2/.test/config_lint.yaml || \ + echo 'There may have been a few warnings or errors. 
Please read through the log to determine if its harmless.'" diff --git a/.github/workflows/test_dev.yml b/.github/workflows/test_dev.yml index 33a6a87..d04ba2b 100644 --- a/.github/workflows/test_dev.yml +++ b/.github/workflows/test_dev.yml @@ -13,20 +13,20 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.0 - - uses: docker://snakemake/snakemake:v7.19.1 - with: - directory: '.test' - - name: dryrun - shell: bash {0} - run: | - bash ./install.sh ./test-workdir/carlisle-v9999.9999.9999-dev - ./test-workdir/carlisle-v9999.9999.9999-dev/bin/carlisle --runmode=init --workdir=./test-workdir/output_carlisle - cp ./test-workdir/carlisle-v9999.9999.9999-dev/bin/.test/config_lint.yaml ./test-workdir/output_carlisle/config/config.yaml - # TODO: use `carlisle run --mode dryrun` - docker run -v ./test-workdir/:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \ - "cd /opt2 && snakemake \ - -s ./carlisle-v9999.9999.9999-dev/bin/workflow/Snakefile \ - --dryrun \ - --configfile carlisle-v9999.9999.9999-dev/bin/.test/config_lint.yaml \ - --directory output_carlisle" + - uses: actions/checkout@v4.1.0 + - uses: docker://snakemake/snakemake:v7.19.1 + with: + directory: ".test" + - name: dryrun + shell: bash {0} + run: | + bash ./install.sh ./test-workdir/carlisle-v9999.9999.9999-dev + ./test-workdir/carlisle-v9999.9999.9999-dev/bin/carlisle --runmode=init --workdir=./test-workdir/output_carlisle + cp ./test-workdir/carlisle-v9999.9999.9999-dev/bin/.test/config_lint.yaml ./test-workdir/output_carlisle/config/config.yaml + # TODO: use `carlisle run --mode dryrun` + docker run -v ./test-workdir/:/opt2 snakemake/snakemake:v7.19.1 /bin/bash -c \ + "cd /opt2 && snakemake \ + -s ./carlisle-v9999.9999.9999-dev/bin/workflow/Snakefile \ + --dryrun \ + --configfile carlisle-v9999.9999.9999-dev/bin/.test/config_lint.yaml \ + --directory output_carlisle" diff --git a/.test/README.md b/.test/README.md index 1dff24a..2714e30 100644 --- a/.test/README.md +++ b/.test/README.md @@ -1,6 +1,9 @@ ################################################ + # CHANGELOG + ### samples retired 2/6/24 + + ### 12/16/22 + New test data is added to pipeline ################################################ + # HG38 + # sample location + /data/CCBR/projects/ccbr1155/CS031014/fastq # sample ids mapping; original to renamed + VS586.R1.fastq.gz,HN6_IgG_rabbit_negative_control_1.R1.fastq.gz VS586.R2.fastq.gz,HN6_IgG_rabbit_negative_control_1.R2.fastq.gz VS591.R1.fastq.gz,53_H3K4me3_1.R1.fastq.gz @@ -28,7 +37,8 @@ VS592.R2.fastq.gz,53_H3K4me3_2.R2.fastq.gz VS588.R1.fastq.gz,HN6_H3K4me3_1.R1.fastq.gz VS588.R2.fastq.gz,HN6_H3K4me3_1.R2.fastq.gz -# subset each of the samples +# subset each of the samples + fq_original_loc="/data/CCBR/projects/ccbr1155/CS031014/fastq";\ test_loc="/data/CCBR_Pipeliner/Pipelines/CARLISLE_dev/.test";\ fq_list=("HN6_IgG_rabbit_negative_control_1.R1.fastq.gz" "HN6_IgG_rabbit_negative_control_1.R2.fastq.gz" "HN6_H3K4me3_1.R1.fastq.gz" "HN6_H3K4me3_1.R2.fastq.gz" "HN6_H3K4me3_2.R1.fastq.gz" "HN6_H3K4me3_2.R2.fastq.gz" "53_H3K4me3_1.R1.fastq.gz" "53_H3K4me3_1.R2.fastq.gz" "53_H3K4me3_2.R1.fastq.gz" "53_H3K4me3_2.R2.fastq.gz" "HN6_H4K20me3_1.R1.fastq.gz" "HN6_H4K20me3_1.R2.fastq.gz" "HN6_H4K20me3_2.R1.fastq.gz" "HN6_H4K20me3_2.R2.fastq.gz" "53_H4K20m3_1.R1.fastq.gz" "53_H4K20m3_1.R2.fastq.gz" "53_H4K20m3_2.R1.fastq.gz" "53_H4K20m3_2.R2.fastq.gz");\ diff --git a/.test/config_lint.yaml b/.test/config_lint.yaml index 5f7c9e5..dbe1c13 100644 --- a/.test/config_lint.yaml +++ b/.test/config_lint.yaml @@ -12,20 
+12,20 @@ samplemanifest: "/opt2/.test/samples.test_lintr.tsv" ##################################################################################### # run sample contrasts run_contrasts: true -contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" +contrasts: "/opt2/.test/contrasts.test.tsv" # run_contrasts needs to be "Y" contrasts_fdr_cutoff: 0.05 contrasts_lfc_cutoff: 0.59 # FC of 1.5 run_go_enrichment: true run_rose: true # reference -genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. +genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. # alignment quality threshold mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). # normalization method -## spikein: normalization will be performed based off of spike-in aligned read count; +## spikein: normalization will be performed based off of spike-in aligned read count; ## library: library normalization will be performed ## none: no norm will be performed norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] @@ -100,7 +100,7 @@ geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] ##################################################################################### # References -# NOTE: "gtf" is only required if TxDb is not avaiable for the species in +# NOTE: "gtf" is only required if TxDb is not avaiable for the species in # Bioconductor eg. hs1 ##################################################################################### # references: @@ -152,4 +152,4 @@ adapters: "/opt2/output_carlisle/annotation/adapters.fa" # R Packages ##################################################################################### Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_carlisle/" -Rpkg_config: "/opt2/output_carlisle/config//Rpack.config" \ No newline at end of file +Rpkg_config: "/opt2/output_carlisle/config//Rpack.config" diff --git a/.test/contrasts.test.tsv b/.test/contrasts.test.tsv index c69fab0..2a4fb72 100644 --- a/.test/contrasts.test.tsv +++ b/.test/contrasts.test.tsv @@ -1,2 +1,2 @@ condition1 condition2 -53_H3K4me3 HN6_H3K4me3 \ No newline at end of file +53_H3K4me3 HN6_H3K4me3 diff --git a/.test/samples.test.tsv b/.test/samples.test.tsv index 2abf39f..71acf19 100644 --- a/.test/samples.test.tsv +++ b/.test/samples.test.tsv @@ -2,4 +2,4 @@ sampleName replicateNumber isControl controlName controlReplicateNumber path_to_ 53_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 PIPELINE_HOME/.test/53_H3K4me3_1.R1.fastq.gz PIPELINE_HOME/.test/53_H3K4me3_1.R2.fastq.gz 53_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 PIPELINE_HOME/.test/53_H3K4me3_2.R1.fastq.gz PIPELINE_HOME/.test/53_H3K4me3_2.R2.fastq.gz HN6_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 PIPELINE_HOME/.test/HN6_H3K4me3_1.R1.fastq.gz PIPELINE_HOME/.test/HN6_H3K4me3_1.R2.fastq.gz -HN6_IgG_rabbit_negative_control 1 Y - - PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz \ No newline at end of file +HN6_IgG_rabbit_negative_control 1 Y - - PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz diff --git a/.test/samples.test_lintr.tsv b/.test/samples.test_lintr.tsv index a39af0b..e01483f 100644 --- a/.test/samples.test_lintr.tsv +++ 
b/.test/samples.test_lintr.tsv @@ -2,4 +2,4 @@ sampleName replicateNumber isControl controlName controlReplicateNumber path_to_ 53_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_1.R1.fastq.gz /opt2/.test/53_H3K4me3_1.R2.fastq.gz 53_H3K4me3 2 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/53_H3K4me3_2.R1.fastq.gz /opt2/.test/53_H3K4me3_2.R2.fastq.gz HN6_H3K4me3 1 N HN6_IgG_rabbit_negative_control 1 /opt2/.test/HN6_H3K4me3_1.R1.fastq.gz /opt2/.test/HN6_H3K4me3_1.R2.fastq.gz -HN6_IgG_rabbit_negative_control 1 Y - - /opt2/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz /opt2/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz \ No newline at end of file +HN6_IgG_rabbit_negative_control 1 Y - - /opt2/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz /opt2/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f7623b..e23fcba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,25 +1,32 @@ ## CARLISLE development version -- Bug fixes: (#127, @epehrsson) - - Removes single-sample group check for DESeq. - - Increases memory for DESeq. - - Ensures control replicate number is an integer. - - Fixes FDR cutoff misassigned to log2FC cutoff. - - Fixes `no_dedup` variable names in library normalization scripts. - Fig bug that added nonexistent directories to the singularity bind paths. (#135, @kelly-sovacool) -- New features: - - Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool) - - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. - - New visualizations: (#132, @epehrsson) - - New rules `cov_correlation`, `homer_enrich`, `combine_homer`, `count_peaks` - - Add peak caller to MACS2 peak xls filename - - New parameters in the config file to make certain rules optional: (#133, @kelly-sovacool) - - GO enrichment is controlled by `run_go_enrichment` (default: `false`) - - ROSE is controlled by `run_rose` (default: `false`) -- Misc: - - The singularity version is no longer specified, per request of the biowulf admins. (#139, @kelly-sovacool) +### Bug fixes + +- Bug fixes for DESeq (#127, @epehrsson) + - Removes single-sample group check for DESeq. + - Increases memory for DESeq. + - Ensures control replicate number is an integer. + - Fixes FDR cutoff misassigned to log2FC cutoff. + - Fixes `no_dedup` variable names in library normalization scripts. +- Fix bug that added nonexistent directories to the singularity bind paths. (#135, @kelly-sovacool) + - Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool) + - The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed. + +### New features + +- New visualizations: (#132, @epehrsson) + - New rules `cov_correlation`, `homer_enrich`, `combine_homer`, `count_peaks` + - Add peak caller to MACS2 peak xls filename +- New parameters in the config file to make certain rules optional: (#133, @kelly-sovacool) + - GO enrichment is controlled by `run_go_enrichment` (default: `false`) + - ROSE is controlled by `run_rose` (default: `false`) + +### Misc + +- The singularity version is no longer specified, per request of the biowulf admins.
(#139, @kelly-sovacool) ## CARLISLE v2.5.0 + - Refactors R packages to a common source location (#118, @slsevilla) - Adds a --force flag to allow for re-initialization of a workdir (#97, @slsevilla) - Fixes error with testrun in DESEQ2 (#113, @slsevilla) @@ -30,7 +37,8 @@ - Minor documentation improvements. (#100, @kelly-sovacool) - Fix: allow printing the version or help message even if singularity is not in the path. (#110, @kelly-sovacool) -## CARLISLE v2.4.1 +## CARLISLE v2.4.1 + - Add GitHub Action to add issues/PRs to personal project boards by @kelly-sovacool in #95 - Create install script by @kelly-sovacool in #93 - feat: use summits bed for homer input; save temporary files; fix deseq2 bug by @slsevilla in #108 @@ -38,12 +46,14 @@ - Test a dryrun with GitHub Actions by @kelly-sovacool in #94 ## CARLISLE v2.4.0 + - Feature- Merged Yue's fork, adding DEEPTOOLS by @slsevilla in #85 - Feature- Added tracking features from SPOOK by @slsevilla in #88 - Feature - Dev test run completed by @slsevilla in #89 - Bug - Fixed bugs related to Biowulf transition ## CARLISLE v2.1.0 + - enhancement - update gopeaks resources - change SEACR to run "norm" without spikein controls, "non" with spikein controls @@ -51,10 +61,12 @@ - fix GoEnrich bug for failed plots ## CARLISLE v2.0.1 + - fix error when contrasts set to "N" - adjust goenrich resources to be more efficient ## CARLISLE 2.0.0 + - Add a MAPQ filter to samtools (rule align) - Add GoPeaks MultiQC module - Allow for library normalization to occur during first pass @@ -71,16 +83,20 @@ - moved ~12% of all jobs to local deployment (within SLURM submission) ## CARLISLE 1.2.0 + - merge increases to resources; update workflow img, contributions ## CARLISLE 1.1.1 + - patch for gz bigbed bug ## CARLISLE 1.1.0 + - add broad-cutoff to macs2 broad peaks param settings - add non.stringent and non.relaxed to annotation options - merge DESEQ and DESEQ2 rules together - identify some files as temp ## CARLISLE 1.0.1 + - contains patch for DESEQ error with non hs1 reference samples diff --git a/annotation/README.md b/annotation/README.md index 09240c1..a73fec3 100755 --- a/annotation/README.md +++ b/annotation/README.md @@ -1,9 +1,11 @@ ## mm10 download + https://bitbucket.org/young_computation/rose/src/master/annotation/mm10_refseq.ucsc `create_hs1_ucsc.sh` script was used to create the hs1 ucsc file. 
#### hg19_refseq.ucsc and hg38_refseq.ucsc source + /data/khanlab3/gb_omics/processed_DATA/YueHu/cutrun_120822# hg19_refseq.ucsc and hg38_refseq.ucsc source # info on these resources is outlined in: https://github.com/CCBR/CARLISLE/issues/27 diff --git a/annotation/create_hs1_uscs.sh b/annotation/create_hs1_uscs.sh index d4eb35e..52db593 100644 --- a/annotation/create_hs1_uscs.sh +++ b/annotation/create_hs1_uscs.sh @@ -3,4 +3,4 @@ module load ucsc gtfToGenePred -genePredExt -ignoreGroupsWithoutExons /data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf hs1.genes.genePred head -n1 hg19_refseq.ucsc > hs1.ucsc -awk -F"\t" -v OFS="\t" '{print $NF,$_}' hs1.genes.genePred >> hs1.ucsc \ No newline at end of file +awk -F"\t" -v OFS="\t" '{print $NF,$_}' hs1.genes.genePred >> hs1.ucsc diff --git a/bin/carlisle b/bin/carlisle index 1ac07e9..22194d6 100755 --- a/bin/carlisle +++ b/bin/carlisle @@ -5,4 +5,4 @@ SCRIPTDIRNAME=$(readlink -f $(dirname "$SCRIPTNAME")) TOOLDIR=$(dirname "$SCRIPTDIRNAME") TOOLNAME=$(basename "$SCRIPTNAME") -${TOOLDIR}/carlisle "$@" || true \ No newline at end of file +${TOOLDIR}/carlisle "$@" || true diff --git a/carlisle b/carlisle index eb12d8d..367c2ea 100755 --- a/carlisle +++ b/carlisle @@ -46,7 +46,7 @@ function get_version (){ echo "Version: $(get_version_from_path $PIPELINE_HOME)" } -function usage() { +function usage() { # This function prints generic usage of the wrapper script. echo "${SCRIPTBASENAME} --> run CARLISLE @@ -64,13 +64,13 @@ function usage() { *) runlocal : run without submitting to sbatch *) runtest: run on cluster with included hg38 test dataset 2. WORKDIR: [Type: String]: Absolute or relative path to the output folder with write permissions. - - Optional Arguments: + + Optional Arguments: *) -f / --force: Force flag will re-initialize a previously initialized workdir " } -function err() { +function err() { # This function print a error message (argument 1), the usage and then exits with non-zero status cat <<< " # @@ -80,7 +80,7 @@ function err() { # # # - " && usage && exit 1 1>&2; + " && usage && exit 1 1>&2; } function init() { @@ -89,7 +89,7 @@ function init() { # 2. copying essential files like config.yaml and samples.tsv into the workdir # 3. setting up logs and stats folders # create output folder - if [[ -d $WORKDIR ]] && [[ -z $FORCEFLAG ]]; then + if [[ -d $WORKDIR ]] && [[ -z $FORCEFLAG ]]; then err "Folder $WORKDIR already exists! If you'd like to re-initialize use the -f/--force flag $FORCEFLAG|" fi mkdir -p $WORKDIR @@ -116,7 +116,7 @@ function init() { if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi #create links for rose file - if [[ ! -d $WORKDIR/annotation ]]; then + if [[ ! -d $WORKDIR/annotation ]]; then mkdir -p $WORKDIR/annotation cp ${PIPELINE_HOME}/annotation/* $WORKDIR/annotation/ fi @@ -165,9 +165,9 @@ function controlcheck(){ control_list=`awk '$3 ~ /Y/' ${WORKDIR}/config/samples.tsv | awk '{print $1}'` check1=`awk '{print $1}' ${WORKDIR}/config/contrasts.tsv` check2=`awk '{print $2}' ${WORKDIR}/config/contrasts.tsv` - + for sample_id in ${control_list[@]}; do - if [[ $check1 == $sample_id || $check2 == $sample_id ]]; then + if [[ $check1 == $sample_id || $check2 == $sample_id ]]; then echo "Controls ($sample_id) cannot be listed in contrast.csv - update and re-run" echo "$check1 okkk $check2" exit 0 @@ -179,7 +179,7 @@ function dryrun() { # Dry-run runcheck controlcheck - + if [ ! 
-d ${WORKDIR}/logs/dryrun/ ]; then mkdir ${WORKDIR}/logs/dryrun/; fi if [ -f ${WORKDIR}/dryrun.log ]; then @@ -192,7 +192,7 @@ function dryrun() { function unlock() { # Unlock the workdir if previous snakemake run ended abruptly runcheck - run "--unlock" + run "--unlock" } function dag() { @@ -234,18 +234,18 @@ function preruncleanup() { echo "Running..." # check initialization - check_essential_files + check_essential_files cd $WORKDIR ## Archive previous run files - if [ -f ${WORKDIR}/logs/snakemake.log ];then + if [ -f ${WORKDIR}/logs/snakemake.log ];then modtime=$(stat ${WORKDIR}/logs/snakemake.log |grep Modify|awk '{print $2,$3}'|awk -F"." '{print $1}'|sed "s/ //g"|sed "s/-//g"|sed "s/://g") mv ${WORKDIR}/logs/snakemake.log ${WORKDIR}/stats/snakemake.${modtime}.log - if [ -f ${WORKDIR}/logs/snakemake.log.HPC_summary.txt ];then + if [ -f ${WORKDIR}/logs/snakemake.log.HPC_summary.txt ];then mv ${WORKDIR}/logs/snakemake.log.HPC_summary.txt ${WORKDIR}/stats/snakemake.${modtime}.log.HPC_summary.txt fi - if [ -f ${WORKDIR}/logs/snakemake.stats ];then + if [ -f ${WORKDIR}/logs/snakemake.stats ];then mv ${WORKDIR}/logs/snakemake.stats ${WORKDIR}/stats/snakemake.${modtime}.stats fi fi @@ -284,11 +284,11 @@ function run() { snakemake -s $SNAKEFILE \ --report ${WORKDIR}/logs/runlocal_snakemake_report.html \ --directory $WORKDIR \ - --configfile ${WORKDIR}/config/config.yaml + --configfile ${WORKDIR}/config/config.yaml fi elif [ "$1" == "slurm" ];then - + preruncleanup cat > ${WORKDIR}/submit_script.sbatch << EOF @@ -326,7 +326,7 @@ function run() { snakemake -s $SNAKEFILE \ --directory $WORKDIR \ --report ${WORKDIR}/logs/runslurm_snakemake_report.html \ - --configfile ${WORKDIR}/config/config.yaml + --configfile ${WORKDIR}/config/config.yaml fi bash <(curl https://raw.githubusercontent.com/CCBR/Tools/master/Biowulf/gather_cluster_stats.sh 2>/dev/null) ${WORKDIR}/logs/snakemake.log > ${WORKDIR}/logs/snakemake.log.HPC_summary.txt @@ -334,7 +334,7 @@ EOF sbatch ${WORKDIR}/submit_script.sbatch - else # for unlock and dryrun + else # for unlock and dryrun snakemake $1 -s $SNAKEFILE \ --directory $WORKDIR \ --use-envmodules \ @@ -356,7 +356,7 @@ function reset() { echo "Working Dir: $WORKDIR" if [ ! -d $WORKDIR ];then err "Folder $WORKDIR does not exist!";fi - + echo "Deleting $WORKDIR" rm -rf $WORKDIR echo "Re-Initializing $WORKDIR" diff --git a/config/config.yaml b/config/config.yaml index f2c5bee..f6407d0 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,167 +1,167 @@ -##################################################################################### -# Folders / Paths -##################################################################################### -# The working dir... output will be in the results subfolder of the workdir -workdir: "WORKDIR" - -# scripts directory -# by default, use the scripts copied to the working directory. -# alternatively, use the scripts from the pipeline source. -scriptsdir: "WORKDIR/scripts" -#scriptsdir: "PIPELINE_HOME/workflow/scripts" - -# tab delimited samples file .. 
see samplefile for format details -samplemanifest: "WORKDIR/config/samples.tsv" - -##################################################################################### -# User parameters -##################################################################################### -# run sample contrasts -run_contrasts: true # true or false, no quotes -contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be `true` -contrasts_fdr_cutoff: 0.05 -contrasts_lfc_cutoff: 0.59 # FC of 1.5 - -# these steps are long-running. use `true` if you would like to run them -run_go_enrichment: false -run_rose: false - -# reference -genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. - -# alignment quality threshold -mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). - -# normalization method -## spikein: normalization will be performed based off of spike-in aligned read count; -## library: library normalization will be performed -## none: no norm will be performed -norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] -## if norm_method ="spikein" -spikein_genome: "ecoli" # must be species found in spikein_reference below -spikein_scale: 1000000 - -# user parameters for alignment -bowtie2_parameters: "--dovetail --phred33 --very-sensitive" -fragment_len_filter: "1000" - -# duplication status -## users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) -### dupstatus: "dedup" # means run deduplicated analysis only -### dupstatus: "no_dedup" # means run non-deduplicated analysis only -## complete list: -### dupstatus: "dedup, no_dedup" -dupstatus: "dedup, no_dedup" - -# which peaktypes to consider for differential analysis: -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## macs2 options: macs2_narrow, macs2_broad -### NOTE: DESeq step generally fails for broadPeak; generally has too many calls. - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed | -# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | -## seacr options: seacr_stringent, seacr_relaxed - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## gopeaks options: gopeaks_narrow, gopeaks_broad - -# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | -# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | -## complete list: -### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" -peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" - -## macs2 additional option -### macs2 can be run with or without the control. 
adding a control will increase peak specificity -### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest -macs2_control: "N" - -# qvalues -## thresholds to be used for peak callers -## must be a list of comma separated values. minimum of numeric value required. -### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html -### default GOPEAKS qvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md; [link to issue] -### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md -# quality_thresholds: "0.1, 0.05" -quality_thresholds: "0.05" - -## MACS2, broad-peaks specific, quality threshold -### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used -macs2_broad_peak_threshold: "0.01" - -# annotations -## rose parameters -stitch_distance: 12500 -tss_distance: 2500 - -## homer -motif_size: 1000 -preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir" - -## GO Enrichment -## enrichment analysis can be performed on hg19 or hg38 samples -## one option may be chosen for each project -geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] - -##################################################################################### -# References -# NOTE: "gtf" is only required if TxDb is not avaiable for the species in -# Bioconductor eg. hs1 -##################################################################################### -# references: -reference: - hg38: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/hg38.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "hs" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hg38.tss.bed" - rose: "WORKDIR/annotation/hg38_refseq.ucsc" - hg19: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "hs" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed" - rose: "WORKDIR/annotation/hg19_refseq.ucsc" - mm10: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY" - macs2_g: "mm" - tss_bed: "PIPELINE_HOME/resources/tss_bed/mm10.tss.bed" - rose: "WORKDIR/annotation/mm10_refseq.ucsc" - hs1: - fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa" - gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" - blacklist: "PIPELINE_HOME/resources/blacklistbed/hs1.bed" - tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed" - regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" - macs2_g: "3.1e+8" - rose: "WORKDIR/annotation/hs1_refseq.ucsc" -# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html -# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ 
to get 3.1e+8 value above - -spikein_reference: - ecoli: - fa: "PIPELINE_HOME/resources/spikein/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna" - drosophila: - fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - saccharomyces: - fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna" - -adapters: "PIPELINE_HOME/resources/other/adapters.fa" - -##################################################################################### -# CONTAINERS -##################################################################################### -containers: - base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v6" - carlisle_r: "docker://nciccbr/carlisle_r:v2" \ No newline at end of file +##################################################################################### +# Folders / Paths +##################################################################################### +# The working dir... output will be in the results subfolder of the workdir +workdir: "WORKDIR" + +# scripts directory +# by default, use the scripts copied to the working directory. +# alternatively, use the scripts from the pipeline source. +scriptsdir: "WORKDIR/scripts" +#scriptsdir: "PIPELINE_HOME/workflow/scripts" + +# tab delimited samples file .. see samplefile for format details +samplemanifest: "WORKDIR/config/samples.tsv" + +##################################################################################### +# User parameters +##################################################################################### +# run sample contrasts +run_contrasts: true # true or false, no quotes +contrasts: "WORKDIR/config/contrasts.tsv" # run_contrasts needs to be `true` +contrasts_fdr_cutoff: 0.05 +contrasts_lfc_cutoff: 0.59 # FC of 1.5 + +# these steps are long-running. use `true` if you would like to run them +run_go_enrichment: false +run_rose: false + +# reference +genome: "hg38" # currently supports hg38, hg19 and mm10. Custom genome can be added with appropriate additions to "reference" section below. + +# alignment quality threshold +mapping_quality: 2 #only report alignment records with mapping quality of at least N (>= N). + +# normalization method +## spikein: normalization will be performed based off of spike-in aligned read count; +## library: library normalization will be performed +## none: no norm will be performed +norm_method: "spikein" # method of normalization to be used; currently supports ["spikein","library","none"] +## if norm_method ="spikein" +spikein_genome: "ecoli" # must be species found in spikein_reference below +spikein_scale: 1000000 + +# user parameters for alignment +bowtie2_parameters: "--dovetail --phred33 --very-sensitive" +fragment_len_filter: "1000" + +# duplication status +## users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) +### dupstatus: "dedup" # means run deduplicated analysis only +### dupstatus: "no_dedup" # means run non-deduplicated analysis only +## complete list: +### dupstatus: "dedup, no_dedup" +dupstatus: "dedup, no_dedup" + +# which peaktypes to consider for differential analysis: +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## macs2 options: macs2_narrow, macs2_broad +### NOTE: DESeq step generally fails for broadPeak; generally has too many calls. 
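The config above exposes `bowtie2_parameters` and `mapping_quality` without showing how they are consumed downstream. As a hedged sketch only (this is not the pipeline's actual align rule; the index path and FASTQ names are placeholders), these two settings typically map onto a command of this shape:

```bash
# Illustrative sketch only: bowtie2_parameters are passed straight to the aligner,
# and mapping_quality becomes a MAPQ filter (keep records with MAPQ >= 2).
# The index path and FASTQ names are placeholders, not pipeline outputs.
bowtie2 --dovetail --phred33 --very-sensitive \
  -x /path/to/hg38_index \
  -1 sample.R1.fastq.gz -2 sample.R2.fastq.gz |
  samtools view -b -q 2 -o sample.filtered.bam -
```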
+ +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent| Non-Normalized, Relaxed | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +## seacr options: seacr_stringent, seacr_relaxed + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## gopeaks options: gopeaks_narrow, gopeaks_broad + +# | Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +# | Macs2 | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +# | SEACR | NA | NA | AVAILABLE w/o SPIKEIN | AVAILABLE w/o SPIKEIN | AVAILABLE w/ SPIKEIN | AVAILABLE w/ SPIKEIN | +# | GoPeaks | AVAILABLE | AVAILABLE | NA | NA | NA | NA | +## complete list: +### peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" +peaktype: "macs2_narrow, macs2_broad, seacr_stringent, seacr_relaxed, gopeaks_narrow, gopeaks_broad" + +## macs2 additional option +### macs2 can be run with or without the control. adding a control will increase peak specificity +### default is "N"; selecting "Y" will run the paired control sample provided in the sample manifest +macs2_control: "N" + +# qvalues +## thresholds to be used for peak callers +## must be a list of comma separated values. minimum of numeric value required. +### default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html +### default GOPEAKS qvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md; [link to issue] +### default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md +# quality_thresholds: "0.1, 0.05" +quality_thresholds: "0.05" + +## MACS2, broad-peaks specific, quality threshold +### if broadPeak is seleted as a 'peaktype', an additional quality threshold can be used +macs2_broad_peak_threshold: "0.01" + +# annotations +## rose parameters +stitch_distance: 12500 +tss_distance: 2500 + +## homer +motif_size: 1000 +preparsedDir: "/data/CCBR_Pipeliner/db/PipeDB/homer/preparsedDir" + +## GO Enrichment +## enrichment analysis can be performed on hg19 or hg38 samples +## one option may be chosen for each project +geneset_id: "GOBP" # ["GOBP" "GOCC" "GOMF" "KEGG"] + +##################################################################################### +# References +# NOTE: "gtf" is only required if TxDb is not avaiable for the species in +# Bioconductor eg. 
hs1 +##################################################################################### +# references: +reference: + hg38: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/hg38.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hg38.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hg38.tss.bed" + rose: "WORKDIR/annotation/hg38_refseq.ucsc" + hg19: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/hg19.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hg19.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "hs" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hg19.tss.bed" + rose: "WORKDIR/annotation/hg19_refseq.ucsc" + mm10: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/mm10.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/mm10.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chrX chrY" + macs2_g: "mm" + tss_bed: "PIPELINE_HOME/resources/tss_bed/mm10.tss.bed" + rose: "WORKDIR/annotation/mm10_refseq.ucsc" + hs1: + fa: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/hs1.fa" + gtf: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" + blacklist: "PIPELINE_HOME/resources/blacklistbed/hs1.bed" + tss_bed: "PIPELINE_HOME/resources/tss_bed/hs1.tss.bed" + regions: "chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY" + macs2_g: "3.1e+8" + rose: "WORKDIR/annotation/hs1_refseq.ucsc" +# ref: https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html +# used faCount from http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/ to get 3.1e+8 value above + +spikein_reference: + ecoli: + fa: "PIPELINE_HOME/resources/spikein/Ecoli_GCF_000005845.2_ASM584v2_genomic.fna" + drosophila: + fa: "/fdb/igenomes/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + saccharomyces: + fa: "PIPELINE_HOME/resources/spikein/S_cer_S288C_R64.fna" + +adapters: "PIPELINE_HOME/resources/other/adapters.fa" + +##################################################################################### +# CONTAINERS +##################################################################################### +containers: + base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v6" + carlisle_r: "docker://nciccbr/carlisle_r:v2" diff --git a/config/contrasts.tsv b/config/contrasts.tsv index e9ba67e..3d93b0f 100644 --- a/config/contrasts.tsv +++ b/config/contrasts.tsv @@ -1,2 +1,2 @@ condition1 condition2 -sample1 sample2 \ No newline at end of file +sample1 sample2 diff --git a/config/fqscreen_config.conf b/config/fqscreen_config.conf index 4101904..c4839c4 100644 --- a/config/fqscreen_config.conf +++ b/config/fqscreen_config.conf @@ -3,10 +3,10 @@ ###################### ## Bowtie or Bowtie2 # ###################### -## If the Bowtie1/2 binary is not in your PATH then you can +## If the Bowtie1/2 binary is not in your PATH then you can ## set this value to tell the program where to find it. ## Uncomment the line below and set the appropriate location. 
-## Please note, this path should include the executable +## Please note, this path should include the executable ## filename. #BOWTIE /usr/local/bin/bowtie/bowtie @@ -16,10 +16,10 @@ ########################################### ## Bismark (for bisulfite sequencing only)# ########################################### -## If the Bismark binary is not in your PATH then you can +## If the Bismark binary is not in your PATH then you can ## set this value to tell the program where to find it. ## Uncomment the line below and set the appropriate location. -## Please note, this path should include the executable +## Please note, this path should include the executable ## filename. #BISMARK /usr/local/bin/bismark/bismark @@ -40,26 +40,26 @@ THREADS 24 ## This section allows you to configure multiple databases ## to search against in your screen. For each database ## you need to provide a database name (which can't contain -## spaces) and the location of the bowtie indices which +## spaces) and the location of the bowtie indices which ## you created for that database. -## -## The entries shown below are only suggested examples, you +## +## The entries shown below are only suggested examples, you ## can add as many DATABASE sections as required, and you ## can comment out or remove as many of the existing entries ## as desired. ## ## Either the original bowtie or bowtie2 may be used for the -## mapping. Specify the aligner to use with the command line -## flag --aligner with arguments 'bowtie' or +## mapping. Specify the aligner to use with the command line +## flag --aligner with arguments 'bowtie' or ## 'bowtie2' (default). -## -## The configuration file may list paths to both bowtie and +## +## The configuration file may list paths to both bowtie and ## bowtie2 indices. FastQ Screen automatically detects whether -## a specified index is compatible with bowtie or bowtie2. +## a specified index is compatible with bowtie or bowtie2. ## -## Although the configuration file may list paths to both -## bowtie and bowtie2 indices, only one aligner will be used -## for the mapping, as specified by the --aligner flag. +## Although the configuration file may list paths to both +## bowtie and bowtie2 indices, only one aligner will be used +## for the mapping, as specified by the --aligner flag. 
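The comments above note that the aligner is chosen with the `--aligner` flag and that this conf file supplies the DATABASE entries. A minimal illustrative invocation against this configuration might look like the following; the input FASTQ name is an assumed placeholder, and the pipeline itself drives FastQ Screen through its own rule:

```bash
# Illustrative only: screen one FASTQ against the databases defined in this conf,
# explicitly selecting bowtie2 as the aligner. The FASTQ name is a placeholder.
fastq_screen --conf config/fqscreen_config.conf --aligner bowtie2 sample.R1.fastq.gz
```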
## ## The path to the index files SHOULD INCLUDE THE BASENAME of ## the index, e.g: @@ -72,8 +72,8 @@ THREADS 24 ## ## Human - sequences available from ## ftp://ftp.ensembl.org/pub/current/fasta/homo_sapiens/dna/ -DATABASE Human /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/hg38/hg38 -DATABASE Mouse /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/mm10/mm10 +DATABASE Human /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/hg38/hg38 +DATABASE Mouse /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/mm10/mm10 DATABASE Bacteria /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/Bacteria/bacteria DATABASE Fungi /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/Fungi/fungi -DATABASE Virus /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/Virus/virus \ No newline at end of file +DATABASE Virus /data/CCBR_Pipeliner/db/PipeDB/lib/fastq_screen_db/Virus/virus diff --git a/config/multiqc_config.yaml b/config/multiqc_config.yaml index adb6d41..292be60 100644 --- a/config/multiqc_config.yaml +++ b/config/multiqc_config.yaml @@ -1,10 +1,10 @@ #Intro title information title: "CUTRUN Analysis Report" report_header_info: - - FASTQ Analysis: 'Analysis includes basic QC metrics' - - FASTQScreen Analysis: 'Analysis includes human, mouse, and bacteria' - - SAMStats Analysis: 'Analysis included flagstats and idxstats' - - GoPeaks: 'Analysis includes the number of peaks called per sample via the general table and the bar plot' + - FASTQ Analysis: "Analysis includes basic QC metrics" + - FASTQScreen Analysis: "Analysis includes human, mouse, and bacteria" + - SAMStats Analysis: "Analysis included flagstats and idxstats" + - GoPeaks: "Analysis includes the number of peaks called per sample via the general table and the bar plot" #include fastqscreen fastqscreen_simpleplot: true @@ -14,14 +14,14 @@ show_analysis_paths: False #find samples sp: - fastq_screen: - fn: '*_screen.txt' - gopeaks: - fn: '*gopeaks.json' + fastq_screen: + fn: "*_screen.txt" + gopeaks: + fn: "*gopeaks.json" module_order: -# Sample Quality control - - 'fastqc' - - 'fastq_screen' - - 'samtools' - - 'gopeaks' \ No newline at end of file + # Sample Quality control + - "fastqc" + - "fastq_screen" + - "samtools" + - "gopeaks" diff --git a/config/samples.tsv b/config/samples.tsv index 56c314d..4cc4e66 100644 --- a/config/samples.tsv +++ b/config/samples.tsv @@ -4,4 +4,4 @@ sample1 2 N igg1 2 sample2 1 N igg2 1 igg1 1 Y - - igg1 2 Y - - -igg2 1 Y - - \ No newline at end of file +igg2 1 Y - - diff --git a/docs/requirements.txt b/docs/requirements.txt index e63c63e..9d5ddac 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,4 +7,4 @@ mkdocs-git-revision-date-plugin==0.3.2 #https://pypi.org/project/mkdocs-material/ mkdocs-material==9.1.6 #https://pypi.org/project/mkdocs-material-extensions/ -mkdocs-material-extensions==1.1.1 \ No newline at end of file +mkdocs-material-extensions==1.1.1 diff --git a/docs/user-guide/contributions.md b/docs/user-guide/contributions.md index a05202e..ed0b68f 100644 --- a/docs/user-guide/contributions.md +++ b/docs/user-guide/contributions.md @@ -1,4 +1,5 @@ # Contributions + The following members contributed to the development of the CARLISLE pipeline: - [Samantha Sevilla](https://github.com/slsevilla) @@ -8,4 +9,4 @@ The following members contributed to the development of the CARLISLE pipeline: - [Yue Hu](https://github.com/YueHu-002) - Vassiliki Saloura -VK, SS, SK, HC contributed to the generating the source code and all members contributed to the main concepts and analysis. 
\ No newline at end of file +VK, SS, SK, HC contributed to generating the source code and all members contributed to the main concepts and analysis. diff --git a/docs/user-guide/getting-started.md b/docs/user-guide/getting-started.md index 1274913..329bda2 100644 --- a/docs/user-guide/getting-started.md +++ b/docs/user-guide/getting-started.md @@ -1,9 +1,11 @@ # Overview + The CARLISLE github repository is stored locally, and will be used for project deployment. Multiple projects can be deployed from this one point simultaneously, without concern. ## 1. Getting Started ## 1.1 Introduction + The CARLISLE Pipeline begins with raw FASTQ files and performs trimming followed by alignment using [BOWTIE2](https://bowtie-bio.sourceforge.net/bowtie2/index.shtml). Data is then normalized through either the use of a user-specified species (IE E.Coli) spike-in control or through the determined library size. Peaks are then called using [MACS2](https://hbctraining.github.io/Intro-to-ChIPseq/lessons/05_peak_calling_macs.html), [SEACR](https://github.com/FredHutch/SEACR), and [GoPEAKS](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-022-02707-w) with various options selected by the user. Peaks are then annotated, and summarized into reports. If designated, differential analysis is performed using [DESEQ2](https://bioconductor.org/packages/release/bioc/html/DESeq2.html). QC reports are also generated with each project using [FASTQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) and [MULTIQC](https://multiqc.info/). Annotations are added using [HOMER](http://homer.ucsd.edu/homer/ngs/annotation.html) and [ROSE](https://github.com/stjude/ROSE). GSEA Enrichment analysis predictions are added using [CHIPENRICH](https://bioconductor.org/packages/devel/bioc/vignettes/chipenrich/inst/doc/chipenrich-vignette.html). The following are sub-commands used within CARLISLE: @@ -19,6 +21,7 @@ The following are sub-commands used within CARLISLE: - runtest: copies test manifests and files to WORKDIR ## 1.2 Setup Dependencies + CARLISLE has several dependencies listed below. These dependencies can be installed by a sysadmin. All dependencies will be automatically loaded if running from Biowulf. - bedtools: "bedtools/2.30.0" @@ -42,13 +45,16 @@ CARLISLE has several dependencies listed below. These dependencies can be instal - ucsc: "ucsc/407" ## 1.3 Login to the cluster + CARLISLE has been exclusively tested on Biowulf HPC. Login to the cluster's head node and move into the pipeline location. + ``` # ssh into cluster's head node ssh -Y $USER@biowulf.nih.gov ``` -## 1.4 Load an interactive session +## 1.4 Load an interactive session + An interactive session should be started before performing any of the pipeline sub-commands, even if the pipeline is to be executed on the cluster. ``` diff --git a/docs/user-guide/output.md b/docs/user-guide/output.md index 0f8de72..7e48447 100644 --- a/docs/user-guide/output.md +++ b/docs/user-guide/output.md @@ -1,18 +1,19 @@ # 4.
Expected Outputs + The following directories are created under the WORKDIR/results directory: - alignment_stats: this directory include information on the alignment of each sample - bam: this directory includes BAM files, statistics on samples, statistics on spike-in controls for each sample - bedgraph: this directory includes BEDGRAPH files and statistic summaries for each sample -- bigwig: this directory includes the bigwig files for each sample +- bigwig: this directory includes the bigwig files for each sample - peaks: this directory contains a sub-directory that relates to the quality threshold used. - - quality threshold - - contrasts: this directory includes the contrasts for each line listed in the contrast manifest - - peak_caller: this directory includes all peak calls from each peak_caller (SEACR, MACS2, GOPEAKS) for each sample - - annotation - - go_enrichment: this directory includes gene set enrichment pathway predictions when `run_go_enrichment` is set to `true` in the config file. - - homer: this directory includes the annotation output from HOMER - - rose: this directory includes the annotation output from ROSE when `run_rose` is set to `true` in the config file. + - quality threshold + - contrasts: this directory includes the contrasts for each line listed in the contrast manifest + - peak_caller: this directory includes all peak calls from each peak_caller (SEACR, MACS2, GOPEAKS) for each sample + - annotation + - go_enrichment: this directory includes gene set enrichment pathway predictions when `run_go_enrichment` is set to `true` in the config file. + - homer: this directory includes the annotation output from HOMER + - rose: this directory includes the annotation output from ROSE when `run_rose` is set to `true` in the config file. - qc: this directory includes MULTIQC reports and spike-in control reports (when applicable) ``` @@ -117,4 +118,4 @@ The following directories are created under the WORKDIR/results directory: └── qc ├── fastqc_raw └── fqscreen_raw -``` \ No newline at end of file +``` diff --git a/docs/user-guide/preparing-files.md b/docs/user-guide/preparing-files.md index 547738a..93be694 100644 --- a/docs/user-guide/preparing-files.md +++ b/docs/user-guide/preparing-files.md @@ -1,7 +1,9 @@ # 2. Preparing Files + The pipeline is controlled through editing configuration and manifest files. Defaults are found in the /WORKDIR/config and /WORKDIR/manifest directories, after initialization. ## 2.1 Configs + The configuration files control parameters and software of the pipeline. These files are listed below: - config/config.yaml @@ -9,26 +11,32 @@ The configuration files control parameters and software of the pipeline. These f - resources/tools.yaml ### 2.1.1 Cluster Config + The cluster configuration file dictates the resouces to be used during submission to Biowulf HPC. There are two differnt ways to control these parameters - first, to control the default settings, and second, to create or edit individual rules. These parameters should be edited with caution, after significant testing. ### 2.1.2 Tools Config + The tools configuration file dictates the version of each software or program that is being used in the pipeline. ### 2.1.3 Config YAML + There are several groups of parameters that are editable for the user to control the various aspects of the pipeline. These are : - Folders and Paths - - These parameters will include the input and ouput files of the pipeline, as well as list all manifest names. 
+ - These parameters will include the input and ouput files of the pipeline, as well as list all manifest names. - User parameters - - These parameters will control the pipeline features. These include thresholds and whether to perform processes. + - These parameters will control the pipeline features. These include thresholds and whether to perform processes. - References - - These parameters will control the location of index files, spike-in references, adaptors and species calling information. + - These parameters will control the location of index files, spike-in references, adaptors and species calling information. + +#### 2.1.3.1 User Parameters -#### 2.1.3.1 User Parameters ##### 2.1.3.1.1 (Spike in Controls) + The pipeline allows for the use of a species specific spike-in control, or the use of normalization via library size. The parameter `spikein_genome` should be set to the species term used in `spikein_reference`. For example for ecoli spike-in: + ``` run_contrasts: true norm_method: "spikein" @@ -40,6 +48,7 @@ spikein_reference: ``` For example for drosophila spike-in: + ``` run_contrasts: true norm_method: "spikein" @@ -51,39 +60,49 @@ spikein_reference: ``` If it's determined that the amount of spike-in is not sufficient for the run, a library normaliaztion can be performed. + 1. Complete a CARLISLE run with spike-in set to "Y". This will allow for the complete assessment of the spike-in. 2. Run inital QC analysis on the output data 3. Add the alignment_stats dir to the configuration file. -4. Re-run the CARLISLE pipeline +4. Re-run the CARLISLE pipeline ##### 2.1.3.1.2 Duplication Status + Users can select duplicated peaks (dedup) or non-deduplicated peaks (no_dedup) through the user parameter. + ``` -dupstatus: "dedup, no_dedup" +dupstatus: "dedup, no_dedup" ``` ##### 2.1.3.1.3 Peak Caller + Three peak callers are available for deployment within the pipeline, with different settings deployed for each caller. 1. [MACS2](https://github.com/macs3-project/MACS) is available with two peak calling options: narrowPeak or broadPeak. NOTE: DESeq step generally fails for broadPeak; generally has too many calls. + ``` peaktype: "macs2_narrow, macs2_broad," ``` + 2. [SEACR](https://github.com/FredHutch/SEACR) is available with four peak calling options: stringent or relaxed parameters, to be paired with "norm" for samples without a spike-in control and "non" for samples with a spikein control + ``` peaktype: "seacr_stringent, seacr_relaxed" ``` + 3. 
[GOPEAKS](https://github.com/maxsonBraunLab/gopeaks) is available with two peak calling options: narrowpeaks or broadpeaks + ``` peaktype: "gopeaks_narrow, gopeaks_broad" ``` + A complete list of the available peak calling parameters and the recommended list of parameters is provided below: -| Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | -| --- | --- | --- |--- |--- |--- |--- | -| Macs2 | AVAIL | AVAIL | NA | NA | NA | NA | -| SEACR | NA | NA | AVAIL w/o SPIKEIN | AVAIL w/o SPIKEIN | AVAIL w/ SPIKEIN | AVAIL w/ SPIKEIN | -| GoPeaks | AVAIL | AVAIL | NA | NA | NA | NA | +| Peak Caller | Narrow | Broad | Normalized, Stringent | Normalized, Relaxed | Non-Normalized, Stringent | Non-Normalized, Relaxed | +| ----------- | ------ | ----- | --------------------- | ------------------- | ------------------------- | ----------------------- | +| Macs2 | AVAIL | AVAIL | NA | NA | NA | NA | +| SEACR | NA | NA | AVAIL w/o SPIKEIN | AVAIL w/o SPIKEIN | AVAIL w/ SPIKEIN | AVAIL w/ SPIKEIN | +| GoPeaks | AVAIL | AVAIL | NA | NA | NA | NA | ``` # Recommended list @@ -94,22 +113,26 @@ A complete list of the available peak calling parameters and the recommended lis ``` ##### 2.1.3.1.3.1 Macs2 additional option + MACS2 can be run with or without the control. adding a control will increase peak specificity Selecting "Y" for the `macs2_control` will run the paired control sample provided in the sample manifest ##### 2.1.3.1.4 Quality Tresholds + Thresholds for quality can be controled through the `quality_tresholds` parameter. This must be a list of comma separated values. minimum of numeric value required. - default MACS2 qvalue is 0.05 https://manpages.ubuntu.com/manpages/xenial/man1/macs2_callpeak.1.html - default GOPEAKS pvalue is 0.05 https://github.com/maxsonBraunLab/gopeaks/blob/main/README.md - default SEACR FDR threshold 1 https://github.com/FredHutch/SEACR/blob/master/README.md + ``` #default values quality_thresholds: "0.1, 0.05, 0.01" ``` #### 2.1.3.2 References -Additional reference files may be added to the pipeline, if other species were to be used. + +Additional reference files may be added to the pipeline, if other species were to be used. The absolute file paths which must be included are: @@ -119,15 +142,17 @@ The absolute file paths which must be included are: The following information must be included: 1. regions: "list of regions to be included; IE chr1 chr2 chr3" -2. macs2_g: "macs2 genome shorthand; IE mm IE hs" +2. macs2_g: "macs2 genome shorthand; IE mm IE hs" ## 2.2 Preparing Manifests + There are two manifests, one which required for all pipeliens and one that is only required if running a differential analysis. These files describe information on the samples and desired contrasts. The paths of these files are defined in the snakemake_config.yaml file. These files are: - samplemanifest - contrasts ### 2.2.1 Samples Manifest (REQUIRED) + This manifest will include information to sample level information. It includes the following column headers: - sampleName: the sample name WITHOUT replicate number (IE "SAMPLE") @@ -140,23 +165,22 @@ This manifest will include information to sample level information. 
It includes An example sampleManifest file is shown below: - -| sampleName| replicateNumber| isControl| controlName| controlReplicateNumber| path_to_R1| path_to_R2 -| --- |--- |--- |--- |--- |--- |--- | -| 53_H3K4me3| 1| N| HN6_IgG_rabbit_negative_control| 1| PIPELINE_HOME/.test/53_H3K4me3_1.R1.fastq.gz| PIPELINE_HOME/.test/53_H3K4me3_1.R2.fastq.gz -| 53_H3K4me3| 2| N| HN6_IgG_rabbit_negative_control| 1| PIPELINE_HOME/.test/53_H3K4me3_2.R1.fastq.gz| PIPELINE_HOME/.test/53_H3K4me3_2.R2.fastq.gz -| HN6_H3K4me3| 1| N| HN6_IgG_rabbit_negative_control| 1| PIPELINE_HOME/.test/HN6_H3K4me3_1.R1.fastq.gz| PIPELINE_HOME/.test/HN6_H3K4me3_1.R2.fastq.gz -| HN6_H3K4me3| 2| N| HN6_IgG_rabbit_negative_control| 1| PIPELINE_HOME/.test/HN6_H3K4me3_2.R1.fastq.gz| PIPELINE_HOME/.test/HN6_H3K4me3_2.R2.fastq.gz -| HN6_IgG_rabbit_negative_control| 1| Y| -| -| PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz| PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz - +| sampleName | replicateNumber | isControl | controlName | controlReplicateNumber | path_to_R1 | path_to_R2 | +| ------------------------------- | --------------- | --------- | ------------------------------- | ---------------------- | ----------------------------------------------------------------- | ----------------------------------------------------------------- | +| 53_H3K4me3 | 1 | N | HN6_IgG_rabbit_negative_control | 1 | PIPELINE_HOME/.test/53_H3K4me3_1.R1.fastq.gz | PIPELINE_HOME/.test/53_H3K4me3_1.R2.fastq.gz | +| 53_H3K4me3 | 2 | N | HN6_IgG_rabbit_negative_control | 1 | PIPELINE_HOME/.test/53_H3K4me3_2.R1.fastq.gz | PIPELINE_HOME/.test/53_H3K4me3_2.R2.fastq.gz | +| HN6_H3K4me3 | 1 | N | HN6_IgG_rabbit_negative_control | 1 | PIPELINE_HOME/.test/HN6_H3K4me3_1.R1.fastq.gz | PIPELINE_HOME/.test/HN6_H3K4me3_1.R2.fastq.gz | +| HN6_H3K4me3 | 2 | N | HN6_IgG_rabbit_negative_control | 1 | PIPELINE_HOME/.test/HN6_H3K4me3_2.R1.fastq.gz | PIPELINE_HOME/.test/HN6_H3K4me3_2.R2.fastq.gz | +| HN6_IgG_rabbit_negative_control | 1 | Y | - | - | PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R1.fastq.gz | PIPELINE_HOME/.test/HN6_IgG_rabbit_negative_control_1.R2.fastq.gz | ### 2.2.2 Contrast Manifest (OPTIONAL) + This manifest will include the sample information needed to perform differential comparisons. An example contrast file: -| condition1 | condition2 | -| --- | --- | +| condition1 | condition2 | +| ----------------------- | -------------------- | | MOC1_siSmyd3_2m_25_HCHO | MOC1_siNC_2m_25_HCHO | **Note**: you must have more than one sample per condition in order to perform differential analysis with DESeq2 diff --git a/docs/user-guide/run.md b/docs/user-guide/run.md index e08f634..bd03a45 100644 --- a/docs/user-guide/run.md +++ b/docs/user-guide/run.md @@ -1,7 +1,9 @@ # 3. Running the Pipeline ## 3.1 Pipeline Overview + The Snakemake workflow has multiple options: + ``` Usage: bash ./data/CCBR_Pipeliner/Pipelines/CARLISLE/carlisle -m/--runmode= -w/--workdir= 1. RUNMODE: [Type: String] Valid options: @@ -16,30 +18,34 @@ Usage: bash ./data/CCBR_Pipeliner/Pipelines/CARLISLE/carlisle -m/--runmode= NOTE: Previously, the following files were used as the tss BEDs but these: -> * are 1-based (BEDs are 0-based) -> * have 5 columns (BEDs have 3 or 6 or 12 columns) -> * column 5 is strand (BEDs have column 6 reserved for strand) -> -> Hence, we have replaced these with the new BED files created using the above commands.
+> +> - are 1-based (BEDs are 0-based) +> - have 5 columns (BEDs have 3 or 6 or 12 columns) +> - column 5 is strand (BEDs have column 6 reserved for strand) +> +> Hence, we have replaced these with the new BED files created using the above commands. +> > #### hg38.tss.bed source +> > https://github.com/CCRGeneticsBranch/khanlab_pipeline/blob/master/ref/hg38.tss.bed +> > #### hg19.tss.bed source +> > https://github.com/CCRGeneticsBranch/khanlab_pipeline/blob/master/ref/hg19.tss.bed diff --git a/resources/tss_bed/gtf2tssBed.py b/resources/tss_bed/gtf2tssBed.py index 6dbbdf5..4568d0d 100755 --- a/resources/tss_bed/gtf2tssBed.py +++ b/resources/tss_bed/gtf2tssBed.py @@ -2,46 +2,58 @@ import argparse import os + def get_gene_name(s): - s=list(map(lambda x:x.replace(";","").replace("\"",""), s.strip().split(" "))) - for i,v in enumerate(s): - if v == "gene_name": - gene_name = s[i+1] - return gene_name - elif v == "gene": - gene_name = s[i+1] - return gene_name - else: - return "Unknown" + s = list(map(lambda x: x.replace(";", "").replace('"', ""), s.strip().split(" "))) + for i, v in enumerate(s): + if v == "gene_name": + gene_name = s[i + 1] + return gene_name + elif v == "gene": + gene_name = s[i + 1] + return gene_name + else: + return "Unknown" + parser = argparse.ArgumentParser() parser.add_argument("--gtf", help="GTF input file", required=True) parser.add_argument("--bed", help="TSS BED output file", required=True) args = parser.parse_args() -def get_gene_start(start,end,strand): - if strand == "+": - return int(start)-1 - else: - return int(end)-1 + +def get_gene_start(start, end, strand): + if strand == "+": + return int(start) - 1 + else: + return int(end) - 1 + if not os.path.exists(args.gtf): - print("%s : GTF File is missing!"%(args.gtf)) - exit() + print("%s : GTF File is missing!" 
% (args.gtf)) + exit() -gtf = open(args.gtf,'r') -bed = open(args.bed,'w') +gtf = open(args.gtf, "r") +bed = open(args.bed, "w") for l in gtf.readlines(): - if l.startswith("#"): continue - l = l.split("\t") - if l[2] == "gene": - gene_name = get_gene_name(l[8]) - chrom = l[0] - start = l[3] - end = l[4] - strand = l[6] - gene_start = get_gene_start(start,end,strand) - bed_line = [chrom, str(gene_start), str(gene_start+1),gene_name,str(0),strand] - bed.write("%s\n"%("\t".join(bed_line))) + if l.startswith("#"): + continue + l = l.split("\t") + if l[2] == "gene": + gene_name = get_gene_name(l[8]) + chrom = l[0] + start = l[3] + end = l[4] + strand = l[6] + gene_start = get_gene_start(start, end, strand) + bed_line = [ + chrom, + str(gene_start), + str(gene_start + 1), + gene_name, + str(0), + strand, + ] + bed.write("%s\n" % ("\t".join(bed_line))) gtf.close() bed.close() diff --git a/workflow/Snakefile b/workflow/Snakefile index 9654dd4..75232e7 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -99,7 +99,7 @@ def run_contrasts(wildcards): files=[] if config["run_contrasts"]: files.append(join(RESULTSDIR,"replicate_sample.tsv")) - + # inputs for matrix inputs=expand(join(RESULTSDIR,"peaks","{qthresholds}","contrasts","{contrast_list}.{dupstatus}","{contrast_list}.{dupstatus}.{peak_caller_type}.txt"),qthresholds=QTRESHOLDS, contrast_list=CONTRAST_LIST,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE) files.extend(inputs) @@ -159,7 +159,7 @@ def get_enrich(wildcards): def get_combined(wildcards): files = [] - if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): + if ("macs2_narrow" in PEAKTYPE) or ("macs2_broad" in PEAKTYPE): combined_m = expand(join(RESULTSDIR,"peaks","{qthresholds}","macs2","annotation","homer","{treatment_control_list}.{dupstatus}.{peak_caller_type}.annotation_qvalue.tsv"),qthresholds=QTRESHOLDS,dupstatus=DUPSTATUS,peak_caller_type=PEAKTYPE_M,treatment_control_list=TREATMENT_LIST_M) files.extend(combined_m) return files @@ -213,7 +213,7 @@ rule all: ########################################## # manifests unpack(run_pipe_prep), - + # ALIGN / deeptools_bw unpack(run_deeptools_bw), @@ -231,7 +231,7 @@ rule all: # ALIGN / alignstats expand(join(RESULTSDIR,"alignment_stats","{replicate}.alignment_stats.yaml"),replicate=REPLICATES), - + # ALIGN / gather_alignstats join(RESULTSDIR,"alignment_stats","alignment_stats.tsv"), @@ -247,7 +247,7 @@ rule all: # DIFF / create_contrast_data_files unpack(run_contrasts), - + # ANNOTATION / findMotif, rose, create_contrast_peakcaller_files, go_enrichment unpack(get_motifs), unpack(get_enrich), @@ -302,11 +302,11 @@ onerror: # ALIGN / alignstats join(RESULTSDIR,"alignment_stats","{replicate}.alignment_stats.yaml"), - + # ALIGN / gather_alignstats join(RESULTSDIR,"alignment_stats","alignment_stats.tsv"), - # ALIGN / create_library_norm_scales + # ALIGN / create_library_norm_scales join(RESULTSDIR,"alignment_stats","library_scale.tsv"), # ALIGN / bam2bg diff --git a/workflow/envs/dummy_env.yaml b/workflow/envs/dummy_env.yaml index 8b9de65..8724513 100644 --- a/workflow/envs/dummy_env.yaml +++ b/workflow/envs/dummy_env.yaml @@ -1,3 +1,3 @@ # conda environments needed for the workflow should be stored here. # An alternative to conda environments, is containers (singularity or docker) -# Check https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html?highlight=conda#containerization-of-conda-based-workflows for details. 
\ No newline at end of file +# Check https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html?highlight=conda#containerization-of-conda-based-workflows for details. diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index ca3e533..c20451e 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -37,7 +37,7 @@ rule trim: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -62,7 +62,7 @@ rule trim: rule align: """ Align using bowtie: - * use --dovetail option via "bowtie2_parameters" in config.yaml. This is recommended for + * use --dovetail option via "bowtie2_parameters" in config.yaml. This is recommended for Cut and Run where overlapping R1 and R2 alignments are expected BAM is sorted and indexed, and stats are collected using flagstat and idxstats """ @@ -88,7 +88,7 @@ rule align: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -127,7 +127,7 @@ rule filter: bamidxstats=join(RESULTSDIR,"bam","{replicate}.{dupstatus}.bam.idxstats"), params: replicate = "{replicate}", - dupstatus = "{dupstatus}", # can be "dedup" or "no_dedup" + dupstatus = "{dupstatus}", # can be "dedup" or "no_dedup" fragment_len_filter = config["fragment_len_filter"], pyscript = join(SCRIPTSDIR,"_filter_bam.py"), memg = getmemg("filter") @@ -141,7 +141,7 @@ rule filter: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -158,7 +158,7 @@ rule filter: --TMP_DIR ${{TMPDIR}}/{params.replicate}_{params.dupstatus}_picardtmp \\ --CREATE_INDEX true \\ --METRICS_FILE {output.bam}.dupmetrics - + python {params.pyscript} --inputBAM ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.filtered.tmp1.bam \\ --outputBAM ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.filtered.bam \\ --fragmentlength {params.fragment_len_filter} \\ @@ -168,12 +168,12 @@ rule filter: # deduplicate only the spikeins genome_regions=$(cut -f1 {input.genome_len} | tr '\\n' ' ') samtools view -@{threads} -b {input.bam} $genome_regions | \\ - samtools sort -@{threads} -T $TMPDIR -o ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.genome.bam - + samtools sort -@{threads} -T $TMPDIR -o ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.genome.bam - spikein_regions=$(cut -f1 {input.spikein_len} | tr '\\n' ' ') samtools view -@{threads} -b {input.bam} $spikein_regions | \\ - samtools sort -@{threads} -T $TMPDIR -o ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.spikein.bam - + samtools sort -@{threads} -T $TMPDIR -o ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.spikein.bam + mkdir -p ${{TMPDIR}}/{params.replicate}_{params.dupstatus}_picardtmp java -Xmx{params.memg} -jar $PICARDJARPATH/picard.jar MarkDuplicates \\ --INPUT ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.spikein.bam \\ @@ -321,7 +321,7 @@ rule bam2bg: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -333,16 +333,16 @@ rule bam2bg: echo "No spike-in scale was used" spikein_scale=1 elif [[ "{params.spikein}" == "LIBRARY" ]];then - spikein_scale=`cat {input.library_file} | grep {params.replicate} 
| grep {params.dupstatus} | cut -f2 -d" " | head -n1` + spikein_scale=`cat {input.library_file} | grep {params.replicate} | grep {params.dupstatus} | cut -f2 -d" " | head -n1` echo "The spike-in is generated from the library size" else spikein_readcount=$(while read a b;do awk -v a=$a '{{if ($1==a) {{print $3}}}}' {input.bamidxstats};done < {input.spikein_len} | awk '{{sum=sum+$1}}END{{print sum}}') - + # if the spikein_readcount is below threshold, then there is not enough of the spikein control to use total_count=$(awk '{{sum+=$3; sum+=$4;}}END{{print sum;}}' {input.bamidxstats}) spikein_percent=`echo "scale=6 ; $spikein_readcount / $total_count * 100" | bc`;\ - if [[ $spikein_percent < 0.001 ]]; then + if [[ $spikein_percent < 0.001 ]]; then echo "The spikein percentage of {input.bam} was below the threshold (0.001%) at $spikein_percent%. The spikein_scale was set to 1." spikein_scale=1 else @@ -353,12 +353,12 @@ rule bam2bg: # create fragments file samtools view -b -@{threads} {input.bam} {params.regions} | samtools sort -n -@{threads} -T $TMPDIR -o ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.bam - bedtools bamtobed -bedpe -i ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.bam > ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.bed - + awk -v fl={params.fragment_len_filter} '{{ if ($1==$4 && $6-$2 < fl) {{print $0}}}}' ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.bed > ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.clean.bed - + cut -f 1,2,6 ${{TMPDIR}}/{params.replicate}.{params.dupstatus}.clean.bed | \\ LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} --temporary-directory=$TMPDIR -k1,1 -k2,2n -k3,3n > {output.fragments_bed} - + # run bedtools bedtools genomecov -bg -scale $spikein_scale -i {output.fragments_bed} -g {input.genome_len} > {output.bg} @@ -483,7 +483,7 @@ rule cov_correlation: pearson_plot=join(RESULTSDIR,"deeptools","all.{dupstatus}.PearsonCorr.png"), pca=join(RESULTSDIR,"deeptools","all.{dupstatus}.PCA.tab"), hc=join(RESULTSDIR,"deeptools","all.{dupstatus}.Pearson_heatmap.png"), - pca_format=join(RESULTSDIR,"deeptools","all.{dupstatus}.PearsonPCA.png") + pca_format=join(RESULTSDIR,"deeptools","all.{dupstatus}.PearsonPCA.png") params: rscript=join(SCRIPTSDIR,"_plot_correlation.R"), dupstatus="{dupstatus}" @@ -506,7 +506,7 @@ rule cov_correlation: -o {output.pearson_plot} \ --removeOutliers \ --outFileCorMatrix {output.pearson_corr} - + # Plot PCA plotPCA -in {output.counts} --outFileNameData {output.pca} diff --git a/workflow/rules/diff.smk b/workflow/rules/diff.smk index 97b966a..3ead6d0 100644 --- a/workflow/rules/diff.smk +++ b/workflow/rules/diff.smk @@ -28,14 +28,14 @@ rule create_contrast_data_files: """ Output file should include 6 columns 1)condition 2)sample - 53_H3K4me3 53_H3K4me3_1 - + 53_H3K4me3 53_H3K4me3_1 + 3)bed /results/peaks/gopeaks/53_H3K4me3_1_vs_HN6_IgG_rabbit_negative_control_1.dedup.broadGo_peaks.bed - + 4)bedgraph 5)scaling factor - /results/bedgraph/53_H3K4me3_1.dedup.bedgraph 86.32596685082872928176 - + /results/bedgraph/53_H3K4me3_1.dedup.bedgraph 86.32596685082872928176 + 6)bed /results/fragments/53_H3K4me3_1.dedup.fragments.bed """ @@ -130,7 +130,7 @@ rule make_counts_matrix: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -172,7 +172,7 @@ rule DESeq: """ set -exo pipefail dirname=$(basename $(mktemp)) - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d 
"/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID/$dirname" else TMPDIR="/dev/shm/$dirname" @@ -281,7 +281,7 @@ rule diffbb: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -290,7 +290,7 @@ rule diffbb: fi ## add check to ensure that DESEQ2 ran to completion - ## mainly used in tinytest scenarios, but also used if + ## mainly used in tinytest scenarios, but also used if ## Nsamples/group is 1 check=`wc -c {input.results} | cut -f1 -d" "` if [[ $check == 0 ]]; then diff --git a/workflow/rules/init.smk b/workflow/rules/init.smk index e4e729a..8e99fdd 100644 --- a/workflow/rules/init.smk +++ b/workflow/rules/init.smk @@ -12,7 +12,7 @@ pp = pprint.PrettyPrinter(indent=4) ######################################################### ######################################################### -# FILE-ACTION FUNCTIONS +# FILE-ACTION FUNCTIONS ######################################################### def check_existence(filename): if not os.path.exists(filename): @@ -42,7 +42,7 @@ def get_file_size(filename): ######################################################### CONFIGFILE = str(workflow.overwrite_configfiles[0]) -# set memory limit +# set memory limit # used for sambamba sort, etc MEMORYG="100G" @@ -121,7 +121,7 @@ for i,t in enumerate(list(df[df['isControl']=="N"]['replicateName'].unique())): if not c in REPLICATES: print("# Control NOT found for sampleName_replicateNumber:"+t) print("# "+config["samplemanifest"]+" has no entry for sample:"+crow.controlName+" replicateNumber:"+str(crow.controlReplicateNumber)) - exit() + exit() print("## "+str(i+1)+") "+t+" "+c) process_replicates.extend([t,c]) TREATMENTS.append(t) @@ -138,7 +138,7 @@ if len(process_replicates)!=len(REPLICATES): REPLICATES = process_replicates print("# Contrast manifest is confirmed!") -# write out the treatment:cntrl +# write out the treatment:cntrl fpath=join(RESULTSDIR,"treatment_control_list.txt") originalDF = pd.DataFrame(TREAT_to_CONTRL_DICT.items()).rename(columns={0: 'key', 1: 'val'}) split_keysDF = pd.DataFrame(originalDF['key'].str.split(':').tolist()) @@ -366,7 +366,7 @@ rule create_replicate_sample_table: print("# %s file created!"%(rg_file)) rule create_reference: - input: + input: genomefa_source=GENOMEFA, blacklist_source=GENOMEBLACKLIST, output: @@ -384,7 +384,7 @@ rule create_reference: spiked_source=SPIKED_GENOMEFA, spiked_output=join(BOWTIE2_INDEX,"spikein.fa"), refdata=refdata, - envmodules: + envmodules: TOOLS["bowtie2"], TOOLS["samtools"], TOOLS["bedtools"], @@ -392,7 +392,7 @@ rule create_reference: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -401,7 +401,7 @@ rule create_reference: fi # create dir and links - if [[ -d {params.bowtie2_dir} ]]; then rm -r {params.bowtie2_dir}; fi + if [[ -d {params.bowtie2_dir} ]]; then rm -r {params.bowtie2_dir}; fi mkdir -p {params.bowtie2_dir}/ref ln -s {input.genomefa_source} {output.genomefa} ln -s {input.blacklist_source} {output.blacklist} diff --git a/workflow/rules/peakcalls.smk b/workflow/rules/peakcalls.smk index 173fd12..add11a6 100644 --- a/workflow/rules/peakcalls.smk +++ b/workflow/rules/peakcalls.smk @@ -1,10 +1,10 @@ # TSV file should include 6 columns # 1)condition 2)sample -# 53_H3K4me3 53_H3K4me3_1 +# 53_H3K4me3 53_H3K4me3_1 # 3)bed # 
/results/peaks/gopeaks/53_H3K4me3_1_vs_HN6_IgG_rabbit_negative_control_1.dedup.broadGo_peaks.bed # 4)bedgraph 5)scaling factor -# /results/bedgraph/53_H3K4me3_1.dedup.bedgraph 86.32596685082872928176 +# /results/bedgraph/53_H3K4me3_1.dedup.bedgraph 86.32596685082872928176 # 6)bed # /results/fragments/53_H3K4me3_1.dedup.fragments.bed @@ -63,7 +63,7 @@ rule macs2_narrow: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -75,13 +75,13 @@ rule macs2_narrow: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo {params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set frag file treat_bed={params.frag_bed_path}/${{treatment}}.{params.dupstatus}.fragments.bed cntrl_bed={params.frag_bed_path}/${{control}}.{params.dupstatus}.fragments.bed # run with or without control - if [[ {params.control_flag} == "Y" ]]; then + if [[ {params.control_flag} == "Y" ]]; then file_base="${{treatment}}_vs_${{control}}.{params.dupstatus}" macs2 callpeak \\ @@ -98,7 +98,7 @@ rule macs2_narrow: --nomodel else file_base="${{treatment}}_vs_${{control}}.{params.dupstatus}" - + macs2 callpeak \\ -t ${{treat_bed}} \\ -f BED \\ @@ -111,12 +111,12 @@ rule macs2_narrow: --call-summits \\ --nomodel fi - + # mv output and rename for consistency mv $TMPDIR/${{file_base}}_peaks.narrowPeak {output.peak_file} mv $TMPDIR/${{file_base}}_summits.bed {output.summit_file} mv $TMPDIR/${{file_base}}_peaks.xls {output.xls_file} - + # create bigbed files, zip cut -f1-3 {output.peak_file} | LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} --temporary-directory=$TMPDIR -k1,1 -k2,2n | uniq > ${{TMPDIR}}/narrow.bed bedToBigBed -type=bed3 ${{TMPDIR}}/narrow.bed {input.genome_len} ${{TMPDIR}}/narrow.bigbed @@ -153,7 +153,7 @@ rule macs2_broad: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -165,15 +165,15 @@ rule macs2_broad: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo {params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set frag file treat_bed={params.frag_bed_path}/${{treatment}}.{params.dupstatus}.fragments.bed cntrl_bed={params.frag_bed_path}/${{control}}.{params.dupstatus}.fragments.bed # run with or without control - if [[ {params.control_flag} == "Y" ]]; then + if [[ {params.control_flag} == "Y" ]]; then file_base="${{treatment}}_vs_${{control}}.{params.dupstatus}" - + macs2 callpeak \\ -t ${{treat_bed}} \\ -c ${{cntrl_bed}} \\ @@ -188,7 +188,7 @@ rule macs2_broad: --nomodel else file_base="${{treatment}}_vs_nocontrol.{params.dupstatus}" - + macs2 callpeak \\ -t ${{treat_bed}} \\ -f BED \\ @@ -201,7 +201,7 @@ rule macs2_broad: --broad --broad-cutoff {params.broadtreshold} \\ --nomodel fi - + # mv output and rename for consistency mv $TMPDIR/${{file_base}}_peaks.broadPeak {output.peak_file} mv $TMPDIR/${{file_base}}_peaks.xls {output.xls_file} @@ -237,7 +237,7 @@ rule seacr_stringent: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -249,7 +249,7 @@ rule seacr_stringent: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo 
{params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set frag file treat_bed={params.bg_path}/${{treatment}}.{params.dupstatus}.bedgraph cntrl_bed={params.bg_path}/${{control}}.{params.dupstatus}.bedgraph @@ -262,7 +262,7 @@ rule seacr_stringent: --mode stringent \\ --threshold {params.qthresholds} \\ --output norm - + # mv output and rename for consistency mv $TMPDIR/norm.stringent.bed {output.peak_file} else @@ -308,7 +308,7 @@ rule seacr_relaxed: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -320,7 +320,7 @@ rule seacr_relaxed: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo {params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set frag file treat_bed={params.bg_path}/${{treatment}}.{params.dupstatus}.bedgraph cntrl_bed={params.bg_path}/${{control}}.{params.dupstatus}.bedgraph @@ -332,7 +332,7 @@ rule seacr_relaxed: --normalize norm \\ --mode relaxed \\ --threshold {params.qthresholds} \\ - --output norm + --output norm # mv output and rename for consistency mv $TMPDIR/norm.relaxed.bed {output.peak_file} @@ -344,7 +344,7 @@ rule seacr_relaxed: --mode relaxed \\ --threshold {params.qthresholds} \\ --output non - + mv $TMPDIR/non.relaxed.bed {output.peak_file} fi @@ -378,7 +378,7 @@ rule gopeaks_narrow: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -389,7 +389,7 @@ rule gopeaks_narrow: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo {params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set bam file treat_bam={params.bam_path}/${{treatment}}.{params.dupstatus}.bam cntrl_bam={params.bam_path}/${{control}}.{params.dupstatus}.bam @@ -402,7 +402,7 @@ rule gopeaks_narrow: mv $TMPDIR/narrow_gopeaks.json {output.json} # create bigbed files - cut -f1-3 {output.peak_file} | LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} --temporary-directory=$TMPDIR -k1,1 -k2,2n | uniq > ${{TMPDIR}}/narrow_peaks.bed + cut -f1-3 {output.peak_file} | LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} --temporary-directory=$TMPDIR -k1,1 -k2,2n | uniq > ${{TMPDIR}}/narrow_peaks.bed bedToBigBed -type=bed3 ${{TMPDIR}}/narrow_peaks.bed {input.genome_len} ${{TMPDIR}}/narrow_peaks.bigbed bgzip -c ${{TMPDIR}}/narrow_peaks.bigbed > {output.bg_file} """ @@ -431,7 +431,7 @@ rule gopeaks_broad: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -442,7 +442,7 @@ rule gopeaks_broad: # pull treatment and control ids treatment=`echo {params.tc_file} | awk -F"_vs_" '{{print $1}}'` control=`echo {params.tc_file} | awk -F"_vs_" '{{print $2}}'` - + # set bam file treat_bam={params.bam_path}/${{treatment}}.{params.dupstatus}.bam cntrl_bam={params.bam_path}/${{control}}.{params.dupstatus}.bam @@ -455,7 +455,7 @@ rule gopeaks_broad: mv $TMPDIR/broad_gopeaks.json {output.json} # create bigbed files - cut -f1-3 {output.peak_file} | LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} --temporary-directory=$TMPDIR -k1,1 -k2,2n | uniq > ${{TMPDIR}}/broad.bed + cut -f1-3 {output.peak_file} | LC_ALL=C sort --buffer-size={params.memG} --parallel={threads} 
--temporary-directory=$TMPDIR -k1,1 -k2,2n | uniq > ${{TMPDIR}}/broad.bed bedToBigBed -type=bed3 ${{TMPDIR}}/broad.bed {input.genome_len} ${{TMPDIR}}/broad.bigbed bgzip -c ${{TMPDIR}}/broad.bigbed > {output.bg_file} """ diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 143f770..1ead23d 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -39,7 +39,7 @@ rule qc_fastqc: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -49,7 +49,7 @@ rule qc_fastqc: # run FASTQC fastqc {input.R1} {input.R2} -o {params.base} - + # Gzip input files gunzip -c {input.R1} > ${{TMPDIR}}/{params.R1} gunzip -c {input.R2} > ${{TMPDIR}}/{params.R2} @@ -62,7 +62,7 @@ rule qc_fastqc: --subset 1000000 \\ --aligner bowtie2 \\ --force - + # Run FastQ Validator mkdir -p {params.base_val} {params.fastq_val} \\ @@ -104,12 +104,12 @@ rule spikein_assessment: shell: """ if [[ {params.spikein} == "ecoli" ]]; then species_name="NC_000913.3"; else species_name=""; fi - + # get sample list sample_list="{input.bams}" clean_sample_list=`echo $sample_list | sed "s/\s/xxx/g"` - # rum script + # rum script Rscript {params.rscript_wrapper} \\ --rmd {params.rmd} \\ --carlisle_functions {params.carlisle_functions} \\ @@ -146,7 +146,7 @@ if ("gopeaks_narrow" in PEAKTYPE) or ("gopeaks_broad" in PEAKTYPE): shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -190,7 +190,7 @@ else: shell: """ set -exo pipefail - if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then + if [[ -d "/lscratch/$SLURM_JOB_ID" ]]; then TMPDIR="/lscratch/$SLURM_JOB_ID" else dirname=$(basename $(mktemp)) @@ -207,4 +207,4 @@ else: -o $TMPDIR/qc mv $TMPDIR/qc/multiqc_report.html {output.report} - """ \ No newline at end of file + """ diff --git a/workflow/scripts/_carlisle_functions.R b/workflow/scripts/_carlisle_functions.R index 0f3c28b..9aee339 100644 --- a/workflow/scripts/_carlisle_functions.R +++ b/workflow/scripts/_carlisle_functions.R @@ -2,8 +2,8 @@ # LIBRARY ######################################################################## library(tidyverse) -load_packages <- function(){ - pkgs <- 'BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0 +load_packages <- function() { + pkgs <- "BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0 chipenrich ChIPseeker DESeq2 @@ -28,39 +28,43 @@ tidyverse TxDb.Hsapiens.UCSC.hg19.knownGene TxDb.Hsapiens.UCSC.hg38.knownGene TxDb.Mmusculus.UCSC.mm10.knownGene -yaml' - package_vctr <- pkgs %>% stringr::str_split('\n') %>% unlist() +yaml" + package_vctr <- pkgs %>% + stringr::str_split("\n") %>% + unlist() invisible(lapply(package_vctr, library, character.only = TRUE)) } ######################################################################## # SPIKE IN PLOT ######################################################################## -GENERATE_SPIKEIN_PLOT<-function(input_df,spike_type){ - spike_df=data.frame() - for (rowid in rownames(input_df,spike_type)){ +GENERATE_SPIKEIN_PLOT <- function(input_df, spike_type) { + spike_df <- data.frame() + for (rowid in rownames(input_df, spike_type)) { # spike_type="NC_000913.3" - + # read in file - stats=read.table(input_df[rowid,"bam"]) - stats=stats[,c("V1","V3")] - colnames(stats)=c("location","read_count") - + stats <- read.table(input_df[rowid, "bam"]) + stats <- stats[, c("V1", "V3")] + colnames(stats) <- c("location", 
"read_count") + # add metadata - stats$sampleid=input_df[rowid,"repid"] - stats$groupid=input_df[rowid,"sampleid"] - - if(nrow(stats)==0){ - spike_df=subset(stats,location==spike_type) - } else{ - spike_df=rbind(subset(stats,location==spike_type), - spike_df) + stats$sampleid <- input_df[rowid, "repid"] + stats$groupid <- input_df[rowid, "sampleid"] + + if (nrow(stats) == 0) { + spike_df <- subset(stats, location == spike_type) + } else { + spike_df <- rbind( + subset(stats, location == spike_type), + spike_df + ) } } - - p=ggplot(data=spike_df,aes(x=sampleid,y=read_count,fill=groupid)) + - geom_bar(stat="identity") - p_final=p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ + + p <- ggplot(data = spike_df, aes(x = sampleid, y = read_count, fill = groupid)) + + geom_bar(stat = "identity") + p_final <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + ggtitle(paste0("Spike-in control values\n", spike_type)) print(p_final) @@ -70,178 +74,197 @@ GENERATE_SPIKEIN_PLOT<-function(input_df,spike_type){ ######################################################################## # GO ENRICHMENT ######################################################################## -READ_PEAK_FILE<-function(peak_file_in){ - peak_df=read.csv(peak_file_in,sep="\t",header=FALSE)[,c("V1","V2","V3")] - colnames(peak_df)=c("chrom","start","end") - +READ_PEAK_FILE <- function(peak_file_in) { + peak_df <- read.csv(peak_file_in, sep = "\t", header = FALSE)[, c("V1", "V2", "V3")] + colnames(peak_df) <- c("chrom", "start", "end") + return(peak_df) } -SET_LOCUST_DEF<-function(locus_loc_in){ +SET_LOCUST_DEF <- function(locus_loc_in) { # depending on where the majority of peaks fall, ID locus choice to use - if (locus_loc_in=="<0.1" | locus_loc_in=="0.1 - 1"){ - locus_loc_in="1kb" - } else if (locus_loc_in=="1 - 5"){ - locus_loc_in="5kb" - } else if (locus_loc_in=="5 - 10" ) { - locus_loc_in="10kb" + if (locus_loc_in == "<0.1" | locus_loc_in == "0.1 - 1") { + locus_loc_in <- "1kb" + } else if (locus_loc_in == "1 - 5") { + locus_loc_in <- "5kb" + } else if (locus_loc_in == "5 - 10") { + locus_loc_in <- "10kb" } else { - locus_loc_in="not applicable given distribution" + locus_loc_in <- "not applicable given distribution" } - - print(paste0("The locust defintion is determined to be: ",locus_loc)) + + print(paste0("The locust defintion is determined to be: ", locus_loc)) return(locus_loc) } -SET_LOCUST_LIST<-function(locus_loc_in){ +SET_LOCUST_LIST <- function(locus_loc_in) { # depending on where the majority of peaks fall, ID locus choice to use - if (locus_loc_in=="<0.1" | locus_loc_in=="0.1 - 1"){ - locusdf_list=c("1kb","1kb_outside","1kb_outside_upstream") - } else if (locus_loc_in=="1 - 5"){ - locusdf_list=c("5kb","5kb_outside","5kb_outside_upstream") - } else if (locus_loc_in=="5 - 10" ) { - locusdf_list=c("10kb","10kb_outside","10kb_outside_upstream") + if (locus_loc_in == "<0.1" | locus_loc_in == "0.1 - 1") { + locusdf_list <- c("1kb", "1kb_outside", "1kb_outside_upstream") + } else if (locus_loc_in == "1 - 5") { + locusdf_list <- c("5kb", "5kb_outside", "5kb_outside_upstream") + } else if (locus_loc_in == "5 - 10") { + locusdf_list <- c("10kb", "10kb_outside", "10kb_outside_upstream") } else { - locusdf_list="none" + locusdf_list <- "none" } - + return(locusdf_list) } -PLOT_QC_FUNCTIONS<-function(function_in,rowid_in,l_id){ - - if (function_in=="polyenrich"){ - p=plot_polyenrich_spline(peaks = READ_PEAK_FILE(peak_df[rowid_in,"peak_bed"]), - locusdef = l_id, genome = speciesID) 
- } else if (function_in=="spline"){ - p=plot_chipenrich_spline(peaks = READ_PEAK_FILE(peak_df[rowid_in,"peak_bed"]), - locusdef = l_id, genome = speciesID) - } else if (function_in=="cov"){ - p=plot_gene_coverage(peaks = READ_PEAK_FILE(peak_df[rowid_in,"peak_bed"]), - locusdef = l_id, genome = speciesID) - } - - else{ - print(paste0("Missing function",function_in)) +PLOT_QC_FUNCTIONS <- function(function_in, rowid_in, l_id) { + if (function_in == "polyenrich") { + p <- plot_polyenrich_spline( + peaks = READ_PEAK_FILE(peak_df[rowid_in, "peak_bed"]), + locusdef = l_id, genome = speciesID + ) + } else if (function_in == "spline") { + p <- plot_chipenrich_spline( + peaks = READ_PEAK_FILE(peak_df[rowid_in, "peak_bed"]), + locusdef = l_id, genome = speciesID + ) + } else if (function_in == "cov") { + p <- plot_gene_coverage( + peaks = READ_PEAK_FILE(peak_df[rowid_in, "peak_bed"]), + locusdef = l_id, genome = speciesID + ) + } else { + print(paste0("Missing function", function_in)) } return(p) } -PLOT_QC_MAIN<-function(function_in,rowid_in){ - locusdf_list=c(strsplit(peak_df[1,"locusdf_list"],",")[[1]],"nearest_tss","nearest_gene") - locusdf_list=locusdf_list[locusdf_list != "none"] - +PLOT_QC_MAIN <- function(function_in, rowid_in) { + locusdf_list <- c(strsplit(peak_df[1, "locusdf_list"], ",")[[1]], "nearest_tss", "nearest_gene") + locusdf_list <- locusdf_list[locusdf_list != "none"] + # create plots, catching for errors - counter=1 - legend_text="" - row_count=1; col_count=1 - p=list() - for (l_id in locusdf_list){ - plot_test = tryCatch({ - PLOT_QC_FUNCTIONS(function_in,rowid_in,l_id) - }, warning = function(w) { - "warning" - }, error = function(e) { - "failed" - }) - + counter <- 1 + legend_text <- "" + row_count <- 1 + col_count <- 1 + p <- list() + for (l_id in locusdf_list) { + plot_test <- tryCatch( + { + PLOT_QC_FUNCTIONS(function_in, rowid_in, l_id) + }, + warning = function(w) { + "warning" + }, + error = function(e) { + "failed" + } + ) + # save successful plots in plot_list # save successful plots legend - if (class(plot_test) == "trellis"){ - p[[counter]]=plot_test - counter=counter+1 - + if (class(plot_test) == "trellis") { + p[[counter]] <- plot_test + counter <- counter + 1 + # if true, this is a new row - if (col_count==1){ - legend_text=paste0(legend_text,"Row ",row_count,": | Col 1 (",l_id,")") - col_count=2 - } else if (col_count==2 & row_count==1){ - legend_text=paste0(legend_text," | Col 2 (",l_id,")\n") - row_count=row_count+1 - col_count=1 - } else if (col_count==2 & row_count>1){ - legend_text=paste0(legend_text," | Col 2 (",l_id,")") - col_count=col_count+1 - } else{ - legend_text=paste0(legend_text," | Col 3 (",l_id,")\n") - col_count=1 - row_count=row_count+1 + if (col_count == 1) { + legend_text <- paste0(legend_text, "Row ", row_count, ": | Col 1 (", l_id, ")") + col_count <- 2 + } else if (col_count == 2 & row_count == 1) { + legend_text <- paste0(legend_text, " | Col 2 (", l_id, ")\n") + row_count <- row_count + 1 + col_count <- 1 + } else if (col_count == 2 & row_count > 1) { + legend_text <- paste0(legend_text, " | Col 2 (", l_id, ")") + col_count <- col_count + 1 + } else { + legend_text <- paste0(legend_text, " | Col 3 (", l_id, ")\n") + col_count <- 1 + row_count <- row_count + 1 } } } - + # print with data key cat(legend_text) - n=length(p) - if (n==0){ + n <- length(p) + if (n == 0) { print("All plots failed for this sample") - } else if (n==1){ + } else if (n == 1) { plot(c(p[[1]])) - } else if (n==2){ - plot(c(p[[1]],p[[2]])) - } else if (n==3){ - 
plot(c(p[[1]],p[[2]],p[[3]])) - } else if (n==4){ - plot(c(p[[1]],p[[2]],p[[3]],p[[4]])) - } else if (n==5){ - plot(c(p[[1]],p[[2]],p[[3]],p[[4]],p[[5]])) - } else if (n==6){ - plot(c(p[[1]],p[[2]],p[[3]],p[[4]],p[[5]],p[[6]])) - } else{ - plot(paste0("Missing N",n)) + } else if (n == 2) { + plot(c(p[[1]], p[[2]])) + } else if (n == 3) { + plot(c(p[[1]], p[[2]], p[[3]])) + } else if (n == 4) { + plot(c(p[[1]], p[[2]], p[[3]], p[[4]])) + } else if (n == 5) { + plot(c(p[[1]], p[[2]], p[[3]], p[[4]], p[[5]])) + } else if (n == 6) { + plot(c(p[[1]], p[[2]], p[[3]], p[[4]], p[[5]], p[[6]])) + } else { + plot(paste0("Missing N", n)) } } -GO_ANALYSIS_MAIN<-function(rowid,peak_enrichment){ - sampleid=peak_df[rowid,"sampleid"] - peakcaller=peak_df[rowid,"peak_caller"] - peaktype=peak_df[rowid,"peak_type"] - locus_def=peak_df[rowid,"locus_loc_short"] - - print(paste0("** ",sampleid," | ", peakcaller, " | ", peaktype," **")) - print(paste0("-- Peak analysis: ",peak_enrichment)) - - if (locus_def=="none"){ - locus_def="nearest_tss" +GO_ANALYSIS_MAIN <- function(rowid, peak_enrichment) { + sampleid <- peak_df[rowid, "sampleid"] + peakcaller <- peak_df[rowid, "peak_caller"] + peaktype <- peak_df[rowid, "peak_type"] + locus_def <- peak_df[rowid, "locus_loc_short"] + + print(paste0("** ", sampleid, " | ", peakcaller, " | ", peaktype, " **")) + print(paste0("-- Peak analysis: ", peak_enrichment)) + + if (locus_def == "none") { + locus_def <- "nearest_tss" } - - if (peak_enrichment=="hybrid"){ - results = hybridenrich(peaks = READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - genome = speciesID, genesets = geneset_id, - locusdef = locus_def, qc_plots = F, #randomization = 'complete', - out_name = NULL, n_cores = 1, min_geneset_size=10) - } else if (peak_enrichment=="broad"){ - results = broadenrich(peaks = READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - genome = speciesID, genesets = geneset_id, - locusdef = locus_def, qc_plots = FALSE, #randomization = 'complete', - out_name = NULL, n_cores=1, min_geneset_size=10) - } else if (peak_enrichment=="enrich"){ - results = chipenrich(peaks = READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - genome = speciesID, genesets = geneset_id, - locusdef = locus_def, qc_plots = FALSE, #randomization = 'complete', - out_name = NULL, n_cores=1) - } else if (peak_enrichment=="poly"){ - results = polyenrich(peaks = READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - genome = speciesID, genesets = geneset_id, - method = 'polyenrich', locusdef = locus_def, - qc_plots = FALSE, out_name = NULL, n_cores = 1) - } else if (peak_enrichment=="poly_weighted"){ - results = polyenrich(peaks = READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - genome = speciesID, genesets = geneset_id, - method="polyenrich_weighted", locusdef = "enhancer_plus5kb", - qc_plots = FALSE, out_name = NULL, n_cores = 1) - } else if (peak_enrichment=="reglocation"){ - results = proxReg(READ_PEAK_FILE(peak_df[rowid,"peak_bed"]), - reglocation = 'tss', genome = speciesID, - genesets=geneset_id, out_name=NULL) + + if (peak_enrichment == "hybrid") { + results <- hybridenrich( + peaks = READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + genome = speciesID, genesets = geneset_id, + locusdef = locus_def, qc_plots = F, # randomization = 'complete', + out_name = NULL, n_cores = 1, min_geneset_size = 10 + ) + } else if (peak_enrichment == "broad") { + results <- broadenrich( + peaks = READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + genome = speciesID, genesets = geneset_id, + locusdef = locus_def, qc_plots = FALSE, # randomization = 'complete', + out_name = 
NULL, n_cores = 1, min_geneset_size = 10 + ) + } else if (peak_enrichment == "enrich") { + results <- chipenrich( + peaks = READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + genome = speciesID, genesets = geneset_id, + locusdef = locus_def, qc_plots = FALSE, # randomization = 'complete', + out_name = NULL, n_cores = 1 + ) + } else if (peak_enrichment == "poly") { + results <- polyenrich( + peaks = READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + genome = speciesID, genesets = geneset_id, + method = "polyenrich", locusdef = locus_def, + qc_plots = FALSE, out_name = NULL, n_cores = 1 + ) + } else if (peak_enrichment == "poly_weighted") { + results <- polyenrich( + peaks = READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + genome = speciesID, genesets = geneset_id, + method = "polyenrich_weighted", locusdef = "enhancer_plus5kb", + qc_plots = FALSE, out_name = NULL, n_cores = 1 + ) + } else if (peak_enrichment == "reglocation") { + results <- proxReg(READ_PEAK_FILE(peak_df[rowid, "peak_bed"]), + reglocation = "tss", genome = speciesID, + genesets = geneset_id, out_name = NULL + ) } # set results, sig - result.out = results$results - alpha = sum(result.out$P.value < 0.05) / nrow(result.out) - print(paste0("--sig: ",alpha)) - + result.out <- results$results + alpha <- sum(result.out$P.value < 0.05) / nrow(result.out) + print(paste0("--sig: ", alpha)) + # write out - fpath=paste0(output_dir,"/",sampleid,".",peakcaller,".",dedup_status,".",peaktype,".",peak_enrichment,"_",geneset_id,".csv") - write.csv(result.out,fpath) -} \ No newline at end of file + fpath <- paste0(output_dir, "/", sampleid, ".", peakcaller, ".", dedup_status, ".", peaktype, ".", peak_enrichment, "_", geneset_id, ".csv") + write.csv(result.out, fpath) +} diff --git a/workflow/scripts/_diff_markdown.Rmd b/workflow/scripts/_diff_markdown.Rmd index 66e9d05..54a7355 100644 --- a/workflow/scripts/_diff_markdown.Rmd +++ b/workflow/scripts/_diff_markdown.Rmd @@ -21,7 +21,7 @@ params: elbowlimits: "~/../../../Volumes/ccbr1155/CS030666/analysis/results/peaks/contrasts/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed_AUCbased_diffanalysis_elbowlimits.yaml" species: "hg38" gtf: "" -editor_options: +editor_options: chunk_output_type: console --- @@ -35,71 +35,72 @@ load_packages() ## Loading SampleInfo and Counts ```{r sampleinfo, include=TRUE, echo=FALSE} -sampleinfo = read.csv(params$coldata,header = TRUE,sep="\t",strip.white = TRUE,check.names = FALSE,colClasses = "character") +sampleinfo <- read.csv(params$coldata, header = TRUE, sep = "\t", strip.white = TRUE, check.names = FALSE, colClasses = "character") # filter based off of params -sampleinfo = sampleinfo[sampleinfo$group == params$condition1 | sampleinfo$group == params$condition2,] -sampleinfo$group = relevel(as.factor(sampleinfo$group),params$condition2) -rawcounts = read.csv(params$rawcountsmatrix, - header = TRUE,sep="\t", - comment.char = "#", - strip.white = TRUE, - check.names = FALSE, - colClasses = "character") -rawcounts = as.data.frame(rawcounts) -rawcounts %>% column_to_rownames(var="peakID") -> rawcounts +sampleinfo <- sampleinfo[sampleinfo$group == params$condition1 | sampleinfo$group == params$condition2, ] +sampleinfo$group <- relevel(as.factor(sampleinfo$group), params$condition2) +rawcounts <- read.csv(params$rawcountsmatrix, + header = TRUE, sep = "\t", + comment.char = "#", + strip.white = TRUE, + check.names = FALSE, + colClasses = "character" +) +rawcounts <- as.data.frame(rawcounts) +rawcounts %>% 
column_to_rownames(var = "peakID") -> rawcounts # filter based off of sampleinfo -rawcounts = rawcounts[,colnames(rawcounts)==sampleinfo$samplename] +rawcounts <- rawcounts[, colnames(rawcounts) == sampleinfo$samplename] # convert character to numeric to integer -x = matrix(as.numeric(as.matrix(rawcounts)),ncol=ncol(rawcounts)) -x = matrix(mapply(x,FUN=as.integer),ncol=ncol(rawcounts)) -x = as.data.frame(x) -colnames(x) = colnames(rawcounts) -rownames(x) = rownames(rawcounts) +x <- matrix(as.numeric(as.matrix(rawcounts)), ncol = ncol(rawcounts)) +x <- matrix(mapply(x, FUN = as.integer), ncol = ncol(rawcounts)) +x <- as.data.frame(x) +colnames(x) <- colnames(rawcounts) +rownames(x) <- rownames(rawcounts) # if lib size is greater than max integer size allowed, handle samples # cant replace with the original value since max integer is 2147483647 # anything larger will be replaced with an NA -x[is.na(x)]=2147483647 - -rawcounts = x -sampleinfo=sampleinfo[sampleinfo$samplename==colnames(rawcounts),] - -#determine lib reduction factor -if (mean(colSums(rawcounts))>1000000000){ - lib_factor=1e8 -}else if (mean(colSums(rawcounts))>100000000){ - lib_factor=1e7 -}else if (mean(colSums(rawcounts))>10000000){ - lib_factor=1e6 -} else if (mean(colSums(rawcounts))>1000000){ - lib_factor=1e5 -} else if (mean(colSums(rawcounts))>100000){ - lib_factor=1e4 -} else if (mean(colSums(rawcounts))>10000){ - lib_factor=1e3 -} else if (mean(colSums(rawcounts))>1000){ - lib_factor=1e2 -} else if (mean(colSums(rawcounts))>100){ - lib_factor=1e1 +x[is.na(x)] <- 2147483647 + +rawcounts <- x +sampleinfo <- sampleinfo[sampleinfo$samplename == colnames(rawcounts), ] + +# determine lib reduction factor +if (mean(colSums(rawcounts)) > 1000000000) { + lib_factor <- 1e8 +} else if (mean(colSums(rawcounts)) > 100000000) { + lib_factor <- 1e7 +} else if (mean(colSums(rawcounts)) > 10000000) { + lib_factor <- 1e6 +} else if (mean(colSums(rawcounts)) > 1000000) { + lib_factor <- 1e5 +} else if (mean(colSums(rawcounts)) > 100000) { + lib_factor <- 1e4 +} else if (mean(colSums(rawcounts)) > 10000) { + lib_factor <- 1e3 +} else if (mean(colSums(rawcounts)) > 1000) { + lib_factor <- 1e2 +} else if (mean(colSums(rawcounts)) > 100) { + lib_factor <- 1e1 } else { - lib_factor=1e1 + lib_factor <- 1e1 } -sampleinfo$library_size=colSums(rawcounts)/lib_factor -sampleinfodf = as.data.frame(sampleinfo) -sampleinfodf$dupstatus = params$dupstatus -rownames(sampleinfo) = sampleinfo$samplename -pander(sampleinfodf,style="rmarkdown") -rawcounts_logcpm = log2(cpm(rawcounts)) -cpm_melt=reshape2::melt(rawcounts_logcpm) -colnames(cpm_melt)=c("peakID","samplename","log2cpm") -fdr_cutoff=as.double(params$fdr_cutoff) -log2fc_cutoff=as.double(params$log2fc_cutoff) +sampleinfo$library_size <- colSums(rawcounts) / lib_factor +sampleinfodf <- as.data.frame(sampleinfo) +sampleinfodf$dupstatus <- params$dupstatus +rownames(sampleinfo) <- sampleinfo$samplename +pander(sampleinfodf, style = "rmarkdown") +rawcounts_logcpm <- log2(cpm(rawcounts)) +cpm_melt <- reshape2::melt(rawcounts_logcpm) +colnames(cpm_melt) <- c("peakID", "samplename", "log2cpm") +fdr_cutoff <- as.double(params$fdr_cutoff) +log2fc_cutoff <- as.double(params$log2fc_cutoff) ``` -Total Peaks: `r nrow(rawcounts)` +Total Peaks: `r nrow(rawcounts)` Total Samples: `r ncol(rawcounts)` ```{r fdr_check} @@ -109,68 +110,75 @@ print(log2fc_cutoff) ```{r cpmplots, echo=FALSE} -ggplot(cpm_melt,aes(x=samplename,y=log2cpm)) + - geom_boxplot(fill=as.factor(as.numeric(as.factor(sampleinfo$group))+1)) + 
+ggplot(cpm_melt, aes(x = samplename, y = log2cpm)) + + geom_boxplot(fill = as.factor(as.numeric(as.factor(sampleinfo$group)) + 1)) + theme_classic() + coord_flip() - # theme(legend.title = element_blank(),axis.text.x = element_text(angle = 90),legend.text=element_text(size=6),legend.position = "none") +# theme(legend.title = element_blank(),axis.text.x = element_text(angle = 90),legend.text=element_text(size=6),legend.position = "none") ``` ## Run DESeq2 ```{r dds, include=TRUE, echo=FALSE} -dds <- DESeqDataSetFromMatrix(countData = as.matrix(rawcounts), - colData = sampleinfo[,c("samplename","group")], - design = ~ group) -if (params$spiked=="SPIKEIN" & !is.null(params$contrast_data)) { - bbpaths_df = read.csv(params$contrast_data, - header = FALSE,sep="\t", - comment.char = "#", - strip.white = TRUE) - colnames(bbpaths_df)=c("sample", - "replicate", - "bedfile", - "bedgraph", - "scalingfactor", - "bed") - sf_df=unique(bbpaths_df[,c("replicate","scalingfactor")]) - dds_cols=colnames(dds) - sfs=c() - for (i in dds_cols){ - if (i %in% sf_df$replicate){ - sfs=c(sfs,sf_df[sf_df$replicate==i,"scalingfactor"]) +dds <- DESeqDataSetFromMatrix( + countData = as.matrix(rawcounts), + colData = sampleinfo[, c("samplename", "group")], + design = ~group +) +if (params$spiked == "SPIKEIN" & !is.null(params$contrast_data)) { + bbpaths_df <- read.csv(params$contrast_data, + header = FALSE, sep = "\t", + comment.char = "#", + strip.white = TRUE + ) + colnames(bbpaths_df) <- c( + "sample", + "replicate", + "bedfile", + "bedgraph", + "scalingfactor", + "bed" + ) + sf_df <- unique(bbpaths_df[, c("replicate", "scalingfactor")]) + dds_cols <- colnames(dds) + sfs <- c() + for (i in dds_cols) { + if (i %in% sf_df$replicate) { + sfs <- c(sfs, sf_df[sf_df$replicate == i, "scalingfactor"]) } } - if (length(sfs)==length(dds_cols)){ -# scaling factor magnitudes are variable and depend on the constant used while scaling using spiked-in reads -# DESeq2 size factors are generally hovering around 1 -# we try to rescale the scaling factors by dividing them by mean of all scaling factors ... this way they also -# start hovering around 1 ... based on suggestion from Sohyoung. + if (length(sfs) == length(dds_cols)) { + # scaling factor magnitudes are variable and depend on the constant used while scaling using spiked-in reads + # DESeq2 size factors are generally hovering around 1 + # we try to rescale the scaling factors by dividing them by mean of all scaling factors ... this way they also + # start hovering around 1 ... based on suggestion from Sohyoung. if (params$scalesfbymean == "Y") { - adjusted_sfs = sfs/mean(sfs) + adjusted_sfs <- sfs / mean(sfs) } -# AUC-based counts are prescaled, but fragmentbased counts are not prescaled -# DESeq2 should ingest rawcounts ... prescaled counts need to be divided by sfs -# to convert them back to rawcounts + # AUC-based counts are prescaled, but fragmentbased counts are not prescaled + # DESeq2 should ingest rawcounts ... 
prescaled counts need to be divided by sfs + # to convert them back to rawcounts if (params$rawcountsprescaled == "Y") { - rawrawcounts=round(t(t(rawcounts) / sfs)) - dds <- DESeqDataSetFromMatrix(countData = as.matrix(rawrawcounts), - colData = sampleinfo[,c("samplename","group")], - design = ~ group) + rawrawcounts <- round(t(t(rawcounts) / sfs)) + dds <- DESeqDataSetFromMatrix( + countData = as.matrix(rawrawcounts), + colData = sampleinfo[, c("samplename", "group")], + design = ~group + ) } - DESeq2::sizeFactors(dds)= 1/adjusted_sfs + DESeq2::sizeFactors(dds) <- 1 / adjusted_sfs } else { print("Samples are spiked, but DESeq2 scaling factors used!!") } -} - +} + dds <- DESeq(dds) -if ( params$htsfilter == "Y" ){ - dds <- HTSFilter::HTSFilter(dds,s.len=50, plot=TRUE)$filteredData +if (params$htsfilter == "Y") { + dds <- HTSFilter::HTSFilter(dds, s.len = 50, plot = TRUE)$filteredData } results <- results(dds) results_df <- as.data.frame(results) -results_df %>% rownames_to_column(var="peakID") -> results_df +results_df %>% rownames_to_column(var = "peakID") -> results_df ``` ### DESeq MAplot @@ -185,64 +193,66 @@ DESeq2::plotMA(results) # default for nsub is 1000, if there are less than 1000 rows with a mean > 5 this will error # If there are less than 1K check hether there are enough istances to use nsub value set to the n rows # if there are not then variancestablizing must be used -if (sum( rowMeans( counts(dds, normalized=TRUE)) > 5 ) > 1000){ +if (sum(rowMeans(counts(dds, normalized = TRUE)) > 5) > 1000) { print("VST: Using standard method") rld <- vst(dds) -} else{ +} else { # run test for dds - t <- try(vst(dds,nsub=nrow(dds)),silent = TRUE) - + t <- try(vst(dds, nsub = nrow(dds)), silent = TRUE) + # if this completes then run nsub equal to nrow # if it fails run separate function - if (class(t)[1] == "DESeqTransform"){ + if (class(t)[1] == "DESeqTransform") { print("VST: Using nrow as nsub") - rld <- vst(dds,nsub=nrow(dds)) - } else if(grepl( "Error", t, fixed = TRUE)){ - print ("VST: Using variance Stablizing transformation") + rld <- vst(dds, nsub = nrow(dds)) + } else if (grepl("Error", t, fixed = TRUE)) { + print("VST: Using variance Stablizing transformation") rld <- varianceStabilizingTransformation(dds) - } else{ + } else { print("VST: Using nrow as nsub") - rld <- vst(dds,nsub=nrow(dds)) + rld <- vst(dds, nsub = nrow(dds)) } -} - -assayrld = as.data.frame(assay(rld)) -assayrld$row_variance = rowVars(as.matrix(assayrld)) -assayrld = arrange(assayrld,desc(row_variance)) -zero_variance_rows=assayrld$row_variance<1e-5 -assayrld$row_variance = NULL -assayrld = assayrld[!zero_variance_rows,] -if (nrow(assayrld) > 500){ - assayrld=assayrld[1:500,] } -pca=prcomp(t(assayrld),scale. = T) -m.pc1 = round(pca$sdev[1]^2/sum(pca$sdev^2)*100,2) -m.pc2 = round(pca$sdev[2]^2/sum(pca$sdev^2)*100,2) -m.pc3 = round(pca$sdev[3]^2/sum(pca$sdev^2)*100,2) -xlab=paste0("PC1(",m.pc1,"%)") -ylab=paste0("PC2(",m.pc2,"%)") -ggplot(pca$x,aes(x=PC1,y=PC2,label=rownames(pca$x)))+geom_point(col=as.factor(as.numeric(as.factor(sampleinfo$group))+1))+ - xlab(xlab)+ylab(ylab)+ - geom_text_repel(max.overlaps = 10,size=2)+ + +assayrld <- as.data.frame(assay(rld)) +assayrld$row_variance <- rowVars(as.matrix(assayrld)) +assayrld <- arrange(assayrld, desc(row_variance)) +zero_variance_rows <- assayrld$row_variance < 1e-5 +assayrld$row_variance <- NULL +assayrld <- assayrld[!zero_variance_rows, ] +if (nrow(assayrld) > 500) { + assayrld <- assayrld[1:500, ] +} +pca <- prcomp(t(assayrld), scale. 
= T) +m.pc1 <- round(pca$sdev[1]^2 / sum(pca$sdev^2) * 100, 2) +m.pc2 <- round(pca$sdev[2]^2 / sum(pca$sdev^2) * 100, 2) +m.pc3 <- round(pca$sdev[3]^2 / sum(pca$sdev^2) * 100, 2) +xlab <- paste0("PC1(", m.pc1, "%)") +ylab <- paste0("PC2(", m.pc2, "%)") +ggplot(pca$x, aes(x = PC1, y = PC2, label = rownames(pca$x))) + + geom_point(col = as.factor(as.numeric(as.factor(sampleinfo$group)) + 1)) + + xlab(xlab) + + ylab(ylab) + + geom_text_repel(max.overlaps = 10, size = 2) + theme_light() ``` ### DESeq Elbow ```{r elbow,include=TRUE,echo=FALSE} -limits=ELBOW::do_elbow_rnaseq(results) +limits <- ELBOW::do_elbow_rnaseq(results) ELBOW::plot_dataset(results, "log2FoldChange", limits$up_limit, limits$low_limit) -write_yaml(limits,file=params$elbowlimits) +write_yaml(limits, file = params$elbowlimits) ``` ```{r elbow2,include=TRUE,echo=FALSE} -lim=c(limits$up_limit,limits$low_limit) -lim=as.data.frame(lim) -rownames(lim)=c("UpLimit","DownLimit") -colnames(lim)="log2FC" -lim$FC=2^lim$log2FC -lim["DownLimit","FC"]=-1/lim["DownLimit","FC"] -lim %>% rownames_to_column(var="Limit") -> lim +lim <- c(limits$up_limit, limits$low_limit) +lim <- as.data.frame(lim) +rownames(lim) <- c("UpLimit", "DownLimit") +colnames(lim) <- "log2FC" +lim$FC <- 2^lim$log2FC +lim["DownLimit", "FC"] <- -1 / lim["DownLimit", "FC"] +lim %>% rownames_to_column(var = "Limit") -> lim pander(lim) # DT::datatable(lim) %>% formatSignif(columns=colnames(lim),digits=4) ``` @@ -250,119 +260,122 @@ pander(lim) ### DESeq Annotation ```{r annotate,include=TRUE,echo=FALSE} -x = as.data.frame(rownames(results)) -colnames(x) = c("peakID") -x %>% separate(col = c("peakID"),into = c("chrom","coord"),sep = ":") %>% - separate(col = c("coord"),into = c("start","end"),sep = "-") -> x +x <- as.data.frame(rownames(results)) +colnames(x) <- c("peakID") +x %>% + separate(col = c("peakID"), into = c("chrom", "coord"), sep = ":") %>% + separate(col = c("coord"), into = c("start", "end"), sep = "-") -> x peaks <- GenomicRanges::makeGRangesFromDataFrame(x) -if (params$species=="mm10"){ +if (params$species == "mm10") { txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene - anno_db<-"org.Mm.eg.db" -} else if (params$species=="hg19"){ + anno_db <- "org.Mm.eg.db" +} else if (params$species == "hg19") { txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene - anno_db<-"org.Hs.eg.db" -} else if (params$species=="hg38"){ + anno_db <- "org.Hs.eg.db" +} else if (params$species == "hg38") { txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene - anno_db<-"org.Hs.eg.db" -} else if (params$species=="hs1"){ + anno_db <- "org.Hs.eg.db" +} else if (params$species == "hs1") { # make txdb for T2T HS1 ## modify seq info to match GTF - temp_seqnames = paste0("chr",seqnames(BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0)) - temp_seqnames = gsub("MT","M",temp_seqnames) + temp_seqnames <- paste0("chr", seqnames(BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0)) + temp_seqnames <- gsub("MT", "M", temp_seqnames) ## rename all other features - BSgenome.T2T_updated=BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0 - seqnames(BSgenome.T2T_updated)=temp_seqnames - names(seqlengths(BSgenome.T2T_updated))=temp_seqnames - names(isCircular(BSgenome.T2T_updated))=temp_seqnames - names(genome(BSgenome.T2T_updated))=temp_seqnames + BSgenome.T2T_updated <- BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0 + seqnames(BSgenome.T2T_updated) <- temp_seqnames + names(seqlengths(BSgenome.T2T_updated)) <- temp_seqnames + names(isCircular(BSgenome.T2T_updated)) <- temp_seqnames + names(genome(BSgenome.T2T_updated)) <- temp_seqnames ## 
https://rdrr.io/bioc/GenomicFeatures/man/makeTxDbFromGFF.html ## create TXDB object - txdb<-makeTxDbFromGFF(params$gtf, - format=c("auto"), - dataSource="/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf", - organism="human", - taxonomyId=9606, - chrominfo=seqinfo(BSgenome.T2T_updated)) - anno_db<-"org.Hs.eg.db" + txdb <- makeTxDbFromGFF(params$gtf, + format = c("auto"), + dataSource = "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf", + organism = "human", + taxonomyId = 9606, + chrominfo = seqinfo(BSgenome.T2T_updated) + ) + anno_db <- "org.Hs.eg.db" } options(ChIPseeker.downstreamDistance = 0) peakAnno <- ChIPseeker::annotatePeak(peaks, - tssRegion = c(-2000,200), - TxDb = txdb, - level = "gene", - overlap = "all", - annoDb = anno_db) + tssRegion = c(-2000, 200), + TxDb = txdb, + level = "gene", + overlap = "all", + annoDb = anno_db +) pa <- as.data.frame(peakAnno) -pa$shortAnno=stringr::word(pa$annotation,1) -pa$shortAnno[pa$shortAnno=="5'"]="5'UTR" -pa$shortAnno[pa$shortAnno=="3'"]="3'UTR" -pa$peakID = paste0(pa$seqnames,":",pa$start,"-",pa$end) -results_df = merge(results_df,pa,by=c("peakID")) -write.table(results_df,file=params$results,quote=FALSE,sep="\t",row.names = FALSE,col.names = TRUE) - -up=as.data.frame(table(results_df[results_df$padj < fdr_cutoff & results_df$log2FoldChange > log2fc_cutoff,]$shortAnno)) -down=as.data.frame(table(results_df[results_df$padj < fdr_cutoff & results_df$log2FoldChange < -1*log2fc_cutoff,]$shortAnno)) -if(nrow(up)==0){ +pa$shortAnno <- stringr::word(pa$annotation, 1) +pa$shortAnno[pa$shortAnno == "5'"] <- "5'UTR" +pa$shortAnno[pa$shortAnno == "3'"] <- "3'UTR" +pa$peakID <- paste0(pa$seqnames, ":", pa$start, "-", pa$end) +results_df <- merge(results_df, pa, by = c("peakID")) +write.table(results_df, file = params$results, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) + +up <- as.data.frame(table(results_df[results_df$padj < fdr_cutoff & results_df$log2FoldChange > log2fc_cutoff, ]$shortAnno)) +down <- as.data.frame(table(results_df[results_df$padj < fdr_cutoff & results_df$log2FoldChange < -1 * log2fc_cutoff, ]$shortAnno)) +if (nrow(up) == 0) { up <- data.frame(matrix(ncol = 2, nrow = 0)) } -if(nrow(down)==0){ +if (nrow(down) == 0) { down <- data.frame(matrix(ncol = 2, nrow = 0)) } -colnames(up)=c("shortAnno","UP") -colnames(down)=c("shortAnno","DOWN") -deg=as.data.frame(merge(up,down,by=c("shortAnno"),all=TRUE)) +colnames(up) <- c("shortAnno", "UP") +colnames(down) <- c("shortAnno", "DOWN") +deg <- as.data.frame(merge(up, down, by = c("shortAnno"), all = TRUE)) deg[is.na(deg)] <- 0 -deg %>% column_to_rownames(var="shortAnno") -> deg -deg=cbind(deg,rowSums(deg)) -deg=rbind(deg,colSums(deg)) -colnames(deg)[length(colnames(deg))]="Total" -rownames(deg)[length(rownames(deg))]="Total" -deg %>% rownames_to_column(var="Annotation") -> deg +deg %>% column_to_rownames(var = "shortAnno") -> deg +deg <- cbind(deg, rowSums(deg)) +deg <- rbind(deg, colSums(deg)) +colnames(deg)[length(colnames(deg))] <- "Total" +rownames(deg)[length(rownames(deg))] <- "Total" +deg %>% rownames_to_column(var = "Annotation") -> deg pander(deg) # add catch for hs1 which names SYMBOLS col as geneId -if(params$species == "hs1"){ +if (params$species == "hs1") { lookup <- c(SYMBOL = "geneId") - results_df=results_df %>% - dplyr::rename(any_of(lookup)) + results_df <- results_df %>% + dplyr::rename(any_of(lookup)) } ``` ### DESeq Volcano ```{r volcano,fig.width=8, fig.height=10,include=TRUE,echo=FALSE} -colors=brewer.pal(7,"Set1") 
-anno_types=levels(as.factor(results_df$shortAnno)) -keyvals=rep("grey",times=nrow(results_df)) -names(keyvals)=rep("NS",times=length(keyvals)) -for ( i in seq(1,length(anno_types))) { - keyvals[ abs(results_df$log2FoldChange) > log2fc_cutoff & results_df$padj < fdr_cutoff & results_df$shortAnno == anno_types[i] ] = colors[i] +colors <- brewer.pal(7, "Set1") +anno_types <- levels(as.factor(results_df$shortAnno)) +keyvals <- rep("grey", times = nrow(results_df)) +names(keyvals) <- rep("NS", times = length(keyvals)) +for (i in seq(1, length(anno_types))) { + keyvals[abs(results_df$log2FoldChange) > log2fc_cutoff & results_df$padj < fdr_cutoff & results_df$shortAnno == anno_types[i]] <- colors[i] names(keyvals)[keyvals == colors[i]] <- anno_types[i] } # names(keyvals)[names(keyvals)=="NA"]="NS" EnhancedVolcano(results_df, - lab = results_df$SYMBOL, - x = 'log2FoldChange', - y = 'padj', - ylab = bquote(~-Log[10] ~ FDR), - pCutoff = fdr_cutoff, - FCcutoff = log2fc_cutoff, - labSize = 4, - title = "", - subtitle = "", - titleLabSize = 1, - subtitleLabSize = 1, - #captionLabSize = 10, - colCustom = keyvals, - colAlpha = 1, - # boxedLabels = TRUE, - # drawConnectors = TRUE, - legendLabels = c("NS", expression(Log[2] ~ FC), "FDR", expression(FDR ~ and ~ log[2] ~ FC)), - legendLabSize = 10 - ) + lab = results_df$SYMBOL, + x = "log2FoldChange", + y = "padj", + ylab = bquote(~ -Log[10] ~ FDR), + pCutoff = fdr_cutoff, + FCcutoff = log2fc_cutoff, + labSize = 4, + title = "", + subtitle = "", + titleLabSize = 1, + subtitleLabSize = 1, + # captionLabSize = 10, + colCustom = keyvals, + colAlpha = 1, + # boxedLabels = TRUE, + # drawConnectors = TRUE, + legendLabels = c("NS", expression(Log[2] ~ FC), "FDR", expression(FDR ~ and ~ log[2] ~ FC)), + legendLabSize = 10 +) ``` ### DESeq Detailed Results diff --git a/workflow/scripts/_diff_markdown_wrapper.R b/workflow/scripts/_diff_markdown_wrapper.R index e562feb..9aa629d 100644 --- a/workflow/scripts/_diff_markdown_wrapper.R +++ b/workflow/scripts/_diff_markdown_wrapper.R @@ -1,121 +1,175 @@ #!/usr/bin/env Rscript suppressPackageStartupMessages(library("argparse")) -#debug="TRUE" -#path="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff" +# debug="TRUE" +# path="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff" -debug="FALSE" -path="/data/CCBR/projects/ccbr1155/CS030586_CARAP/diff" +debug <- "FALSE" +path <- "/data/CCBR/projects/ccbr1155/CS030586_CARAP/diff" # create parser object parser <- ArgumentParser() -parser$add_argument("--rmd", type="character", required=TRUE, - help="path to rmd") -parser$add_argument("--carlisle_functions", type="character", required=TRUE, - help="path to carlisle functions file") -parser$add_argument("--spiked", type="character", required=TRUE, - help="type of normalization used") -parser$add_argument("--rawcountsprescaled", action='store_true', - help="if counts are scaled by spike-in already ... Y (for AUC-based method) or N (for fragments-based method)") -parser$add_argument("--scalesfbymean", action='store_true', - help="DESeq2 scaling factors are around 1. 
To ensure that spike-in scaling factors are also around 1 divide each scaling factor by mean of all scaling factors.") -parser$add_argument("--htsfilter", action='store_true', - help="Use HTSFilter") -parser$add_argument("--contrast_data", type="character", required=FALSE, default=NULL, - help="contrast_data inputs file") -parser$add_argument("--countsmatrix", type="character", required=TRUE, - help="countsmatrix as TSV") -parser$add_argument("--sampleinfo", type="character", required=TRUE, - help="sample info as TSV") -parser$add_argument("--dupstatus", type="character", required=TRUE, - help="either dedup or no_dedup") -parser$add_argument("--fdr_cutoff", type="double", default=0.05, required=FALSE, - help="FDR cutoff [default %(default)s]") -parser$add_argument("--log2fc_cutoff", type="double", default=0.59, required=FALSE, - help="log2foldchange cutoff [default %(default)s]") -parser$add_argument("--condition1", type="character", required=TRUE, - help = "condition1") -parser$add_argument("--condition2", type="character", required=TRUE, - help = "condition2") -parser$add_argument("--results", type="character", required=TRUE, - help = "path to results TSV") -parser$add_argument("--report", type="character", required=TRUE, - help = "HTML report") -parser$add_argument("--elbowlimits", type="character", required=TRUE, - help = "YAML ELBOW limits") -parser$add_argument("--tmpdir", type="character", required=FALSE, default="/tmp", - help = "tmpdir") -parser$add_argument("--species", type="character", required=TRUE, - help = "species") -parser$add_argument("--gtf", type="character", required=FALSE, - help = "gtf path - needed for HS1") +parser$add_argument("--rmd", + type = "character", required = TRUE, + help = "path to rmd" +) +parser$add_argument("--carlisle_functions", + type = "character", required = TRUE, + help = "path to carlisle functions file" +) +parser$add_argument("--spiked", + type = "character", required = TRUE, + help = "type of normalization used" +) +parser$add_argument("--rawcountsprescaled", + action = "store_true", + help = "if counts are scaled by spike-in already ... Y (for AUC-based method) or N (for fragments-based method)" +) +parser$add_argument("--scalesfbymean", + action = "store_true", + help = "DESeq2 scaling factors are around 1. To ensure that spike-in scaling factors are also around 1 divide each scaling factor by mean of all scaling factors." 
+) +parser$add_argument("--htsfilter", + action = "store_true", + help = "Use HTSFilter" +) +parser$add_argument("--contrast_data", + type = "character", required = FALSE, default = NULL, + help = "contrast_data inputs file" +) +parser$add_argument("--countsmatrix", + type = "character", required = TRUE, + help = "countsmatrix as TSV" +) +parser$add_argument("--sampleinfo", + type = "character", required = TRUE, + help = "sample info as TSV" +) +parser$add_argument("--dupstatus", + type = "character", required = TRUE, + help = "either dedup or no_dedup" +) +parser$add_argument("--fdr_cutoff", + type = "double", default = 0.05, required = FALSE, + help = "FDR cutoff [default %(default)s]" +) +parser$add_argument("--log2fc_cutoff", + type = "double", default = 0.59, required = FALSE, + help = "log2foldchange cutoff [default %(default)s]" +) +parser$add_argument("--condition1", + type = "character", required = TRUE, + help = "condition1" +) +parser$add_argument("--condition2", + type = "character", required = TRUE, + help = "condition2" +) +parser$add_argument("--results", + type = "character", required = TRUE, + help = "path to results TSV" +) +parser$add_argument("--report", + type = "character", required = TRUE, + help = "HTML report" +) +parser$add_argument("--elbowlimits", + type = "character", required = TRUE, + help = "YAML ELBOW limits" +) +parser$add_argument("--tmpdir", + type = "character", required = FALSE, default = "/tmp", + help = "tmpdir" +) +parser$add_argument("--species", + type = "character", required = TRUE, + help = "species" +) +parser$add_argument("--gtf", + type = "character", required = FALSE, + help = "gtf path - needed for HS1" +) args <- parser$parse_args() gtf <- args$gtf -if (debug){ - carlisle_functions="/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" - rawcountsmatrix="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/counts_matrix.txt" - coldata="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/sample_info.txt" - dupstatus="dedup" - condition1="siSmyd3_2m_Smyd3_0.25HCHO_500K" - condition2="siNC_2m_Smyd3_0.25HCHO_500K" - indexcols="peakID" - fdr_cutoff=0.05 - log2fc_cutoff=0.59 - results="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/results.txt" - report="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/report.html" - spiked="LIBRARY" - rawcountsprescaled="N" - scalesfbymean="N" - htsfilter="Y" - elbowlimits="~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/elbow.yaml" - tmpdir="/dev/shm" - gtf="~/../../Volumes/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" +if (debug) { + carlisle_functions <- "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" + rawcountsmatrix <- "~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/counts_matrix.txt" + coldata <- "~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/sample_info.txt" + dupstatus <- "dedup" + condition1 <- "siSmyd3_2m_Smyd3_0.25HCHO_500K" + condition2 <- "siNC_2m_Smyd3_0.25HCHO_500K" + indexcols <- "peakID" + fdr_cutoff <- 0.05 + log2fc_cutoff <- 0.59 + results <- "~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/results.txt" + report <- "~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/report.html" + spiked <- "LIBRARY" + rawcountsprescaled <- "N" + scalesfbymean <- "N" + htsfilter <- "Y" + elbowlimits <- "~/CCBR/projects/ccbr1155/CS030586_CARAP/diff/elbow.yaml" + tmpdir <- "/dev/shm" + gtf <- "~/../../Volumes/CCBR_Pipeliner/db/PipeDB/Indices/hs1/genes.gtf" } else { - carlisle_functions=args$carlisle_functions - rawcountsmatrix=args$countsmatrix - coldata=args$sampleinfo - 
dupstatus=args$dupstatus - condition1=args$condition1 - condition2=args$condition2 - fdr_cutoff=args$fdr_cutoff - log2fc_cutoff=args$log2fc_cutoff - indexcols="peakID" - results=args$results - report=args$report - spiked=args$spiked - contrast_data=args$contrast_data - elbowlimits=args$elbowlimits - if (args$rawcountsprescaled) {rawcountsprescaled="Y"} else {rawcountsprescaled="N"} - if (args$scalesfbymean) {scalesfbymean="Y"} else {scalesfbymean="N"} - tmpdir=args$tmpdir - if (args$htsfilter) {htsfilter="Y"} else {htsfilter="N"} - species=args$species - gtf=args$gtf + carlisle_functions <- args$carlisle_functions + rawcountsmatrix <- args$countsmatrix + coldata <- args$sampleinfo + dupstatus <- args$dupstatus + condition1 <- args$condition1 + condition2 <- args$condition2 + fdr_cutoff <- args$fdr_cutoff + log2fc_cutoff <- args$log2fc_cutoff + indexcols <- "peakID" + results <- args$results + report <- args$report + spiked <- args$spiked + contrast_data <- args$contrast_data + elbowlimits <- args$elbowlimits + if (args$rawcountsprescaled) { + rawcountsprescaled <- "Y" + } else { + rawcountsprescaled <- "N" + } + if (args$scalesfbymean) { + scalesfbymean <- "Y" + } else { + scalesfbymean <- "N" + } + tmpdir <- args$tmpdir + if (args$htsfilter) { + htsfilter <- "Y" + } else { + htsfilter <- "N" + } + species <- args$species + gtf <- args$gtf } -parameters=list( - carlisle_functions=carlisle_functions, - rawcountsmatrix=rawcountsmatrix, - coldata=coldata, - spiked=spiked, - rawcountsprescaled=rawcountsprescaled, - scalesfbymean=scalesfbymean, - contrast_data=contrast_data, - dupstatus=dupstatus, - condition1=condition1, - condition2=condition2, - indexcols=indexcols, - htsfilter=htsfilter, - fdr_cutoff=fdr_cutoff, - log2fc_cutoff=log2fc_cutoff, - results=results, - elbowlimits=elbowlimits, - species=species, - gtf=gtf) +parameters <- list( + carlisle_functions = carlisle_functions, + rawcountsmatrix = rawcountsmatrix, + coldata = coldata, + spiked = spiked, + rawcountsprescaled = rawcountsprescaled, + scalesfbymean = scalesfbymean, + contrast_data = contrast_data, + dupstatus = dupstatus, + condition1 = condition1, + condition2 = condition2, + indexcols = indexcols, + htsfilter = htsfilter, + fdr_cutoff = fdr_cutoff, + log2fc_cutoff = log2fc_cutoff, + results = results, + elbowlimits = elbowlimits, + species = species, + gtf = gtf +) rmarkdown::render(args$rmd, - params=parameters, + params = parameters, output_file = report, - intermediates_dir = paste(tmpdir,"intermediates_dir",sep="/"), - knit_root_dir = paste(tmpdir,"knit_root_dir",sep="/")) + intermediates_dir = paste(tmpdir, "intermediates_dir", sep = "/"), + knit_root_dir = paste(tmpdir, "knit_root_dir", sep = "/") +) diff --git a/workflow/scripts/_filter_bam.py b/workflow/scripts/_filter_bam.py index dcf85a8..58fb979 100644 --- a/workflow/scripts/_filter_bam.py +++ b/workflow/scripts/_filter_bam.py @@ -11,25 +11,33 @@ import pysam import argparse -parser = argparse.ArgumentParser(description='Filter BAM by readids') -parser.add_argument('--inputBAM', type=str, required=True, - help='input BAM file') -parser.add_argument('--outputBAM', type=str, required=True, - help='filtered output BAM file') -parser.add_argument('--fragmentlength', type=int, required=False, default=1000, - help='discard flagment lengths larger than this integer') -parser.add_argument('--removemarkedduplicates', action='store_true', - help='removed marked and optical duplicates') +parser = argparse.ArgumentParser(description="Filter BAM by readids") 
+parser.add_argument("--inputBAM", type=str, required=True, help="input BAM file") +parser.add_argument( + "--outputBAM", type=str, required=True, help="filtered output BAM file" +) +parser.add_argument( + "--fragmentlength", + type=int, + required=False, + default=1000, + help="discard flagment lengths larger than this integer", +) +parser.add_argument( + "--removemarkedduplicates", + action="store_true", + help="removed marked and optical duplicates", +) args = parser.parse_args() inBAM = pysam.AlignmentFile(args.inputBAM, "rb") outBAM = pysam.AlignmentFile(args.outputBAM, "wb", template=inBAM) -count=0 +count = 0 for read in inBAM.fetch(): - count+=1 - if count%1000000 == 0: - print("%d reads read!"%(count)) + count += 1 + if count % 1000000 == 0: + print("%d reads read!" % (count)) if not read.is_proper_pair or read.is_unmapped: continue if read.template_length > args.fragmentlength: @@ -38,4 +46,4 @@ continue outBAM.write(read) inBAM.close() -outBAM.close() \ No newline at end of file +outBAM.close() diff --git a/workflow/scripts/_generate_spikein_plots.Rmd b/workflow/scripts/_generate_spikein_plots.Rmd index f9db808..2070a9b 100644 --- a/workflow/scripts/_generate_spikein_plots.Rmd +++ b/workflow/scripts/_generate_spikein_plots.Rmd @@ -8,7 +8,7 @@ params: carlisle_functions: "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" bam_list: "~/../../Volumes/data/carlisle/v2.0/results/bam/raw/53_H3K4me3_1.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/53_H3K4me3_2.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/HN6_H3K4me3_1.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/HN6_H3K4me3_2.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/53_H4K20m3_1.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/53_H4K20m3_2.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/HN6_H4K20me3_1.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/HN6_H4K20me3_2.bam.idxstatsxxx~/../../Volumes/data/carlisle/v2.0/results/bam/raw/HN6_IgG_rabbit_negative_control_1.bam.idxstats" spikein_control: "NC_000913.3" -editor_options: +editor_options: chunk_output_type: console --- @@ -23,17 +23,17 @@ load_packages() ```{r params, include=FALSE, echo=FALSE} # set params -bam_list=params$bam_list -spikein_control=params$spikein_control -debug="N" +bam_list <- params$bam_list +spikein_control <- params$spikein_control +debug <- "N" -if (debug=="Y"){ - spikein_control="NC_000913.3" - bam_list="" +if (debug == "Y") { + spikein_control <- "NC_000913.3" + bam_list <- "" } # set up bam list -bam_list=as.list(strsplit(bam_list, 'xxx'))[[1]] +bam_list <- as.list(strsplit(bam_list, "xxx"))[[1]] ``` @@ -42,29 +42,30 @@ bam_list=as.list(strsplit(bam_list, 'xxx'))[[1]] # /data/sevillas2/carlisle/v2.0/results/bam/HN6_H4K20me3_2.dedup.bam.idxstats # create df -proj_df=data.frame() -for (bam in bam_list){ - total_str_length=length(strsplit(bam,"/")[[1]]) - +proj_df <- data.frame() +for (bam in bam_list) { + total_str_length <- length(strsplit(bam, "/")[[1]]) + # example: # /data/sevillas2/carlisle/v2.0/results/bam/HN6_H4K20me3_2.dedup.bam.idxstats - proj_df[nrow(proj_df)+1,"bam"]=bam - + proj_df[nrow(proj_df) + 1, "bam"] <- bam + # example: HN6_H4K20me3_2.dedup.bam.idxstats - ext=strsplit(bam,"/")[[1]][total_str_length] - + ext <- strsplit(bam, "/")[[1]][total_str_length] + # example: HN6_H4K20me3_2 - proj_df[nrow(proj_df),"repid"]=strsplit(ext,"[.]")[[1]][1] - + proj_df[nrow(proj_df), 
"repid"] <- strsplit(ext, "[.]")[[1]][1] + # example: HN6_H4K20me3 - proj_df[nrow(proj_df),"sampleid"]=gsub("_[1,2,3]","",proj_df[nrow(proj_df),"repid"]) + proj_df[nrow(proj_df), "sampleid"] <- gsub("_[1,2,3]", "", proj_df[nrow(proj_df), "repid"]) } DT::datatable(proj_df) ``` ```{r spikeplot, echo=FALSE} -spike_df=GENERATE_SPIKEIN_PLOT(input_df=proj_df, - spike_type=spikein_control) +spike_df <- GENERATE_SPIKEIN_PLOT( + input_df = proj_df, + spike_type = spikein_control +) DT::datatable(spike_df) ``` - diff --git a/workflow/scripts/_generate_spikein_wrapper.R b/workflow/scripts/_generate_spikein_wrapper.R index 468090b..9cd60ac 100644 --- a/workflow/scripts/_generate_spikein_wrapper.R +++ b/workflow/scripts/_generate_spikein_wrapper.R @@ -3,36 +3,48 @@ suppressPackageStartupMessages(library("argparse")) # create parser object parser <- ArgumentParser() -parser$add_argument("--rmd", type="character", required=TRUE, - help="path to rmd") -parser$add_argument("--carlisle_functions", type="character", required=TRUE, - help="path to carlisle functions file") -parser$add_argument("--report", type="character", required=TRUE, - help = "HTML report") -parser$add_argument("--bam_list", type="character", required=TRUE, - help = "list of .idxstats bam files") -parser$add_argument("--spikein_control", type="character", required=TRUE, - help = "spike in species type") +parser$add_argument("--rmd", + type = "character", required = TRUE, + help = "path to rmd" +) +parser$add_argument("--carlisle_functions", + type = "character", required = TRUE, + help = "path to carlisle functions file" +) +parser$add_argument("--report", + type = "character", required = TRUE, + help = "HTML report" +) +parser$add_argument("--bam_list", + type = "character", required = TRUE, + help = "list of .idxstats bam files" +) +parser$add_argument("--spikein_control", + type = "character", required = TRUE, + help = "spike in species type" +) args <- parser$parse_args() -debug="FALSE" -if (debug){ - carlisle_functions="/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" - report="~/../../Volumes/data/tmp/carlisle/spike_report.html" - bam_list="macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" - spikein_control="NC_000913.3" +debug <- "FALSE" +if (debug) { + carlisle_functions <- "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" + report <- "~/../../Volumes/data/tmp/carlisle/spike_report.html" + bam_list <- "macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" + spikein_control <- "NC_000913.3" } else { - carlisle_functions=args$carlisle_functions - report=args$report - bam_list=args$bam_list - spikein_control=args$spikein_control + carlisle_functions <- args$carlisle_functions + report <- args$report + bam_list <- args$bam_list + spikein_control <- args$spikein_control } -parameters=list( - carlisle_functions=carlisle_functions, - bam_list=bam_list, - spikein_control=spikein_control) +parameters <- list( + carlisle_functions = carlisle_functions, + bam_list = bam_list, + spikein_control = spikein_control +) rmarkdown::render(args$rmd, - 
params=parameters, - output_file = report) \ No newline at end of file + params = parameters, + output_file = report +) diff --git a/workflow/scripts/_get_nreads_stats.py b/workflow/scripts/_get_nreads_stats.py index 938506e..543c49e 100644 --- a/workflow/scripts/_get_nreads_stats.py +++ b/workflow/scripts/_get_nreads_stats.py @@ -3,6 +3,7 @@ import sys import subprocess import yaml + # arguments # @Inputs # @param1 = R1 fastq file @@ -13,32 +14,44 @@ # @param6 = filtered idx stats file - dedup # @param7 = out yaml file -stats=dict() +stats = dict() r1fastqgz = sys.argv[1] -spikeinlen = pandas.read_csv(sys.argv[2],sep="\t",header=None) -genomelen = pandas.read_csv(sys.argv[3],sep="\t",header=None) -raw_idxstats = pandas.read_csv(sys.argv[4],sep="\t",header=None) -nodedup_idxstats = pandas.read_csv(sys.argv[5],sep="\t",header=None) -dedup_idxstats = pandas.read_csv(sys.argv[6],sep="\t",header=None) - -stats["raw_nreads_genome"] = sum(list(raw_idxstats[raw_idxstats[0].isin(list(genomelen[0]))][2])) -stats["raw_nreads_spikein"] = sum(list(raw_idxstats[raw_idxstats[0].isin(list(spikeinlen[0]))][2])) - -stats["no_dedup_nreads_genome"] = sum(list(nodedup_idxstats[nodedup_idxstats[0].isin(list(genomelen[0]))][2])) -stats["no_dedup_nreads_spikein"] = sum(list(nodedup_idxstats[nodedup_idxstats[0].isin(list(spikeinlen[0]))][2])) - -stats["dedup_nreads_genome"] = sum(list(dedup_idxstats[dedup_idxstats[0].isin(list(genomelen[0]))][2])) -stats["dedup_nreads_spikein"] = sum(list(dedup_idxstats[dedup_idxstats[0].isin(list(spikeinlen[0]))][2])) +spikeinlen = pandas.read_csv(sys.argv[2], sep="\t", header=None) +genomelen = pandas.read_csv(sys.argv[3], sep="\t", header=None) +raw_idxstats = pandas.read_csv(sys.argv[4], sep="\t", header=None) +nodedup_idxstats = pandas.read_csv(sys.argv[5], sep="\t", header=None) +dedup_idxstats = pandas.read_csv(sys.argv[6], sep="\t", header=None) + +stats["raw_nreads_genome"] = sum( + list(raw_idxstats[raw_idxstats[0].isin(list(genomelen[0]))][2]) +) +stats["raw_nreads_spikein"] = sum( + list(raw_idxstats[raw_idxstats[0].isin(list(spikeinlen[0]))][2]) +) + +stats["no_dedup_nreads_genome"] = sum( + list(nodedup_idxstats[nodedup_idxstats[0].isin(list(genomelen[0]))][2]) +) +stats["no_dedup_nreads_spikein"] = sum( + list(nodedup_idxstats[nodedup_idxstats[0].isin(list(spikeinlen[0]))][2]) +) + +stats["dedup_nreads_genome"] = sum( + list(dedup_idxstats[dedup_idxstats[0].isin(list(genomelen[0]))][2]) +) +stats["dedup_nreads_spikein"] = sum( + list(dedup_idxstats[dedup_idxstats[0].isin(list(spikeinlen[0]))][2]) +) # get number of reads cmd = "zcat " + r1fastqgz + " | wc -l " -ps = subprocess.Popen(cmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.STDOUT) +ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) nlines = ps.communicate()[0] nlines = int(nlines) -npairs = nlines/4 -stats["nreads"] = int(npairs*2) +npairs = nlines / 4 +stats["nreads"] = int(npairs * 2) -with open(sys.argv[7], 'w') as file: +with open(sys.argv[7], "w") as file: dumped = yaml.dump(stats, file) diff --git a/workflow/scripts/_get_spikeinreads.py b/workflow/scripts/_get_spikeinreads.py index 9d6db9b..454e491 100644 --- a/workflow/scripts/_get_spikeinreads.py +++ b/workflow/scripts/_get_spikeinreads.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 import pandas import sys + # arguments # @Inputs # @param1 $1 = spike-in len file # @param2 $2 = idx stats file -spikeinlen = pandas.read_csv(sys.argv[1],sep="\t",header=None) -idxstats = 
pandas.read_csv(sys.argv[2],sep="\t",header=None) +spikeinlen = pandas.read_csv(sys.argv[1], sep="\t", header=None) +idxstats = pandas.read_csv(sys.argv[2], sep="\t", header=None) nreads_spikein = sum(list(idxstats[idxstats[0].isin(list(spikeinlen[0]))][2])) diff --git a/workflow/scripts/_go_enrichment.Rmd b/workflow/scripts/_go_enrichment.Rmd index 7c12efc..702a309 100644 --- a/workflow/scripts/_go_enrichment.Rmd +++ b/workflow/scripts/_go_enrichment.Rmd @@ -11,13 +11,13 @@ params: species: "hg38" geneset_id: "GOBP" dedup_status: "dedup" -editor_options: +editor_options: chunk_output_type: console --- ```{r setup, include=FALSE} # reference -#https://bioconductor.org/packages/devel/bioc/vignettes/chipenrich/inst/doc/chipenrich-vignette.html +# https://bioconductor.org/packages/devel/bioc/vignettes/chipenrich/inst/doc/chipenrich-vignette.html knitr::opts_chunk$set(echo = TRUE) @@ -28,28 +28,28 @@ load_packages() ```{r params, include=FALSE} # set params -speciesID=params$species -output_dir=params$output_dir -peak_dir=params$peak_dir -peak_list=params$peak_list -debug="N" -geneset_id=params$geneset_id -dedup_status=params$dedup_status - -if (debug=="Y"){ - speciesID="hg19" - functionFile="~/../../Volumes/Pipelines-1/CARLISLE_dev/workflow/scripts/_carlisle_functions.R" - output_dir="~/../../Volumes/data/carlisle/v2.0/results/peaks/0.05/go_enrichment" - peak_dir="~/../../Volumes/data/carlisle/v2.0/results/peaks/0.05/" - peak_list="macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" - geneset_id="GOBP" # supported_genesets() c("GOBP", "GOCC", "GOMF") +speciesID <- params$species +output_dir <- params$output_dir +peak_dir <- params$peak_dir +peak_list <- params$peak_list +debug <- "N" +geneset_id <- params$geneset_id +dedup_status <- params$dedup_status + +if (debug == "Y") { + speciesID <- "hg19" + functionFile <- "~/../../Volumes/Pipelines-1/CARLISLE_dev/workflow/scripts/_carlisle_functions.R" + output_dir <- "~/../../Volumes/data/carlisle/v2.0/results/peaks/0.05/go_enrichment" + peak_dir <- "~/../../Volumes/data/carlisle/v2.0/results/peaks/0.05/" + peak_list <- "macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" + geneset_id <- "GOBP" # supported_genesets() c("GOBP", "GOCC", "GOMF") } # set up peak list -peak_list=as.list(strsplit(peak_list, 'xxx'))[[1]] +peak_list <- as.list(strsplit(peak_list, "xxx"))[[1]] -for (i in 1:length(peak_list[[1]])){ - peak_list[[1]][i]=paste0(peak_dir,peak_list[[1]][i]) +for (i in 1:length(peak_list[[1]])) { + peak_list[[1]][i] <- paste0(peak_dir, peak_list[[1]][i]) } ``` @@ -57,30 +57,40 @@ for (i in 1:length(peak_list[[1]])){ ```{r peakdf, echo=FALSE} # create df for peak info -peak_df=data.frame(peak_list) -colnames(peak_df)="peak_bed" +peak_df <- data.frame(peak_list) +colnames(peak_df) <- "peak_bed" # pull sampleID's -for (rowid in rownames(peak_df)){ - total_string_length=length(strsplit(peak_df[rowid,"peak_bed"],"/")[[1]]) - strsplit(peak_df[rowid,"peak_bed"],"/")[[1]][total_string_length] - - peak_df[rowid,"threshold"]=strsplit(peak_df[rowid,"peak_bed"] - ,"/")[[1]][total_string_length-3] - 
peak_df[rowid,"peak_caller"]=strsplit(peak_df[rowid,"peak_bed"], - "/")[[1]][total_string_length-2] - peak_df[rowid,"sampleid"]=strsplit(strsplit(peak_df[rowid,"peak_bed"], - "/")[[1]][total_string_length],"[.]")[[1]][1] - peak_df[rowid,"peak_type"]=strsplit(strsplit(peak_df[rowid,"peak_bed"], - "/")[[1]][total_string_length],"[.]")[[1]][3] - peak_df[rowid,"dedup_status"]=strsplit(strsplit(peak_df[rowid,"peak_bed"], - "/")[[1]][total_string_length],"[.]")[[1]][2] +for (rowid in rownames(peak_df)) { + total_string_length <- length(strsplit(peak_df[rowid, "peak_bed"], "/")[[1]]) + strsplit(peak_df[rowid, "peak_bed"], "/")[[1]][total_string_length] + + peak_df[rowid, "threshold"] <- strsplit( + peak_df[rowid, "peak_bed"], + "/" + )[[1]][total_string_length - 3] + peak_df[rowid, "peak_caller"] <- strsplit( + peak_df[rowid, "peak_bed"], + "/" + )[[1]][total_string_length - 2] + peak_df[rowid, "sampleid"] <- strsplit(strsplit( + peak_df[rowid, "peak_bed"], + "/" + )[[1]][total_string_length], "[.]")[[1]][1] + peak_df[rowid, "peak_type"] <- strsplit(strsplit( + peak_df[rowid, "peak_bed"], + "/" + )[[1]][total_string_length], "[.]")[[1]][3] + peak_df[rowid, "dedup_status"] <- strsplit(strsplit( + peak_df[rowid, "peak_bed"], + "/" + )[[1]][total_string_length], "[.]")[[1]][2] } -if (debug=="Y"){ - max_samples=2 -} else{ - max_samples=nrow(peak_df) +if (debug == "Y") { + max_samples <- 2 +} else { + max_samples <- nrow(peak_df) } print(peak_df) @@ -92,24 +102,28 @@ print(peak_df) This plot gives a distribution of the distance of the peak midpoints to the TSSs. It can help in selecting a locus definition. For example, if genes are primarily within 5kb of TSSs, then the 5kb locus definition may be a good choice. In contrast, if most genes fall far from TSSs, the nearest_tss locus definition may be a good choice. ```{r qc1, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - print(paste0("** ",peak_df[rowid,"sampleid"]," | ", - peak_df[rowid,"peak_caller"], " | ", - peak_df[rowid,"peak_type"]," **")) +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + print(paste0( + "** ", peak_df[rowid, "sampleid"], " | ", + peak_df[rowid, "peak_caller"], " | ", + peak_df[rowid, "peak_type"], " **" + )) print("--QC Step 1") - tss_obj=plot_dist_to_tss(peaks = READ_PEAK_FILE(peak_file_in=peak_df[rowid,"peak_bed"]), - genome = speciesID) + tss_obj <- plot_dist_to_tss( + peaks = READ_PEAK_FILE(peak_file_in = peak_df[rowid, "peak_bed"]), + genome = speciesID + ) print(tss_obj) - + # create df from values to determine locust - ts_df=data.frame(x=tss_obj$panel.args[[1]]$x,y=tss_obj$panel.args[[1]]$y) - ts_max=max(ts_df$y) - locus_loc=subset(ts_df,y==ts_max)$x - - peak_df[rowid,"locus_loc_def"]=SET_LOCUST_DEF(locus_loc) - peak_df[rowid,"locusdf_list"]=paste(SET_LOCUST_LIST(locus_loc),collapse=",") - peak_df[rowid,"locus_loc_short"]=SET_LOCUST_LIST(locus_loc)[[1]] + ts_df <- data.frame(x = tss_obj$panel.args[[1]]$x, y = tss_obj$panel.args[[1]]$y) + ts_max <- max(ts_df$y) + locus_loc <- subset(ts_df, y == ts_max)$x + + peak_df[rowid, "locus_loc_def"] <- SET_LOCUST_DEF(locus_loc) + peak_df[rowid, "locusdf_list"] <- paste(SET_LOCUST_LIST(locus_loc), collapse = ",") + peak_df[rowid, "locus_loc_short"] <- SET_LOCUST_LIST(locus_loc)[[1]] } ``` @@ -128,13 +142,15 @@ If the defined locus is within 1, 5, or 10kb, then the following three plots are - Nkb_outside:The locus is the region more than Nkb upstream or downstream from a TSS to the midpoint between the adjacent TSS. 
```{r qc2, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - print(paste0("** ",peak_df[rowid,"sampleid"]," | ", - peak_df[rowid,"peak_caller"], " | ", - peak_df[rowid,"peak_type"]," **")) +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + print(paste0( + "** ", peak_df[rowid, "sampleid"], " | ", + peak_df[rowid, "peak_caller"], " | ", + peak_df[rowid, "peak_type"], " **" + )) print("--QC Step 2") - PLOT_QC_MAIN(function_in="spline",rowid_in=rowid) + PLOT_QC_MAIN(function_in = "spline", rowid_in = rowid) } ``` @@ -144,13 +160,15 @@ This plot visualizes the relationship between the number of peaks assigned to a If many gene loci have multiple peaks assigned to them, polyenrich is likely an appropriate method. If there are a low number of peaks per gene, then chipenrich() may be the appropriate method. ```{r qc3, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - print(paste0("** ",peak_df[rowid,"sampleid"]," | ", - peak_df[rowid,"peak_caller"], " | ", - peak_df[rowid,"peak_type"]," **")) +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + print(paste0( + "** ", peak_df[rowid, "sampleid"], " | ", + peak_df[rowid, "peak_caller"], " | ", + peak_df[rowid, "peak_type"], " **" + )) print("--QC Step 3") - PLOT_QC_MAIN("polyenrich",rowid) + PLOT_QC_MAIN("polyenrich", rowid) } ``` @@ -158,13 +176,15 @@ for (rowid in rownames(peak_df[c(1:max_samples),])){ This plot visualizes the relationship between proportion of the gene locus covered by peaks and the locus length (on the log10 scale). For clarity of visualization, each point represents 25 gene loci binned after sorting by locus length. ```{r qc4, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - print(paste0("** ",peak_df[rowid,"sampleid"]," | ", - peak_df[rowid,"peak_caller"], " | ", - peak_df[rowid,"peak_type"]," **")) +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + print(paste0( + "** ", peak_df[rowid, "sampleid"], " | ", + peak_df[rowid, "peak_caller"], " | ", + peak_df[rowid, "peak_type"], " **" + )) print("--QC Step 4") - PLOT_QC_MAIN("cov",rowid) + PLOT_QC_MAIN("cov", rowid) } ``` @@ -174,10 +194,10 @@ The hybrid method is used when one is unsure of which method, between ChIP-Enric The hybrid p-value is given as 2*min(chipenrich_pvalue, polyenrich_pvalue). This test will retain the same Type 1 level and will be a consistent test if one of chipenrich or polyenrich is consistent. This can be extended to any number of tests, but currently we only allow a hybrid test for chipenrich and polyenrich. Currently this method is only employed for SEACR analysis. 
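In other words, the hybrid test simply doubles the smaller of the two method-specific p-values (clamped at 1 so the result remains a valid p-value). A minimal sketch of that combination, assuming per-gene-set p-values from ChIP-Enrich and Poly-Enrich are already in hand (the function name is illustrative; in this report the hybrid results come from `GO_ANALYSIS_MAIN(rowid, "hybrid")` below):

```r
# Illustrative combination of ChIP-Enrich and Poly-Enrich p-values per gene set.
hybrid_pvalue <- function(chipenrich_p, polyenrich_p) {
  pmin(2 * pmin(chipenrich_p, polyenrich_p), 1)
}
hybrid_pvalue(0.012, 0.30) # 0.024
```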
```{r peak_h, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - peak_caller=peak_df[rowid,"peak_caller"] - if (peak_caller=="seacr"){ - GO_ANALYSIS_MAIN(rowid,"hybrid") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + peak_caller <- peak_df[rowid, "peak_caller"] + if (peak_caller == "seacr") { + GO_ANALYSIS_MAIN(rowid, "hybrid") } } ``` @@ -189,10 +209,10 @@ The Broad-Enrich method uses the cumulative peak coverage of genes in its logist ```{r peak_b, echo=FALSE} # set flag to run broad # run BROAD -for (rowid in rownames(peak_df[c(1:max_samples),])){ - peaktype=peak_df[rowid,"peak_type"] - if(peaktype=="broad"){ - GO_ANALYSIS_MAIN(rowid,"broad") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + peaktype <- peak_df[rowid, "peak_type"] + if (peaktype == "broad") { + GO_ANALYSIS_MAIN(rowid, "broad") } } ``` @@ -203,24 +223,24 @@ ChIP-Enrich is designed for use with 1,000s or 10,000s of narrow genomic regions The ChIP-Enrich method uses the presence of a peak in its logistic regression model for enrichment: peak ~ GO + s(log10_length). Here, GO is a binary vector indicating whether a gene is in the gene set being tested, peak is a binary vector indicating the presence of a peak in a gene, and s(log10_length) is a binomial cubic smoothing spline which adjusts for the relationship between the presence of a peak and locus length. Currently, only MACS2 and GOPEAKS utilize this method. ```{r narrow1, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - peaktype=peak_df[rowid,"peak_type"] - if(peaktype=="narrow"){ - GO_ANALYSIS_MAIN(rowid,"enrich") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + peaktype <- peak_df[rowid, "peak_type"] + if (peaktype == "narrow") { + GO_ANALYSIS_MAIN(rowid, "enrich") } } ``` #### 2) Poly-Enrich ###### 2A): Without linking distal enhancers -Poly-Enrich is also designed for narrow peaks, for experiments with 100,000s of peaks, or in cases where the number of binding sites per gene affects its regulation. +Poly-Enrich is also designed for narrow peaks, for experiments with 100,000s of peaks, or in cases where the number of binding sites per gene affects its regulation. The Poly-Enrich method uses the number of peaks in genes in its negative binomial regression model for enrichment: num_peaks ~ GO + s(log10_length). Here, GO is a binary vector indicating whether a gene is in the gene set being tested, num_peaks is a numeric vector indicating the number of peaks in each gene, and s(log10_length) is a negative binomial cubic smoothing spline which adjusts for the relationship between the number of peaks in a gene and locus length. Currently, only MACS2 and GOPEAKS utilize this method. ```{r narrow2a, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - peaktype=peak_df[rowid,"peak_type"] - if(peaktype=="narrow"){ - GO_ANALYSIS_MAIN(rowid,"poly") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + peaktype <- peak_df[rowid, "peak_type"] + if (peaktype == "narrow") { + GO_ANALYSIS_MAIN(rowid, "poly") } } ``` @@ -230,11 +250,11 @@ Poly-Enrich can also be used for linking human distal enhancer regions to their Poly-Enrich also allows weighting of individual genomic regions based on a score, which can be useful for differential methylation enrichment analysis and ChIP-seq. Currently the options are: signalValue and logsignalValue. 
signalValue weighs each genomic region or peak based on the Signal Value given in the narrowPeak format or a user-supplied column (column name should be signalValue), while logsignalValue takes the log of these values. Currently, only MACS2 and GOPEAKS utilize this method. ```{r narrow2b, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - peaktype=peak_df[rowid,"peak_type"] - peakcaller=peak_df[rowid,"peak_caller"] - if(peaktype=="narrow" & peakcaller != "gopeaks"){ - GO_ANALYSIS_MAIN(rowid,"poly_weighted") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + peaktype <- peak_df[rowid, "peak_type"] + peakcaller <- peak_df[rowid, "peak_caller"] + if (peaktype == "narrow" & peakcaller != "gopeaks") { + GO_ANALYSIS_MAIN(rowid, "poly_weighted") } } ``` @@ -245,7 +265,7 @@ The proximity to regulatory regions (proxReg) test is a complementary test to an ProxReg first calculates the distance between the midpoints of peaks and the nearest transcription start site or the nearest enhancer region midpoint for each genomic region. Each genomic region is then assigned to the gene with the nearest transcription start site. The distances are then classified according to whether the gene is in the gene set or not, and a signed Wilcoxon rank-sum test is used to calculate if the regions are closer or farther in the gene set than average. ```{r reg, echo=FALSE} -for (rowid in rownames(peak_df[c(1:max_samples),])){ - GO_ANALYSIS_MAIN(rowid,"reglocation") +for (rowid in rownames(peak_df[c(1:max_samples), ])) { + GO_ANALYSIS_MAIN(rowid, "reglocation") } ``` diff --git a/workflow/scripts/_go_enrichment_wrapper.R b/workflow/scripts/_go_enrichment_wrapper.R index 1c3ee4c..f745105 100644 --- a/workflow/scripts/_go_enrichment_wrapper.R +++ b/workflow/scripts/_go_enrichment_wrapper.R @@ -3,51 +3,69 @@ suppressPackageStartupMessages(library("argparse")) # create parser object parser <- ArgumentParser() -parser$add_argument("--rmd", type="character", required=TRUE, - help="path to rmd") -parser$add_argument("--carlisle_functions", type="character", required=TRUE, - help="path to carlisle functions file") -parser$add_argument("--output_dir", type="character", required=FALSE, - help = "output_dir") -parser$add_argument("--report", type="character", required=TRUE, - help = "HTML report") -parser$add_argument("--peak_list", type="character", required=TRUE, - help = "peak_list") -parser$add_argument("--species", type="character", required=TRUE, - help = "species") -parser$add_argument("--geneset_id", type="character", required=FALSE, default="GOBP", - help = "geneset_id") -parser$add_argument("--dedup_status", type="character", required=FALSE, default="dedup", - help = "deduplication status") +parser$add_argument("--rmd", + type = "character", required = TRUE, + help = "path to rmd" +) +parser$add_argument("--carlisle_functions", + type = "character", required = TRUE, + help = "path to carlisle functions file" +) +parser$add_argument("--output_dir", + type = "character", required = FALSE, + help = "output_dir" +) +parser$add_argument("--report", + type = "character", required = TRUE, + help = "HTML report" +) +parser$add_argument("--peak_list", + type = "character", required = TRUE, + help = "peak_list" +) +parser$add_argument("--species", + type = "character", required = TRUE, + help = "species" +) +parser$add_argument("--geneset_id", + type = "character", required = FALSE, default = "GOBP", + help = "geneset_id" +) +parser$add_argument("--dedup_status", + type = "character", required = FALSE, default 
= "dedup", + help = "deduplication status" +) args <- parser$parse_args() -debug="FALSE" -if (debug){ - carlisle_functions="/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" - output_dir="~/../../Volumes/data/tmp" - report="~/../../Volumes/data/tmp/carlisle/report.html" - peak_list="macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" - species="hg38" - geneset_id="GOBP" - dedup_status="dedup" +debug <- "FALSE" +if (debug) { + carlisle_functions <- "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R" + output_dir <- "~/../../Volumes/data/tmp" + report <- "~/../../Volumes/data/tmp/carlisle/report.html" + peak_list <- "macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_1_vs_nocontrol.dedup.narrow.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.broad.peaks.bed macs2/peak_output/53_H3K4me3_2_vs_nocontrol.dedup.narrow.peaks.bed" + species <- "hg38" + geneset_id <- "GOBP" + dedup_status <- "dedup" } else { - carlisle_functions=args$carlisle_functions - output_dir=args$output_dir - report=args$report - peak_list=args$peak_list - species=args$species - geneset_id=args$geneset_id - dedup_status=args$dedup_status + carlisle_functions <- args$carlisle_functions + output_dir <- args$output_dir + report <- args$report + peak_list <- args$peak_list + species <- args$species + geneset_id <- args$geneset_id + dedup_status <- args$dedup_status } -parameters=list( - carlisle_functions=carlisle_functions, - output_dir=output_dir, - peak_list=peak_list, - species=species, - geneset_id=geneset_id, - dedup_status=dedup_status) +parameters <- list( + carlisle_functions = carlisle_functions, + output_dir = output_dir, + peak_list = peak_list, + species = species, + geneset_id = geneset_id, + dedup_status = dedup_status +) rmarkdown::render(args$rmd, - params=parameters, - output_file = report) \ No newline at end of file + params = parameters, + output_file = report +) diff --git a/workflow/scripts/_jobby.py b/workflow/scripts/_jobby.py index cb5f584..747183e 100644 --- a/workflow/scripts/_jobby.py +++ b/workflow/scripts/_jobby.py @@ -4,14 +4,14 @@ # -*- coding: UTF-8 -*- """ -ABOUT: +ABOUT: `jobby` will take your past jobs and display their job information. - Why? We have pipelines running on several different clusters and + Why? We have pipelines running on several different clusters and job schedulers. `jobby` is an attempt to centralize and abstract the process of querying different job schedulers. On each supported - target system, `jobby` will attempt to determine the best method for - getting job information to return to the user in a standardized - format and unified cli. + target system, `jobby` will attempt to determine the best method for + getting job information to return to the user in a standardized + format and unified cli. REQUIRES: - python>=3.5 @@ -22,26 +22,26 @@ National Institute of Allergy and Infectious Diseases (NIAID) This software/database is a "United States Government Work" under - the terms of the United States Copyright Act. It was written as + the terms of the United States Copyright Act. It was written as part of the author's official duties as a United States Government employee and thus cannot be copyrighted. 
This software is freely available to the public for use. - + Although all reasonable efforts have been taken to ensure the accuracy and reliability of the software and data, NCBR do not and - cannot warrant the performance or results that may be obtained by + cannot warrant the performance or results that may be obtained by using this software or data. NCBR and NIH disclaim all warranties, - express or implied, including warranties of performance, + express or implied, including warranties of performance, merchantability or fitness for any particular purpose. - - Please cite the author and NIH resources like the "Biowulf Cluster" + + Please cite the author and NIH resources like the "Biowulf Cluster" in any work or product based on this material. USAGE: $ jobby [OPTIONS] JOB_ID [JOB_ID ...] EXAMPLE: - $ jobby 18627545 15627516 58627597 + $ jobby 18627545 15627516 58627597 """ # Python standard library @@ -50,52 +50,53 @@ from subprocess import PIPE import argparse # added in python/3.5 import textwrap # added in python/3.5 -import tempfile # added in python/3.5 +import tempfile # added in python/3.5 # Jobby metadata -__version__ = 'v0.2.0' -__authors__ = 'Skyler Kuhn' -__email__ = 'skyler.kuhn@nih.gov' -__home__ = os.path.dirname(os.path.abspath(__file__)) +__version__ = "v0.2.0" +__authors__ = "Skyler Kuhn" +__email__ = "skyler.kuhn@nih.gov" +__home__ = os.path.dirname(os.path.abspath(__file__)) _name = os.path.basename(sys.argv[0]) -_description = 'Will take your job(s)... and display their information!' +_description = "Will take your job(s)... and display their information!" # Classes -class Colors(): +class Colors: """Class encoding for ANSI escape sequeces for styling terminal text. Any string that is formatting with these styles must be terminated with the escape sequence, i.e. `Colors.end`. """ + # Escape sequence - end = '\33[0m' + end = "\33[0m" # Formatting options - bold = '\33[1m' - italic = '\33[3m' - url = '\33[4m' - blink = '\33[5m' - higlighted = '\33[7m' + bold = "\33[1m" + italic = "\33[3m" + url = "\33[4m" + blink = "\33[5m" + higlighted = "\33[7m" # Text Colors - black = '\33[30m' - red = '\33[31m' - green = '\33[32m' - yellow = '\33[33m' - blue = '\33[34m' - pink = '\33[35m' - cyan = '\33[96m' - white = '\33[37m' + black = "\33[30m" + red = "\33[31m" + green = "\33[32m" + yellow = "\33[33m" + blue = "\33[34m" + pink = "\33[35m" + cyan = "\33[96m" + white = "\33[37m" # Background fill colors - bg_black = '\33[40m' - bg_red = '\33[41m' - bg_green = '\33[42m' - bg_yellow = '\33[43m' - bg_blue = '\33[44m' - bg_pink = '\33[45m' - bg_cyan = '\33[46m' - bg_white = '\33[47m' + bg_black = "\33[40m" + bg_red = "\33[41m" + bg_green = "\33[42m" + bg_yellow = "\33[43m" + bg_blue = "\33[44m" + bg_pink = "\33[45m" + bg_cyan = "\33[46m" + bg_white = "\33[47m" -# Helper Functions +# Helper Functions def which(cmd, path=None): """Checks if an executable is in $PATH @param cmd : @@ -120,7 +121,7 @@ def which(cmd, path=None): def err(*message, **kwargs): """Prints any provided args to standard error. - kwargs can be provided to modify print functions + kwargs can be provided to modify print functions behavior. @param message : Values printed to standard error @@ -130,7 +131,6 @@ def err(*message, **kwargs): print(*message, file=sys.stderr, **kwargs) - def fatal(*message, **kwargs): """Prints any provided args to standard error and exits with an exit code of 1. 
@@ -146,42 +146,40 @@ def fatal(*message, **kwargs): def get_toolkit(tool_list): """Finds the best suited tool from a list of possible choices. Assumes tool list is already - ordered from the best to worst choice. The first + ordered from the best to worst choice. The first tool found in a user's $PATH is returned. @param tool_list list[]: List of ordered tools to find @returns best_choice : First tool found in tool_list """ - best_choice = None + best_choice = None for exe in tool_list: if which(exe): best_choice = exe break - + # Did not find any tools # to potentially use if not best_choice: - err( - 'Error: Did not find any tools to get job information!' - ) + err("Error: Did not find any tools to get job information!") fatal( - 'Expected one of the following tools to be in $PATH:' - '\t{0}'.format(tool_list) + "Expected one of the following tools to be in $PATH:" + "\t{0}".format(tool_list) ) - + return best_choice def add_missing(linelist, insertion_dict): - """Adds missing information to a list. This can be used - to add missing job information fields to the results of + """Adds missing information to a list. This can be used + to add missing job information fields to the results of job querying tool. @param linelist list[]: List containing job information for each field of interest @param insertion_dict dict[] = str Dictionary used to insert missing information to a given - index, where the keys are indices of the `linelist` and the + index, where the keys are indices of the `linelist` and the values are information to add. Please note that the indices should be zero based. Note that multiple consequetive values should be inserted at once as a list, see example below: @@ -192,16 +190,16 @@ def add_missing(linelist, insertion_dict): # Get the order of indices # add missing information # starting from largest to - # smallest, if we insert - # missing values in this + # smallest, if we insert + # missing values in this # order we do not need to - # calculate the offset of + # calculate the offset of # new indices tmp_list = linelist indices = sorted(list(insertion_dict.keys()), reverse=True) for i in indices: # Check if multiple values - # need to be inserted at a + # need to be inserted at a # given index if isinstance(insertion_dict[i], list): for v in reversed(insertion_dict[i]): @@ -212,17 +210,12 @@ def add_missing(linelist, insertion_dict): def convert_size(size_bytes): - """Converts bytes to a human readable format. - """ - # Sizes range from B to YiB, + """Converts bytes to a human readable format.""" + # Sizes range from B to YiB, # warning larger sizes storage - # may results in blackhole - size_name = ( - "B", "KiB", "MiB", - "GiB", "TiB", "PiB", - "EiB", "ZiB", "YiB" - ) - if size_bytes == 0: + # may results in blackhole + size_name = ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB") + if size_bytes == 0: return "0B" i = int(math.floor(math.log(size_bytes, 1024))) p = math.pow(1024, i) @@ -234,36 +227,53 @@ def to_bytes(size): """Convert a human readable size unit into bytes. 
Returns None if cannot convert/parse provided size.""" size2bytes = { - "b":1, "bytes":1, "byte":1, - "k":1024, "kib":1024, "kb":1000, - "m": 1024**2, "mib": 1024**2, "mb": 1000**2, - "g": 1024**3, "gib": 1024**3, "gb": 1000**3, - "t": 1024**4, "tib": 1024**4, "tb": 1000**4, - "p": 1024**5, "pib": 1024**5, "pb": 1000**5, - "e": 1024**6, "eib": 1024**6, "eb": 1000**6, - "z": 1024**7, "zib": 1024**7, "zb": 1000**7, - "y": 1024**8, "yib": 1024**8, "yb": 1000**8 + "b": 1, + "bytes": 1, + "byte": 1, + "k": 1024, + "kib": 1024, + "kb": 1000, + "m": 1024**2, + "mib": 1024**2, + "mb": 1000**2, + "g": 1024**3, + "gib": 1024**3, + "gb": 1000**3, + "t": 1024**4, + "tib": 1024**4, + "tb": 1000**4, + "p": 1024**5, + "pib": 1024**5, + "pb": 1000**5, + "e": 1024**6, + "eib": 1024**6, + "eb": 1000**6, + "z": 1024**7, + "zib": 1024**7, + "zb": 1000**7, + "y": 1024**8, + "yib": 1024**8, + "yb": 1000**8, } - - size = size.replace(' ','') - match = re.search('(?P[0-9.]+)(?P[a-zA-Z]+)$', size) - + + size = size.replace(" ", "") + match = re.search("(?P[0-9.]+)(?P[a-zA-Z]+)$", size) + if match: - human_units = match.group('units').lower() + human_units = match.group("units").lower() human_units = human_units.lstrip().rstrip() scaling_factor = size2bytes[human_units] - bytes = int(math.ceil(scaling_factor * float(match.group('size')))) + bytes = int(math.ceil(scaling_factor * float(match.group("size")))) else: # Cannot parse units, # cannot convert value # into bytes return None - - return bytes + return bytes -# Core logic for getting +# Core logic for getting # job information def sge(jobs, threads, tmp_dir): """Displays SGE job information to standard output. @@ -281,19 +291,19 @@ def uge(jobs, threads, tmp_dir): Parsed command-line arguments @return None """ - # NOTE: add later for LOCUS cluster + # NOTE: add later for LOCUS cluster pass def dashboard_cli(jobs, threads=1, tmp_dir=None): """Biowulf-specific tool to get SLURM job information. - HPC staff recommend using this over the default slurm - `sacct` command for performance reasons. By default, + HPC staff recommend using this over the default slurm + `sacct` command for performance reasons. 
By default, the `dashboard_cli` returns information for the following fields: - jobid state submit_time partition nodes - cpus mem timelimit gres dependency - queued_time state_reason start_time elapsed_time end_time + jobid state submit_time partition nodes + cpus mem timelimit gres dependency + queued_time state_reason start_time elapsed_time end_time cpu_max mem_max eval Runs command: $ dashboard_cli jobs \\ @@ -302,41 +312,50 @@ def dashboard_cli(jobs, threads=1, tmp_dir=None): --tab --archive """ fields = [ - "jobid","jobname", - "state","partition", - "gres","cpus","mem", - "cpu_max","mem_max", - "timelimit","queued_time", - "start_time","end_time", - "elapsed_time","nodelist", - "user", "std_out", "std_err", - "work_dir" + "jobid", + "jobname", + "state", + "partition", + "gres", + "cpus", + "mem", + "cpu_max", + "mem_max", + "timelimit", + "queued_time", + "start_time", + "end_time", + "elapsed_time", + "nodelist", + "user", + "std_out", + "std_err", + "work_dir", ] - + # Display header information, # --tab option does not print # the header - print('\t'.join(fields)) + print("\t".join(fields)) # Display job information cmd = subprocess.run( - 'dashboard_cli jobs --archive --tab --joblist {0} --fields {1}'.format( - ','.join(jobs), - ','.join(fields) + "dashboard_cli jobs --archive --tab --joblist {0} --fields {1}".format( + ",".join(jobs), ",".join(fields) ), stdout=PIPE, stderr=PIPE, universal_newlines=True, - shell=True + shell=True, ) # Check for failure # of the last command if cmd.returncode != 0: err("\nError: Failed to get job information with 'dashboard_cli'!") - err('Please see error message below:') - fatal(' └── ', cmd.stderr) + err("Please see error message below:") + fatal(" └── ", cmd.stderr) - print(cmd.stdout.rstrip('\n')) + print(cmd.stdout.rstrip("\n")) def sacct(jobs, threads=1, tmp_dir=None): @@ -344,11 +363,11 @@ def sacct(jobs, threads=1, tmp_dir=None): `sacct` should be available on all SLURM clusters. The `dashboard_cli` is prioritized over using `sacct` due to perform reasons; however, this method will be - portable across different SLURM clusters. To get maximum - memory usage for a job, we will need to parse the MaxRSS + portable across different SLURM clusters. To get maximum + memory usage for a job, we will need to parse the MaxRSS field from the `$SLURM_JOBID.batch` lines. 
Returns job information for the following fields: - jobid jobname state partition reqtres + jobid jobname state partition reqtres alloccpus reqmem maxrss timelimit reserved start end elapsed nodelist user workdir @@ -357,49 +376,70 @@ def sacct(jobs, threads=1, tmp_dir=None): Runs command: $ sacct -j 12345679,12345680 \\ --fields FIELD,FIELD,FIELD \\ - -P --delimiter $'\t' + -P --delimiter $'\t' """ - header = [ - "jobid","jobname","state","partition", - "gres","cpus","mem","cpu_max","mem_max", - "timelimit","queued_time","start_time", - "end_time","elapsed_time","nodelist", - "user","std_out","std_err", "work_dir" + header = [ + "jobid", + "jobname", + "state", + "partition", + "gres", + "cpus", + "mem", + "cpu_max", + "mem_max", + "timelimit", + "queued_time", + "start_time", + "end_time", + "elapsed_time", + "nodelist", + "user", + "std_out", + "std_err", + "work_dir", ] fields = [ - "jobid", "jobname", - "state", "partition", - "reqtres", "alloccpus", - "reqmem", "maxrss", - "timelimit", "reserved", - "start", "end", - "elapsed", "nodelist", - "user", "workdir" + "jobid", + "jobname", + "state", + "partition", + "reqtres", + "alloccpus", + "reqmem", + "maxrss", + "timelimit", + "reserved", + "start", + "end", + "elapsed", + "nodelist", + "user", + "workdir", ] # Missing std_out and std_err - missing_fields = {15:['-','-']} + missing_fields = {15: ["-", "-"]} # Display header information, - print('\t'.join(header)) + print("\t".join(header)) # Display job information cmd = subprocess.run( "sacct -j {0} -P --delimiter $'\\t' --format={1}".format( - ','.join(jobs), - ','.join(fields) + ",".join(jobs), ",".join(fields) ), - stdout=PIPE, + stdout=PIPE, stderr=PIPE, universal_newlines=True, - shell=True + shell=True, ) # Check for failure # of the last command if cmd.returncode != 0: err("\nError: Failed to get job information with 'dashboard_cli'!") - err('Please see error message below:') - fatal(' └── ', cmd.stderr) - + err("Please see error message below:") + fatal(" └── ", cmd.stderr) + # Get max memory information, # Stored as $SLURM_JOBID.batch # in the MaxRSS field @@ -407,22 +447,22 @@ def sacct(jobs, threads=1, tmp_dir=None): # Remove trailing newline from # standard output and split lines # on remaining newline characters - job_information = cmd.stdout.rstrip('\n').split('\n') + job_information = cmd.stdout.rstrip("\n").split("\n") for i, line in enumerate(job_information): if i < 1: # skip over header continue - linelist = line.lstrip().rstrip().split('\t') - if linelist[0].endswith('.batch'): - jobid = linelist[0].strip().split('.')[0] - maxmem = linelist[7].replace(' ', '') + linelist = line.lstrip().rstrip().split("\t") + if linelist[0].endswith(".batch"): + jobid = linelist[0].strip().split(".")[0] + maxmem = linelist[7].replace(" ", "") mem_bytes = to_bytes(maxmem) if not mem_bytes: # Could not convert - # max_mem value into + # max_mem value into # bytes - j2m[jobid] = '-' - continue # goto next line + j2m[jobid] = "-" + continue # goto next line human_readable_mem = convert_size(mem_bytes) j2m[jobid] = human_readable_mem @@ -432,22 +472,22 @@ def sacct(jobs, threads=1, tmp_dir=None): if i < 1: # skip over header continue - linelist = line.lstrip().rstrip().split('\t') + linelist = line.lstrip().rstrip().split("\t") jobid = linelist[0].strip() - if '.' not in jobid: + if "." not in jobid: try: max_mem = j2m[jobid] except KeyError: - # Job maybe still be + # Job maybe still be # running or in a non- # completed state. 
- max_mem = '-' - status = linelist[2].split(' ')[0] + max_mem = "-" + status = linelist[2].split(" ")[0] linelist[2] = status missing_fields[8] = max_mem linelist = add_missing(linelist, missing_fields) - linelist = [info if info else '-' for info in linelist] - print('\t'.join(linelist)) + linelist = [info if info else "-" for info in linelist] + print("\t".join(linelist)) def slurm(jobs, threads, tmp_dir): @@ -456,11 +496,11 @@ def slurm(jobs, threads, tmp_dir): Parsed command-line arguments @return None """ - # Try to use the following tools in this + # Try to use the following tools in this # order to get job information! # [1] `dashboard_cli` is Biowulf-specific # [2] `sacct` should always be there - tool_priority = ['dashboard_cli', 'sacct'] + tool_priority = ["dashboard_cli", "sacct"] job_tool = get_toolkit(tool_priority) # Get information about each job # must use eval() to make string @@ -470,65 +510,57 @@ def slurm(jobs, threads, tmp_dir): def jobby(args): """ - Wrapper to each supported job scheduler: slurm, etc. + Wrapper to each supported job scheduler: slurm, etc. Each scheduler has a custom handler to most effectively - get and parse job information. + get and parse job information. @param sub_args : Parsed command-line arguments @return None """ # Get command line options abstract_handler = None - job_ids = args.JOB_ID + job_ids = args.JOB_ID scheduler = args.scheduler - threads = args.threads - tmp_dir = args.tmp_dir + threads = args.threads + tmp_dir = args.tmp_dir - # Set handler for each - # supported scheduler - if scheduler == 'slurm': + # Set handler for each + # supported scheduler + if scheduler == "slurm": abstract_handler = slurm else: # Unsupported job scheduler, # needs to be implemented - fatal( - 'Error: "{0}" is an unsupported job scheduler!'.format(scheduler) - ) - - # Display job(s) information + fatal('Error: "{0}" is an unsupported job scheduler!'.format(scheduler)) + + # Display job(s) information # to standard output - abstract_handler( - jobs=job_ids, - threads=threads, - tmp_dir=tmp_dir - ) + abstract_handler(jobs=job_ids, threads=threads, tmp_dir=tmp_dir) # Parse command-line arguments def parsed_arguments(name, description): - """Parses user-provided command-line arguments. This requires - argparse and textwrap packages. To create custom help formatting - a text wrapped docstring is used to create the help message for - required options. As so, the help message for require options - must be suppressed. If a new required argument is added to the + """Parses user-provided command-line arguments. This requires + argparse and textwrap packages. To create custom help formatting + a text wrapped docstring is used to create the help message for + required options. As so, the help message for require options + must be suppressed. If a new required argument is added to the cli, it must be updated in the usage statement docstring below. @param name : - Name of the pipeline or command-line tool + Name of the pipeline or command-line tool @param description : - Short description of pipeline or command-line tool + Short description of pipeline or command-line tool """ # Add styled name and description c = Colors - styled_name = "{0}{1}{2}{3}{4}".format( - c.bold, c.bg_black, - c.cyan, name, c.end - ) + styled_name = "{0}{1}{2}{3}{4}".format(c.bold, c.bg_black, c.cyan, name, c.end) description = "{0}{1}{2}".format(c.bold, description, c.end) temp = tempfile.gettempdir() # Please note: update the usage statement # below if a new option is added! 
- usage_statement = textwrap.dedent("""\ + usage_statement = textwrap.dedent( + """\ {0}: {1} {3}{4}Synopsis:{5} @@ -538,138 +570,136 @@ def parsed_arguments(name, description): {3}{4}Description:{5} - {2} will take your past jobs and display their job information + {2} will take your past jobs and display their job information in a standardized format. Why???! We have pipelines running on several - different clusters (using different job schedulers). {2} centralizes + different clusters (using different job schedulers). {2} centralizes and abstracts the process of querying different job schedulers within - a unified command-line interface. - + a unified command-line interface. + For each supported scheduler, jobby will determine the best method - on a given target system for getting job information to return to the + on a given target system for getting job information to return to the user in a common output format. {3}{4}Required Positional Arguments:{5} - Identiers of past jobs. One or more JOB_IDs - can be provided. Multiple JOB_IDs should be - seperated by a space. Information for each - of the JOB_IDs will be displayed to standard - output. Please see example section below for + Identiers of past jobs. One or more JOB_IDs + can be provided. Multiple JOB_IDs should be + seperated by a space. Information for each + of the JOB_IDs will be displayed to standard + output. Please see example section below for more information. {3}{4}Options:{5} - -s,--scheduler {{slurm | ...}} + -s,--scheduler {{slurm | ...}} @Default: slurm - Job scheduler. Defines the job scheduler + Job scheduler. Defines the job scheduler of the target system. Additional support - for more schedulers coming soon! + for more schedulers coming soon! @Example: --scheduler slurm - -n, --threads THREADS + -n, --threads THREADS @Default: 1 - Number of threads to query the scheduler + Number of threads to query the scheduler in parallel. @Example: --threads: 8 - -t, --tmp-dir TMP_DIR + -t, --tmp-dir TMP_DIR @Default: {7}/ - Temporary directory. Path on the filesystem - for writing temporary output files. Ideally, - this path should point to a dedicated space - on the filesystem for writing tmp files. If - you need to inject a variable into this path - that should NOT be expanded, please quote the - options value in single quotes. The default + Temporary directory. Path on the filesystem + for writing temporary output files. Ideally, + this path should point to a dedicated space + on the filesystem for writing tmp files. If + you need to inject a variable into this path + that should NOT be expanded, please quote the + options value in single quotes. The default location of this option is set to the system default via the $TMPDIR environment variable. @Example: --tmp-dir '/scratch/$USER/' - + -h, --help Shows help and usage information and exits. @Example: --help - + -v, --version Displays version information and exits. @Example: --version - """.format(styled_name, description, name, c.bold, c.url, c.end, c.italic, temp)) + """.format( + styled_name, description, name, c.bold, c.url, c.end, c.italic, temp + ) + ) # Display example usage in epilog - run_epilog = textwrap.dedent("""\ + run_epilog = textwrap.dedent( + """\ {2}{3}Example:{4} - # Please avoid running jobby + # Please avoid running jobby # on a cluster's head node! 
./jobby -s slurm -n 4 18627542 13627516 58627597 48627666 {2}{3}Version:{4} {1} - """.format(name, __version__, c.bold, c.url, c.end)) + """.format( + name, __version__, c.bold, c.url, c.end + ) + ) # Create a top-level parser parser = argparse.ArgumentParser( - usage = argparse.SUPPRESS, + usage=argparse.SUPPRESS, formatter_class=argparse.RawDescriptionHelpFormatter, - description = usage_statement, - epilog = run_epilog, - add_help=False + description=usage_statement, + epilog=run_epilog, + add_help=False, ) # Required Positional Arguments # List of JOB_IDs, 1 ... N_JOB_IDS - parser.add_argument( - 'JOB_ID', - nargs = '+', - help = argparse.SUPPRESS - ) + parser.add_argument("JOB_ID", nargs="+", help=argparse.SUPPRESS) # Options # Adding verison information parser.add_argument( - '-v', '--version', - action = 'version', - version = '%(prog)s {}'.format(__version__), - help = argparse.SUPPRESS + "-v", + "--version", + action="version", + version="%(prog)s {}".format(__version__), + help=argparse.SUPPRESS, ) # Add custom help message - parser.add_argument( - '-h', '--help', - action='help', - help=argparse.SUPPRESS - ) + parser.add_argument("-h", "--help", action="help", help=argparse.SUPPRESS) - # Base directory to write - # temporary/intermediate files + # Base directory to write + # temporary/intermediate files parser.add_argument( - '-t', '--tmp-dir', - type = str, - required = False, - default = temp, - help = argparse.SUPPRESS + "-t", + "--tmp-dir", + type=str, + required=False, + default=temp, + help=argparse.SUPPRESS, ) - # Number of threads for the + # Number of threads for the # pipeline's main proceess - # This is only applicable for - # local rules or when running + # This is only applicable for + # local rules or when running # in local mode. parser.add_argument( - '-n', '--threads', - type = int, - required = False, - default = 1, - help = argparse.SUPPRESS + "-n", "--threads", type=int, required=False, default=1, help=argparse.SUPPRESS ) # Job scheduler to query, # available: SLURM, ... # More coming soon! 
parser.add_argument( - '-s', '--scheduler', - type = lambda s: str(s).lower(), - required = False, - default = "slurm", - choices = ['slurm'], - help = argparse.SUPPRESS + "-s", + "--scheduler", + type=lambda s: str(s).lower(), + required=False, + default="slurm", + choices=["slurm"], + help=argparse.SUPPRESS, ) # Define handlers for each sub-parser - parser.set_defaults(func = jobby) + parser.set_defaults(func=jobby) # Parse command-line args args = parser.parse_args() @@ -680,20 +710,17 @@ def main(): # Sanity check for usage if len(sys.argv) == 1: # Nothing was provided - fatal('Invalid usage: {} [-h] [--version] ...'.format(_name)) - + fatal("Invalid usage: {} [-h] [--version] ...".format(_name)) + # Collect args for sub-command - args = parsed_arguments( - name = _name, - description = _description - ) - + args = parsed_arguments(name=_name, description=_description) + # Display version information - err('{} ({})'.format(_name, __version__)) - # Mediator method to call the + err("{} ({})".format(_name, __version__)) + # Mediator method to call the # default handler function args.func(args) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/workflow/scripts/_make_alignment_stats_table.R b/workflow/scripts/_make_alignment_stats_table.R index b17f9c8..1812efd 100644 --- a/workflow/scripts/_make_alignment_stats_table.R +++ b/workflow/scripts/_make_alignment_stats_table.R @@ -9,60 +9,73 @@ suppressPackageStartupMessages(library("tidyverse")) parser <- ArgumentParser() -parser$add_argument("--yamlDir", type="character", required=TRUE, - help="absolute path to location of yamls") -parser$add_argument("--excludeFromName", type="character", required=TRUE, - help="sample info as TSV") -parser$add_argument("--scaleConstant", type="double", default = 1e6, required=FALSE, - help="scaling constant, typically 1e6.[default %(default)s]") -parser$add_argument("--outTable", type="character", required=TRUE, - help="absolute path to output table TSV file.") +parser$add_argument("--yamlDir", + type = "character", required = TRUE, + help = "absolute path to location of yamls" +) +parser$add_argument("--excludeFromName", + type = "character", required = TRUE, + help = "sample info as TSV" +) +parser$add_argument("--scaleConstant", + type = "double", default = 1e6, required = FALSE, + help = "scaling constant, typically 1e6.[default %(default)s]" +) +parser$add_argument("--outTable", + type = "character", required = TRUE, + help = "absolute path to output table TSV file." 
+) args <- parser$parse_args() -debug=0 -if (debug==1){ - yaml_dir="/home/kopardevn/CCBR/projects/ccbr1155/CS030586_CARAP/results/alignment_stats" - exclude_from_name = ".alignment_stats.yaml" - scale_constant = 1e6 - out_table = "test.tsv" +debug <- 0 +if (debug == 1) { + yaml_dir <- "/home/kopardevn/CCBR/projects/ccbr1155/CS030586_CARAP/results/alignment_stats" + exclude_from_name <- ".alignment_stats.yaml" + scale_constant <- 1e6 + out_table <- "test.tsv" } else { - yaml_dir=args$yamlDir - exclude_from_name=args$excludeFromName - scale_constant=args$scaleConstant - out_table=args$outTable + yaml_dir <- args$yamlDir + exclude_from_name <- args$excludeFromName + scale_constant <- args$scaleConstant + out_table <- args$outTable } setwd(yaml_dir) -files=list.files(path = yaml_dir, - pattern = "*.yaml") -df = data.frame() -for ( f in files ){ - sampleName <- gsub(pattern = exclude_from_name, replacement = "",f) +files <- list.files( + path = yaml_dir, + pattern = "*.yaml" +) +df <- data.frame() +for (f in files) { + sampleName <- gsub(pattern = exclude_from_name, replacement = "", f) readinyaml <- as.data.frame(read_yaml(f, fileEncoding = "UTF-8")) - rownames(readinyaml)=c(sampleName) - df = rbind(df,readinyaml) + rownames(readinyaml) <- c(sampleName) + df <- rbind(df, readinyaml) } -df$scaling_factor = scale_constant/df$dedup_nreads_spikein -df$duplication_rate_genome = (df$raw_nreads_genome - df$dedup_nreads_genome) / df$raw_nreads_genome -df$duplication_rate_spikein = (df$raw_nreads_spikein - df$dedup_nreads_spikein) / df$raw_nreads_spikein -column_order = c("sample_name", - "nreads", - "raw_nreads_genome", - "raw_nreads_spikein", - "scaling_factor", - "duplication_rate_genome", - "duplication_rate_spikein", - "no_dedup_nreads_genome", - "no_dedup_nreads_spikein", - "dedup_nreads_genome", - "dedup_nreads_spikein") +df$scaling_factor <- scale_constant / df$dedup_nreads_spikein +df$duplication_rate_genome <- (df$raw_nreads_genome - df$dedup_nreads_genome) / df$raw_nreads_genome +df$duplication_rate_spikein <- (df$raw_nreads_spikein - df$dedup_nreads_spikein) / df$raw_nreads_spikein +column_order <- c( + "sample_name", + "nreads", + "raw_nreads_genome", + "raw_nreads_spikein", + "scaling_factor", + "duplication_rate_genome", + "duplication_rate_spikein", + "no_dedup_nreads_genome", + "no_dedup_nreads_spikein", + "dedup_nreads_genome", + "dedup_nreads_spikein" +) -df %>% rownames_to_column(var="sample_name") -> df -df = df[,column_order] +df %>% rownames_to_column(var = "sample_name") -> df +df <- df[, column_order] write.table(df, - file=out_table, - quote = FALSE, - sep = "\t", - row.names = FALSE, - col.names = TRUE) + file = out_table, + quote = FALSE, + sep = "\t", + row.names = FALSE, + col.names = TRUE +) diff --git a/workflow/scripts/_make_counts_matrix.py b/workflow/scripts/_make_counts_matrix.py index d5f5567..d7c13a6 100644 --- a/workflow/scripts/_make_counts_matrix.py +++ b/workflow/scripts/_make_counts_matrix.py @@ -6,79 +6,121 @@ # sums to countsmatrix.tsv # sampleinfo.tsv is also created to be used as colData for downstream DESeq2 analysis -import subprocess,argparse,sys,pandas,os,functools,uuid -parser = argparse.ArgumentParser(description='create counts matrix') -parser.add_argument('--bedbedgraph', required=True, type=str, help="list of peak calls and scaled bedgraph file, tab delimited, group|sampleName|bedfile|bedgraph|scalingFactor|fragmentsBed per line") -parser.add_argument('--tmpdir', required=True, type=str, help="TMP dir") -parser.add_argument('--countsmatrix', 
required=True, type=str, help="bedgraph-AUC-based output counts matrix TSV") -parser.add_argument('--fragmentscountsmatrix', required=True, type=str, help="fragmentsBed-based output counts matrix TSV") -parser.add_argument('--sampleinfo', required=True, type=str, help="output sample info TSV") +import subprocess, argparse, sys, pandas, os, functools, uuid + +parser = argparse.ArgumentParser(description="create counts matrix") +parser.add_argument( + "--bedbedgraph", + required=True, + type=str, + help="list of peak calls and scaled bedgraph file, tab delimited, group|sampleName|bedfile|bedgraph|scalingFactor|fragmentsBed per line", +) +parser.add_argument("--tmpdir", required=True, type=str, help="TMP dir") +parser.add_argument( + "--countsmatrix", + required=True, + type=str, + help="bedgraph-AUC-based output counts matrix TSV", +) +parser.add_argument( + "--fragmentscountsmatrix", + required=True, + type=str, + help="fragmentsBed-based output counts matrix TSV", +) +parser.add_argument( + "--sampleinfo", required=True, type=str, help="output sample info TSV" +) args = parser.parse_args() randstr = str(uuid.uuid4()) if not os.path.exists(args.tmpdir): - try: - os.mkdir(args.tmpdir) - except: - exit(args.tmpdir + ": Folder doesnt exist and cannot be created!") + try: + os.mkdir(args.tmpdir) + except: + exit(args.tmpdir + ": Folder doesnt exist and cannot be created!") -if not os.access(args.tmpdir,os.W_OK): - exit(args.tmpdir + ": Folder is not writeable!") +if not os.access(args.tmpdir, os.W_OK): + exit(args.tmpdir + ": Folder is not writeable!") -bedbedgraph = pandas.read_csv(args.bedbedgraph,header=None,sep="\t") -bedbedgraph.columns=["group","sampleName","bed","bedgraph","scalingFactor","fragmentsBed"] +bedbedgraph = pandas.read_csv(args.bedbedgraph, header=None, sep="\t") +bedbedgraph.columns = [ + "group", + "sampleName", + "bed", + "bedgraph", + "scalingFactor", + "fragmentsBed", +] -concatfile = os.path.join(args.tmpdir,randstr+".concat.bed") -cmd = "cat " + " ".join(list(bedbedgraph["bed"])) + " | cut -f1-3 | sort -k1,1 -k2,2n > " + concatfile +concatfile = os.path.join(args.tmpdir, randstr + ".concat.bed") +cmd = ( + "cat " + + " ".join(list(bedbedgraph["bed"])) + + " | cut -f1-3 | sort -k1,1 -k2,2n > " + + concatfile +) concat = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) -mergedfile = os.path.join(args.tmpdir,randstr+".merged.bed") +mergedfile = os.path.join(args.tmpdir, randstr + ".merged.bed") cmd = "bedtools merge -i " + concatfile + " > " + mergedfile merge = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) -sampleinfofile = open(args.sampleinfo,'w') +sampleinfofile = open(args.sampleinfo, "w") sampleinfofile.write("samplename\tgroup\n") -counts=dict() # bedgraph AUC based counts -fcounts=dict() # fragment based counts -for i,row in bedbedgraph.iterrows(): - group = row['group'] - bg = row['bedgraph'] - samplename = row['sampleName'] - sf = row['scalingFactor'] - fbed = row['fragmentsBed'] - filepath,filename = os.path.split(bg) - basename,ext = os.path.splitext(filename) - sampleinfofile.write("%s\t%s\n"%(samplename,group)) - cmd = "bedtools intersect -wa -wb -a "+bg+" -b "+mergedfile - print("Now running: %s"%(cmd)) - bt = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) - peakCounts=dict() - for l in bt.stdout.split("\n"): - l=l.strip().split("\t") - if len(l) != 7: - continue - peakID = l[4] + ":" + l[5] + "-" + l[6] - score = (int(l[2]) - int(l[1])) * float(l[3]) - if not peakID in 
peakCounts: - peakCounts[peakID]=0.0 - peakCounts[peakID]+=score - counts[samplename] = pandas.DataFrame.from_dict(peakCounts,orient='index',columns=[samplename]) - cmd = "bedmap --echo-ref-name --count "+mergedfile+" "+fbed - print("Now running %s"%(cmd)) - bo = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) - fPeakCounts=dict() - for l in bo.stdout.split('\n'): - l = l.strip().split('|') - if len(l)!=2: - continue - fPeakCounts[l[0]]=l[1] - fcounts[samplename] = pandas.DataFrame.from_dict(fPeakCounts,orient='index',columns=[samplename]) +counts = dict() # bedgraph AUC based counts +fcounts = dict() # fragment based counts +for i, row in bedbedgraph.iterrows(): + group = row["group"] + bg = row["bedgraph"] + samplename = row["sampleName"] + sf = row["scalingFactor"] + fbed = row["fragmentsBed"] + filepath, filename = os.path.split(bg) + basename, ext = os.path.splitext(filename) + sampleinfofile.write("%s\t%s\n" % (samplename, group)) + cmd = "bedtools intersect -wa -wb -a " + bg + " -b " + mergedfile + print("Now running: %s" % (cmd)) + bt = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) + peakCounts = dict() + for l in bt.stdout.split("\n"): + l = l.strip().split("\t") + if len(l) != 7: + continue + peakID = l[4] + ":" + l[5] + "-" + l[6] + score = (int(l[2]) - int(l[1])) * float(l[3]) + if not peakID in peakCounts: + peakCounts[peakID] = 0.0 + peakCounts[peakID] += score + counts[samplename] = pandas.DataFrame.from_dict( + peakCounts, orient="index", columns=[samplename] + ) + cmd = "bedmap --echo-ref-name --count " + mergedfile + " " + fbed + print("Now running %s" % (cmd)) + bo = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True) + fPeakCounts = dict() + for l in bo.stdout.split("\n"): + l = l.strip().split("|") + if len(l) != 2: + continue + fPeakCounts[l[0]] = l[1] + fcounts[samplename] = pandas.DataFrame.from_dict( + fPeakCounts, orient="index", columns=[samplename] + ) sampleinfofile.close() -counts_matrix_df = functools.reduce(lambda x,y:x.merge(y,left_index=True,right_index=True),counts.values()) -counts_matrix_df.to_csv(args.countsmatrix,sep="\t",header=True,index=True,index_label="peakID") -fcounts_matrix_df = functools.reduce(lambda x,y:x.merge(y,left_index=True,right_index=True),fcounts.values()) -fcounts_matrix_df.to_csv(args.fragmentscountsmatrix,sep="\t",header=True,index=True,index_label="peakID") +counts_matrix_df = functools.reduce( + lambda x, y: x.merge(y, left_index=True, right_index=True), counts.values() +) +counts_matrix_df.to_csv( + args.countsmatrix, sep="\t", header=True, index=True, index_label="peakID" +) +fcounts_matrix_df = functools.reduce( + lambda x, y: x.merge(y, left_index=True, right_index=True), fcounts.values() +) +fcounts_matrix_df.to_csv( + args.fragmentscountsmatrix, sep="\t", header=True, index=True, index_label="peakID" +) -#os.rmdir(args.tmpdir) \ No newline at end of file +# os.rmdir(args.tmpdir) diff --git a/workflow/scripts/_make_library_norm_table.R b/workflow/scripts/_make_library_norm_table.R index 7279e87..6156915 100644 --- a/workflow/scripts/_make_library_norm_table.R +++ b/workflow/scripts/_make_library_norm_table.R @@ -9,72 +9,80 @@ suppressPackageStartupMessages(library("tidyverse")) parser <- ArgumentParser() -parser$add_argument("--yamlDir", type="character", required=TRUE, - help="absolute path to location of yamls") -parser$add_argument("--excludeFromName", type="character", required=TRUE, - help="sample info as TSV") 
-parser$add_argument("--outTable", type="character", required=TRUE, - help="absolute path to output table TSV file.") +parser$add_argument("--yamlDir", + type = "character", required = TRUE, + help = "absolute path to location of yamls" +) +parser$add_argument("--excludeFromName", + type = "character", required = TRUE, + help = "sample info as TSV" +) +parser$add_argument("--outTable", + type = "character", required = TRUE, + help = "absolute path to output table TSV file." +) args <- parser$parse_args() # assign params variables -debug=0 -if (debug==1){ - yaml_dir="~/../../Volumes/CUTRUN/analysis/CS030666/carlisle_230118/results/alignment_stats" - exclude_from_name = ".alignment_stats.yaml" - scale_constant = 1e6 - out_table = "test.tsv" +debug <- 0 +if (debug == 1) { + yaml_dir <- "~/../../Volumes/CUTRUN/analysis/CS030666/carlisle_230118/results/alignment_stats" + exclude_from_name <- ".alignment_stats.yaml" + scale_constant <- 1e6 + out_table <- "test.tsv" } else { - yaml_dir=args$yamlDir - exclude_from_name=args$excludeFromName - scale_constant=args$scaleConstant - out_table=args$outTable + yaml_dir <- args$yamlDir + exclude_from_name <- args$excludeFromName + scale_constant <- args$scaleConstant + out_table <- args$outTable } # grab all files setwd(yaml_dir) -files=list.files(path = yaml_dir, - pattern = "*.yaml") +files <- list.files( + path = yaml_dir, + pattern = "*.yaml" +) # read in df and prep -df = data.frame() -for ( f in files ){ - sampleName <- gsub(pattern = exclude_from_name, replacement = "",f) +df <- data.frame() +for (f in files) { + sampleName <- gsub(pattern = exclude_from_name, replacement = "", f) readinyaml <- as.data.frame(read_yaml(f, fileEncoding = "UTF-8")) - rownames(readinyaml)=c(sampleName) - df = rbind(df,readinyaml) + rownames(readinyaml) <- c(sampleName) + df <- rbind(df, readinyaml) } # run once for dedup, once for nondedup -final_df=data.frame() -for (dedup_type in c("dedup_nreads_genome","no_dedup_nreads_genome")){ +final_df <- data.frame() +for (dedup_type in c("dedup_nreads_genome", "no_dedup_nreads_genome")) { # determine scaling factor dependent on library size - col_median=median(df[,dedup_type]) - if (col_median>1000000000){ - lib_factor=1e8 - }else if (col_median>100000000){ - lib_factor=1e7 - }else if (col_median>10000000){ - lib_factor=1e6 - } else if (col_median>1000000){ - lib_factor=1e5 - } else if (col_median>100000){ - lib_factor=1e4 - } else if (col_median>10000){ - lib_factor=1e3 - } else if (col_median>1000){ - lib_factor=1e2 + col_median <- median(df[, dedup_type]) + if (col_median > 1000000000) { + lib_factor <- 1e8 + } else if (col_median > 100000000) { + lib_factor <- 1e7 + } else if (col_median > 10000000) { + lib_factor <- 1e6 + } else if (col_median > 1000000) { + lib_factor <- 1e5 + } else if (col_median > 100000) { + lib_factor <- 1e4 + } else if (col_median > 10000) { + lib_factor <- 1e3 + } else if (col_median > 1000) { + lib_factor <- 1e2 } else { - lib_factor=1e1 + lib_factor <- 1e1 } - df$library_size=df[,dedup_type]/lib_factor - df$sampleid=rownames(df) - df$dedup_type=strsplit(dedup_type,"_")[[1]][1] - df$dedup_type=gsub("no","no_dedup",df$dedup_type) + df$library_size <- df[, dedup_type] / lib_factor + df$sampleid <- rownames(df) + df$dedup_type <- strsplit(dedup_type, "_")[[1]][1] + df$dedup_type <- gsub("no", "no_dedup", df$dedup_type) # create final df - select_cols=c("sampleid","library_size","dedup_type") - final_df=rbind(final_df,df[,select_cols]) + select_cols <- c("sampleid", "library_size", "dedup_type") + 
final_df <- rbind(final_df, df[, select_cols]) } -write.table(final_df,out_table,row.names = FALSE) +write.table(final_df, out_table, row.names = FALSE) diff --git a/workflow/scripts/_make_results_bed.py b/workflow/scripts/_make_results_bed.py index 2cbe8bb..d58c78f 100644 --- a/workflow/scripts/_make_results_bed.py +++ b/workflow/scripts/_make_results_bed.py @@ -1,43 +1,70 @@ #!/usr/bin/env python3 -import argparse,pandas,os,yaml,subprocess,uuid +import argparse, pandas, os, yaml, subprocess, uuid from operator import index from email import header -randbed=str(uuid.uuid4())+".bed" +randbed = str(uuid.uuid4()) + ".bed" -parser = argparse.ArgumentParser(description='create bed') -parser.add_argument('--results', required=True, type=str, help="results bed file") -parser.add_argument('--fdr_cutoff', required=True, type=float, help="FDR cutoff") -parser.add_argument('--log2FC_cutoff', required=True, type=float, help="log2FC cutoff") -parser.add_argument('--elbowyaml', required=True, type=str, help="elbow limits") -parser.add_argument('--bed', required=True, type=str, help="output bed with colors") +parser = argparse.ArgumentParser(description="create bed") +parser.add_argument("--results", required=True, type=str, help="results bed file") +parser.add_argument("--fdr_cutoff", required=True, type=float, help="FDR cutoff") +parser.add_argument("--log2FC_cutoff", required=True, type=float, help="log2FC cutoff") +parser.add_argument("--elbowyaml", required=True, type=str, help="elbow limits") +parser.add_argument("--bed", required=True, type=str, help="output bed with colors") args = parser.parse_args() -df = pandas.read_csv(args.results,sep="\t",header=0,usecols=['seqnames','start','end','log2FoldChange','padj']) -df['color'] = ["160,160,160"]*len(df.index) -df['name'] = ["."]*len(df.index) -df['strand'] = ["+"]*len(df.index) -df['score'] = [0]*len(df.index) -df['seven'] = [0]*len(df.index) -df['eight'] = [0]*len(df.index) +df = pandas.read_csv( + args.results, + sep="\t", + header=0, + usecols=["seqnames", "start", "end", "log2FoldChange", "padj"], +) +df["color"] = ["160,160,160"] * len(df.index) +df["name"] = ["."] * len(df.index) +df["strand"] = ["+"] * len(df.index) +df["score"] = [0] * len(df.index) +df["seven"] = [0] * len(df.index) +df["eight"] = [0] * len(df.index) -df.loc[df['log2FoldChange'] < 0,'strand'] = "-" +df.loc[df["log2FoldChange"] < 0, "strand"] = "-" with open(args.elbowyaml) as f: elbowdf = yaml.safe_load(f) -elbowdown = elbowdf['low_limit'] -elbowup = elbowdf['up_limit'] +elbowdown = elbowdf["low_limit"] +elbowup = elbowdf["up_limit"] if abs(elbowdown) < args.log2FC_cutoff: - df.loc[(df['padj'] < args.fdr_cutoff) & (df['log2FoldChange'] < elbowdown),'color']="229,255,204" -df.loc[(df['padj'] < args.fdr_cutoff) & (df['log2FoldChange'] < args.log2FC_cutoff),'color']="128,255,0" + df.loc[ + (df["padj"] < args.fdr_cutoff) & (df["log2FoldChange"] < elbowdown), "color" + ] = "229,255,204" +df.loc[ + (df["padj"] < args.fdr_cutoff) & (df["log2FoldChange"] < args.log2FC_cutoff), + "color", +] = "128,255,0" if elbowup < args.log2FC_cutoff: - df.loc[(df['padj'] < args.fdr_cutoff) & (df['log2FoldChange'] > elbowup),'color']="255,153,51" -df.loc[(df['padj'] < args.fdr_cutoff) & (df['log2FoldChange'] > args.log2FC_cutoff),'color']="255,51,51" -df = df.reindex(columns=['seqnames','start','end','name','score','strand','seven','eight','color']) + df.loc[ + (df["padj"] < args.fdr_cutoff) & (df["log2FoldChange"] > elbowup), "color" + ] = "255,153,51" +df.loc[ + (df["padj"] < args.fdr_cutoff) 
& (df["log2FoldChange"] > args.log2FC_cutoff), + "color", +] = "255,51,51" +df = df.reindex( + columns=[ + "seqnames", + "start", + "end", + "name", + "score", + "strand", + "seven", + "eight", + "color", + ] +) -df.to_csv(randbed,sep="\t",header=False,index=False) +df.to_csv(randbed, sep="\t", header=False, index=False) -cmd = "sort -S 40G -T /dev/shm -k1,1 -k2,2n "+randbed+" > "+args.bed -s = subprocess.run(cmd,shell=True,check=True,text=True,capture_output=True) +cmd = "sort -S 40G -T /dev/shm -k1,1 -k2,2n " + randbed + " > " + args.bed +s = subprocess.run(cmd, shell=True, check=True, text=True, capture_output=True) os.remove(randbed) diff --git a/workflow/scripts/_plot_correlation.R b/workflow/scripts/_plot_correlation.R index 3727320..60bf691 100644 --- a/workflow/scripts/_plot_correlation.R +++ b/workflow/scripts/_plot_correlation.R @@ -1,92 +1,102 @@ -# Plot correlation heatmap and PCA - -library(ggplot2) -library(reshape2) -library(ComplexHeatmap) -library(RColorBrewer) -library(circlize) - -args = commandArgs(trailingOnly=TRUE) - -in.corr = args[1] -in.pca = args[2] -in.read_depth = args[3] -dupstatus = args[4] -out.corr = args[5] -out.pca = args[6] - -# Read deeptools output -print("Load heatmap") -heatmap = read.table(in.corr,check.names = FALSE) -colnames(heatmap) = gsub("[.]no_dedup","",gsub("[.]dedup","",colnames(heatmap))) -rownames(heatmap) = gsub("[.]no_dedup","",gsub("[.]dedup","",rownames(heatmap))) -print(heatmap) - -print("Load PCA") -pca = read.table(in.pca,header=TRUE,check.names = FALSE) -print(pca) - -# Create metadata table -print("Create metadata table") -metadata = data.frame(Replicate=colnames(heatmap)) -metadata$Sample = gsub("_[1-9]$","",metadata$Replicate) - -# Add read depth -read_depth = read.table(in.read_depth,sep='\t',header=TRUE) -metadata = merge(metadata,read_depth[,c("sample_name","nreads","no_dedup_nreads_genome","dedup_nreads_genome")], - by.x="Replicate",by.y="sample_name") -if(dupstatus == "dedup"){ - metadata$Depth = metadata$dedup_nreads_genome/1000000 -}else{ - metadata$Depth = metadata$no_dedup_nreads_genome/1000000 -} -metadata = metadata[match(rownames(heatmap),metadata$Replicate),] -print(metadata) - -# Create colors -sample_colors = setNames(colorRampPalette(brewer.pal(11, "Spectral"))(length(unique(metadata$Sample))), - unique(metadata$Sample)) -depth_colors = colorRamp2(breaks=c(0,30,60),colors=c("white","grey50","black")) - -colors = list(Sample=sample_colors, - Depth=depth_colors) - -# Heatmap - -## Plot heatmap -print("Print heatmap") -png(out.corr,width=600,height=600) -print(Heatmap(heatmap, - top_annotation = HeatmapAnnotation(df=metadata[,c("Sample","Depth")], - col=colors[c("Sample","Depth")]), - show_row_dend = FALSE, - show_row_names = FALSE, - col = colorRamp2(c(0,1), c("white", "blue")), - heatmap_legend_param = list( - title = "Pearson\ncorrelation" - ), - column_dend_height = unit(0.8,"inches"), - name="Pearson correlation, genome-wide coverage (10kb bins)")) -dev.off() - -# PCA -print("Print PCA") - -## Calculate variance from eigenvalue -eigenvalue = pca[,c("Component","Eigenvalue")] -eigenvalue$Variance = eigenvalue$Eigenvalue/sum(eigenvalue$Eigenvalue)*100 - -pca = melt(pca[,1:dim(pca)[2]-1],id.var="Component",variable.name="Replicate",value.name="Loading") -pca = dcast(pca,Replicate~Component,value.var="Loading") -pca$Replicate = gsub("[.]no_dedup","",gsub("[.]dedup","",pca$Replicate)) -pca = merge(pca,metadata,by="Replicate") - -## Plot PCA -png(out.pca) -print(ggplot(pca,aes(x=`1`,y=`2`,color=Sample)) + 
geom_point(size=3) + - xlab(paste("PC1 (",round(eigenvalue$Variance[1],1),"%)",sep="")) + - ylab(paste("PC2 (",round(eigenvalue$Variance[2],1),"%)",sep="")) + - scale_color_manual(values=sample_colors) + - ggtitle("PCA, genome-wide coverage (10kb bins)") + - theme_classic() + theme(legend.position="bottom")) -dev.off() +# Plot correlation heatmap and PCA + +library(ggplot2) +library(reshape2) +library(ComplexHeatmap) +library(RColorBrewer) +library(circlize) + +args <- commandArgs(trailingOnly = TRUE) + +in.corr <- args[1] +in.pca <- args[2] +in.read_depth <- args[3] +dupstatus <- args[4] +out.corr <- args[5] +out.pca <- args[6] + +# Read deeptools output +print("Load heatmap") +heatmap <- read.table(in.corr, check.names = FALSE) +colnames(heatmap) <- gsub("[.]no_dedup", "", gsub("[.]dedup", "", colnames(heatmap))) +rownames(heatmap) <- gsub("[.]no_dedup", "", gsub("[.]dedup", "", rownames(heatmap))) +print(heatmap) + +print("Load PCA") +pca <- read.table(in.pca, header = TRUE, check.names = FALSE) +print(pca) + +# Create metadata table +print("Create metadata table") +metadata <- data.frame(Replicate = colnames(heatmap)) +metadata$Sample <- gsub("_[1-9]$", "", metadata$Replicate) + +# Add read depth +read_depth <- read.table(in.read_depth, sep = "\t", header = TRUE) +metadata <- merge(metadata, read_depth[, c("sample_name", "nreads", "no_dedup_nreads_genome", "dedup_nreads_genome")], + by.x = "Replicate", by.y = "sample_name" +) +if (dupstatus == "dedup") { + metadata$Depth <- metadata$dedup_nreads_genome / 1000000 +} else { + metadata$Depth <- metadata$no_dedup_nreads_genome / 1000000 +} +metadata <- metadata[match(rownames(heatmap), metadata$Replicate), ] +print(metadata) + +# Create colors +sample_colors <- setNames( + colorRampPalette(brewer.pal(11, "Spectral"))(length(unique(metadata$Sample))), + unique(metadata$Sample) +) +depth_colors <- colorRamp2(breaks = c(0, 30, 60), colors = c("white", "grey50", "black")) + +colors <- list( + Sample = sample_colors, + Depth = depth_colors +) + +# Heatmap + +## Plot heatmap +print("Print heatmap") +png(out.corr, width = 600, height = 600) +print(Heatmap(heatmap, + top_annotation = HeatmapAnnotation( + df = metadata[, c("Sample", "Depth")], + col = colors[c("Sample", "Depth")] + ), + show_row_dend = FALSE, + show_row_names = FALSE, + col = colorRamp2(c(0, 1), c("white", "blue")), + heatmap_legend_param = list( + title = "Pearson\ncorrelation" + ), + column_dend_height = unit(0.8, "inches"), + name = "Pearson correlation, genome-wide coverage (10kb bins)" +)) +dev.off() + +# PCA +print("Print PCA") + +## Calculate variance from eigenvalue +eigenvalue <- pca[, c("Component", "Eigenvalue")] +eigenvalue$Variance <- eigenvalue$Eigenvalue / sum(eigenvalue$Eigenvalue) * 100 + +pca <- melt(pca[, 1:dim(pca)[2] - 1], id.var = "Component", variable.name = "Replicate", value.name = "Loading") +pca <- dcast(pca, Replicate ~ Component, value.var = "Loading") +pca$Replicate <- gsub("[.]no_dedup", "", gsub("[.]dedup", "", pca$Replicate)) +pca <- merge(pca, metadata, by = "Replicate") + +## Plot PCA +png(out.pca) +print(ggplot(pca, aes(x = `1`, y = `2`, color = Sample)) + + geom_point(size = 3) + + xlab(paste("PC1 (", round(eigenvalue$Variance[1], 1), "%)", sep = "")) + + ylab(paste("PC2 (", round(eigenvalue$Variance[2], 1), "%)", sep = "")) + + scale_color_manual(values = sample_colors) + + ggtitle("PCA, genome-wide coverage (10kb bins)") + + theme_classic() + + theme(legend.position = "bottom")) +dev.off() diff --git 
a/workflow/scripts/_plot_feature_enrichment.R b/workflow/scripts/_plot_feature_enrichment.R index 343976f..67ef5b5 100644 --- a/workflow/scripts/_plot_feature_enrichment.R +++ b/workflow/scripts/_plot_feature_enrichment.R @@ -1,47 +1,58 @@ -# Plot HOMER enrichment - -library(reshape2) -library(ggplot2) -library(plyr) -library(ComplexHeatmap) -library(circlize) - -theme_set(theme_bw()) - -args = commandArgs(trailingOnly=TRUE) - -peaks.dir = args[1] -peak_mode = args[2] -dupstatus = args[3] -fig = args[4] - -# Load HOMER annotation summary -homer.files = list.files(path=peaks.dir,pattern="annotation.summary", - recursive=TRUE,full.names = TRUE) -homer = lapply(homer.files,function(x) read.table(x,sep='\t',header=TRUE,check.names = FALSE,nrows=14)) -names(homer) = gsub("gopeaks/|macs2/|seacr/","", - gsub(".annotation.summary","", - gsub("/annotation/homer","", - gsub("^.*results/peaks/","",homer.files)))) -homer = lapply(homer,function(x) x[1:rownames(x[which(x$Annotation == "Annotation"),])-1,]) -homer = ldply(homer,.id="File") - -homer = cbind(homer,colsplit(homer$File,"/",c("Threshold","Sample"))) -homer = cbind(homer,colsplit(homer$Sample,"[.]",c("Comparison","Duplication","Caller"))) -homer = cbind(homer,colsplit(homer$Comparison,"_vs_",c("Replicate","Control"))) -homer = homer[which(homer$Duplication == dupstatus & homer$Caller == peak_mode),] -homer$`LogP enrichment (+values depleted)` = as.numeric(homer$`LogP enrichment (+values depleted)`) - -# Plot enrichment heatmap -homer = dcast(homer,Replicate~Annotation,value.var="LogP enrichment (+values depleted)") -rownames(homer) = homer$Replicate -homer = homer[,2:dim(homer)[2]] - -png(fig,width = 700, height = 400) -hm = Heatmap(homer, - name="LogP enrichment\n(+values depleted)", - col=colorRamp2(breaks=c(-70000,-50,0,50,70000), - colors=c("red","yellow","white","green","blue")), - width=unit(100, "mm")) -draw(hm,heatmap_legend_side = "left") -dev.off() +# Plot HOMER enrichment + +library(reshape2) +library(ggplot2) +library(plyr) +library(ComplexHeatmap) +library(circlize) + +theme_set(theme_bw()) + +args <- commandArgs(trailingOnly = TRUE) + +peaks.dir <- args[1] +peak_mode <- args[2] +dupstatus <- args[3] +fig <- args[4] + +# Load HOMER annotation summary +homer.files <- list.files( + path = peaks.dir, pattern = "annotation.summary", + recursive = TRUE, full.names = TRUE +) +homer <- lapply(homer.files, function(x) read.table(x, sep = "\t", header = TRUE, check.names = FALSE, nrows = 14)) +names(homer) <- gsub( + "gopeaks/|macs2/|seacr/", "", + gsub( + ".annotation.summary", "", + gsub( + "/annotation/homer", "", + gsub("^.*results/peaks/", "", homer.files) + ) + ) +) +homer <- lapply(homer, function(x) x[1:rownames(x[which(x$Annotation == "Annotation"), ]) - 1, ]) +homer <- ldply(homer, .id = "File") + +homer <- cbind(homer, colsplit(homer$File, "/", c("Threshold", "Sample"))) +homer <- cbind(homer, colsplit(homer$Sample, "[.]", c("Comparison", "Duplication", "Caller"))) +homer <- cbind(homer, colsplit(homer$Comparison, "_vs_", c("Replicate", "Control"))) +homer <- homer[which(homer$Duplication == dupstatus & homer$Caller == peak_mode), ] +homer$`LogP enrichment (+values depleted)` <- as.numeric(homer$`LogP enrichment (+values depleted)`) + +# Plot enrichment heatmap +homer <- dcast(homer, Replicate ~ Annotation, value.var = "LogP enrichment (+values depleted)") +rownames(homer) <- homer$Replicate +homer <- homer[, 2:dim(homer)[2]] + +png(fig, width = 700, height = 400) +hm <- Heatmap(homer, + name = "LogP enrichment\n(+values 
depleted)", + col = colorRamp2( + breaks = c(-70000, -50, 0, 50, 70000), + colors = c("red", "yellow", "white", "green", "blue") + ), + width = unit(100, "mm") +) +draw(hm, heatmap_legend_side = "left") +dev.off() diff --git a/workflow/scripts/_plot_peak_counts.R b/workflow/scripts/_plot_peak_counts.R index eee5774..263441b 100644 --- a/workflow/scripts/_plot_peak_counts.R +++ b/workflow/scripts/_plot_peak_counts.R @@ -1,64 +1,68 @@ -# Plot number of peaks called per sample by caller, duplication status, and threshold - -library(reshape2) -library(ggplot2) -library(plyr) -library(openxlsx) - -theme_set(theme_bw()) - -args = commandArgs(trailingOnly=TRUE) - -input = args[1] - -# Load number of peaks -print("Load peaks") -peaks = read.table(input,sep="") -colnames(peaks) = c("Peaks","File") -peaks = peaks[which(peaks$File != "total"),] -peaks$File = gsub(".peaks.bed$","",gsub("/peak_output","",gsub("^.*results/peaks/","",peaks$File))) -peaks = cbind(peaks,colsplit(peaks$File,"/",c("Threshold","Caller","Sample"))) -peaks = cbind(peaks,colsplit(peaks$Sample,"[.]",c("Comparison","Duplication","Mode"))) -peaks$Caller = paste(peaks$Caller,peaks$Mode,sep="_") -peaks = peaks[,c("Comparison","Caller","Duplication","Threshold","Peaks")] -peaks = cbind(peaks,colsplit(peaks$Comparison,"_vs_",c("Replicate","Control"))) -peaks$Sample = gsub("_[1-9]$","",peaks$Replicate) - -setwd(args[2]) - -# Write out table -print("Write table") -peak_output = dcast(peaks,Threshold+Duplication+Comparison~Caller,value.var="Peaks") -peak_output = peak_output[order(peak_output$Threshold,peak_output$Duplication,peak_output$Comparison),] -write.xlsx(peak_output,file="Peak counts.xlsx") - -# For each threshold, plot number of peaks -for (threshold in unique(peaks$Threshold)){ - print(threshold) -peaks_threshold = peaks[which(peaks$Threshold == threshold),] - -# Compare peaks with and without duplication -peaks_threshold_dup = dcast(peaks_threshold,Replicate+Caller~Duplication,value.var="Peaks") -cor=round(as.numeric(unlist(cor.test(peaks_threshold_dup$dedup,peaks_threshold_dup$no_dedup))["estimate.cor"]),2) -peaks.max=max(max(peaks_threshold_dup$dedup),max(peaks_threshold_dup$no_dedup)) - -png(paste("duplication_corr.",threshold,".png",sep=""),width = 350, height = 300) - print(ggplot(peaks_threshold_dup,aes(x=log10(no_dedup),y=log10(dedup))) + geom_point(aes(color=Caller)) + - geom_abline(intercept=0,slope=1,linetype="dashed") + - xlab("log10(Peaks), no deduplication") + ylab("log10(Peaks), deduplication") + - xlim(0,log10(peaks.max)+0.5) + ylim(0,log10(peaks.max)+0.5) + - ggtitle(paste("Peaks by read duplication, q-value threshold ",threshold,sep=""), - subtitle=paste("Pearson correlation: ",cor,sep=""))) -dev.off() - -# Plot summary across callers -width=(length(unique(peaks_threshold$Sample))*50)+100 -height=(length(unique(peaks_threshold$Caller))*100)+100 -png(paste("peaks_by_caller.",threshold,".png",sep=""),width = width, height = height) - print(ggplot(peaks_threshold[which(peaks_threshold$Duplication == "dedup"),],aes(x=Sample,y=Peaks)) + - geom_boxplot() + - facet_wrap(~Caller,nrow=length(unique(peaks_threshold$Caller)),scales="free_y") + - theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + - ggtitle(paste("Peaks,\nq-value threshold ",threshold,",\ndeduplicated reads",sep=""))) -dev.off() -} +# Plot number of peaks called per sample by caller, duplication status, and threshold + +library(reshape2) +library(ggplot2) +library(plyr) +library(openxlsx) + +theme_set(theme_bw()) + +args <- 
commandArgs(trailingOnly = TRUE) + +input <- args[1] + +# Load number of peaks +print("Load peaks") +peaks <- read.table(input, sep = "") +colnames(peaks) <- c("Peaks", "File") +peaks <- peaks[which(peaks$File != "total"), ] +peaks$File <- gsub(".peaks.bed$", "", gsub("/peak_output", "", gsub("^.*results/peaks/", "", peaks$File))) +peaks <- cbind(peaks, colsplit(peaks$File, "/", c("Threshold", "Caller", "Sample"))) +peaks <- cbind(peaks, colsplit(peaks$Sample, "[.]", c("Comparison", "Duplication", "Mode"))) +peaks$Caller <- paste(peaks$Caller, peaks$Mode, sep = "_") +peaks <- peaks[, c("Comparison", "Caller", "Duplication", "Threshold", "Peaks")] +peaks <- cbind(peaks, colsplit(peaks$Comparison, "_vs_", c("Replicate", "Control"))) +peaks$Sample <- gsub("_[1-9]$", "", peaks$Replicate) + +setwd(args[2]) + +# Write out table +print("Write table") +peak_output <- dcast(peaks, Threshold + Duplication + Comparison ~ Caller, value.var = "Peaks") +peak_output <- peak_output[order(peak_output$Threshold, peak_output$Duplication, peak_output$Comparison), ] +write.xlsx(peak_output, file = "Peak counts.xlsx") + +# For each threshold, plot number of peaks +for (threshold in unique(peaks$Threshold)) { + print(threshold) + peaks_threshold <- peaks[which(peaks$Threshold == threshold), ] + + # Compare peaks with and without duplication + peaks_threshold_dup <- dcast(peaks_threshold, Replicate + Caller ~ Duplication, value.var = "Peaks") + cor <- round(as.numeric(unlist(cor.test(peaks_threshold_dup$dedup, peaks_threshold_dup$no_dedup))["estimate.cor"]), 2) + peaks.max <- max(max(peaks_threshold_dup$dedup), max(peaks_threshold_dup$no_dedup)) + + png(paste("duplication_corr.", threshold, ".png", sep = ""), width = 350, height = 300) + print(ggplot(peaks_threshold_dup, aes(x = log10(no_dedup), y = log10(dedup))) + + geom_point(aes(color = Caller)) + + geom_abline(intercept = 0, slope = 1, linetype = "dashed") + + xlab("log10(Peaks), no deduplication") + + ylab("log10(Peaks), deduplication") + + xlim(0, log10(peaks.max) + 0.5) + + ylim(0, log10(peaks.max) + 0.5) + + ggtitle(paste("Peaks by read duplication, q-value threshold ", threshold, sep = ""), + subtitle = paste("Pearson correlation: ", cor, sep = "") + )) + dev.off() + + # Plot summary across callers + width <- (length(unique(peaks_threshold$Sample)) * 50) + 100 + height <- (length(unique(peaks_threshold$Caller)) * 100) + 100 + png(paste("peaks_by_caller.", threshold, ".png", sep = ""), width = width, height = height) + print(ggplot(peaks_threshold[which(peaks_threshold$Duplication == "dedup"), ], aes(x = Sample, y = Peaks)) + + geom_boxplot() + + facet_wrap(~Caller, nrow = length(unique(peaks_threshold$Caller)), scales = "free_y") + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + + ggtitle(paste("Peaks,\nq-value threshold ", threshold, ",\ndeduplicated reads", sep = ""))) + dev.off() +} diff --git a/workflow/scripts/_plot_results_venn.R b/workflow/scripts/_plot_results_venn.R index be23302..615d011 100644 --- a/workflow/scripts/_plot_results_venn.R +++ b/workflow/scripts/_plot_results_venn.R @@ -1,76 +1,94 @@ #!/usr/bin/env Rscript -rm(list=ls()) +rm(list = ls()) # package list -list.of.packages=c("argparse","ggvenn") +list.of.packages <- c("argparse", "ggvenn") # load packages invisible(lapply(list.of.packages, library, character.only = TRUE)) # handle debugging -debug="FALSE" +debug <- "FALSE" # files=list.files(path=path) # debug="TRUE" # 
path="~/Documents/Projects/ccbr1155/CS030586/contrasts/siSmyd3_2m_Smyd3_0.25HCHO_500K_vs_siNC_2m_Smyd3_0.25HCHO_500K__dedup__narrowPeak" # path="/data/CCBR/projects/ccbr1155/CS030586_CARAP/diff" -if (debug){ - auc = grep(pattern = "_AUCbased_diffresults.txt",files)[1] - aucresults = paste(path,files[auc],sep="/") - frag = grep(pattern = "_fragmentsbased_diffresults.txt",files)[1] - fragmentsresults = paste(path,files[frag],sep="/") +if (debug) { + auc <- grep(pattern = "_AUCbased_diffresults.txt", files)[1] + aucresults <- paste(path, files[auc], sep = "/") + frag <- grep(pattern = "_fragmentsbased_diffresults.txt", files)[1] + fragmentsresults <- paste(path, files[frag], sep = "/") } else { # create parser object parser <- ArgumentParser() - - parser$add_argument("--aucresults", type="character", required=TRUE, - help="path to aucresults") - parser$add_argument("--fragmentsresults", type="character", required=TRUE, - help="path to fragmentsresults") - parser$add_argument("--pdf", type="character", required=TRUE, - help="output pdf file") - parser$add_argument("--title", type="character", required=TRUE, - help="plot title in pdf") - parser$add_argument("--fdr_cutoff", type="double", default=0.05, required=FALSE, - help="FDR cutoff [default %(default)s]") - parser$add_argument("--log2fc_cutoff", type="double", default=0.59, required=FALSE, - help="log2foldchange cutoff [default %(default)s]") - + + parser$add_argument("--aucresults", + type = "character", required = TRUE, + help = "path to aucresults" + ) + parser$add_argument("--fragmentsresults", + type = "character", required = TRUE, + help = "path to fragmentsresults" + ) + parser$add_argument("--pdf", + type = "character", required = TRUE, + help = "output pdf file" + ) + parser$add_argument("--title", + type = "character", required = TRUE, + help = "plot title in pdf" + ) + parser$add_argument("--fdr_cutoff", + type = "double", default = 0.05, required = FALSE, + help = "FDR cutoff [default %(default)s]" + ) + parser$add_argument("--log2fc_cutoff", + type = "double", default = 0.59, required = FALSE, + help = "log2foldchange cutoff [default %(default)s]" + ) + args <- parser$parse_args() - aucresults=args$aucresults - fragmentsresults=args$fragmentsresults + aucresults <- args$aucresults + fragmentsresults <- args$fragmentsresults } -readandfilter <- function(resultsfile){ - df = read.csv(resultsfile, - header = TRUE, - row.names = NULL, - sep = "\t", - check.names = FALSE, - comment.char = "#", - strip.white = TRUE) +readandfilter <- function(resultsfile) { + df <- read.csv(resultsfile, + header = TRUE, + row.names = NULL, + sep = "\t", + check.names = FALSE, + comment.char = "#", + strip.white = TRUE + ) df[is.na(df)] <- 1 - df$significance = "NS" - k_fdr = df$padj < args$fdr_cutoff - k_up = df$log2FoldChange > args$log2fc_cutoff - k_down = df$log2FoldChange < (-1 * args$log2fc_cutoff) - if (nrow(df[k_fdr & k_up,]) != 0) {df[k_fdr & k_up,]$significance = "UP"} - if (nrow(df[k_fdr & k_down,]) != 0) {df[k_fdr & k_down,]$significance = "DOWN"} + df$significance <- "NS" + k_fdr <- df$padj < args$fdr_cutoff + k_up <- df$log2FoldChange > args$log2fc_cutoff + k_down <- df$log2FoldChange < (-1 * args$log2fc_cutoff) + if (nrow(df[k_fdr & k_up, ]) != 0) { + df[k_fdr & k_up, ]$significance <- "UP" + } + if (nrow(df[k_fdr & k_down, ]) != 0) { + df[k_fdr & k_down, ]$significance <- "DOWN" + } return(df) } -auc_df = readandfilter(aucresults) -frag_df = readandfilter(fragmentsresults) +auc_df <- readandfilter(aucresults) +frag_df <- 
readandfilter(fragmentsresults) -AUC_UP=auc_df[auc_df$significance=="UP",]$peakID -AUC_DOWN=auc_df[auc_df$significance=="DOWN",]$peakID -FRAG_UP=frag_df[frag_df$significance=="UP",]$peakID -FRAG_DOWN=frag_df[frag_df$significance=="DOWN",]$peakID -x<-list(AUC_UP=AUC_UP,AUC_DOWN=AUC_DOWN,FRAG_UP=FRAG_UP,FRAG_DOWN=FRAG_DOWN) +AUC_UP <- auc_df[auc_df$significance == "UP", ]$peakID +AUC_DOWN <- auc_df[auc_df$significance == "DOWN", ]$peakID +FRAG_UP <- frag_df[frag_df$significance == "UP", ]$peakID +FRAG_DOWN <- frag_df[frag_df$significance == "DOWN", ]$peakID +x <- list(AUC_UP = AUC_UP, AUC_DOWN = AUC_DOWN, FRAG_UP = FRAG_UP, FRAG_DOWN = FRAG_DOWN) pdf(args$pdf) ggvenn(x, - stroke_size = 0.4, - set_name_size = 3, - fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF")) + + stroke_size = 0.4, + set_name_size = 3, + fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF") +) + ggtitle(args$title) + theme(plot.title = element_text(size = 8)) dev.off() diff --git a/workflow/scripts/_run_jobby_on_snakemake_log b/workflow/scripts/_run_jobby_on_snakemake_log index a6a41e7..3ca3356 100644 --- a/workflow/scripts/_run_jobby_on_snakemake_log +++ b/workflow/scripts/_run_jobby_on_snakemake_log @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# input: +# input: ## intake snakemake log ## full path to jobby.py script @@ -10,4 +10,4 @@ jobidswc=$(echo $jobids | wc -c) # run jobby script module load python -if [ "$jobidswc" != "1" ];then python $2 $jobids; fi \ No newline at end of file +if [ "$jobidswc" != "1" ];then python $2 $jobids; fi diff --git a/workflow/scripts/_spooker b/workflow/scripts/_spooker index 9d2fd8a..3a9a0d1 100644 --- a/workflow/scripts/_spooker +++ b/workflow/scripts/_spooker @@ -95,4 +95,4 @@ if [[ $ISBIOWULF == true || $ISFRCE == true ]];then else # not biowulf or frce ... so exit exit 0 -fi \ No newline at end of file +fi
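
For context on what the reformatted jobby script is doing, its `to_bytes` helper pairs a unit-to-scaling-factor lookup with a regex whose named groups (`(?P<size>...)`, `(?P<units>...)`) are read back via `match.group("size")` and `match.group("units")`. A minimal standalone sketch of that approach, with illustrative names and only a subset of the unit suffixes:

import math
import re

# Scaling factors for a subset of the binary (KiB) and decimal (KB)
# unit suffixes handled by jobby's size2bytes table.
SIZE2BYTES = {
    "b": 1, "bytes": 1, "byte": 1,
    "k": 1024, "kib": 1024, "kb": 1000,
    "m": 1024**2, "mib": 1024**2, "mb": 1000**2,
    "g": 1024**3, "gib": 1024**3, "gb": 1000**3,
}

def to_bytes_sketch(size):
    """Convert a human-readable size such as '1.5GiB' or '512 mb' to bytes.
    Returns None when the trailing units cannot be parsed."""
    size = size.replace(" ", "")
    match = re.search(r"(?P<size>[0-9.]+)(?P<units>[a-zA-Z]+)$", size)
    if not match:
        return None
    units = match.group("units").lower()
    if units not in SIZE2BYTES:
        return None
    return int(math.ceil(SIZE2BYTES[units] * float(match.group("size"))))

print(to_bytes_sketch("1.5GiB"))  # 1610612736
print(to_bytes_sketch("512 mb"))  # 512000000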
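
The `sacct` handler in the same script has to reconcile per-job rows with the `$SLURM_JOBID.batch` step rows that carry MaxRSS. A rough, self-contained sketch of that join, run here on an illustrative two-line tab-delimited string rather than live scheduler output:

# Illustrative (made-up) sacct-style output: a job row plus its ".batch"
# step row, which is where MaxRSS is reported.
sample_output = (
    "12345679\tbuild_index\tCOMPLETED\tnorm\t8\t16Gn\n"
    "12345679.batch\tbatch\tCOMPLETED\t\t8\t3.2G\n"
)

# First pass: map each job ID to the MaxRSS value on its ".batch" row.
jobid_to_maxrss = {}
for line in sample_output.rstrip("\n").split("\n"):
    fields = line.split("\t")
    if fields[0].endswith(".batch"):
        # "12345679.batch" -> "12345679"; last column holds MaxRSS here.
        jobid_to_maxrss[fields[0].split(".")[0]] = fields[-1]

# Second pass: report only the top-level job rows, falling back to "-"
# when no .batch row exists yet (e.g. the job is still running).
for line in sample_output.rstrip("\n").split("\n"):
    fields = line.split("\t")
    if "." not in fields[0]:
        print(fields[0], jobid_to_maxrss.get(fields[0], "-"))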
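
_make_counts_matrix.py builds one single-column DataFrame per sample and then joins them on peak ID with `functools.reduce`. The same pattern in isolation, using small made-up per-sample count dictionaries:

import functools
import pandas

# Made-up per-sample peak counts keyed by peakID (chrom:start-end).
counts = {
    "sample_A": pandas.DataFrame.from_dict(
        {"chr1:100-200": 12.0, "chr1:500-650": 3.0},
        orient="index", columns=["sample_A"],
    ),
    "sample_B": pandas.DataFrame.from_dict(
        {"chr1:100-200": 7.0, "chr1:500-650": 9.0},
        orient="index", columns=["sample_B"],
    ),
}

# Successively merge every per-sample frame on its index to obtain a
# peaks-by-samples counts matrix, as the workflow script does.
counts_matrix = functools.reduce(
    lambda x, y: x.merge(y, left_index=True, right_index=True), counts.values()
)
print(counts_matrix.to_csv(sep="\t", index_label="peakID"))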
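
_make_results_bed.py assigns BED item colours by combining FDR and log2 fold-change masks with pandas `.loc`. A simplified illustration of that masking pattern on a tiny made-up results frame (the cutoffs and colours are illustrative, and the script's own handling, including the elbow limits, is more involved):

import pandas

# Made-up differential results: padj and log2FoldChange per peak.
df = pandas.DataFrame(
    {"padj": [0.001, 0.2, 0.004], "log2FoldChange": [1.2, -0.8, -2.1]}
)
fdr_cutoff, log2fc_cutoff = 0.05, 0.59

df["color"] = "160,160,160"  # default grey for non-significant peaks
# Significant and up: red; significant and down: green.
df.loc[(df["padj"] < fdr_cutoff) & (df["log2FoldChange"] > log2fc_cutoff), "color"] = "255,51,51"
df.loc[(df["padj"] < fdr_cutoff) & (df["log2FoldChange"] < -log2fc_cutoff), "color"] = "128,255,0"
print(df)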