diff --git a/Rules/bwa.smk b/Rules/bwa.smk new file mode 100644 index 0000000..6f48145 --- /dev/null +++ b/Rules/bwa.smk @@ -0,0 +1,13 @@ +rule bwa_mem: + input: + fq1 = "02_trimmed/{sample}_val_1.fq.gz", + fq2 = "02_trimmed/{sample}_val_2.fq.gz", + ref_fa=REF_FASTA, + IDX=REF_FASTA+'.bwt' + output: + aligned = "tmp/{sample}.aligned.bam" + threads: 12 + conda: + "../Envs/bwa.yaml" + shell: + "bwa mem -5SP -T0 -t {threads} {input.ref_fa} {input.fq1} {input.fq2} | samtools view -bS - > {output.aligned}" diff --git a/Rules/call_loop.smk b/Rules/call_loop.smk new file mode 100644 index 0000000..60bad47 --- /dev/null +++ b/Rules/call_loop.smk @@ -0,0 +1,8 @@ +rule call_loop: + input: + hic = "06_contact_map/{sample}.contact_map.hic" + output: + loop = "10_called_loop/{sample}.loop.txt" + threads: 16 + shell: + "/home/dguan/.local/bin/mustache -p {threads} -norm KR -f {input.hic} -r 5000 -pt 0.05 -o {output.loop}" diff --git a/Rules/call_tad.smk b/Rules/call_tad.smk new file mode 100644 index 0000000..0bb4112 --- /dev/null +++ b/Rules/call_tad.smk @@ -0,0 +1,12 @@ +rule call_tad: + input: + hic = "06_contact_map/{sample}.hic" + output: + tad = "09_called_TAD/{sample}.{res}/{res}_blocks.bedpe" # directory("09_called_TAD/{sample}.{res}") + threads: 16 + params: + jucier=JUCIER, + res="{res}", + out_dir="09_called_TAD/{sample}.{res}" + shell: + "java -Xmx24G -Djava.awt.headless=true -jar {params.jucier} arrowhead --threads {threads} -k VC -r {params.res} {input.hic} {params.out_dir}" diff --git a/Rules/compart_plot.smk b/Rules/compart_plot.smk new file mode 100644 index 0000000..956d420 --- /dev/null +++ b/Rules/compart_plot.smk @@ -0,0 +1,9 @@ +rule compart_plot: + input: + ab = "08_AB_compart/{sample}.ab", + bed = "08_AB_compart/{sample}.AB_compart.bed" + output: + plot="08_AB_compart/{sample}.ab.chr5.pdf" + threads: 1 + shell: + "/home/dguan/.local/bin/fancplot -o {output.plot} 5 -p square {input.ab} -p line {input.bed}" diff --git a/Rules/compress_pairs.smk b/Rules/compress_pairs.smk new file mode 100644 index 0000000..a23cf38 --- /dev/null +++ b/Rules/compress_pairs.smk @@ -0,0 +1,10 @@ +rule compress_pairs: + input: + pairs = "04_pairs/{sample}.pairs", + output: + pairs_gz = "04_pairs/{sample}.pairs.gz", + threads: 16 + conda: + "../Envs/bgzip.yaml" + shell: + "bgzip -f -l -c {input.pairs} > {output.pairs_gz}" diff --git a/Rules/cont_map.smk b/Rules/cont_map.smk new file mode 100644 index 0000000..31acead --- /dev/null +++ b/Rules/cont_map.smk @@ -0,0 +1,12 @@ +rule cont_map: + input: + pairs = "04_pairs/{sample}.pairs.gz", + chrom_size = CHROM + output: + cont_map = "06_contact_map/{sample}.hic" + threads: 30 + params: + jucier=JUCIER + shell: + "java -Xmx240G -Djava.awt.headless=true -jar {params.jucier} pre -q 30 --threads {threads} {input.pairs} {output.cont_map} {input.chrom_size}" + diff --git a/Rules/cooler_matrix.smk b/Rules/cooler_matrix.smk new file mode 100644 index 0000000..81d6ffc --- /dev/null +++ b/Rules/cooler_matrix.smk @@ -0,0 +1,12 @@ +rule cooler_matrix: + input: + pairs = "04_pairs/{sample}.pairs.gz", + ref_fa = CHROM + output: + matrix = "07_cooler_map/{sample}.matrix.cool" + threads: 16 + conda: + "../Envs/cooler.yaml" + shell: + "cooler cload pairix -p {threads} {input.ref_fa}:5000 {input.pairs} {output.matrix}" + diff --git a/Rules/dedup_sam.smk b/Rules/dedup_sam.smk new file mode 100644 index 0000000..5861943 --- /dev/null +++ b/Rules/dedup_sam.smk @@ -0,0 +1,12 @@ +rule dedup_sam: + input: + sorted = "tmp/{sample}.parsed.sorted.pairsam.gz" + output: + dedup = "03_dedup/{sample}.dedup.pairsam.gz", + stats = "03_dedup/{sample}.dedup.stats" + threads: 8 + conda: + "../Envs/pairtools.yaml" + shell: + "pairtools dedup --nproc-in {threads} --nproc-out {threads} --mark-dups --output-stats {output.stats} --output {output.dedup} {input.sorted}" + diff --git a/Rules/genome_idx.smk b/Rules/genome_idx.smk new file mode 100644 index 0000000..e02591e --- /dev/null +++ b/Rules/genome_idx.smk @@ -0,0 +1,10 @@ +rule genome_idx: + input: + fa = REF_FASTA + output: + bwa_idx = genome_idx + threads: 12 + conda: + "../Envs/bwa.yaml" + shell: + "bwa index {input.fa}; samtools faidx {input.fa}" diff --git a/Rules/get_qc.smk b/Rules/get_qc.smk new file mode 100644 index 0000000..aae4ff5 --- /dev/null +++ b/Rules/get_qc.smk @@ -0,0 +1,10 @@ +rule get_qc: + input: + "03_dedup/{sample}.dedup.stats" + output: + "03_dedup/{sample}.stats.sum.txt" + threads: 1 + conda: + "../Envs/getqc.yaml" + shell: + "python3 /home/dguan/bin/MicroC/get_qc.py -p {input} > {output}" diff --git a/Rules/idx_pairix.smk b/Rules/idx_pairix.smk new file mode 100644 index 0000000..4a9e106 --- /dev/null +++ b/Rules/idx_pairix.smk @@ -0,0 +1,10 @@ +rule idx_pairix: + input: + pairs = "04_pairs/{sample}.pairs.gz", + output: + idx = "04_pairs/{sample}.pairs.gz.px2" + threads: 1 + conda: + "../Envs/pairix.yaml" + shell: + "pairix {input}" diff --git a/Rules/lib_compx.smk b/Rules/lib_compx.smk new file mode 100644 index 0000000..45fed7e --- /dev/null +++ b/Rules/lib_compx.smk @@ -0,0 +1,10 @@ +rule lib_compx: + input: + "05_mapped/{sample}.sorted.bam" + output: + "05_mapped/{sample}.lc_extrap.txt" + threads: 1 + conda: + "../Envs/preseq.yaml" + shell: + "preseq lc_extrap -bam -pe -extrap 2.1e9 -step 1e8 -seg_len 1000000000 -output {output} {input}" diff --git a/Rules/merge_libs.smk b/Rules/merge_libs.smk new file mode 100644 index 0000000..fed9deb --- /dev/null +++ b/Rules/merge_libs.smk @@ -0,0 +1,10 @@ +rule merge_libs: + input: + make_libs + output: + temp("tmp/{sample}.dedup.pairsam.gz") + threads: 8 + conda: + "../Envs/pairtools.yaml" + shell: + "pairtools merge -o {output} --memory 64G --nproc-in {threads} --nproc-out {threads} {input}" diff --git a/Rules/mul_res_matrix.smk b/Rules/mul_res_matrix.smk new file mode 100644 index 0000000..3aa84bc --- /dev/null +++ b/Rules/mul_res_matrix.smk @@ -0,0 +1,10 @@ +rule mul_res_matrix: + input: + matrix = "07_cooler_map/{sample}.matrix.cool" + output: + mres = "07_cooler_map/{sample}.matrix.mcool" + threads: 16 + conda: + "../Envs/cooler.yaml" + shell: + "cooler zoomify --balance -p {threads} {input}" diff --git a/Rules/parse_bam.smk b/Rules/parse_bam.smk new file mode 100644 index 0000000..14f4d54 --- /dev/null +++ b/Rules/parse_bam.smk @@ -0,0 +1,11 @@ +rule parse_bam: + input: + aligned = "tmp/{sample}.aligned.bam", + chrom_size=CHROM + output: + parsed = "tmp/{sample}.parsed.pairsam.gz" + threads: 8 + conda: + "../Envs/pairtools.yaml" + shell: + "pairtools parse --min-mapq 40 --walks-policy 5unique --max-inter-align-gap 30 --nproc-in {threads} --nproc-out {threads} --chroms-path {input.chrom_size} {input.aligned} -o {output.parsed}" diff --git a/Rules/sort_bam.smk b/Rules/sort_bam.smk new file mode 100644 index 0000000..66264c4 --- /dev/null +++ b/Rules/sort_bam.smk @@ -0,0 +1,10 @@ +rule sort_bam: + input: + split = "tmp/{sample}.unsorted.bam" + output: + sorted = "05_mapped/{sample}.sorted.bam" + threads: 8 + conda: + "../Envs/samtools.yaml" + shell: + "samtools sort -@ {threads} -o {output.sorted} {input.split}; samtools index {output.sorted}" diff --git a/Rules/sort_parse.smk b/Rules/sort_parse.smk new file mode 100644 index 0000000..2e407fd --- /dev/null +++ b/Rules/sort_parse.smk @@ -0,0 +1,13 @@ +rule sort_parse: + input: + parsed = "tmp/{sample}.parsed.pairsam.gz", + ref_fa=REF_FASTA + output: + sorted = "tmp/{sample}.parsed.sorted.pairsam.gz" + threads: 16 + params: + tmp=TEMP + conda: + "../Envs/pairtools.yaml" + shell: + "pairtools sort --nproc {threads} --tmpdir={params.tmp} {input.parsed} -o {output.sorted}" diff --git a/Rules/split_sam.smk b/Rules/split_sam.smk new file mode 100644 index 0000000..cd37e8f --- /dev/null +++ b/Rules/split_sam.smk @@ -0,0 +1,12 @@ +rule split_sam: + input: + dedup =rules.merge_libs.output + output: + unsorted = temp("tmp/{sample}.unsorted.bam"), + pairs = "04_pairs/{sample}.pairs" + threads: 8 + conda: + "../Envs/pairtools.yaml" + shell: + "pairtools split --nproc-in {threads} --nproc-out {threads} --output-pairs {output.pairs} --output-sam {output.unsorted} {input.dedup}" + diff --git a/Rules/trim_reads.smk b/Rules/trim_reads.smk new file mode 100644 index 0000000..13ca10c --- /dev/null +++ b/Rules/trim_reads.smk @@ -0,0 +1,17 @@ +rule trim_reads: + input: + R1 = "01_raw_fq/{sample}_R1.fq.gz", + R2 = "01_raw_fq/{sample}_R2.fq.gz" + output: + R1 = "02_trimmed/{sample}_val_1.fq.gz", + R2 = "02_trimmed/{sample}_val_2.fq.gz", + RP1 = "02_trimmed/{sample}_R1.fq.gz_trimming_report.txt", + RP2 = "02_trimmed/{sample}_R2.fq.gz_trimming_report.txt" + threads: 16 + params: + base = "{sample}", + outdir = "02_trimmed" + conda: + "../Envs/trimgalore.yaml" + shell: + "trim_galore --paired --cores {threads} --basename {params.base} -o {params.outdir} --gzip {input.R1} {input.R2}"