Skip to content

Commit

Permalink
refactor: upload
Browse files: browse the repository at this point in the history
  • Loading branch information
alienzj committed Apr 25, 2020
1 parent 8b7d84c commit 0084079
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 90 deletions.
4 changes: 3 additions & 1 deletion README.org
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ positional arguments:
classify_short_reads_kraken2_all,
classify_hmq_bins_gtdbtk_all, classify_all,
profiling_metaphlan2_all, profiling_jgi_all,
profiling_humann2_all, profiling_all, all
profiling_humann2_all, profiling_all,
upload_sequencing_all, upload_assembly_all,
upload_all, all

arguments:
-h, --help show this help message and exit
Expand Down
8 changes: 3 additions & 5 deletions metapi/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,8 @@ include: "rules/checkm.smk"
include: "rules/dereplicate.smk"
include: "rules/classify.smk"
include: "rules/profiling.smk"
include: "rules/upload.smk"

#include: "rules/coassembly.smk"
#include: "rules/cobinning.smk"
#include: "rules/upload.smk"
#include: "rules/dereplicate.smk"

rule all:
input:
Expand All @@ -103,4 +100,5 @@ rule all:
rules.checkm_all.input,
rules.dereplicate_all.input,
rules.classify_all.input,
rules.profiling_all.input
rules.profiling_all.input,
rules.upload_all.input
31 changes: 17 additions & 14 deletions metapi/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ localrules:
- profiling_jgi_all
- profiling_humann2_all
- profiling_all
- upload_sequencing_all
- upload_assembly_all
- upload_all


__default__:
Expand Down Expand Up @@ -360,29 +363,29 @@ profiling_humann2_split_straified:
output: "logs/11.{rule}/{rule}.o"
error: "logs/11.{rule}/{rule}.e"

rmhost_md5:
mem: "128M"
output: "logs/12.{rule}/{rule}.{wildcards.sample}.o"
error: "logs/12.{rule}/{rule}.{wildcards.sample}.e"
upload_generate_samples_info:
mem: "512M"
output: "logs/12.{rule}/{rule}.o"
error: "logs/12.{rule}/{rule}.e"

assembly_md5:
upload_md5_short_reads:
mem: "128M"
output: "logs/12.{rule}/{rule}.{wildcards.sample}.o"
error: "logs/12.{rule}/{rule}.{wildcards.sample}.e"

generate_samples_info:
mem: "512M"
output: "logs/12.{rule}/{rule}.o"
error: "logs/12.{rule}/{rule}.e"

generate_run_info:
upload_generate_run_info:
mem: "1G"
cores: 8
output: "logs/12.{rule}/{rule}.o"
error: "logs/12.{rule}/{rule}.e"

generate_assembly_info:
upload_md5_scaftigs:
mem: "128M"
output: "logs/12.{rule}/{rule}.{wildcards.sample}.{wildcards.assembler}.o"
error: "logs/12.{rule}/{rule}.{wildcards.sample}.{wildcards.assembler}.e"

upload_generate_assembly_info:
mem: "1G"
cores: 8
output: "logs/12.{rule}/{rule}.o"
error: "logs/12.{rule}/{rule}.e"
output: "logs/12.{rule}/{rule}.{wildcards.assembler}.o"
error: "logs/12.{rule}/{rule}.{wildcards.assembler}.e"
4 changes: 2 additions & 2 deletions metapi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ output:
classify: "results/09.classify"
dereplicate: "results/10.dereplicate"
profiling: "results/11.profiling"
upload: "results/12.upload"
upload: "results/99.upload"

upload:
do: False
do: True
threads: 8
project_accession: "CNP0000000"

Expand Down
14 changes: 8 additions & 6 deletions metapi/configer.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ class metaconfig:
"logs/11.profiling_humann2_postprocess",
"logs/11.profiling_humann2_join",
"logs/11.profiling_humann2_split_straified",
"logs/12.rmhost_md5",
"logs/12.assembly_md5",
"logs/12.generate_samples_info",
"logs/12.generate_run_info",
"logs/12.generate_assembly_info",
"logs/12.upload_generate_samples_info",
"logs/12.upload_md5_short_reads",
"logs/12.upload_generate_run_info",
"logs/12.upload_md5_scaftigs",
"logs/12.upload_generate_assembly_info",
]

def __init__(self, work_dir):
Expand Down Expand Up @@ -132,7 +132,9 @@ def __str__(self):
metapi denovo_wf --dry_run
metapi denovo_wf --qsub
""" % (self.work_dir)
""" % (
self.work_dir
)

return message

Expand Down
3 changes: 3 additions & 0 deletions metapi/corer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
"profiling_jgi_all",
"profiling_humann2_all",
"profiling_all",
"upload_sequencing_all",
"upload_assembly_all",
"upload_all",
"all",
]

Expand Down
156 changes: 94 additions & 62 deletions metapi/rules/upload.smk
Original file line number Diff line number Diff line change
@@ -1,63 +1,95 @@
rule rmhost_md5:
input:
expand(os.path.join(config["results"]["rmhost"], "{{sample}}.rmhost{read}.fq.gz"),
read=[".1", ".2"] if IS_PE else "")
output:
os.path.join(config["results"]["rmhost"], "{sample}.rmhost.fq.gz.md5")
params:
rmhost_dir = os.path.join(config["results"]["rmhost"], "")
shell:
'''
md5sum {input} | sed 's#{params.rmhost_dir}##g' > {output}
'''


rule assembly_md5:
input:
os.path.join(config["results"]["assembly"],
"{sample}.{assembler}_out/{sample}.{assembler}.scaftigs.fa.gz"),
output:
os.path.join(config["results"]["assembly"],
"{sample}.{assembler}_out/{sample}.{assembler}.scaftigs.fa.gz.md5")
params:
assembly_dir = os.path.join(config["results"]["assembly"], "{sample}.{assembler}_out/")
shell:
'''
md5sum {input} | sed 's#{params.assembly_dir}##g' > {output}
'''


rule generate_samples_info:
input:
config["params"]["samples"]
output:
os.path.join(config["results"]["upload"], "MIxS_Samples.xlsx")
run:
metapi.gen_samples_info(SAMPLES, output[0], config)


rule generate_run_info:
input:
expand("{rmhost}/{sample}.rmhost.fq.gz.md5",
rmhost=config["results"]["rmhost"],
sample=SAMPLES.index.unique())
output:
os.path.join(config["results"]["upload"], "Experiment_Run.xlsx")
threads:
config["upload"]["threads"]
run:
metapi.gen_info(input, output[0], config, threads, "sequencing_run")


rule generate_assembly_info:
if config["upload"]["do"]:
rule upload_generate_samples_info:
input:
config["params"]["samples"]
output:
os.path.join(config["output"]["upload"], "table/MIxS_Samples.xlsx")
run:
metapi.gen_samples_info(SAMPLES, output[0], config)


rule upload_md5_short_reads:
input:
assembly_input
output:
os.path.join(config["output"]["upload"], "short_reads/{sample}.md5")
shell:
'''
md5sum {input} > {output}
'''


rule upload_generate_run_info:
input:
expand(os.path.join(
config["output"]["upload"], "short_reads/{sample}.md5"),
sample=SAMPLES.index.unique())
output:
os.path.join(config["output"]["upload"], "table/Experiment_Run.xlsx")
threads:
config["upload"]["threads"]
run:
metapi.gen_info(input, output[0], config, threads, "sequencing_run")


rule upload_sequencing_all:
input:
os.path.join(config["output"]["upload"], "table/Experiment_Run.xlsx"),
os.path.join(config["output"]["upload"], "table/MIxS_Samples.xlsx")


if len(ASSEMBLERS) != 0:
rule upload_md5_scaftigs:
input:
os.path.join(
config["output"]["assembly"],
"scaftigs/{sample}.{assembler}.out/{sample}.{assembler}.scaftigs.fa.gz")
output:
os.path.join(
config["output"]["upload"],
"scaftigs/{assembler}/{sample}.{assembler}.scaftigs.md5")
shell:
'''
md5sum {input} > {output}
'''


rule upload_generate_assembly_info:
input:
expand(os.path.join(
config["output"]["upload"],
"scaftigs/{{assembler}}/{sample}.{{assembler}}.scaftigs.md5"),
sample=SAMPLES.index.unique())
output:
os.path.join(config["output"]["upload"],
"table/Genome_Assembly_{assembler}.xlsx")
threads:
config["upload"]["threads"]
run:
metapi.gen_info(input, output[0], config, threads, "assembly")


rule upload_assembly_all:
input:
expand(os.path.join(
config["output"]["upload"],
"table/Genome_Assembly_{assembler}.xlsx"),
assembler=ASSEMBLERS)

else:
rule upload_assembly_all:
input:

else:
rule upload_sequencing_all:
input:


rule upload_assembly_all:
input:


rule upload_all:
input:
expand("{assembly}/{sample}.{assembler}_out/{sample}.{assembler}.scaftigs.fa.gz.md5",
assembly=config["results"]["assembly"],
sample=SAMPLES.index.unique(),
assembler=config["params"]["assembler"])
output:
os.path.join(config["results"]["upload"], "Genome_Assembly.xlsx")
threads:
config["upload"]["threads"]
run:
metapi.gen_info(input, output[0], config, threads, "assembly")
rules.upload_sequencing_all.input,
rules.upload_assembly_all.input
3 changes: 3 additions & 0 deletions metapi/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def parse_md5(md5_file):
df["sample_name"] = df.apply(
lambda x: os.path.basename(x["file_name"]).split(".")[0], axis=1
)
df["file_name"] = df.apply(
lambda x: os.path.basename(x["file_name"]), axis=1
)
if len(df) == 2:
df_fq1 = df.iloc[0].to_frame().T
df_fq2 = (
Expand Down

0 comments on commit 0084079

Please sign in to comment.