Skip to content

Commit

Permalink
Adding in reporting on metric oddities (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
cschu committed Jun 9, 2020
1 parent 1f424a1 commit bbaad27
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 4 deletions.
19 changes: 19 additions & 0 deletions etc/minos_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,22 @@ collapse_metrics_thresholds:
#predicted_gene: "hom_acov_score lt 0.3 & cpc_score lt 0.25"
#hi_confidence: "classification eq 1 | hom_acov_score ge 0.8 | (hom_acov_score ge 0.6 & transcript_score ge 0.4)"
#discard: "protein_score eq 0 & transcript_score eq 0 & hom_acov_score eq 0 & expression_score lt 0.3"

# Conditions reported as "metric oddities".
# Each entry is a Python expression template: every {name} placeholder is
# replaced with the value of the column of that name from a row of the
# tab-separated metrics table, the resulting expression is evaluated, and the
# number of rows for which it is true is reported per entry.
# Every placeholder must therefore match a column name in that table.
report_metric_oddities:
- "{five_utr_length} >= 10000"
- "{five_utr_num} >= 5"
- "{three_utr_length} >= 10000"
- "{three_utr_num} >= 4"
- "not {is_complete}"
- "not {has_start_codon}"
- "not {has_stop_codon}"
- "{max_exon_length} >= 10000"
- "{max_intron_length} >= 500000"
- "{min_exon_length} <= 5"
- "{min_intron_length} <= 5"
- "{selected_cds_fraction} <= 0.3"
- "{canonical_intron_proportion} != 1"
- "{non_verified_introns_num} >= 1"
- "not {only_non_canonical_splicing}"
- "{proportion_verified_introns} <= 0.5"
- "{suspicious_splicing}"
23 changes: 23 additions & 0 deletions minos/scripts/metric_oddities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import csv
from collections import Counter

# Built-in list of oddity expressions, kept for testing. Each entry is a
# str.format template whose {placeholders} name metrics-table columns.
METRIC_ODDITIES = [
    "{five_utr_length} >= 10000",
    "{five_utr_num} >= 5",
    "{three_utr_length} >= 10000",
    "{three_utr_num} >= 4",
    "not {is_complete}",
    "not {has_start_codon}",
    "not {has_stop_codon}",
    "{max_exon_length} >= 10000",
    "{max_intron_length} >= 500000",
    "{min_exon_length} <= 5",
    "{min_intron_length} <= 5",
    "{selected_cds_fraction} <= 0.3",
    "{canonical_intron_proportion} != 1",
    "{non_verified_introns_num} >= 1",
    "not {only_non_canonical_splicing}",
    "{proportion_verified_introns} <= 0.5",
    "{suspicious_splicing}",
]


class MetricOddityParser:
    """Count how many rows of a metrics table satisfy each oddity expression.

    An oddity is a ``str.format`` template such as ``"{five_utr_num} >= 5"``.
    For every row of the tab-separated metrics file the placeholders are
    filled with that row's column values and the resulting text is evaluated
    as a Python expression; a truthy result counts as one hit for that oddity.
    """

    def __init__(self, metric_file, oddities, gene_filter=None):
        """Set up the counters.

        Args:
            metric_file: Path of a tab-separated file with a header line.
                It must contain a ``tid`` column when ``gene_filter`` is
                given, plus every column named in an oddity placeholder.
            oddities: Iterable of expression templates to evaluate.
            gene_filter: Optional container of ids. When given, only rows
                whose ``tid`` value is in it are evaluated; ``None`` means
                every row is evaluated.
        """
        # Pre-seed with zeros so oddities that never fire are still reported.
        self.table = Counter({oddity: 0 for oddity in oddities})
        self.metric_file = metric_file
        self.gene_filter = gene_filter

    def run(self, stream=None):
        """Evaluate all oddities over the metrics file and write the counts.

        Args:
            stream: Writable text stream receiving one
                ``<expression>\\t<count>`` line per oddity, with the braces
                stripped from the expression. ``None`` writes to
                ``sys.stdout``.

        Raises:
            KeyError: If an oddity names a column missing from the file, or
                if ``gene_filter`` is set and the file has no ``tid`` column.
        """
        gene_filter = self.gene_filter
        # newline="" is the mode the csv module documents for its input files.
        with open(self.metric_file, newline="") as metrics_in:
            for row in csv.DictReader(metrics_in, delimiter="\t"):
                if gene_filter is not None and row["tid"] not in gene_filter:
                    continue
                # NOTE(security): eval() executes the formatted text as
                # Python code. Both the oddity templates and the table values
                # end up inside that code, so neither may come from an
                # untrusted source.
                hits = [
                    oddity
                    for oddity in self.table
                    if eval(oddity.format(**row))
                ]
                self.table.update(hits)

        for oddity, count in self.table.items():
            label = oddity.replace("{", "").replace("}", "")
            print(label, count, sep="\t", file=stream)



31 changes: 28 additions & 3 deletions minos/zzz/minos_run.smk
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ localrules:
busco_copy_results,
busco_concat_protein_metrics,
busco_summary,
minos_create_release_metrics
minos_create_release_metrics,
minos_collate_metric_oddities


rule all:
Expand All @@ -164,6 +165,7 @@ rule all:
os.path.join(EXTERNAL_METRICS_DIR, "metrics_info.txt"),
os.path.join(config["outdir"], "MIKADO_SERIALISE_DONE"),
os.path.join(config["outdir"], "mikado.subloci.gff3"),
os.path.join(config["outdir"], "mikado.monoloci.gff3"),
os.path.join(config["outdir"], "mikado.loci.gff3"),
[
os.path.join(config["outdir"], POST_PICK_PREFIX + suffix)
Expand Down Expand Up @@ -586,7 +588,8 @@ rule minos_mikado_pick:
db = rules.minos_mikado_serialise.output[1]
output:
loci = os.path.join(config["outdir"], "mikado.loci.gff3"),
subloci = os.path.join(config["outdir"], "mikado.subloci.gff3")
subloci = os.path.join(config["outdir"], "mikado.subloci.gff3"),
monoloci = os.path.join(config["outdir"], "mikado.monoloci.gff3")
params:
program_call = config["program_calls"]["mikado"].format(container=config["mikado-container"], program="pick"),
program_params = config["params"]["mikado"]["pick"],
Expand All @@ -596,7 +599,11 @@ rule minos_mikado_pick:
resources:
mem_mb = lambda wildcards, attempt: HPC_CONFIG.get_memory("minos_mikado_pick") * attempt
shell:
"{params.program_call} {params.program_params} -od {params.outdir} --procs {threads} --json-conf {input.config} --subloci-out $(basename {output.subloci}) -db {input.db} {input.gtf}"
"{params.program_call} {params.program_params}" + \
" -od {params.outdir} --procs {threads} --json-conf {input.config}" + \
" --subloci-out $(basename {output.subloci})" + \
" --monoloci-out $(basename {output.monoloci})" + \
" -db {input.db} {input.gtf}"

rule minos_parse_mikado_pick:
input:
Expand Down Expand Up @@ -867,6 +874,24 @@ rule minos_generate_final_table:
from minos.scripts.generate_final_table import generate_final_table
generate_final_table(input.seq_table, input.bt_conf_table, input.stats_table, output.final_table, output.summary)

# Write a per-oddity count table next to the final sanity-check output.
# The expressions come from config["report_metric_oddities"].
rule minos_collate_metric_oddities:
    input:
        loci = os.path.join(config["outdir"], "mikado.loci.gff3"),
        # subloci and monoloci are declared as inputs but not read by the
        # run block below.
        subloci = os.path.join(config["outdir"], "mikado.subloci.gff3"),
        monoloci = os.path.join(config["outdir"], "mikado.monoloci.gff3"),
        final_table = rules.minos_generate_final_table.output.final_table
    output:
        rules.minos_final_sanity_check.output[0] + ".metric_oddities.tsv"
    run:
        # Imported locally so this block does not rely on a file-level import
        # that is outside the visible part of the workflow.
        import csv
        from minos.scripts.metric_oddities import MetricOddityParser

        # Map column 2 -> column 1 of the final table, skipping '#' comment
        # lines and blank lines; the file is closed as soon as it is read.
        with open(input.final_table) as final_table_in:
            tx2gene = {
                row[1]: row[0]
                for row in csv.reader(final_table_in, delimiter="\t")
                if row and not row[0].startswith("#")
            }
        release_genes = set(tx2gene.values())

        # NOTE(review): input.loci is the mikado.loci.gff3 path, but it is
        # passed as the parser's tab-separated metrics table. Confirm whether
        # a metrics TSV was intended here.
        # NOTE(review): release_genes holds first-column values of the final
        # table (presumably gene ids), while the parser compares its filter
        # against each row's "tid" column. Confirm the id namespaces match.
        with open(output[0], "w") as loci_oddities_out:
            MetricOddityParser(input.loci, config["report_metric_oddities"], release_genes).run(stream=loci_oddities_out)




rule split_proteins_prepare:
input:
rules.minos_gffread_extract_sequences.output[0]
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
description = long_description = description.read()

name="minos"
version = "1.5"
version = "1.6"

if sys.version_info.major != 3:
raise EnvironmentError("""minos is a python module that requires python3, and is not compatible with python2. Also, it is now 2020 and support for 2.x has ceased.""")
Expand Down

0 comments on commit bbaad27

Please sign in to comment.