From ac5c1676ca885fb2ef63595ad84d51121ed54d84 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 13 May 2021 17:37:18 +0200
Subject: [PATCH 01/22] plugin version first files
---
Plugins/__init__.py | 14 ++
Plugins/read_fasta.py | 34 +++++
Plugins/read_gff_maker_3UTR.py | 27 ++++
Plugins/read_gff_maker_5UTR.py | 27 ++++
Plugins/read_gff_maker_CDS.py | 50 +++++++
Plugins/read_gff_maker_exon.py | 1 +
Plugins/read_gff_maker_gene.py | 31 ++++
Plugins/read_gff_maker_mRNA.py | 27 ++++
Plugins/read_gff_maker_main.py | 23 +++
Plugins/read_gff_maker_source.py | 1 +
Plugins/read_tab_pannzer_CDS.py | 48 +++++++
Plugins/read_tab_pannzer_gene.py | 28 ++++
core.py | 13 ++
main.py | 236 +++++++++++--------------------
14 files changed, 407 insertions(+), 153 deletions(-)
create mode 100644 Plugins/__init__.py
create mode 100644 Plugins/read_fasta.py
create mode 100644 Plugins/read_gff_maker_3UTR.py
create mode 100644 Plugins/read_gff_maker_5UTR.py
create mode 100644 Plugins/read_gff_maker_CDS.py
create mode 100644 Plugins/read_gff_maker_exon.py
create mode 100644 Plugins/read_gff_maker_gene.py
create mode 100644 Plugins/read_gff_maker_mRNA.py
create mode 100644 Plugins/read_gff_maker_main.py
create mode 100644 Plugins/read_gff_maker_source.py
create mode 100644 Plugins/read_tab_pannzer_CDS.py
create mode 100644 Plugins/read_tab_pannzer_gene.py
create mode 100644 core.py
diff --git a/Plugins/__init__.py b/Plugins/__init__.py
new file mode 100644
index 0000000..6fc65d5
--- /dev/null
+++ b/Plugins/__init__.py
@@ -0,0 +1,14 @@
+#__init.py__
+
+from Plugins.read_fasta import Plugin
+
+from Plugins.read_gff_maker_gene import Plugin
+from Plugins.read_gff_maker_3UTR import Plugin
+from Plugins.read_gff_maker_5UTR import Plugin
+from Plugins.read_gff_maker_CDS import Plugin
+from Plugins.read_gff_maker_exon import Plugin
+from Plugins.read_gff_maker_mRNA import Plugin
+from Plugins.read_gff_maker_source import Plugin
+
+from Plugins.read_tab_pannzer_CDS import Plugin
+from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
new file mode 100644
index 0000000..a6dade6
--- /dev/null
+++ b/Plugins/read_fasta.py
@@ -0,0 +1,34 @@
+#plugin.py
+
+import importlib
+import itertools
+
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+
+ for record in handle:
+
+ #initialize record
+ _record_ = SeqRecord(
+ record.seq,
+ record.id,
+ dbxrefs=["Project:" + metadata["project"]],
+ annotations={"division":metadata["division"],"molecule_type":metadata["molecule_type"],"organism":metadata["organism"],"taxonomy":metadata["taxonomy"],"topology":metadata["topology"]},
+ description=""
+ )
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=_record_.id))
+
+ #post output treatment
+ _record_.features = list(itertools.chain(*receiver))
+
+ #outputing
+ with open(f"out/{_record_.id}.dat", "w") as o:
+ print(_record_.format("embl"), file=o)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
new file mode 100644
index 0000000..dabca35
--- /dev/null
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -0,0 +1,27 @@
+#read_gff_maker_3UTR.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ try:
+ location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index())
+
+ _sub_features_ = [
+ SeqFeature(
+ FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+ type="3'UTR",
+ qualifiers={
+ "gene":target[1],
+ "note":list()})]
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "3'UTR")))
+
+ return _sub_features_
+ except KeyError:
+ return []
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
new file mode 100644
index 0000000..5b72281
--- /dev/null
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -0,0 +1,27 @@
+#read_gff_maker_5UTR.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ try:
+ location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index())
+
+ _sub_features_ = [
+ SeqFeature(
+ FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+ type="5'UTR",
+ qualifiers={
+ "gene":target[1],
+ "note":list()})]
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "5'UTR")))
+
+ return _sub_features_
+ except KeyError:
+ return []
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
new file mode 100644
index 0000000..7b29f18
--- /dev/null
+++ b/Plugins/read_gff_maker_CDS.py
@@ -0,0 +1,50 @@
+#read_gff_maker_CDS.py
+
+import pandas as pd
+import itertools
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+def mergeLocations(_locationArray_):
+ return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
+
+ _sub_features_ = [
+ SeqFeature(
+ mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
+ type="CDS",
+ qualifiers={
+ "gene":target[1],
+ "product":list(),
+ "note":list(),
+ "db_xref":list(),
+ "translation":list(),
+ "transl_table":metadata["transl_table"]})]
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "CDS")))
+
+ annotations = list(itertools.chain(receiver))
+ for annotation in annotations:
+ if "product" in annotation.keys() and annotation["product"] != []:
+ for sub_feature in _sub_features_:
+ sub_feature.qualifiers["product"].extend(annotation["product"])
+
+ if "note" in annotation.keys() and annotation["note"] != []:
+ for sub_feature in _sub_features_:
+ sub_feature.qualifiers["note"].extend(annotation["note"])
+
+ if "db_xref" in annotation.keys() and annotation["db_xref"] != []:
+ for sub_feature in _sub_features_:
+ sub_feature.qualifiers["db_xref"].extend(annotation["db_xref"])
+
+ if "translation" in annotation.keys() and annotation["translation"] != []:
+ for sub_feature in _sub_features_:
+ sub_feature.qualifiers["translation"].extend(annotation["translation"])
+
+ return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
new file mode 100644
index 0000000..711c5ca
--- /dev/null
+++ b/Plugins/read_gff_maker_exon.py
@@ -0,0 +1 @@
+#read_gff_maker_exon.py
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
new file mode 100644
index 0000000..069b66b
--- /dev/null
+++ b/Plugins/read_gff_maker_gene.py
@@ -0,0 +1,31 @@
+#read_gff_maker_gene.py
+
+import pandas as pd
+import itertools
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ location = (handle.loc[(target[0], target[1], "gene"),:].reset_index())
+
+ _sub_features_ = [
+ SeqFeature(
+ FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+ type="gene",
+ qualifiers={
+ "gene":target[1],
+ "note":list()})]
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target[0], target[1], "gene")))
+
+ annotations = list(itertools.chain(receiver))
+ for annotation in annotations:
+ if "note" in annotation.keys() and annotation["note"] != []:
+ for sub_feature in _sub_features_:
+ sub_feature.qualifiers["note"].extend(annotation["note"])
+
+ return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
new file mode 100644
index 0000000..7639664
--- /dev/null
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -0,0 +1,27 @@
+#read_gff_maker_mRNA.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+def mergeLocations(_locationArray_):
+ return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
+
+ #initialise
+ _sub_features_ = [
+ SeqFeature(
+ mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
+ type="mRNA",
+ qualifiers={
+ "gene":target[1]})]
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "mRNA")))
+
+ return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
new file mode 100644
index 0000000..2043ade
--- /dev/null
+++ b/Plugins/read_gff_maker_main.py
@@ -0,0 +1,23 @@
+#read_gff_maker_main.py
+
+import pandas as pd
+import re
+
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+
+ for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
+
+ #initialize features
+ _features_subset_ = []
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=(target, gene)))
+
+ _features_subset_ = receiver
+ yield _features_subset_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
new file mode 100644
index 0000000..f8c0bce
--- /dev/null
+++ b/Plugins/read_gff_maker_source.py
@@ -0,0 +1 @@
+#read_gff_maker_source.py
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
new file mode 100644
index 0000000..7b0fd2e
--- /dev/null
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -0,0 +1,48 @@
+#read_tab_pannzer_CDS
+
+import pandas as pd
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+
+ #initialisation
+ try:
+ anno_bp = handle.loc[(target[1], "BP_ARGOT"),:].reset_index()["id"]
+ except KeyError:
+ anno_bp = pd.Series([])
+
+ try:
+ anno_cc = handle.loc[(target[1], "CC_ARGOT"),:].reset_index()["id"]
+ except KeyError:
+ anno_cc = pd.Series([])
+
+ try:
+ anno_mf = handle.loc[(target[1], "MF_ARGOT"),:].reset_index()["id"]
+ except KeyError:
+ anno_mf = pd.Series([])
+
+ try:
+ anno_qsec = [handle.loc[(target[1], "qseq"),:].reset_index().iloc[0,1]]
+ except KeyError:
+ anno_qsec = list()
+
+ try:
+ anno_de = [handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]]
+ except KeyError:
+ anno_de = list()
+
+ _annotations_ = [{
+ "db_xref":[f"GO:{str(go)}" for go in pd.concat([anno_bp, anno_cc, anno_mf])],
+ "translation": anno_qsec,
+ "product": anno_de
+ }]
+
+
+ #calls
+ receiver = []
+ for call,*args in calls:
+ receiver.extend(call.process(*args, target=target))
+
+ #output
+ return _annotations_
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
new file mode 100644
index 0000000..521797e
--- /dev/null
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -0,0 +1,28 @@
+#read_tab_pannzer_gene.py
+
+#read_tab_pannzer_CDS
+
+import pandas as pd
+
+class Plugin:
+    """Build the gene-level "note" annotation for one target from the annotation table passed as handle."""
+    def process(self, handle, metadata, calls:list=[], target=None):
+        """Return [{"note": [...]}]: the second column of the first (target[1], "DE") row, or [] when that key is absent."""
+        #initialisation
+        try:
+            anno_de = [handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]]  # one-element list: the gene plugin .extend()s its note list with this, a bare str would be split into characters
+        except KeyError:
+            anno_de = []
+
+        _annotations_ = [{
+            "note": anno_de
+        }]
+
+
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=target))  # sub-plugin results are collected but not used in the output
+
+        #output
+        return _annotations_
\ No newline at end of file
diff --git a/core.py b/core.py
new file mode 100644
index 0000000..03dfa18
--- /dev/null
+++ b/core.py
@@ -0,0 +1,13 @@
+#core.py
+
+import importlib
+
+class app:
+ def __init__(self, plugins:list=[]):
+ assert plugins != [], "No plugins specified"
+
+ self.plugins = plugins
+
+ def run(self):
+ for plugin,*args in self.plugins:
+ plugin.process(*args)
\ No newline at end of file
diff --git a/main.py b/main.py
index 4e941a0..479331e 100644
--- a/main.py
+++ b/main.py
@@ -1,153 +1,83 @@
-#!/usr/bin/python3
-# -*-coding:utf8
-
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-from Bio.SeqRecord import SeqRecord
-from joblib import Parallel, delayed
-from tqdm import tqdm
-import multiprocessing
-import os
-import pandas as pd
-import re
-import sys
-
-def read_anno(file):
- anno = pd.read_csv(file, sep="\t")
- anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
- return anno
-
-def read_gff(file):
- gff = pd.read_csv(file, sep="\t")
- gff = gff.reset_index()
- gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
- gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
- gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
- gff = gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
- gff["start"].apply(lambda x: x-1)
- return gff
-
-def read_config(file):
- config = {"description":None, "division":None, "molecule_type":None, "organism":None, "project":None, "taxonomy":None, "topology":None, "transl_table":None}
- for line in [line.rsplit("\n")[0] for line in file.readlines()]:
- config[re.split(r":", line)[0].lower()]=re.split(r":", line)[1]
- return config
-
-def mergeLocations(_locationArray_):
- return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
-def init_features(sec, gene, gff):
- ft_table = gff.loc[(sec, gene),:].reset_index()
- ft_table_mRNA = ft_table[ft_table["ft_type"] == "mRNA"].iloc[0,:]
- ft_table_CDSs = [x for i, x in ft_table[ft_table["ft_type"] == "CDS"].iterrows()]
- tmp_ft_table_3UTR = ft_table[ft_table["ft_type"] == "three_prime_UTR"]
- ft_table_3UTR = pd.Series(dtype="float64") if tmp_ft_table_3UTR.empty else tmp_ft_table_3UTR.iloc[0,:]
- tmp_ft_table_5UTR = ft_table[ft_table["ft_type"] == "five_prime_UTR"]
- ft_table_5UTR = pd.Series(dtype="float64") if tmp_ft_table_5UTR.empty else tmp_ft_table_5UTR.iloc[0,:]
-
- return {
- "location":FeatureLocation(int(ft_table_mRNA[1]),int(ft_table_mRNA[2]),(1,-1)[ft_table_mRNA[3] == "-"]),
- "qualifiers":{"gene":gene,"note":list()},
- "type":"gene"
- },{
- "location":mergeLocations([FeatureLocation(int(CDS[1]),int(CDS[2]),(1,-1)[CDS[3] == "-"]) for CDS in ft_table_CDSs]),
- "qualifiers":{"gene":gene},
- "type":"mRNA"
- },{
- "location":mergeLocations([FeatureLocation(int(CDS[1]),int(CDS[2]),(1,-1)[CDS[3] == "-"]) for CDS in ft_table_CDSs]),
- "qualifiers":{"gene":gene,"product":list(),"note":list(),"db_xref":list(),"translation":list(),"transl_table":11},
- "type":"CDS"
- },{
- "location": None if ft_table_3UTR.empty else FeatureLocation(int(ft_table_3UTR[1]), int(ft_table_3UTR[2]), (1,-1)[ft_table_3UTR[3] == "-"]),
- "qualifiers":{"gene":gene},
- "type":"3'UTR"
- },{
- "location":None if ft_table_5UTR.empty else FeatureLocation(int(ft_table_5UTR[1]), int(ft_table_5UTR[2]), (1,-1)[ft_table_5UTR[3] == "-"]),
- "qualifiers":{"gene":gene},
- "type":"5'UTR"}
-
-def merge(record, anno, gff, conf, out_dir):
- sec = record.id
-
- #Record initialisation
- _record_ = SeqRecord(
- record.seq,
- id=sec,
- dbxrefs=["Project:" + conf["project"]],
- annotations={"division":conf["division"],"molecule_type":conf["molecule_type"],"organism":conf["organism"],"taxonomy":conf["taxonomy"],"topology":conf["topology"]},
- description=conf["description"])
-
- #Source feature
- ft_table = gff.loc[(sec, slice(None), "contig"), :].reset_index()
- _source_ = SeqFeature(FeatureLocation(int(ft_table.iloc[0,3]),int(ft_table.iloc[0,4]),(1,-1)[ft_table.iloc[0,5] == "-"]),type="source",qualifiers={"organism":"test","mol_type":"genomic DNA","db_xref":list()})
- _record_.features.append(_source_)
-
- #GENE/MRNA/CDS/3UTR/5UTR features
- for gene in gff.loc[(sec, slice(None), "gene"),:].reset_index()["sub_seq_id"].apply(lambda x: x+"-mRNA-1"):
- _gene_, _mRNA_, _CDS_, _3UTR_, _5UTR_ = init_features(sec, gene, gff)
-
- try:
- anno_table = anno.loc[(gene),:].reset_index()
- anno_bp = anno.loc[(gene, "BP_ARGOT"),:].reset_index()["id"]
- anno_cc = anno.loc[(gene, "CC_ARGOT"),:].reset_index()["id"]
- anno_mf = anno.loc[(gene, "MF_ARGOT"),:].reset_index()["id"]
- _CDS_["qualifiers"]["db_xref"] = ["GO:" + str(go) for go in pd.concat([anno_bp, anno_cc, anno_mf])]
- _CDS_["qualifiers"]["translation"] = anno.loc[(gene, "qseq"),:].reset_index().iloc[0,1]
- _CDS_["qualifiers"]["transl_table"] = conf["transl_table"]
- _gene_["qualifiers"]["note"] = _CDS_["qualifiers"]["product"] = anno.loc[(gene, "DE"),:].reset_index().iloc[0,1]
- except KeyError:
- pass
-
- for feature in [_gene_, _mRNA_, _CDS_, _3UTR_, _5UTR_]:
- if feature["location"]:
- _record_.features.append(SeqFeature(feature["location"], type=feature["type"], qualifiers=feature["qualifiers"]))
-
- #Exon feature
- ft_table = gff.loc[sec, slice(None), "exon"].reset_index()
- _record_.features.extend([SeqFeature(FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"]=="-"]), type="exon", qualifiers={}) for i,exon in ft_table.iterrows()])
-
- #Print EMBL entry in output folder
- with open(out_dir + "/" + sec + ".dat", "w") as file:
- print(_record_.format("embl"), file=file)
- file.close()
-
-if __name__ == "__main__":
-
- GFF_FILE = FASTA_FILE = ANNO_FILE = CONF_FILE = None
- OUT_DIR = "out"
-
- args = sys.argv[1:]
- for i in [0,2,4,6,8]:
- if args[i] in ["-gff", "-g"]: GFF_FILE = args[i+1]
- elif args[i] in ["-fasta", "-f"]: FASTA_FILE = args[i+1]
- elif args[i] in ["-anno", "-a"]: ANNO_FILE = args[i+1]
- elif args[i] in ["-conf", "-c"]: CONF_FILE = args[i+1]
- elif args[i] in ["-out", "-o"]: OUT_DIR = args[i+1]
-
- if not os.path.exists(OUT_DIR):
- os.makedirs(OUT_DIR)
-
- CONF = None
- with open(CONF_FILE) as conf_file:
- CONF = read_config(conf_file)
- conf_file.close()
- print("Configuration file reading : DONE!")
-
- ANNO = None
- with open(ANNO_FILE) as anno_file:
- ANNO = read_anno(anno_file)
- anno_file.close()
- print("Annotation file reading : DONE!")
-
- GFF = None
- with open(GFF_FILE) as gff_file:
- GFF = read_gff(gff_file)
- gff_file.close()
- print("Prediction file reading : DONE!")
-
- with open(FASTA_FILE) as fasta_file:
- num_cores = multiprocessing.cpu_count()
- records = list(SeqIO.parse(fasta_file, "fasta"))
- processed_list = Parallel(n_jobs=num_cores)(delayed(merge)(record, ANNO, GFF, CONF, OUT_DIR) for record in tqdm(records))
- fasta_file.close()
\ No newline at end of file
+#main.py
+
+from core import app
+
+from Bio import SeqIO
+import pandas as pd
+import re
+import importlib
+
+def fasta2handle(file_path):
+ with open(file_path) as handle:
+ return list(SeqIO.parse(handle, "fasta"))
+
+def gff_maker2handle(file_path):
+ with open(file_path) as handle:
+ gff = pd.read_csv(handle, sep="\t")
+ gff = gff.reset_index()
+ gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+ gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+ gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+ return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
+
+def tab_pannzer2handle(file_path):
+ with open(file_path) as handle:
+ anno = pd.read_csv(handle, sep="\t")
+ anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
+ return anno
+
+if __name__ == "__main__":
+
+ _GLOBALS_ = {
+ "handles":dict(),
+ "plugins":dict(),
+ "metadata":dict()
+ }
+
+ _GLOBALS_["handles"]["fasta"] = fasta2handle("files/sequences.fasta")
+ _GLOBALS_["handles"]["gff_maker"] = gff_maker2handle("files/data.gff")
+ _GLOBALS_["handles"]["tab_panzer"] = tab_pannzer2handle("files/anno.out")
+
+ _GLOBALS_["plugins"]["read_fasta"] = importlib.import_module(".read_fasta","Plugins").Plugin()
+
+ _GLOBALS_["plugins"]["read_gff_maker_3UTR"] = importlib.import_module(".read_gff_maker_3UTR","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_5UTR"] = importlib.import_module(".read_gff_maker_5UTR","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_CDS"] = importlib.import_module(".read_gff_maker_CDS","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_exon"] = importlib.import_module(".read_gff_maker_exon","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_gff_maker_source"] = importlib.import_module(".read_gff_maker_source","Plugins").Plugin()
+
+ _GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
+ _GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
+
+
+ _GLOBALS_["metadata"]["project"] = "temp"
+ _GLOBALS_["metadata"]["division"] = "INV"
+ _GLOBALS_["metadata"]["taxonomy"] = "29031"
+ _GLOBALS_["metadata"]["organism"] = "Phlebotomus papatasi"
+ _GLOBALS_["metadata"]["molecule_type"] = "genomic DNA"
+ _GLOBALS_["metadata"]["topology"] = "linear"
+ _GLOBALS_["metadata"]["description"] = "description"
+ _GLOBALS_["metadata"]["transl_table"] = 0
+
+ app = app(
+ [
+ (_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
+ (_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+ (_GLOBALS_["plugins"]["read_gff_maker_source"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"],[])
+ (_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+ (_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
+ ]),
+ (_GLOBALS_["plugins"]["read_gff_maker_mRNA"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
+ (_GLOBALS_["plugins"]["read_gff_maker_CDS"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+ (_GLOBALS_["plugins"]["read_tab_pannzer_CDS"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
+ ]),
+ (_GLOBALS_["plugins"]["read_gff_maker_3UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
+ (_GLOBALS_["plugins"]["read_gff_maker_5UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
+ ]),
+ (_GLOBALS_["plugins"]["read_gff_maker_exon"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
+ ])
+ ])
+ app.run()
\ No newline at end of file
From a3ad89bf3fd545db62f60b7bcf25f1fd15c76094 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:55:22 +0200
Subject: [PATCH 02/22] Delete read_gff_maker_source.py
removed this plugin as it is contained in the read_gff_maker_main plugin.
---
Plugins/read_gff_maker_source.py | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 Plugins/read_gff_maker_source.py
diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
deleted file mode 100644
index f8c0bce..0000000
--- a/Plugins/read_gff_maker_source.py
+++ /dev/null
@@ -1 +0,0 @@
-#read_gff_maker_source.py
\ No newline at end of file
From 128eb82a0a81c3cd0a7b205f4362624248b23a1d Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:57:22 +0200
Subject: [PATCH 03/22] upload referring to read_gff_maker_source removal
---
main.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/main.py b/main.py
index 479331e..59cd318 100644
--- a/main.py
+++ b/main.py
@@ -47,7 +47,6 @@ def tab_pannzer2handle(file_path):
_GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
_GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
_GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_source"] = importlib.import_module(".read_gff_maker_source","Plugins").Plugin()
_GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
_GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
@@ -66,7 +65,6 @@ def tab_pannzer2handle(file_path):
[
(_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
(_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
- (_GLOBALS_["plugins"]["read_gff_maker_source"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"],[])
(_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
(_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
]),
From 6ce0ed2a658cc569fdb1c7b8d172426efa1ac610 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:59:18 +0200
Subject: [PATCH 04/22] upload referring to read_gff_maker_source removal
---
Plugins/__init__.py | 1 -
Plugins/read_gff_maker_main.py | 12 ++++++++++++
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 6fc65d5..8937431 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -8,7 +8,6 @@
from Plugins.read_gff_maker_CDS import Plugin
from Plugins.read_gff_maker_exon import Plugin
from Plugins.read_gff_maker_mRNA import Plugin
-from Plugins.read_gff_maker_source import Plugin
from Plugins.read_tab_pannzer_CDS import Plugin
from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
index 2043ade..eac0f9c 100644
--- a/Plugins/read_gff_maker_main.py
+++ b/Plugins/read_gff_maker_main.py
@@ -9,6 +9,18 @@ class Plugin:
def process(self, handle, metadata, calls:list=[], target=None):
+        location = (handle.loc[(target, slice(None), "contig"),:].reset_index())  # "contig" rows of this sequence; index levels become columns
+        _feature_ = [
+            SeqFeature(
+                FeatureLocation(int(location.iloc[0,3]), int(location.iloc[0,4]), (1,-1)[location.iloc[0,5] == "-"]),  # columns 3/4/5 of the first row: start, stop, strand
+                type="source",
+                qualifiers={
+                    "organism":metadata["organism"],  # was misspelled "oganism", so no "organism" qualifier was ever set
+                    "mol_type":metadata["molecule_type"],
+                    "db_xref":list()})]
+
+        yield _feature_
+
for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
#initialize features
From c0f313761cd6ca2e69ec9a5ed41052c1d44e3a73 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 09:00:02 +0200
Subject: [PATCH 05/22] read_gff_maker_exon set up
---
Plugins/read_gff_maker_exon.py | 29 ++++++++++++++++++++++++++++-
1 file changed, 28 insertions(+), 1 deletion(-)
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index 711c5ca..e4b84ef 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -1 +1,28 @@
-#read_gff_maker_exon.py
\ No newline at end of file
+#read_gff_maker_exon.py
+
+import pandas as pd
+import re
+
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+
+class Plugin:
+
+ def process(self, handle, metadata, calls:list=[], target=None):
+ exons = handle.loc[(target, slice(None), "exon"),:].reset_index()
+
+ for index, exon in exons.iterrows():
+ _features_subset_ = [
+ SeqFeature(
+ FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"] == "-"]),
+ type="exon",
+ qualifiers={}
+ )]
+
+ #calls
+ receiver = []
+ for call, *args in calls:
+ receiver.extend(call.process(*args, target=(target)))
+
+
+ yield _features_subset_
From 0ec1b6c891cbb0eefc3cbfe375b1d515b3386af2 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:04:56 +0200
Subject: [PATCH 06/22] Create config.info (example file)
---
files/config.info | 49 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 files/config.info
diff --git a/files/config.info b/files/config.info
new file mode 100644
index 0000000..1aa75e3
--- /dev/null
+++ b/files/config.info
@@ -0,0 +1,49 @@
+# Axel Giottonini
+# test config.info
+# 14.05.2021
+
+
+project:temp
+division:inv
+taxonomy:29031
+organism:Phlebotomus papatasi
+molecule_type:genomic DNA
+topology:linear
+description:empty
+transl_table:0
+
+
+
+fasta2handle:.fasta2handle,Plugins
+gff_maker2handle:.gff_maker2handle,Plugins
+tab_pannzer2handle:.tab_pannzer2handle,Plugins
+read_fasta:.read_fasta,Plugins
+read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
+read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
+read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
+read_gff_maker_exon:.read_gff_maker_exon,Plugins
+read_gff_maker_gene:.read_gff_maker_gene,Plugins
+read_gff_maker_main:.read_gff_maker_main,Plugins
+read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
+read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
+read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
+
+
+
+fasta:fasta2handle,files/sequences.fasta
+gff_maker:gff_maker2handle,files/data.gff
+tab_pannzer:tab_pannzer2handle,files/anno.out
+
+
+
+-read_fasta,fasta
+--read_gff_maker_main,gff_maker
+---read_gff_maker_gene,gff_maker
+----read_tab_pannzer_gene,tab_pannzer
+---read_gff_maker_mRNA,gff_maker
+---read_gff_maker_CDS,gff_maker
+----read_tab_pannzer_CDS,tab_pannzer
+---read_gff_maker_3UTR,gff_maker
+---read_gff_maker_5UTR,gff_maker
+--read_gff_maker_exon,gff_maker
+
From e17f02fe47705552fbdae9b16bb6b03291509e6b Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:06:07 +0200
Subject: [PATCH 07/22] Create plugins for handling files
---
Plugins/__init__.py | 4 ++++
Plugins/fasta2handle.py | 9 +++++++++
Plugins/gff_maker2handle.py | 16 ++++++++++++++++
Plugins/tab_pannzer2handle.py | 11 +++++++++++
4 files changed, 40 insertions(+)
create mode 100644 Plugins/fasta2handle.py
create mode 100644 Plugins/gff_maker2handle.py
create mode 100644 Plugins/tab_pannzer2handle.py
diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 8937431..583ac6d 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -1,5 +1,9 @@
#__init.py__
+from Plugins.fasta2handle import Plugin
+from Plugins.gff_maker2handle import Plugin
+from Plugins.tab_pannzer2handle import Plugin
+
from Plugins.read_fasta import Plugin
from Plugins.read_gff_maker_gene import Plugin
diff --git a/Plugins/fasta2handle.py b/Plugins/fasta2handle.py
new file mode 100644
index 0000000..b9e319b
--- /dev/null
+++ b/Plugins/fasta2handle.py
@@ -0,0 +1,9 @@
+#fasta2handle.py
+
+from Bio import SeqIO
+
+class Plugin:
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file
diff --git a/Plugins/gff_maker2handle.py b/Plugins/gff_maker2handle.py
new file mode 100644
index 0000000..39124c2
--- /dev/null
+++ b/Plugins/gff_maker2handle.py
@@ -0,0 +1,16 @@
+#gff_maker2handle.py
+
+import pandas as pd
+import re
+
+class Plugin:
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ gff = pd.read_csv(handle, sep="\t")
+ gff = gff.reset_index()
+ gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+ gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+ gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+ return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
+
\ No newline at end of file
diff --git a/Plugins/tab_pannzer2handle.py b/Plugins/tab_pannzer2handle.py
new file mode 100644
index 0000000..ac7c532
--- /dev/null
+++ b/Plugins/tab_pannzer2handle.py
@@ -0,0 +1,11 @@
+#tab_pannzer2handle.py
+
+import pandas as pd
+
+class Plugin:
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ anno = pd.read_csv(handle, sep="\t")
+ anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
+ return anno
\ No newline at end of file
From d5a80b94f85394f0f4d6ce0d157a66d1f7338fe5 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:07:02 +0200
Subject: [PATCH 08/22] Upload main to work with the new plugin (2handle) and
config file
---
main.py | 111 +++++++++++++++++++++++++++-----------------------------
1 file changed, 54 insertions(+), 57 deletions(-)
diff --git a/main.py b/main.py
index 59cd318..4d97e6f 100644
--- a/main.py
+++ b/main.py
@@ -7,25 +7,6 @@
import re
import importlib
-def fasta2handle(file_path):
- with open(file_path) as handle:
- return list(SeqIO.parse(handle, "fasta"))
-
-def gff_maker2handle(file_path):
- with open(file_path) as handle:
- gff = pd.read_csv(handle, sep="\t")
- gff = gff.reset_index()
- gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
- gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
- gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
- return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
-
-def tab_pannzer2handle(file_path):
- with open(file_path) as handle:
- anno = pd.read_csv(handle, sep="\t")
- anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
- return anno
-
if __name__ == "__main__":
_GLOBALS_ = {
@@ -34,48 +15,64 @@ def tab_pannzer2handle(file_path):
"metadata":dict()
}
- _GLOBALS_["handles"]["fasta"] = fasta2handle("files/sequences.fasta")
- _GLOBALS_["handles"]["gff_maker"] = gff_maker2handle("files/data.gff")
- _GLOBALS_["handles"]["tab_panzer"] = tab_pannzer2handle("files/anno.out")
+ _PROCESSING_ = {
+ "metadata":[],
+ "plugins":[],
+ "handles":[],
+ "workflow":[]
+ }
+ current_field = None
+ with open("files/config.info") as handle:
+ for line in handle:
+ current_line = line.rstrip("\n")
+
+ if line[0] == "#": continue
+ if current_line == "": continue
- _GLOBALS_["plugins"]["read_fasta"] = importlib.import_module(".read_fasta","Plugins").Plugin()
+ if current_field:
+ assert current_field in ["metadata", "plugins", "handles", "workflow"]
+ if re.match(r"^(<\/)(\w+)(>)$", current_line):
+ current_field = None
+ else:
+ _PROCESSING_[current_field].append(current_line)
+ else:
+ assert re.match(r"^(<)(\w+)(>)$", current_line)
+ assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
+ current_field = current_line[1:-1]
+ handle.close()
- _GLOBALS_["plugins"]["read_gff_maker_3UTR"] = importlib.import_module(".read_gff_maker_3UTR","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_5UTR"] = importlib.import_module(".read_gff_maker_5UTR","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_CDS"] = importlib.import_module(".read_gff_maker_CDS","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_exon"] = importlib.import_module(".read_gff_maker_exon","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
+ for element in _PROCESSING_["metadata"]:
+ _GLOBALS_["metadata"][element.split(":")[0]] = element.split(":")[1]
+ for element in _PROCESSING_["plugins"]:
+ _GLOBALS_["plugins"][element.split(":")[0]] = importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin()
+ for element in _PROCESSING_["handles"]:
+ _GLOBALS_["handles"][element.split(":")[0]] = _GLOBALS_["plugins"][element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1])
- _GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
- _GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
+ max_level = 0
+ for i in range(len(_PROCESSING_["workflow"])):
+ element = _PROCESSING_["workflow"][i]
+ regex = re.compile(r"^(-)+")
+ level = len(regex.search(element).group())
+ max_level = max(max_level, level)
+ _PROCESSING_["workflow"][i] = (level, element[level:], [])
+ _PROCESSING_["workflow"].insert(0, (0,None,[]))
- _GLOBALS_["metadata"]["project"] = "temp"
- _GLOBALS_["metadata"]["division"] = "INV"
- _GLOBALS_["metadata"]["taxonomy"] = "29031"
- _GLOBALS_["metadata"]["organism"] = "Phlebotomus papatasi"
- _GLOBALS_["metadata"]["molecule_type"] = "genomic DNA"
- _GLOBALS_["metadata"]["topology"] = "linear"
- _GLOBALS_["metadata"]["description"] = "description"
- _GLOBALS_["metadata"]["transl_table"] = 0
+ for i in range(max_level, -1, -1):
+ for j in range(len(_PROCESSING_["workflow"])):
+ element = _PROCESSING_["workflow"][j]
+ if element[0] == i:
+ for k in range(1, j+1):
+ if _PROCESSING_["workflow"][j-k] is not None and _PROCESSING_["workflow"][j-k][0] == i-1:
+ _PROCESSING_["workflow"][j-k][2].append((
+ _GLOBALS_["plugins"][_PROCESSING_["workflow"][j][1].split(",")[0]],
+ _GLOBALS_["handles"][_PROCESSING_["workflow"][j][1].split(",")[1]],
+ _GLOBALS_["metadata"],
+ _PROCESSING_["workflow"][j][2]
+ ))
+ _PROCESSING_["workflow"][j] = None
+ break
+ _PROCESSING_["workflow"] = [x for x in _PROCESSING_["workflow"] if x is not None]
- app = app(
- [
- (_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
- (_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
- (_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
- (_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
- ]),
- (_GLOBALS_["plugins"]["read_gff_maker_mRNA"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
- (_GLOBALS_["plugins"]["read_gff_maker_CDS"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
- (_GLOBALS_["plugins"]["read_tab_pannzer_CDS"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
- ]),
- (_GLOBALS_["plugins"]["read_gff_maker_3UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
- (_GLOBALS_["plugins"]["read_gff_maker_5UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
- ]),
- (_GLOBALS_["plugins"]["read_gff_maker_exon"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
- ])
- ])
+ app = app(_PROCESSING_["workflow"][0][2])
app.run()
\ No newline at end of file
From 6b9f438d2520b6298deeb00424884403d84f21c5 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Sat, 15 May 2021 16:52:27 +0200
Subject: [PATCH 09/22] Refactoring
---
core.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
main.py | 72 +--------------------------
2 files changed, 145 insertions(+), 76 deletions(-)
diff --git a/core.py b/core.py
index 03dfa18..3bbc502 100644
--- a/core.py
+++ b/core.py
@@ -1,13 +1,152 @@
#core.py
import importlib
+import re
class app:
- def __init__(self, plugins:list=[]):
- assert plugins != [], "No plugins specified"
+ def __init__(self, config_path):
+ self.metadata = None
+ self.plugins = None
+ self.handles = None
+ self.workflow = None
- self.plugins = plugins
+ config = self.read_config(config_path)
+ self.set_metadata(config["metadata"])
+ self.set_plugins(config["plugins"])
+ self.set_handles(config["handles"])
+ self.set_workflow(config["workflow"])
+
+ """
+ This function reads the config file which is divided into four fields :
+ - metadata: contains the general and shared information for EMBLish
+ as key:value pairs
+ - plugins: contains the list of the plugins that will be used as a
+ triplet plugin_key:plugin_name,plugin_package
+ - handles: contains the list of files that will be used as inputs as
+ a triplet handle_key:plugin,file_path
+ - workflow: contains a hierarchical list of the different steps to run
+ as a pair plugin_key,handle_key
+ """
+ def read_config(self, config_path):
+ config = {
+ "metadata":[],
+ "plugins":[],
+ "handles":[],
+ "workflow":[]
+ }
+
+ current_field = None
+ current_line = None
+
+ with open(config_path) as handle:
+ for line in handle:
+ current_line = line.rstrip("\n")
+
+ #Comments and blank lines ignore
+ if current_line == "": continue
+ if current_line[0] == "#": continue
+
+ #Filling the config object with the content of the configuration file
+ if current_field:
+ assert current_field in ["metadata", "plugins", "handles", "workflow"]
+
+ #Checking if we are at the end of a field
+ if re.match(r"^(<\/)(\w+)(>)$", current_line):
+ current_field = None
+ else:
+ config[current_field].append(current_line)
+
+ else:
+ assert re.match(r"^(<)(\w+)(>)$", current_line)
+ assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
+
+ current_field = current_line[1:-1]
+
+ handle.close()
+ return config
+
+ """
+ This function converts the array containing the metadata into a dictionary
+ """
+    def set_metadata(self, array):  # array holds the raw "key:value" lines of the metadata field
+        self.metadata = {parts[0]:parts[1] for parts in (element.split(":", 1) for element in array)}  # maxsplit=1 keeps ":" inside values
+
+ """
+ This function converts the array containing the plugins parameters into a dictionary
+ with plugins to call with their key
+ """
+ def set_plugins(self, array):
+ self.plugins = {element.split(":")[0]:importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin() for element in array}
+
+ """
+ This function converts the array containing the handles parameters into a dictionary
+ with handles to call with their key
+ """
+    def set_handles(self, array):  # array holds the raw "handle_key:plugin_key,file_path" lines
+        self.handles = {entry[0]:self.plugins[entry[1].split(",", 1)[0]].process(entry[1].split(",", 1)[1]) for entry in (element.split(":", 1) for element in array)}  # maxsplit=1 keeps ":" and "," inside file paths
+
+ """
+ """
+ def set_workflow(self, array):
+ temp = self.refactor_workflow(array)
+ temp = self.merge_workflow(temp)
+ self.workflow = self.convert_workflow_task(temp[0])
+
+ """
+ Convert each list element into a triplet (level, task, [])
+ """
+ def refactor_workflow(self, array):
+
+ for i in range(len(array)):
+ element = array[i]
+ level = len(re.search(r"^(-)+", element).group())
+
+ array[i] = (level, element[level:], [])
+
+ return array
+
+ """
+ Order the elements and create the hierarchical nodes
+ """
+ def merge_workflow(self, array):
+ array.insert(0, (0,None,[]))
+
+ max_level = max(array, key = lambda element: element[0])[0]
+
+ for i in range(max_level, -1, -1):
+ for j in range(len(array)):
+ element = array[j]
+
+ if element[0] == i:
+ for k in range(1, j+1):
+ if (
+ array[j-k] is not None and
+ array[j-k][0] == i-1
+ ):
+ array[j-k][2].append(element)
+ array[j] = None
+ break
+
+ array = [element for element in array if element != None]
+
+ return array
+
+ """
+ """
+ def convert_workflow_task(self, task):
+ if task[1]:
+ return (
+ self,
+ task[1].split(",")[0],
+ task[1].split(",")[1],
+ [self.convert_workflow_task(sub_task) for sub_task in task[2]]
+ )
+
+ return [self.convert_workflow_task(sub_task) for sub_task in task[2]]
+
+ """
+ """
def run(self):
- for plugin,*args in self.plugins:
- plugin.process(*args)
\ No newline at end of file
+ for app, key_plugin, *args in self.workflow:
+ app.plugins[key_plugin].process(app, *args)
\ No newline at end of file
diff --git a/main.py b/main.py
index 4d97e6f..1331b9a 100644
--- a/main.py
+++ b/main.py
@@ -2,77 +2,7 @@
from core import app
-from Bio import SeqIO
-import pandas as pd
-import re
-import importlib
-
if __name__ == "__main__":
- _GLOBALS_ = {
- "handles":dict(),
- "plugins":dict(),
- "metadata":dict()
- }
-
- _PROCESSING_ = {
- "metadata":[],
- "plugins":[],
- "handles":[],
- "workflow":[]
- }
- current_field = None
- with open("files/config.info") as handle:
- for line in handle:
- current_line = line.rstrip("\n")
-
- if line[0] == "#": continue
- if current_line == "": continue
-
- if current_field:
- assert current_field in ["metadata", "plugins", "handles", "workflow"]
- if re.match(r"^(<\/)(\w+)(>)$", current_line):
- current_field = None
- else:
- _PROCESSING_[current_field].append(current_line)
- else:
- assert re.match(r"^(<)(\w+)(>)$", current_line)
- assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
- current_field = current_line[1:-1]
- handle.close()
-
- for element in _PROCESSING_["metadata"]:
- _GLOBALS_["metadata"][element.split(":")[0]] = element.split(":")[1]
- for element in _PROCESSING_["plugins"]:
- _GLOBALS_["plugins"][element.split(":")[0]] = importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin()
- for element in _PROCESSING_["handles"]:
- _GLOBALS_["handles"][element.split(":")[0]] = _GLOBALS_["plugins"][element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1])
-
- max_level = 0
- for i in range(len(_PROCESSING_["workflow"])):
- element = _PROCESSING_["workflow"][i]
- regex = re.compile(r"^(-)+")
- level = len(regex.search(element).group())
- max_level = max(max_level, level)
- _PROCESSING_["workflow"][i] = (level, element[level:], [])
-
- _PROCESSING_["workflow"].insert(0, (0,None,[]))
-
- for i in range(max_level, -1, -1):
- for j in range(len(_PROCESSING_["workflow"])):
- element = _PROCESSING_["workflow"][j]
- if element[0] == i:
- for k in range(1, j+1):
- if _PROCESSING_["workflow"][j-k] is not None and _PROCESSING_["workflow"][j-k][0] == i-1:
- _PROCESSING_["workflow"][j-k][2].append((
- _GLOBALS_["plugins"][_PROCESSING_["workflow"][j][1].split(",")[0]],
- _GLOBALS_["handles"][_PROCESSING_["workflow"][j][1].split(",")[1]],
- _GLOBALS_["metadata"],
- _PROCESSING_["workflow"][j][2]
- ))
- _PROCESSING_["workflow"][j] = None
- break
- _PROCESSING_["workflow"] = [x for x in _PROCESSING_["workflow"] if x is not None]
-
- app = app(_PROCESSING_["workflow"][0][2])
+ app = app("files/config.info")
app.run()
\ No newline at end of file
From 8d141313f5f5de26e1b2a51b31f7abf0c3598ca7 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Sat, 15 May 2021 16:53:15 +0200
Subject: [PATCH 10/22] Refactoring
---
Plugins/read_fasta.py | 68 ++++++++++++++---------
Plugins/read_gff_maker_3UTR.py | 60 ++++++++++++++-------
Plugins/read_gff_maker_5UTR.py | 60 ++++++++++++++-------
Plugins/read_gff_maker_CDS.py | 92 ++++++++++++++++++--------------
Plugins/read_gff_maker_exon.py | 64 +++++++++++++++-------
Plugins/read_gff_maker_gene.py | 62 +++++++++++++--------
Plugins/read_gff_maker_mRNA.py | 75 +++++++++++++++++++-------
Plugins/read_gff_maker_main.py | 81 +++++++++++++++++++---------
Plugins/read_tab_pannzer_CDS.py | 92 +++++++++++++++++++++-----------
Plugins/read_tab_pannzer_gene.py | 47 ++++++++++------
10 files changed, 473 insertions(+), 228 deletions(-)
diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
index a6dade6..ba426db 100644
--- a/Plugins/read_fasta.py
+++ b/Plugins/read_fasta.py
@@ -1,6 +1,5 @@
#plugin.py
-import importlib
import itertools
from Bio import SeqIO
@@ -8,27 +7,46 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
-
- for record in handle:
-
- #initialize record
- _record_ = SeqRecord(
- record.seq,
- record.id,
- dbxrefs=["Project:" + metadata["project"]],
- annotations={"division":metadata["division"],"molecule_type":metadata["molecule_type"],"organism":metadata["organism"],"taxonomy":metadata["taxonomy"],"topology":metadata["topology"]},
- description=""
- )
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=_record_.id))
-
- #post output treatment
- _record_.features = list(itertools.chain(*receiver))
-
- #outputing
- with open(f"out/{_record_.id}.dat", "w") as o:
- print(_record_.format("embl"), file=o)
\ No newline at end of file
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqRecord(
+ pre_feature.seq,
+ pre_feature.id,
+ dbxrefs=["Project:" + metadata["project"]],
+ annotations={
+ "division":metadata["division"],
+ "molecule_type":metadata["molecule_type"],
+ "organism":metadata["organism"],
+ "taxonomy":metadata["taxonomy"],
+ "topology":metadata["topology"]},
+ description="")
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
+
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender += temp
+
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ feature.features = receiver
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+
+ for element in app.handles[key_handle]:
+ feature = self.feature_initialize(element, app.metadata)
+ receiver = self.callbacks(app, calls, (feature.id))
+ self.merge(feature, receiver)
+
+ with open(f"out/{feature.id}.dat", "w") as o:
+ print(feature.format("embl"), file=o)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
index dabca35..ef46754 100644
--- a/Plugins/read_gff_maker_3UTR.py
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -5,23 +5,47 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- try:
- location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index())
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="3'UTR",
+ qualifiers={
+ "gene":None,
+ "note":list()})
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
+
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
- _sub_features_ = [
- SeqFeature(
- FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
- type="3'UTR",
- qualifiers={
- "gene":target[1],
- "note":list()})]
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "3'UTR")))
-
- return _sub_features_
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index().iloc[0,:],
+ app.metadata)
except KeyError:
- return []
\ No newline at end of file
+ return None
+
+ feature.qualifiers["gene"] = target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "3'UTR"))
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
index 5b72281..60fae42 100644
--- a/Plugins/read_gff_maker_5UTR.py
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -5,23 +5,47 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- try:
- location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index())
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="5'UTR",
+ qualifiers={
+ "gene":None,
+ "note":list()})
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
- _sub_features_ = [
- SeqFeature(
- FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
- type="5'UTR",
- qualifiers={
- "gene":target[1],
- "note":list()})]
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "5'UTR")))
-
- return _sub_features_
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index().iloc[0,:],
+ app.metadata)
except KeyError:
- return []
\ No newline at end of file
+ return None
+
+ feature.qualifiers["gene"] = target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "5'UTR"))
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
index 7b29f18..48bcb29 100644
--- a/Plugins/read_gff_maker_CDS.py
+++ b/Plugins/read_gff_maker_CDS.py
@@ -4,47 +4,61 @@
import itertools
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-def mergeLocations(_locationArray_):
- return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
-
- _sub_features_ = [
- SeqFeature(
- mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
- type="CDS",
- qualifiers={
- "gene":target[1],
- "product":list(),
- "note":list(),
- "db_xref":list(),
- "translation":list(),
- "transl_table":metadata["transl_table"]})]
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "CDS")))
-
- annotations = list(itertools.chain(receiver))
- for annotation in annotations:
- if "product" in annotation.keys() and annotation["product"] != []:
- for sub_feature in _sub_features_:
- sub_feature.qualifiers["product"].extend(annotation["product"])
-
- if "note" in annotation.keys() and annotation["note"] != []:
- for sub_feature in _sub_features_:
- sub_feature.qualifiers["note"].extend(annotation["note"])
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ refactor_pre_feature = lambda element: FeatureLocation(
+ int(element[0]),
+ int(element[1]),
+ (1,-1)[element[2] == "-"])
+ merge_pre_feature = lambda array: array[0] if len(array) == 1 else CompoundLocation(array)
- if "db_xref" in annotation.keys() and annotation["db_xref"] != []:
- for sub_feature in _sub_features_:
- sub_feature.qualifiers["db_xref"].extend(annotation["db_xref"])
+ return SeqFeature(
+ merge_pre_feature(
+ pre_feature.apply(refactor_pre_feature, axis=1)),
+ type="CDS",
+ qualifiers={
+ "gene":None,
+ "product":list(),
+ "note":list(),
+ "db_xref":list(),
+ "translation":list(),
+ "transl_table":metadata["transl_table"]})
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+ return sender
- if "translation" in annotation.keys() and annotation["translation"] != []:
- for sub_feature in _sub_features_:
- sub_feature.qualifiers["translation"].extend(annotation["translation"])
+ """
+ """
+ def merge(self, feature, receiver):
+ for element in receiver:
+ for key in element.keys():
+ feature.qualifiers[key].extend(element[key])
+ return feature
- return _sub_features_
\ No newline at end of file
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
+ app.metadata)
+ except KeyError:
+ return None
+
+ feature.qualifiers["gene"] = target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "CDS"))
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index e4b84ef..1aeb42b 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -8,21 +8,49 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- exons = handle.loc[(target, slice(None), "exon"),:].reset_index()
-
- for index, exon in exons.iterrows():
- _features_subset_ = [
- SeqFeature(
- FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"] == "-"]),
- type="exon",
- qualifiers={}
- )]
-
- #calls
- receiver = []
- for call, *args in calls:
- receiver.extend(call.process(*args, target=(target)))
-
-
- yield _features_subset_
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="exon",
+ qualifiers={})
+
+ """
+ """
+ def multi_feature_initialize(self, pre_multi_feature, metadata):
+ for _, element in pre_multi_feature:
+ yield self.feature_initialize(element, metadata)
+
+ """
+ """
+    def callbacks(self, app, calls, target):
+        """Run every child call on ``target``; return the list of truthy results."""
+        sender = []
+        for app, key_plugin, *args in calls:
+            # target is passed last, after whatever arguments the call tuple carries
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)  # was append() with no argument: TypeError
+        return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.multi_feature_initialize(
+ app.handles[key_handle].loc[(target, slice(None), "exon"),:].reset_index().iterrows(),
+ app.metadata)
+ except KeyError:
+ return None
+
+ receiver = self.callbacks(
+ app,
+ calls,
+ target)
+ return self.merge(feature, receiver)
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
index 069b66b..9d5f7da 100644
--- a/Plugins/read_gff_maker_gene.py
+++ b/Plugins/read_gff_maker_gene.py
@@ -6,26 +6,46 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- location = (handle.loc[(target[0], target[1], "gene"),:].reset_index())
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="gene",
+ qualifiers={
+ "gene":None,
+ "note":list()})
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
- _sub_features_ = [
- SeqFeature(
- FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
- type="gene",
- qualifiers={
- "gene":target[1],
- "note":list()})]
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target[0], target[1], "gene")))
-
- annotations = list(itertools.chain(receiver))
- for annotation in annotations:
- if "note" in annotation.keys() and annotation["note"] != []:
- for sub_feature in _sub_features_:
- sub_feature.qualifiers["note"].extend(annotation["note"])
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+
+ return sender
- return _sub_features_
\ No newline at end of file
+ """
+ """
+ def merge(self, feature, receiver):
+ for element in receiver:
+ for key in element.keys():
+ feature.qualifiers[key].extend(element[key])
+ return feature
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
+ app.metadata)
+ feature.qualifiers["gene"]=target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "gene"))
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
index 7639664..e4e8bc0 100644
--- a/Plugins/read_gff_maker_mRNA.py
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -3,25 +3,62 @@
import pandas as pd
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-def mergeLocations(_locationArray_):
- return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
- locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
-
- #initialise
- _sub_features_ = [
- SeqFeature(
- mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
- type="mRNA",
- qualifiers={
- "gene":target[1]})]
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "mRNA")))
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ refactor_pre_feature = lambda element: FeatureLocation(
+ int(element[0]),
+ int(element[1]),
+ (1,-1)[element[2] == "-"]
+ )
+ merge_pre_feature = lambda array: array[0] if len(array) == 1 else CompoundLocation(array)
+
+ return SeqFeature(
+ merge_pre_feature(
+ pre_feature.apply(refactor_pre_feature, axis=1)
+ ),
+ type="mRNA",
+ qualifiers={
+ "gene":None
+ }
+ )
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
+
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+
+ return sender
+
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
+ app.metadata
+ )
+ except KeyError:
+ return None
+
+ feature.qualifiers["gene"]=target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "mRNA")
+ )
- return _sub_features_
\ No newline at end of file
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
index eac0f9c..5cc959f 100644
--- a/Plugins/read_gff_maker_main.py
+++ b/Plugins/read_gff_maker_main.py
@@ -1,35 +1,66 @@
#read_gff_maker_main.py
import pandas as pd
-import re
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
class Plugin:
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="source",
+ qualifiers={
+ "oganism":metadata["organism"],
+ "mol_type":metadata["molecule_type"],
+ "db_xref":list()})
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
- def process(self, handle, metadata, calls:list=[], target=None):
-
- location = (handle.loc[(target, slice(None), "contig"),:].reset_index())
- _feature_ = [
- SeqFeature(
- FeatureLocation(int(location.iloc[0,3]), int(location.iloc[0,4]), (1,-1)[location.iloc[0,5] == "-"]),
- type="source",
- qualifiers={
- "oganism":metadata["organism"],
- "mol_type":metadata["molecule_type"],
- "db_xref":list()})]
-
- yield _feature_
-
- for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender.append(temp)
+
+ return sender
+
+ """
+ """
+ def callbacks_with_iterator(self, app, calls, target, iterator):
+ sender = []
+
+ for element in iterator:
+ temp = self.callbacks(app, calls, (target, element))
+ if temp:
+ sender.extend(temp)
+ return sender
- #initialize features
- _features_subset_ = []
-
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=(target, gene)))
+ """
+ """
+ def merge(self, feature, receiver):
+ return [feature] + receiver
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target, slice(None), "contig"),:].reset_index().iloc[0,:],
+ app.metadata)
+ except KeyError:
+ return None
- _features_subset_ = receiver
- yield _features_subset_
\ No newline at end of file
+ try:
+ receiver = self.callbacks_with_iterator(
+ app,
+ calls,
+ target,
+ app.handles[key_handle].loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"])
+ except KeyError:
+ receiver = []
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
index 7b0fd2e..f4fcee0 100644
--- a/Plugins/read_tab_pannzer_CDS.py
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -4,45 +4,77 @@
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ #print(self.feature_initialize_db_xref(pre_feature))
+ return {
+ "db_xref": self.feature_initialize_db_xref(pre_feature),
+ "translation": self.feature_initialize_translation(pre_feature),
+ "product": self.feature_initialize_product(pre_feature)
+ }
- #initialisation
- try:
- anno_bp = handle.loc[(target[1], "BP_ARGOT"),:].reset_index()["id"]
- except KeyError:
- anno_bp = pd.Series([])
+ """
+ """
+ def feature_initialize_db_xref(self, pre_feature):
+ sender = pd.Series([])
- try:
- anno_cc = handle.loc[(target[1], "CC_ARGOT"),:].reset_index()["id"]
- except KeyError:
- anno_cc = pd.Series([])
+ for field in ["BP_ARGOT", "CC_ARGOT", "MF_ARGOT"]:
+ try:
+ sender = pd.concat([sender, pre_feature(field)["id"]])
+ except KeyError:
+ pass
+ return [f"GO:{str(element)}" for element in sender]
+ """
+ """
+ def feature_initialize_translation(self, pre_feature):
+ sender = list()
try:
- anno_mf = handle.loc[(target[1], "MF_ARGOT"),:].reset_index()["id"]
+ sender = [pre_feature("qseq").iloc[0,1]]
except KeyError:
- anno_mf = pd.Series([])
+ pass
+ return sender
+ """
+ """
+ def feature_initialize_product(self, pre_feature):
+ sender = list()
try:
- anno_qsec = [handle.loc[(target[1], "qseq"),:].reset_index().iloc[0,1]]
+ sender = [pre_feature("DE").iloc[0,1]]
except KeyError:
- anno_qsec = list()
+ pass
+ return sender
- try:
- anno_de = [handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]]
- except KeyError:
- anno_de = list()
-
- _annotations_ = [{
- "db_xref":[f"GO:{str(go)}" for go in pd.concat([anno_bp, anno_cc, anno_mf])],
- "translation": anno_qsec,
- "product": anno_de
- }]
+ """
+ """
+ def callbacks(self, app, calls, target):
+ sender = []
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender += temp
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=target))
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
+
+ """
+ """
+ def process(self, app, key_handle, calls:list=[], target=None):
+
+ feature = self.feature_initialize(
+ (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
+ app.metadata)
+
+ receiver = self.callbacks(
+ app,
+ calls,
+ target
+ )
- #output
- return _annotations_
\ No newline at end of file
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
index 521797e..3824bc6 100644
--- a/Plugins/read_tab_pannzer_gene.py
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -1,28 +1,45 @@
#read_tab_pannzer_gene.py
-#read_tab_pannzer_CDS
-
import pandas as pd
class Plugin:
- def process(self, handle, metadata, calls:list=[], target=None):
+ def feature_initialize(self, pre_feature, metadata):
+ return {
+ "note": self.feature_initialize_note(pre_feature)
+ }
- #initialisation
+ def feature_initialize_note(self, pre_feature):
+ sender = list()
try:
- anno_de = handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]
+ sender = [pre_feature("DE").iloc[0,1]]
except KeyError:
- anno_de = []
+ pass
+ return sender
- _annotations_ = [{
- "note": anno_de
- }]
+ def callbacks(self, app, calls, target):
+ sender = []
+ for app, key_plugin, *args in calls:
+ temp = app.plugins[key_plugin].process(app, *args, target)
+ if temp:
+ sender += temp
- #calls
- receiver = []
- for call,*args in calls:
- receiver.extend(call.process(*args, target=target))
+ return sender
+
+ def merge(self, feature, receiver):
+ return feature
+
+ def process(self, app, key_handle, calls:list=[], target=None):
+
+ feature = self.feature_initialize(
+ (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
+ app.metadata)
+
+ receiver = self.callbacks(
+ app,
+ calls,
+ target
+ )
- #output
- return _annotations_
\ No newline at end of file
+ return self.merge(feature, receiver)
\ No newline at end of file
From 36c375948dde1098282f53f556e95dc3ca4f879e Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:24:25 +0200
Subject: [PATCH 11/22] Reader refactoring
plugins rewriting, hierarchical superclass creation
---
Plugins/__read__.py | 60 +++++++++++++++++++++++++
Plugins/__read_gff_maker__.py | 21 +++++++++
Plugins/__read_tab_pannzer__.py | 10 +++++
Plugins/read_fasta.py | 26 +++++------
Plugins/read_gff_maker_3UTR.py | 22 ++--------
Plugins/read_gff_maker_5UTR.py | 22 ++--------
Plugins/read_gff_maker_CDS.py | 22 ++++------
Plugins/read_gff_maker_exon.py | 21 ++-------
Plugins/read_gff_maker_gene.py | 20 ++-------
Plugins/read_gff_maker_mRNA.py | 23 ++--------
Plugins/read_gff_maker_misc_feature.py | 32 ++++++++++++++
Plugins/read_gff_maker_source.py | 61 ++++++++++++++++++++++++++
Plugins/read_tab_pannzer_CDS.py | 22 ++--------
Plugins/read_tab_pannzer_gene.py | 9 ++--
14 files changed, 228 insertions(+), 143 deletions(-)
create mode 100644 Plugins/__read__.py
create mode 100644 Plugins/__read_gff_maker__.py
create mode 100644 Plugins/__read_tab_pannzer__.py
create mode 100644 Plugins/read_gff_maker_misc_feature.py
create mode 100644 Plugins/read_gff_maker_source.py
diff --git a/Plugins/__read__.py b/Plugins/__read__.py
new file mode 100644
index 0000000..0950107
--- /dev/null
+++ b/Plugins/__read__.py
@@ -0,0 +1,60 @@
+# __read__.py
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+from Plugins.__caller__ import Caller, CallerFailedVerification
+
+class __Read__(__Plugin__):
+
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ raise UndefinedMethodError("feature_initialize has not been defined.")
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ raise UndefinedMethodError("callbacks has not been defined.")
+
+ """
+ """
+ def callbacks_extend(self, app, calls, target):
+ caller = Caller(app)
+ sender = []
+ for app, key_plugin, *args in calls:
+ temp = None
+ try:
+ temp = caller.run(app.plugins[key_plugin].process, app, *args, target)
+ except CallerFailedVerification:
+ sender = []
+ if temp:
+ sender.extend(temp)
+ return sender
+
+ """
+ """
+ def callbacks_append(self, app, calls, target):
+ caller = Caller(app)
+ sender = []
+ for app, key_plugin, *args in calls:
+ temp = None
+ try:
+ temp = caller.run(app.plugins[key_plugin].process, app, *args, target)
+ except CallerFailedVerification:
+ sender = []
+ if temp:
+ sender.append(temp)
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return feature
+
+ """
+ """
+ def required_metadata_check(self, app, keys:list=[]):
+ if keys:
+ for key in keys:
+ if not key in app.metadata:
+ raise RequiredMetadataError(f"Required metadata attribute, {key}, not found.")
+ return True
\ No newline at end of file
diff --git a/Plugins/__read_gff_maker__.py b/Plugins/__read_gff_maker__.py
new file mode 100644
index 0000000..13ceb97
--- /dev/null
+++ b/Plugins/__read_gff_maker__.py
@@ -0,0 +1,21 @@
+#__read_gff_maker__.py
+
+from Plugins.__read__ import __Read__
+
+class __ReadGFFMaker__(__Read__):
+
+ """
+ """
+ def multi_feature_initialize(self, pre_multi_feature, metadata):
+ raise UndefinedMethodError("multi_feature_initialize has not been defined.")
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ return super().callbacks_append(app, calls, target)
+
+ """
+ """
+ def callbacks_with_iterator(self, app, calls, target, iterator):
+ raise UndefinedMethodError("callbacks_with_iterator has not been defined.")
+
diff --git a/Plugins/__read_tab_pannzer__.py b/Plugins/__read_tab_pannzer__.py
new file mode 100644
index 0000000..4c78378
--- /dev/null
+++ b/Plugins/__read_tab_pannzer__.py
@@ -0,0 +1,10 @@
+#__read_tab_pannzer__.py
+
+from Plugins.__read__ import __Read__
+
+class __ReadTabPannzer__(__Read__):
+
+ """
+ """
+ def callbacks(self, app, calls, target):
+ return self.callbacks_extend(app, calls, target)
\ No newline at end of file
diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
index ba426db..8876aa0 100644
--- a/Plugins/read_fasta.py
+++ b/Plugins/read_fasta.py
@@ -4,9 +4,9 @@
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
+from Plugins.__read__ import __Read__
-class Plugin:
-
+class Plugin(__Read__):
"""
"""
def feature_initialize(self, pre_feature, metadata):
@@ -25,14 +25,7 @@ def feature_initialize(self, pre_feature, metadata):
"""
"""
def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender += temp
-
- return sender
+ return super().callbacks_extend(app, calls, target)
"""
"""
@@ -41,12 +34,19 @@ def merge(self, feature, receiver):
"""
"""
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
for element in app.handles[key_handle]:
feature = self.feature_initialize(element, app.metadata)
+ app.current_sequence = feature.seq
receiver = self.callbacks(app, calls, (feature.id))
self.merge(feature, receiver)
- with open(f"out/{feature.id}.dat", "w") as o:
- print(feature.format("embl"), file=o)
\ No newline at end of file
+ #with open(f"out/{feature.id}.dat", "w") as o:
+ with open(f"{feature.id}.dat", "w") as o:
+ print(feature.format("embl"), file=o)
+
+ """
+ """
+ def required_metadata_check(self, app, keys:list=[]):
+ return super().required_metadata_check(app, ["project", "transl_table", "molecule_type", "organism", "taxonomy", "topology"])
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
index ef46754..91ca810 100644
--- a/Plugins/read_gff_maker_3UTR.py
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -1,9 +1,10 @@
#read_gff_maker_3UTR.py
import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
+class Plugin(__ReadGFFMaker__):
"""
"""
@@ -17,24 +18,7 @@ def feature_initialize(self, pre_feature, metadata):
"""
"""
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append(temp)
-
- return sender
-
- """
- """
- def merge(self, feature, receiver):
- return feature
-
- """
- """
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
try:
feature = self.feature_initialize(
app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index().iloc[0,:],
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
index 60fae42..f7813f0 100644
--- a/Plugins/read_gff_maker_5UTR.py
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -1,9 +1,10 @@
#read_gff_maker_5UTR.py
import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
+class Plugin(__ReadGFFMaker__):
"""
"""
@@ -17,24 +18,7 @@ def feature_initialize(self, pre_feature, metadata):
"""
"""
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append(temp)
-
- return sender
-
- """
- """
- def merge(self, feature, receiver):
- return feature
-
- """
- """
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
try:
feature = self.feature_initialize(
app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index().iloc[0,:],
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
index 48bcb29..8b77fd2 100644
--- a/Plugins/read_gff_maker_CDS.py
+++ b/Plugins/read_gff_maker_CDS.py
@@ -2,9 +2,10 @@
import pandas as pd
import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
+class Plugin(__ReadGFFMaker__):
"""
"""
@@ -27,16 +28,6 @@ def feature_initialize(self, pre_feature, metadata):
"translation":list(),
"transl_table":metadata["transl_table"]})
- """
- """
- def callbacks(self, app, calls, target):
- sender = []
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append(temp)
- return sender
-
"""
"""
def merge(self, feature, receiver):
@@ -47,7 +38,7 @@ def merge(self, feature, receiver):
"""
"""
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
try:
feature = self.feature_initialize(
app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
@@ -61,4 +52,9 @@ def process(self, app, key_handle, calls:list=[], target=None):
calls,
(target[0], f"{target[1]}-mRNA-1", "CDS"))
- return self.merge(feature, receiver)
\ No newline at end of file
+ return self.merge(feature, receiver)
+
+ """
+ """
+ def required_metadata_check(self, app, keys:list=[]):
+ return super().required_metadata_check(app, ["transl_table"])
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index 1aeb42b..4b23ebb 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -3,10 +3,11 @@
import pandas as pd
import re
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
+class Plugin(__ReadGFFMaker__):
"""
"""
@@ -22,26 +23,10 @@ def multi_feature_initialize(self, pre_multi_feature, metadata):
for _, element in pre_multi_feature:
yield self.feature_initialize(element, metadata)
- """
- """
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append()
-
- return sender
-
- """
- """
- def merge(self, feature, receiver):
- return feature
"""
"""
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
try:
feature = self.multi_feature_initialize(
app.handles[key_handle].loc[(target, slice(None), "exon"),:].reset_index().iterrows(),
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
index 9d5f7da..00cf5c3 100644
--- a/Plugins/read_gff_maker_gene.py
+++ b/Plugins/read_gff_maker_gene.py
@@ -2,10 +2,10 @@
import pandas as pd
import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
-
+class Plugin(__ReadGFFMaker__):
"""
"""
def feature_initialize(self, pre_feature, metadata):
@@ -15,18 +15,6 @@ def feature_initialize(self, pre_feature, metadata):
qualifiers={
"gene":None,
"note":list()})
-
- """
- """
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append(temp)
-
- return sender
"""
"""
@@ -38,7 +26,7 @@ def merge(self, feature, receiver):
"""
"""
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
feature = self.feature_initialize(
app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
app.metadata)
@@ -47,5 +35,5 @@ def process(self, app, key_handle, calls:list=[], target=None):
app,
calls,
(target[0], f"{target[1]}-mRNA-1", "gene"))
-
+
return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
index e4e8bc0..f8e76f8 100644
--- a/Plugins/read_gff_maker_mRNA.py
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -1,9 +1,10 @@
#read_gff_maker_mRNA.py
import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-class Plugin:
+class Plugin(__ReadGFFMaker__):
"""
"""
@@ -25,27 +26,9 @@ def feature_initialize(self, pre_feature, metadata):
}
)
- """
- """
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender.append(temp)
-
- return sender
-
-
- """
- """
- def merge(self, feature, receiver):
- return feature
-
"""
"""
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
try:
feature = self.feature_initialize(
app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
diff --git a/Plugins/read_gff_maker_misc_feature.py b/Plugins/read_gff_maker_misc_feature.py
new file mode 100644
index 0000000..aeae5fc
--- /dev/null
+++ b/Plugins/read_gff_maker_misc_feature.py
@@ -0,0 +1,32 @@
+#read_gff_maker_misc_feature.py
+
+import pandas as pd
+import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__ReadGFFMaker__):
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="misc_feature",
+ qualifiers={
+ #"gene":None,
+ "note":list()})
+
+ """
+ """
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
+ app.metadata)
+ feature.qualifiers["note"].append(target[1])
+ #feature.qualifiers["gene"]=target[1]
+ receiver = self.callbacks(
+ app,
+ calls,
+ (target[0], f"{target[1]}-mRNA-1", "misc_feature"))
+
+ return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
new file mode 100644
index 0000000..04fe6db
--- /dev/null
+++ b/Plugins/read_gff_maker_source.py
@@ -0,0 +1,61 @@
+#read_gff_maker_source.py
+
+import pandas as pd
+
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__ReadGFFMaker__):
+
+ """
+ """
+ def feature_initialize(self, pre_feature, metadata):
+ return SeqFeature(
+ FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+ type="source",
+ qualifiers={
+ "organism":metadata["organism"],
+ "mol_type":metadata["molecule_type"],
+ "db_xref":list()})
+
+ """
+ """
+ def callbacks_with_iterator(self, app, calls, target, iterator):
+ sender = []
+
+ for element in iterator:
+ temp = self.callbacks(app, calls, (target, element))
+ if temp:
+ sender.extend(temp)
+ return sender
+
+ """
+ """
+ def merge(self, feature, receiver):
+ return [feature] + receiver
+
+ """
+ """
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
+ try:
+ feature = self.feature_initialize(
+ app.handles[key_handle].loc[(target, slice(None), "contig"),:].reset_index().iloc[0,:],
+ app.metadata)
+ except KeyError:
+ return None
+
+ try:
+ receiver = self.callbacks_with_iterator(
+ app,
+ calls,
+ target,
+ app.handles[key_handle].loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"])
+ except KeyError:
+ receiver = []
+
+ return self.merge(feature, receiver)
+
+ """
+ """
+ def required_metadata_check(self, app, keys:list=[]):
+ return super().required_metadata_check(app, ["organism", "molecule_type"])
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
index f4fcee0..778d6f7 100644
--- a/Plugins/read_tab_pannzer_CDS.py
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -1,8 +1,9 @@
#read_tab_pannzer_CDS
import pandas as pd
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
-class Plugin:
+class Plugin(__ReadTabPannzer__):
"""
"""
@@ -48,24 +49,7 @@ def feature_initialize_product(self, pre_feature):
"""
"""
- def callbacks(self, app, calls, target):
- sender = []
-
- for app, key_plugin, *args in calls:
- temp = app.plugins[key_plugin].process(app, *args, target)
- if temp:
- sender += temp
-
- return sender
-
- """
- """
- def merge(self, feature, receiver):
- return feature
-
- """
- """
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
feature = self.feature_initialize(
(lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
index 3824bc6..ed1fb2a 100644
--- a/Plugins/read_tab_pannzer_gene.py
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -1,8 +1,9 @@
#read_tab_pannzer_gene.py
import pandas as pd
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
-class Plugin:
+class Plugin(__ReadTabPannzer__):
def feature_initialize(self, pre_feature, metadata):
return {
@@ -27,15 +28,11 @@ def callbacks(self, app, calls, target):
return sender
- def merge(self, feature, receiver):
- return feature
-
- def process(self, app, key_handle, calls:list=[], target=None):
+ def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
feature = self.feature_initialize(
(lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
app.metadata)
-
receiver = self.callbacks(
app,
calls,
From 96b60a6b11a69a5d563d80c65a648624d4efb389 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:25:29 +0200
Subject: [PATCH 12/22] to_handle renaming
---
Plugins/to_handle_fasta.py | 10 ++++++++++
Plugins/to_handle_gff_maker.py | 18 ++++++++++++++++++
Plugins/to_handle_tab_pannzer.py | 16 ++++++++++++++++
3 files changed, 44 insertions(+)
create mode 100644 Plugins/to_handle_fasta.py
create mode 100644 Plugins/to_handle_gff_maker.py
create mode 100644 Plugins/to_handle_tab_pannzer.py
diff --git a/Plugins/to_handle_fasta.py b/Plugins/to_handle_fasta.py
new file mode 100644
index 0000000..c1745ef
--- /dev/null
+++ b/Plugins/to_handle_fasta.py
@@ -0,0 +1,10 @@
+#fasta2handle.py
+
+from Bio import SeqIO
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file
diff --git a/Plugins/to_handle_gff_maker.py b/Plugins/to_handle_gff_maker.py
new file mode 100644
index 0000000..4c5e93a
--- /dev/null
+++ b/Plugins/to_handle_gff_maker.py
@@ -0,0 +1,18 @@
+#gff_maker2handle.py
+
+import pandas as pd
+import re
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ gff = pd.read_csv(handle, sep="\t")
+ gff = gff.reset_index()
+ gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+ gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+ gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+ gff["start"] = gff["start"].apply(lambda x: x-1)
+ return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
+
\ No newline at end of file
diff --git a/Plugins/to_handle_tab_pannzer.py b/Plugins/to_handle_tab_pannzer.py
new file mode 100644
index 0000000..e97c369
--- /dev/null
+++ b/Plugins/to_handle_tab_pannzer.py
@@ -0,0 +1,16 @@
+#tab_pannzer2handle.py
+
+import pandas as pd
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+ def process(self, file_path):
+ with open(file_path) as handle:
+ anno = pd.read_csv(handle, sep="\t")
+
+ anno.sort_values(by=["qpid", "type"], inplace=True)
+ anno.drop(["score", "PPV"], axis=1, inplace=True)
+ anno.set_index(["qpid", "type", "id", "desc"], inplace=True)
+
+ return anno
\ No newline at end of file
From a34b6ed0791ef232f9884719a2663e8d79e51ad0 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:25:59 +0200
Subject: [PATCH 13/22] verifier plugin creation
---
Plugins/__verify__.py | 19 +++++++++++++++++
Plugins/verify_gff_maker_CDS.py | 37 +++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
create mode 100644 Plugins/__verify__.py
create mode 100644 Plugins/verify_gff_maker_CDS.py
diff --git a/Plugins/__verify__.py b/Plugins/__verify__.py
new file mode 100644
index 0000000..e0f2c02
--- /dev/null
+++ b/Plugins/__verify__.py
@@ -0,0 +1,19 @@
+#__verify__.py
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+
+class __Verify__(__Plugin__):
+ pass
+
+class FailedVerification(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"FailedVerification: {self.message}"
+ else:
+ return "FailedVerification has been raised"
\ No newline at end of file
diff --git a/Plugins/verify_gff_maker_CDS.py b/Plugins/verify_gff_maker_CDS.py
new file mode 100644
index 0000000..a9bf4aa
--- /dev/null
+++ b/Plugins/verify_gff_maker_CDS.py
@@ -0,0 +1,37 @@
+#verify_gff_maker_gene.py
+
+from Plugins.__verify__ import __Verify__, FailedVerification
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__Verify__):
+
+ def process(self, app, element_to_verify):
+ conversion_map = {"A":"T", "T":"A", "C":"G", "G":"C"}
+ convert = lambda array : "".join([conversion_map[element] for element in array[::-1]])
+
+ start_codon = None
+ stop_codon = None
+
+ feature_location = element_to_verify.location
+ if(isinstance(feature_location, FeatureLocation)):
+ if(feature_location.strand == 1):
+ start_codon = app.current_sequence[feature_location._start:feature_location._start+3]
+ stop_codon = app.current_sequence[feature_location._end-3:feature_location._end]
+ else:
+ start_codon = convert(app.current_sequence[feature_location._end-3:feature_location._end])
+ stop_codon = convert(app.current_sequence[feature_location._start:feature_location._start+3])
+ elif(isinstance(feature_location, CompoundLocation)):
+ if(feature_location.strand == 1):
+ start_codon = app.current_sequence[feature_location.parts[0]._start:feature_location.parts[0]._start+3]
+ stop_codon = app.current_sequence[feature_location.parts[-1]._end-3:feature_location.parts[-1]._end]
+ else:
+ start_codon = convert(app.current_sequence[feature_location.parts[-1]._end-3:feature_location.parts[-1]._end])
+ stop_codon = convert(app.current_sequence[feature_location.parts[0]._start:feature_location.parts[0]._start+3])
+
+ if start_codon not in ["ATG"]:
+ raise FailedVerification(f"invalid start codon: {start_codon}")
+
+ if stop_codon not in ["TGA", "TAG", "TAA"]:
+ raise FailedVerification(f"invalid stop codon: {stop_codon}")
+
+ return None
\ No newline at end of file
From 2f085e1370f5122f587cca99c9ca8f2d93c2792d Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:01 +0200
Subject: [PATCH 14/22] root superclasses and package description
---
Plugins/__caller__.py | 62 +++++++++++++++++++++++++++++++++++++++++++
Plugins/__init__.py | 21 +++++++++++----
Plugins/__plugin__.py | 38 ++++++++++++++++++++++++++
3 files changed, 116 insertions(+), 5 deletions(-)
create mode 100644 Plugins/__caller__.py
create mode 100644 Plugins/__plugin__.py
diff --git a/Plugins/__caller__.py b/Plugins/__caller__.py
new file mode 100644
index 0000000..c6a3a5a
--- /dev/null
+++ b/Plugins/__caller__.py
@@ -0,0 +1,62 @@
+#__caller__.py
+
+"""
+"""
+from Plugins.__verify__ import FailedVerification
+
+
+class Caller:
+ """
+ """
+ def __init__(self, app):
+ self.app = app
+ self.status = True
+ self.last_result = None
+
+ """
+ """
+ def run(self, callback_function, *args):
+ if args[1] not in ["default", "verify", "bypass"]:
+ raise UnknownCallerModeError(f"{args[1]} mode is not defined.")
+
+ if self.status and args[1] in ["default"]:
+ self.last_result = callback_function(*args)
+ return self.last_result
+
+ elif self.status and args[1] in ["verify"]:
+ try:
+ callback_function(self.app, self.last_result)
+ except FailedVerification:
+ self.status = False
+ raise CallerFailedVerification()
+ return None
+
+ elif not self.status and args[1] in ["bypass"]:
+ self.last_result = callback_function(*args)
+ return self.last_result
+
+class UnknownCallerModeError(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"UnknownCallerModeError: {self.message}"
+ else:
+ return "UnknownCallerModeError has been raised"
+
+class CallerFailedVerification(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"CallerFailedVerification: {self.message}"
+ else:
+ return "CallerFailedVerification has been raised"
\ No newline at end of file
diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 583ac6d..ff585cf 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -1,17 +1,28 @@
#__init.py__
-from Plugins.fasta2handle import Plugin
-from Plugins.gff_maker2handle import Plugin
-from Plugins.tab_pannzer2handle import Plugin
+from Plugins.__caller__ import Caller, UnknownCallerModeError, CallerFailedVerification
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+from Plugins.__read__ import __Read__
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
+from Plugins.__verify__ import __Verify__, FailedVerification
from Plugins.read_fasta import Plugin
-from Plugins.read_gff_maker_gene import Plugin
from Plugins.read_gff_maker_3UTR import Plugin
from Plugins.read_gff_maker_5UTR import Plugin
from Plugins.read_gff_maker_CDS import Plugin
from Plugins.read_gff_maker_exon import Plugin
+from Plugins.read_gff_maker_gene import Plugin
from Plugins.read_gff_maker_mRNA import Plugin
+from Plugins.read_gff_maker_source import Plugin
from Plugins.read_tab_pannzer_CDS import Plugin
-from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
+from Plugins.read_tab_pannzer_gene import Plugin
+
+from Plugins.to_handle_fasta import Plugin
+from Plugins.to_handle_gff_maker import Plugin
+from Plugins.to_handle_tab_pannzer import Plugin
+
+from Plugins.verify_gff_maker_CDS import Plugin
\ No newline at end of file
diff --git a/Plugins/__plugin__.py b/Plugins/__plugin__.py
new file mode 100644
index 0000000..9e55c96
--- /dev/null
+++ b/Plugins/__plugin__.py
@@ -0,0 +1,38 @@
+#__plugin__.py
+
+class __Plugin__:
+ def process(*args):
+ raise UndefinedMethodError("process has not been defined")
+
+ def required_metadata_check(*args):
+ return True
+
+"""
+"""
+class RequiredMetadataError(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"RequiredMetadataError: {self.message}"
+ else:
+ return "RequiredMetadataError has been raised"
+
+"""
+"""
+class UndefinedMethodError(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"UndefinedMethodError: {self.message}"
+ else:
+ return "UndefinedMethodError has been raised"
\ No newline at end of file
From ec0bbf41ade13e22067879db65fb9fa0cd1e3501 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:32 +0200
Subject: [PATCH 15/22] renamed as to_handle_fasta
---
Plugins/fasta2handle.py | 9 ---------
1 file changed, 9 deletions(-)
delete mode 100644 Plugins/fasta2handle.py
diff --git a/Plugins/fasta2handle.py b/Plugins/fasta2handle.py
deleted file mode 100644
index b9e319b..0000000
--- a/Plugins/fasta2handle.py
+++ /dev/null
@@ -1,9 +0,0 @@
-#fasta2handle.py
-
-from Bio import SeqIO
-
-class Plugin:
-
- def process(self, file_path):
- with open(file_path) as handle:
- return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file
From 2239814117782d0b199f41fd265c9ff62c9b7191 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:51 +0200
Subject: [PATCH 16/22] renamed as to_handle_gff_maker
---
Plugins/gff_maker2handle.py | 16 ----------------
1 file changed, 16 deletions(-)
delete mode 100644 Plugins/gff_maker2handle.py
diff --git a/Plugins/gff_maker2handle.py b/Plugins/gff_maker2handle.py
deleted file mode 100644
index 39124c2..0000000
--- a/Plugins/gff_maker2handle.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#gff_maker2handle.py
-
-import pandas as pd
-import re
-
-class Plugin:
-
- def process(self, file_path):
- with open(file_path) as handle:
- gff = pd.read_csv(handle, sep="\t")
- gff = gff.reset_index()
- gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
- gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
- gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
- return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
-
\ No newline at end of file
From 63dfc33705cd25997a30979425f657c319447f80 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:28:14 +0200
Subject: [PATCH 17/22] renamed as to_handle_tab_pannzer
---
Plugins/tab_pannzer2handle.py | 11 -----------
1 file changed, 11 deletions(-)
delete mode 100644 Plugins/tab_pannzer2handle.py
diff --git a/Plugins/tab_pannzer2handle.py b/Plugins/tab_pannzer2handle.py
deleted file mode 100644
index ac7c532..0000000
--- a/Plugins/tab_pannzer2handle.py
+++ /dev/null
@@ -1,11 +0,0 @@
-#tab_pannzer2handle.py
-
-import pandas as pd
-
-class Plugin:
-
- def process(self, file_path):
- with open(file_path) as handle:
- anno = pd.read_csv(handle, sep="\t")
- anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
- return anno
\ No newline at end of file
From a4a1ba885b7e324d063db702f8c8cdf36c81b6d3 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:29:03 +0200
Subject: [PATCH 18/22] Delete files directory
---
files/config.info | 49 -----------------------------------------------
1 file changed, 49 deletions(-)
delete mode 100644 files/config.info
diff --git a/files/config.info b/files/config.info
deleted file mode 100644
index 1aa75e3..0000000
--- a/files/config.info
+++ /dev/null
@@ -1,49 +0,0 @@
-# Axel Giottonini
-# test config.info
-# 14.05.2021
-
-
-project:temp
-division:inv
-taxonomy:29031
-organism:Phlebotomus papatasi
-molecule_type:genomic DNA
-topology:linear
-description:empty
-transl_table:0
-
-
-
-fasta2handle:.fasta2handle,Plugins
-gff_maker2handle:.gff_maker2handle,Plugins
-tab_pannzer2handle:.tab_pannzer2handle,Plugins
-read_fasta:.read_fasta,Plugins
-read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
-read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
-read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
-read_gff_maker_exon:.read_gff_maker_exon,Plugins
-read_gff_maker_gene:.read_gff_maker_gene,Plugins
-read_gff_maker_main:.read_gff_maker_main,Plugins
-read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
-read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
-read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
-
-
-
-fasta:fasta2handle,files/sequences.fasta
-gff_maker:gff_maker2handle,files/data.gff
-tab_pannzer:tab_pannzer2handle,files/anno.out
-
-
-
--read_fasta,fasta
---read_gff_maker_main,gff_maker
----read_gff_maker_gene,gff_maker
-----read_tab_pannzer_gene,tab_pannzer
----read_gff_maker_mRNA,gff_maker
----read_gff_maker_CDS,gff_maker
-----read_tab_pannzer_CDS,tab_pannzer
----read_gff_maker_3UTR,gff_maker
----read_gff_maker_5UTR,gff_maker
---read_gff_maker_exon,gff_maker
-
From e599c67d3c9950aae4d4631a0a96c557a75cd197 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:29:53 +0200
Subject: [PATCH 19/22] core upload
handle config file as an argument
---
core.py | 157 ++++++++++++++++++++++++++++++++++++++++++++++++--------
main.py | 14 +++--
2 files changed, 146 insertions(+), 25 deletions(-)
diff --git a/core.py b/core.py
index 3bbc502..3fed957 100644
--- a/core.py
+++ b/core.py
@@ -3,13 +3,27 @@
import importlib
import re
-class app:
+"""
+"""
+class App:
+ """
+ Description:
+
+ Arguments:
+
+ Output:
+
+ Note:
+ """
def __init__(self, config_path):
+
self.metadata = None
self.plugins = None
self.handles = None
self.workflow = None
+ self.current_sequence = None
+
config = self.read_config(config_path)
self.set_metadata(config["metadata"])
@@ -17,16 +31,16 @@ def __init__(self, config_path):
self.set_handles(config["handles"])
self.set_workflow(config["workflow"])
+ self.all_plugins_required_metadata_check()
+
"""
- This function reads the config file which is divided into four fields :
- - metadata: contains the general and shared informations for EMBLish
- as a couple key:value
- - plugins: contains the list of the plugins that will be used as a
- triplet plugin_key:plugin_name,plugin_package
- - handles: contains the list of files that will be used as inputs as
- a triplet handle_key:plugin,file_path
- - workflow: contains a hierarchical list of the different step to run
- as a couple plugin_key,handle_key
+ Description:
+
+ Arguments:
+
+ Output:
+
+ Note:
"""
def read_config(self, config_path):
config = {
@@ -67,26 +81,68 @@ def read_config(self, config_path):
return config
"""
- This function converts the array containing the metadata into a dictionnary
+ Description:
+ - converts the array containing the metadata into a dictionary by splitting
+ each "key:value" string into an item with the key and value; a value written
+ as "value,int" is converted to an integer.
+ Arguments:
+ - array: list of strings
+ Output (assigned to self.metadata):
+ - dictionary of strings (or integers for "value,int" entries)
+ Note:
"""
- def set_metadata(self, array):
- self.metadata = {element.split(":")[0]:element.split(":")[1] for element in array}
+ def set_metadata(self, array:list):
+
+ def convert(value):
+ temp = value.split(",")
+ if len(temp) > 1:
+ if temp[1] == "int":
+ return int(temp[0])
+ return value
+
+ self.metadata = {element.split(":")[0]:convert(element.split(":")[1]) for element in array}
"""
- This function converts the array containing the plugins parameters into a dictionnary
- with plugins to call with their key
+ Description:
+ - converts the array containing the plugins parameters (name, package) into a
+ dictionary by splitting each string containing the plugin key, the plugin name
+ and the plugin package into an item with the key and the instantiated plugin.
+ Store the result in the self.plugins variable.
+ Arguments:
+ - array: list of strings
+ Output:
+ - dictionary of plugin objects
+ Note:
"""
- def set_plugins(self, array):
+ def set_plugins(self, array:list):
self.plugins = {element.split(":")[0]:importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin() for element in array}
"""
- This function converts the array containing the handles parameters into a dictionnary
- with handles to call with their key
+ Description:
+ - converts the array containing the handles parameters into a dictionary by
+ splitting each string containing the handle key, the handle converter and the
+ file path into an item with the key and the handle produced by the converter.
+ Store the result in the self.handles variable.
+ Arguments:
+ - array: list of strings
+ Output:
+ - dictionary of converted handles (e.g. data frames)
+ Note:
"""
def set_handles(self, array):
self.handles = {element.split(":")[0]:self.plugins[element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1]) for element in array}
"""
+ Description:
+ - converts the array containing the workflow into a recursive automaton where
+ tasks are described by a tuple containing the required plugin, the handle where
+ the data is found and a list of elements to call.
+ Store the result in the self.workflow variable.
+ Arguments:
+ - array: list of strings
+ Output (assigned):
+ - array: list of tuples (recursive)
+ Note:
"""
def set_workflow(self, array):
temp = self.refactor_workflow(array)
@@ -94,7 +150,14 @@ def set_workflow(self, array):
self.workflow = self.convert_workflow_task(temp[0])
"""
- Convert the list element in triplet level,,[]
+ Description:
+ - converts strings into a tuple containing the level of the task, the rest of the
+ string and an empty array.
+ Arguments:
+ - array: list of strings
+ Output:
+ - array: list of tuples
+ Note:
"""
def refactor_workflow(self, array):
@@ -107,7 +170,13 @@ def refactor_workflow(self, array):
return array
"""
- Order the elements and create the hierarchical nodes
+ Description:
+ - places each task in the sub-task array of its parent (level-1) task
+ Arguments:
+ - array: list of tuples
+ Output:
+ - array: list of tuples (recursive)
+ Note:
"""
def merge_workflow(self, array):
array.insert(0, (0,None,[]))
@@ -133,12 +202,22 @@ def merge_workflow(self, array):
return array
"""
+ Description:
+ - recursively converts the tuples containing the task string into a tuple containing
+ a reference to the application, the plugin key, the caller mode ("default" when
+ not given), the handle key and an array of subtasks.
+ Arguments:
+ - task: tuple (recursive)
+ Output:
+ - array: list of tuples (recursive)
+ Note:
"""
def convert_workflow_task(self, task):
if task[1]:
return (
self,
task[1].split(",")[0],
+ task[1].split(",")[2] if len(task[1].split(",")) > 2 else "default",
task[1].split(",")[1],
[self.convert_workflow_task(sub_task) for sub_task in task[2]]
)
@@ -146,7 +225,43 @@ def convert_workflow_task(self, task):
return [self.convert_workflow_task(sub_task) for sub_task in task[2]]
"""
+ Description:
+
+ Arguments:
+
+ Output:
+
+ Note:
+ """
+ def all_plugins_required_metadata_check(self):
+ for key, plugin in self.plugins.items():
+ if not plugin.required_metadata_check(self):
+ raise InvalidConfigurationError(f"{key} plugin could not find required metadata")
+
+ """
+ Description:
+
+ Arguments:
+
+ Output:
+
+ Note:
"""
def run(self):
for app, key_plugin, *args in self.workflow:
- app.plugins[key_plugin].process(app, *args)
\ No newline at end of file
+ app.plugins[key_plugin].process(app, *args)
+
+"""
+"""
+class InvalidConfigurationError(Exception):
+ def __init__(self, *args):
+ if args:
+ self.message = args[0]
+ else:
+ self.message = None
+
+ def __str__(self):
+ if self.message:
+ return f"InvalidConfigurationError, {self.message}"
+ else:
+ return "InvalidConfigurationError has been raised"
\ No newline at end of file
diff --git a/main.py b/main.py
index 1331b9a..875477b 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,14 @@
#main.py
-from core import app
+from core import App
+import sys
-if __name__ == "__main__":
+def main():
+ args = sys.argv[1:]
+
+ config_file = args[0]
- app = app("files/config.info")
- app.run()
\ No newline at end of file
+ App(config_file).run()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
From c418d55a43df71333a5ba2295d3d00fd17fbaa45 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:33:12 +0200
Subject: [PATCH 20/22] Update README.md
---
README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 55 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 40f2b8e..734ccf6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,58 @@
# GFF3-Tabular_to_EMBL
-## Status of the project
+```
+# Axel Giottonini
+# test config.info
+# 19.05.2021
-## Upcoming features
+
+project:PRJEB1234
+division:inv
+taxonomy:29031
+organism:Phlebotomus papatasi
+molecule_type:genomic DNA
+topology:linear
+description:empty
+transl_table:1,int
+
+
+
+read_fasta:.read_fasta,Plugins
+read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
+read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
+read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
+read_gff_maker_exon:.read_gff_maker_exon,Plugins
+read_gff_maker_gene:.read_gff_maker_gene,Plugins
+read_gff_maker_misc_feature:.read_gff_maker_misc_feature,Plugins
+read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
+read_gff_maker_source:.read_gff_maker_source,Plugins
+read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
+read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
+to_handle_fasta:.to_handle_fasta,Plugins
+to_handle_gff_maker:.to_handle_gff_maker,Plugins
+to_handle_tab_pannzer:.to_handle_tab_pannzer,Plugins
+verify_gff_maker_CDS:.verify_gff_maker_CDS,Plugins
+
+
+
+fasta:to_handle_fasta,sequences.fasta
+gff_maker:to_handle_gff_maker,data.gff
+tab_pannzer:to_handle_tab_pannzer,anno.out
+
+
+
+-read_fasta,fasta
+--read_gff_maker_source,gff_maker
+---read_gff_maker_gene,gff_maker
+----read_tab_pannzer_gene,tab_pannzer
+---read_gff_maker_mRNA,gff_maker
+---read_gff_maker_CDS,gff_maker
+----read_tab_pannzer_CDS,tab_pannzer
+---verify_gff_maker_CDS,NF,verify
+---read_gff_maker_misc_feature,gff_maker,bypass
+---read_gff_maker_3UTR,gff_maker
+---read_gff_maker_5UTR,gff_maker
+--read_gff_maker_exon,gff_maker
+
+
+```
From 12795dd374a55eb8e771fad6ad41fb9b816e3635 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:34:00 +0200
Subject: [PATCH 21/22] Delete Pipfile
---
Pipfile | 15 ---------------
1 file changed, 15 deletions(-)
delete mode 100644 Pipfile
diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index c029e52..0000000
--- a/Pipfile
+++ /dev/null
@@ -1,15 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-biopython = "*"
-tqdm = "*"
-pandas = "*"
-joblib = "*"
-
-[dev-packages]
-
-[requires]
-python_version = "3.9"
From 6429b39fceecbd5a635bd89928b2219c5cb805ca Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:34:07 +0200
Subject: [PATCH 22/22] Delete Pipfile.lock
---
Pipfile.lock | 134 ---------------------------------------------------
1 file changed, 134 deletions(-)
delete mode 100644 Pipfile.lock
diff --git a/Pipfile.lock b/Pipfile.lock
deleted file mode 100644
index 43ebfb6..0000000
--- a/Pipfile.lock
+++ /dev/null
@@ -1,134 +0,0 @@
-{
- "_meta": {
- "hash": {
- "sha256": "bc0663e00dfee42678acee85bb844332840e78c75b12bf90e09c3210744e343b"
- },
- "pipfile-spec": 6,
- "requires": {
- "python_version": "3.9"
- },
- "sources": [
- {
- "name": "pypi",
- "url": "https://pypi.org/simple",
- "verify_ssl": true
- }
- ]
- },
- "default": {
- "biopython": {
- "hashes": [
- "sha256:010142a8ec2549ff0649edd497658964ef1a18eefdb9fd942ec1e81b292ce2d9",
- "sha256:0b9fbb0d3022dc22716da108b8a81b80d952cd97ac1f106de491dce850f92f62",
- "sha256:0df5cddef2819c975e6508adf5d85aa046e449df5420d02b04871c7836b41273",
- "sha256:194528eda6856a4c68f840ca0bcc9b544a5edee3548b97521084e7ac38c833ca",
- "sha256:195f099c2c0c39518b6df921ab2b3cc43a601896018fc61909ac8385d5878866",
- "sha256:1ee0a0b6c2376680fea6642d5080baa419fd73df104a62d58a8baf7a8bbe4564",
- "sha256:2bd5a630be2a8e593094f7b1717fc962eda8931b68542b97fbf9bd8e2ac1e08d",
- "sha256:4565c97fab16c5697d067b821b6a1da0ec3ef36a9c96cf103ac7b4a94eb9f9ba",
- "sha256:48d424453a5512a1d1d41a4acabdfe5291da1f491a2d3606f2b0e4fbd63aeda6",
- "sha256:5c0b369f91a76b8e5e36624d075585c3f0f088ea4a6e3d015c48f08e48ce0114",
- "sha256:75b55000793f6b76334b8e80dc7e6d8cd2b019af917aa431cea6646e8e696c7f",
- "sha256:ada611f12ee3b0bef7308ef41ee7b94898613b369ab44e0268d74bd1d6a06920",
- "sha256:cc3b0b78022d14f11d508038a288a189d03c97c476d6636c7b6f98bd8bc8462b",
- "sha256:e0af107cc62a905d13d35dd7b38f335a37752ede45e4617139e84409a6a88dc4",
- "sha256:f1076653937947773768455556b1d24acad9575759e9089082f32636b09add54",
- "sha256:f5021a398c898b9cf6815cc5171c146a601b935b55364c53e6516a2545ab740c"
- ],
- "index": "pypi",
- "version": "==1.78"
- },
- "joblib": {
- "hashes": [
- "sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
- "sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
- ],
- "index": "pypi",
- "version": "==1.0.1"
- },
- "numpy": {
- "hashes": [
- "sha256:2428b109306075d89d21135bdd6b785f132a1f5a3260c371cee1fae427e12727",
- "sha256:377751954da04d4a6950191b20539066b4e19e3b559d4695399c5e8e3e683bf6",
- "sha256:4703b9e937df83f5b6b7447ca5912b5f5f297aba45f91dbbbc63ff9278c7aa98",
- "sha256:471c0571d0895c68da309dacee4e95a0811d0a9f9f532a48dc1bea5f3b7ad2b7",
- "sha256:61d5b4cf73622e4d0c6b83408a16631b670fc045afd6540679aa35591a17fe6d",
- "sha256:6c915ee7dba1071554e70a3664a839fbc033e1d6528199d4621eeaaa5487ccd2",
- "sha256:6e51e417d9ae2e7848314994e6fc3832c9d426abce9328cf7571eefceb43e6c9",
- "sha256:719656636c48be22c23641859ff2419b27b6bdf844b36a2447cb39caceb00935",
- "sha256:780ae5284cb770ade51d4b4a7dce4faa554eb1d88a56d0e8b9f35fca9b0270ff",
- "sha256:878922bf5ad7550aa044aa9301d417e2d3ae50f0f577de92051d739ac6096cee",
- "sha256:924dc3f83de20437de95a73516f36e09918e9c9c18d5eac520062c49191025fb",
- "sha256:97ce8b8ace7d3b9288d88177e66ee75480fb79b9cf745e91ecfe65d91a856042",
- "sha256:9c0fab855ae790ca74b27e55240fe4f2a36a364a3f1ebcfd1fb5ac4088f1cec3",
- "sha256:9cab23439eb1ebfed1aaec9cd42b7dc50fc96d5cd3147da348d9161f0501ada5",
- "sha256:a8e6859913ec8eeef3dbe9aed3bf475347642d1cdd6217c30f28dee8903528e6",
- "sha256:aa046527c04688af680217fffac61eec2350ef3f3d7320c07fd33f5c6e7b4d5f",
- "sha256:abc81829c4039e7e4c30f7897938fa5d4916a09c2c7eb9b244b7a35ddc9656f4",
- "sha256:bad70051de2c50b1a6259a6df1daaafe8c480ca98132da98976d8591c412e737",
- "sha256:c73a7975d77f15f7f68dacfb2bca3d3f479f158313642e8ea9058eea06637931",
- "sha256:d15007f857d6995db15195217afdbddfcd203dfaa0ba6878a2f580eaf810ecd6",
- "sha256:d76061ae5cab49b83a8cf3feacefc2053fac672728802ac137dd8c4123397677",
- "sha256:e8e4fbbb7e7634f263c5b0150a629342cc19b47c5eba8d1cd4363ab3455ab576",
- "sha256:e9459f40244bb02b2f14f6af0cd0732791d72232bbb0dc4bab57ef88e75f6935",
- "sha256:edb1f041a9146dcf02cd7df7187db46ab524b9af2515f392f337c7cbbf5b52cd"
- ],
- "markers": "python_version >= '3.7'",
- "version": "==1.20.2"
- },
- "pandas": {
- "hashes": [
- "sha256:09761bf5f8c741d47d4b8b9073288de1be39bbfccc281d70b889ade12b2aad29",
- "sha256:0f27fd1adfa256388dc34895ca5437eaf254832223812afd817a6f73127f969c",
- "sha256:43e00770552595c2250d8d712ec8b6e08ca73089ac823122344f023efa4abea3",
- "sha256:46fc671c542a8392a4f4c13edc8527e3a10f6cb62912d856f82248feb747f06e",
- "sha256:475b7772b6e18a93a43ea83517932deff33954a10d4fbae18d0c1aba4182310f",
- "sha256:4d821b9b911fc1b7d428978d04ace33f0af32bb7549525c8a7b08444bce46b74",
- "sha256:5e3c8c60541396110586bcbe6eccdc335a38e7de8c217060edaf4722260b158f",
- "sha256:621c044a1b5e535cf7dcb3ab39fca6f867095c3ef223a524f18f60c7fee028ea",
- "sha256:72ffcea00ae8ffcdbdefff800284311e155fbb5ed6758f1a6110fc1f8f8f0c1c",
- "sha256:8a051e957c5206f722e83f295f95a2cf053e890f9a1fba0065780a8c2d045f5d",
- "sha256:97b1954533b2a74c7e20d1342c4f01311d3203b48f2ebf651891e6a6eaf01104",
- "sha256:9f5829e64507ad10e2561b60baf285c470f3c4454b007c860e77849b88865ae7",
- "sha256:a93e34f10f67d81de706ce00bf8bb3798403cabce4ccb2de10c61b5ae8786ab5",
- "sha256:d59842a5aa89ca03c2099312163ffdd06f56486050e641a45d926a072f04d994",
- "sha256:dbb255975eb94143f2e6ec7dadda671d25147939047839cd6b8a4aff0379bb9b",
- "sha256:df6f10b85aef7a5bb25259ad651ad1cc1d6bb09000595cab47e718cbac250b1d"
- ],
- "index": "pypi",
- "version": "==1.2.3"
- },
- "python-dateutil": {
- "hashes": [
- "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
- "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==2.8.1"
- },
- "pytz": {
- "hashes": [
- "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
- "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
- ],
- "version": "==2021.1"
- },
- "six": {
- "hashes": [
- "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
- "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
- ],
- "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
- "version": "==1.15.0"
- },
- "tqdm": {
- "hashes": [
- "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7",
- "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33"
- ],
- "index": "pypi",
- "version": "==4.59.0"
- }
- },
- "develop": {}
-}