From ac5c1676ca885fb2ef63595ad84d51121ed54d84 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 13 May 2021 17:37:18 +0200
Subject: [PATCH 01/22] plugin version first files

---
 Plugins/__init__.py              |  14 ++
 Plugins/read_fasta.py            |  34 +++++
 Plugins/read_gff_maker_3UTR.py   |  27 ++++
 Plugins/read_gff_maker_5UTR.py   |  27 ++++
 Plugins/read_gff_maker_CDS.py    |  50 +++++++
 Plugins/read_gff_maker_exon.py   |   1 +
 Plugins/read_gff_maker_gene.py   |  31 ++++
 Plugins/read_gff_maker_mRNA.py   |  27 ++++
 Plugins/read_gff_maker_main.py   |  23 +++
 Plugins/read_gff_maker_source.py |   1 +
 Plugins/read_tab_pannzer_CDS.py  |  48 +++++++
 Plugins/read_tab_pannzer_gene.py |  28 ++++
 core.py                          |  13 ++
 main.py                          | 236 +++++++++++--------------------
 14 files changed, 407 insertions(+), 153 deletions(-)
 create mode 100644 Plugins/__init__.py
 create mode 100644 Plugins/read_fasta.py
 create mode 100644 Plugins/read_gff_maker_3UTR.py
 create mode 100644 Plugins/read_gff_maker_5UTR.py
 create mode 100644 Plugins/read_gff_maker_CDS.py
 create mode 100644 Plugins/read_gff_maker_exon.py
 create mode 100644 Plugins/read_gff_maker_gene.py
 create mode 100644 Plugins/read_gff_maker_mRNA.py
 create mode 100644 Plugins/read_gff_maker_main.py
 create mode 100644 Plugins/read_gff_maker_source.py
 create mode 100644 Plugins/read_tab_pannzer_CDS.py
 create mode 100644 Plugins/read_tab_pannzer_gene.py
 create mode 100644 core.py

diff --git a/Plugins/__init__.py b/Plugins/__init__.py
new file mode 100644
index 0000000..6fc65d5
--- /dev/null
+++ b/Plugins/__init__.py
@@ -0,0 +1,14 @@
+#__init.py__
+
+from Plugins.read_fasta import Plugin
+
+from Plugins.read_gff_maker_gene import Plugin
+from Plugins.read_gff_maker_3UTR import Plugin
+from Plugins.read_gff_maker_5UTR import Plugin
+from Plugins.read_gff_maker_CDS import Plugin
+from Plugins.read_gff_maker_exon import Plugin
+from Plugins.read_gff_maker_mRNA import Plugin
+from Plugins.read_gff_maker_source import Plugin
+
+from Plugins.read_tab_pannzer_CDS import Plugin
+from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
new file mode 100644
index 0000000..a6dade6
--- /dev/null
+++ b/Plugins/read_fasta.py
@@ -0,0 +1,34 @@
+#plugin.py
+
+import importlib
+import itertools
+
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord 
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+
+            for record in handle:
+
+                #initialize record
+                _record_ = SeqRecord(
+                    record.seq,
+                    record.id,
+                    dbxrefs=["Project:" + metadata["project"]],
+                    annotations={"division":metadata["division"],"molecule_type":metadata["molecule_type"],"organism":metadata["organism"],"taxonomy":metadata["taxonomy"],"topology":metadata["topology"]},
+                    description=""
+                )
+
+                #calls
+                receiver = []
+                for call,*args in calls:
+                    receiver.extend(call.process(*args, target=_record_.id))
+        
+                #post output treatment
+                _record_.features = list(itertools.chain(*receiver))
+
+                #outputing
+                with open(f"out/{_record_.id}.dat", "w") as o:
+                    print(_record_.format("embl"), file=o)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
new file mode 100644
index 0000000..dabca35
--- /dev/null
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -0,0 +1,27 @@
+#read_gff_maker_3UTR.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        try:
+            location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index())
+
+            _sub_features_ = [
+                SeqFeature(
+                    FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+                    type="3'UTR",
+                    qualifiers={
+                        "gene":target[1],
+                        "note":list()})]
+                
+            #calls
+            receiver = []
+            for call,*args in calls:
+                receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "3'UTR")))
+            
+            return _sub_features_
+        except KeyError:
+            return []
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
new file mode 100644
index 0000000..5b72281
--- /dev/null
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -0,0 +1,27 @@
+#read_gff_maker_5UTR.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        try:
+            location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index())
+
+            _sub_features_ = [
+                SeqFeature(
+                    FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+                    type="5'UTR",
+                    qualifiers={
+                        "gene":target[1],
+                        "note":list()})]
+                
+            #calls
+            receiver = []
+            for call,*args in calls:
+                receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "5'UTR")))
+            
+            return _sub_features_
+        except KeyError:
+            return []
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
new file mode 100644
index 0000000..7b29f18
--- /dev/null
+++ b/Plugins/read_gff_maker_CDS.py
@@ -0,0 +1,50 @@
+#read_gff_maker_CDS.py
+
+import pandas as pd
+import itertools
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+def mergeLocations(_locationArray_):
+    return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
+
+        _sub_features_ = [
+            SeqFeature(
+                mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
+                type="CDS",
+                qualifiers={
+                    "gene":target[1],
+                    "product":list(),
+                    "note":list(),
+                    "db_xref":list(),
+                    "translation":list(),
+                    "transl_table":metadata["transl_table"]})]
+
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "CDS")))
+        
+        annotations = list(itertools.chain(receiver))
+        for annotation in annotations:
+            if "product" in annotation.keys() and annotation["product"] != []:
+                for sub_feature in _sub_features_:
+                    sub_feature.qualifiers["product"].extend(annotation["product"])
+
+            if "note"  in annotation.keys() and annotation["note"] != []:
+                for sub_feature in _sub_features_:
+                    sub_feature.qualifiers["note"].extend(annotation["note"])
+
+            if "db_xref" in annotation.keys() and annotation["db_xref"] != []:
+                for sub_feature in _sub_features_:
+                    sub_feature.qualifiers["db_xref"].extend(annotation["db_xref"])
+
+            if "translation" in annotation.keys() and annotation["translation"] != []:
+                for sub_feature in _sub_features_:
+                    sub_feature.qualifiers["translation"].extend(annotation["translation"])
+
+        return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
new file mode 100644
index 0000000..711c5ca
--- /dev/null
+++ b/Plugins/read_gff_maker_exon.py
@@ -0,0 +1 @@
+#read_gff_maker_exon.py
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
new file mode 100644
index 0000000..069b66b
--- /dev/null
+++ b/Plugins/read_gff_maker_gene.py
@@ -0,0 +1,31 @@
+#read_gff_maker_gene.py
+
+import pandas as pd
+import itertools
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        location = (handle.loc[(target[0], target[1], "gene"),:].reset_index())
+
+        _sub_features_ = [
+            SeqFeature(
+                FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
+                type="gene",
+                qualifiers={
+                    "gene":target[1],
+                    "note":list()})]
+            
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=(target[0], target[1], "gene")))
+        
+        annotations = list(itertools.chain(receiver))
+        for annotation in annotations:
+            if "note" in annotation.keys() and annotation["note"] != []:
+                for sub_feature in _sub_features_:
+                    sub_feature.qualifiers["note"].extend(annotation["note"])
+
+        return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
new file mode 100644
index 0000000..7639664
--- /dev/null
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -0,0 +1,27 @@
+#read_gff_maker_mRNA.py
+
+import pandas as pd
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+def mergeLocations(_locationArray_):
+    return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
+
+        #initialise
+        _sub_features_ = [
+            SeqFeature(
+                mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
+                type="mRNA",
+                qualifiers={
+                    "gene":target[1]})]
+
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "mRNA")))
+        
+        return _sub_features_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
new file mode 100644
index 0000000..2043ade
--- /dev/null
+++ b/Plugins/read_gff_maker_main.py
@@ -0,0 +1,23 @@
+#read_gff_maker_main.py
+
+import pandas as pd
+import re
+
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        
+        for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
+            
+            #initialize features
+            _features_subset_ = []
+               
+            #calls
+            receiver = []
+            for call,*args in calls:
+                receiver.extend(call.process(*args, target=(target, gene)))
+
+            _features_subset_ = receiver
+            yield _features_subset_
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
new file mode 100644
index 0000000..f8c0bce
--- /dev/null
+++ b/Plugins/read_gff_maker_source.py
@@ -0,0 +1 @@
+#read_gff_maker_source.py
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
new file mode 100644
index 0000000..7b0fd2e
--- /dev/null
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -0,0 +1,48 @@
+#read_tab_pannzer_CDS
+
+import pandas as pd
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+
+        #initialisation
+        try:
+            anno_bp = handle.loc[(target[1], "BP_ARGOT"),:].reset_index()["id"]
+        except KeyError:
+            anno_bp = pd.Series([])
+
+        try:
+            anno_cc = handle.loc[(target[1], "CC_ARGOT"),:].reset_index()["id"]
+        except KeyError:
+            anno_cc = pd.Series([])
+
+        try:
+            anno_mf = handle.loc[(target[1], "MF_ARGOT"),:].reset_index()["id"]
+        except KeyError:
+            anno_mf = pd.Series([])
+
+        try:
+            anno_qsec = [handle.loc[(target[1], "qseq"),:].reset_index().iloc[0,1]]
+        except KeyError:
+            anno_qsec = list()
+
+        try:
+            anno_de =  [handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]]
+        except KeyError:
+            anno_de = list()
+
+        _annotations_ = [{
+            "db_xref":[f"GO:{str(go)}" for go in pd.concat([anno_bp, anno_cc, anno_mf])],
+            "translation": anno_qsec,
+            "product": anno_de
+        }]
+
+    
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=target))
+
+        #output
+        return _annotations_
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
new file mode 100644
index 0000000..521797e
--- /dev/null
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -0,0 +1,28 @@
+#read_tab_pannzer_gene.py
+
+#read_tab_pannzer_CDS
+
+import pandas as pd
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+
+        #initialisation
+        try:
+            anno_de =  handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]
+        except KeyError:
+            anno_de = []
+
+        _annotations_ = [{
+            "note": anno_de
+        }]
+
+    
+        #calls
+        receiver = []
+        for call,*args in calls:
+            receiver.extend(call.process(*args, target=target))
+
+        #output
+        return _annotations_
\ No newline at end of file
diff --git a/core.py b/core.py
new file mode 100644
index 0000000..03dfa18
--- /dev/null
+++ b/core.py
@@ -0,0 +1,13 @@
+#core.py
+
+import importlib
+
+class app:
+    def __init__(self, plugins:list=[]):
+        assert plugins != [], "No plugins specified"
+
+        self.plugins = plugins
+
+    def run(self):
+        for plugin,*args in self.plugins:
+            plugin.process(*args)
\ No newline at end of file
diff --git a/main.py b/main.py
index 4e941a0..479331e 100644
--- a/main.py
+++ b/main.py
@@ -1,153 +1,83 @@
-#!/usr/bin/python3
-# -*-coding:utf8
-
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
-from Bio.SeqRecord import SeqRecord 
-from joblib import Parallel, delayed
-from tqdm import tqdm
-import multiprocessing
-import os
-import pandas as pd
-import re
-import sys
-
-def read_anno(file):
-    anno = pd.read_csv(file, sep="\t")
-    anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
-    return anno
-
-def read_gff(file):
-    gff = pd.read_csv(file, sep="\t")
-    gff = gff.reset_index()
-    gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
-    gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
-    gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
-    gff = gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"])
-    gff["start"].apply(lambda x: x-1)
-    return gff
-
-def read_config(file):
-    config = {"description":None, "division":None, "molecule_type":None, "organism":None,  "project":None, "taxonomy":None, "topology":None, "transl_table":None}
-    for line in [line.rsplit("\n")[0] for line in file.readlines()]:
-        config[re.split(r":", line)[0].lower()]=re.split(r":", line)[1]
-    return config
-
-def mergeLocations(_locationArray_):
-    return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
-def init_features(sec, gene, gff):
-    ft_table = gff.loc[(sec, gene),:].reset_index()
-    ft_table_mRNA = ft_table[ft_table["ft_type"] == "mRNA"].iloc[0,:]
-    ft_table_CDSs = [x for i, x in ft_table[ft_table["ft_type"] == "CDS"].iterrows()]
-    tmp_ft_table_3UTR = ft_table[ft_table["ft_type"] == "three_prime_UTR"]
-    ft_table_3UTR = pd.Series(dtype="float64") if tmp_ft_table_3UTR.empty else tmp_ft_table_3UTR.iloc[0,:]
-    tmp_ft_table_5UTR = ft_table[ft_table["ft_type"] == "five_prime_UTR"]
-    ft_table_5UTR = pd.Series(dtype="float64") if tmp_ft_table_5UTR.empty else tmp_ft_table_5UTR.iloc[0,:]
-    
-    return {
-        "location":FeatureLocation(int(ft_table_mRNA[1]),int(ft_table_mRNA[2]),(1,-1)[ft_table_mRNA[3] == "-"]), 
-        "qualifiers":{"gene":gene,"note":list()},
-        "type":"gene"
-    },{
-        "location":mergeLocations([FeatureLocation(int(CDS[1]),int(CDS[2]),(1,-1)[CDS[3] == "-"]) for CDS in ft_table_CDSs]), 
-        "qualifiers":{"gene":gene}, 
-        "type":"mRNA"
-    },{
-        "location":mergeLocations([FeatureLocation(int(CDS[1]),int(CDS[2]),(1,-1)[CDS[3] == "-"]) for CDS in ft_table_CDSs]), 
-        "qualifiers":{"gene":gene,"product":list(),"note":list(),"db_xref":list(),"translation":list(),"transl_table":11}, 
-        "type":"CDS"
-    },{
-        "location": None if ft_table_3UTR.empty else FeatureLocation(int(ft_table_3UTR[1]), int(ft_table_3UTR[2]), (1,-1)[ft_table_3UTR[3] == "-"]),
-        "qualifiers":{"gene":gene}, 
-        "type":"3'UTR"
-    },{
-        "location":None if ft_table_5UTR.empty else FeatureLocation(int(ft_table_5UTR[1]), int(ft_table_5UTR[2]), (1,-1)[ft_table_5UTR[3] == "-"]),
-        "qualifiers":{"gene":gene}, 
-        "type":"5'UTR"}
-
-def merge(record, anno, gff, conf, out_dir):
-    sec = record.id
-    
-    #Record initialisation
-    _record_ = SeqRecord(
-        record.seq,
-        id=sec,
-        dbxrefs=["Project:" + conf["project"]],
-        annotations={"division":conf["division"],"molecule_type":conf["molecule_type"],"organism":conf["organism"],"taxonomy":conf["taxonomy"],"topology":conf["topology"]},
-        description=conf["description"])
-    
-    #Source feature
-    ft_table = gff.loc[(sec, slice(None), "contig"), :].reset_index()
-    _source_ = SeqFeature(FeatureLocation(int(ft_table.iloc[0,3]),int(ft_table.iloc[0,4]),(1,-1)[ft_table.iloc[0,5] == "-"]),type="source",qualifiers={"organism":"test","mol_type":"genomic DNA","db_xref":list()})
-    _record_.features.append(_source_)
-    
-    #GENE/MRNA/CDS/3UTR/5UTR features
-    for gene in gff.loc[(sec, slice(None), "gene"),:].reset_index()["sub_seq_id"].apply(lambda x: x+"-mRNA-1"):
-        _gene_, _mRNA_, _CDS_, _3UTR_, _5UTR_ = init_features(sec, gene, gff)
-        
-        try:
-            anno_table = anno.loc[(gene),:].reset_index()
-            anno_bp = anno.loc[(gene, "BP_ARGOT"),:].reset_index()["id"]
-            anno_cc = anno.loc[(gene, "CC_ARGOT"),:].reset_index()["id"]
-            anno_mf = anno.loc[(gene, "MF_ARGOT"),:].reset_index()["id"]
-            _CDS_["qualifiers"]["db_xref"] = ["GO:" + str(go) for go in pd.concat([anno_bp, anno_cc, anno_mf])]
-            _CDS_["qualifiers"]["translation"] = anno.loc[(gene, "qseq"),:].reset_index().iloc[0,1]
-            _CDS_["qualifiers"]["transl_table"] = conf["transl_table"]
-            _gene_["qualifiers"]["note"] = _CDS_["qualifiers"]["product"] = anno.loc[(gene, "DE"),:].reset_index().iloc[0,1]
-        except KeyError:
-            pass
-        
-        for feature in [_gene_, _mRNA_, _CDS_, _3UTR_, _5UTR_]:
-            if feature["location"]: 
-                _record_.features.append(SeqFeature(feature["location"], type=feature["type"], qualifiers=feature["qualifiers"]))
-    
-    #Exon feature
-    ft_table = gff.loc[sec, slice(None), "exon"].reset_index()
-    _record_.features.extend([SeqFeature(FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"]=="-"]), type="exon", qualifiers={}) for i,exon in ft_table.iterrows()])
-     
-    #Print EMBL entry in output folder
-    with open(out_dir + "/" + sec + ".dat", "w") as file:
-        print(_record_.format("embl"), file=file)
-        file.close()   
-    
-if __name__ == "__main__":
-
-    GFF_FILE = FASTA_FILE = ANNO_FILE = CONF_FILE = None
-    OUT_DIR = "out"
-
-    args = sys.argv[1:]
-    for i in [0,2,4,6,8]:
-        if args[i] in ["-gff", "-g"]: GFF_FILE = args[i+1]
-        elif args[i] in ["-fasta", "-f"]: FASTA_FILE = args[i+1]
-        elif args[i] in ["-anno", "-a"]: ANNO_FILE = args[i+1]
-        elif args[i] in ["-conf", "-c"]: CONF_FILE = args[i+1]
-        elif args[i] in ["-out", "-o"]: OUT_DIR = args[i+1]
-
-    if not os.path.exists(OUT_DIR):
-        os.makedirs(OUT_DIR)
-
-    CONF = None
-    with open(CONF_FILE) as conf_file:
-        CONF = read_config(conf_file)
-        conf_file.close()
-    print("Configuration file reading : DONE!")
-
-    ANNO = None
-    with open(ANNO_FILE) as anno_file:
-        ANNO = read_anno(anno_file)
-        anno_file.close()
-    print("Annotation file reading : DONE!")
-
-    GFF = None
-    with open(GFF_FILE) as gff_file:
-        GFF = read_gff(gff_file)
-        gff_file.close()
-    print("Prediction file reading : DONE!")
-
-    with open(FASTA_FILE) as fasta_file:
-        num_cores = multiprocessing.cpu_count()
-        records = list(SeqIO.parse(fasta_file, "fasta"))
-        processed_list = Parallel(n_jobs=num_cores)(delayed(merge)(record, ANNO, GFF, CONF, OUT_DIR) for record in tqdm(records))
-        fasta_file.close()
\ No newline at end of file
+#main.py
+
+from core import app
+
+from Bio import SeqIO
+import pandas as pd
+import re
+import importlib
+
+def fasta2handle(file_path):
+    with open(file_path) as handle:
+        return list(SeqIO.parse(handle, "fasta"))
+
+def gff_maker2handle(file_path):
+    with open(file_path) as handle:
+        gff = pd.read_csv(handle, sep="\t")
+        gff = gff.reset_index()
+        gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+        gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+        gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+        return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]) 
+    
+def tab_pannzer2handle(file_path):
+    with open(file_path) as handle:
+        anno = pd.read_csv(handle, sep="\t")
+        anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
+        return anno
+
+if __name__ == "__main__":
+
+    _GLOBALS_ = {
+        "handles":dict(),
+        "plugins":dict(),
+        "metadata":dict()
+    }
+
+    _GLOBALS_["handles"]["fasta"] = fasta2handle("files/sequences.fasta")
+    _GLOBALS_["handles"]["gff_maker"] = gff_maker2handle("files/data.gff")
+    _GLOBALS_["handles"]["tab_panzer"] = tab_pannzer2handle("files/anno.out")
+
+    _GLOBALS_["plugins"]["read_fasta"] = importlib.import_module(".read_fasta","Plugins").Plugin()
+
+    _GLOBALS_["plugins"]["read_gff_maker_3UTR"] = importlib.import_module(".read_gff_maker_3UTR","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_5UTR"] = importlib.import_module(".read_gff_maker_5UTR","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_CDS"] = importlib.import_module(".read_gff_maker_CDS","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_exon"] = importlib.import_module(".read_gff_maker_exon","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_gff_maker_source"] = importlib.import_module(".read_gff_maker_source","Plugins").Plugin()
+
+    _GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
+    _GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
+
+
+    _GLOBALS_["metadata"]["project"] = "temp"
+    _GLOBALS_["metadata"]["division"] = "INV"
+    _GLOBALS_["metadata"]["taxonomy"] = "29031"
+    _GLOBALS_["metadata"]["organism"] = "Phlebotomus papatasi"
+    _GLOBALS_["metadata"]["molecule_type"] = "genomic DNA"
+    _GLOBALS_["metadata"]["topology"] = "linear"
+    _GLOBALS_["metadata"]["description"] = "description"
+    _GLOBALS_["metadata"]["transl_table"] = 0
+
+    app = app(
+        [
+            (_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
+                (_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+                    (_GLOBALS_["plugins"]["read_gff_maker_source"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"],[])
+                    (_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+                        (_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
+                    ]),
+                    (_GLOBALS_["plugins"]["read_gff_maker_mRNA"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
+                    (_GLOBALS_["plugins"]["read_gff_maker_CDS"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
+                        (_GLOBALS_["plugins"]["read_tab_pannzer_CDS"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
+                    ]),
+                    (_GLOBALS_["plugins"]["read_gff_maker_3UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
+                    (_GLOBALS_["plugins"]["read_gff_maker_5UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
+                ]),
+                (_GLOBALS_["plugins"]["read_gff_maker_exon"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
+            ])
+        ])
+    app.run()
\ No newline at end of file

From a3ad89bf3fd545db62f60b7bcf25f1fd15c76094 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:55:22 +0200
Subject: [PATCH 02/22] Delete read_gff_maker_source.py

Removed this plugin, as its functionality is contained in the read_gff_maker_main plugin.
---
 Plugins/read_gff_maker_source.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 Plugins/read_gff_maker_source.py

diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
deleted file mode 100644
index f8c0bce..0000000
--- a/Plugins/read_gff_maker_source.py
+++ /dev/null
@@ -1 +0,0 @@
-#read_gff_maker_source.py
\ No newline at end of file

From 128eb82a0a81c3cd0a7b205f4362624248b23a1d Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:57:22 +0200
Subject: [PATCH 03/22] upload referring to read_gff_maker_source removal

---
 main.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/main.py b/main.py
index 479331e..59cd318 100644
--- a/main.py
+++ b/main.py
@@ -47,7 +47,6 @@ def tab_pannzer2handle(file_path):
     _GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
     _GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
     _GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_source"] = importlib.import_module(".read_gff_maker_source","Plugins").Plugin()
 
     _GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
     _GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
@@ -66,7 +65,6 @@ def tab_pannzer2handle(file_path):
         [
             (_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
                 (_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
-                    (_GLOBALS_["plugins"]["read_gff_maker_source"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"],[])
                     (_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
                         (_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
                     ]),

From 6ce0ed2a658cc569fdb1c7b8d172426efa1ac610 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 08:59:18 +0200
Subject: [PATCH 04/22] upload referring to read_gff_maker_source removal

---
 Plugins/__init__.py            |  1 -
 Plugins/read_gff_maker_main.py | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 6fc65d5..8937431 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -8,7 +8,6 @@
 from Plugins.read_gff_maker_CDS import Plugin
 from Plugins.read_gff_maker_exon import Plugin
 from Plugins.read_gff_maker_mRNA import Plugin
-from Plugins.read_gff_maker_source import Plugin
 
 from Plugins.read_tab_pannzer_CDS import Plugin
 from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
index 2043ade..eac0f9c 100644
--- a/Plugins/read_gff_maker_main.py
+++ b/Plugins/read_gff_maker_main.py
@@ -9,6 +9,18 @@ class Plugin:
 
     def process(self, handle, metadata, calls:list=[], target=None):
         
+        location = (handle.loc[(target, slice(None), "contig"),:].reset_index())
+        _feature_ = [
+            SeqFeature(
+                FeatureLocation(int(location.iloc[0,3]), int(location.iloc[0,4]), (1,-1)[location.iloc[0,5] == "-"]),
+                type="source",
+                qualifiers={
+                    "oganism":metadata["organism"],
+                    "mol_type":metadata["molecule_type"],
+                    "db_xref":list()})]
+
+        yield _feature_
+
         for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
             
             #initialize features

From c0f313761cd6ca2e69ec9a5ed41052c1d44e3a73 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 09:00:02 +0200
Subject: [PATCH 05/22] read_gff_maker_exon set up

---
 Plugins/read_gff_maker_exon.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index 711c5ca..e4b84ef 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -1 +1,28 @@
-#read_gff_maker_exon.py
\ No newline at end of file
+#read_gff_maker_exon.py
+
+import pandas as pd
+import re
+
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+
+class Plugin:
+
+    def process(self, handle, metadata, calls:list=[], target=None):
+        exons = handle.loc[(target, slice(None), "exon"),:].reset_index()
+
+        for index, exon in exons.iterrows():
+            _features_subset_ = [
+                SeqFeature(
+                    FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"] == "-"]),
+                    type="exon",
+                    qualifiers={}
+                )]
+
+            #calls
+            receiver = []
+            for call, *args in calls:
+                receiver.extend(call.process(*args, target=(target)))
+
+             
+            yield _features_subset_

From 0ec1b6c891cbb0eefc3cbfe375b1d515b3386af2 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:04:56 +0200
Subject: [PATCH 06/22] Create config.info (example file)

---
 files/config.info | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 files/config.info

diff --git a/files/config.info b/files/config.info
new file mode 100644
index 0000000..1aa75e3
--- /dev/null
+++ b/files/config.info
@@ -0,0 +1,49 @@
+# Axel Giottonini
+# test config.info
+# 14.05.2021
+
+<metadata>
+project:temp
+division:inv
+taxonomy:29031
+organism:Phlebotomus papatasi
+molecule_type:genomic DNA
+topology:linear
+description:empty
+transl_table:0
+</metadata>
+
+<plugins>
+fasta2handle:.fasta2handle,Plugins
+gff_maker2handle:.gff_maker2handle,Plugins
+tab_pannzer2handle:.tab_pannzer2handle,Plugins
+read_fasta:.read_fasta,Plugins
+read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
+read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
+read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
+read_gff_maker_exon:.read_gff_maker_exon,Plugins
+read_gff_maker_gene:.read_gff_maker_gene,Plugins
+read_gff_maker_main:.read_gff_maker_main,Plugins
+read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
+read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
+read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
+</plugins>
+
+<handles>
+fasta:fasta2handle,files/sequences.fasta
+gff_maker:gff_maker2handle,files/data.gff
+tab_pannzer:tab_pannzer2handle,files/anno.out
+</handles>
+
+<workflow>
+-read_fasta,fasta
+--read_gff_maker_main,gff_maker
+---read_gff_maker_gene,gff_maker
+----read_tab_pannzer_gene,tab_pannzer
+---read_gff_maker_mRNA,gff_maker
+---read_gff_maker_CDS,gff_maker
+----read_tab_pannzer_CDS,tab_pannzer
+---read_gff_maker_3UTR,gff_maker
+---read_gff_maker_5UTR,gff_maker
+--read_gff_maker_exon,gff_maker
+</workflow>

From e17f02fe47705552fbdae9b16bb6b03291509e6b Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:06:07 +0200
Subject: [PATCH 07/22] Create plugins for handling files

---
 Plugins/__init__.py           |  4 ++++
 Plugins/fasta2handle.py       |  9 +++++++++
 Plugins/gff_maker2handle.py   | 16 ++++++++++++++++
 Plugins/tab_pannzer2handle.py | 11 +++++++++++
 4 files changed, 40 insertions(+)
 create mode 100644 Plugins/fasta2handle.py
 create mode 100644 Plugins/gff_maker2handle.py
 create mode 100644 Plugins/tab_pannzer2handle.py

diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 8937431..583ac6d 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -1,5 +1,9 @@
 #__init.py__
 
+from Plugins.fasta2handle import Plugin
+from Plugins.gff_maker2handle import Plugin
+from Plugins.tab_pannzer2handle import Plugin
+
 from Plugins.read_fasta import Plugin
 
 from Plugins.read_gff_maker_gene import Plugin
diff --git a/Plugins/fasta2handle.py b/Plugins/fasta2handle.py
new file mode 100644
index 0000000..b9e319b
--- /dev/null
+++ b/Plugins/fasta2handle.py
@@ -0,0 +1,9 @@
+#fasta2handle.py
+
+from Bio import SeqIO
+
+class Plugin:
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file
diff --git a/Plugins/gff_maker2handle.py b/Plugins/gff_maker2handle.py
new file mode 100644
index 0000000..39124c2
--- /dev/null
+++ b/Plugins/gff_maker2handle.py
@@ -0,0 +1,16 @@
+#gff_maker2handle.py
+
+import pandas as pd
+import re
+
+class Plugin:
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            gff = pd.read_csv(handle, sep="\t")
+            gff = gff.reset_index()
+            gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+            gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+            gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+            return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]) 
+        
\ No newline at end of file
diff --git a/Plugins/tab_pannzer2handle.py b/Plugins/tab_pannzer2handle.py
new file mode 100644
index 0000000..ac7c532
--- /dev/null
+++ b/Plugins/tab_pannzer2handle.py
@@ -0,0 +1,11 @@
+#tab_pannzer2handle.py
+
+import pandas as pd
+
+class Plugin:
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            anno = pd.read_csv(handle, sep="\t")
+            anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
+            return anno
\ No newline at end of file

From d5a80b94f85394f0f4d6ce0d157a66d1f7338fe5 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Fri, 14 May 2021 14:07:02 +0200
Subject: [PATCH 08/22] Upload main to work with the new plugin (2handle) and
 config file

---
 main.py | 111 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 54 insertions(+), 57 deletions(-)

diff --git a/main.py b/main.py
index 59cd318..4d97e6f 100644
--- a/main.py
+++ b/main.py
@@ -7,25 +7,6 @@
 import re
 import importlib
 
-def fasta2handle(file_path):
-    with open(file_path) as handle:
-        return list(SeqIO.parse(handle, "fasta"))
-
-def gff_maker2handle(file_path):
-    with open(file_path) as handle:
-        gff = pd.read_csv(handle, sep="\t")
-        gff = gff.reset_index()
-        gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
-        gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
-        gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
-        return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]) 
-    
-def tab_pannzer2handle(file_path):
-    with open(file_path) as handle:
-        anno = pd.read_csv(handle, sep="\t")
-        anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
-        return anno
-
 if __name__ == "__main__":
 
     _GLOBALS_ = {
@@ -34,48 +15,64 @@ def tab_pannzer2handle(file_path):
         "metadata":dict()
     }
 
-    _GLOBALS_["handles"]["fasta"] = fasta2handle("files/sequences.fasta")
-    _GLOBALS_["handles"]["gff_maker"] = gff_maker2handle("files/data.gff")
-    _GLOBALS_["handles"]["tab_panzer"] = tab_pannzer2handle("files/anno.out")
+    _PROCESSING_ = {
+        "metadata":[],
+        "plugins":[],
+        "handles":[],
+        "workflow":[]
+    }
+    current_field = None
+    with open("files/config.info") as handle:
+        for line in handle:
+            current_line = line.rstrip("\n")
+
+            if line[0] == "#": continue
+            if current_line == "": continue
 
-    _GLOBALS_["plugins"]["read_fasta"] = importlib.import_module(".read_fasta","Plugins").Plugin()
+            if current_field:
+                assert current_field in ["metadata", "plugins", "handles", "workflow"]
+                if re.match(r"^(<\/)(\w+)(>)$", current_line):
+                    current_field = None
+                else:
+                    _PROCESSING_[current_field].append(current_line) 
+            else:
+                assert re.match(r"^(<)(\w+)(>)$", current_line)
+                assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
+                current_field = current_line[1:-1]
+        handle.close()
 
-    _GLOBALS_["plugins"]["read_gff_maker_3UTR"] = importlib.import_module(".read_gff_maker_3UTR","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_5UTR"] = importlib.import_module(".read_gff_maker_5UTR","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_CDS"] = importlib.import_module(".read_gff_maker_CDS","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_exon"] = importlib.import_module(".read_gff_maker_exon","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_gene"] = importlib.import_module(".read_gff_maker_gene","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_main"] = importlib.import_module(".read_gff_maker_main","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_gff_maker_mRNA"] = importlib.import_module(".read_gff_maker_mRNA","Plugins").Plugin()
+    for element in _PROCESSING_["metadata"]:
+            _GLOBALS_["metadata"][element.split(":")[0]] = element.split(":")[1]
+    for element in _PROCESSING_["plugins"]:
+            _GLOBALS_["plugins"][element.split(":")[0]] = importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin()
+    for element in _PROCESSING_["handles"]:
+            _GLOBALS_["handles"][element.split(":")[0]] = _GLOBALS_["plugins"][element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1])
 
-    _GLOBALS_["plugins"]["read_tab_pannzer_CDS"] = importlib.import_module(".read_tab_pannzer_CDS","Plugins").Plugin()
-    _GLOBALS_["plugins"]["read_tab_pannzer_gene"] = importlib.import_module(".read_tab_pannzer_gene","Plugins").Plugin()
+    max_level = 0
+    for i in range(len(_PROCESSING_["workflow"])):
+        element = _PROCESSING_["workflow"][i]
+        regex = re.compile(r"^(-)+")
+        level = len(regex.search(element).group())
+        max_level = max(max_level, level)
+        _PROCESSING_["workflow"][i] = (level, element[level:], [])
 
+    _PROCESSING_["workflow"].insert(0, (0,None,[]))
 
-    _GLOBALS_["metadata"]["project"] = "temp"
-    _GLOBALS_["metadata"]["division"] = "INV"
-    _GLOBALS_["metadata"]["taxonomy"] = "29031"
-    _GLOBALS_["metadata"]["organism"] = "Phlebotomus papatasi"
-    _GLOBALS_["metadata"]["molecule_type"] = "genomic DNA"
-    _GLOBALS_["metadata"]["topology"] = "linear"
-    _GLOBALS_["metadata"]["description"] = "description"
-    _GLOBALS_["metadata"]["transl_table"] = 0
+    for i in range(max_level, -1, -1):
+        for j in range(len(_PROCESSING_["workflow"])):
+            element = _PROCESSING_["workflow"][j]
+            if element[0] == i:
+                for k in range(1, j+1):
+                    if _PROCESSING_["workflow"][j-k] is not None and _PROCESSING_["workflow"][j-k][0] == i-1:
+                        _PROCESSING_["workflow"][j-k][2].append((
+                            _GLOBALS_["plugins"][_PROCESSING_["workflow"][j][1].split(",")[0]],
+                            _GLOBALS_["handles"][_PROCESSING_["workflow"][j][1].split(",")[1]],
+                            _GLOBALS_["metadata"],
+                            _PROCESSING_["workflow"][j][2]
+                        ))
+                        _PROCESSING_["workflow"][j] = None
+                        break
+        _PROCESSING_["workflow"] = [x for x in _PROCESSING_["workflow"] if x is not None]
 
-    app = app(
-        [
-            (_GLOBALS_["plugins"]["read_fasta"], _GLOBALS_["handles"]["fasta"], _GLOBALS_["metadata"], [
-                (_GLOBALS_["plugins"]["read_gff_maker_main"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
-                    (_GLOBALS_["plugins"]["read_gff_maker_gene"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
-                        (_GLOBALS_["plugins"]["read_tab_pannzer_gene"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
-                    ]),
-                    (_GLOBALS_["plugins"]["read_gff_maker_mRNA"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
-                    (_GLOBALS_["plugins"]["read_gff_maker_CDS"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [
-                        (_GLOBALS_["plugins"]["read_tab_pannzer_CDS"], _GLOBALS_["handles"]["tab_panzer"], _GLOBALS_["metadata"], [])
-                    ]),
-                    (_GLOBALS_["plugins"]["read_gff_maker_3UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], []),
-                    (_GLOBALS_["plugins"]["read_gff_maker_5UTR"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
-                ]),
-                (_GLOBALS_["plugins"]["read_gff_maker_exon"], _GLOBALS_["handles"]["gff_maker"], _GLOBALS_["metadata"], [])
-            ])
-        ])
+    app = app(_PROCESSING_["workflow"][0][2])
     app.run()
\ No newline at end of file

From 6b9f438d2520b6298deeb00424884403d84f21c5 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Sat, 15 May 2021 16:52:27 +0200
Subject: [PATCH 09/22] Refactoring

---
 core.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 main.py |  72 +--------------------------
 2 files changed, 145 insertions(+), 76 deletions(-)

diff --git a/core.py b/core.py
index 03dfa18..3bbc502 100644
--- a/core.py
+++ b/core.py
@@ -1,13 +1,152 @@
 #core.py
 
 import importlib
+import re
 
 class app:
-    def __init__(self, plugins:list=[]):
-        assert plugins != [], "No plugins specified"
+    def __init__(self, config_path):
+        self.metadata = None
+        self.plugins = None
+        self.handles = None
+        self.workflow = None
 
-        self.plugins = plugins
+        config = self.read_config(config_path)
 
+        self.set_metadata(config["metadata"])
+        self.set_plugins(config["plugins"])
+        self.set_handles(config["handles"])
+        self.set_workflow(config["workflow"])
+
+    """
+    This function reads the config file which is divided into four fields :
+        - metadata: contains the general and shared informations for EMBLish
+            as a couple key:value
+        - plugins: contains the list of the plugins that will be used as a
+            triplet plugin_key:plugin_name,plugin_package
+        - handles: contains the list of files that will be used as inputs as
+            a triplet handle_key:plugin,file_path
+        - workflow: contains a hierarchical list of the different step to run
+            as a couple plugin_key,handle_key
+    """
+    def read_config(self, config_path):
+        config = {
+            "metadata":[],
+            "plugins":[],
+            "handles":[],
+            "workflow":[]
+        }
+        
+        current_field = None
+        current_line = None
+
+        with open(config_path) as handle:
+            for line in handle:
+                current_line = line.rstrip("\n")
+
+                #Comments and blank lines ignore
+                if current_line == "": continue
+                if current_line[0] == "#": continue
+
+                #Filling the config object with the content of the configuration file
+                if current_field:
+                    assert current_field in ["metadata", "plugins", "handles", "workflow"]
+
+                    #Checking if we are at the end of a field
+                    if re.match(r"^(<\/)(\w+)(>)$", current_line):
+                        current_field = None
+                    else:
+                        config[current_field].append(current_line)
+
+                else:
+                    assert re.match(r"^(<)(\w+)(>)$", current_line)
+                    assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
+
+                    current_field = current_line[1:-1]
+
+            handle.close()
+        return config
+
+    """
+    This function converts the array containing the metadata into a dictionnary
+    """
+    def set_metadata(self, array):
+        self.metadata = {element.split(":")[0]:element.split(":")[1] for element in array}
+
+    """
+    This function converts the array containing the plugins parameters into a dictionnary 
+    with plugins to call with their key
+    """
+    def set_plugins(self, array):
+        self.plugins = {element.split(":")[0]:importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin() for element in array}
+
+    """
+    This function converts the array containing the handles parameters into a dictionnary
+    with handles to call with their key
+    """
+    def set_handles(self, array):
+        self.handles = {element.split(":")[0]:self.plugins[element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1]) for element in array}
+    
+    """
+    """
+    def set_workflow(self, array):
+        temp = self.refactor_workflow(array)
+        temp = self.merge_workflow(temp)
+        self.workflow = self.convert_workflow_task(temp[0])
+
+    """
+    Convert the list element in triplet level,<plugin_key,handle_key>,[]
+    """
+    def refactor_workflow(self, array):
+
+        for i in range(len(array)):
+            element = array[i]
+            level = len(re.search(r"^(-)+", element).group())
+
+            array[i] = (level, element[level:], [])
+
+        return array
+
+    """
+    Order the elements and create the hierarchical nodes
+    """
+    def merge_workflow(self, array):
+        array.insert(0, (0,None,[]))
+
+        max_level = max(array, key = lambda element: element[0])[0]
+
+        for i in range(max_level, -1, -1):
+            for j in range(len(array)):
+                element = array[j]
+    
+                if element[0] == i:
+                    for k in range(1, j+1):
+                        if (
+                            array[j-k] is not None and
+                            array[j-k][0] == i-1
+                        ):
+                            array[j-k][2].append(element)
+                            array[j] = None
+                            break
+
+            array = [element for element in array if element != None]
+            
+        return array
+
+    """
+    """
+    def convert_workflow_task(self, task):
+        if task[1]:
+            return (
+                self,
+                task[1].split(",")[0],
+                task[1].split(",")[1],
+                [self.convert_workflow_task(sub_task) for sub_task in task[2]]
+            )
+
+        return [self.convert_workflow_task(sub_task) for sub_task in task[2]]
+        
+    """
+    """
     def run(self):
-        for plugin,*args in self.plugins:
-            plugin.process(*args)
\ No newline at end of file
+        for app, key_plugin, *args in self.workflow:
+            app.plugins[key_plugin].process(app, *args)
\ No newline at end of file
diff --git a/main.py b/main.py
index 4d97e6f..1331b9a 100644
--- a/main.py
+++ b/main.py
@@ -2,77 +2,7 @@
 
 from core import app
 
-from Bio import SeqIO
-import pandas as pd
-import re
-import importlib
-
 if __name__ == "__main__":
 
-    _GLOBALS_ = {
-        "handles":dict(),
-        "plugins":dict(),
-        "metadata":dict()
-    }
-
-    _PROCESSING_ = {
-        "metadata":[],
-        "plugins":[],
-        "handles":[],
-        "workflow":[]
-    }
-    current_field = None
-    with open("files/config.info") as handle:
-        for line in handle:
-            current_line = line.rstrip("\n")
-
-            if line[0] == "#": continue
-            if current_line == "": continue
-
-            if current_field:
-                assert current_field in ["metadata", "plugins", "handles", "workflow"]
-                if re.match(r"^(<\/)(\w+)(>)$", current_line):
-                    current_field = None
-                else:
-                    _PROCESSING_[current_field].append(current_line) 
-            else:
-                assert re.match(r"^(<)(\w+)(>)$", current_line)
-                assert current_line[1:-1] in ["metadata", "plugins", "handles", "workflow"]
-                current_field = current_line[1:-1]
-        handle.close()
-
-    for element in _PROCESSING_["metadata"]:
-            _GLOBALS_["metadata"][element.split(":")[0]] = element.split(":")[1]
-    for element in _PROCESSING_["plugins"]:
-            _GLOBALS_["plugins"][element.split(":")[0]] = importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin()
-    for element in _PROCESSING_["handles"]:
-            _GLOBALS_["handles"][element.split(":")[0]] = _GLOBALS_["plugins"][element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1])
-
-    max_level = 0
-    for i in range(len(_PROCESSING_["workflow"])):
-        element = _PROCESSING_["workflow"][i]
-        regex = re.compile(r"^(-)+")
-        level = len(regex.search(element).group())
-        max_level = max(max_level, level)
-        _PROCESSING_["workflow"][i] = (level, element[level:], [])
-
-    _PROCESSING_["workflow"].insert(0, (0,None,[]))
-
-    for i in range(max_level, -1, -1):
-        for j in range(len(_PROCESSING_["workflow"])):
-            element = _PROCESSING_["workflow"][j]
-            if element[0] == i:
-                for k in range(1, j+1):
-                    if _PROCESSING_["workflow"][j-k] is not None and _PROCESSING_["workflow"][j-k][0] == i-1:
-                        _PROCESSING_["workflow"][j-k][2].append((
-                            _GLOBALS_["plugins"][_PROCESSING_["workflow"][j][1].split(",")[0]],
-                            _GLOBALS_["handles"][_PROCESSING_["workflow"][j][1].split(",")[1]],
-                            _GLOBALS_["metadata"],
-                            _PROCESSING_["workflow"][j][2]
-                        ))
-                        _PROCESSING_["workflow"][j] = None
-                        break
-        _PROCESSING_["workflow"] = [x for x in _PROCESSING_["workflow"] if x is not None]
-
-    app = app(_PROCESSING_["workflow"][0][2])
+    app = app("files/config.info")
     app.run()
\ No newline at end of file

From 8d141313f5f5de26e1b2a51b31f7abf0c3598ca7 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Sat, 15 May 2021 16:53:15 +0200
Subject: [PATCH 10/22] Refactoring

---
 Plugins/read_fasta.py            | 68 ++++++++++++++---------
 Plugins/read_gff_maker_3UTR.py   | 60 ++++++++++++++-------
 Plugins/read_gff_maker_5UTR.py   | 60 ++++++++++++++-------
 Plugins/read_gff_maker_CDS.py    | 92 ++++++++++++++++++--------------
 Plugins/read_gff_maker_exon.py   | 64 +++++++++++++++-------
 Plugins/read_gff_maker_gene.py   | 62 +++++++++++++--------
 Plugins/read_gff_maker_mRNA.py   | 75 +++++++++++++++++++-------
 Plugins/read_gff_maker_main.py   | 81 +++++++++++++++++++---------
 Plugins/read_tab_pannzer_CDS.py  | 92 +++++++++++++++++++++-----------
 Plugins/read_tab_pannzer_gene.py | 47 ++++++++++------
 10 files changed, 473 insertions(+), 228 deletions(-)

diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
index a6dade6..ba426db 100644
--- a/Plugins/read_fasta.py
+++ b/Plugins/read_fasta.py
@@ -1,6 +1,5 @@
 #plugin.py
 
-import importlib
 import itertools
 
 from Bio import SeqIO
@@ -8,27 +7,46 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-
-            for record in handle:
-
-                #initialize record
-                _record_ = SeqRecord(
-                    record.seq,
-                    record.id,
-                    dbxrefs=["Project:" + metadata["project"]],
-                    annotations={"division":metadata["division"],"molecule_type":metadata["molecule_type"],"organism":metadata["organism"],"taxonomy":metadata["taxonomy"],"topology":metadata["topology"]},
-                    description=""
-                )
-
-                #calls
-                receiver = []
-                for call,*args in calls:
-                    receiver.extend(call.process(*args, target=_record_.id))
-        
-                #post output treatment
-                _record_.features = list(itertools.chain(*receiver))
-
-                #outputing
-                with open(f"out/{_record_.id}.dat", "w") as o:
-                    print(_record_.format("embl"), file=o)
\ No newline at end of file
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqRecord(
+            pre_feature.seq,
+            pre_feature.id,
+            dbxrefs=["Project:" + metadata["project"]],
+            annotations={
+                "division":metadata["division"],
+                "molecule_type":metadata["molecule_type"],
+                "organism":metadata["organism"],
+                "taxonomy":metadata["taxonomy"],
+                "topology":metadata["topology"]},
+            description="")
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
+
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender += temp
+    
+        return sender
+
+    """
+    """
+    def merge(self, feature, receiver):
+        feature.features = receiver    
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+
+        for element in app.handles[key_handle]:
+            feature = self.feature_initialize(element, app.metadata)
+            receiver = self.callbacks(app, calls, (feature.id))
+            self.merge(feature, receiver)
+            
+            with open(f"out/{feature.id}.dat", "w") as o:
+                print(feature.format("embl"), file=o)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
index dabca35..ef46754 100644
--- a/Plugins/read_gff_maker_3UTR.py
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -5,23 +5,47 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        try:
-            location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index())
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="3'UTR",
+            qualifiers={
+                "gene":None,
+                "note":list()})
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
+
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+
+        return sender
+    
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
 
-            _sub_features_ = [
-                SeqFeature(
-                    FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
-                    type="3'UTR",
-                    qualifiers={
-                        "gene":target[1],
-                        "note":list()})]
-                
-            #calls
-            receiver = []
-            for call,*args in calls:
-                receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "3'UTR")))
-            
-            return _sub_features_
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index().iloc[0,:],
+                app.metadata)
         except KeyError:
-            return []
\ No newline at end of file
+            return None
+
+        feature.qualifiers["gene"] = target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "3'UTR"))
+        
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
index 5b72281..60fae42 100644
--- a/Plugins/read_gff_maker_5UTR.py
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -5,23 +5,47 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        try:
-            location = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index())
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="5'UTR",
+            qualifiers={
+                "gene":None,
+                "note":list()})
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
 
-            _sub_features_ = [
-                SeqFeature(
-                    FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
-                    type="5'UTR",
-                    qualifiers={
-                        "gene":target[1],
-                        "note":list()})]
-                
-            #calls
-            receiver = []
-            for call,*args in calls:
-                receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "5'UTR")))
-            
-            return _sub_features_
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+
+        return sender
+
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index().iloc[0,:],
+                app.metadata)
         except KeyError:
-            return []
\ No newline at end of file
+            return None
+            
+        feature.qualifiers["gene"] = target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "5'UTR"))
+        
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
index 7b29f18..48bcb29 100644
--- a/Plugins/read_gff_maker_CDS.py
+++ b/Plugins/read_gff_maker_CDS.py
@@ -4,47 +4,61 @@
 import itertools
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-def mergeLocations(_locationArray_):
-    return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
-
-        _sub_features_ = [
-            SeqFeature(
-                mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
-                type="CDS",
-                qualifiers={
-                    "gene":target[1],
-                    "product":list(),
-                    "note":list(),
-                    "db_xref":list(),
-                    "translation":list(),
-                    "transl_table":metadata["transl_table"]})]
-
-        #calls
-        receiver = []
-        for call,*args in calls:
-            receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "CDS")))
-        
-        annotations = list(itertools.chain(receiver))
-        for annotation in annotations:
-            if "product" in annotation.keys() and annotation["product"] != []:
-                for sub_feature in _sub_features_:
-                    sub_feature.qualifiers["product"].extend(annotation["product"])
-
-            if "note"  in annotation.keys() and annotation["note"] != []:
-                for sub_feature in _sub_features_:
-                    sub_feature.qualifiers["note"].extend(annotation["note"])
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        refactor_pre_feature = lambda element: FeatureLocation(
+            int(element[0]),
+            int(element[1]),
+            (1,-1)[element[2] == "-"])
+        merge_pre_feature = lambda  array: array[0] if len(array) == 1 else CompoundLocation(array)
 
-            if "db_xref" in annotation.keys() and annotation["db_xref"] != []:
-                for sub_feature in _sub_features_:
-                    sub_feature.qualifiers["db_xref"].extend(annotation["db_xref"])
+        return SeqFeature(
+            merge_pre_feature(
+                pre_feature.apply(refactor_pre_feature, axis=1)),
+            type="CDS",
+            qualifiers={
+                "gene":None,
+                "product":list(),
+                "note":list(),
+                "db_xref":list(),
+                "translation":list(),
+                "transl_table":metadata["transl_table"]})
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+        return sender
 
-            if "translation" in annotation.keys() and annotation["translation"] != []:
-                for sub_feature in _sub_features_:
-                    sub_feature.qualifiers["translation"].extend(annotation["translation"])
+    """
+    """
+    def merge(self, feature, receiver):
+        for element in receiver:
+            for key in element.keys():
+                feature.qualifiers[key].extend(element[key])
+        return feature 
 
-        return _sub_features_
\ No newline at end of file
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
+                app.metadata)
+        except KeyError:
+            return None
+            
+        feature.qualifiers["gene"] = target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "CDS"))
+        
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index e4b84ef..1aeb42b 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -8,21 +8,49 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        exons = handle.loc[(target, slice(None), "exon"),:].reset_index()
-
-        for index, exon in exons.iterrows():
-            _features_subset_ = [
-                SeqFeature(
-                    FeatureLocation(int(exon["start"]), int(exon["stop"]), (1,-1)[exon["strand"] == "-"]),
-                    type="exon",
-                    qualifiers={}
-                )]
-
-            #calls
-            receiver = []
-            for call, *args in calls:
-                receiver.extend(call.process(*args, target=(target)))
-
-             
-            yield _features_subset_
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="exon",
+            qualifiers={})
+    
+    """
+    """
+    def multi_feature_initialize(self, pre_multi_feature, metadata):
+        for _, element in pre_multi_feature:
+            yield self.feature_initialize(element, metadata)
+
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
+
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append()
+
+        return sender
+    
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.multi_feature_initialize(
+                app.handles[key_handle].loc[(target, slice(None), "exon"),:].reset_index().iterrows(),
+                app.metadata)
+        except KeyError:
+            return None
+            
+        receiver = self.callbacks(
+            app,
+            calls,
+            target)
+        return self.merge(feature, receiver)
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
index 069b66b..9d5f7da 100644
--- a/Plugins/read_gff_maker_gene.py
+++ b/Plugins/read_gff_maker_gene.py
@@ -6,26 +6,46 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        location = (handle.loc[(target[0], target[1], "gene"),:].reset_index())
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="gene",
+            qualifiers={
+                "gene":None,
+                "note":list()})
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
 
-        _sub_features_ = [
-            SeqFeature(
-                FeatureLocation(int(location.iloc[0,0]), int(location.iloc[0,1]), (1,-1)[location.iloc[0,2] == "-"]),
-                type="gene",
-                qualifiers={
-                    "gene":target[1],
-                    "note":list()})]
-            
-        #calls
-        receiver = []
-        for call,*args in calls:
-            receiver.extend(call.process(*args, target=(target[0], target[1], "gene")))
-        
-        annotations = list(itertools.chain(receiver))
-        for annotation in annotations:
-            if "note" in annotation.keys() and annotation["note"] != []:
-                for sub_feature in _sub_features_:
-                    sub_feature.qualifiers["note"].extend(annotation["note"])
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+                
+        return sender
 
-        return _sub_features_
\ No newline at end of file
+    """
+    """
+    def merge(self, feature, receiver):
+        for element in receiver:
+            for key in element.keys():
+                feature.qualifiers[key].extend(element[key])
+        return feature
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        feature = self.feature_initialize(
+            app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
+            app.metadata)
+        feature.qualifiers["gene"]=target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "gene"))
+
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
index 7639664..e4e8bc0 100644
--- a/Plugins/read_gff_maker_mRNA.py
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -3,25 +3,62 @@
 import pandas as pd
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-def mergeLocations(_locationArray_):
-    return _locationArray_[0] if len(_locationArray_) == 1 else CompoundLocation(_locationArray_)
-
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        locations = (handle.loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index())
-
-        #initialise
-        _sub_features_ = [
-            SeqFeature(
-                mergeLocations(locations.apply(lambda location: FeatureLocation(int(location[0]), int(location[1]), (1,-1)[location[2] == "-"]), axis=1)),
-                type="mRNA",
-                qualifiers={
-                    "gene":target[1]})]
-
-        #calls
-        receiver = []
-        for call,*args in calls:
-            receiver.extend(call.process(*args, target=(target[0], f"{target[1]}-mRNA-1", "mRNA")))
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        refactor_pre_feature = lambda element: FeatureLocation(
+            int(element[0]),
+            int(element[1]),
+            (1,-1)[element[2] == "-"]
+        )
+        merge_pre_feature = lambda  array: array[0] if len(array) == 1 else CompoundLocation(array)
+
+        return SeqFeature(
+            merge_pre_feature(
+                pre_feature.apply(refactor_pre_feature, axis=1)
+            ),
+            type="mRNA",
+            qualifiers={
+                "gene":None
+            }
+        )
+
+    """
+    """    
+    def callbacks(self, app, calls, target):
+        sender = []
+
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+
+        return sender
+
+
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
+                app.metadata
+            )
+        except KeyError:
+            return None
+            
+        feature.qualifiers["gene"]=target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "mRNA")
+        )
         
-        return _sub_features_
\ No newline at end of file
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_main.py b/Plugins/read_gff_maker_main.py
index eac0f9c..5cc959f 100644
--- a/Plugins/read_gff_maker_main.py
+++ b/Plugins/read_gff_maker_main.py
@@ -1,35 +1,66 @@
 #read_gff_maker_main.py
 
 import pandas as pd
-import re
 
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
 class Plugin:
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="source",
+            qualifiers={
+                "oganism":metadata["organism"],
+                "mol_type":metadata["molecule_type"],
+                "db_xref":list()})
+    
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
 
-    def process(self, handle, metadata, calls:list=[], target=None):
-        
-        location = (handle.loc[(target, slice(None), "contig"),:].reset_index())
-        _feature_ = [
-            SeqFeature(
-                FeatureLocation(int(location.iloc[0,3]), int(location.iloc[0,4]), (1,-1)[location.iloc[0,5] == "-"]),
-                type="source",
-                qualifiers={
-                    "oganism":metadata["organism"],
-                    "mol_type":metadata["molecule_type"],
-                    "db_xref":list()})]
-
-        yield _feature_
-
-        for gene in handle.loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"]:
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender.append(temp)
+
+        return sender
+
+    """
+    """
+    def callbacks_with_iterator(self, app, calls, target, iterator):
+        sender = []
+
+        for element in iterator:
+            temp = self.callbacks(app, calls, (target, element))
+            if temp:
+                sender.extend(temp)
+        return sender
             
-            #initialize features
-            _features_subset_ = []
-               
-            #calls
-            receiver = []
-            for call,*args in calls:
-                receiver.extend(call.process(*args, target=(target, gene)))
+    """
+    """
+    def merge(self, feature, receiver):
+        return [feature] + receiver
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target, slice(None), "contig"),:].reset_index().iloc[0,:], 
+                app.metadata)
+        except KeyError:
+            return None
 
-            _features_subset_ = receiver
-            yield _features_subset_
\ No newline at end of file
+        try:
+            receiver = self.callbacks_with_iterator(
+                app, 
+                calls, 
+                target, 
+                app.handles[key_handle].loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"])
+        except KeyError:
+            receiver = []
+        
+        return  self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
index 7b0fd2e..f4fcee0 100644
--- a/Plugins/read_tab_pannzer_CDS.py
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -4,45 +4,77 @@
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        #print(self.feature_initialize_db_xref(pre_feature))
+        return {
+            "db_xref": self.feature_initialize_db_xref(pre_feature),
+            "translation": self.feature_initialize_translation(pre_feature),
+            "product": self.feature_initialize_product(pre_feature)
+        }
 
-        #initialisation
-        try:
-            anno_bp = handle.loc[(target[1], "BP_ARGOT"),:].reset_index()["id"]
-        except KeyError:
-            anno_bp = pd.Series([])
+    """
+    """
+    def feature_initialize_db_xref(self, pre_feature):
+        sender = pd.Series([])
 
-        try:
-            anno_cc = handle.loc[(target[1], "CC_ARGOT"),:].reset_index()["id"]
-        except KeyError:
-            anno_cc = pd.Series([])
+        for field in ["BP_ARGOT", "CC_ARGOT", "MF_ARGOT"]:
+            try:
+                sender = pd.concat([sender, pre_feature(field)["id"]])
+            except KeyError:
+                pass
+        return [f"GO:{str(element)}" for element in sender]
 
+    """
+    """
+    def feature_initialize_translation(self, pre_feature):
+        sender = list()
         try:
-            anno_mf = handle.loc[(target[1], "MF_ARGOT"),:].reset_index()["id"]
+            sender = [pre_feature("qseq").iloc[0,1]]
         except KeyError:
-            anno_mf = pd.Series([])
+            pass
+        return sender 
 
+    """
+    """
+    def feature_initialize_product(self, pre_feature):
+        sender = list()
         try:
-            anno_qsec = [handle.loc[(target[1], "qseq"),:].reset_index().iloc[0,1]]
+            sender = [pre_feature("DE").iloc[0,1]]
         except KeyError:
-            anno_qsec = list()
+            pass
+        return sender
 
-        try:
-            anno_de =  [handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]]
-        except KeyError:
-            anno_de = list()
-
-        _annotations_ = [{
-            "db_xref":[f"GO:{str(go)}" for go in pd.concat([anno_bp, anno_cc, anno_mf])],
-            "translation": anno_qsec,
-            "product": anno_de
-        }]
+    """
+    """
+    def callbacks(self, app, calls, target):
+        sender = []
 
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender += temp
     
-        #calls
-        receiver = []
-        for call,*args in calls:
-            receiver.extend(call.process(*args, target=target))
+        return sender
+
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
+
+    """
+    """
+    def process(self, app, key_handle, calls:list=[], target=None):
+
+        feature = self.feature_initialize(
+            (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
+            app.metadata)
+
+        receiver = self.callbacks(
+            app,
+            calls,
+            target
+        )
 
-        #output
-        return _annotations_
\ No newline at end of file
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
index 521797e..3824bc6 100644
--- a/Plugins/read_tab_pannzer_gene.py
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -1,28 +1,45 @@
 #read_tab_pannzer_gene.py
 
-#read_tab_pannzer_CDS
-
 import pandas as pd
 
 class Plugin:
 
-    def process(self, handle, metadata, calls:list=[], target=None):
+    def feature_initialize(self, pre_feature, metadata):
+        return {
+            "note": self.feature_initialize_note(pre_feature)
+        }
 
-        #initialisation
+    def feature_initialize_note(self, pre_feature):
+        sender = list()
         try:
-            anno_de =  handle.loc[(target[1], "DE"),:].reset_index().iloc[0,1]
+            sender = [pre_feature("DE").iloc[0,1]]
         except KeyError:
-            anno_de = []
+            pass
+        return sender
 
-        _annotations_ = [{
-            "note": anno_de
-        }]
+    def callbacks(self, app, calls, target):
+        sender = []
 
+        for app, key_plugin, *args in calls:
+            temp = app.plugins[key_plugin].process(app, *args, target)
+            if temp:
+                sender += temp
     
-        #calls
-        receiver = []
-        for call,*args in calls:
-            receiver.extend(call.process(*args, target=target))
+        return sender
+
+    def merge(self, feature, receiver):
+        return feature
+
+    def process(self, app, key_handle, calls:list=[], target=None):
+
+        feature = self.feature_initialize(            
+            (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
+            app.metadata)
+
+        receiver = self.callbacks(
+            app,
+            calls,
+            target
+        )
 
-        #output
-        return _annotations_
\ No newline at end of file
+        return self.merge(feature, receiver)
\ No newline at end of file

From 36c375948dde1098282f53f556e95dc3ca4f879e Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:24:25 +0200
Subject: [PATCH 11/22] Reader refactoring

plugin rewrite, hierarchical superclass creation
---
 Plugins/__read__.py                    | 60 +++++++++++++++++++++++++
 Plugins/__read_gff_maker__.py          | 21 +++++++++
 Plugins/__read_tab_pannzer__.py        | 10 +++++
 Plugins/read_fasta.py                  | 26 +++++------
 Plugins/read_gff_maker_3UTR.py         | 22 ++--------
 Plugins/read_gff_maker_5UTR.py         | 22 ++--------
 Plugins/read_gff_maker_CDS.py          | 22 ++++------
 Plugins/read_gff_maker_exon.py         | 21 ++-------
 Plugins/read_gff_maker_gene.py         | 20 ++-------
 Plugins/read_gff_maker_mRNA.py         | 23 ++--------
 Plugins/read_gff_maker_misc_feature.py | 32 ++++++++++++++
 Plugins/read_gff_maker_source.py       | 61 ++++++++++++++++++++++++++
 Plugins/read_tab_pannzer_CDS.py        | 22 ++--------
 Plugins/read_tab_pannzer_gene.py       |  9 ++--
 14 files changed, 228 insertions(+), 143 deletions(-)
 create mode 100644 Plugins/__read__.py
 create mode 100644 Plugins/__read_gff_maker__.py
 create mode 100644 Plugins/__read_tab_pannzer__.py
 create mode 100644 Plugins/read_gff_maker_misc_feature.py
 create mode 100644 Plugins/read_gff_maker_source.py

diff --git a/Plugins/__read__.py b/Plugins/__read__.py
new file mode 100644
index 0000000..0950107
--- /dev/null
+++ b/Plugins/__read__.py
@@ -0,0 +1,60 @@
+# __read__.py
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+from Plugins.__caller__ import Caller, CallerFailedVerification
+
+class __Read__(__Plugin__):
+    
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        raise UndefinedMethodError("feature_initialize has not been defined.")
+
+    """
+    """
+    def callbacks(self, app, calls, target):
+        raise UndefinedMethodError("callbacks has not been defined.")
+
+    """
+    """
+    def callbacks_extend(self, app, calls, target):
+        caller = Caller(app)
+        sender = []
+        for app, key_plugin, *args in calls:
+            temp = None
+            try:
+                temp = caller.run(app.plugins[key_plugin].process, app, *args, target)
+            except CallerFailedVerification:
+                sender = []
+            if temp:
+                sender.extend(temp)
+        return sender
+
+    """
+    """
+    def callbacks_append(self, app, calls, target):
+        caller = Caller(app)
+        sender = []
+        for app, key_plugin, *args in calls:
+            temp = None
+            try:
+                temp = caller.run(app.plugins[key_plugin].process, app, *args, target)
+            except CallerFailedVerification:
+                sender = []
+            if temp:
+                sender.append(temp)
+        return sender
+
+    """
+    """
+    def merge(self, feature, receiver):
+        return feature
+
+    """
+    """
+    def required_metadata_check(self, app, keys:list=[]):
+        if keys:
+            for key in keys:
+                if not key in app.metadata:
+                    raise RequiredMetadataError(f"Required metadata attribute, {key}, not found.")
+        return True
\ No newline at end of file
diff --git a/Plugins/__read_gff_maker__.py b/Plugins/__read_gff_maker__.py
new file mode 100644
index 0000000..13ceb97
--- /dev/null
+++ b/Plugins/__read_gff_maker__.py
@@ -0,0 +1,21 @@
+#__read_gff_maker__.py
+
+from Plugins.__read__ import __Read__
+
+class __ReadGFFMaker__(__Read__):
+
+    """
+    """
+    def multi_feature_initialize(self, pre_multi_feature, metadata):
+        raise UndefinedMethodError("multi_feature_initialize has not been defined.")
+
+    """
+    """
+    def callbacks(self, app, calls, target):
+        return super().callbacks_append(app, calls, target)
+
+    """
+    """
+    def callbacks_with_iterator(self, app, calls, target, iterator):
+        raise UndefinedMethodError("callbacks_with_iterator has not been defined.")
+
diff --git a/Plugins/__read_tab_pannzer__.py b/Plugins/__read_tab_pannzer__.py
new file mode 100644
index 0000000..4c78378
--- /dev/null
+++ b/Plugins/__read_tab_pannzer__.py
@@ -0,0 +1,10 @@
+#__read_tab_pannzer__.py
+
+from Plugins.__read__ import __Read__
+
+class __ReadTabPannzer__(__Read__):
+
+    """
+    """
+    def callbacks(self, app, calls, target):
+        return self.callbacks_extend(app, calls, target)
\ No newline at end of file
diff --git a/Plugins/read_fasta.py b/Plugins/read_fasta.py
index ba426db..8876aa0 100644
--- a/Plugins/read_fasta.py
+++ b/Plugins/read_fasta.py
@@ -4,9 +4,9 @@
 
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord 
+from Plugins.__read__ import __Read__
 
-class Plugin:
-
+class Plugin(__Read__):
     """
     """
     def feature_initialize(self, pre_feature, metadata):
@@ -25,14 +25,7 @@ def feature_initialize(self, pre_feature, metadata):
     """
     """
     def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender += temp
-    
-        return sender
+        return super().callbacks_extend(app, calls, target)
 
     """
     """
@@ -41,12 +34,19 @@ def merge(self, feature, receiver):
 
     """
     """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
 
         for element in app.handles[key_handle]:
             feature = self.feature_initialize(element, app.metadata)
+            app.current_sequence = feature.seq
             receiver = self.callbacks(app, calls, (feature.id))
             self.merge(feature, receiver)
             
-            with open(f"out/{feature.id}.dat", "w") as o:
-                print(feature.format("embl"), file=o)
\ No newline at end of file
+            #with open(f"out/{feature.id}.dat", "w") as o:
+            with open(f"{feature.id}.dat", "w") as o:
+                print(feature.format("embl"), file=o)
+
+    """
+    """
+    def required_metadata_check(self, app, keys:list=[]):
+        return super().required_metadata_check(app, ["project", "transl_table", "molecule_type", "organism", "taxonomy", "topology"])
diff --git a/Plugins/read_gff_maker_3UTR.py b/Plugins/read_gff_maker_3UTR.py
index ef46754..91ca810 100644
--- a/Plugins/read_gff_maker_3UTR.py
+++ b/Plugins/read_gff_maker_3UTR.py
@@ -1,9 +1,10 @@
 #read_gff_maker_3UTR.py
 
 import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-class Plugin:
+class Plugin(__ReadGFFMaker__):
 
     """
     """
@@ -17,24 +18,7 @@ def feature_initialize(self, pre_feature, metadata):
     
     """
     """
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append(temp)
-
-        return sender
-    
-    """
-    """
-    def merge(self, feature, receiver):
-        return feature
-
-    """
-    """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         try:
             feature = self.feature_initialize(
                 app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "three_prime_UTR"),:].reset_index().iloc[0,:],
diff --git a/Plugins/read_gff_maker_5UTR.py b/Plugins/read_gff_maker_5UTR.py
index 60fae42..f7813f0 100644
--- a/Plugins/read_gff_maker_5UTR.py
+++ b/Plugins/read_gff_maker_5UTR.py
@@ -1,9 +1,10 @@
 #read_gff_maker_5UTR.py
 
 import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-class Plugin:
+class Plugin(__ReadGFFMaker__):
 
     """
     """
@@ -17,24 +18,7 @@ def feature_initialize(self, pre_feature, metadata):
     
     """
     """
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append(temp)
-
-        return sender
-
-    """
-    """
-    def merge(self, feature, receiver):
-        return feature
-
-    """
-    """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         try:
             feature = self.feature_initialize(
                 app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "five_prime_UTR"),:].reset_index().iloc[0,:],
diff --git a/Plugins/read_gff_maker_CDS.py b/Plugins/read_gff_maker_CDS.py
index 48bcb29..8b77fd2 100644
--- a/Plugins/read_gff_maker_CDS.py
+++ b/Plugins/read_gff_maker_CDS.py
@@ -2,9 +2,10 @@
 
 import pandas as pd
 import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-class Plugin:
+class Plugin(__ReadGFFMaker__):
 
     """
     """
@@ -27,16 +28,6 @@ def feature_initialize(self, pre_feature, metadata):
                 "translation":list(),
                 "transl_table":metadata["transl_table"]})
     
-    """
-    """
-    def callbacks(self, app, calls, target):
-        sender = []
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append(temp)
-        return sender
-
     """
     """
     def merge(self, feature, receiver):
@@ -47,7 +38,7 @@ def merge(self, feature, receiver):
 
     """
     """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         try:
             feature = self.feature_initialize(
                 app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
@@ -61,4 +52,9 @@ def process(self, app, key_handle, calls:list=[], target=None):
             calls,
             (target[0], f"{target[1]}-mRNA-1", "CDS"))
         
-        return self.merge(feature, receiver)
\ No newline at end of file
+        return self.merge(feature, receiver)
+
+    """
+    """
+    def required_metadata_check(self, app, keys:list=[]):
+        return super().required_metadata_check(app, ["transl_table"])
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_exon.py b/Plugins/read_gff_maker_exon.py
index 1aeb42b..4b23ebb 100644
--- a/Plugins/read_gff_maker_exon.py
+++ b/Plugins/read_gff_maker_exon.py
@@ -3,10 +3,11 @@
 import pandas as pd
 import re
 
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
 
-class Plugin:
+class Plugin(__ReadGFFMaker__):
 
     """
     """
@@ -22,26 +23,10 @@ def multi_feature_initialize(self, pre_multi_feature, metadata):
         for _, element in pre_multi_feature:
             yield self.feature_initialize(element, metadata)
 
-    """
-    """
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append()
-
-        return sender
-    
-    """
-    """
-    def merge(self, feature, receiver):
-        return feature
 
     """
     """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         try:
             feature = self.multi_feature_initialize(
                 app.handles[key_handle].loc[(target, slice(None), "exon"),:].reset_index().iterrows(),
diff --git a/Plugins/read_gff_maker_gene.py b/Plugins/read_gff_maker_gene.py
index 9d5f7da..00cf5c3 100644
--- a/Plugins/read_gff_maker_gene.py
+++ b/Plugins/read_gff_maker_gene.py
@@ -2,10 +2,10 @@
 
 import pandas as pd
 import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-class Plugin:
-
+class Plugin(__ReadGFFMaker__):
     """
     """
     def feature_initialize(self, pre_feature, metadata):
@@ -15,18 +15,6 @@ def feature_initialize(self, pre_feature, metadata):
             qualifiers={
                 "gene":None,
                 "note":list()})
-    
-    """
-    """
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append(temp)
-                
-        return sender
 
     """
     """
@@ -38,7 +26,7 @@ def merge(self, feature, receiver):
 
     """
     """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         feature = self.feature_initialize(
             app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
             app.metadata)
@@ -47,5 +35,5 @@ def process(self, app, key_handle, calls:list=[], target=None):
             app,
             calls,
             (target[0], f"{target[1]}-mRNA-1", "gene"))
-
+            
         return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_mRNA.py b/Plugins/read_gff_maker_mRNA.py
index e4e8bc0..f8e76f8 100644
--- a/Plugins/read_gff_maker_mRNA.py
+++ b/Plugins/read_gff_maker_mRNA.py
@@ -1,9 +1,10 @@
 #read_gff_maker_mRNA.py
 
 import pandas as pd
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
 from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
 
-class Plugin:
+class Plugin(__ReadGFFMaker__):
 
     """
     """
@@ -25,27 +26,9 @@ def feature_initialize(self, pre_feature, metadata):
             }
         )
 
-    """
-    """    
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender.append(temp)
-
-        return sender
-
-
-    """
-    """
-    def merge(self, feature, receiver):
-        return feature
-
     """
     """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
         try:
             feature = self.feature_initialize(
                 app.handles[key_handle].loc[(target[0], f"{target[1]}-mRNA-1", "CDS"),:].reset_index(),
diff --git a/Plugins/read_gff_maker_misc_feature.py b/Plugins/read_gff_maker_misc_feature.py
new file mode 100644
index 0000000..aeae5fc
--- /dev/null
+++ b/Plugins/read_gff_maker_misc_feature.py
@@ -0,0 +1,32 @@
+#read_gff_maker_misc_feature.py
+
+import pandas as pd
+import itertools
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__ReadGFFMaker__):
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="misc_feature",
+            qualifiers={
+                #"gene":None,
+                "note":list()})
+
+    """
+    """
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
+        feature = self.feature_initialize(
+            app.handles[key_handle].loc[(target[0], target[1], "gene"),:].reset_index().iloc[0,:],
+            app.metadata)
+        feature.qualifiers["note"].append(target[1])
+        #feature.qualifiers["gene"]=target[1]
+        receiver = self.callbacks(
+            app,
+            calls,
+            (target[0], f"{target[1]}-mRNA-1", "misc_feature"))
+            
+        return self.merge(feature, receiver)
\ No newline at end of file
diff --git a/Plugins/read_gff_maker_source.py b/Plugins/read_gff_maker_source.py
new file mode 100644
index 0000000..04fe6db
--- /dev/null
+++ b/Plugins/read_gff_maker_source.py
@@ -0,0 +1,61 @@
+#read_gff_maker_source.py
+
+import pandas as pd
+
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__ReadGFFMaker__):
+
+    """
+    """
+    def feature_initialize(self, pre_feature, metadata):
+        return SeqFeature(
+            FeatureLocation(int(pre_feature["start"]), int(pre_feature["stop"]), (1,-1)[pre_feature["strand"] == "-"]),
+            type="source",
+            qualifiers={
+                "organism":metadata["organism"],
+                "mol_type":metadata["molecule_type"],
+                "db_xref":list()})
+
+    """
+    """
+    def callbacks_with_iterator(self, app, calls, target, iterator):
+        sender = []
+
+        for element in iterator:
+            temp = self.callbacks(app, calls, (target, element))
+            if temp:
+                sender.extend(temp)
+        return sender
+            
+    """
+    """
+    def merge(self, feature, receiver):
+        return [feature] + receiver
+
+    """
+    """
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
+        try:
+            feature = self.feature_initialize(
+                app.handles[key_handle].loc[(target, slice(None), "contig"),:].reset_index().iloc[0,:], 
+                app.metadata)
+        except KeyError:
+            return None
+
+        try:
+            receiver = self.callbacks_with_iterator(
+                app, 
+                calls, 
+                target, 
+                app.handles[key_handle].loc[(target, slice(None), "gene"),:].reset_index()["sub_seq_id"])
+        except KeyError:
+            receiver = []
+        
+        return  self.merge(feature, receiver)
+
+    """
+    """
+    def required_metadata_check(self, app, keys:list=[]):
+        return super().required_metadata_check(app, ["organism", "molecule_type"])
\ No newline at end of file
diff --git a/Plugins/read_tab_pannzer_CDS.py b/Plugins/read_tab_pannzer_CDS.py
index f4fcee0..778d6f7 100644
--- a/Plugins/read_tab_pannzer_CDS.py
+++ b/Plugins/read_tab_pannzer_CDS.py
@@ -1,8 +1,9 @@
 #read_tab_pannzer_CDS
 
 import pandas as pd
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
 
-class Plugin:
+class Plugin(__ReadTabPannzer__):
 
     """
     """
@@ -48,24 +49,7 @@ def feature_initialize_product(self, pre_feature):
 
     """
     """
-    def callbacks(self, app, calls, target):
-        sender = []
-
-        for app, key_plugin, *args in calls:
-            temp = app.plugins[key_plugin].process(app, *args, target)
-            if temp:
-                sender += temp
-    
-        return sender
-
-    """
-    """
-    def merge(self, feature, receiver):
-        return feature
-
-    """
-    """
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
 
         feature = self.feature_initialize(
             (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
diff --git a/Plugins/read_tab_pannzer_gene.py b/Plugins/read_tab_pannzer_gene.py
index 3824bc6..ed1fb2a 100644
--- a/Plugins/read_tab_pannzer_gene.py
+++ b/Plugins/read_tab_pannzer_gene.py
@@ -1,8 +1,9 @@
 #read_tab_pannzer_gene.py
 
 import pandas as pd
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
 
-class Plugin:
+class Plugin(__ReadTabPannzer__):
 
     def feature_initialize(self, pre_feature, metadata):
         return {
@@ -27,15 +28,11 @@ def callbacks(self, app, calls, target):
     
         return sender
 
-    def merge(self, feature, receiver):
-        return feature
-
-    def process(self, app, key_handle, calls:list=[], target=None):
+    def process(self, app, caller_mode, key_handle, calls:list=[], target=None):
 
         feature = self.feature_initialize(            
             (lambda field: app.handles[key_handle].loc[(target[1], field)].reset_index()),
             app.metadata)
-
         receiver = self.callbacks(
             app,
             calls,

From 96b60a6b11a69a5d563d80c65a648624d4efb389 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:25:29 +0200
Subject: [PATCH 12/22] to_handle renaming

---
 Plugins/to_handle_fasta.py       | 10 ++++++++++
 Plugins/to_handle_gff_maker.py   | 18 ++++++++++++++++++
 Plugins/to_handle_tab_pannzer.py | 16 ++++++++++++++++
 3 files changed, 44 insertions(+)
 create mode 100644 Plugins/to_handle_fasta.py
 create mode 100644 Plugins/to_handle_gff_maker.py
 create mode 100644 Plugins/to_handle_tab_pannzer.py

diff --git a/Plugins/to_handle_fasta.py b/Plugins/to_handle_fasta.py
new file mode 100644
index 0000000..c1745ef
--- /dev/null
+++ b/Plugins/to_handle_fasta.py
@@ -0,0 +1,10 @@
+#to_handle_fasta.py
+
+from Bio import SeqIO
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file
diff --git a/Plugins/to_handle_gff_maker.py b/Plugins/to_handle_gff_maker.py
new file mode 100644
index 0000000..4c5e93a
--- /dev/null
+++ b/Plugins/to_handle_gff_maker.py
@@ -0,0 +1,18 @@
+#to_handle_gff_maker.py
+
+import pandas as pd
+import re
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            gff = pd.read_csv(handle, sep="\t")
+            gff = gff.reset_index()
+            gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
+            gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
+            gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
+            gff["start"] = gff["start"].apply(lambda x: x-1)
+            return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]) 
+        
\ No newline at end of file
diff --git a/Plugins/to_handle_tab_pannzer.py b/Plugins/to_handle_tab_pannzer.py
new file mode 100644
index 0000000..e97c369
--- /dev/null
+++ b/Plugins/to_handle_tab_pannzer.py
@@ -0,0 +1,16 @@
+#to_handle_tab_pannzer.py
+
+import pandas as pd
+from Plugins.__plugin__ import __Plugin__
+
+class Plugin(__Plugin__):
+
+    def process(self, file_path):
+        with open(file_path) as handle:
+            anno = pd.read_csv(handle, sep="\t")
+
+            anno.sort_values(by=["qpid", "type"], inplace=True)
+            anno.drop(["score", "PPV"], axis=1, inplace=True)
+            anno.set_index(["qpid", "type", "id", "desc"], inplace=True)
+            
+            return anno
\ No newline at end of file

From a34b6ed0791ef232f9884719a2663e8d79e51ad0 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:25:59 +0200
Subject: [PATCH 13/22] verifier plugin creation

---
 Plugins/__verify__.py           | 19 +++++++++++++++++
 Plugins/verify_gff_maker_CDS.py | 37 +++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 Plugins/__verify__.py
 create mode 100644 Plugins/verify_gff_maker_CDS.py

diff --git a/Plugins/__verify__.py b/Plugins/__verify__.py
new file mode 100644
index 0000000..e0f2c02
--- /dev/null
+++ b/Plugins/__verify__.py
@@ -0,0 +1,19 @@
+#__verify__.py
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+
+class __Verify__(__Plugin__):
+    pass
+
+class FailedVerification(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"FailedVerification: {self.message}"
+        else:
+            return "FailedVerification has been raised"
\ No newline at end of file
diff --git a/Plugins/verify_gff_maker_CDS.py b/Plugins/verify_gff_maker_CDS.py
new file mode 100644
index 0000000..a9bf4aa
--- /dev/null
+++ b/Plugins/verify_gff_maker_CDS.py
@@ -0,0 +1,37 @@
+#verify_gff_maker_CDS.py
+
+from Plugins.__verify__ import __Verify__, FailedVerification
+from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
+
+class Plugin(__Verify__):
+    
+    def process(self, app, element_to_verify):
+        conversion_map = {"A":"T", "T":"A", "C":"G", "G":"C"}
+        convert = lambda array : "".join([conversion_map[element] for element in array[::-1]])
+
+        start_codon = None
+        stop_codon = None
+
+        feature_location = element_to_verify.location
+        if(isinstance(feature_location, FeatureLocation)):
+            if(feature_location.strand == 1):
+                start_codon = app.current_sequence[feature_location._start:feature_location._start+3]
+                stop_codon = app.current_sequence[feature_location._end-3:feature_location._end]
+            else:
+                start_codon = convert(app.current_sequence[feature_location._end-3:feature_location._end])
+                stop_codon = convert(app.current_sequence[feature_location._start:feature_location._start+3])
+        elif(isinstance(feature_location, CompoundLocation)):
+            if(feature_location.strand == 1):
+                start_codon = app.current_sequence[feature_location.parts[0]._start:feature_location.parts[0]._start+3]
+                stop_codon = app.current_sequence[feature_location.parts[-1]._end-3:feature_location.parts[-1]._end]
+            else:
+                start_codon = convert(app.current_sequence[feature_location.parts[-1]._end-3:feature_location.parts[-1]._end])
+                stop_codon = convert(app.current_sequence[feature_location.parts[0]._start:feature_location.parts[0]._start+3])
+
+        if start_codon not in ["ATG"]:
+            raise FailedVerification(f"invalid start codon: {start_codon}")
+
+        if stop_codon not in ["TGA", "TAG", "TAA"]:
+            raise FailedVerification(f"invalid stop codon: {stop_codon}")
+
+        return None 
\ No newline at end of file

From 2f085e1370f5122f587cca99c9ca8f2d93c2792d Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:01 +0200
Subject: [PATCH 14/22] root superclasses and package description

---
 Plugins/__caller__.py | 62 +++++++++++++++++++++++++++++++++++++++++++
 Plugins/__init__.py   | 21 +++++++++++----
 Plugins/__plugin__.py | 38 ++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 5 deletions(-)
 create mode 100644 Plugins/__caller__.py
 create mode 100644 Plugins/__plugin__.py

diff --git a/Plugins/__caller__.py b/Plugins/__caller__.py
new file mode 100644
index 0000000..c6a3a5a
--- /dev/null
+++ b/Plugins/__caller__.py
@@ -0,0 +1,62 @@
+#__caller__.py
+
+"""
+"""
+from Plugins.__verify__ import FailedVerification
+
+
+class Caller:
+    """
+    """
+    def __init__(self, app):
+        self.app = app
+        self.status = True
+        self.last_result = None
+
+    """
+    """
+    def run(self, callback_function, *args):
+        if args[1] not in ["default", "verify", "bypass"]:
+            raise UnknownCallerModeError(f"{args[1]} mode is not defined.")
+
+        if self.status and args[1] in ["default"]:
+            self.last_result = callback_function(*args)
+            return self.last_result
+
+        elif self.status and args[1] in ["verify"]:
+            try:
+                callback_function(self.app, self.last_result)
+            except FailedVerification:
+                self.status = False
+                raise CallerFailedVerification()
+            return None
+
+        elif not self.status and args[1] in ["bypass"]:
+            self.last_result = callback_function(*args)
+            return self.last_result
+
+class UnknownCallerModeError(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"UnknownCallerModeError: {self.message}"
+        else:
+            return "UnknownCallerModeError has been raised"
+
+class CallerFailedVerification(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"CallerFailedVerification: {self.message}"
+        else:
+            return "CallerFailedVerification has been raised"
\ No newline at end of file
diff --git a/Plugins/__init__.py b/Plugins/__init__.py
index 583ac6d..ff585cf 100644
--- a/Plugins/__init__.py
+++ b/Plugins/__init__.py
@@ -1,17 +1,28 @@
 #__init.py__
 
-from Plugins.fasta2handle import Plugin
-from Plugins.gff_maker2handle import Plugin
-from Plugins.tab_pannzer2handle import Plugin
+from Plugins.__caller__ import Caller, UnknownCallerModeError, CallerFailedVerification
+
+from Plugins.__plugin__ import __Plugin__, RequiredMetadataError, UndefinedMethodError
+from Plugins.__read__ import __Read__
+from Plugins.__read_gff_maker__ import __ReadGFFMaker__
+from Plugins.__read_tab_pannzer__ import __ReadTabPannzer__
+from Plugins.__verify__ import __Verify__, FailedVerification
 
 from Plugins.read_fasta import Plugin
 
-from Plugins.read_gff_maker_gene import Plugin
 from Plugins.read_gff_maker_3UTR import Plugin
 from Plugins.read_gff_maker_5UTR import Plugin
 from Plugins.read_gff_maker_CDS import Plugin
 from Plugins.read_gff_maker_exon import Plugin
+from Plugins.read_gff_maker_gene import Plugin
 from Plugins.read_gff_maker_mRNA import Plugin
+from Plugins.read_gff_maker_source import Plugin
 
 from Plugins.read_tab_pannzer_CDS import Plugin
-from Plugins.read_tab_pannzer_gene import Plugin
\ No newline at end of file
+from Plugins.read_tab_pannzer_gene import Plugin
+
+from Plugins.to_handle_fasta import Plugin
+from Plugins.to_handle_gff_maker import Plugin
+from Plugins.to_handle_tab_pannzer import Plugin
+
+from Plugins.verify_gff_maker_CDS import Plugin
\ No newline at end of file
diff --git a/Plugins/__plugin__.py b/Plugins/__plugin__.py
new file mode 100644
index 0000000..9e55c96
--- /dev/null
+++ b/Plugins/__plugin__.py
@@ -0,0 +1,38 @@
+#__plugin__.py
+
+class __Plugin__:
+    def process(*args):
+        raise UndefinedMethodError("process has not been defined")
+
+    def required_metadata_check(*args):
+        return True
+
+"""
+"""
+class RequiredMetadataError(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"RequiredMetadataError: {self.message}"
+        else:
+            return "RequiredMetadataError has been raised"
+
+"""
+"""
+class UndefinedMethodError(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"UndefinedMethodError: {self.message}"
+        else:
+            return "UndefinedMethodError has been raised"
\ No newline at end of file

From ec0bbf41ade13e22067879db65fb9fa0cd1e3501 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:32 +0200
Subject: [PATCH 15/22] renamed as to_handle_fasta

---
 Plugins/fasta2handle.py | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 Plugins/fasta2handle.py

diff --git a/Plugins/fasta2handle.py b/Plugins/fasta2handle.py
deleted file mode 100644
index b9e319b..0000000
--- a/Plugins/fasta2handle.py
+++ /dev/null
@@ -1,9 +0,0 @@
-#fasta2handle.py
-
-from Bio import SeqIO
-
-class Plugin:
-
-    def process(self, file_path):
-        with open(file_path) as handle:
-            return list(SeqIO.parse(handle, "fasta"))
\ No newline at end of file

From 2239814117782d0b199f41fd265c9ff62c9b7191 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:27:51 +0200
Subject: [PATCH 16/22] renamed as to_handle_gff_maker

---
 Plugins/gff_maker2handle.py | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 Plugins/gff_maker2handle.py

diff --git a/Plugins/gff_maker2handle.py b/Plugins/gff_maker2handle.py
deleted file mode 100644
index 39124c2..0000000
--- a/Plugins/gff_maker2handle.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#gff_maker2handle.py
-
-import pandas as pd
-import re
-
-class Plugin:
-
-    def process(self, file_path):
-        with open(file_path) as handle:
-            gff = pd.read_csv(handle, sep="\t")
-            gff = gff.reset_index()
-            gff.columns = ["seq_id", "source", "ft_type", "start", "stop", "score", "strand", "phase", "attr"]
-            gff = gff.sort_values(by=["seq_id"]).drop(["source", "score", "phase"], axis=1).dropna()
-            gff["sub_seq_id"] = [re.split(r':',re.search("^ID=.*?;", x).group(0)[3:-1])[0] for x in gff["attr"]]
-            return gff[["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]].sort_values(by=["seq_id", "sub_seq_id", "ft_type"]).set_index(["seq_id", "sub_seq_id", "ft_type", "start", "stop", "strand"]) 
-        
\ No newline at end of file

From 63dfc33705cd25997a30979425f657c319447f80 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:28:14 +0200
Subject: [PATCH 17/22] renamed as to_handle_tab_pannzer

---
 Plugins/tab_pannzer2handle.py | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 Plugins/tab_pannzer2handle.py

diff --git a/Plugins/tab_pannzer2handle.py b/Plugins/tab_pannzer2handle.py
deleted file mode 100644
index ac7c532..0000000
--- a/Plugins/tab_pannzer2handle.py
+++ /dev/null
@@ -1,11 +0,0 @@
-#tab_pannzer2handle.py
-
-import pandas as pd
-
-class Plugin:
-
-    def process(self, file_path):
-        with open(file_path) as handle:
-            anno = pd.read_csv(handle, sep="\t")
-            anno = anno.sort_values(by=["qpid", "type"]).drop(["score", "PPV"], axis=1).set_index(["qpid", "type", "id", "desc"])
-            return anno
\ No newline at end of file

From a4a1ba885b7e324d063db702f8c8cdf36c81b6d3 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:29:03 +0200
Subject: [PATCH 18/22] Delete files directory

---
 files/config.info | 49 -----------------------------------------------
 1 file changed, 49 deletions(-)
 delete mode 100644 files/config.info

diff --git a/files/config.info b/files/config.info
deleted file mode 100644
index 1aa75e3..0000000
--- a/files/config.info
+++ /dev/null
@@ -1,49 +0,0 @@
-# Axel Giottonini
-# test config.info
-# 14.05.2021
-
-<metadata>
-project:temp
-division:inv
-taxonomy:29031
-organism:Phlebotomus papatasi
-molecule_type:genomic DNA
-topology:linear
-description:empty
-transl_table:0
-</metadata>
-
-<plugins>
-fasta2handle:.fasta2handle,Plugins
-gff_maker2handle:.gff_maker2handle,Plugins
-tab_pannzer2handle:.tab_pannzer2handle,Plugins
-read_fasta:.read_fasta,Plugins
-read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
-read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
-read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
-read_gff_maker_exon:.read_gff_maker_exon,Plugins
-read_gff_maker_gene:.read_gff_maker_gene,Plugins
-read_gff_maker_main:.read_gff_maker_main,Plugins
-read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
-read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
-read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
-</plugins>
-
-<handles>
-fasta:fasta2handle,files/sequences.fasta
-gff_maker:gff_maker2handle,files/data.gff
-tab_pannzer:tab_pannzer2handle,files/anno.out
-</handles>
-
-<workflow>
--read_fasta,fasta
---read_gff_maker_main,gff_maker
----read_gff_maker_gene,gff_maker
-----read_tab_pannzer_gene,tab_pannzer
----read_gff_maker_mRNA,gff_maker
----read_gff_maker_CDS,gff_maker
-----read_tab_pannzer_CDS,tab_pannzer
----read_gff_maker_3UTR,gff_maker
----read_gff_maker_5UTR,gff_maker
---read_gff_maker_exon,gff_maker
-</workflow>

From e599c67d3c9950aae4d4631a0a96c557a75cd197 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:29:53 +0200
Subject: [PATCH 19/22] core upload

handle config file as an argument
---
 core.py | 157 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 main.py |  14 +++--
 2 files changed, 146 insertions(+), 25 deletions(-)

diff --git a/core.py b/core.py
index 3bbc502..3fed957 100644
--- a/core.py
+++ b/core.py
@@ -3,13 +3,27 @@
 import importlib
 import re
 
-class app:
+"""
+"""
+class App:
+    """
+        Description:
+
+        Arguments:
+
+        Output:
+
+        Note:
+    """
     def __init__(self, config_path):
+
         self.metadata = None
         self.plugins = None
         self.handles = None
         self.workflow = None
 
+        self.current_sequence = None
+
         config = self.read_config(config_path)
 
         self.set_metadata(config["metadata"])
@@ -17,16 +31,16 @@ def __init__(self, config_path):
         self.set_handles(config["handles"])
         self.set_workflow(config["workflow"])
 
+        self.all_plugins_required_metadata_check()
+
     """
-    This function reads the config file which is divided into four fields :
-        - metadata: contains the general and shared informations for EMBLish
-            as a couple key:value
-        - plugins: contains the list of the plugins that will be used as a
-            triplet plugin_key:plugin_name,plugin_package
-        - handles: contains the list of files that will be used as inputs as
-            a triplet handle_key:plugin,file_path
-        - workflow: contains a hierarchical list of the different step to run
-            as a couple plugin_key,handle_key
+        Description:
+
+        Arguments:
+
+        Output:
+
+        Note:
     """
     def read_config(self, config_path):
         config = {
@@ -67,26 +81,68 @@ def read_config(self, config_path):
         return config
 
     """
-    This function converts the array containing the metadata into a dictionnary
+        Description:
+            - converts the array containing the metadata into a dictionary by splitting
+            each "key:value" string into an item with the key and the value; a value
+            written as "<number>,int" is converted to an int.
+        Arguments:
+            - array: list of strings
+        Output:
+            - dictionary of strings (or ints), stored in self.metadata
+        Note:
     """
-    def set_metadata(self, array):
-        self.metadata = {element.split(":")[0]:element.split(":")[1] for element in array}
+    def set_metadata(self, array:list):
+        
+        def convert(value):
+            temp = value.split(",")
+            if len(temp) > 1:
+                if temp[1] == "int":
+                    return int(temp[0])
+            return value
+
+        self.metadata = {element.split(":")[0]:convert(element.split(":")[1]) for element in array}
 
     """
-    This function converts the array containing the plugins parameters into a dictionnary 
-    with plugins to call with their key
+        Description:
+            - converts the array containing the plugins parameters (name, package) into a
+            dictionary by splitting each string containing the plugin key, the plugin name
+            and the plugin package into an item with the key and the callable plugin.
+            Stores the result in the self.plugins variable.
+        Arguments:
+            - array: list of strings
+        Output:
+            - dictionary of plugin objects
+        Note:
     """
-    def set_plugins(self, array):
+    def set_plugins(self, array:list):
         self.plugins = {element.split(":")[0]:importlib.import_module(element.split(":")[1].split(",")[0],element.split(":")[1].split(",")[1]).Plugin() for element in array}
 
     """
-    This function converts the array containing the handles parameters into a dictionnary
-    with handles to call with their key
+        Description:
+            - converts the array containing the handles parameters into a dictionary by
+            splitting each string containing the handle key, the handle converter and the
+            file path into an item with the key and the handle converted into a data frame.
+            Stores the result in the self.handles variable.
+        Arguments:
+            - array: list of strings
+        Output:
+            - dictionary of data frames
+        Note:
     """
     def set_handles(self, array):
         self.handles = {element.split(":")[0]:self.plugins[element.split(":")[1].split(",")[0]].process(element.split(":")[1].split(",")[1]) for element in array}
     
     """
+        Description:
+            - converts the array containing the workflow into a recursive automaton where
+            tasks are described by a tuple containing the required plugin, the handle where
+            the data is found and a list of elements to call.
+            Stores the result in the self.workflow variable.
+        Arguments:
+            - array: list of strings
+        Output (assigned):
+            - array: list of tuples (recursive)
+        Note:
     """
     def set_workflow(self, array):
         temp = self.refactor_workflow(array)
@@ -94,7 +150,14 @@ def set_workflow(self, array):
         self.workflow = self.convert_workflow_task(temp[0])
 
     """
-    Convert the list element in triplet level,<plugin_key,handle_key>,[]
+        Description:
+            - converts strings into a tuple containing the level of the task, the rest of the
+            string and an empty array.
+        Arguments:
+            - array: list of strings
+        Output:
+            - array: list of tuples
+        Note:
     """
     def refactor_workflow(self, array):
 
@@ -107,7 +170,13 @@ def refactor_workflow(self, array):
         return array
 
     """
-    Order the elements and create the hierarchical nodes
+        Description:
+            - places each task in the task array of its parent (level - 1) task
+        Arguments:
+            - array: list of tuples
+        Output:
+            - array: list of tuples (recursive)
+        Note:
     """
     def merge_workflow(self, array):
         array.insert(0, (0,None,[]))
@@ -133,12 +202,22 @@ def merge_workflow(self, array):
         return array
 
     """
+        Description:
+            - recursively converts the tuples containing the task string into a tuple containing
+            a reference to the application, the plugin key, the third field of the task string
+            (or "default" when absent), the handle key and an array of subtasks.
+        Arguments:
+            - task: tuple (recursive)
+        Output:
+            - tuple (recursive); a list of such tuples when the task has no task string
+        Note:
     """
     def convert_workflow_task(self, task):
         if task[1]:
             return (
                 self,
                 task[1].split(",")[0],
+                task[1].split(",")[2] if len(task[1].split(",")) > 2 else "default",
                 task[1].split(",")[1],
                 [self.convert_workflow_task(sub_task) for sub_task in task[2]]
             )
@@ -146,7 +225,43 @@ def convert_workflow_task(self, task):
         return [self.convert_workflow_task(sub_task) for sub_task in task[2]]
         
     """
+        Description:
+            - raises InvalidConfigurationError if a plugin's required_metadata_check fails
+        Arguments:
+
+        Output:
+
+        Note:
+    """
+    def all_plugins_required_metadata_check(self):
+        for key, plugin in self.plugins.items():
+            if not plugin.required_metadata_check(self):
+                raise InvalidConfigurationError(f"{key} plugin could not find required metadata")
+
+    """
+        Description:
+            - runs each top-level workflow task by calling the process method of its plugin
+        Arguments:
+
+        Output:
+
+        Note:
     """
     def run(self):
         for app, key_plugin, *args in self.workflow:
-            app.plugins[key_plugin].process(app, *args)
\ No newline at end of file
+            app.plugins[key_plugin].process(app, *args)
+
+"""
+"""
+class InvalidConfigurationError(Exception):
+    def __init__(self, *args):
+        if args:
+            self.message = args[0]
+        else:
+            self.message = None
+
+    def __str__(self):
+        if self.message:
+            return f"InvalidConfigurationError, {self.message}"
+        else:
+            return "InvalidConfigurationError has been raised"
\ No newline at end of file
diff --git a/main.py b/main.py
index 1331b9a..875477b 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,14 @@
 #main.py
 
-from core import app
+from core import App
+import sys
 
-if __name__ == "__main__":
+def main():
+    args = sys.argv[1:]
+
+    config_file = args[0]
 
-    app = app("files/config.info")
-    app.run()
\ No newline at end of file
+    App(config_file).run()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From c418d55a43df71333a5ba2295d3d00fd17fbaa45 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:33:12 +0200
Subject: [PATCH 20/22] Update README.md

---
 README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 40f2b8e..734ccf6 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,58 @@
 # GFF3-Tabular_to_EMBL
 
-## Status of the project
+```
+# Axel Giottonini
+# test config.info
+# 19.05.2021
 
-## Upcoming features
+<metadata>
+project:PRJEB1234
+division:inv
+taxonomy:29031
+organism:Phlebotomus papatasi
+molecule_type:genomic DNA
+topology:linear
+description:empty
+transl_table:1,int
+</metadata>
+
+<plugins>
+read_fasta:.read_fasta,Plugins
+read_gff_maker_3UTR:.read_gff_maker_3UTR,Plugins
+read_gff_maker_5UTR:.read_gff_maker_5UTR,Plugins
+read_gff_maker_CDS:.read_gff_maker_CDS,Plugins
+read_gff_maker_exon:.read_gff_maker_exon,Plugins
+read_gff_maker_gene:.read_gff_maker_gene,Plugins
+read_gff_maker_misc_feature:.read_gff_maker_misc_feature,Plugins
+read_gff_maker_mRNA:.read_gff_maker_mRNA,Plugins
+read_gff_maker_source:.read_gff_maker_source,Plugins
+read_tab_pannzer_CDS:.read_tab_pannzer_CDS,Plugins
+read_tab_pannzer_gene:.read_tab_pannzer_gene,Plugins
+to_handle_fasta:.to_handle_fasta,Plugins
+to_handle_gff_maker:.to_handle_gff_maker,Plugins
+to_handle_tab_pannzer:.to_handle_tab_pannzer,Plugins
+verify_gff_maker_CDS:.verify_gff_maker_CDS,Plugins
+</plugins>
+
+<handles>
+fasta:to_handle_fasta,sequences.fasta
+gff_maker:to_handle_gff_maker,data.gff
+tab_pannzer:to_handle_tab_pannzer,anno.out
+</handles>
+
+<workflow>
+-read_fasta,fasta
+--read_gff_maker_source,gff_maker
+---read_gff_maker_gene,gff_maker
+----read_tab_pannzer_gene,tab_pannzer
+---read_gff_maker_mRNA,gff_maker
+---read_gff_maker_CDS,gff_maker
+----read_tab_pannzer_CDS,tab_pannzer
+---verify_gff_maker_CDS,NF,verify
+---read_gff_maker_misc_feature,gff_maker,bypass
+---read_gff_maker_3UTR,gff_maker
+---read_gff_maker_5UTR,gff_maker
+--read_gff_maker_exon,gff_maker
+</workflow>
+
+```

From 12795dd374a55eb8e771fad6ad41fb9b816e3635 Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:34:00 +0200
Subject: [PATCH 21/22] Delete Pipfile

---
 Pipfile | 15 ---------------
 1 file changed, 15 deletions(-)
 delete mode 100644 Pipfile

diff --git a/Pipfile b/Pipfile
deleted file mode 100644
index c029e52..0000000
--- a/Pipfile
+++ /dev/null
@@ -1,15 +0,0 @@
-[[source]]
-url = "https://pypi.org/simple"
-verify_ssl = true
-name = "pypi"
-
-[packages]
-biopython = "*"
-tqdm = "*"
-pandas = "*"
-joblib = "*"
-
-[dev-packages]
-
-[requires]
-python_version = "3.9"

From 6429b39fceecbd5a635bd89928b2219c5cb805ca Mon Sep 17 00:00:00 2001
From: AxelGiottonini <75320942+AxelGiottonini@users.noreply.github.com>
Date: Thu, 20 May 2021 09:34:07 +0200
Subject: [PATCH 22/22] Delete Pipfile.lock

---
 Pipfile.lock | 134 ---------------------------------------------------
 1 file changed, 134 deletions(-)
 delete mode 100644 Pipfile.lock

diff --git a/Pipfile.lock b/Pipfile.lock
deleted file mode 100644
index 43ebfb6..0000000
--- a/Pipfile.lock
+++ /dev/null
@@ -1,134 +0,0 @@
-{
-    "_meta": {
-        "hash": {
-            "sha256": "bc0663e00dfee42678acee85bb844332840e78c75b12bf90e09c3210744e343b"
-        },
-        "pipfile-spec": 6,
-        "requires": {
-            "python_version": "3.9"
-        },
-        "sources": [
-            {
-                "name": "pypi",
-                "url": "https://pypi.org/simple",
-                "verify_ssl": true
-            }
-        ]
-    },
-    "default": {
-        "biopython": {
-            "hashes": [
-                "sha256:010142a8ec2549ff0649edd497658964ef1a18eefdb9fd942ec1e81b292ce2d9",
-                "sha256:0b9fbb0d3022dc22716da108b8a81b80d952cd97ac1f106de491dce850f92f62",
-                "sha256:0df5cddef2819c975e6508adf5d85aa046e449df5420d02b04871c7836b41273",
-                "sha256:194528eda6856a4c68f840ca0bcc9b544a5edee3548b97521084e7ac38c833ca",
-                "sha256:195f099c2c0c39518b6df921ab2b3cc43a601896018fc61909ac8385d5878866",
-                "sha256:1ee0a0b6c2376680fea6642d5080baa419fd73df104a62d58a8baf7a8bbe4564",
-                "sha256:2bd5a630be2a8e593094f7b1717fc962eda8931b68542b97fbf9bd8e2ac1e08d",
-                "sha256:4565c97fab16c5697d067b821b6a1da0ec3ef36a9c96cf103ac7b4a94eb9f9ba",
-                "sha256:48d424453a5512a1d1d41a4acabdfe5291da1f491a2d3606f2b0e4fbd63aeda6",
-                "sha256:5c0b369f91a76b8e5e36624d075585c3f0f088ea4a6e3d015c48f08e48ce0114",
-                "sha256:75b55000793f6b76334b8e80dc7e6d8cd2b019af917aa431cea6646e8e696c7f",
-                "sha256:ada611f12ee3b0bef7308ef41ee7b94898613b369ab44e0268d74bd1d6a06920",
-                "sha256:cc3b0b78022d14f11d508038a288a189d03c97c476d6636c7b6f98bd8bc8462b",
-                "sha256:e0af107cc62a905d13d35dd7b38f335a37752ede45e4617139e84409a6a88dc4",
-                "sha256:f1076653937947773768455556b1d24acad9575759e9089082f32636b09add54",
-                "sha256:f5021a398c898b9cf6815cc5171c146a601b935b55364c53e6516a2545ab740c"
-            ],
-            "index": "pypi",
-            "version": "==1.78"
-        },
-        "joblib": {
-            "hashes": [
-                "sha256:9c17567692206d2f3fb9ecf5e991084254fe631665c450b443761c4186a613f7",
-                "sha256:feeb1ec69c4d45129954f1b7034954241eedfd6ba39b5e9e4b6883be3332d5e5"
-            ],
-            "index": "pypi",
-            "version": "==1.0.1"
-        },
-        "numpy": {
-            "hashes": [
-                "sha256:2428b109306075d89d21135bdd6b785f132a1f5a3260c371cee1fae427e12727",
-                "sha256:377751954da04d4a6950191b20539066b4e19e3b559d4695399c5e8e3e683bf6",
-                "sha256:4703b9e937df83f5b6b7447ca5912b5f5f297aba45f91dbbbc63ff9278c7aa98",
-                "sha256:471c0571d0895c68da309dacee4e95a0811d0a9f9f532a48dc1bea5f3b7ad2b7",
-                "sha256:61d5b4cf73622e4d0c6b83408a16631b670fc045afd6540679aa35591a17fe6d",
-                "sha256:6c915ee7dba1071554e70a3664a839fbc033e1d6528199d4621eeaaa5487ccd2",
-                "sha256:6e51e417d9ae2e7848314994e6fc3832c9d426abce9328cf7571eefceb43e6c9",
-                "sha256:719656636c48be22c23641859ff2419b27b6bdf844b36a2447cb39caceb00935",
-                "sha256:780ae5284cb770ade51d4b4a7dce4faa554eb1d88a56d0e8b9f35fca9b0270ff",
-                "sha256:878922bf5ad7550aa044aa9301d417e2d3ae50f0f577de92051d739ac6096cee",
-                "sha256:924dc3f83de20437de95a73516f36e09918e9c9c18d5eac520062c49191025fb",
-                "sha256:97ce8b8ace7d3b9288d88177e66ee75480fb79b9cf745e91ecfe65d91a856042",
-                "sha256:9c0fab855ae790ca74b27e55240fe4f2a36a364a3f1ebcfd1fb5ac4088f1cec3",
-                "sha256:9cab23439eb1ebfed1aaec9cd42b7dc50fc96d5cd3147da348d9161f0501ada5",
-                "sha256:a8e6859913ec8eeef3dbe9aed3bf475347642d1cdd6217c30f28dee8903528e6",
-                "sha256:aa046527c04688af680217fffac61eec2350ef3f3d7320c07fd33f5c6e7b4d5f",
-                "sha256:abc81829c4039e7e4c30f7897938fa5d4916a09c2c7eb9b244b7a35ddc9656f4",
-                "sha256:bad70051de2c50b1a6259a6df1daaafe8c480ca98132da98976d8591c412e737",
-                "sha256:c73a7975d77f15f7f68dacfb2bca3d3f479f158313642e8ea9058eea06637931",
-                "sha256:d15007f857d6995db15195217afdbddfcd203dfaa0ba6878a2f580eaf810ecd6",
-                "sha256:d76061ae5cab49b83a8cf3feacefc2053fac672728802ac137dd8c4123397677",
-                "sha256:e8e4fbbb7e7634f263c5b0150a629342cc19b47c5eba8d1cd4363ab3455ab576",
-                "sha256:e9459f40244bb02b2f14f6af0cd0732791d72232bbb0dc4bab57ef88e75f6935",
-                "sha256:edb1f041a9146dcf02cd7df7187db46ab524b9af2515f392f337c7cbbf5b52cd"
-            ],
-            "markers": "python_version >= '3.7'",
-            "version": "==1.20.2"
-        },
-        "pandas": {
-            "hashes": [
-                "sha256:09761bf5f8c741d47d4b8b9073288de1be39bbfccc281d70b889ade12b2aad29",
-                "sha256:0f27fd1adfa256388dc34895ca5437eaf254832223812afd817a6f73127f969c",
-                "sha256:43e00770552595c2250d8d712ec8b6e08ca73089ac823122344f023efa4abea3",
-                "sha256:46fc671c542a8392a4f4c13edc8527e3a10f6cb62912d856f82248feb747f06e",
-                "sha256:475b7772b6e18a93a43ea83517932deff33954a10d4fbae18d0c1aba4182310f",
-                "sha256:4d821b9b911fc1b7d428978d04ace33f0af32bb7549525c8a7b08444bce46b74",
-                "sha256:5e3c8c60541396110586bcbe6eccdc335a38e7de8c217060edaf4722260b158f",
-                "sha256:621c044a1b5e535cf7dcb3ab39fca6f867095c3ef223a524f18f60c7fee028ea",
-                "sha256:72ffcea00ae8ffcdbdefff800284311e155fbb5ed6758f1a6110fc1f8f8f0c1c",
-                "sha256:8a051e957c5206f722e83f295f95a2cf053e890f9a1fba0065780a8c2d045f5d",
-                "sha256:97b1954533b2a74c7e20d1342c4f01311d3203b48f2ebf651891e6a6eaf01104",
-                "sha256:9f5829e64507ad10e2561b60baf285c470f3c4454b007c860e77849b88865ae7",
-                "sha256:a93e34f10f67d81de706ce00bf8bb3798403cabce4ccb2de10c61b5ae8786ab5",
-                "sha256:d59842a5aa89ca03c2099312163ffdd06f56486050e641a45d926a072f04d994",
-                "sha256:dbb255975eb94143f2e6ec7dadda671d25147939047839cd6b8a4aff0379bb9b",
-                "sha256:df6f10b85aef7a5bb25259ad651ad1cc1d6bb09000595cab47e718cbac250b1d"
-            ],
-            "index": "pypi",
-            "version": "==1.2.3"
-        },
-        "python-dateutil": {
-            "hashes": [
-                "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
-                "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==2.8.1"
-        },
-        "pytz": {
-            "hashes": [
-                "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
-                "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
-            ],
-            "version": "==2021.1"
-        },
-        "six": {
-            "hashes": [
-                "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
-                "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
-            ],
-            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.15.0"
-        },
-        "tqdm": {
-            "hashes": [
-                "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7",
-                "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33"
-            ],
-            "index": "pypi",
-            "version": "==4.59.0"
-        }
-    },
-    "develop": {}
-}