Fix logging function

Implement mobidb4 format
BioComputingUP · Aug 4, 2020 · commit 8e8d30b
1 parent 936df14

Showing 5 changed files with 148 additions and 36 deletions.
diff --git a/mdblib/cli.py b/mdblib/cli.py
@@ -31,7 +31,7 @@ def arg_parser(cd):
                         help='log level filter. All levels <= choice will be displayed')
 
     parser.add_argument('-f', '--outputFormat', default='interpro',
-                        choices=['interpro', 'fasta', 'vertical', 'extended', 'mobidb3', 'caid'],
+                        choices=['interpro', 'fasta', 'vertical', 'extended', 'mobidb3', 'mobidb4', 'caid'],
                         help='output format, see README.md for further details')
 
     parser.add_argument('-c', '--conf', type=str, default=os.path.join(cd, 'config.ini'),

diff --git a/mdblib/logger.py b/mdblib/logger.py
@@ -1,19 +1,9 @@
 import logging
+import sys
 
 
 def set_logger(logfile, level):
-    handlers = list()
-    log_formatter = logging.Formatter('%(asctime)s | %(module)-12s | %(levelname)-8s | %(message)s')
 
-    if logfile:
-        file_handler = logging.FileHandler(logfile, 'a')
-        file_handler.setFormatter(log_formatter)
-        handlers.append(file_handler)
-    else:
-        console_handler = logging.StreamHandler()
-        console_handler.setFormatter(log_formatter)
-        handlers.append(console_handler)
-
-    logging.basicConfig(level=level,
-                        format=log_formatter,
-                        handlers=handlers)
+    logging.basicConfig(level=logging.getLevelName(level),
+                        format='%(asctime)s | %(module)-12s | %(levelname)-8s | %(message)s',
+                        stream=open(logfile, "w") if logfile else sys.stderr)
diff --git a/mdblib/outformats.py b/mdblib/outformats.py
@@ -264,6 +264,126 @@ def __repr__(self):
             return ""
 
 
+class Mobidb4Format(Formatter):
+
+    feature_tag = {'PA': 'polyampholyte',  # PA
+                   'PPE': 'positive_polyelectrolyte',  # PPE
+                   'NPE': 'negative_polyelectrolyte',  # NPE
+                   'CR': 'cystein_rich',  # CR
+                   'PR': 'proline_rich',  # PR
+                   'PO': 'polar',  # PO
+                   'GR': 'glycine_rich',  # GR
+                   'LC': 'low_complexity'}  # LC
+
+    def content_count(self, regions):
+        return reduce(lambda x, t: x + (t[1] - t[0] + 1), regions, 0)
+
+
+    def __init__(self, _acc, _seq, _mdbl_consensus,
+                 _simple_consensus, _single_predictions, **kwargs):
+        self.seq = _seq
+        self.seqlen = len(self.seq)
+        self.mdbl_consensus = _mdbl_consensus
+        self.simple_consensus = _simple_consensus
+        self.single_predictions = _single_predictions
+        self.injecting_data = kwargs.get("injection")
+        super(Mobidb4Format, self).__init__(_acc, **kwargs)
+
+        if self.multi_accessions:
+            self.multiply_by_accession("accession")
+
+    def _get_output_obj(self):
+        out_obj = dict()
+
+        if self.injecting_data is not None:
+            out_obj.update(self.injecting_data)
+
+        out_obj.setdefault("sequence", self.seq)
+
+        # MobiDB-lite consensus
+        # TODO add content_count, eliminate regions if empty?
+        count = self.content_count(self.mdbl_consensus.prediction.regions)
+        out_obj["prediction-disorder-mobidb_lite"] = {
+             'regions': [(r[0], r[1]) for r in self.mdbl_consensus.prediction.regions],
+             'scores': self.mdbl_consensus.prediction.scores,
+             'content_count': count,
+             'content_fraction': count / self.seqlen
+        }
+
+        # MobiDB-lite consensus sub regions
+        if self.mdbl_consensus.prediction.regions:
+
+            regions = {}
+            for r in self.mdbl_consensus.prediction.regions:
+                r_type = self.feature_tag.get(r[2][2:])
+                if r_type:
+                    regions.setdefault(r_type, []).append((r[0], r[1]))
+            for r_type in regions:
+                count = self.content_count(regions[r_type])
+                out_obj["prediction-{}-mobidb_lite_sub".format(r_type)] = {
+                    'regions': regions[r_type],
+                    'content_count': count,
+                    'content_fraction': count / self.seqlen
+                }
+
+        # Simple consensus
+        count = self.content_count(self.simple_consensus.prediction.regions)
+        out_obj["prediction-disorder-th_50"] = {
+            'regions': [(r[0], r[1]) for r in self.simple_consensus.prediction.regions],
+            'content_count': count,
+            'content_fraction': count / self.seqlen
+        }
+
+        # Single predictions
+        for prediction in self.single_predictions:
+            regions = [(r[0], r[1]) for r in prediction.to_regions(start_index=1, positivetag=1)]
+            count = self.content_count(regions)
+
+            if 'disorder' in prediction.types:
+                out_obj["prediction-disorder-{}".format(prediction.method)] = {
+                    'regions': regions,
+                    'content_count': count,
+                    'content_fraction': count / self.seqlen
+                }
+            elif 'lowcomp' in prediction.types:
+                out_obj["prediction-low_complexity-{}".format(prediction.method)] = {
+                    'regions': regions,
+                    'content_count': count,
+                    'content_fraction': count / self.seqlen
+                }
+            elif 'bindsite' in prediction.types:
+                out_obj["prediction-lip-anchor"] = {
+                    'regions': regions,
+                    'content_count': count,
+                    'content_fraction': count / self.seqlen
+                }
+            elif 'sspops' in prediction.types:
+                method, ptype = prediction.method.split('_')
+                out_obj["prediction-{}-fess".format(ptype)] = {
+                    'scores': prediction.scores
+                }
+            # else:
+            #     logging.debug("Type not implemented in mobidb4".format(prediction.types))
+
+        if out_obj:
+            out_obj["length"] = self.seqlen
+
+            if re.search("^UPI[A-F0-9]{10}$", self.acc):
+                out_obj['uniparc'] = self.acc
+            else:
+                out_obj['acc'] = self.acc
+
+            self.isnone = False
+
+            return [out_obj]
+
+    def __repr__(self):
+        if self.output:
+            return '\n'.join(json.dumps(oobj) for oobj in self.output)
+        else:
+            return ""
+
+
 class CaidFormat(Formatter):
     def __init__(self, _acc, seq, _mdbl_consensus, _single_predictions, **kwargs):
         self.seq = seq

diff --git a/mdblib/predictor.py b/mdblib/predictor.py
@@ -67,7 +67,7 @@ def parse(self, *args):
 class IUPredL(Predictor):
     tag = 'iupl'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'flat'
     shared_name = 'iupred'
 
@@ -103,7 +103,7 @@ def parse(self, output):
 class IUPredS(Predictor):
     tag = 'iups'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'flat'
     shared_name = 'iupred'
 
@@ -139,7 +139,7 @@ def parse(self, output):
 class ESpritzN(Predictor):
     tag = 'espN'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'disbin'
     shared_name = 'espritz'
 
@@ -166,7 +166,7 @@ def parse(self, output):
 class ESpritzD(Predictor):
     tag = 'espD'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'disbin'
     shared_name = 'espritz'
 
@@ -193,7 +193,7 @@ def parse(self, output):
 class ESpritzX(Predictor):
     tag = 'espX'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'disbin'
     shared_name = 'espritz'
 
@@ -220,7 +220,7 @@ def parse(self, output):
 class GlobPlot(Predictor):
     tag = 'glo'
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'flat'
     shared_name = 'globplot'
     suppress_stderr = True
@@ -243,7 +243,7 @@ def parse(self, output):
 class DisEMBL(Predictor):
     tag = ['dis465', 'disHL']
     types = ['disorder', 'mobidblite']
-    groups = ['main', 'mobidb3', 'caid']
+    groups = ['main', 'mobidb3', 'mobidb4', 'caid']
     intype = 'flat'
     shared_name = 'disembl'
 
@@ -269,7 +269,7 @@ def parse(self, output):
 class VSL2b(Predictor):
     tag = 'vsl'
     types = ['disorder']
-    groups = ['mobidb3', 'caid']
+    groups = ['mobidb3', 'mobidb4', 'caid']
     intype = 'flat'
     shared_name = 'vsl2'
     suppress_stderr = True
@@ -309,7 +309,7 @@ def parse(self, output):
 class JRonn(Predictor):
     tag = 'jronn'
     types = ['disorder']
-    groups = ['mobidb3', 'caid']
+    groups = ['mobidb3', 'mobidb4', 'caid']
     intype = 'fasta'
     shared_name = 'jronn'
 
@@ -333,7 +333,7 @@ def parse(self, output):
 class Seg(Predictor):
     tag = 'seg'
     types = ['lowcomp']
-    groups = ['mobidb3', 'main']
+    groups = ['mobidb3', 'mobidb4', 'main']
     intype = 'fasta'
     shared_name = 'seg'
 
@@ -360,7 +360,7 @@ def parse(self, output):
 class Pfilt(Predictor):
     tag = 'pfilt'
     types = ['lowcomp']
-    groups = ['mobidb3']
+    groups = ['mobidb3', 'mobidb4']
     intype = 'fasta'
     shared_name = 'pfilt'
 
@@ -387,7 +387,7 @@ def parse(self, output):
 class FESS(Predictor):
     tag = ['fess_helix', 'fess_sheet', 'fess_coil']
     types = ['sspops']
-    groups = ['mobidb3']
+    groups = ['mobidb3', 'mobidb4']
     intype = 'fasta'
     shared_name = 'fess'
 
@@ -422,7 +422,7 @@ def parse(self, output):
 class DynaMine(Predictor):
     tag = 'dynamine_coil'
     types = ['sspops']
-    groups = ['mobidb3', 'caid']
+    groups = ['mobidb3', 'mobidb4', 'caid']
     intype = 'fasta'
     shared_name = 'dynamine'
 
@@ -442,7 +442,7 @@ def parse(self, output):
 class Anchor(Predictor):
     tag = 'anchor'
     types = ['bindsite']
-    groups = ['mobidb3', 'caid']
+    groups = ['mobidb3', 'mobidb4', 'caid']
     intype = 'fasta'
     shared_name = 'anchor'
 

diff --git a/mobidb_lite.py b/mobidb_lite.py
@@ -33,7 +33,7 @@
 from mdblib.setdirs import set_pred_dir
 from mdblib.streams import OutStream, InStream
 from mdblib.consensus import MobidbLiteConsensus, SimpleConsensus, feature_desc
-from mdblib.outformats import InterProFormat, ExtendedFormat, Mobidb3Format, CaidFormat, FastaFormat, VerticalFormat
+from mdblib.outformats import InterProFormat, ExtendedFormat, Mobidb3Format, Mobidb4Format, CaidFormat, FastaFormat, VerticalFormat
 
 # Suppress warnings
 warnings.filterwarnings('ignore')
@@ -51,6 +51,7 @@ class MobidbLite(object):
                  'vertical': 'main',
                  'extended': 'main',
                  'mobidb3': 'mobidb3',
+                 'mobidb4': 'mobidb4',
                  'caid': 'caid'}
 
     def __init__(self, fasta, launchdir=None, conf=None, architecture='64', threads=0, outfile=None,
@@ -172,6 +173,10 @@ def fmt_output(self, acc, uacc, seq, preds, s_cons, r_cons, m_cons):
             output = Mobidb3Format(acc, seq, m_cons, s_cons, preds, _multi_accs=multi_acc,
                                    injection=self.additional_data)
 
+        elif self.outfmt == 'mobidb4':
+            output = Mobidb4Format(acc, seq, m_cons, s_cons, preds, _multi_accs=multi_acc,
+                                   injection=self.additional_data)
+
         elif self.outfmt == 'caid':
             output = CaidFormat(acc, seq, m_cons, preds, _multi_accs=multi_acc)
 
@@ -181,23 +186,20 @@ def calc_consensus(self, predictions, sequence):
         simple_c = None
         relaxed_c = None
         mobidblite_c = MobidbLiteConsensus(predictions, sequence,
-                                           pappu=True if self.outfmt == 'mobidb3' else False,
+                                           pappu=True if self.outfmt in ['mobidb3', 'mobidb4'] else False,
                                            force=self.force_consensus)
 
         if self.outfmt == 'extended':
             relaxed_c = SimpleConsensus(predictions, sequence, force=self.force_consensus, threshold=.375)
 
-
-        if self.outfmt == 'mobidb3':
+        if self.outfmt in ['mobidb3', 'mobidb4']:
             simple_c = SimpleConsensus(predictions, sequence, force=self.force_consensus)
 
-
-
         return simple_c, relaxed_c, mobidblite_c
 
     def run(self, fasta, architecture, threads, outfile):
 
-        logging.debug('outfmt: %i outgroup: %s', self.outfmt, self.outgroup)
+        logging.debug('outfmt: %s outgroup: %s', self.outfmt, self.outgroup)
 
         with InStream(fasta) as self.instream, OutStream(outfile) as self.outstream:
             # Parse input Fasta