Skip to content

Commit

Permalink
Fix logging function
Browse files Browse the repository at this point in the history
Implement mobidb4 format
  • Loading branch information
damianopiovesan committed Aug 4, 2020
1 parent 936df14 commit 8e8d30b
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 36 deletions.
2 changes: 1 addition & 1 deletion mdblib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def arg_parser(cd):
help='log level filter. All levels <= choice will be displayed')

parser.add_argument('-f', '--outputFormat', default='interpro',
choices=['interpro', 'fasta', 'vertical', 'extended', 'mobidb3', 'caid'],
choices=['interpro', 'fasta', 'vertical', 'extended', 'mobidb3', 'mobidb4', 'caid'],
help='output format, see README.md for further details')

parser.add_argument('-c', '--conf', type=str, default=os.path.join(cd, 'config.ini'),
Expand Down
18 changes: 4 additions & 14 deletions mdblib/logger.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
import logging
import sys


def set_logger(logfile, level):
    """Configure the root logger for the whole program.

    :param logfile: path of the log file; when empty or None, records are
        written to standard error instead
    :param level: log level, passed through ``logging.getLevelName`` so that
        a level name such as ``'DEBUG'`` is mapped to its numeric value
    """
    options = {
        'level': logging.getLevelName(level),
        # ``format`` must be a format *string*; basicConfig builds the
        # Formatter object itself.
        'format': '%(asctime)s | %(module)-12s | %(levelname)-8s | %(message)s',
    }

    if logfile:
        # Let logging open the file through a FileHandler rather than passing
        # a raw ``open()`` result as ``stream``: the handler is only created
        # when basicConfig actually configures the root logger, and it is
        # flushed/closed by ``logging.shutdown`` at interpreter exit.
        options['filename'] = logfile
        options['filemode'] = 'w'
    else:
        options['stream'] = sys.stderr

    logging.basicConfig(**options)
120 changes: 120 additions & 0 deletions mdblib/outformats.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,126 @@ def __repr__(self):
return ""


class Mobidb4Format(Formatter):
    """Formatter producing one JSON document per sequence in the mobidb4 layout.

    Every feature is stored under a ``prediction-<feature>-<method>`` key whose
    value holds the predicted regions together with their residue count and the
    fraction of the sequence they cover.

    NOTE(review): ``self.acc``, ``self.output``, ``self.isnone``,
    ``self.multi_accessions`` and ``multiply_by_accession`` are not defined in
    this class; they are presumably provided by ``Formatter`` -- confirm there.
    """

    # Maps the sub-region code taken from a MobiDB-lite region tag to the
    # feature name used in the output key.
    feature_tag = {'PA': 'polyampholyte',  # PA
                   'PPE': 'positive_polyelectrolyte',  # PPE
                   'NPE': 'negative_polyelectrolyte',  # NPE
                   'CR': 'cystein_rich',  # CR
                   'PR': 'proline_rich',  # PR
                   'PO': 'polar',  # PO
                   'GR': 'glycine_rich',  # GR
                   'LC': 'low_complexity'}  # LC

    def content_count(self, regions):
        """Return the number of residues covered by ``regions``.

        Each region is read as ``(start, end, ...)`` with both ends inclusive.
        ``sum`` is used instead of ``reduce`` because ``reduce`` is not a
        builtin on Python 3.
        """
        return sum(r[1] - r[0] + 1 for r in regions)

    def __init__(self, _acc, _seq, _mdbl_consensus,
                 _simple_consensus, _single_predictions, **kwargs):
        """Store the inputs and delegate the remaining setup to ``Formatter``.

        :param _acc: accession of the sequence
        :param _seq: the sequence itself
        :param _mdbl_consensus: MobiDB-lite consensus object
        :param _simple_consensus: simple consensus object
        :param _single_predictions: iterable of single-predictor results
        :param kwargs: forwarded to ``Formatter``; the optional ``injection``
            entry is a dict merged into the output document
        """
        self.seq = _seq
        self.seqlen = len(self.seq)
        self.mdbl_consensus = _mdbl_consensus
        self.simple_consensus = _simple_consensus
        self.single_predictions = _single_predictions
        self.injecting_data = kwargs.get("injection")
        # Attributes are set before the parent initialiser runs; presumably
        # the parent builds the output from them -- TODO confirm.
        super(Mobidb4Format, self).__init__(_acc, **kwargs)

        if self.multi_accessions:
            self.multiply_by_accession("accession")

    def _feature(self, regions, **extra):
        """Build one feature entry: regions, optional extras, count, fraction."""
        count = self.content_count(regions)
        feature = {'regions': regions}
        feature.update(extra)
        feature['content_count'] = count
        # Force true division (Python 2 would truncate to 0) and avoid a
        # ZeroDivisionError on an empty sequence.
        feature['content_fraction'] = float(count) / self.seqlen if self.seqlen else 0.0
        return feature

    def _get_output_obj(self):
        """Assemble the output document and return it wrapped in a list."""
        out_obj = dict()

        if self.injecting_data is not None:
            out_obj.update(self.injecting_data)

        # Injected data may already carry a sequence; do not overwrite it.
        out_obj.setdefault("sequence", self.seq)

        # MobiDB-lite consensus
        # TODO eliminate regions if empty?
        mdbl_regions = self.mdbl_consensus.prediction.regions
        out_obj["prediction-disorder-mobidb_lite"] = self._feature(
            [(r[0], r[1]) for r in mdbl_regions],
            scores=self.mdbl_consensus.prediction.scores)

        # MobiDB-lite consensus sub regions, grouped by feature name
        sub_regions = {}
        for r in mdbl_regions:
            # The tag is looked up after dropping its first two characters
            # (presumably a fixed prefix -- verify against the consensus code);
            # regions whose tag is not in ``feature_tag`` are skipped.
            r_type = self.feature_tag.get(r[2][2:])
            if r_type:
                sub_regions.setdefault(r_type, []).append((r[0], r[1]))
        for r_type, r_list in sub_regions.items():
            out_obj["prediction-{}-mobidb_lite_sub".format(r_type)] = self._feature(r_list)

        # Simple consensus
        out_obj["prediction-disorder-th_50"] = self._feature(
            [(r[0], r[1]) for r in self.simple_consensus.prediction.regions])

        # Single predictions
        for prediction in self.single_predictions:
            regions = [(r[0], r[1]) for r in prediction.to_regions(start_index=1, positivetag=1)]
            types = prediction.types

            if 'disorder' in types:
                key = "prediction-disorder-{}".format(prediction.method)
            elif 'lowcomp' in types:
                key = "prediction-low_complexity-{}".format(prediction.method)
            elif 'bindsite' in types:
                key = "prediction-lip-anchor"
            elif 'sspops' in types:
                # Method names look like "<method>_<type>" (e.g. "fess_helix").
                # The method part is kept in the key so that two predictors
                # sharing a type (e.g. "fess_coil" and "dynamine_coil") do not
                # overwrite each other; keys of fess predictions are unchanged.
                # NOTE(review): assumes prediction.method equals the predictor
                # tag -- confirm against the Predictor classes.
                method, ptype = prediction.method.split('_', 1)
                out_obj["prediction-{}-{}".format(ptype, method)] = {
                    'scores': prediction.scores
                }
                continue
            else:
                # Other prediction types are not part of this format.
                continue

            out_obj[key] = self._feature(regions)

        # out_obj always holds at least "sequence" here, so the identifiers
        # are added unconditionally.
        out_obj["length"] = self.seqlen

        if re.search(r"^UPI[A-F0-9]{10}$", self.acc):
            out_obj['uniparc'] = self.acc
        else:
            out_obj['acc'] = self.acc

        self.isnone = False

        return [out_obj]

    def __repr__(self):
        """Return the output objects as newline-separated JSON, or "" if none."""
        if self.output:
            return '\n'.join(json.dumps(oobj) for oobj in self.output)
        else:
            return ""


class CaidFormat(Formatter):
def __init__(self, _acc, seq, _mdbl_consensus, _single_predictions, **kwargs):
self.seq = seq
Expand Down
28 changes: 14 additions & 14 deletions mdblib/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def parse(self, *args):
class IUPredL(Predictor):
tag = 'iupl'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'flat'
shared_name = 'iupred'

Expand Down Expand Up @@ -103,7 +103,7 @@ def parse(self, output):
class IUPredS(Predictor):
tag = 'iups'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'flat'
shared_name = 'iupred'

Expand Down Expand Up @@ -139,7 +139,7 @@ def parse(self, output):
class ESpritzN(Predictor):
tag = 'espN'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'disbin'
shared_name = 'espritz'

Expand All @@ -166,7 +166,7 @@ def parse(self, output):
class ESpritzD(Predictor):
tag = 'espD'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'disbin'
shared_name = 'espritz'

Expand All @@ -193,7 +193,7 @@ def parse(self, output):
class ESpritzX(Predictor):
tag = 'espX'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'disbin'
shared_name = 'espritz'

Expand All @@ -220,7 +220,7 @@ def parse(self, output):
class GlobPlot(Predictor):
tag = 'glo'
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'flat'
shared_name = 'globplot'
suppress_stderr = True
Expand All @@ -243,7 +243,7 @@ def parse(self, output):
class DisEMBL(Predictor):
tag = ['dis465', 'disHL']
types = ['disorder', 'mobidblite']
groups = ['main', 'mobidb3', 'caid']
groups = ['main', 'mobidb3', 'mobidb4', 'caid']
intype = 'flat'
shared_name = 'disembl'

Expand All @@ -269,7 +269,7 @@ def parse(self, output):
class VSL2b(Predictor):
tag = 'vsl'
types = ['disorder']
groups = ['mobidb3', 'caid']
groups = ['mobidb3', 'mobidb4', 'caid']
intype = 'flat'
shared_name = 'vsl2'
suppress_stderr = True
Expand Down Expand Up @@ -309,7 +309,7 @@ def parse(self, output):
class JRonn(Predictor):
tag = 'jronn'
types = ['disorder']
groups = ['mobidb3', 'caid']
groups = ['mobidb3', 'mobidb4', 'caid']
intype = 'fasta'
shared_name = 'jronn'

Expand All @@ -333,7 +333,7 @@ def parse(self, output):
class Seg(Predictor):
tag = 'seg'
types = ['lowcomp']
groups = ['mobidb3', 'main']
groups = ['mobidb3', 'mobidb4', 'main']
intype = 'fasta'
shared_name = 'seg'

Expand All @@ -360,7 +360,7 @@ def parse(self, output):
class Pfilt(Predictor):
tag = 'pfilt'
types = ['lowcomp']
groups = ['mobidb3']
groups = ['mobidb3', 'mobidb4']
intype = 'fasta'
shared_name = 'pfilt'

Expand All @@ -387,7 +387,7 @@ def parse(self, output):
class FESS(Predictor):
tag = ['fess_helix', 'fess_sheet', 'fess_coil']
types = ['sspops']
groups = ['mobidb3']
groups = ['mobidb3', 'mobidb4']
intype = 'fasta'
shared_name = 'fess'

Expand Down Expand Up @@ -422,7 +422,7 @@ def parse(self, output):
class DynaMine(Predictor):
tag = 'dynamine_coil'
types = ['sspops']
groups = ['mobidb3', 'caid']
groups = ['mobidb3', 'mobidb4', 'caid']
intype = 'fasta'
shared_name = 'dynamine'

Expand All @@ -442,7 +442,7 @@ def parse(self, output):
class Anchor(Predictor):
tag = 'anchor'
types = ['bindsite']
groups = ['mobidb3', 'caid']
groups = ['mobidb3', 'mobidb4', 'caid']
intype = 'fasta'
shared_name = 'anchor'

Expand Down
16 changes: 9 additions & 7 deletions mobidb_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from mdblib.setdirs import set_pred_dir
from mdblib.streams import OutStream, InStream
from mdblib.consensus import MobidbLiteConsensus, SimpleConsensus, feature_desc
from mdblib.outformats import InterProFormat, ExtendedFormat, Mobidb3Format, CaidFormat, FastaFormat, VerticalFormat
from mdblib.outformats import InterProFormat, ExtendedFormat, Mobidb3Format, Mobidb4Format, CaidFormat, FastaFormat, VerticalFormat

# Suppress warnings
warnings.filterwarnings('ignore')
Expand All @@ -51,6 +51,7 @@ class MobidbLite(object):
'vertical': 'main',
'extended': 'main',
'mobidb3': 'mobidb3',
'mobidb4': 'mobidb4',
'caid': 'caid'}

def __init__(self, fasta, launchdir=None, conf=None, architecture='64', threads=0, outfile=None,
Expand Down Expand Up @@ -172,6 +173,10 @@ def fmt_output(self, acc, uacc, seq, preds, s_cons, r_cons, m_cons):
output = Mobidb3Format(acc, seq, m_cons, s_cons, preds, _multi_accs=multi_acc,
injection=self.additional_data)

elif self.outfmt == 'mobidb4':
output = Mobidb4Format(acc, seq, m_cons, s_cons, preds, _multi_accs=multi_acc,
injection=self.additional_data)

elif self.outfmt == 'caid':
output = CaidFormat(acc, seq, m_cons, preds, _multi_accs=multi_acc)

Expand All @@ -181,23 +186,20 @@ def calc_consensus(self, predictions, sequence):
simple_c = None
relaxed_c = None
mobidblite_c = MobidbLiteConsensus(predictions, sequence,
pappu=True if self.outfmt == 'mobidb3' else False,
pappu=True if self.outfmt in ['mobidb3', 'mobidb4'] else False,
force=self.force_consensus)

if self.outfmt == 'extended':
relaxed_c = SimpleConsensus(predictions, sequence, force=self.force_consensus, threshold=.375)


if self.outfmt == 'mobidb3':
if self.outfmt in ['mobidb3', 'mobidb4']:
simple_c = SimpleConsensus(predictions, sequence, force=self.force_consensus)



return simple_c, relaxed_c, mobidblite_c

def run(self, fasta, architecture, threads, outfile):

logging.debug('outfmt: %i outgroup: %s', self.outfmt, self.outgroup)
logging.debug('outfmt: %s outgroup: %s', self.outfmt, self.outgroup)

with InStream(fasta) as self.instream, OutStream(outfile) as self.outstream:
# Parse input Fasta
Expand Down

0 comments on commit 8e8d30b

Please sign in to comment.