Skip to content

Commit

Permalink
Replace mobidb3 with mobidb4 format. Implement a consensus-merge func…
Browse files Browse the repository at this point in the history
…tion. Fix method tag for dynamine
  • Loading branch information
damianopiovesan committed Aug 12, 2020
1 parent 6902ad3 commit d3de720
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 424 deletions.
3 changes: 3 additions & 0 deletions changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,6 @@ Rename -f 0 --> 'interpro'
Rename -f 1 --> 'extended'
Rename -f 2 --> 'mobidb3'
Rename -f 3 --> 'caid'

3.8.3 | 12/08/2020
Replace mobidb3 with mobidb4 format
4 changes: 2 additions & 2 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ espritz=
fess=
globplot=
iupred=
jronn=
#jronn=
pfilt=
seg=
vsl2=
Expand All @@ -18,7 +18,7 @@ mdb=0.625
anchor=0.5
dis465=0.5
disHL=0.086
dynamine_coil=0.5
dynamine=0.5
espD=0.5072
espN=0.3089
espX=0.1434
Expand Down
12 changes: 11 additions & 1 deletion mdblib/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def calc_agreement(self, seq, threshold, ptype=None, force_consensus=False):
logging.debug('%s | agreement: excluded', prediction.method)



if included_predictors != 0:
self.summed_states = agreement
agreement = [summed_states / included_predictors for summed_states in agreement]
Expand All @@ -104,6 +103,17 @@ def __init__(self, prediction_stack, seq, threshold=0.5, force=False):
self.prediction.regions = self.prediction.to_regions(start_index=1, positivetag='D')


class MergeConsensus(Consensus):
    """
    Define a consensus merging all regions (e.g. for low complexity).

    The agreement is computed by ``calc_agreement`` with a low default
    threshold and ``force_consensus`` enabled by default.
    NOTE(review): presumably the low threshold lets a residue flagged by
    only a few predictors end up in the merged regions -- confirm against
    ``calc_agreement``.

    :param prediction_stack: predictions handed to the ``Consensus`` base class
    :param seq: sequence forwarded to ``calc_agreement``
    :param threshold: agreement threshold forwarded to ``calc_agreement``
    :param ptype: prediction type forwarded to ``calc_agreement``
    :param force: forwarded to ``calc_agreement`` as ``force_consensus``
    """
    def __init__(self, prediction_stack, seq, threshold=0.1, ptype='disorder', force=True):
        # The message used to read 'Generating Simple consensus', which made
        # the debug log indistinguishable from the simple consensus one.
        logging.debug('Generating Merge consensus')
        super(MergeConsensus, self).__init__(prediction_stack)
        self.calc_agreement(seq, threshold, ptype=ptype, force_consensus=force)
        # Regions are 1-based and built from positions whose state equals 1.
        self.prediction.regions = self.prediction.to_regions(start_index=1, positivetag=1)


class MobidbLiteConsensus(Consensus):
"""
Define consensus featured by MobiDB-Lite as its prediction.
Expand Down
211 changes: 59 additions & 152 deletions mdblib/outformats.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,122 +148,6 @@ def __repr__(self):
return ""


class Mobidb3Format(Formatter):
    """
    Formatter producing the 'mobidb3' JSON layout.

    Consensus predictions are collected under ``mobidb_consensus`` and the
    single-predictor outputs under ``mobidb_data``; every entry is appended
    to a ``predictors`` list. ``repr()`` yields one JSON document per line.
    """

    def __init__(self, _acc, _seq, _mdbl_consensus,
                 _simple_consensus, _single_predictions, **kwargs):
        # All attributes are set before the base initializer runs.
        # NOTE(review): presumably the base class builds the output from
        # them during its own __init__ -- confirm in Formatter.
        self.seq = _seq
        self.seqlen = len(self.seq)
        self.mdbl_consensus = _mdbl_consensus
        self.simple_consensus = _simple_consensus
        self.single_predictions = _single_predictions
        self.injecting_data = kwargs.get("injection")
        super(Mobidb3Format, self).__init__(_acc, **kwargs)

        if self.multi_accessions:
            self.multiply_by_accession("accession")

    def _get_output_obj(self):
        """
        Build the output document for the current sequence.

        :return: single-element list holding the output dictionary
        :rtype: list
        """
        out_obj = dict()
        seqlen = self.seqlen

        def predictors_of(section, category):
            # Return out_obj[section][category]['predictors'], creating the
            # intermediate containers the first time they are requested.
            categories = out_obj.setdefault(section, dict())
            holder = categories.setdefault(category, dict())
            return holder.setdefault('predictors', list())

        def disorder_content(regions):
            # Fraction of the sequence covered by (start, end, ...) regions,
            # both ends inclusive; 0.0 when there are no regions.
            if not regions:
                return 0.0
            covered = 0.0
            for region in regions:
                covered = covered + (region[1] - region[0] + 1)
            return covered / seqlen

        if self.injecting_data is not None:
            out_obj.update(self.injecting_data)

        out_obj.setdefault("sequence", self.seq)

        # MobiDB-lite consensus
        predictors_of('mobidb_consensus', 'disorder').append({
            'method': 'mobidb_lite',
            'regions': self.mdbl_consensus.prediction.regions,
            'scores': self.mdbl_consensus.prediction.scores,
            'dc': disorder_content(self.mdbl_consensus.prediction.regions),
        })

        # MobiDB-lite consensus sub regions
        if self.mdbl_consensus.prediction.regions:
            predictors_of('mobidb_consensus', 'disorder').append({
                'method': 'mobidb_lite_sub',
                'regions': self.mdbl_consensus.enriched_regions_tags,
            })

        # Simple consensus
        predictors_of('mobidb_consensus', 'disorder').append({
            'method': 'simple',
            'regions': self.simple_consensus.prediction.regions,
            'dc': disorder_content(self.simple_consensus.prediction.regions),
        })

        # Single predictions
        for prediction in self.single_predictions:
            if 'disorder' in prediction.types or 'lowcomp' in prediction.types:
                prediction.translate_states({1: 'D', 0: 'S'})
                predictors_of('mobidb_data', 'disorder').append({
                    'method': prediction.method,
                    'regions': prediction.to_regions(start_index=1, positivetag='D'),
                })

            if 'sspops' in prediction.types:
                # The method name is expected to be '<method>_<type>'.
                method, ptype = prediction.method.split('_')
                predictors_of('mobidb_data', 'ss_populations').append({
                    'method': method,
                    'type': ptype,
                    'scores': prediction.scores,
                })

            if 'bindsite' in prediction.types:
                prediction.translate_states({1: 'D', 0: 'S'})
                predictors_of('mobidb_data', 'lips').append({
                    'method': prediction.method,
                    'regions': prediction.to_regions(start_index=1, positivetag='D'),
                })

        if out_obj:
            out_obj["length"] = self.seqlen

            # Identifiers matching the UPI pattern are stored as 'uniparc',
            # anything else as 'accession'.
            id_key = 'uniparc' if re.search("^UPI[A-F0-9]{10}$", self.acc) else 'accession'
            out_obj[id_key] = self.acc

            self.isnone = False

        return [out_obj]

    def __repr__(self):
        # One JSON document per output object; empty string when no output.
        if not self.output:
            return ""
        return '\n'.join(json.dumps(entry) for entry in self.output)


class Mobidb4Format(Formatter):

feature_tag = {'PA': 'polyampholyte', # PA
Expand All @@ -280,12 +164,13 @@ def content_count(self, regions):


def __init__(self, _acc, _seq, _mdbl_consensus,
_simple_consensus, _single_predictions, **kwargs):
_simple_consensus, _lowcomp_consensus, _single_predictions, **kwargs):
self.seq = _seq
self.seqlen = len(self.seq)
self.mdbl_consensus = _mdbl_consensus
self.simple_consensus = _simple_consensus
self.single_predictions = _single_predictions
self.lowcomplexity_consensus = _lowcomp_consensus
self.injecting_data = kwargs.get("injection")
super(Mobidb4Format, self).__init__(_acc, **kwargs)

Expand All @@ -301,16 +186,24 @@ def _get_output_obj(self):
out_obj.setdefault("sequence", self.seq)

# MobiDB-lite consensus
# TODO eliminate regions if empty?
count = self.content_count(self.mdbl_consensus.prediction.regions)
out_obj["prediction-disorder-mobidb_lite"] = {
'regions': [(r[0], r[1]) for r in self.mdbl_consensus.prediction.regions],
'scores': self.mdbl_consensus.prediction.scores,
'content_count': count,
'content_fraction': count / self.seqlen
}
if count:
out_obj["prediction-disorder-mobidb_lite"] = {
'regions': [(r[0], r[1]) for r in self.mdbl_consensus.prediction.regions],
'scores': self.mdbl_consensus.prediction.scores,
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}
else:
out_obj["prediction-disorder-mobidb_lite"] = {
'scores': self.mdbl_consensus.prediction.scores
}

# MobiDB-lite consensus sub regions
# TODO check:
# proline_rich
# polar
# cystein_rich ???
if self.mdbl_consensus.prediction.regions:

regions = {}
Expand All @@ -323,47 +216,61 @@ def _get_output_obj(self):
out_obj["prediction-{}-mobidb_lite_sub".format(r_type)] = {
'regions': regions[r_type],
'content_count': count,
'content_fraction': count / self.seqlen
'content_fraction': round(count / self.seqlen, 3)
}

# Simple consensus
count = self.content_count(self.simple_consensus.prediction.regions)
out_obj["prediction-disorder-th_50"] = {
'regions': [(r[0], r[1]) for r in self.simple_consensus.prediction.regions],
'content_count': count,
'content_fraction': count / self.seqlen
}
if self.simple_consensus.prediction.regions:
count = self.content_count(self.simple_consensus.prediction.regions)
out_obj["prediction-disorder-th_50"] = {
'regions': [(r[0], r[1]) for r in self.simple_consensus.prediction.regions],
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}

if self.lowcomplexity_consensus.prediction.regions:
count = self.content_count(self.lowcomplexity_consensus.prediction.regions)
out_obj["prediction-low_complexity-merge"] = {
'regions': [(r[0], r[1]) for r in self.lowcomplexity_consensus.prediction.regions],
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}

# Single predictions
for prediction in self.single_predictions:
regions = [(r[0], r[1]) for r in prediction.to_regions(start_index=1, positivetag=1)]
count = self.content_count(regions)

if 'disorder' in prediction.types:
out_obj["prediction-disorder-{}".format(prediction.method)] = {
'regions': regions,
'content_count': count,
'content_fraction': count / self.seqlen
}
elif 'lowcomp' in prediction.types:
out_obj["prediction-low_complexity-{}".format(prediction.method)] = {
'regions': regions,
'content_count': count,
'content_fraction': count / self.seqlen
}
elif 'bindsite' in prediction.types:
out_obj["prediction-lip-anchor"] = {
'regions': regions,
'content_count': count,
'content_fraction': count / self.seqlen
if regions:
if 'disorder' in prediction.types:
out_obj["prediction-disorder-{}".format(prediction.method)] = {
'regions': regions,
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}
elif 'lowcomp' in prediction.types:
out_obj["prediction-low_complexity-{}".format(prediction.method)] = {
'regions': regions,
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}
elif 'bindsite' in prediction.types:
out_obj["prediction-lip-{}".format(prediction.method)] = {
'regions': regions,
'content_count': count,
'content_fraction': round(count / self.seqlen, 3)
}

if 'rigidity' in prediction.types:
out_obj["prediction-rigidity-{}".format(prediction.method)] = {
'scores': prediction.scores
}
elif 'sspops' in prediction.types:

if 'sspops' in prediction.types:
method, ptype = prediction.method.split('_')
out_obj["prediction-{}-fess".format(ptype)] = {
out_obj["prediction-{}-{}".format(ptype, method)] = {
'scores': prediction.scores
}
# else:
# logging.debug("Type not implemented in mobidb4".format(prediction.types))

if out_obj:
out_obj["length"] = self.seqlen
Expand Down
33 changes: 0 additions & 33 deletions mdblib/prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,36 +38,3 @@ def scores_to_states(self, tags=(1, 0)):
"""
return [tags[0] if score >= self.threshold else tags[1] for score in self.scores]

# def regions_to_set(self):
# """Get list of ID amino-acids positions from region list
#
# :return: Unique amino-acid index of ID regions
# :rtype: set
# """
# positions = set()
# for start, end, _ in self.to_regions():
# positions.update(range(start, end + 1))
# return positions
#
# def regions_to_states(self, length, tags=('D', 'S'), reg_startindex=1):
# """Represent ID states as a string from a list ID regions
#
# :param length: Length of the protein sequence (num of amino-acids)
# :type length: int
# :param tags: couple of values: positive match tag, negative match tag. Order counts
# :type tags: tuple
# :param reg_startindex: start index of the input regions (default: 1)
# :type reg_startindex: int
# :return: Order/Disorder states of the amino-acids of a protein
# :rtype: str
# """
# states = ""
# positions = [pos - reg_startindex for pos in self.regions_to_set()]
#
# for i in range(0, length):
# if i in positions:
# states += tags[0]
# else:
# states += tags[1]
#
# return states
4 changes: 2 additions & 2 deletions mdblib/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,8 @@ def parse(self, output):


class DynaMine(Predictor):
tag = 'dynamine_coil'
types = ['sspops']
tag = 'dynamine'
types = ['rigidity']
groups = ['mobidb3', 'mobidb4', 'caid']
intype = 'fasta'
shared_name = 'dynamine'
Expand Down
Loading

0 comments on commit d3de720

Please sign in to comment.