Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep introns #79

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions lib/biocode/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,12 @@ def get_gff3_features(gff3_file, assemblies=None):
parent_feat.add_exon(exon)
features[feat_id] = exon

elif cols[2] == 'intron':
intron = biocode.things.Intron(id=feat_id, parent=parent_feat)
intron.locate_on(target=current_assembly, fmin=rfmin, fmin_partial=fmin_partial, fmax=rfmax, fmax_partial=fmax_partial, strand=rstrand)
parent_feat.add_intron(intron)
features[feat_id] = intron

elif cols[2] == 'CDS':
if phase == '.':
phase = 0
Expand Down Expand Up @@ -729,6 +735,23 @@ def print_biogene( gene=None, fh=None, source=None, on=None ):
columns[8] = build_column_9( id=exon.id, parent=RNA.id, other=exon_annot_atts )
fh.write( "\t".join(columns) + "\n" )

## handle introns for this RNA
for intron in sorted(RNA.introns( on )):
intron_loc = intron.location_on( on )

if intron_loc is None:
raise Exception("ERROR: Expected intron {0} to be located on {1} but it wasn't".format(intron.id, on.id))

intron_partiality_string = _partiality_string(intron_loc)
intron_annot_atts = dict()
if intron_partiality_string is not None:
intron_annot_atts['Partial'] = intron_partiality_string

columns[2] = 'intron'
columns[3:5] = [str(intron_loc.fmin + 1), str(intron_loc.fmax)]
columns[8] = build_column_9( id=intron.id, parent=RNA.id, other=intron_annot_atts )
fh.write( "\t".join(columns) + "\n" )

# are there polypeptides?
for polypeptide in sorted(RNA.polypeptides()):
if len(polypeptide.locations) == 0:
Expand Down
65 changes: 38 additions & 27 deletions lib/biocode/things.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

import itertools
import sys
import re
import uuid

#from biocode import utils, gff, tbl
Expand Down Expand Up @@ -842,9 +843,10 @@ class Intron( LocatableThing ):
removed from within the transcript by splicing together the sequences (exons) on either
side of it."
'''
def __init__( self, id=None, locations=None, length=None ):
def __init__( self, id=None, locations=None, parent=None, length=None ):
    '''
    Builds an Intron.

    The 'locations' value is handed to the parent class initializer; 'id', 'parent'
    and 'length' are stored unchanged as attributes of the same name.  All arguments
    are optional and default to None.
    '''
    super().__init__(locations)
    self.id, self.parent, self.length = id, parent, length


Expand Down Expand Up @@ -1029,6 +1031,7 @@ def __init__( self, id=None, locations=None, parent=None, locus_tag=None, childr

## initialize any types needed
self.children = _initialize_type_list(self.children, 'exon')
self.children = _initialize_type_list(self.children, 'intron')
self.children = _initialize_type_list(self.children, 'CDS')
self.children = _initialize_type_list(self.children, 'polypeptide')
self.children = _initialize_type_list(self.children, 'UTR')
Expand All @@ -1044,6 +1047,10 @@ def add_exon(self, exon):
exon.parent = self
self.children['exon'].append(exon)

def add_intron(self, intron):
    '''
    Registers the passed intron as a child of this feature: the intron's 'parent'
    attribute is pointed back at this object and the intron is appended to the
    'intron' list within self.children.
    '''
    setattr(intron, 'parent', self)
    intron_children = self.children['intron']
    intron_children.append(intron)

def add_five_prime_UTR(self, utr):
    '''
    Registers the passed UTR as a child of this feature: the UTR's 'parent'
    attribute is pointed back at this object and the UTR is appended to the
    'UTR' list within self.children.
    '''
    setattr(utr, 'parent', self)
    utr_children = self.children['UTR']
    utr_children.append(utr)
Expand Down Expand Up @@ -1207,32 +1214,36 @@ def has_introns( self ):
return False

def introns(self, on=None):
    '''
    Dynamically generates Intron objects in order for the current RNA. The coordinates of the
    generated introns depend on the object passed via the 'on' argument.

    One intron is created for every pair of consecutive exons (in sorted order), spanning
    from the fmax of the previous exon to the fmin of the next one, on the strand of the
    next exon.

    Intron IDs are derived from the ID of the exon that follows the intron: every
    'exon<N>' token in it is replaced by 'intron<M>', where M is the 1-based count of the
    intron being generated (e.g. exon ID 'g1.t1.exon2' gives 'g1.t1.intron1').  If the exon
    has no ID, or its ID contains no 'exon<N>' token, a random UUID string is used instead
    so the intron never ends up sharing an ID with the exon.

    Args:
        on: the molecule on which the exons are located (required).

    Returns:
        A list of Intron objects; empty when there are fewer than two exons.

    Raises:
        Exception: if 'on' is None.

    NOTE(review): this method only looks at self.exons(); introns registered through
    add_intron() do not appear to be consulted here -- confirm that is intended.
    '''
    if on is None:
        raise Exception("ERROR: the introns() method requires a passed molecule using the 'on' argument")

    mol_on = on

    intron_objs = list()
    last_exon = None
    last_exon_loc = None
    intron_count = 0

    for exon in sorted(self.exons()):
        exon_loc = exon.location_on( mol_on )

        if last_exon is not None:
            intron_count += 1

            intron_id = None
            if exon.id is not None:
                # '[0-9]+' consumes the whole exon number; matching a single digit would
                # leave trailing digits behind for exon10 and above (e.g. 'intron90').
                candidate_id, replaced = re.subn(r'exon[0-9]+', 'intron{0}'.format(intron_count), str(exon.id))
                if replaced > 0:
                    intron_id = candidate_id

            if intron_id is None:
                # Nothing to derive the ID from; fall back to a unique random one rather
                # than reusing the exon's own ID.
                intron_id = str(uuid.uuid4())

            intron = Intron( id=intron_id )
            intron.locate_on( target=mol_on, fmin=last_exon_loc.fmax, fmax=exon_loc.fmin, strand=exon_loc.strand )
            intron_objs.append( intron )

        last_exon = exon
        last_exon_loc = exon_loc

    return intron_objs

def polypeptides(self):
    '''
    Returns the list stored under the 'polypeptide' key of self.children (the list
    object itself, not a copy).
    '''
    polypeptide_children = self.children['polypeptide']
    return polypeptide_children
Expand Down