Skip to content

Commit

Permalink
add --locus_numbering_start to be able to continue numbering (like to…
Browse files Browse the repository at this point in the history
… add the mitochondrial part to the same flat file later) + modification of the combine method + fix a bug when setting start_codon in the CDS feature (the bug was difficult to see because it is detected only by the validator owned by ENA, not by the public one available in their repository)
  • Loading branch information
Jacques Dainat committed Dec 8, 2017
1 parent 669711d commit f10e45a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 23 deletions.
21 changes: 16 additions & 5 deletions EMBLmyGFF3/EMBLmyGFF3.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
SCRIPT_DIR=os.path.dirname(__file__)
FEATURE_DIR=SCRIPT_DIR + "/modules/features"
QUALIFIER_DIR=SCRIPT_DIR + "/modules/qualifiers"
CPT_LOCUS_GLB=0

class EMBL( object ):
"""
Expand Down Expand Up @@ -756,9 +755,10 @@ def FT(self):
#manage locus_tag
locus_tag=None
if feature.type.lower() != "source" and feature.type.lower() != "gap":
global CPT_LOCUS_GLB
CPT_LOCUS_GLB+=1
locus_tag_suffix="locus"+str(CPT_LOCUS_GLB)
cpt_locus = self.PREVIOUS_VALUES['locus_numbering_start']
locus_tag_suffix="LOCUS"+str(cpt_locus)
# now the locus has been used we can increment the locus value
self.PREVIOUS_VALUES['locus_numbering_start'] += 1

# replace locus_tag_suffix by the value of the locus_tag qualifier if this one exists
for qualifier in feature.qualifiers:
Expand Down Expand Up @@ -999,7 +999,7 @@ def set_keywords(self, keywords = []):

def set_locus_tag(self, locus_tag = ""):
"""
Sets the entry locus_tag numbers, or parses it from the current record
Sets the entry locus_tag value, or parses it from the current record
"""
if "locus_tag" in EMBL.PREVIOUS_VALUES:
self.locus_tag = EMBL.PREVIOUS_VALUES["locus_tag"]
Expand All @@ -1017,6 +1017,15 @@ def set_locus_tag(self, locus_tag = ""):
self.locus_tag = locus_tag
EMBL.PREVIOUS_VALUES["locus_tag"] = locus_tag

def set_locus_numbering_start(self, locus_numbering_start=1):
    """
    Sets the number used for the first automatically generated locus suffix.

    The value is shared through EMBL.PREVIOUS_VALUES: the first call stores
    locus_numbering_start there, and later calls leave the stored value
    untouched so that numbering continues instead of restarting.

    locus_numbering_start -- number to start from (default 1); only used
                             when no value has been stored yet.
    """
    if "locus_numbering_start" not in EMBL.PREVIOUS_VALUES:
        EMBL.PREVIOUS_VALUES["locus_numbering_start"] = locus_numbering_start

    # Keep the value on a plain data attribute. Assigning it to
    # self.set_locus_numbering_start would replace this method with an int
    # on the instance and make any later call raise TypeError.
    self.locus_numbering_start = EMBL.PREVIOUS_VALUES["locus_numbering_start"]

def set_molecule_type(self, molecule_type = None):
"""
Sets the sample molecule type, or parses it from the current record.
Expand Down Expand Up @@ -1259,6 +1268,7 @@ def main():
parser.add_argument("--interleave_genes", action="store_false", help="Print gene features with interleaved mRNA and CDS features.")
parser.add_argument("--force_unknown_features", action="store_true", help="Force to keep feature types not accepted by EMBL. /!\ Option not suitable for submission purpose.")
parser.add_argument("--force_uncomplete_features", action="store_true", help="Force to keep features whithout all the mandatory qualifiers. /!\ Option not suitable for submission purpose.")
parser.add_argument("--locus_numbering_start", default=1, type=int, help="Start locus numbering with the provided value.")

parser.add_argument("--email", default=None, help="Email used to fetch information from NCBI taxonomy database.")
parser.add_argument("--shame", action="store_true", help="Suppress the shameless plug.")
Expand Down Expand Up @@ -1319,6 +1329,7 @@ def main():
writer.set_keep_duplicates( args.keep_duplicates )
writer.set_keywords( args.keyword )
writer.set_locus_tag( args.locus_tag )
writer.set_locus_numbering_start(args.locus_numbering_start)
writer.set_molecule_type( args.molecule_type )
writer.set_organelle( args.organelle )
writer.set_project_id( args.project_id )
Expand Down
47 changes: 29 additions & 18 deletions EMBLmyGFF3/modules/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def _load_definition(self, filename):
if "qualifier" in key:
for item, definition in value.iteritems():
#logging.error("item:%s definition:%s",item,definition)
self.legal_qualifiers += [key]
self.legal_qualifiers += [item]
mandatory = "mandatory" in key
self.qualifiers[item] = Qualifier(item, mandatory = mandatory, qualifier_definition_dir=self.qualifier_definition_dir)
else:
Expand Down Expand Up @@ -462,7 +462,17 @@ def add_qualifier(self, gff_qualifier, value):
if self.qualifier_suffix.get(gff_qualifier, None):
value = ["%s%s" % (v, self.qualifier_suffix[gff_qualifier]) for v in value]

self.qualifiers[qualifier].add_value(value)
###########################################
# add the value only if not already present
# List case
if isinstance(value, list):
for val in value:
if val not in self.qualifiers[qualifier].value:
self.qualifiers[qualifier].add_value(val)
# Scalar case
else:
if value not in self.qualifiers[qualifier].value:
self.qualifiers[qualifier].add_value(value)

def combine(self, other):
"""
Expand All @@ -472,23 +482,24 @@ def combine(self, other):
# add new location
self.location += other.location

# combine qualifiers
for name, qualifier in self.qualifiers.iteritems():
other_qualifier = other.qualifiers.get(name, None)
for val in getattr(other_qualifier, "value", []):
if val not in qualifier.value:
self.qualifiers[name].add_value(other_value)

# Sort out phase
current_phase = int(self.qualifiers.get("phase", [0])[0])
other_phase = int(other.qualifiers.get("phase", [0])[0])

phase = current_phase if self.location.start < other.location.start else other_phase
if "codon_start" in self.legal_qualifiers:
if not "codon_start" in self.qualifiers:
self.qualifiers["codon_start"] = Qualifier("codon_start", phase, qualifier_definition_dir = self.qualifier_definition_dir)
# combine qualifier except codon start
for gff_qualifier, list_val_other in other.qualifiers.iteritems():
other_qualifier = self._from_gff_qualifier(gff_qualifier) # get the real qualifier name in EMBL format to be able to compare with the one alredy saved
if other_qualifier != "codon_start":
self.add_qualifier(gff_qualifier, list_val_other)
else:
self.qualifiers["codon_start"].set_value(phase)
# As the features are sorted by increasing location, on the + strand the codon_start qualifier of the first CDS is already the correct one.
# On the minus strand we have to replace the codon_start: only the value from the last CDS is kept.
if self.location.strand < 0:
# get phase of the last CDS
phase = int(other.qualifiers.get("phase", [0])[0])

if "codon_start" in self.legal_qualifiers:

if not "codon_start" in self.qualifiers:
self.qualifiers["codon_start"] = Qualifier("codon_start", phase, qualifier_definition_dir = self.qualifier_definition_dir)
else:
self.qualifiers["codon_start"].set_value(phase)

def CDS_report(self, out = sys.stdout, parts = False, codon_info = True):
"""
Expand Down

0 comments on commit f10e45a

Please sign in to comment.