diff --git a/src/python/ensembl/io/genomio/gff3/extract_annotation.py b/src/python/ensembl/io/genomio/gff3/extract_annotation.py index 414e41314..79d802cc1 100644 --- a/src/python/ensembl/io/genomio/gff3/extract_annotation.py +++ b/src/python/ensembl/io/genomio/gff3/extract_annotation.py @@ -230,6 +230,7 @@ def _transfer_description_up(self, child_feature: str) -> None: for child_id, child in children_features.items(): child_description = child.get("description") if child_description is not None: + child_description = self._clean_description(child_description) # Check parent parent_id = self.get_parent(parent_type, child_id) parent = parent_features[parent_id] @@ -237,6 +238,13 @@ def _transfer_description_up(self, child_feature: str) -> None: if parent_description is None: parent["description"] = child_description + @staticmethod + def _clean_description(description: str) -> str: + """Returns the description without "transcript variant" information.""" + variant_re = re.compile(r", transcript variant [A-Z][0-9]+$", re.IGNORECASE) + description = re.sub(variant_re, "", description) + return description + @staticmethod def product_is_informative(product: str, feat_ids: Optional[List[str]] = None) -> bool: """Returns True if the product name contains informative words, False otherwise. diff --git a/src/python/tests/gff3/test_extract_annotation.py b/src/python/tests/gff3/test_extract_annotation.py index f169bb55a..47d8ba164 100644 --- a/src/python/tests/gff3/test_extract_annotation.py +++ b/src/python/tests/gff3/test_extract_annotation.py @@ -312,14 +312,22 @@ def test_get_features(feat_type: str, expected_number: int, expected: ContextMan @pytest.mark.parametrize( "gene_desc, transc_desc, transl_desc, out_gene_desc, out_transc_desc", [ - (None, None, None, None, None), - ("Foobar", None, None, "Foobar", None), # Only gene descriptions - ("gene A", "transc B", "prod C", "gene A", "transc B"), # All descriptions set - (None, None, "Foobar", "Foobar", "Foobar"), # Transfer from transl - (None, "Foobar", None, "Foobar", "Foobar"), # Transfer from transc - (None, "Foobar", "Lorem", "Foobar", "Foobar"), # Transfer from transc, transl also set - ("Hypothetical gene", "Predicted function", "Foobar", "Foobar", "Foobar"), # Non informative - (None, None, "Unknown product", None, None), # Non informative source + param(None, None, None, None, None, id="Nothing provided"), + param("Foobar", None, None, "Foobar", None, id="Only gene description"), + param("gene A", "transc B", "prod C", "gene A", "transc B", id="All descriptions set"), + param(None, None, "Foobar", "Foobar", "Foobar", id="Transfer from transl"), + param(None, "Foobar", None, "Foobar", "Foobar", id="Transfer from transc"), + param( + None, + "Foobar, transcript variant X1", + None, + "Foobar", + "Foobar, transcript variant X1", + id="transcr with variant", + ), + param(None, "Foobar", "Lorem", "Foobar", "Foobar", id="Transfer from transc, transl also set"), + param("Hypothetical gene", "Predicted function", "Foobar", "Foobar", "Foobar", id="Non informative"), + param(None, None, "Unknown product", None, None, id="Non informative source"), ], ) @pytest.mark.dependency(depends=["get_features"])