Skip to content

Commit

Permalink
Merge branch 'master' into gopreprocess-gpad20
Browse files Browse the repository at this point in the history
  • Loading branch information
sierra-moxon committed Mar 6, 2024
2 parents 1c5f283 + 4bb03df commit 4b7ce38
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 74 deletions.
5 changes: 4 additions & 1 deletion ontobio/io/gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,10 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
return assocparser.ParseResult(source_line, [], True, report=report)
if gaf_line[DB_OBJECT_SYMBOL] == "":
report.error(source_line, Report.INVALID_ID, "EMPTY", "col3 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)
return assocparser.ParseResult(source_line, [], True, report=report)
if '|' in gaf_line[DB_OBJECT_SYMBOL]:
report.error(source_line, Report.INVALID_SYMBOL, gaf_line[4], "Pipes are not allowed in gene symbol", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)
if gaf_line[REFERENCE_INDEX] == "":
report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)
Expand Down
63 changes: 32 additions & 31 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
iba_eco = ecomapping.coderef_to_ecoclass("IBA")
iep_eco = ecomapping.coderef_to_ecoclass("IEP")
hep_eco = ecomapping.coderef_to_ecoclass("HEP")

# ECO classes for the ISS, ISA, ISO, ISM and RCA evidence codes, resolved via
# ecomapping.coderef_to_ecoclass; used by the GoRule05 and GoRule63 checks.
iss_eco = ecomapping.coderef_to_ecoclass("ISS")
isa_eco = ecomapping.coderef_to_ecoclass("ISA")
iso_eco = ecomapping.coderef_to_ecoclass("ISO")
ism_eco = ecomapping.coderef_to_ecoclass("ISM")
rca_eco = ecomapping.coderef_to_ecoclass("RCA")

# TestResult = collections.namedtuple("TestResult", ["result_type", "message", "result"])
class TestResult(object):
Expand Down Expand Up @@ -185,6 +189,16 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
fails = ((annotation_obj_id == "GO:0005488" or annotation_obj_id == "GO:0005515") and annotation.negated)
return self._result(not fails)

class GoRule05(GoRule):
    """GORULE:0000005: restrict evidence on direct 'binding' annotations.

    An annotation fails when its object is exactly GO:0005488 or GO:0005515
    and its evidence type is one of the IEA, ISS, ISO, ISM, ISA, IBA or RCA
    ECO classes. The rule is registered with ``FailMode.SOFT``.
    """

    def __init__(self):
        super().__init__("GORULE:0000005", "IEA, ISS, ISO, ISM, ISA, IBA, RCA annotations are not allowed for direct annotations to 'binding ; GO:0005488' or 'protein binding ; GO:0005515'", FailMode.SOFT)

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Check ``annotation`` against the rule.

        ``config`` and ``group`` are accepted for interface compatibility and
        are not consulted. Returns ``self._result(True)`` when the annotation
        is allowed and ``self._result(False)`` otherwise.
        """
        evidence = str(annotation.evidence.type)
        annotation_obj_id = str(annotation.object.id)
        # Only direct annotations to these two terms are restricted.
        is_binding_term = annotation_obj_id in ("GO:0005488", "GO:0005515")
        disallowed_evidence = (iea_eco, iss_eco, iso_eco, ism_eco, isa_eco, iba_eco, rca_eco)
        fails = is_binding_term and evidence in disallowed_evidence
        return self._result(not fails)

class GoRule06(GoRule):

Expand Down Expand Up @@ -687,35 +701,6 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return self._result(True)


class GoRule57(GoRule):
    """GORULE:0000057: apply a group's ``filter_out`` settings to an annotation.

    The settings are read from ``config.group_metadata["filter_out"]`` and may
    list evidence types, evidence/reference pairs and annotation property
    names. An annotation matching any of them fails the rule.
    """

    def __init__(self):
        super().__init__("GORULE:0000057", "Group specific filter rules should be applied to annotations", FailMode.HARD, tags=["context-import"])

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Return a failing result when ``annotation`` matches a group filter.

        Passes unconditionally when ``config.group_metadata`` is ``None``.
        """
        metadata = config.group_metadata
        if metadata is None:
            # No group metadata: nothing to filter on.
            return self._result(True)

        filter_out = metadata.get("filter_out", {})
        evidence_type = str(annotation.evidence.type)

        # 1. Evidence types filtered outright.
        if evidence_type in filter_out.get("evidence", []):
            return self._result(False)

        # 2. Evidence types filtered only in combination with one exact reference.
        for pair in filter_out.get("evidence_reference", []):
            wanted_evidence, wanted_reference = pair["evidence"], pair["reference"]
            if evidence_type == wanted_evidence:
                supporting = [str(ref) for ref in annotation.evidence.has_supporting_reference]
                # The annotation must carry exactly this single reference.
                if supporting == [wanted_reference]:
                    return self._result(False)

        # 3. Annotation properties whose mere presence filters the annotation.
        blocked = any(
            name in annotation.properties.keys()
            for name in filter_out.get("annotation_properties", [])
        )
        return self._result(not blocked)


class GoRule58(RepairRule):

def __init__(self):
Expand Down Expand Up @@ -925,8 +910,24 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return TestResult(repair_result(repair_state, self.fail_mode), "{}: {} should be one of {}. Repaired to {}".format(self.message(repair_state), relation, allowed_str, repaired_str), repaired_annotation)


class GoRule63(GoRule):
    """GORULE:0000063: ISS/ISA/ISO annotations need a with/from value.

    The rule is registered with ``FailMode.HARD``.
    """

    def __init__(self):
        super().__init__("GORULE:0000063", "Annotations using ISS/ISA/ISO evidence should refer to a gene product (in the 'with' column)", FailMode.HARD)

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Fail when the evidence is ISS, ISA or ISO and with/from is ``None`` or empty.

        ``config`` and ``group`` are not consulted.
        """
        evidence_type = str(annotation.evidence.type)
        with_from = annotation.evidence.with_support_from

        # with/from is only inspected for the three evidence classes in scope.
        fails = evidence_type in (iss_eco, isa_eco, iso_eco) and (with_from is None or len(with_from) == 0)
        return self._result(not fails)

GoRules = enum.Enum("GoRules", {
"GoRule02": GoRule02(),
"GoRule05": GoRule05(),
"GoRule06": GoRule06(),
"GoRule07": GoRule07(),
"GoRule08": GoRule08(),
Expand All @@ -947,9 +948,9 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule50": GoRule50(),
#"GoRule51": GoRule51(), Do not run test
"GoRule55": GoRule55(),
"GoRule57": GoRule57(),
"GoRule58": GoRule58(),
"GoRule61": GoRule61(),
"GoRule63": GoRule63(),
# GoRule13 at the bottom in order to make all other rules clean up an annotation before reaching 13
"GoRule13": GoRule13()
})
Expand Down
6 changes: 6 additions & 0 deletions tests/test_gafparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ def test_bad_date():
assert assoc_result.skipped == True
assert assoc_result.associations == []

def test_bad_gene_symbol():
    """A GAF line whose symbol column contains a pipe is skipped with no associations."""
    gaf_line = "PomBase\tSPAC25B8.17\ta|pipeisnotallowed\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)"
    parser = GafParser()
    result = parser.parse_line(gaf_line)
    assert result.skipped == True
    assert result.associations == []

def test_bad_go_id():
p = GafParser()
assoc_result = p.parse_line("PomBase\tSPAC25B8.17\typf1\t\tINVALID:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)")
Expand Down
97 changes: 55 additions & 42 deletions tests/test_qc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pytest
import datetime
import yaml
import json

from ontobio.model import association
from ontobio.model.association import Curie
Expand All @@ -12,7 +11,7 @@
from ontobio.io import gafparser
from ontobio.io.gafparser import GafParser
from ontobio.io import gpadparser
from ontobio import ontol, ontol_factory, ecomap
from ontobio import ontol_factory, ecomap

import copy

Expand Down Expand Up @@ -94,7 +93,29 @@ def test_go_rule02():
assoc.object.id = Curie.from_str("GO:0003674")
test_result = qc.GoRule02().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS


def test_go_rule_05():
    """GoRule05 warns on restricted evidence for the binding terms and passes otherwise."""

    def result_type_for(goid, evidence):
        # Fresh rule instance and config per check, as each case is independent.
        assoc = make_annotation(goid=goid, evidence=evidence).associations[0]
        return qc.GoRule05().test(assoc, all_rules_config(ontology=ontology)).result_type

    binding_terms = ["GO:0005488", "GO:0005515"]
    restricted_codes = ["IEA", "ISS", "ISO", "ISM", "ISA", "IBA", "RCA"]
    warning_cases = [(term, code) for term in binding_terms for code in restricted_codes]
    for term, code in warning_cases:
        assert result_type_for(term, code) == qc.ResultType.WARNING

    # Other term with restricted evidence, binding term with other evidence,
    # and other term with other evidence all pass.
    passing_cases = [
        ("GO:0034655", "IEA"),
        ("GO:0005488", "HEP"),
        ("GO:0034655", "HEP"),
    ]
    for term, code in passing_cases:
        assert result_type_for(term, code) == qc.ResultType.PASS

def test_go_rule_06():

assoc = make_annotation(goid="GO:0005575", evidence="HEP", aspect="C").associations[0]
Expand Down Expand Up @@ -629,44 +650,6 @@ def test_gorule55():
assert test_result.result_type == qc.ResultType.WARNING


def test_gorule57():
    """GoRule57 errors on each configured filter and passes once none of them match."""
    assoc = make_annotation(db="HELLO", db_id="123", qualifier="contributes_to", goid="GO:0003674", evidence=iea_eco, taxon="taxon:2", from_gaf=False).associations[0]

    # Look at evidence_code, reference, annotation_properties
    group_metadata = {
        "id": "mgi",
        "label": "Mouse Genome Informatics",
        "filter_out": {
            "evidence": [iea_eco],
            "evidence_reference": [
                {"evidence": ikr_eco, "reference": "PMID:21873635"},
            ],
            "annotation_properties": ["noctua-model-id"],
        },
    }
    config = assocparser.AssocParserConfig(
        group_metadata=group_metadata,
        rule_set=assocparser.RuleSet.ALL,
    )

    def outcome():
        # Evaluate the rule against the current state of ``assoc``.
        return qc.GoRule57().test(assoc, config).result_type

    # Evidence type listed under "evidence".
    assert outcome() == qc.ResultType.ERROR

    # Evidence/reference pair listed under "evidence_reference".
    assoc.evidence.type = Curie.from_str(ikr_eco)
    assoc.evidence.has_supporting_reference = [Curie.from_str("PMID:21873635")]
    assert outcome() == qc.ResultType.ERROR

    # Only the annotation property matches.
    assoc.evidence.type = Curie.from_str("ECO:some_garbage")
    assoc.evidence.has_supporting_reference = [Curie.from_str("PMID:some_garbage")]
    assoc.properties = {"noctua-model-id": "some_garbage"}
    assert outcome() == qc.ResultType.ERROR

    # Nothing matches any more.
    assoc.properties = {}
    assert outcome() == qc.ResultType.PASS

def test_gorule58():

with open("tests/resources/extensions-constraints.yaml") as exs_cons:
Expand Down Expand Up @@ -761,7 +744,37 @@ def test_gorule61():
version="2.2")
test_result = qc.GoRule61().test(assoc.associations[0], config)
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule_63():
    """GoRule63 passes ISS/ISA/ISO with a with/from value and errors when it is empty."""
    evidence_codes = ["ISS", "ISA", "ISO"]

    # Each code with anything in withfrom
    for code in evidence_codes:
        assoc = make_annotation(evidence=code, withfrom="BLAH:12345").associations[0]
        test_result = qc.GoRule63().test(assoc, all_rules_config())
        assert test_result.result_type == qc.ResultType.PASS

    # Each code with nothing in withfrom
    for code in evidence_codes:
        assoc = make_annotation(evidence=code, withfrom="").associations[0]
        test_result = qc.GoRule63().test(assoc, all_rules_config())
        assert test_result.result_type == qc.ResultType.ERROR

def test_all_rules():
# pass
Expand All @@ -778,7 +791,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 24
assert len(test_results.keys()) == 25
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down

0 comments on commit 4b7ce38

Please sign in to comment.