diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py index 8064a67c..306bfde8 100644 --- a/ontobio/io/gafparser.py +++ b/ontobio/io/gafparser.py @@ -393,7 +393,10 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u return assocparser.ParseResult(source_line, [], True, report=report) if gaf_line[DB_OBJECT_SYMBOL] == "": report.error(source_line, Report.INVALID_ID, "EMPTY", "col3 is empty", taxon=gaf_line[TAXON_INDEX], rule=1) - return assocparser.ParseResult(source_line, [], True, report=report) + return assocparser.ParseResult(source_line, [], True, report=report) + if '|' in gaf_line[DB_OBJECT_SYMBOL]: + report.error(source_line, Report.INVALID_SYMBOL, gaf_line[DB_OBJECT_SYMBOL], "Pipes are not allowed in gene symbol", taxon=gaf_line[TAXON_INDEX], rule=1) + return assocparser.ParseResult(source_line, [], True, report=report) if gaf_line[REFERENCE_INDEX] == "": report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1) return assocparser.ParseResult(source_line, [], True, report=report) diff --git a/ontobio/io/qc.py b/ontobio/io/qc.py index 808b9039..e826d654 100644 --- a/ontobio/io/qc.py +++ b/ontobio/io/qc.py @@ -33,7 +33,11 @@ iba_eco = ecomapping.coderef_to_ecoclass("IBA") iep_eco = ecomapping.coderef_to_ecoclass("IEP") hep_eco = ecomapping.coderef_to_ecoclass("HEP") - +iss_eco = ecomapping.coderef_to_ecoclass("ISS") +isa_eco = ecomapping.coderef_to_ecoclass("ISA") +iso_eco = ecomapping.coderef_to_ecoclass("ISO") +ism_eco = ecomapping.coderef_to_ecoclass("ISM") +rca_eco = ecomapping.coderef_to_ecoclass("RCA") # TestResult = collections.namedtuple("TestResult", ["result_type", "message", "result"]) class TestResult(object): @@ -185,6 +189,16 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP fails = ((annotation_obj_id == "GO:0005488" or annotation_obj_id == "GO:0005515") and annotation.negated) return self._result(not fails) +class 
GoRule05(GoRule): + + def __init__(self): + super().__init__("GORULE:0000005", "IEA, ISS, ISO, ISM, ISA, IBA, RCA annotations are not allowed for direct annotations to 'binding ; GO:0005488' or 'protein binding ; GO:0005515'", FailMode.SOFT) + + def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult: + evidence = str(annotation.evidence.type) + annotation_obj_id = str(annotation.object.id) + fails = ((annotation_obj_id == "GO:0005488" or annotation_obj_id == "GO:0005515") and evidence in [iea_eco, iss_eco, iso_eco, ism_eco, isa_eco, iba_eco, rca_eco]) + return self._result(not fails) class GoRule06(GoRule): @@ -687,35 +701,6 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP return self._result(True) -class GoRule57(GoRule): - - def __init__(self): - super().__init__("GORULE:0000057", "Group specific filter rules should be applied to annotations", FailMode.HARD, tags=["context-import"]) - - def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult: - # Check group_metadata is present - if config.group_metadata is None: - return self._result(True) - - evidence_codes = config.group_metadata.get("filter_out", {}).get("evidence", []) - if str(annotation.evidence.type) in evidence_codes: - return self._result(False) - - evidences_references = config.group_metadata.get("filter_out", {}).get("evidence_reference", []) - for er in evidences_references: - evidence_code = er["evidence"] - reference = er["reference"] - if str(annotation.evidence.type) == evidence_code and [str(ref) for ref in annotation.evidence.has_supporting_reference] == [reference]: - return self._result(False) - - properties = config.group_metadata.get("filter_out", {}).get("annotation_properties", []) - for p in properties: - if p in annotation.properties.keys(): - return self._result(False) - - return self._result(True) - - class 
GoRule58(RepairRule): def __init__(self): @@ -925,8 +910,24 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP return TestResult(repair_result(repair_state, self.fail_mode), "{}: {} should be one of {}. Repaired to {}".format(self.message(repair_state), relation, allowed_str, repaired_str), repaired_annotation) +class GoRule63(GoRule): + + def __init__(self): + super().__init__("GORULE:0000063", "Annotations using ISS/ISA/ISO evidence should refer to a gene product (in the 'with' column)", FailMode.HARD) + + def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult: + evidence = str(annotation.evidence.type) + withfrom = annotation.evidence.with_support_from + + + if evidence in [iss_eco, isa_eco, iso_eco] and (withfrom is None or len(withfrom) == 0): + return self._result(False) + + return self._result(True) + GoRules = enum.Enum("GoRules", { "GoRule02": GoRule02(), + "GoRule05": GoRule05(), "GoRule06": GoRule06(), "GoRule07": GoRule07(), "GoRule08": GoRule08(), @@ -947,9 +948,9 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP "GoRule50": GoRule50(), #"GoRule51": GoRule51(), Do not run test "GoRule55": GoRule55(), - "GoRule57": GoRule57(), "GoRule58": GoRule58(), "GoRule61": GoRule61(), + "GoRule63": GoRule63(), # GoRule13 at the bottom in order to make all other rules clean up an annotation before reaching 13 "GoRule13": GoRule13() }) diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py index ed19cb32..9c907408 100644 --- a/tests/test_gafparser.py +++ b/tests/test_gafparser.py @@ -421,6 +421,12 @@ def test_bad_date(): assert assoc_result.skipped == True assert assoc_result.associations == [] +def test_bad_gene_symbol(): + p = GafParser() + assoc_result = p.parse_line("PomBase\tSPAC25B8.17\ta|pipeisnotallowed\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane 
Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)") + assert assoc_result.skipped == True + assert assoc_result.associations == [] + def test_bad_go_id(): p = GafParser() assoc_result = p.parse_line("PomBase\tSPAC25B8.17\typf1\t\tINVALID:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)") diff --git a/tests/test_qc.py b/tests/test_qc.py index 2d0b270f..35facea7 100644 --- a/tests/test_qc.py +++ b/tests/test_qc.py @@ -1,7 +1,6 @@ import pytest import datetime import yaml -import json from ontobio.model import association from ontobio.model.association import Curie @@ -12,7 +11,7 @@ from ontobio.io import gafparser from ontobio.io.gafparser import GafParser from ontobio.io import gpadparser -from ontobio import ontol, ontol_factory, ecomap +from ontobio import ontol_factory, ecomap import copy @@ -94,7 +93,29 @@ def test_go_rule02(): assoc.object.id = Curie.from_str("GO:0003674") test_result = qc.GoRule02().test(assoc, all_rules_config()) assert test_result.result_type == qc.ResultType.PASS - + +def test_go_rule_05(): + fail_terms = ["GO:0005488", "GO:0005515"] + fail_codes = ["IEA", "ISS", "ISO", "ISM", "ISA", "IBA", "RCA"] + for term in fail_terms: + for code in fail_codes: + assoc = make_annotation(goid=term, evidence=code).associations[0] + test_result = qc.GoRule05().test(assoc, all_rules_config(ontology=ontology)) + assert test_result.result_type == qc.ResultType.WARNING + + assoc = make_annotation(goid="GO:0034655", evidence="IEA").associations[0] + test_result = qc.GoRule05().test(assoc, all_rules_config(ontology=ontology)) + assert test_result.result_type == qc.ResultType.PASS + + + assoc = make_annotation(goid="GO:0005488", evidence="HEP").associations[0] + test_result = qc.GoRule05().test(assoc, all_rules_config(ontology=ontology)) + assert test_result.result_type == qc.ResultType.PASS + + assoc = 
make_annotation(goid="GO:0034655", evidence="HEP").associations[0] + test_result = qc.GoRule05().test(assoc, all_rules_config(ontology=ontology)) + assert test_result.result_type == qc.ResultType.PASS + def test_go_rule_06(): assoc = make_annotation(goid="GO:0005575", evidence="HEP", aspect="C").associations[0] @@ -629,44 +650,6 @@ def test_gorule55(): assert test_result.result_type == qc.ResultType.WARNING -def test_gorule57(): - assoc = make_annotation(db="HELLO", db_id="123", qualifier="contributes_to", goid="GO:0003674", evidence=iea_eco, taxon="taxon:2", from_gaf=False).associations[0] - # Look at evidence_code, reference, annotation_properties - config = assocparser.AssocParserConfig( - group_metadata={ - "id": "mgi", - "label": "Mouse Genome Informatics", - "filter_out": { - "evidence": [iea_eco], - "evidence_reference": [ - { - "evidence": ikr_eco, - "reference": "PMID:21873635" - } - ], - "annotation_properties": ["noctua-model-id"] - } - }, - rule_set=assocparser.RuleSet.ALL - ) - test_result = qc.GoRule57().test(assoc, config) - assert test_result.result_type == qc.ResultType.ERROR - - assoc.evidence.type = Curie.from_str(ikr_eco) - assoc.evidence.has_supporting_reference = [Curie.from_str("PMID:21873635")] - test_result = qc.GoRule57().test(assoc, config) - assert test_result.result_type == qc.ResultType.ERROR - - assoc.evidence.type = Curie.from_str("ECO:some_garbage") - assoc.evidence.has_supporting_reference = [Curie.from_str("PMID:some_garbage")] - assoc.properties = {"noctua-model-id": "some_garbage"} - test_result = qc.GoRule57().test(assoc, config) - assert test_result.result_type == qc.ResultType.ERROR - - assoc.properties = {} - test_result = qc.GoRule57().test(assoc, config) - assert test_result.result_type == qc.ResultType.PASS - def test_gorule58(): with open("tests/resources/extensions-constraints.yaml") as exs_cons: @@ -761,7 +744,37 @@ def test_gorule61(): version="2.2") test_result = qc.GoRule61().test(assoc.associations[0], config) 
assert test_result.result_type == qc.ResultType.PASS + +def test_go_rule_63(): + # ISS with anything in withfrom + assoc = make_annotation(evidence="ISS", withfrom="BLAH:12345").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.PASS + + # ISA with anything in withfrom + assoc = make_annotation(evidence="ISA", withfrom="BLAH:12345").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.PASS + + # ISO with anything in withfrom + assoc = make_annotation(evidence="ISO", withfrom="BLAH:12345").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.PASS + + # ISS with nothing in withfrom + assoc = make_annotation(evidence="ISS", withfrom="").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.ERROR + # ISA with nothing in withfrom + assoc = make_annotation(evidence="ISA", withfrom="").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.ERROR + + # ISO with nothing in withfrom + assoc = make_annotation(evidence="ISO", withfrom="").associations[0] + test_result = qc.GoRule63().test(assoc, all_rules_config()) + assert test_result.result_type == qc.ResultType.ERROR def test_all_rules(): # pass @@ -778,7 +791,7 @@ def test_all_rules(): assoc = gafparser.to_association(a).associations[0] test_results = qc.test_go_rules(assoc, config).all_results - assert len(test_results.keys()) == 24 + assert len(test_results.keys()) == 25 assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS