Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Go site 676 gorule 0000022 check for retracted publications #674

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion bin/ontobio-parse-assocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main():
parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
help="GPI file")
parser.add_argument("-m", "--metadata_dir", type=dir_path, required=False,
help="Path to metadata directory")
help="Path to metadata directory")
parser.add_argument("-l", "--rule", action="append", required=None, default=[], dest="rule_set",
help="Set of rules to be run. Default is no rules to be run, with the exception \
of gorule-0000027 and gorule-0000020. See command line documentation in the \
Expand Down Expand Up @@ -144,10 +144,12 @@ def main():

goref_metadata = None
ref_species_metadata = None
retracted_pub_set = None
if args.metadata_dir:
absolute_metadata = os.path.abspath(args.metadata_dir)
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")
retracted_pub_set = metadata.retracted_pub_set(absolute_metadata)

# set configuration
filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
Expand All @@ -164,6 +166,7 @@ def main():
gpi_authority_path=args.gpi,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
rule_set=rule_set
)
p = None
Expand Down
13 changes: 11 additions & 2 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ontobio.validation import tools
from ontobio.validation import rules


from typing import Dict, Set

# logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.WARNING)
Expand Down Expand Up @@ -210,7 +211,7 @@ def create_parser(config, group, dataset, format="gaf"):
"""

@tools.gzips
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, retracted_pub_set=None,db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
config = assocparser.AssocParserConfig(
ontology=ontology_graph,
Expand All @@ -221,6 +222,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
entity_idspaces=db_entities,
group_idspace=group_idspace,
suppress_rule_reporting_tags=suppress_rule_reporting_tags,
Expand Down Expand Up @@ -530,6 +532,8 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
db_entities = metadata.database_entities(absolute_metadata)
group_ids = metadata.groups(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)

retracted_pub_set = metadata.retracted_pub_set(absolute_metadata)

gaferences = None
if gaferencer_file:
Expand All @@ -550,6 +554,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
db_entities=db_entities,
group_idspace=group_ids,
suppress_rule_reporting_tags=suppress_rule_reporting_tag,
Expand Down Expand Up @@ -634,14 +639,16 @@ def paint(group, dataset, metadata, target, ontology):
absolute_target = os.path.abspath(target)
os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")

paint_src_gaf = check_and_download_mixin_source(paint_metadata, dataset, absolute_target)

click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology)

gpi_path = os.path.join(absolute_target, "groups", dataset, "{}.gpi".format(dataset))
click.echo("Using GPI at {}".format(gpi_path))
paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path)
retracted_pub_set = metadata.retracted_pub_set(absolute_metadata)
paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path, retracted_pub_set=retracted_pub_set)


@cli.command()
Expand All @@ -659,6 +666,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")
retracted_pub_set = metadata.retracted_pub_set(absolute_metadata)

click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

Expand All @@ -673,6 +681,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
ontology=ontology_graph,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
entity_idspaces=db_entities,
group_idspace=group_ids,
annotation_inferences=gaferences,
Expand Down
2 changes: 2 additions & 0 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def __init__(self,
ref_species_metadata=None,
group_metadata=None,
dbxrefs=None,
retracted_pub_set=None,
suppress_rule_reporting_tags=[],
annotation_inferences=None,
extensions_constraints=None,
Expand All @@ -258,6 +259,7 @@ def __init__(self,
self.goref_metadata = goref_metadata
self.ref_species_metadata = ref_species_metadata
self.group_metadata = group_metadata
self.retracted_pub_set = retracted_pub_set
self.suppress_rule_reporting_tags = suppress_rule_reporting_tags
self.annotation_inferences = annotation_inferences
self.entity_idspaces = entity_idspaces
Expand Down
15 changes: 15 additions & 0 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,20 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return self._result(bool(withfrom))
else:
return self._result(True)

class GoRule22(GoRule):
    """GORULE:0000022: check annotations against the retracted-publication set.

    An annotation fails this rule when any of its supporting references,
    rendered as a string, is a member of ``config.retracted_pub_set``.
    """

    def __init__(self):
        super().__init__(
            "GORULE:0000022",
            "Check for, and filter, annotations made to retracted publications",
            FailMode.HARD,
        )

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Run the retracted-publication check on a single annotation.

        :param annotation: association whose ``evidence.has_supporting_reference``
            entries are compared (via ``str``) against the retracted set.
        :param config: parser configuration; only ``retracted_pub_set`` is read.
        :param group: unused by this rule.
        :return: ``self._result(True)`` when no retracted set is configured or
            no reference is in it, ``self._result(False)`` otherwise.
        """
        retracted = config.retracted_pub_set
        if retracted is None:
            # No retracted-publication data was supplied, so nothing can fail.
            return self._result(True)

        cites_retracted = any(
            str(reference) in retracted
            for reference in annotation.evidence.has_supporting_reference
        )
        return self._result(not cites_retracted)


class GoRule26(GoRule):
Expand Down Expand Up @@ -952,6 +966,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule16": GoRule16(),
"GoRule17": GoRule17(),
"GoRule18": GoRule18(),
"GoRule22": GoRule22(),
"GoRule26": GoRule26(),
"GoRule28": GoRule28(),
"GoRule29": GoRule29(),
Expand Down
18 changes: 17 additions & 1 deletion ontobio/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,23 @@ def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
except Exception as e:
raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e)))

return set([yaml[field] for yaml in yaml_list])
return set([yaml[field] for yaml in yaml_list])


def retracted_pub_set(metadata) -> Set[str]:
    """Load the set of retracted publication identifiers from the metadata directory.

    Reads ``retracted-publications.txt`` inside ``metadata``. Lines starting
    with ``!`` are treated as comments and blank lines are ignored. For every
    other line, only the text before the first comma is kept, with surrounding
    whitespace removed.

    :param metadata: path to the metadata directory containing the file.
    :return: the set of identifiers found in the file.
    :raises click.ClickException: if the file cannot be found or read.
    """
    retracted_path = os.path.join(metadata, "retracted-publications.txt")
    try:
        retracted_pubs = set()
        with open(retracted_path, "r") as f:
            for line in f:
                entry = line.strip()
                # Skip comment lines and blank lines; a blank line must not
                # put the empty string into the set.
                if not entry or entry.startswith("!"):
                    continue
                # Keep only the identifier before the first comma and strip it
                # again so "PMID:1 , note" yields "PMID:1", not "PMID:1 ".
                pub_id = entry.partition(",")[0].strip()
                if pub_id:
                    retracted_pubs.add(pub_id)
        return retracted_pubs
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(retracted_path, str(e))) from e



16 changes: 15 additions & 1 deletion tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,20 @@ def test_go_rule_18():
test_result = qc.GoRule18().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule22():
    """GoRule22 passes ordinary references and errors on retracted ones."""
    config = assocparser.AssocParserConfig(
        ontology=ontology,
        retracted_pub_set={"RETRACTED:1234","PMID:37772366"},
        rule_set=assocparser.RuleSet.ALL
    )

    # (reference on the annotation, expected rule outcome)
    cases = [
        ("PMID:12345", qc.ResultType.PASS),      # not in the retracted set
        ("PMID:37772366", qc.ResultType.ERROR),  # listed as retracted
    ]
    for reference, expected in cases:
        assoc = make_annotation(goid="GO:1234567", evidence="IBA", references=reference).associations[0]
        test_result = qc.GoRule22().test(assoc, config)
        assert test_result.result_type == expected

def test_go_rule26():

config = assocparser.AssocParserConfig(
Expand Down Expand Up @@ -819,7 +833,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 26
assert len(test_results.keys()) == 27
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down
Loading