Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Go site 676 gorule 0000022 check for retracted publications #674

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions bin/ontobio-parse-assocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ def main():
parser.add_argument("-g", "--gpi", type=str, required=False, default=None,
help="GPI file")
parser.add_argument("-m", "--metadata_dir", type=dir_path, required=False,
help="Path to metadata directory")
help="Path to metadata directory")
parser.add_argument("--retracted_pub_set", type=argparse.FileType('r'), required=False,
help="Path to retracted publications file")
parser.add_argument("-l", "--rule", action="append", required=None, default=[], dest="rule_set",
help="Set of rules to be run. Default is no rules to be run, with the exception \
of gorule-0000027 and gorule-0000020. See command line documentation in the \
Expand Down Expand Up @@ -143,11 +145,17 @@ def main():
rule_set = assocparser.RuleSet.ALL

goref_metadata = None
ref_species_metadata = None
ref_species_metadata = None
if args.metadata_dir:
absolute_metadata = os.path.abspath(args.metadata_dir)
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")

retracted_pub_set = None
if args.retracted_pub_set:
retracted_pub_set = metadata.retracted_pub_set(args.retracted_pub_set.name)
elif args.metadata_dir:
retracted_pub_set = metadata.retracted_pub_set_from_meta(absolute_metadata)

# set configuration
filtered_evidence_file = open(args.filtered_file, "w") if args.filtered_file else None
Expand All @@ -164,6 +172,7 @@ def main():
gpi_authority_path=args.gpi,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
rule_set=rule_set
)
p = None
Expand Down
29 changes: 24 additions & 5 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from ontobio.validation import tools
from ontobio.validation import rules


from typing import Dict, Set

# logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s: %(message)s", level=logging.WARNING)
Expand Down Expand Up @@ -210,7 +211,7 @@ def create_parser(config, group, dataset, format="gaf"):
"""

@tools.gzips
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False, group="unknown", rule_metadata=None, goref_metadata=None, ref_species_metadata=None, retracted_pub_set=None,db_entities=None, group_idspace=None, format="gaf", suppress_rule_reporting_tags=[], annotation_inferences=None, group_metadata=None, extensions_constraints=None, rule_contexts=[], gaf_output_version="2.2", rule_set=assocparser.RuleSet.ALL):
filtered_associations = open(os.path.join(os.path.split(source_gaf)[0], "{}_noiea.gaf".format(dataset)), "w")
config = assocparser.AssocParserConfig(
ontology=ontology_graph,
Expand All @@ -221,6 +222,7 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pub_set,
entity_idspaces=db_entities,
group_idspace=group_idspace,
suppress_rule_reporting_tags=suppress_rule_reporting_tags,
Expand Down Expand Up @@ -493,7 +495,8 @@ def cli(ctx, verbose):
@click.option("--only-dataset", default=None)
@click.option("--gaf-output-version", default="2.2", type=click.Choice(["2.1", "2.2"]))
@click.option("--rule-set", "-l", "rule_set", default=[assocparser.RuleSet.ALL], multiple=True)
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version, rule_set):
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base_download_url, suppress_rule_reporting_tag, skip_existing_files, gaferencer_file, only_dataset, gaf_output_version, rule_set, retracted_pub_set):

logger.info("Logging is verbose")
products = {
Expand Down Expand Up @@ -529,7 +532,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base

db_entities = metadata.database_entities(absolute_metadata)
group_ids = metadata.groups(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)
extensions_constraints = metadata.extensions_constraints_file(absolute_metadata)

gaferences = None
if gaferencer_file:
Expand All @@ -539,6 +542,12 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
if rule_set == (assocparser.RuleSet.ALL,):
rule_set = assocparser.RuleSet.ALL

retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)

for dataset_metadata, source_gaf in downloaded_gaf_sources:
dataset = dataset_metadata["dataset"]
# Set paint to True when the group is "paint".
Expand All @@ -550,6 +559,7 @@ def produce(ctx, group, metadata_dir, gpad, ttl, target, ontology, exclude, base
rule_metadata=rule_metadata,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pubs,
db_entities=db_entities,
group_idspace=group_ids,
suppress_rule_reporting_tags=suppress_rule_reporting_tag,
Expand Down Expand Up @@ -634,13 +644,14 @@ def paint(group, dataset, metadata, target, ontology):
absolute_target = os.path.abspath(target)
os.makedirs(os.path.join(absolute_target, "groups"), exist_ok=True)
paint_metadata = metadata.dataset_metadata_file(absolute_metadata, "paint")

paint_src_gaf = check_and_download_mixin_source(paint_metadata, dataset, absolute_target)

click.echo("Loading ontology: {}...".format(ontology))
ontology_graph = OntologyFactory().create(ontology)

gpi_path = os.path.join(absolute_target, "groups", dataset, "{}.gpi".format(dataset))
click.echo("Using GPI at {}".format(gpi_path))
click.echo("Using GPI at {}".format(gpi_path))
paint_gaf = produce_gaf("paint_{}".format(dataset), paint_src_gaf, ontology_graph, gpipath=gpi_path)


Expand All @@ -650,7 +661,8 @@ def paint(group, dataset, metadata, target, ontology):
@click.option("--ontology", type=click.Path(), required=True)
@click.option("--gaferencer-file", "-I", type=click.Path(exists=True), default=None, required=False,
help="Path to Gaferencer output to be used for inferences")
def rule(metadata_dir, out, ontology, gaferencer_file):
@click.option("--retracted_pub_set", type=click.Path(exists=True), default=None, required=False, help="Path to retracted publications file")
def rule(metadata_dir, out, ontology, gaferencer_file, retracted_pub_set):
absolute_metadata = os.path.abspath(metadata_dir)

click.echo("Loading ontology: {}...".format(ontology))
Expand All @@ -659,6 +671,12 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
goref_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "gorefs"))
gorule_metadata = metadata.yamldown_lookup(os.path.join(absolute_metadata, "rules"))
ref_species_metadata = metadata.yaml_set(absolute_metadata, "go-reference-species.yaml", "taxon_id")
retracted_pubs = None
if retracted_pub_set:
retracted_pubs = metadata.retracted_pub_set(retracted_pub_set)
else:
retracted_pubs = metadata.retracted_pub_set_from_meta(absolute_metadata)


click.echo("Found {} GO Rules".format(len(gorule_metadata.keys())))

Expand All @@ -673,6 +691,7 @@ def rule(metadata_dir, out, ontology, gaferencer_file):
ontology=ontology_graph,
goref_metadata=goref_metadata,
ref_species_metadata=ref_species_metadata,
retracted_pub_set=retracted_pubs,
entity_idspaces=db_entities,
group_idspace=group_ids,
annotation_inferences=gaferences,
Expand Down
2 changes: 2 additions & 0 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def __init__(self,
ref_species_metadata=None,
group_metadata=None,
dbxrefs=None,
retracted_pub_set=None,
suppress_rule_reporting_tags=[],
annotation_inferences=None,
extensions_constraints=None,
Expand All @@ -258,6 +259,7 @@ def __init__(self,
self.goref_metadata = goref_metadata
self.ref_species_metadata = ref_species_metadata
self.group_metadata = group_metadata
self.retracted_pub_set = retracted_pub_set
self.suppress_rule_reporting_tags = suppress_rule_reporting_tags
self.annotation_inferences = annotation_inferences
self.entity_idspaces = entity_idspaces
Expand Down
15 changes: 15 additions & 0 deletions ontobio/io/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,20 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
return self._result(bool(withfrom))
else:
return self._result(True)

class GoRule22(GoRule):
    """GORULE:0000022 -- flag annotations whose references are retracted publications."""

    def __init__(self):
        super().__init__("GORULE:0000022", "Check for, and filter, annotations made to retracted publications", FailMode.HARD)

    def test(self, annotation: association.GoAssociation, config: assocparser.AssocParserConfig, group=None) -> TestResult:
        """Check the annotation's supporting references against the retracted set.

        The rule yields ``self._result(True)`` when ``config.retracted_pub_set``
        is None or when none of the supporting references (compared by their
        ``str()`` form) is a member of that set; otherwise it yields
        ``self._result(False)``.
        """
        retracted = config.retracted_pub_set
        if retracted is None:
            # No retracted-publication data configured: nothing to check against.
            return self._result(True)

        cites_retracted = any(
            str(reference) in retracted
            for reference in annotation.evidence.has_supporting_reference
        )
        return self._result(not cites_retracted)


class GoRule26(GoRule):
Expand Down Expand Up @@ -952,6 +966,7 @@ def test(self, annotation: association.GoAssociation, config: assocparser.AssocP
"GoRule16": GoRule16(),
"GoRule17": GoRule17(),
"GoRule18": GoRule18(),
"GoRule22": GoRule22(),
"GoRule26": GoRule26(),
"GoRule28": GoRule28(),
"GoRule29": GoRule29(),
Expand Down
27 changes: 26 additions & 1 deletion ontobio/validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,32 @@ def yaml_set(metadata, yaml_file_name, field) -> Set[str]:
except Exception as e:
raise click.ClickException("Could not find or read {}: {}".format(yaml_path, str(e)))

return set([yaml[field] for yaml in yaml_list])
return set([yaml[field] for yaml in yaml_list])

def retracted_pub_set_from_meta(metadata) -> Set:
    """Load retracted publications from ``retracted-publications.txt`` in a metadata directory.

    Args:
        metadata: path of the directory expected to contain
            ``retracted-publications.txt``.

    Returns:
        The set produced by ``retracted_pub_set_use_abspath`` for that file,
        or an empty set when the file is not readable.
    """
    candidate = os.path.join(metadata, "retracted-publications.txt")
    if not os.access(candidate, os.R_OK):
        # A missing or unreadable file is not an error here: report no retractions.
        return set()
    return retracted_pub_set_use_abspath(candidate)

def retracted_pub_set(abspath_retracted_file) -> Set:
    """Load retracted publications from the given file path.

    The path is normalised with ``os.path.abspath`` before being handed to
    ``retracted_pub_set_use_abspath``, whose result is returned unchanged.
    """
    resolved_path = os.path.abspath(abspath_retracted_file)
    return retracted_pub_set_use_abspath(resolved_path)

def retracted_pub_set_use_abspath(abspath_retracted_file) -> Set:
    """Read a retracted-publications file and return the set of publication IDs.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    starting with ``!`` are skipped. When a line contains a comma, only the
    text before the first comma is kept, and that text is stripped again so
    that ``"PMID:1 , note"`` yields ``"PMID:1"``.

    Args:
        abspath_retracted_file: path of the file to read.

    Returns:
        The set of non-empty publication ID strings found in the file.

    Raises:
        click.ClickException: if the file cannot be opened or read.
    """
    retracted_pubs = set()
    try:
        with open(abspath_retracted_file, "r") as f:
            for line in f:
                entry = line.strip()
                # Skip blank lines and "!"-prefixed comment lines.
                if not entry or entry.startswith("!"):
                    continue
                # Keep only the ID column; anything after the first comma is dropped.
                pub_id = entry.partition(",")[0].strip()
                if pub_id:
                    retracted_pubs.add(pub_id)
    except Exception as e:
        raise click.ClickException("Could not find or read {}: {}".format(abspath_retracted_file, str(e))) from e
    return retracted_pubs



16 changes: 15 additions & 1 deletion tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,20 @@ def test_go_rule_18():
test_result = qc.GoRule18().test(assoc, all_rules_config())
assert test_result.result_type == qc.ResultType.PASS

def test_go_rule22():
    """GoRule22 passes for an unlisted reference and errors for a retracted one."""
    retracted_config = assocparser.AssocParserConfig(
        ontology=ontology,
        retracted_pub_set={"RETRACTED:1234","PMID:37772366"},
        rule_set=assocparser.RuleSet.ALL
    )

    # (reference on the annotation, expected result type from the rule)
    expectations = (
        ("PMID:12345", qc.ResultType.PASS),
        ("PMID:37772366", qc.ResultType.ERROR),
    )
    for reference, expected_type in expectations:
        assoc = make_annotation(goid="GO:1234567", evidence="IBA", references=reference).associations[0]
        outcome = qc.GoRule22().test(assoc, retracted_config)
        assert outcome.result_type == expected_type

def test_go_rule26():

config = assocparser.AssocParserConfig(
Expand Down Expand Up @@ -819,7 +833,7 @@ def test_all_rules():
assoc = gafparser.to_association(a).associations[0]

test_results = qc.test_go_rules(assoc, config).all_results
assert len(test_results.keys()) == 26
assert len(test_results.keys()) == 27
assert test_results[qc.GoRules.GoRule26.value].result_type == qc.ResultType.PASS
assert test_results[qc.GoRules.GoRule29.value].result_type == qc.ResultType.PASS

Expand Down
Loading