Merge pull request #693 from biolink/debugging_missing_gpi
Debugging missing gpi
sierra-moxon authored Sep 13, 2024
2 parents 568a2bd + 540d355 commit ba554f5
Showing 2 changed files with 145 additions and 20 deletions.
52 changes: 32 additions & 20 deletions bin/validate.py
@@ -138,7 +138,7 @@ def download_source_gafs(group_metadata,
         gaf_urls = [(data, data["source"]) for data in group_metadata["datasets"] if data["dataset"] == only_dataset]
     # List of dataset metadata to gaf download url
 
-    logger.info("Found gaf_urls {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
+    click.echo("Found gaf_urls {}".format(", ".join([kv[0]["dataset"] for kv in gaf_urls])))
     downloaded_paths = []
     for dataset_metadata, gaf_url in gaf_urls:
         dataset = dataset_metadata["dataset"]
@@ -154,7 +154,7 @@ def download_source_gafs(group_metadata,
             # otherwise file is coming in uncompressed. But we want to make sure
             # to zip up the original source also
             tools.zipup(path)
-        logger.info("Downloaded {}".format(path))
+        click.echo("Downloaded {}".format(path))
         downloaded_paths.append((dataset_metadata, path))
 
     return downloaded_paths
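The recurring change in this file is swapping logger.info for click.echo: click.echo writes straight to stdout regardless of logging configuration, so these progress messages survive even when the root logger is set above INFO. A minimal standalone sketch of the difference (not from this repo; the path is an invented example):

    import logging
    import click

    logging.basicConfig(level=logging.WARNING)  # INFO records are filtered out
    logger = logging.getLogger(__name__)

    logger.info("Downloaded /tmp/example.gaf.gz")   # suppressed: below the WARNING threshold
    click.echo("Downloaded /tmp/example.gaf.gz")    # always written to stdout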
@@ -268,28 +268,28 @@ def produce_gaf(dataset, source_gaf, ontology_graph, gpipaths=None, paint=False,
     filtered_associations.close()
 
     report_markdown_path = os.path.join(os.path.split(source_gaf)[0], "{}.report.md".format(dataset))
-    logger.info("About to write markdown report to {}".format(report_markdown_path))
+    click.echo("About to write markdown report to {}".format(report_markdown_path))
     with open(report_markdown_path, "w") as report_md:
-        logger.info("Opened for writing {}".format(report_markdown_path))
+        click.echo("Opened for writing {}".format(report_markdown_path))
         report_md.write(parser.report.to_markdown())
 
-    logger.info("markdown {} written out".format(report_markdown_path))
-    logger.info("Markdown current stack:")
-    if logger.getEffectiveLevel() == logging.INFO:
-        traceback.print_stack()
+    click.echo("markdown {} written out".format(report_markdown_path))
+    # click.echo("Markdown current stack:")
+    # if logger.getEffectiveLevel() == logging.INFO:
+    #     traceback.print_stack()
 
     report_json_path = os.path.join(os.path.split(source_gaf)[0], "{}.report.json".format(dataset))
-    logger.info("About to write json report to {}".format(report_json_path))
+    click.echo("About to write json report to {}".format(report_json_path))
     with open(report_json_path, "w") as report_json:
-        logger.info("Opened for writing {}".format(report_json_path))
+        click.echo("Opened for writing {}".format(report_json_path))
         report_json.write(json.dumps(parser.report.to_report_json(), indent=4))
 
-    logger.info("json {} written out".format(report_markdown_path))
-    logger.info("gorule-13 first 10 messages: {}".format(
+    click.echo("json {} written out".format(report_json_path))
+    click.echo("gorule-13 first 10 messages: {}".format(
         json.dumps(parser.report.to_report_json()["messages"].get("gorule-0000013", [])[:10], indent=4)))
-    logger.info("json current Stack:")
-    if logger.getEffectiveLevel() == logging.INFO:
-        traceback.print_stack()
+    # logger.info("json current Stack:")
+    # if logger.getEffectiveLevel() == logging.INFO:
+    #     traceback.print_stack()
 
     return [validated_gaf_path, filtered_associations.name]

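The gorule-13 echo above slices the first ten messages for that rule out of the report's JSON form. A small sketch of that access pattern on a hand-built dict (the real structure comes from parser.report.to_report_json(); the sample messages here are invented):

    import json

    report = {"messages": {"gorule-0000013": [{"level": "WARNING", "line": 1},
                                              {"level": "WARNING", "line": 7}]}}
    first_ten = report["messages"].get("gorule-0000013", [])[:10]  # [] when the rule never fired
    print(json.dumps(first_ten, indent=4))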
@@ -460,6 +460,7 @@ def produce_gpi(dataset, target_dir, gaf_path, ontology_graph, gpad_gpi_output_v
                 # If the entity is not in the cache, add it and write it out
                 gpi_cache.add(entity)
                 gpiwriter.write_entity(entity)
+    print("Wrote gpi to disk: {}".format(gpi_path))
     return gpi_path
 
 
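For context, produce_gpi keeps a cache of entities it has already written so each GPI row is emitted once; the new print confirms the output path. The general dedup-on-write pattern, sketched with a plain set and invented entity tuples (the real code writes parsed entity objects via a GPI writer):

    seen = set()

    def write_once(entity, out):
        # skip entities already written; a hashable tuple stands in for the entity here
        if entity in seen:
            return
        seen.add(entity)
        out.write("\t".join(entity) + "\n")

    with open("example.gpi", "w") as out:
        for entity in [("UniProtKB", "P12345"), ("UniProtKB", "P12345"), ("UniProtKB", "Q99999")]:
            write_once(entity, out)  # the duplicate P12345 row is skipped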
@@ -652,7 +653,6 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
     absolute_metadata = os.path.abspath(metadata_dir)
 
     print("group", group)
-    print("dataset", )
     group_metadata = metadata.dataset_metadata_file(absolute_metadata, group)
     click.echo("Loading ontology: {}...".format(ontology))
     ontology_graph = OntologyFactory().create(ontology, ignore_cache=True)
@@ -747,7 +747,9 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target

click.echo("Producing GPI for use in creating GPADs...")
gpi = produce_gpi(dataset, absolute_target, valid_gaf, ontology_graph, gpad_gpi_output_version)
click.echo("GPI file produced first time...{}".format(gpi))
gpi_list.append(gpi)
click.echo("GPI list...{}".format(gpi_list))
click.echo("Executing 'make_gpads' in validate.produce with all the assembled GAF files...")
make_gpads(dataset, valid_gaf, products,
ontology_graph, noctua_gpad_src, paint_gaf_src,
@@ -763,19 +765,26 @@ def produce(ctx, group, metadata_dir, gpad, gpad_gpi_output_version, ttl, target
click.echo("Executing the isoform fixing step in validate.produce...")
# run the resulting gaf through one last parse and replace, to handle the isoforms
# see: https://github.com/geneontology/go-site/issues/2291
click.echo("path to end gaf _temp.gaf")
click.echo(os.path)

click.echo(os.path.split(end_gaf)[0])
temp_output_gaf_path = os.path.join(os.path.split(end_gaf)[0], "{}_temp.gaf".format(dataset))
click.echo("temp_output_gaf_path: {}".format(temp_output_gaf_path))
isoform_fixed_gaf = fix_pro_isoforms_in_gaf(end_gaf, matching_gpi_path, ontology_graph, temp_output_gaf_path)
click.echo("isoform_fixed_gaf: ".format(isoform_fixed_gaf))
click.echo("matching_gpi_path: {}".format(gpi))

isoform_fixed_gaf = fix_pro_isoforms_in_gaf(end_gaf, gpi, ontology_graph, temp_output_gaf_path)
click.echo("isoform_fixed_gaf: {}".format(isoform_fixed_gaf))

final_output_gaf_path = os.path.join(os.path.split(end_gaf)[0], "{}.gaf".format(dataset))

click.echo("Rename the temporary isoform fixed file to the final GAF...")
os.rename(temp_output_gaf_path, final_output_gaf_path)
click.echo("final_output_gaf_path: ".format(final_output_gaf_path))
click.echo("final_output_gaf_path: {}".format(final_output_gaf_path))

click.echo("Producing final GPI after all GAF corrections...")
final_gpi = produce_gpi(dataset, absolute_target, final_output_gaf_path, ontology_graph, gpad_gpi_output_version)
click.echo("final_gpi: {}".format(final_gpi))

click.echo("Creating ttl files...")
make_ttls(dataset, final_output_gaf_path, products, ontology_graph)
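The net effect of this hunk is an ordering change at the tail of produce: the first GPI (gpi, produced earlier for GPAD generation) now feeds fix_pro_isoforms_in_gaf in place of matching_gpi_path, the temp file is renamed into place, and a second GPI is regenerated from the corrected GAF. A sketch of that sequence with stub functions standing in for the real ones (names mirror the diff; bodies and file names are placeholders):

    def produce_gpi(gaf_path):                                  # stub for validate.produce_gpi
        return gaf_path + ".gpi"

    def fix_pro_isoforms_in_gaf(gaf_path, gpi_path, out_path):  # stub for the isoform fix
        return out_path

    end_gaf = "mgi-src.gaf"
    gpi = produce_gpi(end_gaf)                                   # first GPI: feeds GPAD production and the isoform fix
    temp_gaf = fix_pro_isoforms_in_gaf(end_gaf, gpi, "mgi_temp.gaf")
    final_gaf = "mgi.gaf"                                        # in the pipeline, os.rename(temp_gaf, final_gaf) promotes the temp file
    final_gpi = produce_gpi(final_gaf)                           # second GPI: regenerated from the corrected GAF
    print(final_gpi)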
@@ -796,6 +805,9 @@ def fix_pro_isoforms_in_gaf(gaf_file_to_fix: str,
     :return: The path to the fixed GAF file
     """
     fixed_associations = []
+    print("gpi_file", gpi_file)
+    if gpi_file is None:
+        raise ValueError("GPI file is required to fix the GAF file.", gpi_file)
     gpiparser = GpiParser(config=assocparser.AssocParserConfig(ontology=ontology_graph))
     # Parse the GPI file, creating a map of identifiers to GPI entries
     gpis = gpiparser.parse(gpi_file, None)
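The added guard makes the failure mode this PR chases explicit: if no GPI path reaches the function, fail immediately rather than letting the missing GPI surface later as a confusing parse error. A minimal sketch of the guard-plus-index pattern, with invented entry dicts standing in for GpiParser output:

    def index_gpi_entries(gpi_file, entries):
        # fail fast on a missing GPI path instead of erroring downstream
        if gpi_file is None:
            raise ValueError("GPI file is required to fix the GAF file.", gpi_file)
        # key each entry by its full identifier, mirroring the gpi_map lookups below
        return {entry["id"]: entry for entry in entries}

    sample = [{"id": "PR:Q8CGC7-1", "encoded_by": ["MGI:MGI:99262"]}]  # invented sample row
    gpi_map = index_gpi_entries("example.gpi", sample)
    print(gpi_map["PR:Q8CGC7-1"]["encoded_by"])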
@@ -826,7 +838,7 @@
             full_old_identifier = source_assoc.subject.id.namespace + ":" + source_assoc.subject.id.identity
             old_namespace = source_assoc.subject.id.namespace
             old_identity = source_assoc.subject.id.identity
-            # TODO: right now we get the FIRST encoded_by result -- this is what the original script from Chris did??
+            # TODO: right now we get the FIRST encoded_by result -- this is what the original script did?
             if "MGI" == gpi_map[full_old_identifier].get("encoded_by")[0].split(":")[0]:
                 source_assoc.subject.id.namespace = gpi_map[full_old_identifier].get("encoded_by")[0].split(":")[0]
                 source_assoc.subject.id.identity = "MGI:" + gpi_map[full_old_identifier].get("encoded_by")[0].split(":")[2]
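The remap keeps only MGI-encoded subjects: it splits the first encoded_by value on colons, takes field 0 as the namespace, and rebuilds the identity as "MGI:" plus field 2, which assumes values shaped like MGI:MGI:12345. A worked example on an invented identifier:

    encoded_by = "MGI:MGI:98297"     # invented example; real values come from the GPI encoded_by column
    parts = encoded_by.split(":")    # ["MGI", "MGI", "98297"]
    if parts[0] == "MGI":
        namespace = parts[0]         # "MGI"
        identity = "MGI:" + parts[2] # "MGI:98297"; namespace + ":" + identity renders MGI:MGI:98297
        print(namespace, identity)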