From bd9c7a793252fe8f8c228e3e604d4f8978ea3217 Mon Sep 17 00:00:00 2001 From: dustine32 Date: Mon, 20 Jul 2020 17:31:56 -0700 Subject: [PATCH] Grouping annotations by entity GPI parent ID; issue #83 --- bin/validate.py | 2 +- ontobio/rdfgen/gocamgen/gocam_builder.py | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/bin/validate.py b/bin/validate.py index 30d0e824..b73131b3 100644 --- a/bin/validate.py +++ b/bin/validate.py @@ -558,7 +558,7 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology): unzip(gpad_path, unzipped) gpad_path = unzipped # NOTE: Validation on GPAD not included here since it's currently baked into produce() above. - extractor = AssocExtractor(gpad_path) + extractor = AssocExtractor(gpad_path, gpi_path) assocs_by_gene = extractor.group_assocs() absolute_target = os.path.abspath(target) diff --git a/ontobio/rdfgen/gocamgen/gocam_builder.py b/ontobio/rdfgen/gocamgen/gocam_builder.py index 630d520c..2790460e 100644 --- a/ontobio/rdfgen/gocamgen/gocam_builder.py +++ b/ontobio/rdfgen/gocamgen/gocam_builder.py @@ -158,7 +158,7 @@ def parse_gpi(gpi_file): class AssocExtractor: - def __init__(self, gpad_file, parser_config: AssocParserConfig = None): + def __init__(self, gpad_file, gpi_file, parser_config: AssocParserConfig = None): if parser_config: gpad_parser = GpadParser(config=parser_config) else: @@ -173,13 +173,15 @@ def __init__(self, gpad_file, parser_config: AssocParserConfig = None): length=lines) as associations: self.assocs = self.extract_properties_from_assocs(associations) + self.entity_parents = self.parse_gpi_parents(gpi_file) + def group_assocs(self): assocs_by_gene = {} for a in self.assocs: - # validation function - # if not self.assoc_filter.validate_line(a): - # continue subject_id = a["subject"]["id"] + # If entity has parent, assign to parent entity model + if subject_id in self.entity_parents: + subject_id = self.entity_parents[subject_id] if subject_id in assocs_by_gene: assocs_by_gene[subject_id].append(a) else: @@ -193,6 +195,19 @@ def extract_properties_from_assocs(assocs): new_assoc_list.append(extract_properties(a)) return new_assoc_list + @staticmethod + def parse_gpi_parents(gpi_file): + if gpi_file is None: + return None + parser = GpiParser() + entity_parents = {} + entities = parser.parse(gpi_file) + for entity in entities: + entity_id = entity['id'] + if len(entity['parents']) > 0: + entity_parents[entity_id] = entity['parents'][0] # There may only be one + return entity_parents + def unzip(filepath): input_file = gzip.GzipFile(filepath, "rb")