Skip to content

Commit

Permalink
merge main and fix conflicts
Browse files (browse the repository at this point in the history)
  • Loading branch information
sierra-moxon committed Jul 24, 2024
2 parents 8c98187 + df8f688 commit b76b665
Show file tree
Hide file tree
Showing 4 changed files with 4,733 additions and 4,720 deletions.
1 change: 1 addition & 0 deletions ontobio/io/entityparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ def line_as_entity_subject(self, line: str):
for entity in entity_dicts:
entity_types = []
if self.gpi_version() == "2.0":

entity_types = [association.Curie.from_str(t) for t in entity["type"]]
if any(c.is_error() for c in entity_types):
logger.error("Skipping `{}` due to malformed CURIE in entity type: `{}`".format(line, entity["type"]))
Expand Down
34 changes: 21 additions & 13 deletions ontobio/io/entitywriter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""
Classes for exporting entities.
So far only one implementation
"""
"""Classes for exporting entities."""
import re
from datetime import datetime

from ontobio.model.association import map_gp_type_label_to_curie

external_taxon = re.compile("taxon:([0-9]+)")
internal_taxon = re.compile("NCBITaxon:([0-9]+)")


def stringify(s):
if s is None:
Expand All @@ -13,8 +16,6 @@ def stringify(s):
else:
return s

external_taxon = re.compile("taxon:([0-9]+)")
internal_taxon = re.compile("NCBITaxon:([0-9]+)")

def normalize_taxon(taxon):
global internal_taxon
Expand Down Expand Up @@ -100,6 +101,8 @@ def __init__(self, file=None, version=None):
if self.file:
if self.version == "2.0":
self.file.write("!gpi-version: 2.0\n")
self.file.write("!date_generated: " + datetime.now().strftime("%Y-%m-%dT%H:%M") + "\n")
self.file.write("!generated_by: GO Central\n")
else:
self.file.write("!gpi-version: 1.2\n")

Expand Down Expand Up @@ -140,14 +143,19 @@ def write_entity(self, entity):
"""

taxon = entity.get("taxon").get("id")
if normalize_taxon(taxon).startswith("taxon:"):
taxon = taxon.replace("taxon:", "NCBITaxon:")

if self.version == "2.0":
vals = [
entity.get('id'), # DB_Object_ID
entity.get('label'), # DB_Object_symbol
entity.get('full_name'), # DB_Object_Name
entity.get('synonyms'), # DB_Object_Synonyms
entity.get('type'), # DB_Object_Type
normalize_taxon(entity.get("taxon").get("id")), # DB_Object_Taxon
# GPI spec says this is single valued, GpiParser returns list, so take the first element here.
str(map_gp_type_label_to_curie(entity.get('type')[0])), # DB_Object_Type to curie vs. label
taxon, # DB_Object_Taxon, normalized to NCBITaxon prefix
"", # Encoded_by
entity.get('parents'), # Parent_Protein
"", # Protein_Containing_Complex_Members
Expand All @@ -160,10 +168,10 @@ def write_entity(self, entity):
prefix, # DB
local_id, # DB_Object_ID
entity.get('label'), # DB_Object_Symbol
entity.get('full_name'), # DB_Object_Symbol
entity.get('synonyms'), # DB_Object_Name
entity.get('type'), # DB_Object_Synonyms
normalize_taxon(entity.get("taxon").get("id")), # taxon
entity.get('full_name'), # DB_Object_Full_Name
entity.get('synonyms'), # DB_Object_Synonyms
entity.get('type'), # DB_Object_Type
normalize_taxon(entity.get("taxon").get("id")), # taxon in gpi 1.2 was prefixed by `taxon:`
entity.get('parents'), # Parent_Object_ID
entity.get('xrefs'), # DB_Xref(s)
entity.get('properties') # Properties
Expand Down
Loading

0 comments on commit b76b665

Please sign in to comment.