Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add source column to merged context CSVs #52

Merged
merged 7 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/prefixmaps/data/bioregistry.csv
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ bioregistry,INO,http://purl.obolibrary.org/obo/INO_,canonical
bioregistry,insdc,http://identifiers.org/insdc/,canonical
bioregistry,insdc.cds,http://identifiers.org/ncbiprotein/,namespace_alias
bioregistry,insdc.gca,http://identifiers.org/assembly/,namespace_alias
bioregistry,insdc.gcf,http://identifiers.org/assembly/,namespace_alias
bioregistry,insdc.run,https://www.ebi.ac.uk/ena/browser/view/,canonical
bioregistry,insdc.sra,http://identifiers.org/insdc.sra/,canonical
bioregistry,intact,http://identifiers.org/intact/,canonical
Expand Down Expand Up @@ -1034,6 +1035,7 @@ bioregistry,NCBI_gi,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBI_taxid,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBI_Taxon_ID,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,ncbibook,https://www.ncbi.nlm.nih.gov/books/,canonical
bioregistry,ncbidrs,http://identifiers.org/ncbidrs/,canonical
bioregistry,NCBIGene,http://identifiers.org/ncbigene/,canonical
bioregistry,ncbigi,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBIProtein,http://identifiers.org/ncbiprotein/,canonical
Expand Down
2 changes: 2 additions & 0 deletions src/prefixmaps/data/bioregistry.upper.csv
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ bioregistry,INO,http://purl.obolibrary.org/obo/INO_,canonical
bioregistry,INSDC,http://identifiers.org/insdc/,canonical
bioregistry,insdc.cds,http://identifiers.org/ncbiprotein/,namespace_alias
bioregistry,INSDC.GCA,http://identifiers.org/assembly/,namespace_alias
bioregistry,INSDC.GCF,http://identifiers.org/assembly/,namespace_alias
bioregistry,INSDC.RUN,https://www.ebi.ac.uk/ena/browser/view/,canonical
bioregistry,INSDC.SRA,http://identifiers.org/insdc.sra/,canonical
bioregistry,INTACT,http://identifiers.org/intact/,canonical
Expand Down Expand Up @@ -1034,6 +1035,7 @@ bioregistry,NCBI_GI,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBI_taxid,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBI_Taxon_ID,http://purl.obolibrary.org/obo/NCBITaxon_,namespace_alias
bioregistry,NCBIBOOK,https://www.ncbi.nlm.nih.gov/books/,canonical
bioregistry,NCBIDRS,http://identifiers.org/ncbidrs/,canonical
bioregistry,NCBIGene,http://identifiers.org/ncbigene/,canonical
bioregistry,NCBIGI,https://www.ncbi.nlm.nih.gov/nucleotide/,namespace_alias
bioregistry,NCBIProtein,http://identifiers.org/ncbiprotein/,canonical
Expand Down
9,397 changes: 4,704 additions & 4,693 deletions src/prefixmaps/data/merged.csv

Large diffs are not rendered by default.

9,397 changes: 4,704 additions & 4,693 deletions src/prefixmaps/data/merged.oak.csv

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions src/prefixmaps/data/prefixcc.csv
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ prefixcc,biol,http://purl.org/NET/biol/ns#,canonical
prefixcc,biolink,https://w3id.org/biolink/vocab/,namespace_alias
prefixcc,biopax,http://www.biopax.org/release/biopax-level3.owl#,canonical
prefixcc,biordf,http://purl.org/net/biordfmicroarray/ns#,canonical
prefixcc,bioschemas,https://bioschemas.org/,canonical
prefixcc,bioskos,http://eulersharp.sourceforge.net/2003/03swap/bioSKOSSchemes#,canonical
prefixcc,biotop,http://purl.org/biotop/biotop.owl#,canonical
prefixcc,biro,http://purl.org/spar/biro/,canonical
Expand Down Expand Up @@ -287,6 +288,7 @@ prefixcc,cdtype,http://purl.org/cld/cdtype/,canonical
prefixcc,centrifuge,http://purl.org/twc/vocab/centrifuge#,canonical
prefixcc,ceo,https://linkeddata.cultureelerfgoed.nl/def/ceo#,canonical
prefixcc,ceox,https://linkeddata.cultureelerfgoed.nl/def/ceox#,canonical
prefixcc,cercabib,https://cercabib.ub.edu/,canonical
prefixcc,cerealstoo,http://rdf.ag/o/cerealstoo#,canonical
prefixcc,cerif,http://spi-fm.uca.es/neologism/cerif#,canonical
prefixcc,cert,http://www.w3.org/ns/auth/cert#,canonical
Expand Down Expand Up @@ -773,6 +775,7 @@ prefixcc,evset,http://dsnotify.org/vocab/eventset/0.1/,canonical
prefixcc,ewg,http://ethoinformatics.org/,canonical
prefixcc,ex,http://example.org/,canonical
prefixcc,example,http://www.example.org/rdf#,canonical
prefixcc,exekg,https://raw.githubusercontent.com/nsai-uio/ExeKGOntology/main/ds_exeKGOntology.ttl#,canonical
prefixcc,exif,http://www.w3.org/2003/12/exif/ns#,canonical
prefixcc,exo,https://w3id.org/example#,canonical
prefixcc,experts,http://emmo.info/emmo/application/maeo/experts#,canonical
Expand Down Expand Up @@ -1407,6 +1410,7 @@ prefixcc,mi,http://www.marineinfo.org/ns/ontology#,canonical
prefixcc,mibc,http://marineinfo.org/ns/library/bibcodes#,canonical
prefixcc,mibt,https://marineinfo.org/ns/library/bibtypes#,canonical
prefixcc,mico,http://www.mico-project.eu/ns/platform/1.0/schema#,canonical
prefixcc,mifesto,https://w3id.org/mifesto#,canonical
prefixcc,mil,http://rdf.muninn-project.org/ontologies/military#,canonical
prefixcc,mime,https://www.iana.org/assignments/media-types/,canonical
prefixcc,mindat,https://www.mindat.org/,canonical
Expand Down Expand Up @@ -1489,8 +1493,8 @@ prefixcc,ncal,http://www.semanticdesktop.org/ontologies/2007/04/02/ncal#,canonic
prefixcc,ncbi,https://www.ncbi.nlm.nih.gov/,canonical
prefixcc,ncbigene,http://identifiers.org/ncbigene/,canonical
prefixcc,ncbitaxon,http://purl.org/obo/owl/NCBITaxon#,canonical
prefixcc,ncicp,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,namespace_alias
prefixcc,ncit,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical
prefixcc,ncicp,http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#,canonical
prefixcc,ncit,https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=,canonical
prefixcc,nco,http://www.semanticdesktop.org/ontologies/2007/03/22/nco#,canonical
prefixcc,ndl,http://schemas.ogf.org/nml/2013/05/base#,canonical
prefixcc,ndnp,http://chroniclingamerica.loc.gov/terms#,canonical
Expand Down Expand Up @@ -1779,6 +1783,7 @@ prefixcc,pattern,http://www.essepuntato.it/2008/12/pattern#,canonical
prefixcc,pav,http://purl.org/pav/,canonical
prefixcc,pay,http://reference.data.gov.uk/def/payment#,namespace_alias
prefixcc,payment,http://reference.data.gov.uk/def/payment#,canonical
prefixcc,pbac,https://w3id.org/pbac#,canonical
prefixcc,pbo,http://purl.org/ontology/pbo/core#,canonical
prefixcc,pbody,http://reference.data.gov.uk/def/public-body/,canonical
prefixcc,pc,http://purl.org/procurement/public-contracts#,canonical
Expand Down Expand Up @@ -1955,6 +1960,7 @@ prefixcc,qud,http://qudt.org/1.1/schema/qudt#,canonical
prefixcc,qudt,http://qudt.org/schema/qudt/,canonical
prefixcc,quest,https://rb.gy/ntg7l/,canonical
prefixcc,quid,https://w3id.org/quid/,canonical
prefixcc,quit,http://quit.aksw.org/vocab/,canonical
prefixcc,quran,http://khalidaloufi.sa/quran#,canonical
prefixcc,quty,http://www.telegraphis.net/ontology/measurement/quantity#,canonical
prefixcc,qvoc,http://mlode.nlp2rdf.org/quranvocab#,canonical
Expand Down Expand Up @@ -2259,6 +2265,7 @@ prefixcc,security,http://securitytoolbox.appspot.com/securityMain#,canonical
prefixcc,sede,http://eventography.org/sede/0.1/,canonical
prefixcc,seeds,http://deductions.github.io/seeds.owl.ttl#,canonical
prefixcc,sem,http://semanticweb.cs.vu.nl/2009/11/sem/,canonical
prefixcc,semapv,https://w3id.org/semapv/vocab/,canonical
prefixcc,semio,http://www.lingvoj.org/semio#,canonical
prefixcc,semiot,http://w3id.org/semiot/ontologies/semiot#,canonical
prefixcc,semsur,http://purl.org/SemSur/,canonical
Expand Down Expand Up @@ -2601,6 +2608,7 @@ prefixcc,states,http://www.w3.org/2005/07/aaa#,canonical
prefixcc,static,http://vocab-ld.org/vocab/static-ld#,canonical
prefixcc,stats,http://purl.org/rdfstats/stats#,canonical
prefixcc,status,http://www.w3.org/2003/06/sw-vocab-status/ns#,namespace_alias
prefixcc,stax,https://w3id.org/stax/ontology#,canonical
prefixcc,steel,http://ontorule-project.eu/resources/steel-30#,canonical
prefixcc,stencila,http://schema.stenci.la/,canonical
prefixcc,step,http://purl.org/net/step#,canonical
Expand Down Expand Up @@ -2937,6 +2945,7 @@ prefixcc,wotc,http://purl.org/wot-catalogue#,canonical
prefixcc,wotsec,https://www.w3.org/2019/wot/security#,canonical
prefixcc,wp,http://vocabularies.wikipathways.org/wp#,canonical
prefixcc,wro,http://purl.org/net/wf4ever/ro#,canonical
prefixcc,wrroc,https://w3id.org/ro/terms/workflow-run#,canonical
prefixcc,ws,http://www.w3.org/ns/pim/space#,namespace_alias
prefixcc,wsc,http://www.openk.org/wscaim.owl#,canonical
prefixcc,wscaim,http://www.openk.org/wscaim.owl#,namespace_alias
Expand Down
11 changes: 11 additions & 0 deletions src/prefixmaps/data/w3id.csv
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ w3id,cerrotti,https://w3id.org/cerrotti/,canonical
w3id,certainty_nanopubs,https://w3id.org/certainty_nanopubs/,canonical
w3id,cevo,https://w3id.org/cevo/,canonical
w3id,chainpoint,https://w3id.org/chainpoint/,canonical
w3id,chalkgrp,https://w3id.org/chalkgrp/,canonical
w3id,character-computing,https://w3id.org/character-computing/,canonical
w3id,charity-organization,https://w3id.org/charity-organization/,canonical
w3id,charta77,https://w3id.org/charta77/,canonical
Expand All @@ -161,6 +162,7 @@ w3id,cld,https://w3id.org/cld/,canonical
w3id,clinga,https://w3id.org/clinga/,canonical
w3id,clipc,https://w3id.org/clipc/,canonical
w3id,clodg,https://w3id.org/clodg/,canonical
w3id,CMECS,https://w3id.org/CMECS/,canonical
w3id,cmip6dr,https://w3id.org/cmip6dr/,canonical
w3id,cntf,https://w3id.org/cntf/,canonical
w3id,cocoon,https://w3id.org/cocoon/,canonical
Expand Down Expand Up @@ -312,6 +314,7 @@ w3id,ecodigit,https://w3id.org/ecodigit/,canonical
w3id,ecsel-dr,https://w3id.org/ecsel-dr/,canonical
w3id,ecsel-dr-prc-PMV,https://w3id.org/ecsel-dr-prc-PMV/,canonical
w3id,ecsel-dr-sn-SSP,https://w3id.org/ecsel-dr-sn-SSP/,canonical
w3id,EDH_Ctagged,https://w3id.org/EDH_Ctagged/,canonical
w3id,edu-sharing,https://w3id.org/edu-sharing/,canonical
w3id,education,https://w3id.org/education/,canonical
w3id,edukg,https://w3id.org/edukg/,canonical
Expand Down Expand Up @@ -631,6 +634,7 @@ w3id,mgkb,https://w3id.org/mgkb/,canonical
w3id,mica,https://w3id.org/mica/,canonical
w3id,midas-catalog,https://w3id.org/midas-catalog/,canonical
w3id,midas-metadata,https://w3id.org/midas-metadata/,canonical
w3id,mifesto,https://w3id.org/mifesto/,canonical
w3id,minerva,https://w3id.org/minerva/,canonical
w3id,mint,https://w3id.org/mint/,canonical
w3id,minte,https://w3id.org/minte/,canonical
Expand Down Expand Up @@ -787,10 +791,12 @@ w3id,paradise,https://w3id.org/paradise/,canonical
w3id,patent_ontologies,https://w3id.org/patent_ontologies/,canonical
w3id,payments,https://w3id.org/payments/,canonical
w3id,payswarm,https://w3id.org/payswarm/,canonical
w3id,pbac,https://w3id.org/pbac/,canonical
w3id,pbs,https://w3id.org/pbs/,canonical
w3id,pc,https://w3id.org/pc/,canonical
w3id,peco,https://w3id.org/peco/,canonical
w3id,pedigree,https://w3id.org/pedigree/,canonical
w3id,peh,https://w3id.org/peh/,canonical
w3id,people,https://w3id.org/people/,canonical
w3id,pep,https://w3id.org/pep/,canonical
w3id,per,https://w3id.org/per/,canonical
Expand Down Expand Up @@ -834,6 +840,7 @@ w3id,qb4solap,https://w3id.org/qb4solap/,canonical
w3id,quality,https://w3id.org/quality/,canonical
w3id,qudt,https://w3id.org/qudt/,canonical
w3id,quid,https://w3id.org/quid/,canonical
w3id,r74n,https://w3id.org/r74n/,canonical
w3id,rai,https://w3id.org/rai/,canonical
w3id,rail,https://w3id.org/rail/,canonical
w3id,rains,https://w3id.org/rains/,canonical
Expand Down Expand Up @@ -929,6 +936,7 @@ w3id,seneca,https://w3id.org/seneca/,canonical
w3id,sense,https://w3id.org/sense/,canonical
w3id,sentitrack,https://w3id.org/sentitrack/,canonical
w3id,seo,https://w3id.org/seo/,canonical
w3id,seovoc,https://w3id.org/seovoc/,canonical
w3id,sepses,https://w3id.org/sepses/,canonical
w3id,serdif,https://w3id.org/serdif/,canonical
w3id,sfs-ontology,https://w3id.org/sfs-ontology/,canonical
Expand Down Expand Up @@ -975,6 +983,7 @@ w3id,SpOTy,https://w3id.org/SpOTy/,canonical
w3id,sqo,https://w3id.org/sqo/,canonical
w3id,squap,https://w3id.org/squap/,canonical
w3id,squirrel,https://w3id.org/squirrel/,canonical
w3id,sri,https://w3id.org/sri/,canonical
w3id,sri-lanka,https://w3id.org/sri-lanka/,canonical
w3id,srmo,https://w3id.org/srmo/,canonical
w3id,srr,https://w3id.org/srr/,canonical
Expand All @@ -984,6 +993,7 @@ w3id,sssom,https://w3id.org/sssom/,canonical
w3id,stahl,https://w3id.org/stahl/,canonical
w3id,stargate-h2020,https://w3id.org/stargate-h2020/,canonical
w3id,stav,https://w3id.org/stav/,canonical
w3id,stax,https://w3id.org/stax/,canonical
w3id,steel,https://w3id.org/steel/,canonical
w3id,stirdata,https://w3id.org/stirdata/,canonical
w3id,stlab,https://w3id.org/stlab/,canonical
Expand Down Expand Up @@ -1033,6 +1043,7 @@ w3id,tso,https://w3id.org/tso/,canonical
w3id,tsso,https://w3id.org/tsso/,canonical
w3id,ttla,https://w3id.org/ttla/,canonical
w3id,TTRpg,https://w3id.org/TTRpg/,canonical
w3id,tvstationjp,https://w3id.org/tvstationjp/,canonical
w3id,twins,https://w3id.org/twins/,canonical
w3id,uco,https://w3id.org/uco/,canonical
w3id,ufo,https://w3id.org/ufo/,canonical
Expand Down
10 changes: 9 additions & 1 deletion src/prefixmaps/datamodel/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class PrefixExpansion:
status: StatusType
"""Indicates whether the expansion is canonical, a prefix alias, a namespace alias, or both."""

expansion_source: Optional[str] = None
"""Indicates the source of the prefix expansion."""

def canonical(self) -> bool:
"""
True if this is the canonical mapping in both directions.
Expand Down Expand Up @@ -153,14 +156,15 @@ def combine(self, context: "Context"):
:return:
"""
for pe in context.prefix_expansions:
self.add_prefix(pe.prefix, pe.namespace, pe.status)
self.add_prefix(pe.prefix, pe.namespace, pe.status, expansion_source=context.name)

def add_prefix(
self,
prefix: PREFIX,
namespace: NAMESPACE,
status: StatusType = StatusType.canonical,
preferred: bool = False,
expansion_source: Optional[str] = None,
):
"""
Adds a prefix expansion to this context.
Expand All @@ -176,6 +180,9 @@ def add_prefix(
:param namespace: namespace to be added
:param status: the status of the prefix being added
:param preferred:
:param expansion_source: An optional annotation to be used when merging contexts together.
The source will keep track of the original context that a given prefix
expansion came from. This is used in :meth:`Context.combine`.
:return:
"""
# TODO: check status
Expand Down Expand Up @@ -203,6 +210,7 @@ def add_prefix(
prefix=prefix,
namespace=namespace,
status=status,
expansion_source=expansion_source,
)
)

Expand Down
2 changes: 1 addition & 1 deletion src/prefixmaps/ingest/etl_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def run_etl(output_directory: Union[str, Path]) -> None:
# Write all contexts
for name, context in contexts.items():
with output_directory.joinpath(f"{name}.csv").open("w", encoding="UTF-8") as file:
context_to_file(context, file)
context_to_file(context, file, include_expansion_source=context.name in COMBINED)


@click.command
Expand Down
14 changes: 12 additions & 2 deletions src/prefixmaps/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,27 @@ def _key(pe: PrefixExpansion):
return pe.prefix.casefold(), STATUS_TYPE_ORDER[pe.status]


def context_to_file(
    context: Context, file: TextIO, *, include_expansion_source: bool = False
) -> None:
    """
    Writes a context to a file as CSV, one row per prefix expansion.

    A header row is written first, then the prefix expansions ordered by
    :func:`_key`. The :class:`PrefixExpansion` records held by ``context``
    are left unmodified.

    :param context: the context whose prefix expansions are written
    :param file: writable text stream that receives the header and the rows
    :param include_expansion_source: If true, include an "expansion_source" column.
        This is useful for writing merged contexts since it says the highest priority
        simple context from which the row corresponding to a :class:`PrefixExpansion` came.
    :return:
    """
    field_names = ["context", "prefix", "namespace", "status"]
    if include_expansion_source:
        field_names.append("expansion_source")
    writer = DictWriter(file, fieldnames=field_names)
    writer.writeheader()
    for pe in sorted(context.prefix_expansions, key=_key):
        # vars() hands back the instance's live __dict__; copy it so the edits
        # below shape the CSV row without altering the PrefixExpansion itself
        # (otherwise ``status`` is replaced by a plain string and
        # ``expansion_source`` is deleted from the object).
        row = dict(vars(pe))
        row["status"] = pe.status.value
        if not include_expansion_source:
            # DictWriter raises on keys missing from ``fieldnames`` by default,
            # so the column has to be dropped from the row as well.
            row.pop("expansion_source", None)
        writer.writerow(row)
Loading