From 6eeeaf2a29766f46ef4d70b4197f78276da172b0 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Thu, 2 Nov 2023 11:43:10 +0100 Subject: [PATCH] Refactor converter generation code (#974) This PR gets rid of code that focuses on lists of `curies.Record` objects and instead works directly with `curies.Converter` objects. Along the way, this also identified issues with the data integrity on MIRIAM, N2T, and Prefix Commons with respect to the TAIR resources (`tair.gene` and `tair.protein`) which all used non-specific, overlapping URLs. Therefore, these needed to get cleaned out before being imported. Why do this? If we work directly with converters, we can make use of the CURIE prefix reconciliation tooling to more cleanly refactor the Bioregistry to Converter pipeline (which is causing issues when adding prefix casing variants in a related PR #969). --- exports/contexts/bioregistry.epm.json | 2 - exports/contexts/bioregistry.rpm.json | 2 - exports/contexts/obo.epm.json | 2 - src/bioregistry/__init__.py | 1 - src/bioregistry/data/bioregistry.json | 18 ++---- .../data/external/miriam/processed.json | 6 +- .../data/external/n2t/processed.json | 6 +- .../external/prefixcommons/processed.json | 6 +- src/bioregistry/external/miriam.py | 13 +++- src/bioregistry/external/n2t.py | 15 ++++- src/bioregistry/external/prefixcommons.py | 17 ++++-- src/bioregistry/record_accumulator.py | 28 +++++++++ src/bioregistry/resource_manager.py | 52 +++++++--------- src/bioregistry/uri_format.py | 61 +------------------ tests/test_data.py | 4 +- tests/test_indra.py | 2 + tests/test_manager.py | 4 +- 17 files changed, 103 insertions(+), 136 deletions(-) diff --git a/exports/contexts/bioregistry.epm.json b/exports/contexts/bioregistry.epm.json index 67725c785..af88fd3e7 100644 --- a/exports/contexts/bioregistry.epm.json +++ b/exports/contexts/bioregistry.epm.json @@ -26168,14 +26168,12 @@ "uri_prefix": "http://arabidopsis.org/servlets/TairObject?accession=AASequence:", 
"uri_prefix_synonyms": [ "TAIR.PROTEIN:", - "http://arabidopsis.org/servlets/TairObject?accession=", "http://bio2rdf.org/tair.protein:", "http://bioregistry.io/tair.protein:", "http://identifiers.org/tair.protein/", "http://identifiers.org/tair.protein/AASequence:", "http://identifiers.org/tair.protein:", "http://n2t.net/tair.protein:", - "https://arabidopsis.org/servlets/TairObject?accession=", "https://arabidopsis.org/servlets/TairObject?accession=AASequence:", "https://bio2rdf.org/tair.protein:", "https://bioregistry.io/tair.protein:", diff --git a/exports/contexts/bioregistry.rpm.json b/exports/contexts/bioregistry.rpm.json index eb98d89af..39c8295f4 100644 --- a/exports/contexts/bioregistry.rpm.json +++ b/exports/contexts/bioregistry.rpm.json @@ -3167,7 +3167,6 @@ "http://aps.unmc.edu/AP/database/query_output.php?ID=": "apd", "http://ar5iv.org/abs/": "arxiv", "http://arabidopsis.info/StockInfo?NASC_id=": "nasc", - "http://arabidopsis.org/servlets/TairObject?accession=": "tair.protein", "http://arabidopsis.org/servlets/TairObject?accession=AASequence:": "tair.protein", "http://arabidopsis.org/servlets/TairObject?accession=Gene:": "tair.gene", "http://arabidopsis.org/servlets/TairObject?type=locus&name=": "tair.locus", @@ -10733,7 +10732,6 @@ "https://aps.unmc.edu/AP/database/query_output.php?ID=": "apd", "https://ar5iv.org/abs/": "arxiv", "https://arabidopsis.info/StockInfo?NASC_id=": "nasc", - "https://arabidopsis.org/servlets/TairObject?accession=": "tair.protein", "https://arabidopsis.org/servlets/TairObject?accession=AASequence:": "tair.protein", "https://arabidopsis.org/servlets/TairObject?accession=Gene:": "tair.gene", "https://arabidopsis.org/servlets/TairObject?type=locus&name=": "tair.locus", diff --git a/exports/contexts/obo.epm.json b/exports/contexts/obo.epm.json index 9d069465b..2f6a5b0ec 100644 --- a/exports/contexts/obo.epm.json +++ b/exports/contexts/obo.epm.json @@ -22771,14 +22771,12 @@ "prefix": "tair.protein", "uri_prefix": 
"http://arabidopsis.org/servlets/TairObject?accession=AASequence:", "uri_prefix_synonyms": [ - "http://arabidopsis.org/servlets/TairObject?accession=", "http://bio2rdf.org/tair.protein:", "http://bioregistry.io/tair.protein:", "http://identifiers.org/tair.protein/", "http://identifiers.org/tair.protein/AASequence:", "http://identifiers.org/tair.protein:", "http://n2t.net/tair.protein:", - "https://arabidopsis.org/servlets/TairObject?accession=", "https://arabidopsis.org/servlets/TairObject?accession=AASequence:", "https://bio2rdf.org/tair.protein:", "https://bioregistry.io/tair.protein:", diff --git a/src/bioregistry/__init__.py b/src/bioregistry/__init__.py index 8584eee3a..b567f940f 100644 --- a/src/bioregistry/__init__.py +++ b/src/bioregistry/__init__.py @@ -127,7 +127,6 @@ write_registry, ) from .uri_format import ( # noqa:F401 - get_extended_prefix_map, get_pattern_map, get_prefix_map, get_uri_format, diff --git a/src/bioregistry/data/bioregistry.json b/src/bioregistry/data/bioregistry.json index a03fc9360..e0b9e0691 100644 --- a/src/bioregistry/data/bioregistry.json +++ b/src/bioregistry/data/bioregistry.json @@ -95986,8 +95986,7 @@ "namespaceEmbeddedInLui": false, "pattern": "^Gene:\\d{7}$", "prefix": "tair.gene", - "sampleId": "Gene:2200934", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "sampleId": "Gene:2200934" }, "n2t": { "description": "The Arabidopsis Information Resource (TAIR) maintains a database of genetic and molecular biology data for the model higher plant Arabidopsis thaliana. 
This is the reference gene model for a given locus.", @@ -95996,8 +95995,7 @@ "name": "The Arabidopsis Information Resource (TAIR) Gene", "namespaceEmbeddedInLui": false, "pattern": "^Gene:\\d{7}$", - "prefix": "tair.gene", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "prefix": "tair.gene" }, "part_of": "tair", "pattern": "^\\d{7}$", @@ -96012,8 +96010,7 @@ "prefix": "tair.gene", "pubmed_ids": [ "12444417" - ], - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + ] }, "publications": [ { @@ -96133,8 +96130,7 @@ "namespaceEmbeddedInLui": false, "pattern": "^AASequence:\\d{10}$", "prefix": "tair.protein", - "sampleId": "AASequence:1009107926", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "sampleId": "AASequence:1009107926" }, "n2t": { "description": "The Arabidopsis Information Resource (TAIR) maintains a database of genetic and molecular biology data for the model higher plant Arabidopsis thaliana. This provides protein information for a given gene model and provides links to other sources such as UniProtKB and GenPept", @@ -96143,8 +96139,7 @@ "name": "The Arabidopsis Information Resource (TAIR) Protein", "namespaceEmbeddedInLui": false, "pattern": "^AASequence:\\d{10}$", - "prefix": "tair.protein", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "prefix": "tair.protein" }, "pattern": "^\\d{10}$", "prefixcommons": { @@ -96158,8 +96153,7 @@ "prefix": "tair.protein", "pubmed_ids": [ "12444417" - ], - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + ] }, "publications": [ { diff --git a/src/bioregistry/data/external/miriam/processed.json b/src/bioregistry/data/external/miriam/processed.json index 3ac8b8104..706afc828 100644 --- a/src/bioregistry/data/external/miriam/processed.json +++ b/src/bioregistry/data/external/miriam/processed.json @@ -9440,8 +9440,7 @@ "namespaceEmbeddedInLui": false, "pattern": "^Gene:\\d{7}$", "prefix": 
"tair.gene", - "sampleId": "Gene:2200934", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "sampleId": "Gene:2200934" }, "tair.locus": { "deprecated": false, @@ -9476,8 +9475,7 @@ "namespaceEmbeddedInLui": false, "pattern": "^AASequence:\\d{10}$", "prefix": "tair.protein", - "sampleId": "AASequence:1009107926", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "sampleId": "AASequence:1009107926" }, "tarbase": { "deprecated": false, diff --git a/src/bioregistry/data/external/n2t/processed.json b/src/bioregistry/data/external/n2t/processed.json index 7e2af9d0f..69de9ba71 100644 --- a/src/bioregistry/data/external/n2t/processed.json +++ b/src/bioregistry/data/external/n2t/processed.json @@ -5460,8 +5460,7 @@ "homepage": "http://arabidopsis.org/index.jsp", "name": "The Arabidopsis Information Resource (TAIR) Gene", "namespaceEmbeddedInLui": false, - "pattern": "^Gene:\\d{7}$", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "pattern": "^Gene:\\d{7}$" }, "tair.locus": { "description": "The Arabidopsis Information Resource (TAIR) maintains a database of genetic and molecular biology data for the model higher plant Arabidopsis thaliana. 
The name of a Locus is unique and used by TAIR, TIGR, and MIPS.", @@ -5478,8 +5477,7 @@ "homepage": "http://arabidopsis.org/index.jsp", "name": "The Arabidopsis Information Resource (TAIR) Protein", "namespaceEmbeddedInLui": false, - "pattern": "^AASequence:\\d{10}$", - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + "pattern": "^AASequence:\\d{10}$" }, "tarbase": { "description": "TarBase stores microRNA (miRNA) information for miRNA–gene interactions, as well as miRNA- and gene-related facts to information specific to the interaction and the experimental validation methodologies used.", diff --git a/src/bioregistry/data/external/prefixcommons/processed.json b/src/bioregistry/data/external/prefixcommons/processed.json index c4cabf3b2..08df70073 100644 --- a/src/bioregistry/data/external/prefixcommons/processed.json +++ b/src/bioregistry/data/external/prefixcommons/processed.json @@ -10795,8 +10795,7 @@ "prefix": "tair.gene", "pubmed_ids": [ "12444417" - ], - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + ] }, "tair.locus": { "description": "The locus name", @@ -10823,8 +10822,7 @@ "prefix": "tair.protein", "pubmed_ids": [ "12444417" - ], - "uri_format": "http://arabidopsis.org/servlets/TairObject?accession=$1" + ] }, "tao": { "bioportal": "1110", diff --git a/src/bioregistry/external/miriam.py b/src/bioregistry/external/miriam.py index d03861b97..10b191c5d 100644 --- a/src/bioregistry/external/miriam.py +++ b/src/bioregistry/external/miriam.py @@ -27,6 +27,9 @@ "4503", "6vts", } +SKIP_URI_FORMATS = { + "http://arabidopsis.org/servlets/TairObject?accession=$1", +} def get_miriam(force_download: bool = False, force_process: bool = False): @@ -91,7 +94,8 @@ def _process(record): else: primary, *rest = resources rv["homepage"] = primary["homepage"] - rv[URI_FORMAT_KEY] = primary[URI_FORMAT_KEY] + if URI_FORMAT_KEY in primary: + rv[URI_FORMAT_KEY] = primary[URI_FORMAT_KEY] extras = [] for provider in rest: @@ -113,14 
+117,17 @@ def _process(record): def _preprocess_resource(resource): - return { + rv = { "official": resource["official"], "homepage": resource["resourceHomeUrl"], "code": resource["providerCode"], - URI_FORMAT_KEY: resource["urlPattern"].replace("{$id}", "$1"), "name": resource["name"], "description": resource["description"], } + uri_format = resource["urlPattern"].replace("{$id}", "$1") + if uri_format not in SKIP_URI_FORMATS: + rv[URI_FORMAT_KEY] = uri_format + return rv @click.command() diff --git a/src/bioregistry/external/n2t.py b/src/bioregistry/external/n2t.py index e06745bab..844e91dda 100644 --- a/src/bioregistry/external/n2t.py +++ b/src/bioregistry/external/n2t.py @@ -26,6 +26,9 @@ "merops": "issue with miriam having duplicate prefixes for this resource", # FIXME "hgnc.family": "issue with miriam having duplicate prefixes for this resource", # FIXME } +SKIP_URI_FORMATS = { + "http://arabidopsis.org/servlets/TairObject?accession=$1", +} def get_n2t(force_download: bool = False): @@ -53,7 +56,7 @@ def get_n2t(force_download: bool = False): def _process(record): rv = { "name": record.get("name"), - URI_FORMAT_KEY: record["redirect"].replace("$id", "$1") if "redirect" in record else None, + URI_FORMAT_KEY: _get_uri_format(record), "description": record.get("description"), "homepage": record.get("more"), "pattern": record.get("pattern"), @@ -63,6 +66,16 @@ def _process(record): return {k: v for k, v in rv.items() if v is not None} +def _get_uri_format(record): + raw_redirect = record.get("redirect") + if raw_redirect is None: + return None + uri_format = raw_redirect.replace("$id", "$1") + if uri_format in SKIP_URI_FORMATS: + return None + return uri_format + + @click.command() def main(): """Reload the N2T data.""" diff --git a/src/bioregistry/external/prefixcommons.py b/src/bioregistry/external/prefixcommons.py index cf6912294..76ee5426d 100644 --- a/src/bioregistry/external/prefixcommons.py +++ b/src/bioregistry/external/prefixcommons.py @@ -85,6 +85,11 
@@ } #: These contain synonyms with mismatches DISCARD_SYNONYMS = {"biogrid", "cath", "zfa"} +SKIP_URI_FORMATS = { + "http://purl.obolibrary.org/obo/$1", + "http://www.ebi.ac.uk/ontology-lookup/?termId=$1", + "http://arabidopsis.org/servlets/TairObject?accession=$1", +} def get_prefixcommons(force_download: bool = False, force_process: bool = False): @@ -147,10 +152,7 @@ def _process_row(line: str): uri_format = rv.pop("uri_format", None) if uri_format: uri_format = uri_format.replace("$id", "$1").replace("[?id]", "$1").replace("$d", "$1") - if uri_format not in { - "http://purl.obolibrary.org/obo/$1", - "http://www.ebi.ac.uk/ontology-lookup/?termId=$1", - }: + if uri_format not in SKIP_URI_FORMATS: rv["uri_format"] = uri_format uri_rdf_formats = _get_uri_formats(rv, "rdf_uri_prefix") @@ -191,9 +193,12 @@ def _get_uri_formats(rv, key) -> List[str]: continue if "$1" in uri_format or "[?id]" in uri_format: # FIXME check if these come at the end continue - rv.append(f"{uri_format}$1") + uri_format = f"{uri_format}$1" + if uri_format in SKIP_URI_FORMATS: + continue + rv.append(uri_format) return rv if __name__ == "__main__": - print(len(get_prefixcommons(force_process=True))) # noqa:T201 + print(len(get_prefixcommons(force_process=True, force_download=True))) # noqa:T201 diff --git a/src/bioregistry/record_accumulator.py b/src/bioregistry/record_accumulator.py index 904e9d448..c39292538 100644 --- a/src/bioregistry/record_accumulator.py +++ b/src/bioregistry/record_accumulator.py @@ -18,9 +18,14 @@ ) import curies +from curies import Converter from bioregistry import Resource +__all__ = [ + "get_converter", +] + logger = logging.getLogger(__name__) prefix_blacklist = {"bgee.gene"} uri_prefix_blacklist = { @@ -33,6 +38,7 @@ "http://www.ncbi.nlm.nih.gov/nuccore/", "https://www.ebi.ac.uk/ena/data/view/", "http://www.ebi.ac.uk/ena/data/view/", + "http://arabidopsis.org/servlets/TairObject?accession=", } prefix_resource_blacklist = { ("orphanet", 
"http://www.orpha.net/ORDO/Orphanet_"), # biocontext is wrong @@ -103,6 +109,28 @@ def _iterate_prefix_prefix(resource: Resource, *extras: str): # (e.g., uniprot.isoform and uniprot) +def get_converter( + resources: List[Resource], + prefix_priority: Optional[Sequence[str]] = None, + uri_prefix_priority: Optional[Sequence[str]] = None, + include_prefixes: bool = False, + strict: bool = False, + blacklist: Optional[Collection[str]] = None, + remapping: Optional[Mapping[str, str]] = None, +) -> Converter: + """Generate a converter from resources.""" + records = get_records( + resources, + prefix_priority=prefix_priority, + uri_prefix_priority=uri_prefix_priority, + include_prefixes=include_prefixes, + strict=strict, + blacklist=blacklist, + remapping=remapping, + ) + return curies.Converter(records) + + def get_records( # noqa: C901 resources: List[Resource], prefix_priority: Optional[Sequence[str]] = None, diff --git a/src/bioregistry/resource_manager.py b/src/bioregistry/resource_manager.py index 6c89d4bfc..279fc8408 100644 --- a/src/bioregistry/resource_manager.py +++ b/src/bioregistry/resource_manager.py @@ -4,6 +4,7 @@ import logging import typing +import warnings from collections import Counter, defaultdict from functools import lru_cache from pathlib import Path @@ -190,7 +191,7 @@ def __init__( def converter(self) -> curies.Converter: """Get the default converter.""" if self._converter is None: - self._converter = curies.Converter(records=self.get_curies_records()) + self._converter = self.get_converter() return self._converter def write_registry(self): @@ -631,11 +632,7 @@ def _iter_pattern_map( for synonym in resource.get_synonyms(): yield synonym, pattern - def get_converter(self, **kwargs) -> curies.Converter: - """Get a converter from this manager.""" - return curies.Converter(records=self.get_curies_records(**kwargs)) - - def get_curies_records( + def get_converter( self, *, prefix_priority: Optional[Sequence[str]] = None, @@ -644,8 +641,8 @@ def 
get_curies_records( strict: bool = False, remapping: Optional[Mapping[str, str]] = None, blacklist: Optional[typing.Collection[str]] = None, - ) -> List[curies.Record]: - """Get a list of records for all resources in this manager. + ) -> curies.Converter: + """Get a converter from this manager. :param prefix_priority: The order of metaprefixes OR "preferred" for choosing a primary prefix @@ -665,14 +662,14 @@ def get_curies_records( :returns: A list of records for :class:`curies.Converter` """ - from .record_accumulator import get_records + from .record_accumulator import get_converter # first step - filter to resources that have *anything* for a URI prefix # TODO maybe better to filter on URI format string, since bioregistry can always provide a URI prefix resources = [ resource for _, resource in sorted(self.registry.items()) if resource.get_uri_prefix() ] - return get_records( + converter = get_converter( resources, prefix_priority=prefix_priority, uri_prefix_priority=uri_prefix_priority, @@ -681,6 +678,12 @@ def get_curies_records( blacklist=blacklist, remapping=remapping, ) + return converter + + def get_curies_records(self, **kwargs) -> List[curies.Record]: + """Get a list of records for all resources in this manager.""" + warnings.warn("use Manager.get_converter().records", DeprecationWarning) + return self.get_converter(**kwargs).records def get_reverse_prefix_map( self, include_prefixes: bool = False, strict: bool = False @@ -692,7 +695,8 @@ def get_reverse_prefix_map( "http://purl.obolibrary.org/obo/": "obo", "https://purl.obolibrary.org/obo/": "obo", } - for record in self.get_curies_records(include_prefixes=include_prefixes, strict=strict): + converter = self.get_converter(include_prefixes=include_prefixes, strict=strict) + for record in converter.records: rv[record.uri_prefix] = record.prefix for uri_prefix in record.uri_prefix_synonyms: if uri_prefix not in rv: @@ -739,14 +743,14 @@ def get_prefix_map( :param blacklist: Prefixes to skip :return: A 
mapping from prefixes to URI prefixes. """ - records = self.get_curies_records( + converter = self.get_converter( prefix_priority=prefix_priority, uri_prefix_priority=uri_prefix_priority, remapping=remapping, blacklist=blacklist, ) rv = {} - for record in records: + for record in converter.records: rv[record.prefix] = record.uri_prefix if include_synonyms: for prefix in record.prefix_synonyms: @@ -1568,17 +1572,16 @@ def get_context(self, key: str) -> Optional[Context]: """ return self.contexts.get(key) - def get_records_from_context( + def get_converter_from_context( self, context: Union[str, Context], - *, strict: bool = False, include_prefixes: bool = False, - ) -> List[curies.Record]: - """Get records based on a context.""" + ) -> curies.Converter: + """Get a converter based on a context.""" if isinstance(context, str): context = self.contexts[context] - return self.get_curies_records( + return self.get_converter( prefix_priority=context.prefix_priority, uri_prefix_priority=context.uri_prefix_priority, strict=strict, @@ -1587,19 +1590,6 @@ def get_records_from_context( include_prefixes=include_prefixes, ) - def get_converter_from_context( - self, - context: Union[str, Context], - strict: bool = False, - include_prefixes: bool = False, - ) -> curies.Converter: - """Get a converter based on a context.""" - return curies.Converter( - records=self.get_records_from_context( - context=context, strict=strict, include_prefixes=include_prefixes - ) - ) - def get_context_artifacts( self, key: str, include_synonyms: Optional[bool] = None ) -> Tuple[Mapping[str, str], Mapping[str, str]]: diff --git a/src/bioregistry/uri_format.py b/src/bioregistry/uri_format.py index cbb88b810..6022cc4cd 100644 --- a/src/bioregistry/uri_format.py +++ b/src/bioregistry/uri_format.py @@ -8,9 +8,7 @@ the prefix should go, which makes them more general than URI prefix strings. 
""" -from typing import Collection, List, Mapping, Optional, Sequence - -import curies +from typing import Collection, Mapping, Optional, Sequence from .resource_manager import manager @@ -19,7 +17,6 @@ "get_uri_prefix", "get_prefix_map", "get_pattern_map", - "get_extended_prefix_map", ] @@ -122,59 +119,3 @@ def get_pattern_map( remapping=remapping, blacklist=blacklist, ) - - -def get_extended_prefix_map( - prefix_priority: Optional[Sequence[str]] = None, - uri_prefix_priority: Optional[Sequence[str]] = None, - include_prefixes: bool = False, - strict: bool = False, - remapping: Optional[Mapping[str, str]] = None, - blacklist: Optional[Collection[str]] = None, -) -> List[curies.Record]: - """Get an extended prefix map. - - An extended prefix map is a collection of :class:`curies.Record` objects, - each of which has the following fields: - - - ``prefix`` - the canonical prefix - - ``uri_prefix`` - the canonical URI prefix (i.e., namespace) - - ``prefix_synonyms`` - optional extra prefixes such as capitialization variants. No prefix - synonyms are allowed to be duplicate across any canonical prefixes or synonyms in other - records in the extended prefix - - ``uri_prefix_synonyms`` - optional extra URI prefixes such as variants of Identifiers.org - URLs, PURLs, etc. No URI prefix synyonms are allowed to be duplicates of either canonical - or other URI prefix synonyms. - - Extended prefix maps have the benefit over regular prefix maps in that they keep extra - information. This can be utilized by :class:`curies.Converter` to make URI compression - and CURIE expansion aware of synonyms and other lexical variants. Further, an extended - prefix map can be readily collapsed into a normal prefix map by getting the ``prefix`` - and ``uri_prefix`` fields. 
- - :param prefix_priority: - The order of metaprefixes OR "preferred" for choosing a primary prefix - OR "default" for Bioregistry prefixes - :param uri_prefix_priority: - The order of metaprefixes for choosing the primary URI prefix OR - "default" for Bioregistry prefixes - :param include_prefixes: Should prefixes be included with colon delimiters? - Setting this to true makes an "omni"-reverse prefix map that can be - used to parse both URIs and CURIEs - :param strict: - If true, errors on URI prefix collisions. If false, sends logging - and skips them. - :param remapping: A mapping from bioregistry prefixes to preferred prefixes. - :param blacklist: - A collection of prefixes to skip - - :returns: A list of records for :class:`curies.Converter` - """ - return manager.get_curies_records( - prefix_priority=prefix_priority, - uri_prefix_priority=uri_prefix_priority, - include_prefixes=include_prefixes, - strict=strict, - remapping=remapping, - blacklist=blacklist, - ) diff --git a/tests/test_data.py b/tests/test_data.py index 5d472c131..24ffa8d35 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -534,9 +534,9 @@ def test_has_canonical(self): def test_records(self): """Test generating records.""" + converter = bioregistry.manager.get_converter(include_prefixes=True) records: Mapping[str, curies.Record] = { - record.prefix: record - for record in bioregistry.manager.get_curies_records(include_prefixes=True) + record.prefix: record for record in converter.records } # This is a "provides" situation diff --git a/tests/test_indra.py b/tests/test_indra.py index d5f4a8001..12967797d 100644 --- a/tests/test_indra.py +++ b/tests/test_indra.py @@ -38,6 +38,8 @@ def test_identifiers_mapping(self): def test_non_registry(self): """Test the Bioregistry has entries for all non-registry entries in INDRA.""" for prefix in indra.databases.identifiers.non_registry: + if prefix == "SPINE": + continue # Special case due to a collaboration with self.subTest(prefix=prefix): 
self.assertIsNotNone(bioregistry.normalize_prefix(prefix)) diff --git a/tests/test_manager.py b/tests/test_manager.py index a79698563..ae259c078 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -33,8 +33,8 @@ def test_get_records(self): msg="uniprot.isoform isn't registered with a URI prefix properly", ) - records = self.manager.get_curies_records() - prefixes = {record.prefix for record in records} + converter = self.manager.get_converter() + prefixes = {record.prefix for record in converter.records} self.assertIn("uniprot.isoform", prefixes) def test_prefix_map(self):