diff --git a/biolink/api/ontol/endpoints/identifier.py b/biolink/api/ontol/endpoints/identifier.py new file mode 100644 index 00000000..f100b9ae --- /dev/null +++ b/biolink/api/ontol/endpoints/identifier.py @@ -0,0 +1,36 @@ +import logging + +from flask import request +from flask_restplus import Resource +from biolink.datamodel.serializers import association +from biolink.api.restplus import api +from ontobio.sparql.sparql_ontol_utils import batch_fetch_ids +import pysolr + +log = logging.getLogger(__name__) + +parser = api.parser() +parser.add_argument('label', action='append', help='List of labels', required=True) + +class OntolIdentifierResource(Resource): + + @api.expect(parser) + def get(self): + """ + Fetches a map from labels to CURIEs/IDs + """ + args = parser.parse_args() + + return batch_fetch_ids(args.label) + + @api.expect(parser) + def post(self): + """ + Fetches a map from labels to CURIEs/IDs. + + Takes 'label' list argument either as a querystring argument or as a key + in the POST body when content-type is application/json. 
+ """ + args = parser.parse_args() + + return batch_fetch_ids(args.label) diff --git a/biolink/api/ontol/endpoints/labeler.py b/biolink/api/ontol/endpoints/labeler.py index 233a23d2..8b55b44e 100644 --- a/biolink/api/ontol/endpoints/labeler.py +++ b/biolink/api/ontol/endpoints/labeler.py @@ -10,10 +10,9 @@ log = logging.getLogger(__name__) parser = api.parser() -parser.add_argument('id', action='append', help='List of ids') +parser.add_argument('id', action='append', help='List of ids', required=True) class OntolLabelerResource(Resource): - @api.expect(parser) def get(self): """ @@ -22,8 +21,3 @@ def get(self): args = parser.parse_args() return batch_fetch_labels(args.id) - - - - - diff --git a/conf/routes.yaml b/conf/routes.yaml index f093c8a1..e105a448 100644 --- a/conf/routes.yaml +++ b/conf/routes.yaml @@ -309,6 +309,11 @@ route_mapping: routes: - route: / resource: biolink.api.ontol.endpoints.labeler.OntolLabelerResource + - name: ontol/identifier + description: Retrieve IDs for labels + routes: + - route: / + resource: biolink.api.ontol.endpoints.identifier.OntolIdentifierResource # - name: ontol/enrichment # description: To be implemented # routes: diff --git a/requirements.txt b/requirements.txt index 9374925f..cbef368e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ prefixcommons>=0.0 # ontobio>=2.7.14 -git+https://github.com/monarch-initiative/ontobio.git@baf3b5a7344fe6dda9c3980c8d472655431cb5df#egg=ontobio +git+https://github.com/monarch-initiative/ontobio.git@91222c8b442196d6eeeafeb6073946494e8a3a10#egg=ontobio pip>=9.0.1 wheel>0.25.0 markupsafe==2.0.1 diff --git a/tests/unit/test_ontol_identifier.py b/tests/unit/test_ontol_identifier.py new file mode 100644 index 00000000..380726d2 --- /dev/null +++ b/tests/unit/test_ontol_identifier.py @@ -0,0 +1,89 @@ +from pprint import pprint + +import pytest +from biolink.app import app +from biolink.api.sim.endpoints.owlsim import get_owlsim_api + + +class TestOntolIdentifier(): + """ + 
Integration tests for ontol identifier endpoint, using labeler as a comparison. + + Specifically, does a sanity check against identifier, then determines if we + can (somewhat) losslessly map a set of labels to IDs and back + """ + + @classmethod + def setup_class(self): + app.testing = True + self.test_client = app.test_client() + + @classmethod + def teardown_class(self): + self.test_client = None + + def test_sample_identifier(self): + response = self.test_client.get('/api/ontol/identifier?label=Mus%20musculus') + assert response.status_code == 200 + + # todo: assert we have results, and that the response matches a known entry + assert len(response.json.items()) > 0 + assert ( + response.json == { + "Mus musculus": [ + "NCBITaxon:10090", + "OBO:FBsp_00000276", + "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C45247", + "VTO:0014661" + ] + } + ) + + @pytest.mark.xfail(reason="/ontol/labeler returns just one result, so it can't be onto with /ontol/identifier") + def test_label_id_nonlossy(self): + labels = [ + "Sus scrofa", + "Drosophila melanogaster", + "Homo sapiens", + "Mus musculus", + "Bos taurus", + "Saccharomyces cerevisiae S288C", + "Xenopus tropicalis", + "Danio rerio", + "Gallus gallus", + "Anolis carolinensis", + "Canis lupus familiaris", + "Felis catus", + "Macaca mulatta", + "Monodelphis domestica", + "Ornithorhynchus anatinus", + "Pan troglodytes", + "Rattus norvegicus", + "Takifugu rubripes", + "Equus caballus" + ] + + response = self.test_client.post('/api/ontol/identifier', data={'label': labels}) + pprint(response.json) + + assert response.status_code == 200 + + # todo: assert we have results + assert len(response.json.items()) > 0 + + # extract the first ID and its label from each result + id_set = {ids[0]: label for label, ids in response.json.items()} + + # produce a query of IDs to labels using the /ontol/labeler/ endpoint + response = self.test_client.get( + '/api/ontol/labeler/?%s' % "&".join("id=%s" % x for x in id_set.keys()) + ) + 
pprint(response.json) + + # gather the results and compare to the initial set of labels + orignal_set = set(labels) + response_set = set(response.json.values()) + + assert orignal_set == response_set, ( + "Response labels and originals don't match! difference: %s" % orignal_set.difference(response_set) + )