From c3d6c413abfbdc7e99357049395fa12653822137 Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Tue, 10 Dec 2024 11:02:02 +0100 Subject: [PATCH 1/3] chore(deps): install httpx --- poetry.lock | 10 +++++----- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index d3bd3a7..f5405f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -317,13 +317,13 @@ test = ["Cython (>=0.29.24,<0.30.0)"] [[package]] name = "httpx" -version = "0.27.0" +version = "0.28.1" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" files = [ - {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, - {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, ] [package.dependencies] @@ -331,13 +331,13 @@ anyio = "*" certifi = "*" httpcore = "==1.*" idna = "*" -sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] [[package]] name = "idna" @@ -1246,4 +1246,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "4e42db8b57c13c2a19b9920a69d9d853bb73bd0c7d34fcf6fb9fbfacc93c5d1d" +content-hash = "e9ac3d16b289eb2f29bfa0587a17105664b9951db114dfe9d2e9a35b1265e117" diff --git a/pyproject.toml b/pyproject.toml index ee5a8e4..e245608 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ sparqlwrapper = "^2.0.0" pydantic = "^2.9.2" +httpx = "^0.28.1" [tool.poetry.group.dev.dependencies] ruff = "^0.7.0" deptry = "^0.20.0" From 
705077a5a0b49efdbdce5a60e4f457aa3cc9ac67 Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Tue, 10 Dec 2024 11:02:18 +0100 Subject: [PATCH 2/3] feat: expose SPARQLStrategies over library interface --- rdfproxy/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rdfproxy/__init__.py b/rdfproxy/__init__.py index 984bb66..abb8cfd 100644 --- a/rdfproxy/__init__.py +++ b/rdfproxy/__init__.py @@ -1,4 +1,9 @@ from rdfproxy.adapter import SPARQLModelAdapter # noqa: F401 from rdfproxy.mapper import ModelBindingsMapper # noqa: F401 +from rdfproxy.sparql_strategies import ( + SPARQLStrategy, # noqa: F401 + SPARQLWrapperStrategy, # noqa: F401 + HttpxStrategy, # noqa: F401 +) from rdfproxy.utils._types import ConfigDict, SPARQLBinding # noqa: F401 from rdfproxy.utils.models import Page, QueryParameters # noqa: F401 From 8e32a05aa60db58ddc2c83cf8dc15b9dd96f9a2b Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Tue, 10 Dec 2024 11:02:49 +0100 Subject: [PATCH 3/3] feat: implement strategies for SPARQL query functionality Currently, rdfproxy relies on SPARQLWrapper for querying triplestores; this is not ideal since SPARQLWrapper occasionally gets blacklisted (e.g. by Wikidata) and has caused severe performance issues in the past. This change introduces SPARQL query strategies for better control over which SPARQL query backend runs in RDFProxy. The SPARQLWrapperStrategy implements exactly the previous SPARQLWrapper behavior. An HttpxStrategy (default) implements the query functionality required by RDFProxy using raw httpx instead of SPARQLWrapper. Closes #169.
--- rdfproxy/adapter.py | 22 ++++++------- rdfproxy/sparql_strategies.py | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 rdfproxy/sparql_strategies.py diff --git a/rdfproxy/adapter.py b/rdfproxy/adapter.py index ef93ad3..ad98717 100644 --- a/rdfproxy/adapter.py +++ b/rdfproxy/adapter.py @@ -4,15 +4,14 @@ import math from typing import Generic -from SPARQLWrapper import JSON, SPARQLWrapper from rdfproxy.mapper import ModelBindingsMapper +from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy from rdfproxy.utils._types import _TModelInstance from rdfproxy.utils.models import Page, QueryParameters from rdfproxy.utils.sparql_utils import ( calculate_offset, construct_count_query, construct_items_query, - query_with_wrapper, ) @@ -32,15 +31,16 @@ class SPARQLModelAdapter(Generic[_TModelInstance]): """ def __init__( - self, target: str | SPARQLWrapper, query: str, model: type[_TModelInstance] + self, + target: str, + query: str, + model: type[_TModelInstance], + sparql_strategy: type[SPARQLStrategy] = HttpxStrategy, ) -> None: self._query = query self._model = model - self.sparql_wrapper: SPARQLWrapper = ( - SPARQLWrapper(target) if isinstance(target, str) else target - ) - self.sparql_wrapper.setReturnFormat(JSON) + self.sparql_strategy = sparql_strategy(target) def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]: """Run a query against an endpoint and return a Page model object.""" @@ -52,9 +52,7 @@ def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]: offset=calculate_offset(query_parameters.page, query_parameters.size), ) - items_query_bindings: Iterator[dict] = query_with_wrapper( - query=items_query, sparql_wrapper=self.sparql_wrapper - ) + items_query_bindings: Iterator[dict] = self.sparql_strategy.query(items_query) mapper = ModelBindingsMapper(self._model, *items_query_bindings) @@ -75,7 +73,5 @@ def _get_count(self, query: str) -> int: 
Helper for SPARQLModelAdapter.query. """ - result: Iterator[dict] = query_with_wrapper( - query=query, sparql_wrapper=self.sparql_wrapper - ) + result: Iterator[dict] = self.sparql_strategy.query(query) return int(next(result)["cnt"]) diff --git a/rdfproxy/sparql_strategies.py b/rdfproxy/sparql_strategies.py new file mode 100644 index 0000000..6b61860 --- /dev/null +++ b/rdfproxy/sparql_strategies.py @@ -0,0 +1,58 @@ +"""Strategy classes for SPARQL query functionality.""" + +import abc +from collections.abc import Iterator + +from SPARQLWrapper import JSON, QueryResult, SPARQLWrapper +import httpx + + +class SPARQLStrategy(abc.ABC): + def __init__(self, endpoint: str): + self.endpoint = endpoint + + @abc.abstractmethod + def query(self, sparql_query: str) -> Iterator[dict[str, str]]: + raise NotImplementedError + + @staticmethod + def _get_bindings_from_bindings_dict(bindings_dict: dict) -> Iterator[dict]: + bindings = map( + lambda binding: {k: v["value"] for k, v in binding.items()}, + bindings_dict["results"]["bindings"], + ) + return bindings + + +class SPARQLWrapperStrategy(SPARQLStrategy): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._sparql_wrapper = SPARQLWrapper(self.endpoint) + self._sparql_wrapper.setReturnFormat(JSON) + + def query(self, sparql_query: str) -> Iterator[dict[str, str]]: + self._sparql_wrapper.setQuery(sparql_query) + + result: QueryResult = self._sparql_wrapper.query() + return self._get_bindings_from_bindings_dict(result.convert()) + + +class HttpxStrategy(SPARQLStrategy): + def query(self, sparql_query: str) -> Iterator[dict[str, str]]: + result: httpx.Response = self._httpx_run_sparql_query(sparql_query) + return self._get_bindings_from_bindings_dict(result.json()) + + def _httpx_run_sparql_query(self, query: str) -> httpx.Response: + data = {"output": "json", "query": query} + headers = { + "Accept": "application/sparql-results+json", + } + + response = httpx.post( + self.endpoint, + 
headers=headers, + data=data, + ) + + return response