Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: query constructor rewrite #175

Merged
merged 3 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 9 additions & 34 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pydantic = "^2.9.2"


httpx = "^0.28.1"
rdflib = "^7.1.1"
[tool.poetry.group.dev.dependencies]
ruff = "^0.7.0"
deptry = "^0.20.0"
Expand Down
33 changes: 12 additions & 21 deletions rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@
import math
from typing import Generic

from rdfproxy.constructor import QueryConstructor
from rdfproxy.mapper import ModelBindingsMapper
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.models import Page, QueryParameters
from rdfproxy.utils.sparql_utils import (
calculate_offset,
construct_count_query,
construct_items_query,
)


class SPARQLModelAdapter(Generic[_TModelInstance]):
Expand All @@ -24,10 +20,12 @@ class SPARQLModelAdapter(Generic[_TModelInstance]):
SPARQLModelAdapter.query returns a Page model object with a default pagination size of 100 results.

SPARQL bindings are implicitly assigned to model fields of the same name,
explicit SPARQL binding to model field allocation is available with typing.Annotated and rdfproxy.SPARQLBinding.
explicit SPARQL binding to model field allocation is available with rdfproxy.SPARQLBinding.

Result grouping is controlled through the model,
i.e. grouping is triggered when a field of list[pydantic.BaseModel] is encountered.

See https://github.com/acdh-oeaw/rdfproxy/tree/main/examples for examples.
"""

def __init__(
Expand All @@ -44,20 +42,21 @@ def __init__(

def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
"""Run a query against an endpoint and return a Page model object."""
count_query: str = construct_count_query(query=self._query, model=self._model)
items_query: str = construct_items_query(
query_constructor = QueryConstructor(
query=self._query,
query_parameters=query_parameters,
model=self._model,
limit=query_parameters.size,
offset=calculate_offset(query_parameters.page, query_parameters.size),
)

items_query_bindings: Iterator[dict] = self.sparql_strategy.query(items_query)
count_query = query_constructor.get_count_query()
items_query = query_constructor.get_items_query()

items_query_bindings: Iterator[dict] = self.sparql_strategy.query(items_query)
mapper = ModelBindingsMapper(self._model, *items_query_bindings)

items: list[_TModelInstance] = mapper.get_models()
total: int = self._get_count(count_query)

count_query_bindings: Iterator[dict] = self.sparql_strategy.query(count_query)
total: int = int(next(count_query_bindings)["cnt"])
pages: int = math.ceil(total / query_parameters.size)

return Page(
Expand All @@ -67,11 +66,3 @@ def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
total=total,
pages=pages,
)

def _get_count(self, query: str) -> int:
"""Run a count query and return the count result.

Helper for SPARQLModelAdapter.query.
"""
result: Iterator[dict] = self.sparql_strategy.query(query)
return int(next(result)["cnt"])
124 changes: 124 additions & 0 deletions rdfproxy/constructor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.models import QueryParameters
from rdfproxy.utils.sparql_utils import (
add_solution_modifier,
get_query_projection,
inject_into_query,
remove_sparql_prefixes,
replace_query_select_clause,
)
from rdfproxy.utils.utils import (
FieldsBindingsMap,
QueryConstructorComponent as component,
compose_left,
)


class QueryConstructor:
"""The class encapsulates dynamic SPARQL query modification logic
for implementing purely SPARQL-based, deterministic pagination.

Public methods get_items_query and get_count_query are used in rdfproxy.SPARQLModelAdapter
to construct queries for retrieving arguments for Page object instantiation.
"""

def __init__(
self,
query: str,
query_parameters: QueryParameters,
model: type[_TModelInstance],
) -> None:
self.query = query
self.query_parameters = query_parameters
self.model = model

self.bindings_map = FieldsBindingsMap(model)
self.group_by: str | None = self.bindings_map.get(
model.model_config.get("group_by")
)

def get_items_query(self) -> str:
"""Construct a SPARQL items query for use in rdfproxy.SPARQLModelAdapter."""
if self.group_by is None:
return self._get_ungrouped_items_query()
return self._get_grouped_items_query()

def get_count_query(self) -> str:
"""Construct a SPARQL count query for use in rdfproxy.SPARQLModelAdapter"""
if self.group_by is None:
select_clause = "select (count(*) as ?cnt)"
else:
select_clause = f"select (count(distinct ?{self.group_by}) as ?cnt)"

return replace_query_select_clause(self.query, select_clause)

@staticmethod
def _calculate_offset(page: int, size: int) -> int:
"""Calculate the offset value for paginated SPARQL templates."""
match page:
case 1:
return 0
case 2:
return size
case _:
return size * (page - 1)

def _get_grouped_items_query(self) -> str:
"""Construct a SPARQL items query for grouped models."""
filter_clause: str | None = self._compute_filter_clause()
select_clause: str = self._compute_select_clause()
order_by_value: str = self._compute_order_by_value()
limit, offset = self._compute_limit_offset()

subquery = compose_left(
remove_sparql_prefixes,
component(replace_query_select_clause, repl=select_clause),
component(inject_into_query, injectant=filter_clause),
component(
add_solution_modifier,
order_by=order_by_value,
limit=limit,
offset=offset,
),
)(self.query)

return inject_into_query(self.query, subquery)

def _get_ungrouped_items_query(self) -> str:
"""Construct a SPARQL items query for ungrouped models."""
filter_clause: str | None = self._compute_filter_clause()
order_by_value: str = self._compute_order_by_value()
limit, offset = self._compute_limit_offset()

return compose_left(
component(inject_into_query, injectant=filter_clause),
component(
add_solution_modifier,
order_by=order_by_value,
limit=limit,
offset=offset,
),
)(self.query)

def _compute_limit_offset(self) -> tuple[int, int]:
"""Calculate limit and offset values for SPARQL-based pagination."""
limit = self.query_parameters.size
offset = self._calculate_offset(
self.query_parameters.page, self.query_parameters.size
)

return limit, offset

def _compute_filter_clause(self) -> str | None:
"""Stub: Always None for now."""
return None

def _compute_select_clause(self):
"""Stub: Static SELECT clause for now."""
return f"select distinct ?{self.group_by}"

def _compute_order_by_value(self):
"""Stub: Only basic logic for now."""
if self.group_by is None:
return get_query_projection(self.query)[0]
return f"{self.group_by}"
3 changes: 1 addition & 2 deletions rdfproxy/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from pydantic import BaseModel
from rdfproxy.utils._types import ModelBoolPredicate, _TModelInstance
from rdfproxy.utils.utils import (
from rdfproxy.utils.mapper_utils import (
_collect_values_from_bindings,
_get_group_by,
_get_key_from_metadata,
Expand Down Expand Up @@ -65,7 +65,6 @@ def _generate_binding_pairs(
and (x[self._contexts[0]] == kwargs[self._contexts[0]]),
self.bindings,
)

value = self._get_unique_models(group_model, applicable_bindings)

elif _is_list_type(v.annotation):
Expand Down
7 changes: 5 additions & 2 deletions rdfproxy/sparql_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import abc
from collections.abc import Iterator
from typing import cast

from SPARQLWrapper import JSON, QueryResult, SPARQLWrapper
import httpx
Expand All @@ -13,7 +14,7 @@ def __init__(self, endpoint: str):

@abc.abstractmethod
def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
raise NotImplementedError
raise NotImplementedError # pragma: no cover

@staticmethod
def _get_bindings_from_bindings_dict(bindings_dict: dict) -> Iterator[dict]:
Expand All @@ -35,7 +36,9 @@ def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
self._sparql_wrapper.setQuery(sparql_query)

result: QueryResult = self._sparql_wrapper.query()
return self._get_bindings_from_bindings_dict(result.convert())
# SPARQLWrapper.Wrapper.convert is not overloaded properly and needs casting
# https://github.com/RDFLib/sparqlwrapper/blob/master/SPARQLWrapper/Wrapper.py#L1135
return self._get_bindings_from_bindings_dict(cast(dict, result.convert()))


class HttpxStrategy(SPARQLStrategy):
Expand Down
Loading
Loading