From 988f8367f1755b2e3202d88e3489306262447038 Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Thu, 24 Oct 2024 20:13:00 +0200 Subject: [PATCH] feat: implement pagination behavior for grouped models Closes #114. --- rdfproxy/adapter.py | 10 ++++-- rdfproxy/utils/_types.py | 6 +++- rdfproxy/utils/sparql_utils.py | 63 +++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/rdfproxy/adapter.py b/rdfproxy/adapter.py index 0b3fe48..68148d8 100644 --- a/rdfproxy/adapter.py +++ b/rdfproxy/adapter.py @@ -6,13 +6,13 @@ from SPARQLWrapper import JSON, SPARQLWrapper from rdfproxy.mapper import ModelBindingsMapper -from rdfproxy.utils._types import _TModelInstance +from rdfproxy.utils._types import ItemsQueryConstructor, _TModelInstance from rdfproxy.utils.models import Page from rdfproxy.utils.sparql_utils import ( calculate_offset, construct_count_query, + get_items_query_constructor, query_with_wrapper, - ungrouped_pagination_base_query, ) @@ -50,7 +50,11 @@ def query( ) -> Page[_TModelInstance]: """Run a query against an endpoint and return a Page model object.""" count_query: str = construct_count_query(query=self._query, model=self._model) - items_query: str = ungrouped_pagination_base_query.substitute( + + _items_query_constructor: ItemsQueryConstructor = get_items_query_constructor( + self._model + ) + items_query: str = _items_query_constructor( query=self._query, offset=calculate_offset(page, size), limit=size ) diff --git a/rdfproxy/utils/_types.py b/rdfproxy/utils/_types.py index 68df506..d00b074 100644 --- a/rdfproxy/utils/_types.py +++ b/rdfproxy/utils/_types.py @@ -1,6 +1,6 @@ """Type definitions for rdfproxy.""" -from typing import TypeVar +from typing import Protocol, TypeVar from pydantic import BaseModel @@ -8,6 +8,10 @@ _TModelInstance = TypeVar("_TModelInstance", bound=BaseModel) +class ItemsQueryConstructor(Protocol): + def __call__(self, query: str, offset: int, limit: int) -> str: ... + + class SPARQLBinding(str): """SPARQLBinding type for explicit SPARQL binding to model field allocation. diff --git a/rdfproxy/utils/sparql_utils.py b/rdfproxy/utils/sparql_utils.py index b76cb45..fd97f54 100644 --- a/rdfproxy/utils/sparql_utils.py +++ b/rdfproxy/utils/sparql_utils.py @@ -2,22 +2,25 @@ from collections.abc import Iterator from contextlib import contextmanager +from functools import partial import re from string import Template -from typing import Annotated +from textwrap import indent from typing import cast from SPARQLWrapper import QueryResult, SPARQLWrapper -from rdfproxy.utils._types import _TModelInstance +from rdfproxy.utils._types import ItemsQueryConstructor, _TModelInstance -ungrouped_pagination_base_query: Annotated[ - str, "SPARQL template for query pagination." -] = Template(""" -$query -limit $limit -offset $offset -""") +def construct_ungrouped_pagination_query(query: str, limit: int, offset: int) -> str: + """Construct an ungrouped pagination query.""" + template: Template = Template(""" + $query + limit $limit + offset $offset + """) + + return template.substitute(query=query, limit=limit, offset=offset) def replace_query_select_clause(query: str, repl: str) -> str: @@ -36,6 +39,48 @@ def replace_query_select_clause(query: str, repl: str) -> str: return count_query +def inject_subquery( + query: str, subquery: str, indent_depth: int = 4, indent_char: str = " " +) -> str: + """Inject a SPARQL query with a subquery. + + Also apply some basic indentation. + """ + indent_value = indent_char * indent_depth + indented_subquery = indent(f"\n{subquery}\n", indent_value) + indented_subclause = indent(f"\n{{{indented_subquery}}}", indent_value) + return re.sub(r".*\}$", f"{indented_subclause}\n}}", query) + + +def construct_grouped_pagination_query( + query: str, group_by_value: str, limit: int, offset: int +) -> str: + """Construct a grouped pagination query.""" + _subquery_base: str = replace_query_select_clause( + query=query, repl=f"select distinct ?{group_by_value}" + ) + subquery: str = construct_ungrouped_pagination_query( + query=_subquery_base, limit=limit, offset=offset + ) + + grouped_pagination_query: str = inject_subquery(query=query, subquery=subquery) + return grouped_pagination_query + + +def get_items_query_constructor( + model: type[_TModelInstance], +) -> ItemsQueryConstructor: + """Get the applicable query constructor function given a model class.""" + if (group_by_value := model.model_config.get("group_by", None)) is None: + query_constructor = construct_ungrouped_pagination_query + else: + query_constructor = partial( + construct_grouped_pagination_query, group_by_value=group_by_value + ) + + return cast(ItemsQueryConstructor, query_constructor) + + def construct_count_query(query: str, model: type[_TModelInstance]) -> str: """Construct a generic count query from a SELECT query.""" try: