From cb15e1439c5446b2341c1217a5c724f59450179b Mon Sep 17 00:00:00 2001 From: Lukas Plank Date: Mon, 28 Oct 2024 14:01:19 +0100 Subject: [PATCH] fix: correct regex pattern for SELECT clause extraction The new regex non-greedily matches everything after "select" and before "where". The pattern uses ".*?" together with the re.DOTALL flag, which is equivalent to "[\s\S]*?" and therefore also matches linebreaks in multi-line SELECT clauses. See https://docs.python.org/3/library/re.html#re.DOTALL. Fixes #122. --- rdfproxy/utils/sparql_utils.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/rdfproxy/utils/sparql_utils.py b/rdfproxy/utils/sparql_utils.py index b76cb45..5398cd2 100644 --- a/rdfproxy/utils/sparql_utils.py +++ b/rdfproxy/utils/sparql_utils.py @@ -22,18 +22,21 @@ def replace_query_select_clause(query: str, repl: str) -> str: """Replace the SELECT clause of a query with repl.""" - if re.search(r"select\s.+", query, re.I) is None: + pattern: re.Pattern = re.compile( + r"select\s+.*?(?=\s+where)", flags=re.IGNORECASE | re.DOTALL + ) + + if re.search(pattern=pattern, string=query) is None: raise Exception("Unable to obtain SELECT clause.") - count_query = re.sub( - pattern=r"select\s.+", + modified_query = re.sub( + pattern=pattern, repl=repl, string=query, count=1, - flags=re.I, ) - return count_query + return modified_query def construct_count_query(query: str, model: type[_TModelInstance]) -> str: