From 087a2df8e92efa1268e49e04ceb646dd239ad802 Mon Sep 17 00:00:00 2001 From: Carlo Dapor Date: Mon, 27 May 2024 00:27:11 +0200 Subject: [PATCH] Support for replace with regexp This PR addresses issues #346 and #347. Added support for Regexp pattern "Predefined character classes". --- .../com/schibsted/spt/data/jslt/parser/ParserImpl.java | 8 +++++++- core/src/test/resources/function-tests.json | 9 +++++++-- core/src/test/resources/json-parse-error-tests.json | 1 - functions.md | 6 +++--- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java index f5acc006..bc4ab161 100644 --- a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java +++ b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java @@ -503,7 +503,13 @@ private static String makeString(ParseContext ctx, Token literal) { result[pos++] = ch; else { ch = string.charAt(++ix); - + // special Regexp characters, s. https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html, + // "Predefined character classes". 
+ if ("dDhHsSvVwW".contains(String.valueOf(ch))) { + result[pos++] = '\\'; + result[pos++] = ch; + continue; + } switch (ch) { case '\\': result[pos++] = ch; break; case '"': result[pos++] = ch; break; diff --git a/core/src/test/resources/function-tests.json b/core/src/test/resources/function-tests.json index 49f28275..3408c03f 100644 --- a/core/src/test/resources/function-tests.json +++ b/core/src/test/resources/function-tests.json @@ -1128,15 +1128,20 @@ "output": "\"\"" }, { - "query": "replace-regexp(., \"([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])\", \"$2/$3/$1\")", + "query": "replace-regexp(., \"(\\d{4})-(\\d{2})-(\\d{2})\", \"$2/$3/$1\")", "input" : "\"2019-12-31\"", "output": "\"12/31/2019\"" }, { - "query": "replace-regexp(., \"(?<year>[0-9][0-9][0-9][0-9])-(?<month>[0-9][0-9])-(?<day>[0-9][0-9])\", \"${day}.${month}.${year}\")", + "query": "replace-regexp(., \"(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})\", \"${day}.${month}.${year}\")", "input" : "\"2019-12-31\"", "output": "\"31.12.2019\"" }, + { + "query": "replace-regexp(., \"([a-z]+)\", \"$1\")", + "input" : "\"2019-12-31\"", + "output" : "\"2019-12-31\"" + }, { "query": "trim(.)", "input" : "\"some text\"", diff --git a/core/src/test/resources/json-parse-error-tests.json b/core/src/test/resources/json-parse-error-tests.json index 9a98f998..487245fb 100644 --- a/core/src/test/resources/json-parse-error-tests.json +++ b/core/src/test/resources/json-parse-error-tests.json @@ -1,7 +1,6 @@ { "description" : "Tests that should cause the JSLT parser to declare JSON syntax error.", "tests" : [ - "\" \\d \"", "\"\\u\"", "\"\\u0\"", "\"\\u00\"", diff --git a/functions.md b/functions.md index e41042a8..8fbc219b 100644 --- a/functions.md +++ b/functions.md @@ -523,10 +523,10 @@ If the `regexp` does not match the input,`out` corresponds to `value`. 
Examples: ``` -replace-regexp("2019-12-31", "([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])", "$2/$3/$1") +replace-regexp("2019-12-31", "(\\d{4})-(\\d{2})-(\\d{2})", "$2/$3/$1") => "12/31/2019" -replace-regexp("2019-12-31", "(?<year>[0-9][0-9][0-9][0-9])-(?<month>[0-9][0-9])-(?<day>[0-9][0-9])", - => "${day}.${month}.${year}") => "31.12.2019" +replace-regexp("2019-12-31", "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})", "${day}.${month}.${year}") + => "31.12.2019" replace-regexp("2019-12-31", "([a-z]+)", "$1") => "2019-12-31" ```