From 5978a37bdfc57a09e5684c46e2f0c44f7adfd38a Mon Sep 17 00:00:00 2001 From: Gary Fredericks Date: Sat, 10 Aug 2019 20:32:02 -0500 Subject: [PATCH] Support large unicode characters in bracket classes This is an oversight from several commits ago, as I didn't know these could show up there. --- src/com/gfredericks/test/chuck/regexes.clj | 12 ++++++++++-- test/com/gfredericks/test/chuck/regexes_test.clj | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/com/gfredericks/test/chuck/regexes.clj b/src/com/gfredericks/test/chuck/regexes.clj index 421316e..55eefa1 100644 --- a/src/com/gfredericks/test/chuck/regexes.clj +++ b/src/com/gfredericks/test/chuck/regexes.clj @@ -129,6 +129,11 @@ {:type :large-unicode-character :code-point code-point})) +(defn ^:private large-unicode-character->string + [code-point] + (str (Character/highSurrogate code-point) + (Character/lowSurrogate code-point))) + (defn analyze [parsed-regex] (insta/transform @@ -478,6 +483,10 @@ [m] (charsets/singleton (str (:character m)))) +(defmethod compile-class :large-unicode-character + [m] + (charsets/singleton (large-unicode-character->string (:code-point m)))) + (defmulti analyzed->generator :type) (defmethod analyzed->generator :default @@ -510,8 +519,7 @@ (defmethod analyzed->generator :large-unicode-character [{:keys [code-point]}] - (gen/return (str (Character/highSurrogate code-point) - (Character/lowSurrogate code-point)))) + (gen/return (large-unicode-character->string code-point))) (defmethod analyzed->generator :repetition [{:keys [elements bounds]}] diff --git a/test/com/gfredericks/test/chuck/regexes_test.clj b/test/com/gfredericks/test/chuck/regexes_test.clj index f634d20..1cad086 100644 --- a/test/com/gfredericks/test/chuck/regexes_test.clj +++ b/test/com/gfredericks/test/chuck/regexes_test.clj @@ -171,7 +171,7 @@ ;; tests strange behavior in the QE-unquoter related to initial ;; digits - "\\c\\Q0"]) + "\\c\\Q0" "[^\\x{2f498}]"]) (defspec generator-regression-spec (times 1000) ;; TODO: make a prop in test.chuck that's like for