From 0089d25ea2ba014bac905d074a483a62faeb815b Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 16 Oct 2024 16:50:39 -0500 Subject: [PATCH] Revert "Disable regex tests to unblock CI (#11606)" (#11612) This reverts commit 2d3e0ec724713d80ff91e1d419f5157e18bff69b. Signed-off-by: Jason Lowe --- .../src/main/python/regexp_test.py | 3 --- .../RegularExpressionTranspilerSuite.scala | 27 +++++++------------ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index 0de404953a9..c2062605ca1 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -284,7 +284,6 @@ def test_re_replace(): # We have shims to support empty strings for zero-repetition patterns # See https://github.com/NVIDIA/spark-rapids/issues/5456 -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_re_replace_repetition(): gen = mk_str_gen('.{0,5}TEST[\ud720 A]{0,5}') assert_gpu_and_cpu_are_equal_collect( @@ -699,7 +698,6 @@ def test_regexp_octal_digits(): ), conf=_regexp_conf) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_regexp_replace_digit(): gen = mk_str_gen('[a-z]{0,2}[0-9]{0,2}') \ .with_special_case('䤫畍킱곂⬡❽ࢅ獰᳌蛫青') \ @@ -1078,7 +1076,6 @@ def test_regexp_memory_fallback(): } ) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_regexp_memory_ok(): gen = StringGen('test') assert_gpu_and_cpu_are_equal_collect( diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala index e1c06a88fa1..a60ea50ef4e 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala @@ -418,8 +418,7 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("replace_replace - ?, *, +, and {0, n} repetitions") { + test("replace_replace - ?, *, +, and {0, n} repetitions") { val patterns = Seq("D?", "D*", "D+", "D{0,}", "D{0,1}", "D{0,5}", "[1a-zA-Z]{0,}", "[1a-zA-Z]{0,2}", "A+") val inputs = Seq("SS", "DD", "SDSDSDS", "DDDD", "DDDDDD", "ABCDEFG") @@ -711,27 +710,23 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("AST fuzz test - regexp_find") { + test("AST fuzz test - regexp_find") { doAstFuzzTest(Some(REGEXP_LIMITED_CHARS_FIND), REGEXP_LIMITED_CHARS_FIND, RegexFindMode) } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("AST fuzz test - regexp_replace") { + test("AST fuzz test - regexp_replace") { doAstFuzzTest(Some(REGEXP_LIMITED_CHARS_REPLACE), REGEXP_LIMITED_CHARS_REPLACE, RegexReplaceMode) } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("AST fuzz test - regexp_find - full unicode input") { + test("AST fuzz test - regexp_find - full unicode input") { assume(isUnicodeEnabled()) doAstFuzzTest(None, REGEXP_LIMITED_CHARS_REPLACE, RegexFindMode) } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("AST fuzz test - regexp_replace - full unicode input") { + test("AST fuzz test - regexp_replace - full unicode input") { assume(isUnicodeEnabled()) doAstFuzzTest(None, REGEXP_LIMITED_CHARS_REPLACE, RegexReplaceMode) @@ -741,8 +736,7 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { Charset.defaultCharset().name() == "UTF-8" } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("AST fuzz test - regexp_find - anchor focused") { + test("AST fuzz test - regexp_find - anchor focused") { doAstFuzzTest(validDataChars = Some("\r\nabc"), validPatternChars = "^$\\AZz\r\n()[]-", mode = RegexFindMode) } @@ -784,8 +778,7 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("regexp_split - repetition with {0,n}, or {0,}") { + test("regexp_split - repetition with {0,n}, or {0,}") { // see https://github.com/NVIDIA/spark-rapids/issues/6958 val patterns = Set("ba{0,}", raw"a\02{0,}", "ba{0,2}", raw"b\02{0,10}") val data = Seq("abaa", "baba", "ba\u0002b", "ab\u0002b\u0002a") @@ -839,8 +832,7 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("string split fuzz") { + test("string split fuzz") { val (data, patterns) = generateDataAndPatterns(Some(REGEXP_LIMITED_CHARS_REPLACE), REGEXP_LIMITED_CHARS_REPLACE, RegexSplitMode) for (limit <- Seq(-2, -1, 2, 5)) { @@ -848,8 +840,7 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed - ignore("string split fuzz - anchor focused") { + test("string split fuzz - anchor focused") { val (data, patterns) = generateDataAndPatterns(validDataChars = Some("\r\nabc"), validPatternChars = "^$\\AZz\r\n()", RegexSplitMode) doStringSplitTest(patterns, data, -1)