From a5292ecfcb420ca387b4fba6bf486327d24d2ca0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 14 Oct 2024 09:24:59 -0500 Subject: [PATCH 1/2] xfail regexp tests to unblock CI Signed-off-by: Jason Lowe --- integration_tests/src/main/python/regexp_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index c2062605ca1..0de404953a9 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -284,6 +284,7 @@ def test_re_replace(): # We have shims to support empty strings for zero-repetition patterns # See https://github.com/NVIDIA/spark-rapids/issues/5456 +@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_re_replace_repetition(): gen = mk_str_gen('.{0,5}TEST[\ud720 A]{0,5}') assert_gpu_and_cpu_are_equal_collect( @@ -698,6 +699,7 @@ def test_regexp_octal_digits(): ), conf=_regexp_conf) +@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_regexp_replace_digit(): gen = mk_str_gen('[a-z]{0,2}[0-9]{0,2}') \ .with_special_case('䤫畍킱곂⬡❽ࢅ獰᳌蛫青') \ @@ -1076,6 +1078,7 @@ def test_regexp_memory_fallback(): } ) +@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/11600") def test_regexp_memory_ok(): gen = StringGen('test') assert_gpu_and_cpu_are_equal_collect( From aa9e3e91688dc674211964d57a669a1480718cd3 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 14 Oct 2024 09:38:55 -0500 Subject: [PATCH 2/2] Disable failing regexp unit tests to unblock CI --- .../RegularExpressionTranspilerSuite.scala | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala index a60ea50ef4e..e1c06a88fa1 100644 --- 
a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionTranspilerSuite.scala @@ -418,7 +418,8 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } - test("replace_replace - ?, *, +, and {0, n} repetitions") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("replace_replace - ?, *, +, and {0, n} repetitions") { val patterns = Seq("D?", "D*", "D+", "D{0,}", "D{0,1}", "D{0,5}", "[1a-zA-Z]{0,}", "[1a-zA-Z]{0,2}", "A+") val inputs = Seq("SS", "DD", "SDSDSDS", "DDDD", "DDDDDD", "ABCDEFG") @@ -710,23 +711,27 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - test("AST fuzz test - regexp_find") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("AST fuzz test - regexp_find") { doAstFuzzTest(Some(REGEXP_LIMITED_CHARS_FIND), REGEXP_LIMITED_CHARS_FIND, RegexFindMode) } - test("AST fuzz test - regexp_replace") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("AST fuzz test - regexp_replace") { doAstFuzzTest(Some(REGEXP_LIMITED_CHARS_REPLACE), REGEXP_LIMITED_CHARS_REPLACE, RegexReplaceMode) } - test("AST fuzz test - regexp_find - full unicode input") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("AST fuzz test - regexp_find - full unicode input") { assume(isUnicodeEnabled()) doAstFuzzTest(None, REGEXP_LIMITED_CHARS_REPLACE, RegexFindMode) } - test("AST fuzz test - regexp_replace - full unicode input") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("AST fuzz test - regexp_replace - full unicode input") { assume(isUnicodeEnabled()) doAstFuzzTest(None, REGEXP_LIMITED_CHARS_REPLACE, RegexReplaceMode) @@ -736,7 +741,8 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { Charset.defaultCharset().name() == "UTF-8" } - test("AST 
fuzz test - regexp_find - anchor focused") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("AST fuzz test - regexp_find - anchor focused") { doAstFuzzTest(validDataChars = Some("\r\nabc"), validPatternChars = "^$\\AZz\r\n()[]-", mode = RegexFindMode) } @@ -778,7 +784,8 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - test("regexp_split - repetition with {0,n}, or {0,}") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("regexp_split - repetition with {0,n}, or {0,}") { // see https://github.com/NVIDIA/spark-rapids/issues/6958 val patterns = Set("ba{0,}", raw"a\02{0,}", "ba{0,2}", raw"b\02{0,10}") val data = Seq("abaa", "baba", "ba\u0002b", "ab\u0002b\u0002a") @@ -832,7 +839,8 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - test("string split fuzz") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("string split fuzz") { val (data, patterns) = generateDataAndPatterns(Some(REGEXP_LIMITED_CHARS_REPLACE), REGEXP_LIMITED_CHARS_REPLACE, RegexSplitMode) for (limit <- Seq(-2, -1, 2, 5)) { @@ -840,7 +848,8 @@ class RegularExpressionTranspilerSuite extends AnyFunSuite { } } - test("string split fuzz - anchor focused") { + // Disabling until https://github.com/NVIDIA/spark-rapids/issues/11600 is fixed + ignore("string split fuzz - anchor focused") { val (data, patterns) = generateDataAndPatterns(validDataChars = Some("\r\nabc"), validPatternChars = "^$\\AZz\r\n()", RegexSplitMode) doStringSplitTest(patterns, data, -1)