Describe the bug
json_test.py::test_from_json_struct_timestamp failed on: Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec
The failure was reported against the JDK11 build, in the spark320 shim's IT run (JDK11-nightly/504).
FYI: the failure appeared in the commit range af91522 to 4e57f5f; the previous nightly jar, built from commit 328a514, did not report it.
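For reference, a minimal standalone sketch of the failing pattern, distilled from the test source quoted below. This assumes a RAPIDS-enabled SparkSession named `spark` and mirrors the configs the failing parametrization sets (`timestamp_format=None`, `CORRECTED`, ANSI off). Note that outside the integration-test harness the fallback is silent; it is `assert_gpu_and_cpu_are_equal_collect` that turns an unexpected CPU fallback into the "Part of the plan is not columnar" IllegalArgumentException.

```python
# Sketch only: assumes `spark` is a SparkSession with the RAPIDS plugin loaded.
from pyspark.sql import functions as f

def repro(spark):
    # Same configs the failing parametrization sets (no timestampFormat option).
    spark.conf.set('spark.rapids.sql.expression.JsonToStructs', 'true')
    spark.conf.set('spark.sql.legacy.timeParserPolicy', 'CORRECTED')
    spark.conf.set('spark.sql.ansi.enabled', 'false')
    df = spark.createDataFrame([('{ "a": "1970-01-01" }',), ('null',)], 'a string')
    # from_json to struct<a:timestamp> is the expression GpuOverrides refuses,
    # which leaves ProjectExec on the CPU.
    return df.select(f.col('a'), f.from_json('a', 'struct<a:timestamp>')).collect()
```

Relevant excerpts from the failed run: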
/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/utils.py:117: IllegalArgumentException
----------------------------- Captured stdout call -----------------------------
### CPU RUN ###
### GPU RUN ###
_ test_from_json_struct_timestamp[False-CORRECTED-None-[0-9]{0,2}\\.[0-9]{1,2}(true|false)] _
[gw3] linux -- Python 3.8.10 /usr/bin/python
timestamp_gen = '[0-9]{0,2}\\.[0-9]{1,2}(true|false)', timestamp_format = None
time_parser_policy = 'CORRECTED', ansi_enabled = False
@pytest.mark.parametrize('timestamp_gen', [
# "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"
"\"" + optional_whitespace_regex + "[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.[0-9]whitespace_regex + "\"",
# "yyyy-MM-dd"
"\"" + optional_whitespace_regex + "[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}" + optional_whitespace_regex + "\"",
# "yyyy-MM"
"\"" + optional_whitespace_regex + "[1-8]{1}[0-9]{3}-[0-3]{1,2}" + optional_whitespace_regex + "\"",
# "yyyy"
"\"" + optional_whitespace_regex + yyyy_start_0001 + optional_whitespace_regex + "\"",
# "dd/MM/yyyy"
"\"" + optional_whitespace_regex + "[0-9]{2}/[0-9]{2}/[1-8]{1}[0-9]{3}" + optional_whitespace_regex + "\"",
# special constant values
pytest.param("\"" + optional_whitespace_regex + "(now|today|tomorrow|epoch)" + optional_whitespace_regex + "\"", l(condition=is_before_spark_320(), reason="https://github.com/NVIDIA/spark-rapids/issues/9724")),
# "nnnnn" (number of days since epoch prior to Spark 3.4, throws exception from 3.4)
pytest.param("\"" + optional_whitespace_regex + "[0-9]{5}" + optional_whitespace_regex + "\"", (reason="https://github.com/NVIDIA/spark-rapids/issues/9664")),
# integral
pytest.param("[0-9]{1,5}", marks=pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/9588")),
pytest.param("[1-9]{1,8}", marks=pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/4940")),
# floating-point
"[0-9]{0,2}\.[0-9]{1,2}"
# boolean
"(true|false)"
])
@pytest.mark.parametrize('timestamp_format', [
# Even with a valid timestamp format, CPU fallback still happens since non-UTC is not supported for JSON.
pytest.param(None, marks=pytest.mark.allow_non_gpu(*non_utc_project_allow)),
# https://github.com/NVIDIA/spark-rapids/issues/9723
pytest.param("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]", marks=pytest.mark.allow_non_gpu('ProjectExec')),
pytest.param("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", marks=pytest.mark.allow_non_gpu('ProjectExec')),
pytest.param("dd/MM/yyyy'T'HH:mm:ss[.SSS][XXX]", marks=pytest.mark.allow_non_gpu('ProjectExec')),
])
@pytest.mark.parametrize('time_parser_policy', [
pytest.param("LEGACY", marks=pytest.mark.allow_non_gpu('ProjectExec')),
"CORRECTED"
])
@pytest.mark.parametrize('ansi_enabled', [ True, False ])
def test_from_json_struct_timestamp(timestamp_gen, timestamp_format, time_parser_policy, ansi_enabled):
json_string_gen = StringGen(r'{ "a": ' + timestamp_gen + ' }') \
.with_special_case('{ "a": null }') \
.with_special_case('{ "a": "6395-12-21T56:86:40.205705Z" }') \
.with_special_case('null')
options = { 'timestampFormat': timestamp_format } if timestamp_format else { }
> assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, json_string_gen) \
.select(f.col('a'), f.from_json('a', 'struct<a:timestamp>', options)),
conf={"spark.rapids.sql.expression.JsonToStructs": True,
'spark.sql.legacy.timeParserPolicy': time_parser_policy,
'spark.sql.ansi.enabled': ansi_enabled })
../../src/main/python/json_test.py:750:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../../src/main/python/asserts.py:595: in assert_gpu_and_cpu_are_equal_collect
_assert_gpu_and_cpu_are_equal(func, 'COLLECT', conf=conf, is_cpu_first=is_cpu_first, result_canonicalize_func_before_compare=result_canonicalize_func_before_compare)
../../src/main/python/asserts.py:503: in _assert_gpu_and_cpu_are_equal
from_gpu = run_on_gpu()
../../src/main/python/asserts.py:496: in run_on_gpu
from_gpu = with_gpu_session(bring_back, conf=conf)
../../src/main/python/spark_session.py:164: in with_gpu_session
return with_spark_session(func, conf=copy)
/usr/lib/python3.8/contextlib.py:75: in inner
return func(*args, **kwds)
../../src/main/python/spark_session.py:131: in with_spark_session
ret = func(_spark)
../../src/main/python/asserts.py:205: in <lambda>
bring_back = lambda spark: limit_func(spark).collect()
/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/dataframe.py:693: in collect
sock_info = self._jdf.collectToPython()
/spark-3.2.0-bin-hadoop3.2/python/lib/py4j-0.10.9.2-src.zip/py4j/java_gateway.py:1309: in __call__
return_value = get_return_value(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
a = ('xro1562019', <py4j.clientserver.JavaClient object at 0x7fbb3edc3f40>, 'o1562018', 'collectToPython')
kw = {}
converted = IllegalArgumentException('Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec\nProject [...ntServerConnection.run(ClientServerConnection.java:106)\n\tat java.base/java.lang.Thread.run(Thread.java:829)\n', None)
def deco(*a, **kw):
try:
return f(*a, **kw)
except py4j.protocol.Py4JJavaError as e:
converted = convert_exception(e.java_exception)
if not isinstance(converted, UnknownException):
# Hide where the exception came from that shows a non-Pythonic
# JVM exception message.
> raise converted from None
E pyspark.sql.utils.IllegalArgumentException: Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec
E Project [a#143786, from_json(StructField(a,TimestampType,true), a#143786, Some(UTC)) AS from_json(a)#143788]
E +- Scan ExistingRDD[a#143786]
/spark-3.2.0-bin-hadoop3.2/python/pyspark/sql/utils.py:117: IllegalArgumentException
FAILED ../../src/main/python/json_test.py::test_from_json_struct_timestamp[True-CORRECTED-None-"[ \t\xa0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}(\\.[0-9]{1,6})?Z?[ \t\xa0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?"][DATAGEN_SEED=1704853794] - pyspark.sql.utils.IllegalArgumentException: Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec
Project [a#142986, from_json(StructField(a,TimestampType,true), a#142986, Some(UTC)) AS from_json(a)#142988]
+- Scan ExistingRDD[a#142986]
FAILED ../../src/main/python/json_test.py::test_from_json_struct_timestamp[True-CORRECTED-None-"[ \t\xa0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?[1-8]{1}[0-9]{3}-[0-3]{1,2}-[0-3]{1,2}[ \t\xa0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]?"][DATAGEN_SEED=1704853794, INJECT_OOM] - pyspark.sql.utils.IllegalArgumentException: Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec
......
24/01/10 06:26:38 WARN GpuOverrides:
!Exec <ProjectExec> cannot run on GPU because not all expressions can be replaced
@Expression <AttributeReference> a#1165 could run on GPU
@Expression <Alias> from_json(StructField(a,TimestampType,true), a#1165, Some(UTC)) AS from_json(a)#1167 could run on GPU
!Expression <JsonToStructs> from_json(StructField(a,TimestampType,true), a#1165, Some(UTC)) cannot run on GPU because Unsupported timestampFormat: yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]
@Expression <AttributeReference> a#1165 could run on GPU
! <RDDScanExec> cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec
@Expression <AttributeReference> a#1165 could run on GPU
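Per the log above, the root cause is JsonToStructs refusing the timestampFormat, which drags the whole ProjectExec back to the CPU. If that fallback turns out to be expected on this shim rather than a plugin regression, the usual test-side remedy is the same allow_non_gpu pattern the parametrization already applies to the explicit formats. A sketch, treating the addition of 'ProjectExec' to the timestamp_format=None case as a hypothetical fix, not the confirmed one:

```python
# Sketch only: let the harness tolerate ProjectExec staying on the CPU for the
# default-format case, mirroring the marks the explicit formats already carry.
# `non_utc_project_allow` is the allow list the test above already uses; adding
# 'ProjectExec' here is an assumption about the intended fix.
pytest.param(None, marks=pytest.mark.allow_non_gpu(*non_utc_project_allow, 'ProjectExec')),
```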