diff --git a/integration_tests/src/main/python/ast_test.py b/integration_tests/src/main/python/ast_test.py index 2c06c51a876..94976ea2208 100644 --- a/integration_tests/src/main/python/ast_test.py +++ b/integration_tests/src/main/python/ast_test.py @@ -380,3 +380,9 @@ def test_or(data_gen): f.col('a') | f.lit(True), f.lit(False) | f.col('b'), f.col('a') | f.col('b'))) + +def test_multi_tier_ast(): + assert_gpu_ast( + is_supported=True, + func=lambda spark: spark.range(10).withColumn("x", f.col("id")).repartition(1)\ + .selectExpr("(id < x) == (id < (id + x))")) diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py index a12bd223778..11bc389fb0a 100644 --- a/integration_tests/src/main/python/fastparquet_compatibility_test.py +++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py @@ -124,9 +124,12 @@ def read_with_fastparquet_or_plugin(spark): marks=pytest.mark.xfail(reason="fastparquet reads dates as timestamps.")), pytest.param(DateGen(nullable=False), marks=pytest.mark.xfail(reason="fastparquet reads far future dates (e.g. year=8705) incorrectly.")), - TimestampGen(nullable=False, - start=pandas_min_datetime, - end=pandas_max_datetime), # Vanilla case. + pytest.param(TimestampGen(nullable=False, + start=pandas_min_datetime, + end=pandas_max_datetime), + marks=pytest.mark.skipif(condition=is_not_utc(), + reason="fastparquet interprets timestamps in UTC timezone, regardless " + "of timezone settings")), # Vanilla case. pytest.param(TimestampGen(nullable=False, start=pandas_min_datetime, end=pandas_max_datetime), @@ -201,9 +204,12 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path): marks=pytest.mark.xfail(reason="fastparquet reads dates as timestamps.")), pytest.param(DateGen(nullable=False), marks=pytest.mark.xfail(reason="fastparquet reads far future dates (e.g. year=8705) incorrectly.")), - TimestampGen(nullable=False, - start=pandas_min_datetime, - end=pandas_max_datetime), # Vanilla case. + pytest.param(TimestampGen(nullable=False, + start=pandas_min_datetime, + end=pandas_max_datetime), + marks=pytest.mark.skipif(condition=is_not_utc(), + reason="fastparquet interprets timestamps in UTC timezone, regardless " + "of timezone settings")), # Vanilla case. pytest.param(TimestampGen(nullable=False, start=datetime(1, 2, 1, tzinfo=timezone.utc), end=pandas_min_datetime), diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py index 65865d0bdc1..1e871e85bd5 100644 --- a/integration_tests/src/main/python/string_test.py +++ b/integration_tests/src/main/python/string_test.py @@ -653,10 +653,10 @@ def test_byte_length(): @incompat def test_initcap(): # Because we don't use the same unicode version we need to limit - # the charicter set to something more reasonable + # the character set to something more reasonable # upper and lower should cover the corner cases, this is mostly to # see if there are issues with spaces - gen = mk_str_gen('([aAbB1357ȺéŸ_@%-]{0,15}[ \r\n\t]{1,2}){1,5}') + gen = StringGen('([aAbB1357ȺéŸ_@%-]{0,15}[ \r\n\t]{1,2}){1,5}') assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, gen).select( f.initcap(f.col('a'))))