Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into fix_test_in_set
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia committed Dec 1, 2023
2 parents a628087 + 9b2df3e commit b4e2400
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 8 deletions.
6 changes: 6 additions & 0 deletions integration_tests/src/main/python/ast_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,3 +380,9 @@ def test_or(data_gen):
f.col('a') | f.lit(True),
f.lit(False) | f.col('b'),
f.col('a') | f.col('b')))

def test_multi_tier_ast():
    """Pass a nested comparison expression to assert_gpu_ast, expecting AST support.

    The expression compares the results of two inner comparisons,
    ``(id < x) == (id < (id + x))``, where ``x`` is a copy of ``id``.
    """
    def build_df(spark):
        # Ten rows of `id`, mirrored into `x`, repartitioned to one partition.
        base = spark.range(10).withColumn("x", f.col("id"))
        single_partition = base.repartition(1)
        return single_partition.selectExpr("(id < x) == (id < (id + x))")

    assert_gpu_ast(is_supported=True, func=build_df)
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,12 @@ def read_with_fastparquet_or_plugin(spark):
marks=pytest.mark.xfail(reason="fastparquet reads dates as timestamps.")),
pytest.param(DateGen(nullable=False),
marks=pytest.mark.xfail(reason="fastparquet reads far future dates (e.g. year=8705) incorrectly.")),
TimestampGen(nullable=False,
start=pandas_min_datetime,
end=pandas_max_datetime), # Vanilla case.
pytest.param(TimestampGen(nullable=False,
start=pandas_min_datetime,
end=pandas_max_datetime),
marks=pytest.mark.skipif(condition=is_not_utc(),
reason="fastparquet interprets timestamps in UTC timezone, regardless "
"of timezone settings")), # Vanilla case.
pytest.param(TimestampGen(nullable=False,
start=pandas_min_datetime,
end=pandas_max_datetime),
Expand Down Expand Up @@ -201,9 +204,12 @@ def test_reading_file_written_by_spark_cpu(data_gen, spark_tmp_path):
marks=pytest.mark.xfail(reason="fastparquet reads dates as timestamps.")),
pytest.param(DateGen(nullable=False),
marks=pytest.mark.xfail(reason="fastparquet reads far future dates (e.g. year=8705) incorrectly.")),
TimestampGen(nullable=False,
start=pandas_min_datetime,
end=pandas_max_datetime), # Vanilla case.
pytest.param(TimestampGen(nullable=False,
start=pandas_min_datetime,
end=pandas_max_datetime),
marks=pytest.mark.skipif(condition=is_not_utc(),
reason="fastparquet interprets timestamps in UTC timezone, regardless "
"of timezone settings")), # Vanilla case.
pytest.param(TimestampGen(nullable=False,
start=datetime(1, 2, 1, tzinfo=timezone.utc),
end=pandas_min_datetime),
Expand Down
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/string_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,10 +653,10 @@ def test_byte_length():
@incompat
def test_initcap():
# Because we don't use the same unicode version we need to limit
# the charicter set to something more reasonable
# the character set to something more reasonable
# upper and lower should cover the corner cases, this is mostly to
# see if there are issues with spaces
gen = mk_str_gen('([aAbB1357ȺéŸ_@%-]{0,15}[ \r\n\t]{1,2}){1,5}')
gen = StringGen('([aAbB1357ȺéŸ_@%-]{0,15}[ \r\n\t]{1,2}){1,5}')
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, gen).select(
f.initcap(f.col('a'))))
Expand Down

0 comments on commit b4e2400

Please sign in to comment.