From c365dfc39af2e04c372e778160cbced1d31b4516 Mon Sep 17 00:00:00 2001
From: William Ayd <will_ayd@innobi.io>
Date: Sun, 17 Nov 2024 03:07:48 -0500
Subject: [PATCH] CI: update fastparquet xfails for new release (#60337)

(cherry picked from commit 34c080cb91d71a8e900a6c114279e142924c1a64)
---
 pandas/tests/io/test_fsspec.py  |  6 ++++-
 pandas/tests/io/test_gcs.py     |  3 ---
 pandas/tests/io/test_parquet.py | 46 ++++++++++++++++++++++++---------
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index 5ed64e3eb0958..cf59e3e4c4934 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -5,6 +5,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 from pandas import (
     DataFrame,
     date_range,
@@ -168,7 +170,9 @@ def test_excel_options(fsspectest):
     assert fsspectest.test[0] == "read"
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
+@pytest.mark.xfail(
+    using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string) fastparquet"
+)
 def test_to_parquet_new_file(cleared_fs, df1):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index c7671bfb513aa..9fc0f6eb47766 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat.pyarrow import pa_version_under17p0
 
 from pandas import (
@@ -196,7 +194,6 @@ def test_to_csv_compression_encoding_gcs(
     tm.assert_frame_equal(df, read_df)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
 def test_to_parquet_gcs_new_file(monkeypatch, tmpdir):
     """Regression test for writing to a not-yet-existent GCS Parquet file."""
     pytest.importorskip("fastparquet")
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index e43aae6a2e9e7..1965e91941275 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1187,7 +1187,16 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
 
 
 class TestParquetFastParquet(Base):
-    def test_basic(self, fp, df_full):
+    def test_basic(self, fp, df_full, request):
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=("datetime_with_nat gets incorrect values"),
+                )
+            )
+
         df = df_full
 
         dti = pd.date_range("20130101", periods=3, tz="US/Eastern")
@@ -1223,11 +1232,17 @@ def test_duplicate_columns(self, fp):
         msg = "Cannot create parquet dataset with duplicate column names"
         self.check_error_on_write(df, fp, ValueError, msg)
 
-    @pytest.mark.xfail(
-        Version(np.__version__) >= Version("2.0.0"),
-        reason="fastparquet uses np.float_ in numpy2",
-    )
-    def test_bool_with_none(self, fp):
+    def test_bool_with_none(self, fp, request):
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0") and Version(
+            np.__version__
+        ) >= Version("2.0.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=("fastparquet uses np.float_ in numpy2"),
+                )
+            )
         df = pd.DataFrame({"a": [True, None, False]})
         expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16")
         # Fastparquet bug in 0.7.1 makes it so that this dtype becomes
@@ -1342,12 +1357,19 @@ def test_empty_dataframe(self, fp):
         expected = df.copy()
         check_round_trip(df, fp, expected=expected)
 
-    @pytest.mark.xfail(
-        _HAVE_FASTPARQUET and Version(fastparquet.__version__) > Version("2022.12"),
-        reason="fastparquet bug, see https://github.com/dask/fastparquet/issues/929",
-    )
-    @pytest.mark.skipif(using_copy_on_write(), reason="fastparquet writes into Index")
-    def test_timezone_aware_index(self, fp, timezone_aware_date_list):
+    def test_timezone_aware_index(self, fp, timezone_aware_date_list, request):
+        import fastparquet
+
+        if Version(fastparquet.__version__) < Version("2024.11.0"):
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=(
+                        "fastparquet bug, see "
+                        "https://github.com/dask/fastparquet/issues/929"
+                    ),
+                )
+            )
+
         idx = 5 * [timezone_aware_date_list]
 
         df = pd.DataFrame(index=idx, data={"index_as_col": idx})