From 7804ba7f817b3fccf13b0084e2d7e0ac2257ff5a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 Apr 2024 18:09:58 -1000 Subject: [PATCH] Preserve RangeIndex.step in to_arrow/from_arrow (#15581) `DataFrame.to_arrow` hardcoded the range-index metadata `step` to `1`, and `DataFrame.from_arrow` ignored the stored `step` when rebuilding the index; both now reflect `RangeIndex.step`. Authors: - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15581 --- python/cudf/cudf/core/dataframe.py | 15 ++++++++------- python/cudf/cudf/tests/test_dataframe.py | 10 +++++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7b7fc87a6dc..45bb66d5d4b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5466,10 +5466,12 @@ def from_arrow(cls, table): out._data._level_names = col_index_names if index_col: if isinstance(index_col[0], dict): + range_meta = index_col[0] idx = cudf.RangeIndex( - index_col[0]["start"], - index_col[0]["stop"], - name=index_col[0]["name"], + start=range_meta["start"], + stop=range_meta["stop"], + step=range_meta["step"], + name=range_meta["name"], ) if len(idx) == len(out): # `idx` is generated from arrow `pandas_metadata` @@ -5550,9 +5552,9 @@ def to_arrow(self, preserve_index=None): { "kind": "range", "name": index.name, - "start": index._start, - "stop": index._stop, - "step": 1, + "start": index.start, + "stop": index.stop, + "step": index.step, } ] else: @@ -5574,7 +5576,6 @@ def to_arrow(self, preserve_index=None): ) out = super(DataFrame, data).to_arrow() - # import pdb; pdb.set_trace() metadata = pa.pandas_compat.construct_metadata( columns_to_convert=[self[col] for col in self._data.names], df=self, diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 
df0e22c5e43..59e8b41e51a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -2770,7 +2770,15 @@ def test_arrow_pandas_compat(pdf, gdf, preserve_index): @pytest.mark.parametrize( - "index", [None, cudf.RangeIndex(3, name="a"), "a", "b", ["a", "b"]] + "index", + [ + None, + cudf.RangeIndex(3, name="a"), + "a", + "b", + ["a", "b"], + cudf.RangeIndex(0, 5, 2, name="a"), + ], ) @pytest.mark.parametrize("preserve_index", [True, False, None]) def test_arrow_round_trip(preserve_index, index):