Skip to content

Commit

Permalink
raise notimplemented for chunked parquet reader nrows/skiprows
Browse files Browse the repository at this point in the history
  • Loading branch information
lithomas1 committed Jul 8, 2024
1 parent 56c88ed commit f52b606
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 4 deletions.
16 changes: 12 additions & 4 deletions python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,10 +911,6 @@ def _read_parquet(
*args,
**kwargs,
):
if nrows is None:
nrows = -1
if skip_rows is None:
skip_rows = 0
# Simple helper function to dispatch between
# cudf and pyarrow to read parquet data
if engine == "cudf":
Expand All @@ -933,13 +929,25 @@ def _read_parquet(
# (It's not super important now since pandas doesn't support it ATM,
# but may be relevant in the future)
# xref https://github.com/pandas-dev/pandas/issues/51830
if nrows is not None:
raise NotImplementedError(
"pandas compatibility mode doesn't support nrows in read_parquet"
)
if skip_rows is not None:
raise NotImplementedError(
"pandas compatibility mode doesn't support skip_rows in read_parquet"
)
return libparquet.ParquetReader(
filepaths_or_buffers,
columns=columns,
row_groups=row_groups,
use_pandas_metadata=use_pandas_metadata,
).read()
else:
if nrows is None:
nrows = -1
if skip_rows is None:
skip_rows = 0
return libparquet.read_parquet(
filepaths_or_buffers,
columns=columns,
Expand Down
16 changes: 16 additions & 0 deletions python/cudf/cudf/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3498,6 +3498,22 @@ def test_parquet_reader_pandas_compatibility():
assert_eq(expected, df)


@pytest.mark.parametrize(
    "kwargs",
    [
        {"skip_rows": 0},
        {"nrows": 1},
    ],
)
def test_parquet_reader_pandas_compatibility_unsupported(kwargs):
    """``read_parquet`` must reject ``nrows``/``skip_rows`` in pandas mode.

    Each parametrized case passes exactly one of the unsupported keyword
    arguments while ``mode.pandas_compatible`` is enabled and expects a
    ``NotImplementedError``.  ``skip_rows=0`` is used deliberately: the
    reader checks ``is not None``, so even a no-op value has to be refused.
    """
    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["av", "qw", "hi", "xyz"]})
    buffer = BytesIO()
    df.to_parquet(buffer)
    with cudf.option_context("mode.pandas_compatible", True):
        # Pin the message prefix so an unrelated NotImplementedError raised
        # elsewhere in the read path cannot make this test pass by accident.
        with pytest.raises(
            NotImplementedError,
            match="pandas compatibility mode doesn't support",
        ):
            cudf.read_parquet(buffer, **kwargs)


@pytest.mark.parametrize("row_group_size", [1, 4, 33])
def test_parquet_read_rows(tmpdir, pdf, row_group_size):
if len(pdf) > 100:
Expand Down
6 changes: 6 additions & 0 deletions python/cudf/cudf/utils/ioutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,15 @@
This parameter is functional only when `use_python_file_object=False`.
skip_rows : int, default None
If not None, the number of rows to skip from the start of the file.
.. pandas-compat::
This option is not supported when pandas compatibility mode is on.
nrows : int, default None
If not None, the total number of rows to read.
.. pandas-compat::
This option is not supported when pandas compatibility mode is on.
Returns
-------
DataFrame
Expand Down

0 comments on commit f52b606

Please sign in to comment.