Skip to content

Commit

Permalink
Introduce dedicated options for low memory readers (rapidsai#16289)
Browse files Browse the repository at this point in the history
This PR disables the low memory readers by default in `cudf.pandas` and instead provides dedicated options (`io.parquet.low_memory` and `io.json.low_memory`) to enable them.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: rapidsai#16289
  • Loading branch information
galipremsagar authored Jul 16, 2024
1 parent a6de6cc commit 3418f91
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 4 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/json.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ cpdef read_json(object filepaths_or_buffers,
else:
raise TypeError("`dtype` must be 'list like' or 'dict'")

if cudf.get_option("mode.pandas_compatible") and lines:
if cudf.get_option("io.json.low_memory") and lines:
res_cols, res_col_names, res_child_names = plc.io.json.chunked_read_json(
plc.io.SourceInfo(filepaths_or_buffers),
processed_dtypes,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ def _read_parquet(
"cudf engine doesn't support the "
f"following positional arguments: {list(args)}"
)
if cudf.get_option("mode.pandas_compatible"):
if cudf.get_option("io.parquet.low_memory"):
return libparquet.ParquetReader(
filepaths_or_buffers,
columns=columns,
Expand Down
26 changes: 26 additions & 0 deletions python/cudf/cudf/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,32 @@ def _integer_and_none_validator(val):
_make_contains_validator([False, True]),
)

# Register the boolean option "io.parquet.low_memory".
# Arguments appear to be (name, default, description, validator) -- confirm
# against the `_register_option` definition, which is not shown here.
# The default is False; per the description below, True selects chunked
# reading of parquet files instead of reading the whole file in one go.
_register_option(
"io.parquet.low_memory",
False,
textwrap.dedent(
"""
If set to `False`, reads entire parquet in one go.
If set to `True`, reads parquet file in chunks.
\tValid values are True or False. Default is False.
"""
),
# Validator built from the list of accepted values: False and True.
_make_contains_validator([False, True]),
)

# Register the boolean option "io.json.low_memory".
# Arguments appear to be (name, default, description, validator) -- confirm
# against the `_register_option` definition, which is not shown here.
# The default is False; per the description below, True selects chunked
# reading of json files instead of reading the whole file in one go.
_register_option(
"io.json.low_memory",
False,
textwrap.dedent(
"""
If set to `False`, reads entire json in one go.
If set to `True`, reads json file in chunks.
\tValid values are True or False. Default is False.
"""
),
# Validator built from the list of accepted values: False and True.
_make_contains_validator([False, True]),
)


class option_context(ContextDecorator):
"""
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,6 @@ def test_chunked_json_reader():
df.to_json(buf, lines=True, orient="records", engine="cudf")
buf.seek(0)
df = df.to_pandas()
with cudf.option_context("mode.pandas_compatible", True):
with cudf.option_context("io.json.low_memory", True):
gdf = cudf.read_json(buf, lines=True)
assert_eq(df, gdf)
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3772,6 +3772,6 @@ def test_parquet_reader_pandas_compatibility():
)
buffer = BytesIO()
df.to_parquet(buffer)
with cudf.option_context("mode.pandas_compatible", True):
with cudf.option_context("io.parquet.low_memory", True):
expected = cudf.read_parquet(buffer)
assert_eq(expected, df)

0 comments on commit 3418f91

Please sign in to comment.