Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register read_parquet and read_csv with dask-expr #16535

Merged
merged 12 commits into from
Aug 13, 2024
35 changes: 35 additions & 0 deletions python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
@@ -667,6 +667,41 @@ def from_dict(
constructor=constructor,
)

@classmethod
def read_parquet(cls, *args, engine=None, **kwargs):
    """Read Parquet data through dask-expr using the cuDF Parquet engine.

    Parameters
    ----------
    *args
        Positional arguments forwarded unchanged to ``dx.read_parquet``.
    engine
        Accepted for signature compatibility only; the value is not used
        and ``CudfEngine`` is always passed instead.
    **kwargs
        Keyword arguments forwarded unchanged to ``dx.read_parquet``.

    Returns
    -------
    Whatever ``_default_backend`` returns for the ``dx.read_parquet`` call.
    """
    import dask_expr as dx

    from dask_cudf.io.parquet import CudfEngine

    # ``engine`` is a named keyword-only parameter, so it can never appear in
    # ``kwargs``; setting the key here pins the engine to CudfEngine
    # regardless of what the caller asked for.
    kwargs["engine"] = CudfEngine
    # NOTE(review): ``_default_backend`` is defined elsewhere in this module;
    # presumably it invokes the function under the default dataframe
    # backend -- confirm against its definition.
    return _default_backend(dx.read_parquet, *args, **kwargs)

@staticmethod
def read_csv(
    path,
    *args,
    header="infer",
    dtype_backend=None,
    storage_options=None,
    **kwargs,
):
    """Build a dask-expr collection that reads CSV data with the cudf backend.

    Parameters
    ----------
    path
        Location of the CSV data. Anything that is not already a ``str`` is
        passed through ``fsspec.utils.stringify_path`` first.
    *args
        Accepted but not forwarded; extra positional arguments have no
        effect on the resulting expression.
    header
        Forwarded to ``ReadCSV`` as ``header``.
    dtype_backend
        Forwarded to ``ReadCSV`` as ``dtype_backend``.
    storage_options
        Forwarded to ``ReadCSV`` as ``storage_options``.
    **kwargs
        Collected into a dict and handed to ``ReadCSV`` as its ``kwargs``
        argument.

    Returns
    -------
    The collection produced by ``dx.new_collection`` for the ``ReadCSV``
    expression.
    """
    import dask_expr as dx
    from fsspec.utils import stringify_path

    # Only non-string inputs are routed through fsspec's path helper.
    csv_path = path if isinstance(path, str) else stringify_path(path)

    csv_expr = dx.io.csv.ReadCSV(
        csv_path,
        dtype_backend=dtype_backend,
        storage_options=storage_options,
        kwargs=kwargs,
        header=header,
        dataframe_backend="cudf",
    )
    return dx.new_collection(csv_expr)

@staticmethod
def read_json(*args, **kwargs):
from dask_cudf.io.json import read_json as read_json_impl
Loading