
Commit: rename to executor_options

rjzamora committed Nov 26, 2024
1 parent c8af6fc, commit a765cbc
Showing 3 changed files with 8 additions and 4 deletions.
8 changes: 6 additions & 2 deletions python/cudf_polars/cudf_polars/callback.py
```diff
@@ -218,15 +218,19 @@ def validate_config_options(config: dict) -> None:
     """
     if unsupported := (
         config.keys()
-        - {"raise_on_fail", "parquet_options", "parallel_options", "executor"}
+        - {"raise_on_fail", "parquet_options", "executor", "executor_options"}
     ):
         raise ValueError(
             f"Engine configuration contains unsupported settings: {unsupported}"
         )
     assert {"chunked", "chunk_read_limit", "pass_read_limit"}.issuperset(
         config.get("parquet_options", {})
     )
-    assert {"num_rows_threshold"}.issuperset(config.get("parallel_options", {}))
+    executor = config.get("executor", "pylibcudf")
+    if executor == "dask-experimental":
+        assert {"num_rows_threshold"}.issuperset(config.get("executor_options", {}))
+    elif unsupported := config.get("executor_options", {}):
+        raise ValueError(f"Unsupported executor_options for {executor}: {unsupported}")


 def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
```
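For context, a minimal sketch of how the reworked validation behaves; the import path follows the file shown above, and the config dicts are illustrative:

```python
# Sketch only: exercising validate_config_options from the diff above.
from cudf_polars.callback import validate_config_options

# Accepted: executor_options is a recognized top-level key, and for the
# "dask-experimental" executor it may contain "num_rows_threshold".
validate_config_options(
    {"executor": "dask-experimental", "executor_options": {"num_rows_threshold": 10}}
)

# Rejected: for any other executor (including the default "pylibcudf"),
# passing executor_options raises a ValueError.
try:
    validate_config_options({"executor_options": {"num_rows_threshold": 10}})
except ValueError as err:
    print(err)  # Unsupported executor_options for pylibcudf: {...}
```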
2 changes: 1 addition & 1 deletion python/cudf_polars/cudf_polars/experimental/parallel.py
```diff
@@ -272,7 +272,7 @@ def _concat(dfs: Sequence[DataFrame]) -> DataFrame:
 def _(
     ir: DataFrameScan, rec: LowerIRTransformer
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
-    rows_per_partition = ir.config_options.get("parallel_options", {}).get(
+    rows_per_partition = ir.config_options.get("executor_options", {}).get(
         "num_rows_threshold", 1_000_000
     )
```
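The lookup itself is just a chained `dict.get` with a fallback; a standalone illustration, with a plain dict standing in for `ir.config_options`:

```python
# Stand-in for ir.config_options as carried on the IR node.
config_options = {"executor_options": {"num_rows_threshold": 250_000}}

# Same pattern as the diff above: fall back to 1_000_000 rows per
# partition when the option (or the whole mapping) is absent.
rows_per_partition = config_options.get("executor_options", {}).get(
    "num_rows_threshold", 1_000_000
)
print(rows_per_partition)  # 250000
```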
```diff
@@ -28,8 +28,8 @@ def test_parallel_dataframescan(df, num_rows_threshold):
     total_row_count = len(df.collect())
     engine = pl.GPUEngine(
         raise_on_fail=True,
-        parallel_options={"num_rows_threshold": num_rows_threshold},
         executor="dask-experimental",
+        executor_options={"num_rows_threshold": num_rows_threshold},
     )
     assert_gpu_result_equal(df, engine=engine)
```
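End-to-end, the renamed option reaches the engine the same way the test constructs it; a hedged usage sketch, assuming a polars build with GPU support and the toy query below:

```python
import polars as pl

# Hypothetical toy query; any LazyFrame works here.
lf = pl.LazyFrame({"x": range(1_000)}).select(pl.col("x").sum())

engine = pl.GPUEngine(
    raise_on_fail=True,
    executor="dask-experimental",
    executor_options={"num_rows_threshold": 500_000},
)
result = lf.collect(engine=engine)
```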
