
Commit: rename to executor_options

rjzamora committed Nov 26, 2024
1 parent c8af6fc, commit a765cbc
Showing 3 changed files with 8 additions and 4 deletions.
8 changes: 6 additions & 2 deletions python/cudf_polars/cudf_polars/callback.py
```diff
@@ -218,15 +218,19 @@ def validate_config_options(config: dict) -> None:
     """
     if unsupported := (
         config.keys()
-        - {"raise_on_fail", "parquet_options", "parallel_options", "executor"}
+        - {"raise_on_fail", "parquet_options", "executor", "executor_options"}
     ):
         raise ValueError(
             f"Engine configuration contains unsupported settings: {unsupported}"
         )
     assert {"chunked", "chunk_read_limit", "pass_read_limit"}.issuperset(
         config.get("parquet_options", {})
     )
-    assert {"num_rows_threshold"}.issuperset(config.get("parallel_options", {}))
+    executor = config.get("executor", "pylibcudf")
+    if executor == "dask-experimental":
+        assert {"num_rows_threshold"}.issuperset(config.get("executor_options", {}))
+    elif unsupported := config.get("executor_options", {}):
+        raise ValueError(f"Unsupported executor_options for {executor}: {unsupported}")


 def execute_with_cudf(nt: NodeTraverser, *, config: GPUEngine) -> None:
```
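For context, a minimal sketch of how the reworked validation behaves; the import path follows the file shown above, and the config dicts are illustrative:

```python
# Sketch only: exercising validate_config_options from the diff above.
from cudf_polars.callback import validate_config_options

# Accepted: executor_options is a recognized top-level key, and for the
# "dask-experimental" executor it may contain "num_rows_threshold".
validate_config_options(
    {"executor": "dask-experimental", "executor_options": {"num_rows_threshold": 10}}
)

# Rejected: for any other executor (including the default "pylibcudf"),
# passing executor_options raises a ValueError.
try:
    validate_config_options({"executor_options": {"num_rows_threshold": 10}})
except ValueError as err:
    print(err)  # Unsupported executor_options for pylibcudf: {...}
```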
2 changes: 1 addition & 1 deletion python/cudf_polars/cudf_polars/experimental/parallel.py
```diff
@@ -272,7 +272,7 @@ def _concat(dfs: Sequence[DataFrame]) -> DataFrame:
 def _(
     ir: DataFrameScan, rec: LowerIRTransformer
 ) -> tuple[IR, MutableMapping[IR, PartitionInfo]]:
-    rows_per_partition = ir.config_options.get("parallel_options", {}).get(
+    rows_per_partition = ir.config_options.get("executor_options", {}).get(
         "num_rows_threshold", 1_000_000
     )
```
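The lookup itself is just a chained `dict.get` with a fallback; a standalone illustration, with a plain dict standing in for `ir.config_options`:

```python
# Stand-in for ir.config_options as carried on the IR node.
config_options = {"executor_options": {"num_rows_threshold": 250_000}}

# Same pattern as the diff above: fall back to 1_000_000 rows per
# partition when the option (or the whole mapping) is absent.
rows_per_partition = config_options.get("executor_options", {}).get(
    "num_rows_threshold", 1_000_000
)
print(rows_per_partition)  # 250000
```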
```diff
@@ -28,8 +28,8 @@ def test_parallel_dataframescan(df, num_rows_threshold):
     total_row_count = len(df.collect())
     engine = pl.GPUEngine(
         raise_on_fail=True,
-        parallel_options={"num_rows_threshold": num_rows_threshold},
         executor="dask-experimental",
+        executor_options={"num_rows_threshold": num_rows_threshold},
     )
     assert_gpu_result_equal(df, engine=engine)
```
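End-to-end, the renamed option reaches the engine the same way the test constructs it; a hedged usage sketch, assuming a polars build with GPU support and the toy query below:

```python
import polars as pl

# Hypothetical toy query; any LazyFrame works here.
lf = pl.LazyFrame({"x": range(1_000)}).select(pl.col("x").sum())

engine = pl.GPUEngine(
    raise_on_fail=True,
    executor="dask-experimental",
    executor_options={"num_rows_threshold": 500_000},
)
result = lf.collect(engine=engine)
```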
