From 304ab3529a17db474a8be8827c8b6ac85a74f18d Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 27 Mar 2024 16:49:52 +0000 Subject: [PATCH] update a bit --- tpch/notebooks/gpu/execute.ipynb | 2 +- tpch/notebooks/q1/execute.ipynb | 2 +- tpch/notebooks/q2/execute.ipynb | 10 ++++++---- tpch/notebooks/q3/execute.ipynb | 9 +++++---- tpch/notebooks/q4/execute.ipynb | 9 +++++---- tpch/notebooks/q5/execute.ipynb | 2 +- tpch/notebooks/q6/execute.ipynb | 13 ++++++------- tpch/notebooks/q7/execute.ipynb | 8 ++++---- 8 files changed, 29 insertions(+), 26 deletions(-) diff --git a/tpch/notebooks/gpu/execute.ipynb b/tpch/notebooks/gpu/execute.ipynb index f11580ae1..87ba3ae7c 100755 --- a/tpch/notebooks/gpu/execute.ipynb +++ b/tpch/notebooks/gpu/execute.ipynb @@ -22,7 +22,7 @@ }, "outputs": [], "source": [ - "!pip install -U narwhals>=0.7.1" + "!pip install -U narwhals>=0.7.2" ] }, { diff --git a/tpch/notebooks/q1/execute.ipynb b/tpch/notebooks/q1/execute.ipynb index 90bac970b..50d0c073f 100755 --- a/tpch/notebooks/q1/execute.ipynb +++ b/tpch/notebooks/q1/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { diff --git a/tpch/notebooks/q2/execute.ipynb b/tpch/notebooks/q2/execute.ipynb index b178d2718..4ad33bd93 100755 --- a/tpch/notebooks/q2/execute.ipynb +++ b/tpch/notebooks/q2/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { @@ -126,9 +126,11 @@ " .join(supplier_ds, left_on=\"ps_suppkey\", right_on=\"s_suppkey\")\n", " .join(nation_ds, left_on=\"s_nationkey\", right_on=\"n_nationkey\")\n", " .join(region_ds, left_on=\"n_regionkey\", right_on=\"r_regionkey\")\n", - " .filter(nw.col(\"p_size\") == var_1)\n", - " .filter(nw.col(\"p_type\").str.ends_with(var_2))\n", - " .filter(nw.col(\"r_name\") == var_3)\n", + " .filter(\n", + " nw.col(\"p_size\") == var_1,\n", + " nw.col(\"p_type\").str.ends_with(var_2),\n", + " nw.col(\"r_name\") == var_3,\n", + " )\n", " )\n", "\n", " final_cols = [\n", diff --git a/tpch/notebooks/q3/execute.ipynb b/tpch/notebooks/q3/execute.ipynb index 856315a10..51823c7c5 100755 --- a/tpch/notebooks/q3/execute.ipynb +++ b/tpch/notebooks/q3/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { @@ -120,9 +120,10 @@ " customer_ds.filter(nw.col(\"c_mktsegment\") == var_3)\n", " .join(orders_ds, left_on=\"c_custkey\", right_on=\"o_custkey\")\n", " .join(line_item_ds, left_on=\"o_orderkey\", right_on=\"l_orderkey\")\n", - " .filter(nw.col(\"o_orderdate\") < var_2)\n", - " .filter(nw.col(\"l_shipdate\") > var_1)\n", - " .with_columns(\n", + " .filter(\n", + " nw.col(\"o_orderdate\") < var_2,\n", + " nw.col(\"l_shipdate\") > var_1,\n", + " ).with_columns(\n", " (nw.col(\"l_extendedprice\") * (1 - nw.col(\"l_discount\"))).alias(\"revenue\")\n", " )\n", " .group_by([\"o_orderkey\", \"o_orderdate\", \"o_shippriority\"])\n", diff --git a/tpch/notebooks/q4/execute.ipynb b/tpch/notebooks/q4/execute.ipynb index 6b3d29e73..44881bfef 100755 --- a/tpch/notebooks/q4/execute.ipynb +++ b/tpch/notebooks/q4/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { @@ -116,9 +116,10 @@ "\n", " result = (\n", " line_item_ds.join(orders_ds, left_on=\"l_orderkey\", right_on=\"o_orderkey\")\n", - " .filter(nw.col(\"o_orderdate\").is_between(var_1, var_2, closed=\"left\"))\n", - " .filter(nw.col(\"l_commitdate\") < nw.col(\"l_receiptdate\"))\n", - " .unique(subset=[\"o_orderpriority\", \"l_orderkey\"])\n", + " .filter(\n", + " nw.col(\"o_orderdate\").is_between(var_1, var_2, closed=\"left\"),\n", + " nw.col(\"l_commitdate\") < nw.col(\"l_receiptdate\"),\n", + " ).unique(subset=[\"o_orderpriority\", \"l_orderkey\"])\n", " .group_by(\"o_orderpriority\")\n", " .agg(nw.len().alias(\"order_count\"))\n", " .sort(by=\"o_orderpriority\")\n", diff --git a/tpch/notebooks/q5/execute.ipynb b/tpch/notebooks/q5/execute.ipynb index ac5f40428..57f065725 100755 --- a/tpch/notebooks/q5/execute.ipynb +++ b/tpch/notebooks/q5/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { diff --git a/tpch/notebooks/q6/execute.ipynb b/tpch/notebooks/q6/execute.ipynb index 95323f67b..5cb9e5d65 100755 --- a/tpch/notebooks/q6/execute.ipynb +++ b/tpch/notebooks/q6/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { @@ -113,11 +113,10 @@ "\n", " result = (\n", " line_item_ds.filter(\n", - " nw.col(\"l_shipdate\").is_between(var_1, var_2, closed=\"left\")\n", - " )\n", - " .filter(nw.col(\"l_discount\").is_between(0.05, 0.07))\n", - " .filter(nw.col(\"l_quantity\") < var_3)\n", - " .with_columns(\n", + " nw.col(\"l_shipdate\").is_between(var_1, var_2, closed=\"left\"),\n", + " nw.col(\"l_discount\").is_between(0.05, 0.07),\n", + " nw.col(\"l_quantity\") < var_3,\n", + " ).with_columns(\n", " (nw.col(\"l_extendedprice\") * nw.col(\"l_discount\")).alias(\"revenue\")\n", " )\n", " .select(nw.sum(\"revenue\"))\n", @@ -446,7 +445,7 @@ "source": [ "tool = 'polars[lazy]'\n", "fn = IO_FUNCS[tool]\n", - "timings = %timeit -o q6(fn(lineitem))\n", + "timings = %timeit -o q6(fn(lineitem)).collect()\n", "results[tool] = timings.best" ] }, diff --git a/tpch/notebooks/q7/execute.ipynb b/tpch/notebooks/q7/execute.ipynb index bcae5a969..5ce34e8b5 100755 --- a/tpch/notebooks/q7/execute.ipynb +++ b/tpch/notebooks/q7/execute.ipynb @@ -47,7 +47,7 @@ } ], "source": [ - "!pip install -U pandas polars pyarrow narwhals>=0.7.1" + "!pip install -U polars pyarrow narwhals>=0.7.2 && pip uninstall pandas -y && pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pandas" ] }, { @@ -120,8 +120,8 @@ " n1 = nation_ds.filter(nw.col(\"n_name\") == \"FRANCE\")\n", " n2 = nation_ds.filter(nw.col(\"n_name\") == \"GERMANY\")\n", "\n", - " var_1 = date(1995, 1, 1)\n", - " var_2 = date(1996, 12, 31)\n", + " var_1 = datetime(1995, 1, 1)\n", + " var_2 = datetime(1996, 12, 31)\n", "\n", " df1 = (\n", " customer_ds.join(n1, left_on=\"c_nationkey\", right_on=\"n_nationkey\")\n", @@ -476,7 +476,7 @@ "source": [ "tool = 'polars[lazy]'\n", "fn = IO_FUNCS[tool]\n", - "timings = %timeit -o q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier))\n", + "timings = %timeit -o q7(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect()\n", "results[tool] = timings.best" ] },