From 29663aa2a41a8cda1729c2afc23f781d0fcbca90 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:20:44 +0000 Subject: [PATCH] cln --- demo.py | 31 ------------------------- f.py | 59 ------------------------------------------------ t.py | 70 --------------------------------------------------------- 3 files changed, 160 deletions(-) delete mode 100644 demo.py delete mode 100644 f.py delete mode 100644 t.py diff --git a/demo.py b/demo.py deleted file mode 100644 index 62ac4f46c..000000000 --- a/demo.py +++ /dev/null @@ -1,31 +0,0 @@ -# ruff: noqa -import polars as pl -from great_tables.data import sp500 - -from puffin import to_original_object -from puffin import to_polars_api - -# Define the start and end dates for the data range -start_date = "2010-06-07" -end_date = "2010-06-14" - -# Filter sp500 using Pandas to dates between `start_date` and `end_date` -# sp500_mini = sp500[(sp500["date"] >= start_date) & (sp500["date"] <= end_date)] - - -def dataframe_agnostic_filter(df_raw, start_date, end_date): - # opt-in to Polars API - df, pl = to_polars_api(df_raw, version="0.20") - - # Use (supported subset of) Polars API - df = df.filter( - pl.col("date") >= start_date, - pl.col("date") <= end_date, - ) - - # Return underlying dataframe (same class passed by user) - return to_original_object(df) - - -sp500_mini = dataframe_agnostic_filter(sp500, start_date, end_date) -print(pl.from_pandas(sp500_mini)) diff --git a/f.py b/f.py deleted file mode 100644 index 75cec5bbd..000000000 --- a/f.py +++ /dev/null @@ -1,59 +0,0 @@ -# ruff: noqa -from typing import TypeVar -import pandas as pd -import polars as pl - -from puffin import to_polars_api, to_original_object - -AnyDataFrame = TypeVar("AnyDataFrame") - - -def my_agnostic_function( - suppliers_native: AnyDataFrame, - parts_native: AnyDataFrame, -) -> AnyDataFrame: - suppliers, pl = to_polars_api(suppliers_native, version="0.20") - parts, _ = to_polars_api(parts_native, version="0.20") - result = ( - suppliers.join(parts, left_on="city", right_on="city") - .filter( - pl.col("color").is_in(["Red", "Green"]), - pl.col("weight") > 14, - ) - .group_by("s", "p") - .agg( - weight_mean=pl.col("weight").mean(), - weight_max=pl.col("weight").max(), - ) - ) - return to_original_object(result.collect()) - - -suppliers = { - "s": ["S1", "S2", "S3", "S4", "S5"], - "sname": ["Smith", "Jones", "Blake", "Clark", "Adams"], - "status": [20, 10, 30, 20, 30], - "city": ["London", "Paris", "Paris", "London", "Athens"], -} -parts = { - "p": ["P1", "P2", "P3", "P4", "P5", "P6"], - "pname": ["Nut", "Bolt", "Screw", "Screw", "Cam", "Cog"], - "color": ["Red", "Green", "Blue", "Red", "Blue", "Red"], - "weight": [12.0, 17.0, 17.0, 14.0, 12.0, 19.0], - "city": ["London", "Paris", "Oslo", "London", "Paris", "London"], -} - -print("pandas output:") -print( - my_agnostic_function( - pd.DataFrame(suppliers), - pd.DataFrame(parts), - ) -) -print("\nPolars output:") -print( - my_agnostic_function( - pl.LazyFrame(suppliers), - pl.LazyFrame(parts), - ) -) diff --git a/t.py b/t.py deleted file mode 100644 index 9b2445797..000000000 --- a/t.py +++ /dev/null @@ -1,70 +0,0 @@ -# ruff: noqa -import polars -import pandas as pd - -import puffin - -df_raw = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6]}) -df, pl = puffin.to_polars_api(df_raw, version="0.20") -df_raw_2 = pd.DataFrame({"a": [1, 3], "c": [7, 9]}) -df2, pl = puffin.to_polars_api(df_raw_2, version="0.20") - -result = df.sort("a", "b") -print(result.dataframe) - -result = df.filter(pl.col("a") > 1) -print(result.dataframe) - -result = df.with_columns( - c=pl.col("a") + pl.col("b"), - d=pl.col("a") - pl.col("a").mean(), -) -print(result.dataframe) -result = df.with_columns(pl.all() * 2) -print(result.dataframe) - -result = df.with_columns(horizonal_sum=pl.sum_horizontal(pl.col("a"), pl.col("b"))) -print(result.dataframe) -result = df.with_columns(horizonal_sum=pl.sum_horizontal("a", pl.col("b"))) -print(result.dataframe) - - -result = df.select(pl.all().sum()) -print(result.dataframe) -result = df.select(pl.col("a", "b") * 2) -print(result.dataframe) - -# TODO! -# result = ( -# df.collect() -# .group_by("b") -# .agg( -# pl.all().sum(), -# ) -# ) -# print(result.dataframe) - -result = ( - df.collect() - .group_by("b") - .agg( - pl.col("a").sum(), - simple=pl.col("a").sum(), - complex=(pl.col("a") + 1).sum(), - other=pl.sum("a"), - ) -) -print(result.dataframe) - -result = df.join(df2, left_on="a", right_on="a") -print(result.dataframe) - - -print(puffin.containers.is_pandas(df_raw)) -# print(puffin.containers.is_pandas(polars.from_pandas(df_raw))) -print(puffin.containers.is_polars(df_raw)) -# print(puffin.containers.is_polars(polars.from_pandas(df_raw))) - - -result = df.rename({"a": "a_new", "b": "b_new"}) -print(result.dataframe)