diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..4c75d5c11 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,128 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or 
harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +https://www.linkedin.com/in/marcogorelli/. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. 
This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. 
diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 000000000..b9e2a9336 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024, Marco Gorelli + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/demo.py b/demo.py new file mode 100644 index 000000000..24f009e8b --- /dev/null +++ b/demo.py @@ -0,0 +1,33 @@ +# ruff: noqa +# type: ignore +from typing import Any +import polars as pl +# import modin.pandas as mpd + +import narwhals as nw + + +def func(df_raw): + df = nw.DataFrame(df_raw) + res = df.with_columns( + d=nw.col("a") + 1, + e=nw.col("a") + nw.col("b"), + ) + res = res.group_by(["a"]).agg( + nw.col("b").sum(), + d=nw.col("c").sum(), + # e=nw.len(), + ) + return nw.to_native(res) + + +import pandas as pd + +df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) +print(func(df)) +# df = mpd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) +# print(func(df)) +df = pl.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) +print(func(df)) +df = pl.LazyFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) +print(func(df).collect()) diff --git a/design.md b/design.md new file mode 100644 index 000000000..d343e4074 --- /dev/null +++ b/design.md @@ -0,0 +1,31 @@ +# Design + +Let's do this differently. + +Here's what I'd like to get to: + +import narwhals as nw +from narwhals.translate import ( + translate_frame, + translate_series, + to_native, +) + +df_any = ... +df = nw.DataFrame(df_any) + +df = df.with_columns(c = nw.col('a') + nw.col('b')) + +result = to_native(df) + +--- + +we need to just have a single class. can't have all this nonsense... + +then, we don't even need a spec... + +we can still define entrypoints though? + +--- + +where should extract native happen? 
diff --git a/f.py b/f.py new file mode 100644 index 000000000..a2f4835b4 --- /dev/null +++ b/f.py @@ -0,0 +1,55 @@ +# ruff: noqa +# type: ignore +import pandas as pd +import polars as pl + +import narwhals as nw + + +def my_agnostic_function( + suppliers_native, + parts_native, +): + suppliers = nw.LazyFrame(suppliers_native) + parts = nw.LazyFrame(parts_native) + + result = ( + suppliers.join(parts, left_on="city", right_on="city") + .filter(nw.col("weight") > 10) + .group_by("s") + .agg( + weight_mean=nw.col("weight").mean(), + weight_max=nw.col("weight").max(), + ) + ) + return nw.to_native(result) + + +suppliers = { + "s": ["S1", "S2", "S3", "S4", "S5"], + "sname": ["Smith", "Jones", "Blake", "Clark", "Adams"], + "status": [20, 10, 30, 20, 30], + "city": ["London", "Paris", "Paris", "London", "Athens"], +} +parts = { + "p": ["P1", "P2", "P3", "P4", "P5", "P6"], + "pname": ["Nut", "Bolt", "Screw", "Screw", "Cam", "Cog"], + "color": ["Red", "Green", "Blue", "Red", "Blue", "Red"], + "weight": [12.0, 17.0, 17.0, 14.0, 12.0, 19.0], + "city": ["London", "Paris", "Oslo", "London", "Paris", "London"], +} + +print("pandas output:") +print( + my_agnostic_function( + pd.DataFrame(suppliers), + pd.DataFrame(parts), + ) +) +print("\nPolars output:") +print( + my_agnostic_function( + pl.LazyFrame(suppliers), + pl.LazyFrame(parts), + ).collect() +) diff --git a/t.py b/t.py new file mode 100644 index 000000000..7ab0e9efc --- /dev/null +++ b/t.py @@ -0,0 +1,131 @@ +# ruff: noqa +# type: ignore +import polars +import pandas as pd +import polars as pl + +import narwhals as nw + +df_raw = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +df = nw.LazyFrame(df_raw) +df_raw_2 = pd.DataFrame({"a": [1, 3], "c": [7, 9]}) +df2 = nw.LazyFrame(df_raw_2) + +result = df.sort("a", "b") +print(nw.to_native(result)) + +result = df.filter(nw.col("a") > 1) +print(nw.to_native(result)) + +result = df.with_columns( + c=nw.col("a") + nw.col("b"), + d=nw.col("a") - nw.col("a").mean(), 
+) +print(nw.to_native(result)) +result = df.with_columns(nw.all() * 2) +print(nw.to_native(result)) + +result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) +print(nw.to_native(result)) +result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) +print(nw.to_native(result)) + + +result = df.select(nw.all().sum()) +print(nw.to_native(result)) +result = df.select(nw.col("a", "b") * 2) +print(nw.to_native(result)) + +# # TODO! +# # result = ( +# # df.collect() +# # .group_by("b") +# # .agg( +# # nw.all().sum(), +# # ) +# # ) +# # print(nw.to_native(result)) + +result = ( + df.collect() + .group_by("b") + .agg( + nw.col("a").sum(), + simple=nw.col("a").sum(), + complex=(nw.col("a") + 1).sum(), + other=nw.sum("a"), + ) +) +print(nw.to_native(result)) +print("multiple simple") +result = ( + df.collect() + .group_by("b") + .agg( + nw.col("a", "z").sum(), + ) +) +print(nw.to_native(result)) + +result = df.join(df2, left_on="a", right_on="a") +print(nw.to_native(result)) + + +result = df.rename({"a": "a_new", "b": "b_new"}) +print(nw.to_native(result)) + +result = df.collect().to_dict() +print(result) +print(polars.from_pandas(nw.to_native(df)).to_dict()) + +result = df.collect().to_dict(as_series=False) +print("this") +print(result) +print("that") +print(polars.from_pandas(nw.to_native(df)).to_dict(as_series=False)) + +agg = (nw.col("b") - nw.col("z").mean()).mean() +print(nw.to_native(df.with_columns(d=agg))) +result = df.group_by("a").agg(agg) +print(nw.to_native(result)) + +print(nw.col("a") + nw.col("b")) +print(nw.col("a", "b").sum()) + +result = df.select(nw.col("a", "b").sum()) +print(nw.to_native(result)) + +print(df.schema) +print(df.schema["a"].is_numeric()) + +df_raw = pd.DataFrame( + { + "a": [1, 3, 2], + "b": [4.0, 4, 6], + "c": ["a", "b", "c"], + "d": [True, False, True], + } +) +df = nw.DataFrame(df_raw) +print(df.schema) +print(df.schema["a"].is_numeric()) +print(df.schema["b"].is_numeric()) 
+print(df.schema["c"].is_numeric()) +print(df.schema["d"].is_numeric()) + +result = df.with_columns(nw.col("a").cast(nw.Float32)) +print(nw.to_native(result)) +print(result._dataframe._dataframe.dtypes) + +print(df.schema) +result = df.select([col for (col, dtype) in df.schema.items() if dtype == nw.Float64]) +print(nw.to_native(result)) +print(result._dataframe._dataframe.dtypes) + +result = df.select("a", "b").select(nw.all() + nw.col("a")) +print(nw.to_native(result)) + +df = nw.DataFrame(df_raw, features=["eager"]) +print(df["a"].mean()) +df = nw.DataFrame(pl.from_pandas(df_raw), features=["eager"]) +print(df["a"].mean())