diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4eca8ac29..f967d925a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,6 +23,28 @@ repos: files: \.(py|rst|md)$ args: [--ignore-words-list=ser] exclude: ^docs/api-completeness.md$ +- repo: https://github.com/pycqa/flake8 + rev: '7.1.1' # todo: remove once https://github.com/astral-sh/ruff/issues/458 is addressed + hooks: + - id: flake8 + additional_dependencies: [darglint==1.8.1, Flake8-pyproject] + entry: flake8 --select DAR --ignore DAR402,DAR401 + exclude: | + (?x)^( + tests/.*| + # TODO: gradually enable + narwhals/series\.py$| + # TODO: gradually enable + narwhals/dataframe\.py$| + # TODO: gradually enable + narwhals/dependencies\.py$| + # some false positives in this one + narwhals/translate\.py$| + # some false positives in this one + narwhals/stable/v1/__init__\.py$| + # private, so less urgent to document too well + narwhals/_.* + )$ - repo: local hooks: - id: check-api-reference diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 278e7d71a..719bd989f 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -551,7 +551,11 @@ def __repr__(self) -> str: return f"{class_name}({dict(self)})" def to_schema(self) -> OrderedDict[str, DType | type[DType]]: - """Return Struct dtype as a schema dict.""" + """Return Struct dtype as a schema dict. + + Returns: + Mapping from column name to dtype. + """ return OrderedDict(self) diff --git a/narwhals/expr.py b/narwhals/expr.py index fce4dc5c4..bc90721d7 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -93,6 +93,11 @@ def alias(self, name: str) -> Self: def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. + Arguments: + function: Function to apply. + args: Positional arguments to pass to function. + kwargs: Keyword arguments to pass to function. + Returns: A new expression. @@ -1075,6 +1080,9 @@ def diff(self) -> Self: def shift(self, n: int) -> Self: """Shift values by `n` positions. + Arguments: + n: Number of positions to shift values by. + Returns: A new expression. @@ -1310,9 +1318,7 @@ def is_between( Arguments: lower_bound: Lower bound value. - upper_bound: Upper bound value. - closed: Define which sides of the interval are closed (inclusive). Returns: @@ -1427,6 +1433,9 @@ def is_in(self, other: Any) -> Self: def filter(self, *predicates: Any) -> Self: """Filters elements based on a condition, returning a new expression. + Arguments: + predicates: Conditions to filter by (which get ANDed together). + Returns: A new expression. @@ -1607,9 +1616,7 @@ def fill_null( Arguments: value: Value used to fill null values. - strategy: Strategy used to fill null values. - limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. Returns: diff --git a/narwhals/functions.py b/narwhals/functions.py index bfc7c7595..e3f3751e3 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -46,12 +46,11 @@ def concat( Arguments: items: DataFrames, LazyFrames to concatenate. - how: {'vertical', 'horizontal'} - * vertical: Stacks Series from DataFrames vertically and fills with `null` - if the lengths don't match. - * horizontal: Stacks Series from DataFrames horizontally and fills with `null` - if the lengths don't match. + - vertical: Stacks Series from DataFrames vertically and fills with `null` + if the lengths don't match. + - horizontal: Stacks Series from DataFrames horizontally and fills with `null` + if the lengths don't match. Returns: A new DataFrame, Lazyframe resulting from the concatenation. @@ -534,6 +533,8 @@ def _get_sys_info() -> dict[str, str]: Copied from sklearn + Returns: + Dictionary with system info. """ python = sys.version.replace("\n", " ") @@ -556,6 +557,8 @@ def _get_deps_info() -> dict[str, str]: This function and show_versions were copied from sklearn and adapted + Returns: + Mapping from dependency to version. """ deps = ( "pandas", @@ -607,9 +610,13 @@ def get_level( ) -> Literal["full", "interchange"]: """Level of support Narwhals has for current object. - This can be one of: + Arguments: + obj: Dataframe or Series. + + Returns: + This can be one of: - - 'full': full Narwhals API support - - 'metadata': only metadata operations are supported (`df.schema`) + - 'full': full Narwhals API support + - 'metadata': only metadata operations are supported (`df.schema`) """ return obj._level diff --git a/narwhals/group_by.py b/narwhals/group_by.py index 82054da60..13e625e92 100644 --- a/narwhals/group_by.py +++ b/narwhals/group_by.py @@ -35,9 +35,11 @@ def agg( Arguments: aggs: Aggregations to compute for each group of the group by operation, specified as positional arguments. - named_aggs: Additional aggregations, specified as keyword arguments. + Returns: + A new Dataframe. + Examples: Group by one column or by multiple columns and call `agg` to compute the grouped sum of another column. diff --git a/narwhals/schema.py b/narwhals/schema.py index 0d760c8ac..1d357b766 100644 --- a/narwhals/schema.py +++ b/narwhals/schema.py @@ -60,13 +60,25 @@ def __init__( super().__init__(schema) def names(self) -> list[str]: - """Get the column names of the schema.""" + """Get the column names of the schema. + + Returns: + Column names. + """ return list(self.keys()) def dtypes(self) -> list[DType]: - """Get the data types of the schema.""" + """Get the data types of the schema. + + Returns: + Data types of schema. + """ return list(self.values()) def len(self) -> int: - """Get the number of columns in the schema.""" + """Get the number of columns in the schema. + + Returns: + Number of columns. + """ return len(self) diff --git a/narwhals/selectors.py b/narwhals/selectors.py index b9c484a37..ab23ed27e 100644 --- a/narwhals/selectors.py +++ b/narwhals/selectors.py @@ -15,6 +15,9 @@ def by_dtype(*dtypes: Any) -> Expr: Arguments: dtypes: one or data types to select + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs @@ -55,6 +58,9 @@ def by_dtype(*dtypes: Any) -> Expr: def numeric() -> Expr: """Select numeric columns. + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs @@ -95,6 +101,9 @@ def numeric() -> Expr: def boolean() -> Expr: """Select boolean columns. + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs @@ -135,6 +144,9 @@ def boolean() -> Expr: def string() -> Expr: """Select string columns. + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs @@ -175,6 +187,9 @@ def string() -> Expr: def categorical() -> Expr: """Select categorical columns. + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs @@ -215,6 +230,9 @@ def categorical() -> Expr: def all() -> Expr: """Select all columns. + Returns: + A new expression. + Examples: >>> import narwhals as nw >>> import narwhals.selectors as ncs diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 3d97fa3d8..e53eb2f92 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1090,58 +1090,25 @@ def narwhalify( ) -> Callable[..., Any]: """Decorate function so it becomes dataframe-agnostic. - `narwhalify` will try to convert any dataframe/series-like object into the narwhal + This will try to convert any dataframe/series-like object into the Narwhals respective DataFrame/Series, while leaving the other parameters as they are. - - Similarly, if the output of the function is a narwhals DataFrame or Series, it will be + Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be converted back to the original dataframe/series type, while if the output is another type it will be left as is. - By setting `pass_through=False`, then every input and every output will be required to be a dataframe/series-like object. - Instead of writing - - ```python - import narwhals as nw - - - def func(df): - df = nw.from_native(df, pass_through=True) - df = df.group_by("a").agg(nw.col("b").sum()) - return nw.to_native(df) - ``` - - you can just write - - ```python - import narwhals as nw - - - @nw.narwhalify - def func(df): - return df.group_by("a").agg(nw.col("b").sum()) - ``` - - You can also pass in extra arguments, e.g. - - ```python - @nw.narwhalify(eager_only=True) - ``` - - that will get passed down to `nw.from_native`. - Arguments: func: Function to wrap in a `from_native`-`to_native` block. - strict: Determine what happens if the object isn't supported by Narwhals: + strict: **Deprecated** (v1.13.0): + Please use `pass_through` instead. Note that `strict` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/). + + Determine what happens if the object isn't supported by Narwhals: - `True` (default): raise an error - `False`: pass object through as-is - - **Deprecated** (v1.13.0): - Please use `pass_through` instead. Note that `strict` is still available - (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, - see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/). pass_through: Determine what happens if the object isn't supported by Narwhals: - `False` (default): raise an error @@ -1151,6 +1118,24 @@ def func(df): implement the Dataframe Interchange Protocol. series_only: Whether to only allow series. allow_series: Whether to allow series (default is only dataframe / lazyframe). + + Returns: + Decorated function. + + Examples: + Instead of writing + + >>> import narwhals as nw + >>> def func(df): + ... df = nw.from_native(df, pass_through=True) + ... df = df.group_by("a").agg(nw.col("b").sum()) + ... return nw.to_native(df) + + you can just write + + >>> @nw.narwhalify + ... def func(df): + ... return df.group_by("a").agg(nw.col("b").sum()) """ pass_through = validate_strict_and_pass_though( strict, pass_through, pass_through_default=True, emit_deprecation_warning=False @@ -2102,12 +2087,11 @@ def concat( Arguments: items: DataFrames, LazyFrames to concatenate. - how: {'vertical', 'horizontal'} - * vertical: Stacks Series from DataFrames vertically and fills with `null` - if the lengths don't match. - * horizontal: Stacks Series from DataFrames horizontally and fills with `null` - if the lengths don't match. + - vertical: Stacks Series from DataFrames vertically and fills with `null` + if the lengths don't match. + - horizontal: Stacks Series from DataFrames horizontally and fills with `null` + if the lengths don't match. Returns: A new DataFrame, Lazyframe resulting from the concatenation. diff --git a/narwhals/translate.py b/narwhals/translate.py index 54653b1e7..64dd10593 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -713,9 +713,15 @@ def _from_native_impl( # noqa: PLR0915 return native_object -def get_native_namespace(obj: Any) -> Any: +def get_native_namespace(obj: DataFrame[Any] | LazyFrame[Any] | Series) -> Any: """Get native namespace from object. + Arguments: + obj: Dataframe, Lazyframe, or Series. + + Returns: + Native module. + Examples: >>> import polars as pl >>> import pandas as pd @@ -742,58 +748,25 @@ def narwhalify( ) -> Callable[..., Any]: """Decorate function so it becomes dataframe-agnostic. - `narwhalify` will try to convert any dataframe/series-like object into the narwhal + This will try to convert any dataframe/series-like object into the Narwhals respective DataFrame/Series, while leaving the other parameters as they are. - - Similarly, if the output of the function is a narwhals DataFrame or Series, it will be + Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be converted back to the original dataframe/series type, while if the output is another type it will be left as is. - By setting `pass_through=False`, then every input and every output will be required to be a dataframe/series-like object. - Instead of writing - - ```python - import narwhals as nw - - - def func(df): - df = nw.from_native(df, pass_through=True) - df = df.group_by("a").agg(nw.col("b").sum()) - return nw.to_native(df) - ``` - - you can just write - - ```python - import narwhals as nw - - - @nw.narwhalify - def func(df): - return df.group_by("a").agg(nw.col("b").sum()) - ``` - - You can also pass in extra arguments, e.g. - - ```python - @nw.narwhalify(eager_only=True) - ``` - - that will get passed down to `nw.from_native`. - Arguments: func: Function to wrap in a `from_native`-`to_native` block. - strict: Determine what happens if the object isn't supported by Narwhals: + strict: **Deprecated** (v1.13.0): + Please use `pass_through` instead. Note that `strict` is still available + (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, + see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/). + + Determine what happens if the object isn't supported by Narwhals: - `True` (default): raise an error - `False`: pass object through as-is - - **Deprecated** (v1.13.0): - Please use `pass_through` instead. Note that `strict` is still available - (and won't emit a deprecation warning) if you use `narwhals.stable.v1`, - see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/). pass_through: Determine what happens if the object isn't supported by Narwhals: - `False` (default): raise an error @@ -803,6 +776,24 @@ def func(df): implement the Dataframe Interchange Protocol. series_only: Whether to only allow series. allow_series: Whether to allow series (default is only dataframe / lazyframe). + + Returns: + Decorated function. + + Examples: + Instead of writing + + >>> import narwhals as nw + >>> def func(df): + ... df = nw.from_native(df, pass_through=True) + ... df = df.group_by("a").agg(nw.col("b").sum()) + ... return nw.to_native(df) + + you can just write + + >>> @nw.narwhalify + ... def func(df): + ... return df.group_by("a").agg(nw.col("b").sum()) """ from narwhals.utils import validate_strict_and_pass_though @@ -863,6 +854,12 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: def to_py_scalar(scalar_like: Any) -> Any: """If a scalar is not Python native, converts it to Python native. + Arguments: + scalar_like: Scalar-like value. + + Returns: + Python scalar. + Raises: ValueError: If the object is not convertible to a scalar. diff --git a/narwhals/utils.py b/narwhals/utils.py index ee5225948..7f5e1b4ce 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -56,7 +56,14 @@ class Implementation(Enum): def from_native_namespace( cls: type[Self], native_namespace: ModuleType ) -> Implementation: # pragma: no cover - """Instantiate Implementation object from a native namespace module.""" + """Instantiate Implementation object from a native namespace module. + + Arguments: + native_namespace: Native namespace. + + Returns: + Implementation. + """ mapping = { get_pandas(): Implementation.PANDAS, get_modin(): Implementation.MODIN, @@ -68,7 +75,11 @@ def from_native_namespace( return mapping.get(native_namespace, Implementation.UNKNOWN) def to_native_namespace(self: Self) -> ModuleType: - """Return the native namespace module corresponding to Implementation.""" + """Return the native namespace module corresponding to Implementation. + + Returns: + Native module. + """ mapping = { Implementation.PANDAS: get_pandas(), Implementation.MODIN: get_modin(), @@ -127,7 +138,14 @@ def _is_iterable(arg: Any | Iterable[Any]) -> bool: def parse_version(version: Sequence[str | int]) -> tuple[int, ...]: - """Simple version parser; split into a tuple of ints for comparison.""" + """Simple version parser; split into a tuple of ints for comparison. + + Arguments: + version: Version string to parse. + + Returns: + Parsed version number. + """ # lifted from Polars if isinstance(version, str): # pragma: no cover version = version.split(".") @@ -157,6 +175,13 @@ def validate_laziness(items: Iterable[Any]) -> None: def maybe_align_index(lhs: T, rhs: Series | BaseFrame[Any]) -> T: """Align `lhs` to the Index of `rhs`, if they're both pandas-like. + Arguments: + lhs: Dataframe or Series. + rhs: Dataframe or Series to align with. + + Returns: + Same type as input. + Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. @@ -245,6 +270,12 @@ def _validate_index(index: Any) -> None: def maybe_get_index(obj: T) -> Any | None: """Get the index of a DataFrame or a Series, if it's pandas-like. + Arguments: + obj: Dataframe or Series. + + Returns: + Same type as input. + Notes: This is only really intended for backwards-compatibility purposes, for example if your library already aligns indices for users. @@ -289,6 +320,9 @@ def maybe_set_index( `ValueError` is raised. index: series or list of series to set as index. + Returns: + Same type as input. + Raises: ValueError: If one of the following condition happens: @@ -363,6 +397,12 @@ def maybe_set_index( def maybe_reset_index(obj: T) -> T: """Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like. + Arguments: + obj: Dataframe or Series. + + Returns: + Same type as input. + Notes: This is only really intended for backwards-compatibility purposes, for example if your library already resets the index for users. @@ -426,6 +466,9 @@ def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T: *args: Additional arguments which gets passed through. **kwargs: Additional arguments which gets passed through. + Returns: + Same type as input. + Notes: For non-pandas-like inputs, this is a no-op. Also, `args` and `kwargs` just get passed down to the underlying library as-is. @@ -479,6 +522,12 @@ def is_ordered_categorical(series: Series) -> bool: - For PyArrow table: - Categoricals are ordered if `dtype.type.ordered == True`. + Arguments: + series: Input Series. + + Returns: + Whether the Series is an ordered categorical. + Examples: >>> import narwhals as nw >>> import pandas as pd @@ -601,6 +650,9 @@ def is_sequence_but_not_str(sequence: Any) -> TypeGuard[Sequence[Any]]: def find_stacklevel() -> int: """Find the first place in the stack that is not inside narwhals. + Returns: + Stacklevel. + Taken from: https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51 """ @@ -640,13 +692,10 @@ def find_stacklevel() -> int: def issue_deprecation_warning(message: str, _version: str) -> None: """Issue a deprecation warning. - Parameters - ---------- - message - The message associated with the warning. - version - Narwhals version when the warning was introduced. Just used for internal - bookkeeping. + Arguments: + message: The message associated with the warning. + _version: Narwhals version when the warning was introduced. Just used for internal + bookkeeping. """ warn(message=message, category=DeprecationWarning, stacklevel=find_stacklevel()) diff --git a/tests/expr_and_series/cum_max_test.py b/tests/expr_and_series/cum_max_test.py index 2a9c634f0..054537d34 100644 --- a/tests/expr_and_series/cum_max_test.py +++ b/tests/expr_and_series/cum_max_test.py @@ -27,7 +27,9 @@ def test_cum_max_expr( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor + ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_max" if reverse else "cum_max" @@ -45,7 +47,9 @@ def test_cum_max_series( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor_eager + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True) diff --git a/tests/expr_and_series/cum_min_test.py b/tests/expr_and_series/cum_min_test.py index c5a2dbf99..bb92f5b9d 100644 --- a/tests/expr_and_series/cum_min_test.py +++ b/tests/expr_and_series/cum_min_test.py @@ -27,7 +27,9 @@ def test_cum_min_expr( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor + ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_min" if reverse else "cum_min" @@ -45,7 +47,9 @@ def test_cum_min_series( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor_eager + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True) diff --git a/tests/expr_and_series/cum_prod_test.py b/tests/expr_and_series/cum_prod_test.py index 78e5a036a..1d5816ff2 100644 --- a/tests/expr_and_series/cum_prod_test.py +++ b/tests/expr_and_series/cum_prod_test.py @@ -27,7 +27,9 @@ def test_cum_prod_expr( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor + ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_prod" if reverse else "cum_prod" @@ -45,7 +47,9 @@ def test_cum_prod_series( if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager): + if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( + constructor_eager + ): request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor_eager(data), eager_only=True)