From bfe8513bc95abe788477fb23bd7314e6fbbf6d28 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 21 Feb 2024 14:54:40 +0000 Subject: [PATCH] wip typing --- narwhals/pandas_like/dataframe.py | 21 ++++++------- narwhals/pandas_like/expr.py | 42 ++++++++++++------------- narwhals/pandas_like/group_by_object.py | 4 +-- narwhals/pandas_like/namespace.py | 20 +++++++----- narwhals/pandas_like/series.py | 4 +-- narwhals/translate.py | 12 +++---- pyproject.toml | 1 + 7 files changed, 53 insertions(+), 51 deletions(-) diff --git a/narwhals/pandas_like/dataframe.py b/narwhals/pandas_like/dataframe.py index 438665343..a8fafb78b 100644 --- a/narwhals/pandas_like/dataframe.py +++ b/narwhals/pandas_like/dataframe.py @@ -22,7 +22,7 @@ from narwhals.pandas_like.group_by_object import GroupBy from narwhals.pandas_like.group_by_object import LazyGroupBy from narwhals.pandas_like.namespace import Namespace - from narwhals.pandas_like.utils import IntoExpr + from narwhals.spec import IntoExpr class DataFrame(DataFrameT): @@ -42,10 +42,10 @@ def __init__( @property def columns(self) -> list[str]: - return self.dataframe.columns.tolist() + return self.dataframe.columns.tolist() # type: ignore[no-any-return] def _dispatch_to_lazy(self, method: str, *args: Any, **kwargs: Any) -> Self: - return getattr(self.lazy(), method)(*args, **kwargs).collect() + return getattr(self.lazy(), method)(*args, **kwargs).collect() # type: ignore[no-any-return] def __repr__(self) -> str: # pragma: no cover header = f" Standard DataFrame (api_version={self._api_version}) " @@ -90,7 +90,7 @@ def __dataframe_namespace__( return Namespace( api_version=self._api_version, - implementation=self._implementation, # type: ignore[attr-defined] + implementation=self._implementation, ) @property @@ -189,7 +189,7 @@ def __init__( @property def columns(self) -> list[str]: - return self.dataframe.columns.tolist() + return self.dataframe.columns.tolist() # type: ignore[no-any-return] def __repr__(self) -> str: # pragma: no cover header = f" Standard DataFrame (api_version={self._api_version}) " @@ -241,7 +241,7 @@ def __lazyframe_namespace__( return Namespace( api_version=self._api_version, - implementation=self._implementation, # type: ignore[attr-defined] + implementation=self._implementation, ) def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy: @@ -256,7 +256,7 @@ def select( ) -> Self: new_series = evaluate_into_exprs(self, *exprs, **named_exprs) df = horizontal_concat( - [series.series for series in new_series], # type: ignore[attr-defined] + [series.series for series in new_series], implementation=self._implementation, ) return self._from_dataframe(df) @@ -279,10 +279,7 @@ def with_columns( ) -> Self: new_series = evaluate_into_exprs(self, *exprs, **named_exprs) df = self.dataframe.assign( - **{ - series.name: series.series # type: ignore[attr-defined] - for series in new_series - } + **{series.name: series.series for series in new_series} ) return self._from_dataframe(df) @@ -330,7 +327,7 @@ def join( return self._from_dataframe( self.dataframe.merge( - other.dataframe, # type: ignore[attr-defined] + other.dataframe, left_on=left_on, right_on=right_on, how=how, diff --git a/narwhals/pandas_like/expr.py b/narwhals/pandas_like/expr.py index d49f3ab12..dd08846c1 100644 --- a/narwhals/pandas_like/expr.py +++ b/narwhals/pandas_like/expr.py @@ -53,8 +53,8 @@ def from_column_names( return cls( lambda df: [ Series( - df.dataframe.loc[:, column_name], # type: ignore[union-attr] - api_version=df._api_version, # type: ignore[union-attr] # type: ignore[union-attr] + df.dataframe.loc[:, column_name], + api_version=df._api_version, implementation=implementation, ) for column_name in column_names @@ -71,7 +71,7 @@ def __expr_namespace__(self) -> NamespaceProtocol: return Namespace( api_version="todo", - implementation=self._implementation, # type: ignore[attr-defined] + implementation=self._implementation, ) def __eq__(self, other: Expr | Any) -> Self: # type: ignore[override] @@ -104,7 +104,7 @@ def __or__(self, other: Expr | bool | Any) -> Self: def __ror__(self, other: Any) -> Self: return register_expression_call(self, "__ror__", other) - def __add__(self, other: Expr | Any) -> Self: # type: ignore[override] + def __add__(self, other: Expr | Any) -> Self: return register_expression_call(self, "__add__", other) def __radd__(self, other: Any) -> Self: @@ -222,31 +222,31 @@ def ends_with(self, suffix: str) -> Expr: lambda df: [ Series( series.series.str.endswith(suffix), - api_version=df._api_version, # type: ignore[union-attr] - implementation=df._implementation, # type: ignore[union-attr] + api_version=df._api_version, + implementation=df._implementation, ) - for series in self._expr.call(df) # type: ignore[attr-defined] + for series in self._expr.call(df) ], - depth=self._expr._depth + 1, # type: ignore[attr-defined] - function_name=self._expr._function_name, # type: ignore[attr-defined] - root_names=self._expr._root_names, # type: ignore[attr-defined] - output_names=self._expr._output_names, # type: ignore[attr-defined] - implementation=self._expr._implementation, # type: ignore[attr-defined] + depth=self._expr._depth + 1, + function_name=self._expr._function_name, + root_names=self._expr._root_names, + output_names=self._expr._output_names, + implementation=self._expr._implementation, ) def strip_chars(self, characters: str = " ") -> Expr: return Expr( lambda df: [ Series( - series.series.str.strip(characters), # type: ignore[attr-defined] - api_version=df._api_version, # type: ignore[union-attr] - implementation=df._implementation, # type: ignore[union-attr] + series.series.str.strip(characters), + api_version=df._api_version, + implementation=df._implementation, ) - for series in self._expr.call(df) # type: ignore[attr-defined] + for series in self._expr.call(df) ], - depth=self._expr._depth + 1, # type: ignore[attr-defined] - function_name=self._expr._function_name, # type: ignore[attr-defined] - root_names=self._expr._root_names, # type: ignore[attr-defined] - output_names=self._expr._output_names, # type: ignore[attr-defined] - implementation=self._expr._implementation, # type: ignore[attr-defined] + depth=self._expr._depth + 1, + function_name=self._expr._function_name, + root_names=self._expr._root_names, + output_names=self._expr._output_names, + implementation=self._expr._implementation, ) diff --git a/narwhals/pandas_like/group_by_object.py b/narwhals/pandas_like/group_by_object.py index 8ccd638d4..ef9862230 100644 --- a/narwhals/pandas_like/group_by_object.py +++ b/narwhals/pandas_like/group_by_object.py @@ -61,7 +61,7 @@ def agg( implementation: str = self._df._implementation # type: ignore[attr-defined] output_names: list[str] = self._keys for expr in exprs: - expr_output_names = expr._output_names # type: ignore[attr-defined] + expr_output_names = expr._output_names if expr_output_names is None: msg = ( "Anonymous expressions are not supported in group_by.agg.\n" @@ -87,7 +87,7 @@ def agg( # TODO: it might be better to use groupby(...).apply # in this case, but I couldn't get the multi-output # case to work for cuDF. - results_keys = expr.call( # type: ignore[attr-defined] + results_keys = expr.call( LazyFrame( df_keys, api_version=self.api_version, diff --git a/narwhals/pandas_like/namespace.py b/narwhals/pandas_like/namespace.py index 663e83343..7e15431c7 100644 --- a/narwhals/pandas_like/namespace.py +++ b/narwhals/pandas_like/namespace.py @@ -1,9 +1,11 @@ from __future__ import annotations from functools import reduce +from typing import TYPE_CHECKING from typing import Any from typing import Callable from typing import Iterable +from typing import TypeVar from narwhals.pandas_like.dataframe import DataFrame from narwhals.pandas_like.dataframe import LazyFrame @@ -13,10 +15,12 @@ from narwhals.pandas_like.utils import horizontal_concat from narwhals.pandas_like.utils import parse_into_exprs from narwhals.pandas_like.utils import series_from_iterable -from narwhals.spec import AnyDataFrame from narwhals.spec import IntoExpr from narwhals.spec import Namespace as NamespaceProtocol +if TYPE_CHECKING: + AnyDataFrame = TypeVar("AnyDataFrame", DataFrame, LazyFrame) + class Namespace(NamespaceProtocol): def __init__(self, *, api_version: str, implementation: str) -> None: @@ -38,7 +42,7 @@ def concat(self, items: Iterable[AnyDataFrame], *, how: str) -> AnyDataFrame: dfs: list[Any] = [] kind: Any = {} for df in items: - dfs.append(df.dataframe) # type: ignore[union-attr, attr-defined] + dfs.append(df.dataframe) kind.append(type(df)) if len(kind) > 1: msg = "Can only concat DataFrames or LazyFrames, not mixtures of the two" @@ -88,12 +92,12 @@ def len(self) -> Expr: lambda df: [ Series( series_from_iterable( - [len(df.dataframe)], # type: ignore[union-attr] + [len(df.dataframe)], name="len", index=[0], implementation=self._implementation, ), - api_version=df._api_version, # type: ignore[union-attr] + api_version=df._api_version, implementation=self._implementation, ), ], @@ -126,8 +130,8 @@ def _create_series_from_scalar(self, value: Any, series: Series) -> Series: return Series( series_from_iterable( [value], - name=series.series.name, # type: ignore[attr-defined] - index=series.series.index[0:1], # type: ignore[attr-defined] + name=series.series.name, + index=series.series.index[0:1], implementation=self._implementation, ), api_version=self.api_version, @@ -148,8 +152,8 @@ def all(self) -> Expr: return Expr( lambda df: [ Series( - df.dataframe.loc[:, column_name], # type: ignore[union-attr] - api_version=df._api_version, # type: ignore[union-attr] + df.dataframe.loc[:, column_name], + api_version=df._api_version, implementation=self._implementation, ) for column_name in df.columns diff --git a/narwhals/pandas_like/series.py b/narwhals/pandas_like/series.py index b99a38f3b..484d46c21 100644 --- a/narwhals/pandas_like/series.py +++ b/narwhals/pandas_like/series.py @@ -266,7 +266,7 @@ def drop_nulls(self) -> Series: def n_unique(self) -> int: ser = self.series - return ser.nunique() + return ser.nunique() # type: ignore[no-any-return] def zip_with(self, mask: SeriesProtocol, other: SeriesProtocol) -> SeriesProtocol: ser = self.series @@ -280,7 +280,7 @@ def sample(self, n: int, fraction: float, *, with_replacement: bool) -> Series: def unique(self) -> SeriesProtocol: ser = self.series - return ser.unique() + return ser.unique() # type: ignore[no-any-return] def is_nan(self) -> Series: ser = self.series diff --git a/narwhals/translate.py b/narwhals/translate.py index 5b6f2310c..71c51c8a3 100644 --- a/narwhals/translate.py +++ b/narwhals/translate.py @@ -10,8 +10,8 @@ def to_polars_api(df: Any, version: str) -> tuple[LazyFrame, Namespace]: - if hasattr(df, "__narwhals__"): - return df.__narwhals__() + if hasattr(df, "__narwhals_dataframe__"): + return df.__narwhals_dataframe__() # type: ignore[no-any-return] try: import polars as pl except ModuleNotFoundError: @@ -76,12 +76,12 @@ def get_namespace(obj: Any, implementation: str | None = None) -> Namespace: if isinstance(obj, (pl.DataFrame, pl.LazyFrame, pl.Series)): return pl # type: ignore[return-value] if hasattr(obj, "__dataframe_namespace__"): - return obj.__dataframe_namespace__() + return obj.__dataframe_namespace__() # type: ignore[no-any-return] if hasattr(obj, "__series_namespace__"): - return obj.__series_namespace__() + return obj.__series_namespace__() # type: ignore[no-any-return] if hasattr(obj, "__lazyframe_namespace__"): - return obj.__lazyframe_namespace__() + return obj.__lazyframe_namespace__() # type: ignore[no-any-return] if hasattr(obj, "__expr_namespace__"): - return obj.__expr_namespace__() + return obj.__expr_namespace__() # type: ignore[no-any-return] msg = f"Could not find namespace for object {obj}" raise TypeError(msg) diff --git a/pyproject.toml b/pyproject.toml index 407767e1c..53da68bf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ exclude_also = [ ] [tool.mypy] +strict = true [[tool.mypy.overrides]] # the pandas API is just too inconsistent for type hinting to be useful.