From bfe8513bc95abe788477fb23bd7314e6fbbf6d28 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Wed, 21 Feb 2024 14:54:40 +0000
Subject: [PATCH] wip typing: enable mypy strict mode, drop stale type-ignore comments

---
 narwhals/pandas_like/dataframe.py       | 21 ++++++-------
 narwhals/pandas_like/expr.py            | 42 ++++++++++++-------------
 narwhals/pandas_like/group_by_object.py |  4 +--
 narwhals/pandas_like/namespace.py       | 20 +++++++-----
 narwhals/pandas_like/series.py          |  4 +--
 narwhals/translate.py                   | 12 +++----
 pyproject.toml                          |  1 +
 7 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/narwhals/pandas_like/dataframe.py b/narwhals/pandas_like/dataframe.py
index 438665343..a8fafb78b 100644
--- a/narwhals/pandas_like/dataframe.py
+++ b/narwhals/pandas_like/dataframe.py
@@ -22,7 +22,7 @@
     from narwhals.pandas_like.group_by_object import GroupBy
     from narwhals.pandas_like.group_by_object import LazyGroupBy
     from narwhals.pandas_like.namespace import Namespace
-    from narwhals.pandas_like.utils import IntoExpr
+    from narwhals.spec import IntoExpr
 
 
 class DataFrame(DataFrameT):
@@ -42,10 +42,10 @@ def __init__(
 
     @property
     def columns(self) -> list[str]:
-        return self.dataframe.columns.tolist()
+        return self.dataframe.columns.tolist()  # type: ignore[no-any-return]
 
     def _dispatch_to_lazy(self, method: str, *args: Any, **kwargs: Any) -> Self:
-        return getattr(self.lazy(), method)(*args, **kwargs).collect()
+        return getattr(self.lazy(), method)(*args, **kwargs).collect()  # type: ignore[no-any-return]
 
     def __repr__(self) -> str:  # pragma: no cover
         header = f" Standard DataFrame (api_version={self._api_version}) "
@@ -90,7 +90,7 @@ def __dataframe_namespace__(
 
         return Namespace(
             api_version=self._api_version,
-            implementation=self._implementation,  # type: ignore[attr-defined]
+            implementation=self._implementation,
         )
 
     @property
@@ -189,7 +189,7 @@ def __init__(
 
     @property
     def columns(self) -> list[str]:
-        return self.dataframe.columns.tolist()
+        return self.dataframe.columns.tolist()  # type: ignore[no-any-return]
 
     def __repr__(self) -> str:  # pragma: no cover
         header = f" Standard DataFrame (api_version={self._api_version}) "
@@ -241,7 +241,7 @@ def __lazyframe_namespace__(
 
         return Namespace(
             api_version=self._api_version,
-            implementation=self._implementation,  # type: ignore[attr-defined]
+            implementation=self._implementation,
         )
 
     def group_by(self, *keys: str | Iterable[str]) -> LazyGroupBy:
@@ -256,7 +256,7 @@ def select(
     ) -> Self:
         new_series = evaluate_into_exprs(self, *exprs, **named_exprs)
         df = horizontal_concat(
-            [series.series for series in new_series],  # type: ignore[attr-defined]
+            [series.series for series in new_series],
             implementation=self._implementation,
         )
         return self._from_dataframe(df)
@@ -279,10 +279,7 @@ def with_columns(
     ) -> Self:
         new_series = evaluate_into_exprs(self, *exprs, **named_exprs)
         df = self.dataframe.assign(
-            **{
-                series.name: series.series  # type: ignore[attr-defined]
-                for series in new_series
-            }
+            **{series.name: series.series for series in new_series}
         )
         return self._from_dataframe(df)
 
@@ -330,7 +327,7 @@ def join(
 
         return self._from_dataframe(
             self.dataframe.merge(
-                other.dataframe,  # type: ignore[attr-defined]
+                other.dataframe,
                 left_on=left_on,
                 right_on=right_on,
                 how=how,
diff --git a/narwhals/pandas_like/expr.py b/narwhals/pandas_like/expr.py
index d49f3ab12..dd08846c1 100644
--- a/narwhals/pandas_like/expr.py
+++ b/narwhals/pandas_like/expr.py
@@ -53,8 +53,8 @@ def from_column_names(
         return cls(
             lambda df: [
                 Series(
-                    df.dataframe.loc[:, column_name],  # type: ignore[union-attr]
-                    api_version=df._api_version,  # type: ignore[union-attr]  # type: ignore[union-attr]
+                    df.dataframe.loc[:, column_name],
+                    api_version=df._api_version,
                     implementation=implementation,
                 )
                 for column_name in column_names
@@ -71,7 +71,7 @@ def __expr_namespace__(self) -> NamespaceProtocol:
 
         return Namespace(
             api_version="todo",
-            implementation=self._implementation,  # type: ignore[attr-defined]
+            implementation=self._implementation,
         )
 
     def __eq__(self, other: Expr | Any) -> Self:  # type: ignore[override]
@@ -104,7 +104,7 @@ def __or__(self, other: Expr | bool | Any) -> Self:
     def __ror__(self, other: Any) -> Self:
         return register_expression_call(self, "__ror__", other)
 
-    def __add__(self, other: Expr | Any) -> Self:  # type: ignore[override]
+    def __add__(self, other: Expr | Any) -> Self:
         return register_expression_call(self, "__add__", other)
 
     def __radd__(self, other: Any) -> Self:
@@ -222,31 +222,31 @@ def ends_with(self, suffix: str) -> Expr:
             lambda df: [
                 Series(
                     series.series.str.endswith(suffix),
-                    api_version=df._api_version,  # type: ignore[union-attr]
-                    implementation=df._implementation,  # type: ignore[union-attr]
+                    api_version=df._api_version,
+                    implementation=df._implementation,
                 )
-                for series in self._expr.call(df)  # type: ignore[attr-defined]
+                for series in self._expr.call(df)
             ],
-            depth=self._expr._depth + 1,  # type: ignore[attr-defined]
-            function_name=self._expr._function_name,  # type: ignore[attr-defined]
-            root_names=self._expr._root_names,  # type: ignore[attr-defined]
-            output_names=self._expr._output_names,  # type: ignore[attr-defined]
-            implementation=self._expr._implementation,  # type: ignore[attr-defined]
+            depth=self._expr._depth + 1,
+            function_name=self._expr._function_name,
+            root_names=self._expr._root_names,
+            output_names=self._expr._output_names,
+            implementation=self._expr._implementation,
         )
 
     def strip_chars(self, characters: str = " ") -> Expr:
         return Expr(
             lambda df: [
                 Series(
-                    series.series.str.strip(characters),  # type: ignore[attr-defined]
-                    api_version=df._api_version,  # type: ignore[union-attr]
-                    implementation=df._implementation,  # type: ignore[union-attr]
+                    series.series.str.strip(characters),
+                    api_version=df._api_version,
+                    implementation=df._implementation,
                 )
-                for series in self._expr.call(df)  # type: ignore[attr-defined]
+                for series in self._expr.call(df)
             ],
-            depth=self._expr._depth + 1,  # type: ignore[attr-defined]
-            function_name=self._expr._function_name,  # type: ignore[attr-defined]
-            root_names=self._expr._root_names,  # type: ignore[attr-defined]
-            output_names=self._expr._output_names,  # type: ignore[attr-defined]
-            implementation=self._expr._implementation,  # type: ignore[attr-defined]
+            depth=self._expr._depth + 1,
+            function_name=self._expr._function_name,
+            root_names=self._expr._root_names,
+            output_names=self._expr._output_names,
+            implementation=self._expr._implementation,
         )
diff --git a/narwhals/pandas_like/group_by_object.py b/narwhals/pandas_like/group_by_object.py
index 8ccd638d4..ef9862230 100644
--- a/narwhals/pandas_like/group_by_object.py
+++ b/narwhals/pandas_like/group_by_object.py
@@ -61,7 +61,7 @@ def agg(
         implementation: str = self._df._implementation  # type: ignore[attr-defined]
         output_names: list[str] = self._keys
         for expr in exprs:
-            expr_output_names = expr._output_names  # type: ignore[attr-defined]
+            expr_output_names = expr._output_names
             if expr_output_names is None:
                 msg = (
                     "Anonymous expressions are not supported in group_by.agg.\n"
@@ -87,7 +87,7 @@ def agg(
                 # TODO: it might be better to use groupby(...).apply
                 # in this case, but I couldn't get the multi-output
                 # case to work for cuDF.
-                results_keys = expr.call(  # type: ignore[attr-defined]
+                results_keys = expr.call(
                     LazyFrame(
                         df_keys,
                         api_version=self.api_version,
diff --git a/narwhals/pandas_like/namespace.py b/narwhals/pandas_like/namespace.py
index 663e83343..7e15431c7 100644
--- a/narwhals/pandas_like/namespace.py
+++ b/narwhals/pandas_like/namespace.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
 from functools import reduce
+from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
 from typing import Iterable
+from typing import TypeVar
 
 from narwhals.pandas_like.dataframe import DataFrame
 from narwhals.pandas_like.dataframe import LazyFrame
@@ -13,10 +15,12 @@
 from narwhals.pandas_like.utils import horizontal_concat
 from narwhals.pandas_like.utils import parse_into_exprs
 from narwhals.pandas_like.utils import series_from_iterable
-from narwhals.spec import AnyDataFrame
 from narwhals.spec import IntoExpr
 from narwhals.spec import Namespace as NamespaceProtocol
 
+if TYPE_CHECKING:
+    AnyDataFrame = TypeVar("AnyDataFrame", DataFrame, LazyFrame)
+
 
 class Namespace(NamespaceProtocol):
     def __init__(self, *, api_version: str, implementation: str) -> None:
@@ -38,7 +42,7 @@ def concat(self, items: Iterable[AnyDataFrame], *, how: str) -> AnyDataFrame:
         dfs: list[Any] = []
         kind: Any = {}
         for df in items:
-            dfs.append(df.dataframe)  # type: ignore[union-attr, attr-defined]
+            dfs.append(df.dataframe)
             kind.append(type(df))
         if len(kind) > 1:
             msg = "Can only concat DataFrames or LazyFrames, not mixtures of the two"
@@ -88,12 +92,12 @@ def len(self) -> Expr:
             lambda df: [
                 Series(
                     series_from_iterable(
-                        [len(df.dataframe)],  # type: ignore[union-attr]
+                        [len(df.dataframe)],
                         name="len",
                         index=[0],
                         implementation=self._implementation,
                     ),
-                    api_version=df._api_version,  # type: ignore[union-attr]
+                    api_version=df._api_version,
                     implementation=self._implementation,
                 ),
             ],
@@ -126,8 +130,8 @@ def _create_series_from_scalar(self, value: Any, series: Series) -> Series:
         return Series(
             series_from_iterable(
                 [value],
-                name=series.series.name,  # type: ignore[attr-defined]
-                index=series.series.index[0:1],  # type: ignore[attr-defined]
+                name=series.series.name,
+                index=series.series.index[0:1],
                 implementation=self._implementation,
             ),
             api_version=self.api_version,
@@ -148,8 +152,8 @@ def all(self) -> Expr:
         return Expr(
             lambda df: [
                 Series(
-                    df.dataframe.loc[:, column_name],  # type: ignore[union-attr]
-                    api_version=df._api_version,  # type: ignore[union-attr]
+                    df.dataframe.loc[:, column_name],
+                    api_version=df._api_version,
                     implementation=self._implementation,
                 )
                 for column_name in df.columns
diff --git a/narwhals/pandas_like/series.py b/narwhals/pandas_like/series.py
index b99a38f3b..484d46c21 100644
--- a/narwhals/pandas_like/series.py
+++ b/narwhals/pandas_like/series.py
@@ -266,7 +266,7 @@ def drop_nulls(self) -> Series:
 
     def n_unique(self) -> int:
         ser = self.series
-        return ser.nunique()
+        return ser.nunique()  # type: ignore[no-any-return]
 
     def zip_with(self, mask: SeriesProtocol, other: SeriesProtocol) -> SeriesProtocol:
         ser = self.series
@@ -280,7 +280,7 @@ def sample(self, n: int, fraction: float, *, with_replacement: bool) -> Series:
 
     def unique(self) -> SeriesProtocol:
         ser = self.series
-        return ser.unique()
+        return ser.unique()  # type: ignore[no-any-return]
 
     def is_nan(self) -> Series:
         ser = self.series
diff --git a/narwhals/translate.py b/narwhals/translate.py
index 5b6f2310c..71c51c8a3 100644
--- a/narwhals/translate.py
+++ b/narwhals/translate.py
@@ -10,8 +10,8 @@
 
 
 def to_polars_api(df: Any, version: str) -> tuple[LazyFrame, Namespace]:
-    if hasattr(df, "__narwhals__"):
-        return df.__narwhals__()
+    if hasattr(df, "__narwhals_dataframe__"):
+        return df.__narwhals_dataframe__()  # type: ignore[no-any-return]
     try:
         import polars as pl
     except ModuleNotFoundError:
@@ -76,12 +76,12 @@ def get_namespace(obj: Any, implementation: str | None = None) -> Namespace:
         if isinstance(obj, (pl.DataFrame, pl.LazyFrame, pl.Series)):
             return pl  # type: ignore[return-value]
     if hasattr(obj, "__dataframe_namespace__"):
-        return obj.__dataframe_namespace__()
+        return obj.__dataframe_namespace__()  # type: ignore[no-any-return]
     if hasattr(obj, "__series_namespace__"):
-        return obj.__series_namespace__()
+        return obj.__series_namespace__()  # type: ignore[no-any-return]
     if hasattr(obj, "__lazyframe_namespace__"):
-        return obj.__lazyframe_namespace__()
+        return obj.__lazyframe_namespace__()  # type: ignore[no-any-return]
     if hasattr(obj, "__expr_namespace__"):
-        return obj.__expr_namespace__()
+        return obj.__expr_namespace__()  # type: ignore[no-any-return]
     msg = f"Could not find namespace for object {obj}"
     raise TypeError(msg)
diff --git a/pyproject.toml b/pyproject.toml
index 407767e1c..53da68bf8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,6 +83,7 @@ exclude_also = [
 ]
 
 [tool.mypy]
+strict = true
 
 [[tool.mypy.overrides]]
 # the pandas API is just too inconsistent for type hinting to be useful.