fix: Raise informative error message if a non-existent column name is…

… passed (#3533) Co-authored-by: dangotbanned <[email protected]>
vega · Aug 12, 2024 · 95f5d31 · 95f5d31
1 parent 0062e62
commit 95f5d31
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 8 deletions.
diff --git a/altair/utils/core.py b/altair/utils/core.py
@@ -504,7 +504,7 @@ def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]:
 
 def parse_shorthand(  # noqa: C901
     shorthand: dict[str, Any] | str,
-    data: pd.DataFrame | DataFrameLike | None = None,
+    data: IntoDataFrame | None = None,
     parse_aggregates: bool = True,
     parse_window_ops: bool = False,
     parse_timeunits: bool = True,

diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py
@@ -14,7 +14,7 @@
 from typing import TYPE_CHECKING, Any, Literal, Sequence, TypedDict, Union, overload
 from typing_extensions import TypeAlias
 
-from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe
+import narwhals.stable.v1 as nw
 
 from altair.utils import infer_encoding_types as _infer_encoding_types
 from altair.utils import parse_shorthand
@@ -170,7 +170,8 @@ def to_dict(
         if shorthand is Undefined:
             parsed = {}
         elif isinstance(shorthand, str):
-            parsed = parse_shorthand(shorthand, data=context.get("data", None))
+            data: nw.DataFrame | Any = context.get("data", None)
+            parsed = parse_shorthand(shorthand, data=data)
             type_required = "type" in self._kwds  # type: ignore[attr-defined]
             type_in_shorthand = "type" in parsed
             type_defined_explicitly = self._get("type") is not Undefined  # type: ignore[attr-defined]
@@ -179,7 +180,7 @@ def to_dict(
                 # We still parse it out of the shorthand, but drop it here.
                 parsed.pop("type", None)
             elif not (type_in_shorthand or type_defined_explicitly):
-                if _is_pandas_dataframe(context.get("data", None)):
+                if isinstance(data, nw.DataFrame):
                     msg = (
                         f'Unable to determine data type for the field "{shorthand}";'
                         " verify that the field name is not misspelled."

diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py
@@ -10,7 +10,7 @@
 import warnings
 from collections import deque
 from functools import partial
-from typing import Any, Callable, Iterable, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
 
 import jsonschema
 import jsonschema.exceptions
@@ -33,6 +33,9 @@
 from altair.vegalite.v5.schema.core import FieldOneOfPredicate, Legend
 from vega_datasets import data
 
+if TYPE_CHECKING:
+    from narwhals.typing import IntoDataFrame
+
 _JSON_SCHEMA_DRAFT_URL = load_schema()["$schema"]
 # Make tests inherit from _TestSchema, so that when we test from_dict it won't
 # try to use SchemaBase objects defined elsewhere as wrappers.
@@ -881,6 +884,18 @@ def test_multiple_field_strings_in_condition():
         )
 
 
+@pytest.mark.parametrize("tp", [pd.DataFrame, pl.DataFrame])
+def test_non_existent_column_name(tp: Callable[..., IntoDataFrame]) -> None:
+    df = tp({"a": [1, 2], "b": [4, 5]})
+    msg = (
+        'Unable to determine data type for the field "c"; verify that the field name '
+        "is not misspelled. If you are referencing a field from a transform, also "
+        "confirm that the data type is specified correctly."
+    )
+    with pytest.raises(ValueError, match=msg):
+        alt.Chart(df).mark_line().encode(x="a", y="c").to_json()
+
+
 def test_serialize_numpy_types():
     m = MySchema(
         a={"date": np.datetime64("2019-01-01")},

diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py
@@ -104,7 +104,8 @@ def to_dict(
         if shorthand is Undefined:
             parsed = {}
         elif isinstance(shorthand, str):
-            parsed = parse_shorthand(shorthand, data=context.get("data", None))
+            data: nw.DataFrame | Any = context.get("data", None)
+            parsed = parse_shorthand(shorthand, data=data)
             type_required = "type" in self._kwds  # type: ignore[attr-defined]
             type_in_shorthand = "type" in parsed
             type_defined_explicitly = self._get("type") is not Undefined  # type: ignore[attr-defined]
@@ -113,7 +114,7 @@ def to_dict(
                 # We still parse it out of the shorthand, but drop it here.
                 parsed.pop("type", None)
             elif not (type_in_shorthand or type_defined_explicitly):
-                if _is_pandas_dataframe(context.get("data", None)):
+                if isinstance(data, nw.DataFrame):
                     msg = (
                         f'Unable to determine data type for the field "{shorthand}";'
                         " verify that the field name is not misspelled."
@@ -677,7 +678,7 @@ def generate_vegalite_channel_wrappers(
         "from __future__ import annotations\n",
         "from typing import Any, overload, Sequence, List, Literal, Union, TYPE_CHECKING, TypedDict",
         "from typing_extensions import TypeAlias",
-        "from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe",
+        "import narwhals.stable.v1 as nw",
         "from altair.utils.schemapi import Undefined, with_property_setters",
         "from altair.utils import infer_encoding_types as _infer_encoding_types",
         "from altair.utils import parse_shorthand",