From 95f5d311af8cc3665c3138b853593f1685aaa87f Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli
Date: Mon, 12 Aug 2024 18:03:30 +0100
Subject: [PATCH] fix: Raise informative error message if a non-existent column name is passed (#3533)

Co-authored-by: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
---
 altair/utils/core.py                  |  2 +-
 altair/vegalite/v5/schema/channels.py |  7 ++++---
 tests/utils/test_schemapi.py          | 17 ++++++++++++++++-
 tools/generate_schema_wrapper.py      |  7 ++++---
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/altair/utils/core.py b/altair/utils/core.py
index 8ab466a87..f5ef659b1 100644
--- a/altair/utils/core.py
+++ b/altair/utils/core.py
@@ -504,7 +504,7 @@ def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]:
 
 def parse_shorthand(  # noqa: C901
     shorthand: dict[str, Any] | str,
-    data: pd.DataFrame | DataFrameLike | None = None,
+    data: IntoDataFrame | None = None,
     parse_aggregates: bool = True,
     parse_window_ops: bool = False,
     parse_timeunits: bool = True,
diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py
index 34daf00ab..30774ce22 100644
--- a/altair/vegalite/v5/schema/channels.py
+++ b/altair/vegalite/v5/schema/channels.py
@@ -14,7 +14,7 @@
 
 from typing import TYPE_CHECKING, Any, Literal, Sequence, TypedDict, Union, overload
 from typing_extensions import TypeAlias
-from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe
+import narwhals.stable.v1 as nw
 
 from altair.utils import infer_encoding_types as _infer_encoding_types
 from altair.utils import parse_shorthand
@@ -170,7 +170,8 @@ def to_dict(
         if shorthand is Undefined:
             parsed = {}
         elif isinstance(shorthand, str):
-            parsed = parse_shorthand(shorthand, data=context.get("data", None))
+            data: nw.DataFrame | Any = context.get("data", None)
+            parsed = parse_shorthand(shorthand, data=data)
             type_required = "type" in self._kwds  # type: ignore[attr-defined]
             type_in_shorthand = "type" in parsed
             type_defined_explicitly = self._get("type") is not Undefined  # type: ignore[attr-defined]
@@ -179,7 +180,7 @@ def to_dict(
                 # We still parse it out of the shorthand, but drop it here.
                 parsed.pop("type", None)
             elif not (type_in_shorthand or type_defined_explicitly):
-                if _is_pandas_dataframe(context.get("data", None)):
+                if isinstance(data, nw.DataFrame):
                     msg = (
                         f'Unable to determine data type for the field "{shorthand}";'
                         " verify that the field name is not misspelled."
diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py
index acea9dcb6..a6107601a 100644
--- a/tests/utils/test_schemapi.py
+++ b/tests/utils/test_schemapi.py
@@ -10,7 +10,7 @@
 import warnings
 from collections import deque
 from functools import partial
-from typing import Any, Callable, Iterable, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
 
 import jsonschema
 import jsonschema.exceptions
@@ -33,6 +33,9 @@
 from altair.vegalite.v5.schema.core import FieldOneOfPredicate, Legend
 from vega_datasets import data
 
+if TYPE_CHECKING:
+    from narwhals.typing import IntoDataFrame
+
 _JSON_SCHEMA_DRAFT_URL = load_schema()["$schema"]
 # Make tests inherit from _TestSchema, so that when we test from_dict it won't
 # try to use SchemaBase objects defined elsewhere as wrappers.
@@ -881,6 +884,18 @@ def test_multiple_field_strings_in_condition():
     )
 
 
+@pytest.mark.parametrize("tp", [pd.DataFrame, pl.DataFrame])
+def test_non_existent_column_name(tp: Callable[..., IntoDataFrame]) -> None:
+    df = tp({"a": [1, 2], "b": [4, 5]})
+    msg = (
+        'Unable to determine data type for the field "c"; verify that the field name '
+        "is not misspelled. If you are referencing a field from a transform, also "
+        "confirm that the data type is specified correctly."
+    )
+    with pytest.raises(ValueError, match=msg):
+        alt.Chart(df).mark_line().encode(x="a", y="c").to_json()
+
+
 def test_serialize_numpy_types():
     m = MySchema(
         a={"date": np.datetime64("2019-01-01")},
diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py
index e73902da3..a625394bd 100644
--- a/tools/generate_schema_wrapper.py
+++ b/tools/generate_schema_wrapper.py
@@ -104,7 +104,8 @@ def to_dict(
         if shorthand is Undefined:
             parsed = {}
         elif isinstance(shorthand, str):
-            parsed = parse_shorthand(shorthand, data=context.get("data", None))
+            data: nw.DataFrame | Any = context.get("data", None)
+            parsed = parse_shorthand(shorthand, data=data)
             type_required = "type" in self._kwds  # type: ignore[attr-defined]
             type_in_shorthand = "type" in parsed
             type_defined_explicitly = self._get("type") is not Undefined  # type: ignore[attr-defined]
@@ -113,7 +114,7 @@ def to_dict(
                 # We still parse it out of the shorthand, but drop it here.
                 parsed.pop("type", None)
             elif not (type_in_shorthand or type_defined_explicitly):
-                if _is_pandas_dataframe(context.get("data", None)):
+                if isinstance(data, nw.DataFrame):
                     msg = (
                         f'Unable to determine data type for the field "{shorthand}";'
                         " verify that the field name is not misspelled."
@@ -677,7 +678,7 @@ def generate_vegalite_channel_wrappers(
         "from __future__ import annotations\n",
         "from typing import Any, overload, Sequence, List, Literal, Union, TYPE_CHECKING, TypedDict",
         "from typing_extensions import TypeAlias",
-        "from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe",
+        "import narwhals.stable.v1 as nw",
         "from altair.utils.schemapi import Undefined, with_property_setters",
         "from altair.utils import infer_encoding_types as _infer_encoding_types",
         "from altair.utils import parse_shorthand",