Skip to content

Commit

Permalink
fix: Raise informative error message if a non-existent column name is…
Browse files Browse the repository at this point in the history
… passed (#3533)

Co-authored-by: dangotbanned <[email protected]>
  • Loading branch information
MarcoGorelli and dangotbanned authored Aug 12, 2024
1 parent 0062e62 commit 95f5d31
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
2 changes: 1 addition & 1 deletion altair/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ def to_eager_narwhals_dataframe(data: IntoDataFrame) -> nw.DataFrame[Any]:

def parse_shorthand( # noqa: C901
shorthand: dict[str, Any] | str,
data: pd.DataFrame | DataFrameLike | None = None,
data: IntoDataFrame | None = None,
parse_aggregates: bool = True,
parse_window_ops: bool = False,
parse_timeunits: bool = True,
Expand Down
7 changes: 4 additions & 3 deletions altair/vegalite/v5/schema/channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing import TYPE_CHECKING, Any, Literal, Sequence, TypedDict, Union, overload
from typing_extensions import TypeAlias

from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe
import narwhals.stable.v1 as nw

from altair.utils import infer_encoding_types as _infer_encoding_types
from altair.utils import parse_shorthand
Expand Down Expand Up @@ -170,7 +170,8 @@ def to_dict(
if shorthand is Undefined:
parsed = {}
elif isinstance(shorthand, str):
parsed = parse_shorthand(shorthand, data=context.get("data", None))
data: nw.DataFrame | Any = context.get("data", None)
parsed = parse_shorthand(shorthand, data=data)
type_required = "type" in self._kwds # type: ignore[attr-defined]
type_in_shorthand = "type" in parsed
type_defined_explicitly = self._get("type") is not Undefined # type: ignore[attr-defined]
Expand All @@ -179,7 +180,7 @@ def to_dict(
# We still parse it out of the shorthand, but drop it here.
parsed.pop("type", None)
elif not (type_in_shorthand or type_defined_explicitly):
if _is_pandas_dataframe(context.get("data", None)):
if isinstance(data, nw.DataFrame):
msg = (
f'Unable to determine data type for the field "{shorthand}";'
" verify that the field name is not misspelled."
Expand Down
17 changes: 16 additions & 1 deletion tests/utils/test_schemapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import warnings
from collections import deque
from functools import partial
from typing import Any, Callable, Iterable, Sequence
from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence

import jsonschema
import jsonschema.exceptions
Expand All @@ -33,6 +33,9 @@
from altair.vegalite.v5.schema.core import FieldOneOfPredicate, Legend
from vega_datasets import data

if TYPE_CHECKING:
from narwhals.typing import IntoDataFrame

_JSON_SCHEMA_DRAFT_URL = load_schema()["$schema"]
# Make tests inherit from _TestSchema, so that when we test from_dict it won't
# try to use SchemaBase objects defined elsewhere as wrappers.
Expand Down Expand Up @@ -881,6 +884,18 @@ def test_multiple_field_strings_in_condition():
)


@pytest.mark.parametrize("tp", [pd.DataFrame, pl.DataFrame])
def test_non_existent_column_name(tp: Callable[..., IntoDataFrame]) -> None:
df = tp({"a": [1, 2], "b": [4, 5]})
msg = (
'Unable to determine data type for the field "c"; verify that the field name '
"is not misspelled. If you are referencing a field from a transform, also "
"confirm that the data type is specified correctly."
)
with pytest.raises(ValueError, match=msg):
alt.Chart(df).mark_line().encode(x="a", y="c").to_json()


def test_serialize_numpy_types():
m = MySchema(
a={"date": np.datetime64("2019-01-01")},
Expand Down
7 changes: 4 additions & 3 deletions tools/generate_schema_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def to_dict(
if shorthand is Undefined:
parsed = {}
elif isinstance(shorthand, str):
parsed = parse_shorthand(shorthand, data=context.get("data", None))
data: nw.DataFrame | Any = context.get("data", None)
parsed = parse_shorthand(shorthand, data=data)
type_required = "type" in self._kwds # type: ignore[attr-defined]
type_in_shorthand = "type" in parsed
type_defined_explicitly = self._get("type") is not Undefined # type: ignore[attr-defined]
Expand All @@ -113,7 +114,7 @@ def to_dict(
# We still parse it out of the shorthand, but drop it here.
parsed.pop("type", None)
elif not (type_in_shorthand or type_defined_explicitly):
if _is_pandas_dataframe(context.get("data", None)):
if isinstance(data, nw.DataFrame):
msg = (
f'Unable to determine data type for the field "{shorthand}";'
" verify that the field name is not misspelled."
Expand Down Expand Up @@ -677,7 +678,7 @@ def generate_vegalite_channel_wrappers(
"from __future__ import annotations\n",
"from typing import Any, overload, Sequence, List, Literal, Union, TYPE_CHECKING, TypedDict",
"from typing_extensions import TypeAlias",
"from narwhals.dependencies import is_pandas_dataframe as _is_pandas_dataframe",
"import narwhals.stable.v1 as nw",
"from altair.utils.schemapi import Undefined, with_property_setters",
"from altair.utils import infer_encoding_types as _infer_encoding_types",
"from altair.utils import parse_shorthand",
Expand Down

0 comments on commit 95f5d31

Please sign in to comment.