Skip to content

Commit

Permalink
test: test_datasets overhaul
Browse files Browse the repository at this point in the history
- Eliminated all flaky tests
- Mocking more of the internals, which is safer to run in parallel
- Split out non-threadsafe tests with `@no_xdist`
- Huge performance improvement for the slower tests
- Added some helper functions (`is_*`) where common patterns were identified
- **Removed skipping from native `pandas` backend**
  - Confirms that it's now safe without `pyarrow` installed
  • Loading branch information
dangotbanned committed Jan 20, 2025
1 parent 63f4be0 commit 7433eb8
Show file tree
Hide file tree
Showing 4 changed files with 308 additions and 303 deletions.
24 changes: 13 additions & 11 deletions altair/datasets/_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
from packaging.requirements import Requirement

from altair.datasets._typing import Dataset, Extension, Metadata
from altair.vegalite.v5.schema._typing import OneOrSeq
Expand Down Expand Up @@ -379,7 +380,7 @@ class _PyArrowReader(_Reader["pa.Table", "pa.Table"]):

def _maybe_fn(self, meta: Metadata, /) -> Callable[..., pa.Table]:
fn = super()._maybe_fn(meta)
if fn is self._read_json_polars:
if fn == self._read_json_polars:
return fn
elif meta["is_json"]:
if meta["is_tabular"]:
Expand Down Expand Up @@ -550,7 +551,7 @@ def _requirements(s: _ConcreteT, /) -> _ConcreteT: ...
def _requirements(s: Literal["pandas[pyarrow]"], /) -> tuple[_Pandas, _PyArrow]: ...


def _requirements(s: _Backend, /):
def _requirements(s: Any, /) -> Any:
concrete: set[Literal[_Polars, _Pandas, _PyArrow]] = {"polars", "pandas", "pyarrow"}
if s in concrete:
return s
Expand All @@ -559,12 +560,13 @@ def _requirements(s: _Backend, /):

req = Requirement(s)
supports_extras: set[Literal[_Pandas]] = {"pandas"}
if req.name in supports_extras:
name = req.name
if (extras := req.extras) and extras == {"pyarrow"}:
extra = "pyarrow"
return name, extra
else:
raise NotImplementedError(s)
else:
raise NotImplementedError(s)
if req.name in supports_extras and req.extras == {"pyarrow"}:
return req.name, "pyarrow"
return _requirements_unknown(req)


def _requirements_unknown(req: Requirement | str, /) -> Any:
    """
    Split a requirement into its distribution name and any extras.

    Parameters
    ----------
    req
        Either an already-parsed ``Requirement`` or a requirement string
        (e.g. ``"pandas[pyarrow]"``), which is parsed first.

    Returns
    -------
    tuple[str, ...]
        ``(name, *extras)``, with the extras in sorted order.
        A requirement without extras yields the 1-tuple ``(name,)``.
    """
    # Runtime import, scoped to this function.
    from packaging.requirements import Requirement

    parsed = Requirement(req) if isinstance(req, str) else req
    # ``extras`` is a set: sort it so the result does not depend on
    # set iteration order, which can differ between interpreter runs.
    return (parsed.name, *sorted(parsed.extras))
21 changes: 12 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,18 @@ cwd = "."
[tool.taskipy.tasks]
lint = "ruff check"
format = "ruff format --diff --check"
ruff-check = "task lint && task format"
ruff-fix = "task lint && ruff format"
type-check = "mypy altair tests"

pytest = "pytest"
test = "task lint && task format && task type-check && task pytest"
test-fast = "task ruff-fix && pytest -m \"not slow\""
test-slow = "task ruff-fix && pytest -m \"slow\""
test-datasets = "task ruff-fix && pytest tests -k test_datasets -m \"\""
test-min = "task lint && task format && task type-check && hatch test --python 3.9"
test-all = "task lint && task format && task type-check && hatch test --all"
pytest-serial = "pytest -m \"no_xdist\" --numprocesses=1"
pytest = "pytest && task pytest-serial"
test = "task ruff-check && task type-check && task pytest"
test-fast = "task ruff-fix && pytest -m \"not slow and not datasets_debug and not no_xdist\""
test-slow = "task ruff-fix && pytest -m \"slow and not datasets_debug and not no_xdist\""
test-datasets = "task ruff-fix && pytest tests -k test_datasets -m \"not no_xdist\" && task pytest-serial"
test-min = "task ruff-check && task type-check && hatch test --python 3.9"
test-all = "task ruff-check && task type-check && hatch test --all"


generate-schema-wrapper = "mypy tools && python tools/generate_schema_wrapper.py && task test"
Expand Down Expand Up @@ -303,12 +305,13 @@ addopts = [
"tests",
"altair",
"tools",
"-m not datasets_debug",
"-m not datasets_debug and not no_xdist",
]
# https://docs.pytest.org/en/stable/how-to/mark.html#registering-marks
markers = [
"slow: Label tests as slow (deselect with '-m \"not slow\"')",
"datasets_debug: Disabled by default due to high number of requests"
"datasets_debug: Disabled by default due to high number of requests",
"no_xdist: Unsafe to run in parallel"
]

[tool.mypy]
Expand Down
10 changes: 10 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ def windows_has_tzdata() -> bool:
>>> hatch run test-slow --durations=25 # doctest: +SKIP
"""

no_xdist: pytest.MarkDecorator = pytest.mark.no_xdist()
"""
Custom ``pytest.mark`` decorator.
Each marked test will run **serially**, after all other selected tests.
.. tip::
Use as a last resort when a test depends on manipulating global state.
"""

skip_requires_ipython: pytest.MarkDecorator = pytest.mark.skipif(
find_spec("IPython") is None, reason="`IPython` not installed."
)
Expand Down
Loading

0 comments on commit 7433eb8

Please sign in to comment.