Skip to content

Commit

Permalink
Update datasets to 2.20.0 (#3040)
Browse files: browse the repository at this point in the history.
* Update datasets to 2.20.0

* Update poetry lock files

* Force CI rerun

* Pass trust_remote_code in external_files_dataset_builder

* Avoid assertion in test_statistics_endpoint failing due to pandas bug

* Force CI rerun

* Revert commented assertion in test_statistics_endpoint

* Create test fixture with JSON-Lines dataset

* Use test fixture with JSON-Lines dataset in test_statistics_endpoint
  • Loading branch information
albertvillanova authored Aug 23, 2024
1 parent fc2f2b1 commit 29465f3
Show file tree
Hide file tree
Showing 16 changed files with 513 additions and 622 deletions.
17 changes: 17 additions & 0 deletions e2e/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ def json_path(tmp_path_factory: TempPathFactory) -> str:
return path


@pytest.fixture(scope="session")
def jsonl_path(tmp_path_factory: TempPathFactory) -> str:
    """Serialize ``DATA_JSON`` as JSON Lines and return the file's path.

    Every item of ``DATA_JSON`` is dumped with ``json.dumps`` onto its own
    newline-terminated line of ``dataset.jsonl``, which is created inside a
    temporary directory obtained from ``tmp_path_factory.mktemp("data")``.
    """
    target = tmp_path_factory.mktemp("data") / "dataset.jsonl"
    path = str(target)
    with open(path, "w") as handle:
        # One JSON document per line is what makes the file JSON Lines.
        handle.writelines(f"{json.dumps(record)}\n" for record in DATA_JSON)
    return path


@pytest.fixture(scope="session")
def normal_user_public_dataset(csv_path: str) -> Iterator[str]:
with tmp_dataset(namespace=NORMAL_USER, token=NORMAL_USER_TOKEN, files={"data/csv_data.csv": csv_path}) as dataset:
Expand All @@ -51,6 +60,14 @@ def normal_user_public_json_dataset(json_path: str) -> Iterator[str]:
yield dataset


@pytest.fixture(scope="session")
def normal_user_public_jsonl_dataset(jsonl_path: str) -> Iterator[str]:
    """Yield the value produced by ``tmp_dataset`` for a JSON Lines upload.

    The file at ``jsonl_path`` is passed to ``tmp_dataset`` under the name
    ``data/json_data.jsonl``, using the ``NORMAL_USER`` namespace and
    ``NORMAL_USER_TOKEN``. The yielded value is presumably the dataset
    repository name, with cleanup happening when the context exits; confirm
    against ``tmp_dataset``.
    """
    uploaded_files = {"data/json_data.jsonl": jsonl_path}
    with tmp_dataset(namespace=NORMAL_USER, token=NORMAL_USER_TOKEN, files=uploaded_files) as repo:
        yield repo


@pytest.fixture(scope="session")
def normal_user_images_public_dataset() -> Iterator[str]:
with tmp_dataset(
Expand Down
4 changes: 2 additions & 2 deletions e2e/tests/test_14_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from .utils import get_default_config_split, poll_until_ready_and_assert


def test_statistics_endpoint(normal_user_public_json_dataset: str) -> None:
dataset = normal_user_public_json_dataset
def test_statistics_endpoint(normal_user_public_jsonl_dataset: str) -> None:
dataset = normal_user_public_jsonl_dataset
config, split = get_default_config_split()
statistics_response = poll_until_ready_and_assert(
relative_url=f"/statistics?dataset={dataset}&config={config}&split={split}",
Expand Down
93 changes: 41 additions & 52 deletions front/admin_ui/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 29465f3

Please sign in to comment.