From 4b98df8959bead84799cee425fe629afba071a4e Mon Sep 17 00:00:00 2001
From: Doug Branton
Date: Tue, 14 May 2024 10:31:38 -0700
Subject: [PATCH] add a read_parquet test

---
 tests/dask_nested/test_io.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 tests/dask_nested/test_io.py

diff --git a/tests/dask_nested/test_io.py b/tests/dask_nested/test_io.py
new file mode 100644
index 0000000..d7f11ec
--- /dev/null
+++ b/tests/dask_nested/test_io.py
@@ -0,0 +1,31 @@
+import dask_nested as dn
+
+
+def test_read_parquet(test_dataset, tmp_path):
+    """Round-trip test_dataset through parquet and check it is unchanged."""
+
+    # Set up separate parquet directories under tmp_path
+    nested_save_path = tmp_path / "nested"
+    test_save_path = tmp_path / "test_dataset"
+
+    # Save the flattened nested column to parquet, keeping the index
+    flat_nested = test_dataset.nested.nest.to_flat()
+    flat_nested.to_parquet(nested_save_path, write_index=True)
+
+    # Save the base columns to parquet, keeping the index
+    test_dataset[["a", "b"]].to_parquet(test_save_path, write_index=True)
+
+    # Read both back; divisions are requested for the comparison below
+    base = dn.read_parquet(test_save_path, calculate_divisions=True)
+    nested = dn.read_parquet(nested_save_path, calculate_divisions=True)
+
+    base = base.add_nested(nested, "nested")
+
+    # Check the loaded dataset against the original
+    assert base.divisions == test_dataset.divisions  # equal divisions
+    assert base.compute().equals(test_dataset.compute())  # equal data
+
+    # Compare the reloaded flat nested data against the ORIGINAL dataset's
+    base_nested_flat = base.nested.nest.to_flat().compute()
+    test_nested_flat = test_dataset.nested.nest.to_flat().compute()
+    assert base_nested_flat.equals(test_nested_flat)