From a2823030eeb3e59c25c741e8456a69fd9ccf795c Mon Sep 17 00:00:00 2001 From: Sandro Campos Date: Tue, 22 Oct 2024 16:37:00 -0400 Subject: [PATCH] Rebase branch --- src/lsdb/catalog/dataset/healpix_dataset.py | 25 ++++++++++++++------- tests/lsdb/catalog/test_catalog.py | 24 ++++++++++++++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/lsdb/catalog/dataset/healpix_dataset.py b/src/lsdb/catalog/dataset/healpix_dataset.py index 4946b5d9..55c0a42b 100644 --- a/src/lsdb/catalog/dataset/healpix_dataset.py +++ b/src/lsdb/catalog/dataset/healpix_dataset.py @@ -1,6 +1,5 @@ from __future__ import annotations -import copy import warnings from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Tuple, cast @@ -82,6 +81,20 @@ def __len__(self): """ return len(self.hc_structure) + def _create_modified_hc_structure(self) -> HCHealpixDataset: + """Copy the catalog structure and invalidate the number of rows. + + Returns: + A copy of the catalog's structure with the total number of rows set to 0. 
+ """ + return self.hc_structure.__class__( + catalog_info=self.hc_structure.catalog_info.copy_and_update(total_rows=0), + pixels=self.hc_structure.pixel_tree, + catalog_path=self.hc_structure.catalog_path, + schema=self.hc_structure.schema, + moc=self.hc_structure.moc, + ) + def get_healpix_pixels(self) -> List[HealpixPixel]: """Get all HEALPix pixels that are contained in the catalog @@ -146,8 +159,7 @@ def query(self, expr: str) -> Self: with the query expression """ ndf = self._ddf.query(expr) - hc_structure = copy.copy(self.hc_structure) - hc_structure.catalog_info.total_rows = 0 + hc_structure = self._create_modified_hc_structure() return self.__class__(ndf, self._ddf_pixel_map, hc_structure) def _perform_search( @@ -527,8 +539,7 @@ def drop_na_part(df: npd.NestedFrame): return df ndf = self._ddf.map_partitions(drop_na_part, meta=self._ddf._meta) - hc_structure = copy.copy(self.hc_structure) - hc_structure.catalog_info.total_rows = 0 + hc_structure = self._create_modified_hc_structure() return self.__class__(ndf, self._ddf_pixel_map, hc_structure) def nest_lists( @@ -574,9 +585,7 @@ def nest_lists( list_columns=list_columns, name=name, ) - - hc_structure = copy.copy(self.hc_structure) - hc_structure.catalog_info.total_rows = 0 + hc_structure = self._create_modified_hc_structure() return self.__class__(new_ddf, self._ddf_pixel_map, hc_structure) def reduce(self, func, *args, meta=None, append_columns=False, **kwargs) -> Self: diff --git a/tests/lsdb/catalog/test_catalog.py b/tests/lsdb/catalog/test_catalog.py index 08c80794..14c419f4 100644 --- a/tests/lsdb/catalog/test_catalog.py +++ b/tests/lsdb/catalog/test_catalog.py @@ -658,3 +658,27 @@ def test_joined_catalog_has_undetermined_len( ) with pytest.raises(ValueError, match="undetermined"): len(small_sky_order1_catalog.merge_asof(small_sky_xmatch_catalog)) + + +def test_modified_hc_structure_is_a_deep_copy(small_sky_order1_catalog): + assert small_sky_order1_catalog.hc_structure.pixel_tree is not None + 
assert small_sky_order1_catalog.hc_structure.catalog_path is not None + assert small_sky_order1_catalog.hc_structure.schema is not None + assert small_sky_order1_catalog.hc_structure.moc is not None + assert small_sky_order1_catalog.hc_structure.catalog_info.total_rows == 131 + + modified_hc_structure = small_sky_order1_catalog._create_modified_hc_structure() + modified_hc_structure.pixel_tree = None + modified_hc_structure.catalog_path = None + modified_hc_structure.schema = None + modified_hc_structure.moc = None + + # The original catalog structure is not modified + assert small_sky_order1_catalog.hc_structure.pixel_tree is not None + assert small_sky_order1_catalog.hc_structure.catalog_path is not None + assert small_sky_order1_catalog.hc_structure.schema is not None + assert small_sky_order1_catalog.hc_structure.moc is not None + assert small_sky_order1_catalog.hc_structure.catalog_info.total_rows == 131 + + # The rows of the new structure are invalidated + assert modified_hc_structure.catalog_info.total_rows == 0