diff --git a/.github/ISSUE_TEMPLATE/4-release_tracker.md b/.github/ISSUE_TEMPLATE/4-release_tracker.md index e2e56727..36057520 100644 --- a/.github/ISSUE_TEMPLATE/4-release_tracker.md +++ b/.github/ISSUE_TEMPLATE/4-release_tracker.md @@ -20,6 +20,8 @@ Please check which packages you would like to be released: e.g. major/minor/patch, deadlines, blocking issues, breaking changes, folks to notify +## nested-* updates to include + @@ -32,14 +34,15 @@ e.g. major/minor/patch, deadlines, blocking issues, breaking changes, folks to n - [ ] tag in github - [ ] confirm on [pypi](https://pypi.org/manage/project/hats/releases/) -- [ ] request new conda-forge version (see [similar issue](https://github.com/conda-forge/hats-feedstock/issues/3)) and approve +- [ ] request new conda-forge version (open [bot command issue](https://github.com/conda-forge/hats-feedstock/issues/) with title `@conda-forge-admin, please update version`) +- [ ] approve conda-forge PR - [ ] confirm on [conda-forge](https://anaconda.org/conda-forge/hats) ## lsdb release - [ ] tag in github - [ ] confirm on [pypi](https://pypi.org/manage/project/lsdb/releases/) -- [ ] request new conda-forge version (see [similar issue](https://github.com/conda-forge/hats-feedstock/issues/3)) +- [ ] request new conda-forge version (open [bot command issue](https://github.com/conda-forge/lsdb-feedstock/issues/) with title `@conda-forge-admin, please update version`) - [ ] confirm tagged hats version and approve - [ ] confirm on [conda-forge](https://anaconda.org/conda-forge/lsdb) @@ -47,7 +50,7 @@ e.g. major/minor/patch, deadlines, blocking issues, breaking changes, folks to n - [ ] tag in github - [ ] confirm on [pypi](https://pypi.org/manage/project/hats-import/releases/) -- [ ] request new conda-forge version (see [similar issue](https://github.com/conda-forge/hats-feedstock/issues/3)) and approve +- [ ] request new conda-forge version (open [bot command issue](https://github.com/conda-forge/hats-import-feedstock/issues/) with title `@conda-forge-admin, please update version`) - [ ] confirm tagged hats version and approve - [ ] confirm on [conda-forge](https://anaconda.org/conda-forge/hats-import) diff --git a/src/lsdb/catalog/catalog.py b/src/lsdb/catalog/catalog.py index 5659af5e..8d3a6e49 100644 --- a/src/lsdb/catalog/catalog.py +++ b/src/lsdb/catalog/catalog.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Callable, List, Tuple, Type +from typing import Callable, Type import hats as hc import nested_dask as nd @@ -142,7 +142,7 @@ def assign(self, **kwargs) -> Catalog: def crossmatch( self, other: Catalog, - suffixes: Tuple[str, str] | None = None, + suffixes: tuple[str, str] | None = None, algorithm: ( Type[AbstractCrossmatchAlgorithm] | BuiltInCrossmatchAlgorithm ) = BuiltInCrossmatchAlgorithm.KD_TREE, @@ -310,7 +310,7 @@ def cone_search(self, ra: float, dec: float, radius_arcsec: float, fine: bool = """ return self.search(ConeSearch(ra, dec, radius_arcsec, fine)) - def box_search(self, ra: Tuple[float, float], dec: Tuple[float, float], fine: bool = True) -> Catalog: + def box_search(self, ra: tuple[float, float], dec: tuple[float, float], fine: bool = True) -> Catalog: """Performs filtering according to right ascension and declination ranges. The right ascension edges follow great arc circles and the declination edges follow small arc circles. 
@@ -375,7 +375,7 @@ def order_search(self, min_order: int = 0, max_order: int | None = None) -> Cata """ return self.search(OrderSearch(min_order, max_order)) - def pixel_search(self, pixels: List[Tuple[int, int]]) -> Catalog: + def pixel_search(self, pixels: list[tuple[int, int]]) -> Catalog: """Finds all catalog pixels that overlap with the requested pixel set. Args: @@ -419,12 +419,12 @@ def merge( self, other: Catalog, how: str = "inner", - on: str | List | None = None, - left_on: str | List | None = None, - right_on: str | List | None = None, + on: str | list | None = None, + left_on: str | list | None = None, + right_on: str | list | None = None, left_index: bool = False, right_index: bool = False, - suffixes: Tuple[str, str] | None = None, + suffixes: tuple[str, str] | None = None, ) -> nd.NestedFrame: """Performs the merge of two catalog Dataframes @@ -473,7 +473,7 @@ def merge_asof( self, other: Catalog, direction: str = "backward", - suffixes: Tuple[str, str] | None = None, + suffixes: tuple[str, str] | None = None, output_catalog_name: str | None = None, ): """Uses the pandas `merge_asof` function to merge two catalogs on their indices by distance of keys @@ -524,7 +524,7 @@ def join( left_on: str | None = None, right_on: str | None = None, through: AssociationCatalog | None = None, - suffixes: Tuple[str, str] | None = None, + suffixes: tuple[str, str] | None = None, output_catalog_name: str | None = None, ) -> Catalog: """Perform a spatial join to another catalog diff --git a/src/lsdb/catalog/dataset/dataset.py b/src/lsdb/catalog/dataset/dataset.py index b7da7bce..7db701d2 100644 --- a/src/lsdb/catalog/dataset/dataset.py +++ b/src/lsdb/catalog/dataset/dataset.py @@ -1,5 +1,3 @@ -from typing import List - import hats as hc import nested_dask as nd import nested_pandas as npd @@ -46,7 +44,7 @@ def compute(self) -> npd.NestedFrame: """Compute dask distributed dataframe to pandas dataframe""" return self._ddf.compute() - def to_delayed(self, optimize_graph: bool = True) -> List[Delayed]: + def to_delayed(self, optimize_graph: bool = True) -> list[Delayed]: """Get a list of Dask Delayed objects for each partition in the dataset Used for more advanced custom operations, but to use again with LSDB, the delayed objects diff --git a/src/lsdb/catalog/dataset/healpix_dataset.py b/src/lsdb/catalog/dataset/healpix_dataset.py index cfe48f78..c85e902f 100644 --- a/src/lsdb/catalog/dataset/healpix_dataset.py +++ b/src/lsdb/catalog/dataset/healpix_dataset.py @@ -2,7 +2,7 @@ import warnings from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Tuple, Type, cast +from typing import Any, Callable, Iterable, Type, cast import astropy import dask @@ -118,7 +118,7 @@ def _create_modified_hc_structure(self, **kwargs) -> HCHealpixDataset: moc=self.hc_structure.moc, ) - def get_healpix_pixels(self) -> List[HealpixPixel]: + def get_healpix_pixels(self) -> list[HealpixPixel]: """Get all HEALPix pixels that are contained in the catalog Returns: @@ -126,7 +126,7 @@ def get_healpix_pixels(self) -> List[HealpixPixel]: """ return self.hc_structure.get_healpix_pixels() - def get_ordered_healpix_pixels(self) -> List[HealpixPixel]: + def get_ordered_healpix_pixels(self) -> list[HealpixPixel]: """Get all HEALPix pixels that are contained in the catalog, ordered by breadth-first nested ordering. 
@@ -189,7 +189,7 @@ def _perform_search( self, metadata: hc.catalog.Catalog | hc.catalog.MarginCatalog, search: AbstractSearch, - ) -> Tuple[DaskDFPixelMap, nd.NestedFrame]: + ) -> tuple[DaskDFPixelMap, nd.NestedFrame]: """Performs a search on the catalog from a list of pixels to search in Args: @@ -237,7 +237,7 @@ def map_partitions( self, func: Callable[..., npd.NestedFrame], *args, - meta: pd.DataFrame | pd.Series | Dict | Iterable | Tuple | None = None, + meta: pd.DataFrame | pd.Series | dict | Iterable | tuple | None = None, include_pixel: bool = False, **kwargs, ) -> Self | dd.Series: @@ -329,7 +329,7 @@ def prune_empty_partitions(self, persist: bool = False) -> Self: filtered_hc_structure = self.hc_structure.filter_from_pixel_list(non_empty_pixels) return self.__class__(search_ddf, ddf_partition_map, filtered_hc_structure) - def _get_non_empty_partitions(self) -> Tuple[List[HealpixPixel], np.ndarray]: + def _get_non_empty_partitions(self) -> tuple[list[HealpixPixel], np.ndarray]: """Determines which pixels and partitions of a catalog are not empty Returns: @@ -356,7 +356,7 @@ def skymap_data( order: int | None = None, default_value: Any = 0.0, **kwargs, - ) -> Dict[HealpixPixel, Delayed]: + ) -> dict[HealpixPixel, Delayed]: """Perform a function on each partition of the catalog, returning a dict of values for each pixel. Args: @@ -438,7 +438,7 @@ def skymap( order: int | None = None, default_value: Any = 0, projection="MOL", - plotting_args: Dict | None = None, + plotting_args: dict | None = None, **kwargs, ) -> tuple[Figure, WCSAxes]: """Plot a skymap of an aggregate function applied over each partition @@ -722,7 +722,7 @@ def plot_points( color_col: str | None = None, projection: str = "MOL", title: str | None = None, - fov: Quantity | Tuple[Quantity, Quantity] | None = None, + fov: Quantity | tuple[Quantity, Quantity] | None = None, center: SkyCoord | None = None, wcs: astropy.wcs.WCS | None = None, frame_class: Type[BaseFrame] | None = None, diff --git a/src/lsdb/core/crossmatch/abstract_crossmatch_algorithm.py b/src/lsdb/core/crossmatch/abstract_crossmatch_algorithm.py index 0ef7c592..5e2bc66e 100644 --- a/src/lsdb/core/crossmatch/abstract_crossmatch_algorithm.py +++ b/src/lsdb/core/crossmatch/abstract_crossmatch_algorithm.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING import nested_pandas as npd import numpy as np @@ -66,7 +66,7 @@ def __init__( left_catalog_info: TableProperties, right_catalog_info: TableProperties, right_margin_catalog_info: TableProperties | None, - suffixes: Tuple[str, str], + suffixes: tuple[str, str], ): """Initializes a crossmatch algorithm @@ -107,7 +107,7 @@ def crossmatch(self, **kwargs) -> npd.NestedFrame: ) return self._create_crossmatch_df(l_inds, r_inds, extra_cols) - def perform_crossmatch(self) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]: + def perform_crossmatch(self) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]: """Performs a crossmatch to get the indices of the matching rows and any extra columns Any additional keyword arguments needed can be added to this method in the subclass, and the user diff --git a/src/lsdb/core/crossmatch/bounded_kdtree_match.py b/src/lsdb/core/crossmatch/bounded_kdtree_match.py index 3eabe4ff..dbc87854 100644 --- a/src/lsdb/core/crossmatch/bounded_kdtree_match.py +++ b/src/lsdb/core/crossmatch/bounded_kdtree_match.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Tuple 
+from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -36,7 +36,7 @@ def perform_crossmatch( n_neighbors: int = 1, radius_arcsec: float = 1, min_radius_arcsec: float = 0, - ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]: + ) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]: """Perform a cross-match between the data from two HEALPix pixels Finds the n closest neighbors in the right catalog for each point in the left catalog that diff --git a/src/lsdb/core/crossmatch/kdtree_match.py b/src/lsdb/core/crossmatch/kdtree_match.py index 3406515f..216f0b50 100644 --- a/src/lsdb/core/crossmatch/kdtree_match.py +++ b/src/lsdb/core/crossmatch/kdtree_match.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING import numpy as np import numpy.typing as npt @@ -44,7 +44,7 @@ def perform_crossmatch( self, n_neighbors: int = 1, radius_arcsec: float = 1, - ) -> Tuple[np.ndarray, np.ndarray, pd.DataFrame]: + ) -> tuple[np.ndarray, np.ndarray, pd.DataFrame]: """Perform a cross-match between the data from two HEALPix pixels Finds the n closest neighbors in the right catalog for each point in the left catalog that @@ -72,7 +72,7 @@ def perform_crossmatch( ) return left_idx, right_idx, extra_columns - def _get_point_coordinates(self) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: + def _get_point_coordinates(self) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: left_xyz = _lon_lat_to_xyz( lon=self.left[self.left_catalog_info.ra_column].to_numpy(), lat=self.left[self.left_catalog_info.dec_column].to_numpy(), diff --git a/src/lsdb/core/crossmatch/kdtree_utils.py b/src/lsdb/core/crossmatch/kdtree_utils.py index dbdc0d57..f79028eb 100644 --- a/src/lsdb/core/crossmatch/kdtree_utils.py +++ b/src/lsdb/core/crossmatch/kdtree_utils.py @@ -1,5 +1,4 @@ import math -from typing import Tuple import numpy as np import numpy.typing as npt @@ -12,7 +11,7 @@ def _find_crossmatch_indices( n_neighbors: int, max_distance: float, min_distance: float = 0, -) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: +) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: # If right catalog is empty, tree.query will raise an exception # Left catalog cannot be empty, as it checked in perform_crossmatch() if len(right_xyz) == 0: @@ -52,7 +51,7 @@ def _query_min_max_neighbors( n_neighbors: int, min_distance: float, max_distance: float, -) -> Tuple[np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, np.ndarray]: """Finds `n_neighbors` within a distance range for all points in a pair of partitions""" left_tree = KDTree(left_xyz, compact_nodes=True, balanced_tree=True, copy_data=False) diff --git a/src/lsdb/core/plotting/plot_points.py b/src/lsdb/core/plotting/plot_points.py index 964ee91c..7cb67ef1 100644 --- a/src/lsdb/core/plotting/plot_points.py +++ b/src/lsdb/core/plotting/plot_points.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Tuple, Type +from typing import Type import astropy import matplotlib.pyplot as plt @@ -22,7 +22,7 @@ def plot_points( color_col: str | None = None, projection: str = "MOL", title: str = "", - fov: Quantity | Tuple[Quantity, Quantity] | None = None, + fov: Quantity | tuple[Quantity, Quantity] | None = None, center: SkyCoord | None = None, wcs: astropy.wcs.WCS | None = None, frame_class: Type[BaseFrame] | None = None, diff --git a/src/lsdb/core/plotting/skymap.py b/src/lsdb/core/plotting/skymap.py index 
fba6e5a7..305465aa 100644 --- a/src/lsdb/core/plotting/skymap.py +++ b/src/lsdb/core/plotting/skymap.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Callable, Dict +from typing import Any, Callable import hats.pixel_math.healpix_shim as hp import nested_pandas as npd @@ -40,7 +40,7 @@ def apply_func(df): def compute_skymap( - pixel_map: Dict[HealpixPixel, Any], order: int | None = None, default_value: Any = 0.0 + pixel_map: dict[HealpixPixel, Any], order: int | None = None, default_value: Any = 0.0 ) -> np.ndarray: """Returns a histogram map of healpix_pixels to values. diff --git a/src/lsdb/core/search/abstract_search.py b/src/lsdb/core/search/abstract_search.py index 4ed5269e..c7817e79 100644 --- a/src/lsdb/core/search/abstract_search.py +++ b/src/lsdb/core/search/abstract_search.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Tuple, Type +from typing import TYPE_CHECKING, Type import astropy import nested_pandas as npd @@ -45,7 +45,7 @@ def plot( self, projection: str = "MOL", title: str = "", - fov: Quantity | Tuple[Quantity, Quantity] | None = None, + fov: Quantity | tuple[Quantity, Quantity] | None = None, center: SkyCoord | None = None, wcs: astropy.wcs.WCS | None = None, frame_class: Type[BaseFrame] | None = None, diff --git a/src/lsdb/core/search/pixel_search.py b/src/lsdb/core/search/pixel_search.py index b99a823c..f836811d 100644 --- a/src/lsdb/core/search/pixel_search.py +++ b/src/lsdb/core/search/pixel_search.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING import nested_pandas as npd from hats.pixel_math import HealpixPixel @@ -18,7 +18,7 @@ class PixelSearch(AbstractSearch): Does not filter points inside those partitions. """ - def __init__(self, pixels: List[Tuple[int, int]]): + def __init__(self, pixels: list[tuple[int, int]]): super().__init__(fine=False) self.pixels = [HealpixPixel(o, p) for o, p in set(pixels)] diff --git a/src/lsdb/dask/crossmatch_catalog_data.py b/src/lsdb/dask/crossmatch_catalog_data.py index 0d8e7ada..07e9ac55 100644 --- a/src/lsdb/dask/crossmatch_catalog_data.py +++ b/src/lsdb/dask/crossmatch_catalog_data.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, Tuple, Type +from typing import TYPE_CHECKING, Type import dask import nested_dask as nd @@ -76,12 +76,12 @@ def perform_crossmatch( def crossmatch_catalog_data( left: Catalog, right: Catalog, - suffixes: Tuple[str, str], + suffixes: tuple[str, str], algorithm: ( Type[AbstractCrossmatchAlgorithm] | BuiltInCrossmatchAlgorithm ) = BuiltInCrossmatchAlgorithm.KD_TREE, **kwargs, -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Cross-matches the data from two catalogs Args: diff --git a/src/lsdb/dask/divisions.py b/src/lsdb/dask/divisions.py index b6f509ee..be353f34 100644 --- a/src/lsdb/dask/divisions.py +++ b/src/lsdb/dask/divisions.py @@ -1,14 +1,12 @@ from __future__ import annotations -from typing import List, Tuple - import numpy as np from hats.pixel_math import HealpixPixel from hats.pixel_math.healpix_pixel_function import get_pixel_argsort from hats.pixel_math.spatial_index import healpix_to_spatial_index -def get_pixels_divisions(healpix_pixels: List[HealpixPixel]) -> Tuple[int, ...] | None: +def get_pixels_divisions(healpix_pixels: list[HealpixPixel]) -> tuple[int, ...] 
| None: """Calculates the Dask Dataframe divisions for a list of HEALPix pixels. Divisions include the minimum value of every HEALPix pixel spatial_index diff --git a/src/lsdb/dask/join_catalog_data.py b/src/lsdb/dask/join_catalog_data.py index 6ae4750e..db35f5a9 100644 --- a/src/lsdb/dask/join_catalog_data.py +++ b/src/lsdb/dask/join_catalog_data.py @@ -2,7 +2,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING import dask import nested_dask as nd @@ -34,7 +34,7 @@ NON_JOINING_ASSOCIATION_COLUMNS = ["Norder", "Dir", "Npix", "join_Norder", "join_Dir", "join_Npix"] -def rename_columns_with_suffixes(left: npd.NestedFrame, right: npd.NestedFrame, suffixes: Tuple[str, str]): +def rename_columns_with_suffixes(left: npd.NestedFrame, right: npd.NestedFrame, suffixes: tuple[str, str]): """Renames two dataframes with the suffixes specified Args: @@ -66,8 +66,8 @@ def perform_join_on( right_margin_catalog_info: TableProperties, left_on: str, right_on: str, - suffixes: Tuple[str, str], - right_columns: List[str], + suffixes: tuple[str, str], + right_columns: list[str], ): """Performs a join on two catalog partitions @@ -116,7 +116,7 @@ def perform_join_nested( right_margin_catalog_info: TableProperties, left_on: str, right_on: str, - right_columns: List[str], + right_columns: list[str], right_name: str, ): """Performs a join on two catalog partitions by adding the right catalog a nested column using @@ -167,8 +167,8 @@ def perform_join_through( right_catalog_info: TableProperties, right_margin_catalog_info: TableProperties, assoc_catalog_info: TableProperties, - suffixes: Tuple[str, str], - right_columns: List[str], + suffixes: tuple[str, str], + right_columns: list[str], ): """Performs a join on two catalog partitions through an association catalog @@ -235,7 +235,7 @@ def perform_merge_asof( right_pixel: HealpixPixel, left_catalog_info: TableProperties, right_catalog_info: TableProperties, - suffixes: Tuple[str, str], + suffixes: tuple[str, str], direction: str, ): """Performs a merge_asof on two catalog partitions @@ -265,8 +265,8 @@ def perform_merge_asof( def join_catalog_data_on( - left: Catalog, right: Catalog, left_on: str, right_on: str, suffixes: Tuple[str, str] -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: + left: Catalog, right: Catalog, left_on: str, right_on: str, suffixes: tuple[str, str] +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Joins two catalogs spatially on a specified column Args: @@ -311,7 +311,7 @@ def join_catalog_data_nested( left_on: str, right_on: str, nested_column_name: str | None = None, -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Joins two catalogs spatially on a specified column, adding the right as a nested column with nested dask @@ -356,8 +356,8 @@ def join_catalog_data_nested( def join_catalog_data_through( - left: Catalog, right: Catalog, association: AssociationCatalog, suffixes: Tuple[str, str] -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: + left: Catalog, right: Catalog, association: AssociationCatalog, suffixes: tuple[str, str] +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Joins two catalogs with an association table Args: @@ -421,8 +421,8 @@ def join_catalog_data_through( def merge_asof_catalog_data( - left: Catalog, right: Catalog, suffixes: Tuple[str, str], direction: str = "backward" -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, 
PixelAlignment]: + left: Catalog, right: Catalog, suffixes: tuple[str, str], direction: str = "backward" +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Uses the pandas `merge_asof` function to merge two catalogs on their indices by distance of keys Must be along catalog indices, and does not include margin caches, meaning results may be incomplete for diff --git a/src/lsdb/dask/merge_catalog_functions.py b/src/lsdb/dask/merge_catalog_functions.py index fb109f6c..2e850852 100644 --- a/src/lsdb/dask/merge_catalog_functions.py +++ b/src/lsdb/dask/merge_catalog_functions.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Callable, List, Sequence, Tuple +from typing import TYPE_CHECKING, Callable, Sequence import hats.pixel_math.healpix_shim as hp import nested_dask as nd @@ -26,7 +26,7 @@ def concat_partition_and_margin( - partition: npd.NestedFrame, margin: npd.NestedFrame | None, right_columns: List[str] + partition: npd.NestedFrame, margin: npd.NestedFrame | None, right_columns: list[str] ) -> npd.NestedFrame: """Concatenates a partition and margin dataframe together @@ -103,8 +103,8 @@ def align_catalogs(left: Catalog, right: Catalog, add_right_margin: bool = True) def align_and_apply( - catalog_mappings: List[Tuple[HealpixDataset | None, List[HealpixPixel]]], func: Callable, *args, **kwargs -) -> List[Delayed]: + catalog_mappings: list[tuple[HealpixDataset | None, list[HealpixPixel]]], func: Callable, *args, **kwargs +) -> list[Delayed]: """Aligns catalogs to a given ordering of pixels and applies a function each set of aligned partitions Args: @@ -178,8 +178,8 @@ def filter_by_spatial_index_to_pixel(dataframe: npd.NestedFrame, order: int, pix def construct_catalog_args( - partitions: List[Delayed], meta_df: npd.NestedFrame, alignment: PixelAlignment -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: + partitions: list[Delayed], meta_df: npd.NestedFrame, alignment: PixelAlignment +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Constructs the arguments needed to create a catalog from a list of delayed partitions Args: @@ -202,7 +202,7 @@ def construct_catalog_args( def get_healpix_pixels_from_alignment( alignment: PixelAlignment, -) -> Tuple[List[HealpixPixel], List[HealpixPixel]]: +) -> tuple[list[HealpixPixel], list[HealpixPixel]]: """Gets the list of primary and join pixels as the HealpixPixel class from a PixelAlignment Args: @@ -347,8 +347,8 @@ def get_partition_map_from_alignment_pixels(join_pixels: pd.DataFrame) -> DaskDF def align_catalog_to_partitions( - catalog: HealpixDataset | None, pixels: List[HealpixPixel] -) -> List[Delayed | None]: + catalog: HealpixDataset | None, pixels: list[HealpixPixel] +) -> list[Delayed | None]: """Aligns the partitions of a Catalog to a dataframe with HEALPix pixels in each row Args: diff --git a/src/lsdb/dask/merge_map_catalog_data.py b/src/lsdb/dask/merge_map_catalog_data.py index d272ab3c..e660c89d 100644 --- a/src/lsdb/dask/merge_map_catalog_data.py +++ b/src/lsdb/dask/merge_map_catalog_data.py @@ -1,7 +1,7 @@ # pylint: disable=duplicate-code from __future__ import annotations -from typing import TYPE_CHECKING, Callable, Tuple +from typing import TYPE_CHECKING, Callable import dask import nested_dask as nd @@ -68,7 +68,7 @@ def merge_map_catalog_data( *args, meta: npd.NestedFrame | None = None, **kwargs, -) -> Tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: +) -> tuple[nd.NestedFrame, DaskDFPixelMap, PixelAlignment]: """Applies a function to each pair of 
partitions in this catalog and the map catalog. The pixels from each catalog are aligned via a `PixelAlignment`, and the respective dataframes diff --git a/src/lsdb/dask/partition_indexer.py b/src/lsdb/dask/partition_indexer.py index e4444114..87f95db8 100644 --- a/src/lsdb/dask/partition_indexer.py +++ b/src/lsdb/dask/partition_indexer.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import List, Tuple - import numpy as np from lsdb.core.search.pixel_search import PixelSearch @@ -19,7 +17,7 @@ def __getitem__(self, item): pixels = self._get_pixels_from_partition_indices(indices) return self.catalog.search(PixelSearch(pixels)) - def _parse_partition_indices(self, item: int | List[int]) -> List[int]: + def _parse_partition_indices(self, item: int | list[int]) -> list[int]: """Parses the partition indices provided in the square brackets accessor. It is either a single integer or a sequence-like set of integers.""" if isinstance(item, int): @@ -27,7 +25,7 @@ def _parse_partition_indices(self, item: int | List[int]) -> List[int]: indices = np.arange(len(self.catalog._ddf_pixel_map), dtype=object)[item].tolist() return indices - def _get_pixels_from_partition_indices(self, indices: List[int]) -> List[Tuple[int, int]]: + def _get_pixels_from_partition_indices(self, indices: list[int]) -> list[tuple[int, int]]: """Performs a reverse-lookup in the catalog pixel-to-partition map and returns the pixels for the specified partition `indices`.""" inverted_pixel_map = {i: pixel for pixel, i in self.catalog._ddf_pixel_map.items()} diff --git a/src/lsdb/loaders/dataframe/dataframe_catalog_loader.py b/src/lsdb/loaders/dataframe/dataframe_catalog_loader.py index ab79a869..5d0b7f5f 100644 --- a/src/lsdb/loaders/dataframe/dataframe_catalog_loader.py +++ b/src/lsdb/loaders/dataframe/dataframe_catalog_loader.py @@ -2,7 +2,6 @@ import math import warnings -from typing import Dict, List, Tuple import astropy.units as u import hats as hc @@ -179,7 +178,7 @@ def _set_spatial_index(self): ) self.dataframe.set_index(SPATIAL_INDEX_COLUMN, inplace=True) - def _compute_pixel_list(self) -> List[HealpixPixel]: + def _compute_pixel_list(self) -> list[HealpixPixel]: """Compute object histogram and generate the sorted list of HEALPix pixels. The pixels are sorted by ascending spatial index. @@ -205,8 +204,8 @@ def _compute_pixel_list(self) -> List[HealpixPixel]: return list(np.array(pixel_list)[get_pixel_argsort(pixel_list)]) def _generate_dask_df_and_map( - self, pixel_list: List[HealpixPixel] - ) -> Tuple[nd.NestedFrame, DaskDFPixelMap, int]: + self, pixel_list: list[HealpixPixel] + ) -> tuple[nd.NestedFrame, DaskDFPixelMap, int]: """Load Dask DataFrame from HEALPix pixel Dataframes and generate a mapping of HEALPix pixels to HEALPix Dataframes @@ -218,10 +217,10 @@ def _generate_dask_df_and_map( to the respective Pandas Dataframes and the total number of rows. 
""" # Dataframes for each destination HEALPix pixel - pixel_dfs: List[npd.NestedFrame] = [] + pixel_dfs: list[npd.NestedFrame] = [] # Mapping HEALPix pixels to the respective Dataframe indices - ddf_pixel_map: Dict[HealpixPixel, int] = {} + ddf_pixel_map: dict[HealpixPixel, int] = {} for hp_pixel_index, hp_pixel in enumerate(pixel_list): # Store HEALPix pixel in map diff --git a/src/lsdb/loaders/dataframe/from_dataframe_utils.py b/src/lsdb/loaders/dataframe/from_dataframe_utils.py index 1c95f339..b124172f 100644 --- a/src/lsdb/loaders/dataframe/from_dataframe_utils.py +++ b/src/lsdb/loaders/dataframe/from_dataframe_utils.py @@ -1,5 +1,4 @@ from datetime import datetime, timezone -from typing import List, Tuple import nested_dask as nd import nested_pandas as npd @@ -16,8 +15,8 @@ def _generate_dask_dataframe( - pixel_dfs: List[npd.NestedFrame], pixels: List[HealpixPixel], use_pyarrow_types: bool = True -) -> Tuple[nd.NestedFrame, int]: + pixel_dfs: list[npd.NestedFrame], pixels: list[HealpixPixel], use_pyarrow_types: bool = True +) -> tuple[nd.NestedFrame, int]: """Create the Dask Dataframe from the list of HEALPix pixel Dataframes Args: diff --git a/src/lsdb/loaders/dataframe/margin_catalog_generator.py b/src/lsdb/loaders/dataframe/margin_catalog_generator.py index 32fbcd19..7ce9d930 100644 --- a/src/lsdb/loaders/dataframe/margin_catalog_generator.py +++ b/src/lsdb/loaders/dataframe/margin_catalog_generator.py @@ -1,7 +1,6 @@ from __future__ import annotations import warnings -from typing import Dict, List, Tuple import hats as hc import hats.pixel_math.healpix_shim as hp @@ -114,7 +113,7 @@ def _create_empty_catalog(self) -> MarginCatalog: margin_structure = hc.catalog.MarginCatalog(catalog_info, [], schema=self.margin_schema) return MarginCatalog(ddf, {}, margin_structure) - def _get_margins(self) -> Tuple[List[HealpixPixel], List[npd.NestedFrame]]: + def _get_margins(self) -> tuple[list[HealpixPixel], list[npd.NestedFrame]]: """Generates the list of pixels that have margin data, and the dataframes with the margin data for each partition @@ -131,8 +130,8 @@ def _get_margins(self) -> Tuple[List[HealpixPixel], List[npd.NestedFrame]]: return pixels, partitions def _generate_dask_df_and_map( - self, pixels: List[HealpixPixel], partitions: List[pd.DataFrame] - ) -> Tuple[nd.NestedFrame, Dict[HealpixPixel, int], int]: + self, pixels: list[HealpixPixel], partitions: list[pd.DataFrame] + ) -> tuple[nd.NestedFrame, dict[HealpixPixel, int], int]: """Create the Dask Dataframe containing the data points in the margins for the catalog as well as the mapping of those HEALPix to Dataframes @@ -154,7 +153,7 @@ def _generate_dask_df_and_map( ddf, total_rows = _generate_dask_dataframe(ordered_partitions, ordered_pixels, self.use_pyarrow_types) return ddf, ddf_pixel_map, total_rows - def _find_margin_pixel_pairs(self, pixels: List[HealpixPixel]) -> pd.DataFrame: + def _find_margin_pixel_pairs(self, pixels: list[HealpixPixel]) -> pd.DataFrame: """Calculate the pairs of catalog pixels and their margin pixels Args: @@ -184,7 +183,7 @@ def _find_margin_pixel_pairs(self, pixels: List[HealpixPixel]) -> pd.DataFrame: columns=["partition_order", "partition_pixel", "margin_pixel"], ) - def _create_margins(self, margin_pairs_df: pd.DataFrame) -> Dict[HealpixPixel, pd.DataFrame]: + def _create_margins(self, margin_pairs_df: pd.DataFrame) -> dict[HealpixPixel, pd.DataFrame]: """Compute the margins for all the pixels in the catalog Args: @@ -194,7 +193,7 @@ def _create_margins(self, margin_pairs_df: pd.DataFrame) -> 
Dict[HealpixPixel, p Returns: A dictionary mapping each margin pixel to the respective DataFrame. """ - margin_pixel_df_map: Dict[HealpixPixel, npd.NestedFrame] = {} + margin_pixel_df_map: dict[HealpixPixel, npd.NestedFrame] = {} self.dataframe["margin_pixel"] = hp.radec2pix( self.margin_order, self.dataframe[self.hc_structure.catalog_info.ra_column].to_numpy(), diff --git a/src/lsdb/loaders/hats/hats_loading_config.py b/src/lsdb/loaders/hats/hats_loading_config.py index f69416c2..7a125e7b 100644 --- a/src/lsdb/loaders/hats/hats_loading_config.py +++ b/src/lsdb/loaders/hats/hats_loading_config.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Callable, List +from typing import Callable import pandas as pd from pandas.io._util import _arrow_dtype_mapping @@ -21,7 +21,7 @@ class HatsLoadingConfig: search_filter: AbstractSearch | None = None """The spatial filter to apply to the catalog""" - columns: List[str] | None = None + columns: list[str] | None = None """Columns to load from the catalog. If not specified, all columns are loaded""" margin_cache: str | Path | UPath | None = None diff --git a/src/lsdb/loaders/hats/read_hats.py b/src/lsdb/loaders/hats/read_hats.py index ae19a4f0..59d09240 100644 --- a/src/lsdb/loaders/hats/read_hats.py +++ b/src/lsdb/loaders/hats/read_hats.py @@ -1,7 +1,6 @@ from __future__ import annotations from pathlib import Path -from typing import List, Tuple import hats as hc import nested_dask as nd @@ -29,7 +28,7 @@ def read_hats( path: str | Path | UPath, search_filter: AbstractSearch | None = None, - columns: List[str] | None = None, + columns: list[str] | None = None, margin_cache: str | Path | UPath | None = None, dtype_backend: str | None = "pyarrow", **kwargs, @@ -61,6 +60,12 @@ def read_hats( Catalog object loaded from the given parameters """ # Creates a config object to store loading parameters from all keyword arguments. + + hc_catalog = hc.read_hats(path) + + if columns is None and hc_catalog.catalog_info.default_columns is not None: + columns = hc_catalog.catalog_info.default_columns + config = HatsLoadingConfig( search_filter=search_filter, columns=columns, @@ -69,7 +74,6 @@ def read_hats( kwargs=kwargs, ) - hc_catalog = hc.read_hats(path) if hc_catalog.schema is None: raise ValueError( "The catalog schema could not be loaded from metadata." 
@@ -182,7 +186,7 @@ def _create_dask_meta_schema(schema: pa.Schema, config) -> npd.NestedFrame: return npd.NestedFrame(dask_meta_schema) -def _load_dask_df_and_map(catalog: HCHealpixDataset, config) -> Tuple[nd.NestedFrame, DaskDFPixelMap]: +def _load_dask_df_and_map(catalog: HCHealpixDataset, config) -> tuple[nd.NestedFrame, DaskDFPixelMap]: """Load Dask DF from parquet files and make dict of HEALPix pixel to partition index""" pixels = catalog.get_healpix_pixels() ordered_pixels = np.array(pixels)[get_pixel_argsort(pixels)] diff --git a/src/lsdb/loaders/hats/read_hats.pyi b/src/lsdb/loaders/hats/read_hats.pyi index 0bbdc7e4..707f8505 100644 --- a/src/lsdb/loaders/hats/read_hats.pyi +++ b/src/lsdb/loaders/hats/read_hats.pyi @@ -14,7 +14,7 @@ For more information on stub files, view here: https://mypy.readthedocs.io/en/st from __future__ import annotations from pathlib import Path -from typing import List, Type, overload +from typing import Type, overload from upath import UPath @@ -27,7 +27,7 @@ from lsdb.loaders.hats.abstract_catalog_loader import CatalogTypeVar def read_hats( path: str | Path | UPath, search_filter: AbstractSearch | None = None, - columns: List[str] | None = None, + columns: list[str] | None = None, margin_cache: str | Path | UPath | None = None, dtype_backend: str | None = "pyarrow", **kwargs, @@ -37,7 +37,7 @@ def read_hats( path: str | Path | UPath, catalog_type: Type[CatalogTypeVar], search_filter: AbstractSearch | None = None, - columns: List[str] | None = None, + columns: list[str] | None = None, margin_cache: str | Path | UPath | None = None, dtype_backend: str | None = "pyarrow", **kwargs, diff --git a/src/lsdb/types.py b/src/lsdb/types.py index fe33c751..5ed15262 100644 --- a/src/lsdb/types.py +++ b/src/lsdb/types.py @@ -1,11 +1,11 @@ -from typing import Dict, TypeVar +from typing import TypeVar from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset as HCHealpixDataset from hats.pixel_math import HealpixPixel from lsdb.catalog.dataset.dataset import Dataset -DaskDFPixelMap = Dict[HealpixPixel, int] +DaskDFPixelMap = dict[HealpixPixel, int] CatalogTypeVar = TypeVar("CatalogTypeVar", bound=Dataset) HCCatalogTypeVar = TypeVar("HCCatalogTypeVar", bound=HCHealpixDataset) diff --git a/tests/conftest.py b/tests/conftest.py index 046595f8..5255f8fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ SMALL_SKY_TO_XMATCH_SOFT_NAME = "small_sky_to_xmatch_soft" SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" SMALL_SKY_ORDER1_NO_PANDAS_DIR_NAME = "small_sky_order1_no_pandas_meta" +SMALL_SKY_ORDER1_DEFAULT_COLS_DIR_NAME = "small_sky_order1_default_columns" SMALL_SKY_ORDER1_SOURCE_NAME = "small_sky_order1_source" SMALL_SKY_ORDER1_SOURCE_MARGIN_NAME = "small_sky_order1_source_margin" SMALL_SKY_TO_ORDER1_SOURCE_NAME = "small_sky_to_o1source" @@ -76,6 +77,11 @@ def small_sky_order1_dir(test_data_dir): return test_data_dir / SMALL_SKY_ORDER1_DIR_NAME +@pytest.fixture +def small_sky_order1_default_cols_dir(test_data_dir): + return test_data_dir / SMALL_SKY_ORDER1_DEFAULT_COLS_DIR_NAME + + @pytest.fixture def small_sky_order1_no_pandas_dir(test_data_dir): return test_data_dir / SMALL_SKY_ORDER1_NO_PANDAS_DIR_NAME diff --git a/tests/data/generate_data.ipynb b/tests/data/generate_data.ipynb index ba4677a3..f26313aa 100644 --- a/tests/data/generate_data.ipynb +++ b/tests/data/generate_data.ipynb @@ -123,6 +123,45 @@ "sso1.partition_info.write_to_file(hats.io.paths.get_partition_info_pointer(out_catalog_name))" ] }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### small_sky_order1_default_columns\n", + "\n", + "Copies small_sky_order1 but adds a list of default columns to the properties file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2025-01-23T18:41:22.261748Z", + "start_time": "2025-01-23T18:41:22.230037Z" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import hats\n", + "from hats.io.paths import DATASET_DIR\n", + "from hats.io.file_io import get_upath\n", + "\n", + "out_catalog_name = \"small_sky_order1_default_columns\"\n", + "\n", + "sso1 = hats.read_hats(\"small_sky_order1\")\n", + "sso1_dataset_path = get_upath(\"small_sky_order1\") / DATASET_DIR\n", + "out_dataset_path = get_upath(out_catalog_name) / DATASET_DIR\n", + "\n", + "os.mkdir(out_catalog_name)\n", + "os.symlink(f\"../{sso1_dataset_path}\", out_dataset_path)\n", + "sso1.catalog_info.copy_and_update(\n", + " catalog_name=out_catalog_name, default_columns=[\"ra\", \"dec\", \"id\"]\n", + ").to_properties_file(out_catalog_name)\n", + "sso1.partition_info.write_to_file(hats.io.paths.get_partition_info_pointer(out_catalog_name))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/tests/data/small_sky_order1_default_columns/dataset b/tests/data/small_sky_order1_default_columns/dataset new file mode 120000 index 00000000..6447cf36 --- /dev/null +++ b/tests/data/small_sky_order1_default_columns/dataset @@ -0,0 +1 @@ +../small_sky_order1/dataset \ No newline at end of file diff --git a/tests/data/small_sky_order1_default_columns/partition_info.csv b/tests/data/small_sky_order1_default_columns/partition_info.csv new file mode 100644 index 00000000..5771586c --- /dev/null +++ b/tests/data/small_sky_order1_default_columns/partition_info.csv @@ -0,0 +1,5 @@ +Norder,Npix +1,44 +1,45 +1,46 +1,47 diff --git a/tests/data/small_sky_order1_default_columns/properties b/tests/data/small_sky_order1_default_columns/properties new file mode 100644 index 00000000..d00933d6 --- /dev/null +++ b/tests/data/small_sky_order1_default_columns/properties @@ -0,0 +1,15 @@ +#HATS catalog +obs_collection=small_sky_order1_default_columns +dataproduct_type=object +hats_nrows=131 +hats_col_ra=ra +hats_col_dec=dec +hats_cols_default=ra dec id +hats_max_rows=1000000 +hats_order=1 +moc_sky_fraction=0.08333 +hats_builder=hats-import v0.3.6.dev26+g40366b4 +hats_creation_date=2024-10-15T14\:47UTC +hats_estsize=39 +hats_release_date=2024-09-18 +hats_version=v0.1 diff --git a/tests/lsdb/loaders/hats/test_read_hats.py b/tests/lsdb/loaders/hats/test_read_hats.py index 11f8c27b..3ac949b8 100644 --- a/tests/lsdb/loaders/hats/test_read_hats.py +++ b/tests/lsdb/loaders/hats/test_read_hats.py @@ -38,6 +38,31 @@ def test_read_hats(small_sky_order1_dir, small_sky_order1_hats_catalog, assert_d assert_index_correct(catalog) +def test_read_hats_default_cols(small_sky_order1_default_cols_dir, assert_divisions_are_correct): + catalog = lsdb.read_hats(small_sky_order1_default_cols_dir) + assert isinstance(catalog, lsdb.Catalog) + assert isinstance(catalog._ddf, nd.NestedFrame) + assert catalog.hc_structure.catalog_info.default_columns is not None + assert np.all(catalog.columns == catalog.hc_structure.catalog_info.default_columns) + assert np.all(catalog.compute().columns == catalog.hc_structure.catalog_info.default_columns) + assert isinstance(catalog.compute(), npd.NestedFrame) + assert_divisions_are_correct(catalog) + assert_index_correct(catalog) + + +def 
test_read_hats_default_cols_specify_cols(small_sky_order1_default_cols_dir, assert_divisions_are_correct): + filter_columns = ["ra", "dec"] + catalog = lsdb.read_hats(small_sky_order1_default_cols_dir, columns=filter_columns) + assert isinstance(catalog, lsdb.Catalog) + assert isinstance(catalog._ddf, nd.NestedFrame) + assert catalog.hc_structure.catalog_info.default_columns is not None + assert np.all(catalog.columns == filter_columns) + assert np.all(catalog.compute().columns == filter_columns) + assert isinstance(catalog.compute(), npd.NestedFrame) + assert_divisions_are_correct(catalog) + assert_index_correct(catalog) + + def test_read_hats_no_pandas(small_sky_order1_no_pandas_dir, assert_divisions_are_correct): catalog = lsdb.read_hats(small_sky_order1_no_pandas_dir) assert isinstance(catalog, lsdb.Catalog)
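
Beyond the mechanical modernization of `typing.List`/`Tuple`/`Dict` annotations to the built-in `list`/`tuple`/`dict` generics (PEP 585), which changes no runtime behavior, the substantive change in this diff is that `lsdb.read_hats` now falls back to the catalog's default columns (the `hats_cols_default` property) when no `columns=` argument is given. A minimal usage sketch, not part of the patch itself, assuming the `small_sky_order1_default_columns` test fixture added above (whose properties file declares `hats_cols_default=ra dec id`):

```python
import lsdb

# No columns requested: the loader now picks up the catalog's
# default columns from its properties file.
catalog = lsdb.read_hats("tests/data/small_sky_order1_default_columns")
print(list(catalog.columns))  # expected: ['ra', 'dec', 'id']

# An explicit column list still takes precedence over the defaults.
subset = lsdb.read_hats(
    "tests/data/small_sky_order1_default_columns",
    columns=["ra", "dec"],
)
print(list(subset.columns))  # expected: ['ra', 'dec']
```

This mirrors the behavior asserted by `test_read_hats_default_cols` and `test_read_hats_default_cols_specify_cols` in `tests/lsdb/loaders/hats/test_read_hats.py`.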