From 83224aa5ea6cfe5b809adb600ca25388c1938611 Mon Sep 17 00:00:00 2001
From: Sandro Campos <scampos@andrew.cmu.edu>
Date: Fri, 28 Jun 2024 10:42:16 -0400
Subject: [PATCH] Pipeline to compute angular correlation function (#8)

* Add skeleton for the autocorrelation workflow

* Perform some code cleanup

* Perform alignment accordingly

* First acf calculation with lsdb

* Add the true values for the natural estimator

* Refactor code and add more tests

* Split upper triangle and diagonal

* Pushing files back in

* Update coordinate projection

* Add catalogs with float64 types

* Counts closer to same.

* Be more tolerant!

* Improve docstrings and namings

* Improve code organization

* Fix formatting

---------

Co-authored-by: Melissa DeLucchi <delucchi@andrew.cmu.edu>
---
 .gitignore                        |   5 +-
 requirements.txt                  |   2 +-
 src/corrgi/alignment.py           |  24 ++--
 src/corrgi/corrgi.py              |  38 ++++++
 src/corrgi/dask.py                | 209 ++++++++++++++++++++++++++++++
 src/corrgi/estimators.py          |  25 ++++
 src/corrgi/parameters.py          |  53 ++++++++
 src/corrgi/utils.py               |  44 +++++++
 tests/corrgi/conftest.py          |  79 +++++++++++
 tests/corrgi/test_alignment.py    |   3 +-
 tests/corrgi/test_corrgi.py       |  69 ++++++++++
 tests/corrgi/test_counting.py     |  16 +++
 tests/corrgi/test_parameters.py   |  16 +++
 tests/data/acf/dd_acf.npy         | Bin 0 -> 392 bytes
 tests/data/acf/l_binedges_acf.npy | Bin 0 -> 392 bytes
 tests/data/acf/r_binedges_acf.npy | Bin 0 -> 392 bytes
 tests/data/acf/rr_acf.npy         | Bin 0 -> 392 bytes
 tests/data/acf/w_acf_nat.npy      | Bin 0 -> 392 bytes
 18 files changed, 565 insertions(+), 18 deletions(-)
 create mode 100644 src/corrgi/corrgi.py
 create mode 100644 src/corrgi/dask.py
 create mode 100644 src/corrgi/estimators.py
 create mode 100644 src/corrgi/parameters.py
 create mode 100644 src/corrgi/utils.py
 create mode 100644 tests/corrgi/test_corrgi.py
 create mode 100644 tests/corrgi/test_counting.py
 create mode 100644 tests/corrgi/test_parameters.py
 create mode 100644 tests/data/acf/dd_acf.npy
 create mode 100644 tests/data/acf/l_binedges_acf.npy
 create mode 100644 tests/data/acf/r_binedges_acf.npy
 create mode 100644 tests/data/acf/rr_acf.npy
 create mode 100644 tests/data/acf/w_acf_nat.npy

diff --git a/.gitignore b/.gitignore
index 85ced40..f6ad061 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,4 +149,7 @@ _html/
 # Project initialization script
 .initialize_new_project.sh
 
-.idea/
\ No newline at end of file
+.idea/
+
+# Log files generated by gundam
+tests/**/*_log
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 8c85888..e6fd3fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 git+https://github.com/astronomy-commons/hipscat.git@main
 git+https://github.com/astronomy-commons/lsdb.git@main
-git+https://github.com/lincc-frameworks-mask-incubator/gundam.git@main
\ No newline at end of file
+git+https://github.com/lincc-frameworks-mask-incubator/gundam.git@acf-without-grid
\ No newline at end of file
diff --git a/src/corrgi/alignment.py b/src/corrgi/alignment.py
index ea8667b..6755beb 100644
--- a/src/corrgi/alignment.py
+++ b/src/corrgi/alignment.py
@@ -19,27 +19,20 @@ def autocorrelation_alignment(catalog: Catalog) -> PixelAlignment:
     """Determine all pairs of partitions that should be correlated within the same catalog.
 
     This considers all combinations, without duplicates between the "primary" and "join"
-    pixels in the alignment.
+    pixels in the alignment. It does not include the combinations of partitions with themselves.
 
     Args:
-        catalog (Catalog): catalog for autocorrelation
+        catalog (Catalog): catalog for auto-correlation.
+
     Returns:
-        alignment object where the `aligned` columns simply match the left pixel.
+        The alignment object where the `aligned` columns simply match the left pixel.
     """
     upper_triangle = [
         [left.order, left.pixel, right.order, right.pixel, left.order, left.pixel]
         for (left, right) in itertools.combinations(catalog.get_healpix_pixels(), 2)
     ]
     upper_triangle = pd.DataFrame(upper_triangle, columns=column_names)
-    diagonal = pd.DataFrame(
-        [
-            [pix.order, pix.pixel, pix.order, pix.pixel, pix.order, pix.pixel]
-            for pix in catalog.get_healpix_pixels()
-        ],
-        columns=column_names,
-    )
-    result_mapping = pd.concat([upper_triangle, diagonal])
-    return PixelAlignment(catalog.pixel_tree, result_mapping, PixelAlignmentType.OUTER)
+    return PixelAlignment(catalog.pixel_tree, upper_triangle, PixelAlignmentType.OUTER)
 
 
 def crosscorrelation_alignment(catalog_left: Catalog, catalog_right: Catalog) -> PixelAlignment:
@@ -48,10 +41,11 @@ def crosscorrelation_alignment(catalog_left: Catalog, catalog_right: Catalog) ->
     This considers the full cross-product of pixels.
 
     Args:
-        catalog_left (Catalog): left side of the cross-correlation
-        catalog_right (Catalog): right side of the cross-correlation
+        catalog_left (Catalog): left side of the cross-correlation.
+        catalog_right (Catalog): right side of the cross-correlation.
+
     Returns:
-        alignment object where the `aligned` columns simply match the left pixel.
+        The alignment object where the `aligned` columns simply match the left pixel.
     """
     full_product = [
         [left.order, left.pixel, right.order, right.pixel, left.order, left.pixel]
diff --git a/src/corrgi/corrgi.py b/src/corrgi/corrgi.py
new file mode 100644
index 0000000..ec86ee3
--- /dev/null
+++ b/src/corrgi/corrgi.py
@@ -0,0 +1,38 @@
+import numpy as np
+from lsdb import Catalog
+from munch import Munch
+
+from corrgi.dask import compute_autocorrelation_counts
+from corrgi.estimators import calculate_natural_estimate
+
+
+def compute_autocorrelation(catalog: Catalog, random: Catalog, params: Munch) -> np.ndarray:
+    """Calculates the auto-correlation for a catalog.
+
+    Args:
+        catalog (Catalog): The catalog.
+        random (Catalog): A random samples catalog.
+        params (Munch): The parameters dictionary to run gundam with.
+
+    Returns:
+        A numpy array with the result of the auto-correlation, using the natural estimator.
+    """
+    num_galaxies = catalog.hc_structure.catalog_info.total_rows
+    num_random = random.hc_structure.catalog_info.total_rows
+    counts_dd, counts_rr = compute_autocorrelation_counts(catalog, random, params)
+    return calculate_natural_estimate(counts_dd, counts_rr, num_galaxies, num_random)
+
+
+def compute_crosscorrelation(left: Catalog, right: Catalog, random: Catalog, params: Munch) -> np.ndarray:
+    """Computes the cross-correlation between two catalogs.
+
+    Args:
+        left (Catalog): Left catalog for the cross-correlation.
+        right (Catalog): Right catalog for the cross-correlation.
+        random (Catalog): A random samples catalog.
+        params (Munch): The parameters dictionary to run gundam with.
+
+    Returns:
+        A numpy array with the result of the cross-correlation, using the natural estimator.
+    """
+    raise NotImplementedError()
diff --git a/src/corrgi/dask.py b/src/corrgi/dask.py
new file mode 100644
index 0000000..c31bb0c
--- /dev/null
+++ b/src/corrgi/dask.py
@@ -0,0 +1,209 @@
+import dask
+import gundam.cflibfor as cff
+import numpy as np
+import pandas as pd
+from dask.distributed import print as dask_print
+from gundam import gundam
+from hipscat.catalog.catalog_info import CatalogInfo
+from hipscat.pixel_math import HealpixPixel
+from lsdb import Catalog
+from lsdb.dask.merge_catalog_functions import align_and_apply, get_healpix_pixels_from_alignment
+from munch import Munch
+
+from corrgi.alignment import autocorrelation_alignment, crosscorrelation_alignment
+from corrgi.parameters import generate_dd_rr_params
+from corrgi.utils import join_count_histograms, project_coordinates
+
+
+def compute_autocorrelation_counts(catalog: Catalog, random: Catalog, params: Munch) -> np.ndarray:
+    """Computes the auto-correlation counts for a catalog.
+
+    Args:
+        catalog (Catalog): The catalog with galaxy samples.
+        random (Catalog): The catalog with random samples.
+        params (Munch): The gundam parameters for the Fortran subroutine.
+
+    Returns:
+        A tuple with the DD and RR histogram counts, in that order.
+    """
+    # Calculate the angular separation bins
+    bins, _ = gundam.makebins(params.nsept, params.septmin, params.dsept, params.logsept)
+    params_dd, params_rr = generate_dd_rr_params(params)
+    # Generate the histograms with counts for each catalog
+    counts_dd = perform_auto_counts(catalog, bins, params_dd)
+    counts_rr = perform_auto_counts(random, bins, params_rr)
+    # Actually compute the results
+    return dask.compute(*[counts_dd, counts_rr])
+
+
+def perform_auto_counts(catalog: Catalog, *args) -> np.ndarray:
+    """Aligns the pixels of a single catalog and performs the pair counting.
+
+    Args:
+        catalog (Catalog): The catalog.
+        *args: The arguments to pass to the counting methods.
+
+    Returns:
+        The histogram with the sample distance counts.
+    """
+    # Get counts between points of different partitions
+    alignment = autocorrelation_alignment(catalog.hc_structure)
+    left_pixels, right_pixels = get_healpix_pixels_from_alignment(alignment)
+    cross_partials = align_and_apply(
+        [(catalog, left_pixels), (catalog, right_pixels)], count_cross_pairs, *args
+    )
+    # Get counts between points of the same partition
+    auto_partials = [
+        count_auto_pairs(partition, catalog.hc_structure.catalog_info, *args)
+        for partition in catalog._ddf.to_delayed()
+    ]
+    all_partials = [*cross_partials, *auto_partials]
+    return join_count_histograms(all_partials)
+
+
+def perform_cross_counts(left: Catalog, right: Catalog, *args) -> np.ndarray:
+    """Aligns the pixels of two catalogs and performs the pair counting.
+
+    Args:
+        left (Catalog): The left catalog.
+        right (Catalog): The right catalog.
+        *args: The arguments to pass to `count_cross_pairs`.
+
+    Returns:
+        The histogram with the sample distance counts.
+    """
+    alignment = crosscorrelation_alignment(left.hc_structure, right.hc_structure)
+    left_pixels, right_pixels = get_healpix_pixels_from_alignment(alignment)
+    cross_partials = align_and_apply([(left, left_pixels), (right, right_pixels)], count_cross_pairs, *args)
+    return join_count_histograms(cross_partials)
+
+
+@dask.delayed
+def count_auto_pairs(
+    df: pd.DataFrame,
+    catalog_info: CatalogInfo,
+    bins: np.ndarray,
+    params: Munch,
+) -> np.ndarray:
+    """Calls the fortran routine to compute the counts for pairs of
+    points within a single partition of a catalog.
+
+    Args:
+       df (pd.DataFrame): The partition dataframe.
+       catalog_info (CatalogInfo): The catalog metadata.
+       bins (np.ndarray): The separation bins, in angular space.
+       params (Munch): The gundam subroutine parameters.
+
+    Returns:
+       The count histogram for the partition.
+    """
+    try:
+        return _count_auto_pairs(df, catalog_info, bins, params)
+    except Exception as exception:
+        dask_print(exception)
+
+
+@dask.delayed
+def count_cross_pairs(
+    left_df: pd.DataFrame,
+    right_df: pd.DataFrame,
+    left_pix: HealpixPixel,
+    right_pix: HealpixPixel,
+    left_catalog_info: CatalogInfo,
+    right_catalog_info: CatalogInfo,
+    bins: np.ndarray,
+    params: Munch,
+) -> np.ndarray:
+    """Calls the fortran routine to compute the counts between the points of two
+    partitions, which may belong to the same catalog or to two different ones.
+
+    Args:
+       left_df (pd.DataFrame): The left partition dataframe.
+       right_df (pd.DataFrame): The right partition dataframe.
+       left_pix (HealpixPixel): The pixel corresponding to `left_df`.
+       right_pix (HealpixPixel): The pixel corresponding to `right_df`.
+       left_catalog_info (CatalogInfo): The left catalog metadata.
+       right_catalog_info (CatalogInfo): The right catalog metadata.
+       bins (np.ndarray): The separation bins, in angular space.
+       params (Munch): The gundam subroutine parameters.
+
+    Returns:
+       The count histogram for the partition pair.
+    """
+    try:
+        return _count_cross_pairs(
+            left_df,
+            right_df,
+            left_catalog_info,
+            right_catalog_info,
+            bins,
+            params,
+        )
+    except Exception as exception:
+        dask_print(exception)
+
+
+def _count_auto_pairs(
+    df: pd.DataFrame,
+    catalog_info: CatalogInfo,
+    bins: np.ndarray,
+    params: Munch,
+) -> np.ndarray:
+    x, y, z = project_coordinates(
+        ra=df[catalog_info.ra_column].to_numpy(),
+        dec=df[catalog_info.dec_column].to_numpy(),
+    )
+    args = [
+        len(df),  # number of particles
+        x,
+        y,
+        z,  # X,Y,Z coordinates of particles
+        params.nsept,  # number of angular separation bins
+        bins,  # bins in angular separation [deg]
+    ]
+    counts = cff.mod.th_A_naiveway(*args)  # fast unweighted counting
+    return counts
+
+
+def _count_cross_pairs(
+    left_df: pd.DataFrame,
+    right_df: pd.DataFrame,
+    left_catalog_info: CatalogInfo,
+    right_catalog_info: CatalogInfo,
+    bins: np.ndarray,
+    params: Munch,
+) -> np.ndarray:
+    left_x, left_y, left_z = project_coordinates(
+        ra=left_df[left_catalog_info.ra_column].to_numpy(),
+        dec=left_df[left_catalog_info.dec_column].to_numpy(),
+    )
+    right_x, right_y, right_z = project_coordinates(
+        ra=right_df[right_catalog_info.ra_column].to_numpy(),
+        dec=right_df[right_catalog_info.dec_column].to_numpy(),
+    )
+    args = [
+        1,  # number of threads OpenMP
+        len(left_df),  # number of particles of the left partition
+        left_df[left_catalog_info.ra_column].to_numpy(),  # RA of particles [deg]
+        left_df[left_catalog_info.dec_column].to_numpy(),  # DEC of particles [deg]
+        left_x,
+        left_y,
+        left_z,  # X,Y,Z coordinates of particles
+        len(right_df),  # number of particles of the right partition
+        right_x,
+        right_y,
+        right_z,  # X,Y,Z coordinates of particles
+        params.nsept,  # number of angular separation bins
+        bins,  # bins in angular separation [deg]
+        params.sbound,
+        params.mxh1,
+        params.mxh2,
+        params.cntid,
+        params.logf,
+        params.sk1,
+        np.zeros(len(right_df)),
+        params.grid,
+    ]
+    # TODO: Create gundam th_C_naive_way that accepts only the necessary arguments
+    counts = cff.mod.th_C(*args)  # fast unweighted counting
+    return counts
diff --git a/src/corrgi/estimators.py b/src/corrgi/estimators.py
new file mode 100644
index 0000000..7d03b38
--- /dev/null
+++ b/src/corrgi/estimators.py
@@ -0,0 +1,25 @@
+import numpy as np
+from gundam import tpcf
+
+
+def calculate_natural_estimate(
+    counts_dd: np.ndarray,
+    counts_rr: np.ndarray,
+    num_galaxies: int,
+    num_random: int,
+) -> np.ndarray:
+    """Calculates the auto-correlation value for the natural estimator.
+
+    Args:
+        counts_dd (np.ndarray): The counts for the galaxy samples.
+        counts_rr (np.ndarray): The counts for the random samples.
+        num_galaxies (int): The number of galaxy samples.
+        num_random (int): The number of random samples.
+
+    Returns:
+        The natural correlation function estimate.
+    """
+    dr = 0  # We do not use DR counts for the natural estimator
+    bdd = np.zeros([len(counts_dd), 0])  # We do not compute the bootstrap counts
+    wth, _ = tpcf(num_galaxies, num_random, counts_dd, bdd, counts_rr, dr, estimator="NAT")
+    return wth
diff --git a/src/corrgi/parameters.py b/src/corrgi/parameters.py
new file mode 100644
index 0000000..1eade58
--- /dev/null
+++ b/src/corrgi/parameters.py
@@ -0,0 +1,53 @@
+from copy import deepcopy
+
+from gundam import gundam
+from munch import Munch
+
+
+def create_gundam_params(kind: str, **kwargs) -> Munch:
+    """Generates the Gundam parameters for a specific kind of correlation function.
+
+    Args:
+        kind (str): The type of correlation function (e.g. acf).
+        **kwargs: Additional gundam parameters to set/override.
+
+    Returns:
+        The dictionary of gundam parameters.
+    """
+    params = gundam.packpars(kind=kind, write=False)
+    # Disable grid and fill its unused parameters
+    params.grid = 0
+    params.autogrid = False
+    params.sbound = [1, 2, 1, 2]
+    params.mxh1 = 2
+    params.mxh2 = 2
+    params.sk1 = [[1, 2], [1, 2]]
+    # Append any additional params
+    return Munch({**params, **kwargs})
+
+
+def generate_dd_rr_params(params: Munch) -> tuple[Munch, Munch]:
+    """Generate the DD and RR parameters."""
+    # Create the parameters for both catalogs
+    par_dd = deepcopy(params)
+    par_dd.kind = "thC"
+    par_dd.cntid = "DD"
+    par_dd.logf = "DD_log"
+    par_rr = deepcopy(params)
+    par_rr.kind = "thC"
+    par_rr.cntid = "RR"
+    par_rr.logf = "RR_log"
+    par_rr.wfib = False
+    par_rr.doboot = False
+    return par_dd, par_rr
+
+
+def generate_dr_params(params: Munch) -> Munch:
+    """Generate the DR parameters to be used in cross-correlation."""
+    par_dr = deepcopy(params)
+    par_dr.kind = "thC"
+    par_dr.cntid = "DR"
+    par_dr.logf = "DR_log"
+    par_dr.wfib = False
+    par_dr.doboot = False
+    return par_dr
diff --git a/src/corrgi/utils.py b/src/corrgi/utils.py
new file mode 100644
index 0000000..93e2d03
--- /dev/null
+++ b/src/corrgi/utils.py
@@ -0,0 +1,44 @@
+import numpy as np
+from gundam import gundam
+from numpy import deg2rad
+
+
+def project_coordinates(ra: np.ndarray, dec: np.ndarray, radius: float = 0.5) -> tuple[float, float, float]:
+    """Project spherical coordinates (ra, dec) to cartesian space
+    on a sphere of the given radius.
+
+    Args:
+        ra (np.ndarray): Right ascension, in degrees.
+        dec (np.ndarray): Declination, in degrees.
+        radius (float): The radius of the sphere. Defaults to 0.5.
+
+    Returns:
+        The x, y and z coordinates, in cartesian space, on a sphere of that radius.
+    """
+    return gundam.radec2xyz(ra * np.pi / 180.0, dec * np.pi / 180.0, r=radius)
+
+
+def project_bins(angular_bins: np.ndarray) -> np.ndarray:
+    """Project the angular bins to cartesian space according
+    to a sphere of radius=0.5.
+
+    Args:
+        angular_bins (np.ndarray): The bins, in angular space [deg].
+
+    Returns:
+        The values sin(bins / 2) ** 2, with the bins converted to radians.
+    """
+    return np.sin(0.5 * deg2rad(angular_bins)) ** 2
+
+
+def join_count_histograms(partial_histograms: list[np.ndarray]) -> np.ndarray:
+    """Stack all partial histograms and sum their counts.
+
+    Args:
+        partial_histograms (list[np.ndarray]): The list of count histograms
+            generated for each pair of partitions.
+
+    Returns:
+        The numpy array with the total counts for the partial histograms.
+    """
+    return np.sum(np.stack(partial_histograms), axis=0)
diff --git a/tests/corrgi/conftest.py b/tests/corrgi/conftest.py
index cc9b047..1543d16 100644
--- a/tests/corrgi/conftest.py
+++ b/tests/corrgi/conftest.py
@@ -1,6 +1,18 @@
 from pathlib import Path
 
+import gundam
+import numpy as np
+import pandas as pd
 import pytest
+from dask.distributed import Client
+
+
+@pytest.fixture(scope="session", name="dask_client")
+def dask_client():
+    """Create a single client for use by all unit test cases."""
+    client = Client(n_workers=3, threads_per_worker=1)
+    yield client
+    client.close()
 
 
 @pytest.fixture
@@ -13,6 +25,11 @@ def data_catalog_dir(test_data_dir):
     return test_data_dir / "DATA"
 
 
+@pytest.fixture
+def rand_catalog_dir(test_data_dir):
+    return test_data_dir / "RAND"
+
+
 @pytest.fixture
 def raw_catalog_dir(test_data_dir):
     return test_data_dir / "RAW"
@@ -26,3 +43,65 @@ def dr7_lrg_catalog_dir(test_data_dir):
 @pytest.fixture
 def dr7_lrg_rand_catalog_dir(test_data_dir):
     return test_data_dir / "DR7-lrg-rand"
+
+
+@pytest.fixture
+def single_data_partition(data_catalog_dir):
+    return pd.read_parquet(data_catalog_dir / "Norder=0" / "Dir=0" / "Npix=1.parquet")
+
+
+@pytest.fixture
+def corr_bins():
+    bins, _ = gundam.makebins(33, 0.01, 0.1, 1)
+    return bins
+
+
+@pytest.fixture
+def autocorr_params():
+    params = gundam.packpars(kind="acf", write=False)
+
+    params.dsept = 0.1
+    params.nsept = 33
+    params.septmin = 0.01
+
+    params.kind = "thA"
+    params.cntid = "DD"
+    params.logf = "DD_log"
+
+    # Disable grid and fill some mock parameters
+    params.grid = 0
+    params.sbound = [1, 2, 1, 2]
+    params.mxh1 = 2
+    params.mxh2 = 2
+    params.sk1 = [[1, 2], [1, 2]]
+    return params
+
+
+@pytest.fixture
+def acf_results_dir(test_data_dir):
+    return test_data_dir / "acf"
+
+
+@pytest.fixture
+def acf_bins_left_edges(acf_results_dir):
+    return np.load(acf_results_dir / "l_binedges_acf.npy")
+
+
+@pytest.fixture
+def acf_bins_right_edges(acf_results_dir):
+    return np.load(acf_results_dir / "r_binedges_acf.npy")
+
+
+@pytest.fixture
+def acf_dd_counts(acf_results_dir):
+    return np.load(acf_results_dir / "dd_acf.npy")
+
+
+@pytest.fixture
+def acf_rr_counts(acf_results_dir):
+    return np.load(acf_results_dir / "rr_acf.npy")
+
+
+@pytest.fixture
+def acf_nat_estimate(acf_results_dir):
+    return np.load(acf_results_dir / "w_acf_nat.npy")
diff --git a/tests/corrgi/test_alignment.py b/tests/corrgi/test_alignment.py
index e4ff359..877a39d 100644
--- a/tests/corrgi/test_alignment.py
+++ b/tests/corrgi/test_alignment.py
@@ -1,11 +1,12 @@
 import hipscat
+
 from corrgi.alignment import autocorrelation_alignment, crosscorrelation_alignment
 
 
 def test_autocorrelation_alignment(data_catalog_dir):
     data_catalog = hipscat.read_from_hipscat(data_catalog_dir)
     alignment = autocorrelation_alignment(data_catalog)
-    assert len(alignment.pixel_mapping) == 28
+    assert len(alignment.pixel_mapping) == 21
     assert len(alignment.pixel_mapping.columns) == 6
 
 
diff --git a/tests/corrgi/test_corrgi.py b/tests/corrgi/test_corrgi.py
new file mode 100644
index 0000000..035503f
--- /dev/null
+++ b/tests/corrgi/test_corrgi.py
@@ -0,0 +1,69 @@
+import hipscat
+import lsdb
+import numpy as np
+import numpy.testing as npt
+from gundam import gundam
+
+from corrgi.corrgi import compute_autocorrelation
+from corrgi.dask import compute_autocorrelation_counts
+from corrgi.estimators import calculate_natural_estimate
+
+
+def test_acf_bins_are_correct(
+    acf_bins_left_edges, acf_bins_right_edges, autocorr_params
+):
+    bins, _ = gundam.makebins(
+        autocorr_params.nsept,
+        autocorr_params.septmin,
+        autocorr_params.dsept,
+        autocorr_params.logsept,
+    )
+    all_bins = np.append(acf_bins_left_edges, acf_bins_right_edges[-1])
+    assert np.array_equal(bins, all_bins)
+
+
+def test_acf_counts_are_correct(
+    dask_client,
+    data_catalog_dir,
+    rand_catalog_dir,
+    acf_dd_counts,
+    acf_rr_counts,
+    autocorr_params,
+):
+    galaxy_catalog = lsdb.read_hipscat(data_catalog_dir)
+    random_catalog = lsdb.read_hipscat(rand_catalog_dir)
+    assert isinstance(galaxy_catalog, lsdb.Catalog)
+    assert isinstance(random_catalog, lsdb.Catalog)
+    counts_dd, counts_rr = compute_autocorrelation_counts(
+        galaxy_catalog, random_catalog, autocorr_params
+    )
+    npt.assert_allclose(counts_dd, acf_dd_counts, rtol=1e-3)
+    npt.assert_allclose(counts_rr, acf_rr_counts, rtol=2e-3)
+
+
+def test_acf_natural_estimate_is_correct(
+    data_catalog_dir,
+    rand_catalog_dir,
+    acf_dd_counts,
+    acf_rr_counts,
+    acf_nat_estimate,
+):
+    galaxy_hc_catalog = hipscat.read_from_hipscat(data_catalog_dir)
+    random_hc_catalog = hipscat.read_from_hipscat(rand_catalog_dir)
+    num_galaxies = galaxy_hc_catalog.catalog_info.total_rows
+    num_random = random_hc_catalog.catalog_info.total_rows
+    estimate = calculate_natural_estimate(
+        acf_dd_counts, acf_rr_counts, num_galaxies, num_random
+    )
+    npt.assert_allclose(acf_nat_estimate, estimate, rtol=2e-3)
+
+
+def test_acf_e2e(
+    dask_client, data_catalog_dir, rand_catalog_dir, acf_nat_estimate, autocorr_params
+):
+    galaxy_catalog = lsdb.read_hipscat(data_catalog_dir)
+    random_catalog = lsdb.read_hipscat(rand_catalog_dir)
+    assert isinstance(galaxy_catalog, lsdb.Catalog)
+    assert isinstance(random_catalog, lsdb.Catalog)
+    estimate = compute_autocorrelation(galaxy_catalog, random_catalog, autocorr_params)
+    npt.assert_allclose(estimate, acf_nat_estimate, rtol=1e-7)
diff --git a/tests/corrgi/test_counting.py b/tests/corrgi/test_counting.py
new file mode 100644
index 0000000..b6a73e3
--- /dev/null
+++ b/tests/corrgi/test_counting.py
@@ -0,0 +1,16 @@
+import hipscat
+
+from corrgi.dask import _count_auto_pairs
+
+
+def test_count_auto_pairs(
+    single_data_partition,
+    data_catalog_dir,
+    corr_bins,
+    autocorr_params,
+):
+    data_catalog = hipscat.read_from_hipscat(data_catalog_dir)
+    partial = _count_auto_pairs(
+        single_data_partition, data_catalog.catalog_info, corr_bins, autocorr_params
+    )
+    assert len(partial) == len(corr_bins) - 1
diff --git a/tests/corrgi/test_parameters.py b/tests/corrgi/test_parameters.py
new file mode 100644
index 0000000..b863324
--- /dev/null
+++ b/tests/corrgi/test_parameters.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+from corrgi.parameters import create_gundam_params
+
+
+def test_create_gundam_params():
+    params = create_gundam_params(kind="acf", dsept=0.10, nsept=33, septmin=0.01)
+    assert params.kind == "acf"
+    assert params.grid == 0
+    assert np.array_equal(params.sbound, [1, 2, 1, 2])
+    assert params.mxh1 == 2
+    assert params.mxh2 == 2
+    assert np.array_equal(params.sk1, [[1, 2], [1, 2]])
+    assert params.dsept == 0.10
+    assert params.nsept == 33
+    assert params.septmin == 0.01
diff --git a/tests/data/acf/dd_acf.npy b/tests/data/acf/dd_acf.npy
new file mode 100644
index 0000000000000000000000000000000000000000..a55a1e405a7f4bad909f94ef54810103f2d53111
GIT binary patch
literal 392
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%o#yXlh3bhL411<&-n9<_^rXNg!&=IpCblDOJeP}g=&fE;4^LK;k2B*Ui
zTJ<!TK2UNQBERJ}L_GX4m`=F*8cYkceTK-d{0*ip`j{OVfZ#+R516)CDgvf8YURN6
zqo-<MdR?#~n4Vf_1)>`stGa;soqPO1^nu7Z;UIc}G;b1!PWaHC4Wb{Yah8JU4f?VT
dAo_-QX$Ocl`0F+aL{CVOoDHIdlI|^b1OR+_RTBUJ

literal 0
HcmV?d00001

diff --git a/tests/data/acf/l_binedges_acf.npy b/tests/data/acf/l_binedges_acf.npy
new file mode 100644
index 0000000000000000000000000000000000000000..69d581528ed8c9e4b09c8a0d989c08be25f0b1f2
GIT binary patch
literal 392
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%o#yXlh3bhL41FmY3b?y(VTI_un?iEcv(P@A5koLvRRuk-J-I;d3G;fOi
zgJ-{s4(*s}Uu?C&?~cI&d#zU!{tG58v43OM>aDqQrTwp!KMr%LZ?KPJ=3cxtbc_A0
znIN#!{$k5CU3=LB_7$7UjwW~>u|F`mCS17dq`hCk-S$f13-<h;9!bKsSM0Z{l-5tH
zx@jNkWPE-n=L7qSE8j}a8a%N#J93s=Is2tO0~ma;XI1<2qfh0FeOSVkZ#$xY+Hc(v
ztvln*fBTd(S6+rous8%q-?EDE;dFSI8zgUYli%T`5QDP@kBGzUPLoRvwvrB>rU%cx
cJ}U13F;B&zUEg~9CS6U3tcp&P<(u>!07hhyGynhq

literal 0
HcmV?d00001

diff --git a/tests/data/acf/r_binedges_acf.npy b/tests/data/acf/r_binedges_acf.npy
new file mode 100644
index 0000000000000000000000000000000000000000..e27a7198f23b0222b9f5fc167166eddca039d73f
GIT binary patch
literal 392
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%o#yXlh3bhL411{f%dqq=EblM+1q<wL-)dc%lcc$Gh&6{HX;Mwn@Lpx^L
z7h5gxyJN7xUhCC_|AI+N?BAHRdTXv+Y5!~GkHcK*8|>qlxfgE@-C{p$CJ5}bzt}QO
z*IxF3eZ}UoqY0iz><>(?2^a1<Y42BXx4lyMf<3>dN0PAZ75l9!rS;RQZrX=B8K2+D
z`M|#7%D0lU22bqGj-2II&VFgn00tlIS=Ijh=u`P(AC_?C+m7g;_FH#E>&|%d-#+Ec
zm6zcXEDpiZx2z(3I2|752Fcsp<ac-}#Ne#KBjPZ-)8rC^t)zpe>A`cakIFkh%u{h_
c*SFrjNmtV$tD@6n`6hjb!r(}m`=5;+05rmp5C8xG

literal 0
HcmV?d00001

diff --git a/tests/data/acf/rr_acf.npy b/tests/data/acf/rr_acf.npy
new file mode 100644
index 0000000000000000000000000000000000000000..f5dfcfbf32c52012befd1bfd8295a837099fe226
GIT binary patch
literal 392
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%o#yXlh3bhL411<&-sAzTo(*YA8bj3_4e+h&R+W?^hcY^7Ll}8}-tMd@r
z^(KT?d;+FFuzmp32ReU(=>#SgM+P7`v78@Fr+kwD(?!>m!Ss}LeK5^>+yX>5w9jw`
z^S8hA1Je&vBS7?nPP=3fePEey9++;qS`MNmHg0bM(FNAgy&!sy#n)*d`pBNA3qf>L
i__5U>`kP<IRv_Kb!H{?W#P>dU>I9HJQ0iN7$q@kd`&~-_

literal 0
HcmV?d00001

diff --git a/tests/data/acf/w_acf_nat.npy b/tests/data/acf/w_acf_nat.npy
new file mode 100644
index 0000000000000000000000000000000000000000..bd6ec6e403a97d24290a7aca933302a3ed67be42
GIT binary patch
literal 392
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I%o#yXlh3bhL41FlcWukIX<Qg%rC)vHtAt>Ey$@3V}(wzR|37ptDma+Gwq
zcInFsmX#t7_4QkKRJsT`==uL#kkrZNpf$ayU7DNQA!5_Yj9dEb4xPPC|6TZ598Pr@
z?Yt|&=up-vr6~0Kx4lqF`$@B;ANEppPfpxF|Jgp}oSow}?GN@6$^AD2ZC~1lNKRB}
zk9lI>^vN*japD8}HL`|2pTck1FEP>a@BV((KIZR-s_e>3_5nN0Txuqtw?7e-dQd0#
zl>Gw1l%(RrN9`?G_J%C4J!H=yzi7d|MSJZJIHe>;w{Npw;Jm@7<?jaj2dvMyBN<oQ
mJDhN^W!}2TzQI!Y*S)e%dxmYkj7NVp>}RM=x!L}7+I|28*p~tT

literal 0
HcmV?d00001