From bca7d991639d22828aa9460be4eac9681cc6a9f1 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 22 Apr 2024 13:55:55 +0200
Subject: [PATCH 01/12] (feat): first pass at basic benchmark

---
 benchmarks/README.md                   |  76 +++++++++++
 benchmarks/asv.conf.json               | 170 +++++++++++++++++++++++++
 benchmarks/benchmarks/__init__.py      |   0
 benchmarks/benchmarks/preprocessing.py |  81 ++++++++++++
 benchmarks/benchmarks/readwrite.py     |  64 ++++++++++
 benchmarks/benchmarks/tools.py         |  36 ++++++
 6 files changed, 427 insertions(+)
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/asv.conf.json
 create mode 100644 benchmarks/benchmarks/__init__.py
 create mode 100644 benchmarks/benchmarks/preprocessing.py
 create mode 100644 benchmarks/benchmarks/readwrite.py
 create mode 100644 benchmarks/benchmarks/tools.py
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..2ff077fd
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,76 @@
+# AnnData Benchmarks
+
+This repo contains some work in progress benchmarks for [AnnData](https://github.com/theislab/anndata) using [asv](https://asv.readthedocs.io).
+
+## Setup
+
+I definitley recommend reading through the asv docs. Currently, this assumes the benchmark suite can reach the `anndata` repo via the path `../anndata`. Otherwise, all you'll need to do is create a [machine file](https://asv.readthedocs.io/en/stable/commands.html#asv-machine) for your system and make sure `anndata`s dependencies are installable via `conda`.
+
+### Data
+
+Data will need to be retrieved for these benchmarks. This can be downloaded using the script fetch_datasets.py.
+
+Note that the `h5ad` format has changed since it's inception. While the `anndata` package maintains backwards compatibility, older versions of `anndata` will not be able to read files written by more recent versions. To get around this for the benchmarks, datasets have to be able to be read by all versions which can require a setup function that creates the anndata object.
+
+## Usage
+
+### Running the benchmarks:
+
+To run benchmarks for a particular commit: `asv run {commit} --steps 1 -b`
+
+To run benchmarks for a range of commits: `asv run {commit1}..{commit2}`
+
+You can select which benchmarks are run with the `-b {pattern}` flag.
+
+### Accessing the benchmarks
+
+You can see what benchmarks you've already run using `asv show`. If you don't specify a commit, it will search for the available commits. If you specify a commit it'll show you those results. For example:
+
+```bash
+$ asv show -b "views"
+Commits with results:
+
+Machine    : mimir.mobility.unimelb.net.au
+Environment: conda-py3.7-h5py-memory_profiler-natsort-numpy-pandas-scipy
+
+    61eb5bb7
+    e9ccfc33
+    22f12994
+    0ebe187e
+```
+
+```bash
+$ asv show -b "views" 0ebe187e
+Commit: 0ebe187e <views-of-views>
+
+views.SubsetMemorySuite.track_repeated_subset_memratio [mimir.mobility.unimelb.net.au/conda-py3.7-h5py-memory_profiler-natsort-numpy-pandas-scipy]
+  ok
+  ======= ======= ========== ============ ===================== ====================== ======================
+  --                                                                   index_kind
+  --------------------------------------- -------------------------------------------------------------------
+   n_obs   n_var   attr_set   subset_dim         intarray             boolarray                slice
+  ======= ======= ========== ============ ===================== ====================== ======================
+    100     100     X-csr        obs               2.84           1.7916666666666667            0.5
+    100     100     X-csr        var        2.5357142857142856    1.8695652173913044     0.5652173913043478
+    100     100    X-dense       obs        3.1739130434782608    1.6538461538461537            0.6
+...
+```
+
+You can compare two commits with `asv compare`
+
+```bash
+$ asv compare e9ccfc 0ebe187e
+All benchmarks:
+
+       before           after         ratio
+     [e9ccfc33]       [0ebe187e]
+     <master>         <views-of-views>
+-            2.16  1.7916666666666667     0.83  views.SubsetMemorySuite.track_repeated_subset_memratio(100, 100, 'X-csr', 'obs', 'boolarray')
++ 2.533333333333333             2.84     1.12  views.SubsetMemorySuite.track_repeated_subset_memratio(100, 100, 'X-csr', 'obs', 'intarray')
+- 1.1923076923076923              0.5     0.42  views.SubsetMemorySuite.track_repeated_subset_memratio(100, 100, 'X-csr', 'obs', 'slice')
+  1.9615384615384615  1.8695652173913044     0.95  views.SubsetMemorySuite.track_repeated_subset_memratio(100, 100, 'X-csr', 'var', 'boolarray')
+```
+
+### View in the browser:
+
+You can view the benchmarks in the browser with `asv publish` followed by `asv preview`. If you want to include benchmarks of a local branch, I think you'll have to add that branch to the `"branches"` list in `asv.conf.json`.
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 00000000..7bb7e0d1
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,170 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+    // The name of the project being benchmarked
+    "project": "rapids_singlecell",
+    // The project's homepage
+    "project_url": "https://rapids-singlecell.readthedocs.io/",
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "../src/rapids_singlecell",
+    // The Python project's subdirectory in your repo.  If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository.
+    // "repo_subdir": "",
+    // Customizable commands for building, installing, and
+    // uninstalling the project. See asv.conf.json documentation.
+    //
+    // "install_command": ["python -mpip install {wheel_file}"],
+    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+    "build_command": [
+        "python -m pip install build",
+        "python -m build --wheel -o {build_cache_dir} {build_dir}",
+    ],
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": [
+        "main"
+    ], // for git
+    // "branches": ["default"],    // for mercurial
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "mamba",
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/scverse/rapids_singlecell/commit/",
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    // "pythons": ["2.7", "3.6"],
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    "conda_channels": [
+        "conda-forge",
+        "defaults"
+    ],
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list or empty string indicates to just test against the default
+    // (latest) version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed via
+    // pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    "matrix": {
+        "cudf-cu12": [
+            ""
+        ],
+        // "scipy": ["1.2", ""],
+        "cuml-cu12": [
+            ""
+        ],
+        "cugraph-cu12": [
+            ""
+        ],
+        "pandas": [
+            ""
+        ],
+        "memory_profiler": [
+            ""
+        ],
+        "anndata": [
+            ""
+        ],
+        "scanpy": [
+            ""
+        ],
+        "numpy": [
+            ""
+        ],
+        "scipy": [
+            ""
+        ]
+        // "scanpy": [""],
+        // "psutil": [""]
+    },
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "numpy": "1.8"},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "mamba", "python": "2.7", "libpython": ""},
+    // ],
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    // "benchmark_dir": "benchmarks",
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": ".asv/results",
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": ".asv/html",
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "build_cache_size": 2,
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // },
+}
\ No newline at end of file
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
new file mode 100644
index 00000000..de04104e
--- /dev/null
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -0,0 +1,81 @@
+"""
+This module will benchmark preprocessing operations (`rsc.pp`) in rapids_singlecell
+API documentation: https://rapids-singlecell.readthedocs.io/
+"""
+
+from __future__ import annotations
+
+import rapids_singlecell as rsc
+import scanpy as sc
+
+class PreprocessingSuite:
+    _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
+    params = _data_dict.keys()
+    param_names = ["input_data"]
+
+    def setup(self, input_data: str):
+        self.adata = self._data_dict[input_data].copy()
+
+    def time_calculate_qc_metrics(self, *_):
+        self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
+        rsc.pp.calculate_qc_metrics(
+            self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
+        )
+
+    def peakmem_calculate_qc_metrics(self, *_):
+        self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
+        rsc.pp.calculate_qc_metrics(
+            self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
+        )
+
+    def time_filter_cells(self, *_):
+        rsc.pp.filter_cells(self.adata, min_genes=200)
+
+    def peakmem_filter_cells(self, *_):
+        rsc.pp.filter_cells(self.adata, min_genes=200)
+
+    def time_filter_genes(self, *_):
+        rsc.pp.filter_genes(self.adata, min_cells=3)
+
+    def peakmem_filter_genes(self, *_):
+        rsc.pp.filter_genes(self.adata, min_cells=3)
+
+    def time_normalize_total(self, *_):
+        rsc.pp.normalize_total(self.adata, target_sum=1e4)
+
+    def peakmem_normalize_total(self, *_):
+        rsc.pp.normalize_total(self.adata, target_sum=1e4)
+
+    def time_log1p(self, *_):
+        rsc.pp.log1p(self.adata)
+
+    def peakmem_time_log1p(self, *_):
+        rsc.pp.log1p(self.adata)
+
+    def time_pca(self, *_):
+        rsc.pp.pca(self.adata, svd_solver="arpack")
+
+    def peakmem_pca(self, *_):
+        rsc.pp.pca(self.adata, svd_solver="arpack")
+
+    def time_highly_variable_genes(self, *_):
+        rsc.pp.highly_variable_genes(
+            self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
+        )
+
+    def peakmem_highly_variable_genes(self, *_):
+        rsc.pp.highly_variable_genes(
+            self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
+        )
+
+    def time_regress_out(self, *_):
+        rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
+
+    def peakmem_regress_out(self, *_):
+        rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
+
+    def time_scale(self, *_):
+        rsc.pp.scale(self.adata, max_value=10)
+
+    def peakmem_scale(self, *_):
+        rsc.pp.scale(self.adata, max_value=10)
diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
new file mode 100644
index 00000000..e7e2d1b3
--- /dev/null
+++ b/benchmarks/benchmarks/readwrite.py
@@ -0,0 +1,64 @@
+"""
+This module will benchmark io of Scanpy readwrite operations
+
+Things to test:
+
+* Read time, write time
+* Peak memory during io
+* File sizes
+
+Parameterized by:
+
+* What method is being used
+* What data is being included
+* Size of data being used
+
+Also interesting:
+
+* io for views
+* io for backed objects
+* Reading dense as sparse, writing sparse as dense
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import anndata
+from rapids_singlecell.get import anndata_to_GPU
+
+import scanpy as sc
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from pathlib import Path
+
+
+@dataclass
+class Dataset:
+    path: Path
+    get: Callable[[], anndata.AnnData]
+
+
+pbmc3k = Dataset(
+    path=sc.settings.datasetdir / "pbmc3k_raw.h5ad", get=sc.datasets.pbmc3k
+)
+
+class ToGPUSuite:
+    _data_dict = dict(pbmc3k=pbmc3k)
+    params = _data_dict.keys()
+    param_names = ["input_data"]
+
+    def setup(self, input_data: str):
+        self.path = self._data_dict[input_data].path
+        self.data = self._data_dict[input_data].get()
+
+    def time_to_gpu(self, *_):
+        anndata_to_GPU(self.data)
+
+    def peakmem_to_gpu(self, *_):
+        anndata_to_GPU(self.data)
+
+    def mem_to_gpu(self, *_):
+        anndata_to_GPU(self.data)
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
new file mode 100644
index 00000000..109375da
--- /dev/null
+++ b/benchmarks/benchmarks/tools.py
@@ -0,0 +1,36 @@
+"""
+This module will benchmark tool operations (`rsc.tl`) in rapids_singlecell
+API documentation: https://rapids-singlecell.readthedocs.io/
+"""
+
+from __future__ import annotations
+
+import rapids_singlecell as rsc
+import scanpy as sc
+
+class ToolsSuite:
+    _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
+    params = _data_dict.keys()
+    param_names = ["input_data"]
+
+    def setup(self, input_data):
+        self.adata = self._data_dict[input_data].copy()
+        assert "X_pca" in self.adata.obsm
+
+    def time_umap(self, *_):
+        rsc.tl.umap(self.adata)
+
+    def peakmem_umap(self, *_):
+        rsc.tl.umap(self.adata)
+
+    def time_diffmap(self, *_):
+        rsc.tl.diffmap(self.adata)
+
+    def peakmem_diffmap(self, *_):
+        rsc.tl.diffmap(self.adata)
+
+    def time_leiden(self, *_):
+        rsc.tl.leiden(self.adata)
+
+    def peakmem_leiden(self, *_):
+        rsc.tl.leiden(self.adata)
\ No newline at end of file

From f3ff168993efed91f54c124acb4c0a8a90c394ab Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 22 Apr 2024 14:21:03 +0200
Subject: [PATCH 02/12] (feat): add squidpy

---
 benchmarks/benchmarks/squidpy.py | 55 ++++++++++++++++++++++++++++++++
 benchmarks/benchmarks/tools.py   | 10 ++++--
 2 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 benchmarks/benchmarks/squidpy.py

diff --git a/benchmarks/benchmarks/squidpy.py b/benchmarks/benchmarks/squidpy.py
new file mode 100644
index 00000000..3dba29b1
--- /dev/null
+++ b/benchmarks/benchmarks/squidpy.py
@@ -0,0 +1,55 @@
+"""
+This module will benchmark Squidpy-style graph operations (`rsc.gr`) in rapids_singlecell
+API documentation: https://rapids-singlecell.readthedocs.io/
+"""
+
+from __future__ import annotations
+from itertools import product
+
+import rapids_singlecell as rsc
+import scanpy as sc
+
+class ToolsSuite:
+    _data_dict = dict(visium_sge=sc.datasets.visium_sge(), )
+    params = _data_dict.keys()
+    param_names = ["input_data"]
+
+    def setup(self, input_data):
+        self.adata = self._data_dict[input_data].copy()
+        assert "X_pca" in self.adata.obsm
+
+    def time_ligrec(self, *_):
+        gene_ids = self.adata.var.index
+        interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
+        rsc.gr.ligrec(
+            self.adata,
+            "leiden",
+            interactions=interactions,
+            n_perms=5,
+            use_raw=True,
+            copy=True,
+        )
+
+    def peakmem_ligrec(self, *_):
+        gene_ids = self.adata.var.index
+        interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
+        rsc.gr.ligrec(
+            self.adata,
+            "leiden",
+            interactions=interactions,
+            n_perms=5,
+            use_raw=True,
+            copy=True,
+        )
+
+    def time_autocorr_moran(self, *_):
+        rsc.gr.spatial_autocorr(self.adata, mode="moran")
+
+    def peakmem_autocorr_moran(self, *_):
+        rsc.gr.spatial_autocorr(self.adata, mode="moran")
+
+    def time_autocorr_geary(self, *_):
+        rsc.gr.spatial_autocorr(self.adata, mode="geary")
+
+    def peakmem_autocorr_geary(self, *_):
+        rsc.gr.spatial_autocorr(self.adata, mode="geary")
\ No newline at end of file
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
index 109375da..944a7b3e 100644
--- a/benchmarks/benchmarks/tools.py
+++ b/benchmarks/benchmarks/tools.py
@@ -9,7 +9,7 @@
 import scanpy as sc
 
 class ToolsSuite:
-    _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
+    _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced(), )
     params = _data_dict.keys()
     param_names = ["input_data"]
 
@@ -33,4 +33,10 @@ def time_leiden(self, *_):
         rsc.tl.leiden(self.adata)
 
     def peakmem_leiden(self, *_):
-        rsc.tl.leiden(self.adata)
\ No newline at end of file
+        rsc.tl.leiden(self.adata)
+
+    def time_embedding_denity(self, *_):
+        rsc.tl.embedding_density(self.adata, basis="X_umap")
+
+    def peakmem_embedding_denity(self, *_):
+        rsc.tl.embedding_density(self.adata, basis="X_umap")
\ No newline at end of file

From e30753adcf057f03ac95941e67969c36716d7637 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 22 Apr 2024 14:24:36 +0200
Subject: [PATCH 03/12] (feat): add neighbors

---
 benchmarks/benchmarks/preprocessing.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
index de04104e..b5cf46ab 100644
--- a/benchmarks/benchmarks/preprocessing.py
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -79,3 +79,9 @@ def time_scale(self, *_):
 
     def peakmem_scale(self, *_):
         rsc.pp.scale(self.adata, max_value=10)
+
+    def time_neighbors(self, *_):
+        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)
+
+    def peakmem_neighbors(self, *_):
+        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)

From dc73c22d77e372a598d6158a125b83c8b4e87366 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 23 Apr 2024 11:13:17 +0000
Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/asv.conf.json               |  2 +-
 benchmarks/benchmarks/preprocessing.py |  4 +++-
 benchmarks/benchmarks/readwrite.py     |  5 +++--
 benchmarks/benchmarks/squidpy.py       | 11 ++++++++---
 benchmarks/benchmarks/tools.py         | 10 +++++++---
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 7bb7e0d1..a72cde8f 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -167,4 +167,4 @@
     //    "some_benchmark": 0.01,     // Threshold of 1%
     //    "another_benchmark": 0.5,   // Threshold of 50%
     // },
-}
\ No newline at end of file
+}
diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
index b5cf46ab..e4620dcf 100644
--- a/benchmarks/benchmarks/preprocessing.py
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -5,9 +5,11 @@
 
 from __future__ import annotations
 
-import rapids_singlecell as rsc
 import scanpy as sc
 
+import rapids_singlecell as rsc
+
+
 class PreprocessingSuite:
     _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
     params = _data_dict.keys()
diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
index e7e2d1b3..4b314e81 100644
--- a/benchmarks/benchmarks/readwrite.py
+++ b/benchmarks/benchmarks/readwrite.py
@@ -26,10 +26,10 @@
 from typing import TYPE_CHECKING
 
 import anndata
-from rapids_singlecell.get import anndata_to_GPU
-
 import scanpy as sc
 
+from rapids_singlecell.get import anndata_to_GPU
+
 if TYPE_CHECKING:
     from collections.abc import Callable
     from pathlib import Path
@@ -45,6 +45,7 @@ class Dataset:
     path=sc.settings.datasetdir / "pbmc3k_raw.h5ad", get=sc.datasets.pbmc3k
 )
 
+
 class ToGPUSuite:
     _data_dict = dict(pbmc3k=pbmc3k)
     params = _data_dict.keys()
diff --git a/benchmarks/benchmarks/squidpy.py b/benchmarks/benchmarks/squidpy.py
index 3dba29b1..fc9de89c 100644
--- a/benchmarks/benchmarks/squidpy.py
+++ b/benchmarks/benchmarks/squidpy.py
@@ -4,13 +4,18 @@
 """
 
 from __future__ import annotations
+
 from itertools import product
 
-import rapids_singlecell as rsc
 import scanpy as sc
 
+import rapids_singlecell as rsc
+
+
 class ToolsSuite:
-    _data_dict = dict(visium_sge=sc.datasets.visium_sge(), )
+    _data_dict = dict(
+        visium_sge=sc.datasets.visium_sge(),
+    )
     params = _data_dict.keys()
     param_names = ["input_data"]
 
@@ -52,4 +57,4 @@ def time_autocorr_geary(self, *_):
         rsc.gr.spatial_autocorr(self.adata, mode="geary")
 
     def peakmem_autocorr_geary(self, *_):
-        rsc.gr.spatial_autocorr(self.adata, mode="geary")
\ No newline at end of file
+        rsc.gr.spatial_autocorr(self.adata, mode="geary")
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
index 944a7b3e..7bd53684 100644
--- a/benchmarks/benchmarks/tools.py
+++ b/benchmarks/benchmarks/tools.py
@@ -5,11 +5,15 @@
 
 from __future__ import annotations
 
-import rapids_singlecell as rsc
 import scanpy as sc
 
+import rapids_singlecell as rsc
+
+
 class ToolsSuite:
-    _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced(), )
+    _data_dict = dict(
+        pbmc68k_reduced=sc.datasets.pbmc68k_reduced(),
+    )
     params = _data_dict.keys()
     param_names = ["input_data"]
 
@@ -39,4 +43,4 @@ def time_embedding_denity(self, *_):
         rsc.tl.embedding_density(self.adata, basis="X_umap")
 
     def peakmem_embedding_denity(self, *_):
-        rsc.tl.embedding_density(self.adata, basis="X_umap")
\ No newline at end of file
+        rsc.tl.embedding_density(self.adata, basis="X_umap")

From 60400517aad6cfe33a6b44840401dfe079bcaa28 Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Tue, 23 Apr 2024 13:35:04 +0200
Subject: [PATCH 05/12] (fix): asv conf

---
 benchmarks/README.md     | 11 ++++++++---
 benchmarks/asv.conf.json | 12 +++++++-----
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 2ff077fd..d6215c47 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,16 +1,21 @@
 # AnnData Benchmarks
 
-This repo contains some work in progress benchmarks for [AnnData](https://github.com/theislab/anndata) using [asv](https://asv.readthedocs.io).
+This repo contains some work in progress benchmarks for [rapids_singlecell](https://github.com/scverse/rapids_singlecell) using [asv](https://asv.readthedocs.io).
 
 ## Setup
 
-I definitley recommend reading through the asv docs. Currently, this assumes the benchmark suite can reach the `anndata` repo via the path `../anndata`. Otherwise, all you'll need to do is create a [machine file](https://asv.readthedocs.io/en/stable/commands.html#asv-machine) for your system and make sure `anndata`s dependencies are installable via `conda`.
+I definitely recommend reading through the asv docs. Currently, this assumes the benchmark suite can reach the `rapids_singlecell` repo via the path `../` (the `"repo"` entry in `asv.conf.json`). Otherwise, all you'll need to do is create a [machine file](https://asv.readthedocs.io/en/stable/commands.html#asv-machine) for your system and make sure `rapids_singlecell`'s dependencies are installable via `conda`.
+
+```shell
+pip install chardet
+conda install mamba
+```
 
 ### Data
 
 Data will need to be retrieved for these benchmarks. This can be downloaded using the script fetch_datasets.py.
 
-Note that the `h5ad` format has changed since it's inception. While the `anndata` package maintains backwards compatibility, older versions of `anndata` will not be able to read files written by more recent versions. To get around this for the benchmarks, datasets have to be able to be read by all versions which can require a setup function that creates the anndata object.
+Note that the `h5ad` format has changed since its inception. While the `anndata` package maintains backwards compatibility, older versions of `anndata` will not be able to read files written by more recent versions. To get around this for the benchmarks, datasets have to be able to be read by all versions which can require a setup function that creates the anndata object.
 
 ## Usage
 
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 7bb7e0d1..782b4a61 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -8,7 +8,7 @@
     "project_url": "https://rapids-singlecell.readthedocs.io/",
     // The URL or local path of the source code repository for the
     // project being benchmarked
-    "repo": "../src/rapids_singlecell",
+    "repo": "../",
     // The Python project's subdirectory in your repo.  If missing or
     // the empty string, the project is assumed to be located at the root
     // of the repository.
@@ -51,7 +51,9 @@
     // dependency packages in the specified order
     "conda_channels": [
         "conda-forge",
-        "defaults"
+        "defaults",
+        "rapidsai",
+        "nvidia"
     ],
     // The matrix of dependencies to test.  Each key is the name of a
     // package (in PyPI) and the values are version numbers.  An empty
@@ -64,14 +66,14 @@
     // followed by the pip installed packages).
     //
     "matrix": {
-        "cudf-cu12": [
+        "cudf": [
             ""
         ],
         // "scipy": ["1.2", ""],
-        "cuml-cu12": [
+        "cuml": [
             ""
         ],
-        "cugraph-cu12": [
+        "cugraph": [
             ""
         ],
         "pandas": [

From 82ec06b899e702499c439373ef133828b542973b Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Tue, 23 Apr 2024 15:13:07 +0200
Subject: [PATCH 06/12] (fix): try specifying version more specifically

---
 benchmarks/asv.conf.json   | 81 ++++++++++++++++++++------------------
 benchmarks/environment.yml | 17 ++++++++
 2 files changed, 59 insertions(+), 39 deletions(-)
 create mode 100644 benchmarks/environment.yml

diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 988b6119..cd45d4f3 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -49,12 +49,12 @@
     // "pythons": ["2.7", "3.6"],
     // The list of conda channel names to be searched for benchmark
     // dependency packages in the specified order
-    "conda_channels": [
-        "conda-forge",
-        "defaults",
-        "rapidsai",
-        "nvidia"
-    ],
+    // "conda_channels": [
+    //     "conda-forge",
+    //     "defaults",
+    //     "rapidsai",
+    //     "nvidia"
+    // ],
     // The matrix of dependencies to test.  Each key is the name of a
     // package (in PyPI) and the values are version numbers.  An empty
     // list or empty string indicates to just test against the default
@@ -65,38 +65,41 @@
     // pip (with all the conda available packages installed first,
     // followed by the pip installed packages).
     //
-    "matrix": {
-        "cudf": [
-            ""
-        ],
-        // "scipy": ["1.2", ""],
-        "cuml": [
-            ""
-        ],
-        "cugraph": [
-            ""
-        ],
-        "pandas": [
-            ""
-        ],
-        "memory_profiler": [
-            ""
-        ],
-        "anndata": [
-            ""
-        ],
-        "scanpy": [
-            ""
-        ],
-        "numpy": [
-            ""
-        ],
-        "scipy": [
-            ""
-        ]
-        // "scanpy": [""],
-        // "psutil": [""]
-    },
+    "conda_environment_file": "environment.yml",
+    // "matrix": {
+    //     "cuda-version": [
+    //         "12.2"
+    //     ],
+    //     "cudf": [
+    //         "24.4"
+    //     ],
+    //     "cuml": [
+    //         "24.4"
+    //     ],
+    //     "cugraph": [
+    //         "24.4"
+    //     ],
+    //     "pandas": [
+    //         ""
+    //     ],
+    //     "memory_profiler": [
+    //         ""
+    //     ],
+    //     "anndata": [
+    //         ""
+    //     ],
+    //     "scanpy": [
+    //         ""
+    //     ],
+    //     "numpy": [
+    //         ""
+    //     ],
+    //     "scipy": [
+    //         ""
+    //     ]
+    //     // "scanpy": [""],
+    //     // "psutil": [""]
+    // },
     // Combinations of libraries/python versions can be excluded/included
     // from the set to test. Each entry is a dictionary containing additional
     // key-value pairs to include/exclude.
@@ -169,4 +172,4 @@
     //    "some_benchmark": 0.01,     // Threshold of 1%
     //    "another_benchmark": 0.5,   // Threshold of 50%
     // },
-}
+}
\ No newline at end of file
diff --git a/benchmarks/environment.yml b/benchmarks/environment.yml
new file mode 100644
index 00000000..d1e3294a
--- /dev/null
+++ b/benchmarks/environment.yml
@@ -0,0 +1,17 @@
+name: test_asv
+channels:
+  - rapidsai
+  - nvidia
+  - conda-forge
+dependencies:
+  - python=3.11
+  - cuda-version=11.8
+  - cudf=24.4
+  - cuml=24.4
+  - cugraph=24.4
+  - pandas
+  - memory_profiler
+  - anndata
+  - scanpy
+  - numpy
+  - scipy
\ No newline at end of file

From 2ce6360bb8f2ce9837938334707b4863682ff5b0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 23 Apr 2024 13:29:21 +0000
Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/asv.conf.json   | 2 +-
 benchmarks/environment.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index cd45d4f3..71d56a00 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -172,4 +172,4 @@
     //    "some_benchmark": 0.01,     // Threshold of 1%
     //    "another_benchmark": 0.5,   // Threshold of 50%
     // },
-}
\ No newline at end of file
+}
diff --git a/benchmarks/environment.yml b/benchmarks/environment.yml
index d1e3294a..9a00718f 100644
--- a/benchmarks/environment.yml
+++ b/benchmarks/environment.yml
@@ -14,4 +14,4 @@ dependencies:
   - anndata
   - scanpy
   - numpy
-  - scipy
\ No newline at end of file
+  - scipy

From 86f28ffacc59c95334c5e83a9e9c653ace13aebe Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Wed, 24 Apr 2024 10:59:47 +0200
Subject: [PATCH 08/12] (feat): use `anndata_to_GPU`

---
 benchmarks/benchmarks/preprocessing.py |  2 +-
 benchmarks/benchmarks/readwrite.py     | 13 ++++++-------
 benchmarks/benchmarks/squidpy.py       | 14 ++++++--------
 benchmarks/benchmarks/tools.py         |  8 +++++---
 benchmarks/environment.yml             |  3 +--
 5 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
index e4620dcf..349aabbf 100644
--- a/benchmarks/benchmarks/preprocessing.py
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -16,7 +16,7 @@ class PreprocessingSuite:
     param_names = ["input_data"]
 
     def setup(self, input_data: str):
-        self.adata = self._data_dict[input_data].copy()
+        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
 
     def time_calculate_qc_metrics(self, *_):
         self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
index 4b314e81..263bd50a 100644
--- a/benchmarks/benchmarks/readwrite.py
+++ b/benchmarks/benchmarks/readwrite.py
@@ -34,26 +34,25 @@
     from collections.abc import Callable
     from pathlib import Path
 
+import pathlib
+
+sc.settings.datasetdir = pathlib.Path(__file__).parent.resolve() / "data"
 
 @dataclass
 class Dataset:
     path: Path
     get: Callable[[], anndata.AnnData]
 
-
-pbmc3k = Dataset(
-    path=sc.settings.datasetdir / "pbmc3k_raw.h5ad", get=sc.datasets.pbmc3k
-)
+path="/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/pbmc3k_raw.h5ad"
 
 
 class ToGPUSuite:
-    _data_dict = dict(pbmc3k=pbmc3k)
+    _data_dict = dict(pbmc3k=anndata.read_h5ad(path))
     params = _data_dict.keys()
     param_names = ["input_data"]
 
     def setup(self, input_data: str):
-        self.path = self._data_dict[input_data].path
-        self.data = self._data_dict[input_data].get()
+        self.data = self._data_dict[input_data]
 
     def time_to_gpu(self, *_):
         anndata_to_GPU(self.data)
diff --git a/benchmarks/benchmarks/squidpy.py b/benchmarks/benchmarks/squidpy.py
index fc9de89c..134970b1 100644
--- a/benchmarks/benchmarks/squidpy.py
+++ b/benchmarks/benchmarks/squidpy.py
@@ -7,21 +7,21 @@
 
 from itertools import product
 
-import scanpy as sc
+import anndata as ad
 
 import rapids_singlecell as rsc
 
+import pathlib
 
 class ToolsSuite:
     _data_dict = dict(
-        visium_sge=sc.datasets.visium_sge(),
+        visium_sge=ad.read_h5ad("/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/paul15.h5ad"),
     )
     params = _data_dict.keys()
     param_names = ["input_data"]
 
     def setup(self, input_data):
-        self.adata = self._data_dict[input_data].copy()
-        assert "X_pca" in self.adata.obsm
+        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
 
     def time_ligrec(self, *_):
         gene_ids = self.adata.var.index
@@ -31,8 +31,7 @@ def time_ligrec(self, *_):
             "leiden",
             interactions=interactions,
             n_perms=5,
-            use_raw=True,
-            copy=True,
+            use_raw=False,
         )
 
     def peakmem_ligrec(self, *_):
@@ -43,8 +42,7 @@ def peakmem_ligrec(self, *_):
             "leiden",
             interactions=interactions,
             n_perms=5,
-            use_raw=True,
-            copy=True,
+            use_raw=False,
         )
 
     def time_autocorr_moran(self, *_):
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
index 7bd53684..d0f780fa 100644
--- a/benchmarks/benchmarks/tools.py
+++ b/benchmarks/benchmarks/tools.py
@@ -9,6 +9,8 @@
 
 import rapids_singlecell as rsc
 
+import pathlib
+
 
 class ToolsSuite:
     _data_dict = dict(
@@ -18,7 +20,7 @@ class ToolsSuite:
     param_names = ["input_data"]
 
     def setup(self, input_data):
-        self.adata = self._data_dict[input_data].copy()
+        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
         assert "X_pca" in self.adata.obsm
 
     def time_umap(self, *_):
@@ -40,7 +42,7 @@ def peakmem_leiden(self, *_):
         rsc.tl.leiden(self.adata)
 
     def time_embedding_denity(self, *_):
-        rsc.tl.embedding_density(self.adata, basis="X_umap")
+        rsc.tl.embedding_density(self.adata, basis="umap")
 
     def peakmem_embedding_denity(self, *_):
-        rsc.tl.embedding_density(self.adata, basis="X_umap")
+        rsc.tl.embedding_density(self.adata, basis="umap")
diff --git a/benchmarks/environment.yml b/benchmarks/environment.yml
index d1e3294a..a8ef69f9 100644
--- a/benchmarks/environment.yml
+++ b/benchmarks/environment.yml
@@ -1,11 +1,10 @@
-name: test_asv
 channels:
   - rapidsai
   - nvidia
   - conda-forge
 dependencies:
   - python=3.11
-  - cuda-version=11.8
+  - cuda-version=12
   - cudf=24.4
   - cuml=24.4
   - cugraph=24.4

From c19307c7b087affe197a04e3a49de4d7a7b337bb Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Thu, 25 Apr 2024 09:58:55 +0200
Subject: [PATCH 09/12] (feat): track peak memory, clean up args

---
 .gitignore                             |  5 +-
 benchmarks/benchmarks/preprocessing.py | 54 ++++++++++++-------
 benchmarks/benchmarks/readwrite.py     | 34 +++---------
 benchmarks/benchmarks/squidpy.py       | 40 ++++++++------
 benchmarks/benchmarks/tools.py         | 16 +++---
 benchmarks/benchmarks/utils.py         | 74 ++++++++++++++++++++++++++
 6 files changed, 152 insertions(+), 71 deletions(-)
 create mode 100644 benchmarks/benchmarks/utils.py

diff --git a/.gitignore b/.gitignore
index 667392bd..7acc6f84 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,7 @@
 __pycache__/
 /*cache/
 .ipynb_checkpoints/
-/data/
+data/
 
 # Distribution / packaging
 /dist/
@@ -16,3 +16,6 @@ __pycache__/
 
 # Venvs
 *venv/
+
+# asv
+.asv/
diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
index 349aabbf..54681cfa 100644
--- a/benchmarks/benchmarks/preprocessing.py
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -9,6 +9,8 @@
 
 import rapids_singlecell as rsc
 
+from .utils import track_peakmem
+
 
 class PreprocessingSuite:
     _data_dict = dict(pbmc68k_reduced=sc.datasets.pbmc68k_reduced())
@@ -21,51 +23,59 @@ def setup(self, input_data: str):
     def time_calculate_qc_metrics(self, *_):
         self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
         rsc.pp.calculate_qc_metrics(
-            self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
+            self.adata, qc_vars=["mt"], log1p=False
         )
 
-    def peakmem_calculate_qc_metrics(self, *_):
+    @track_peakmem
+    def track_peakmem_calculate_qc_metrics(self, *_):
         self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
         rsc.pp.calculate_qc_metrics(
-            self.adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
+            self.adata, qc_vars=["mt"], log1p=False
         )
 
     def time_filter_cells(self, *_):
-        rsc.pp.filter_cells(self.adata, min_genes=200)
+        rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
+
+    @track_peakmem
+    def track_peakmem_filter_cells(self, *_):
+        rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
 
-    def peakmem_filter_cells(self, *_):
-        rsc.pp.filter_cells(self.adata, min_genes=200)
 
     def time_filter_genes(self, *_):
-        rsc.pp.filter_genes(self.adata, min_cells=3)
+        rsc.pp.filter_genes(self.adata, qc_var="n_counts", min_count=3)
 
-    def peakmem_filter_genes(self, *_):
-        rsc.pp.filter_genes(self.adata, min_cells=3)
+    @track_peakmem
+    def track_peakmem_filter_genes(self, *_):
+        rsc.pp.filter_genes(self.adata, qc_var="n_counts", min_count=3)
 
     def time_normalize_total(self, *_):
         rsc.pp.normalize_total(self.adata, target_sum=1e4)
 
-    def peakmem_normalize_total(self, *_):
+    @track_peakmem
+    def track_peakmem_normalize_total(self, *_):
         rsc.pp.normalize_total(self.adata, target_sum=1e4)
 
     def time_log1p(self, *_):
         rsc.pp.log1p(self.adata)
 
-    def peakmem_time_log1p(self, *_):
+    @track_peakmem
+    def track_peakmem_time_log1p(self, *_):
         rsc.pp.log1p(self.adata)
 
     def time_pca(self, *_):
-        rsc.pp.pca(self.adata, svd_solver="arpack")
+        rsc.pp.pca(self.adata)
 
-    def peakmem_pca(self, *_):
-        rsc.pp.pca(self.adata, svd_solver="arpack")
+    @track_peakmem
+    def track_peakmem_pca(self, *_):
+        rsc.pp.pca(self.adata)
 
     def time_highly_variable_genes(self, *_):
         rsc.pp.highly_variable_genes(
             self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
         )
 
-    def peakmem_highly_variable_genes(self, *_):
+    @track_peakmem
+    def track_peakmem_highly_variable_genes(self, *_):
         rsc.pp.highly_variable_genes(
             self.adata, min_mean=0.0125, max_mean=3, min_disp=0.5
         )
@@ -73,17 +83,21 @@ def peakmem_highly_variable_genes(self, *_):
     def time_regress_out(self, *_):
         rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
 
-    def peakmem_regress_out(self, *_):
+    @track_peakmem
+    def track_peakmem_regress_out(self, *_):
         rsc.pp.regress_out(self.adata, ["n_counts", "percent_mito"])
 
     def time_scale(self, *_):
         rsc.pp.scale(self.adata, max_value=10)
 
-    def peakmem_scale(self, *_):
+    @track_peakmem
+    def track_peakmem_scale(self, *_):
         rsc.pp.scale(self.adata, max_value=10)
 
     def time_neighbors(self, *_):
-        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)
+        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=50)
+
+    @track_peakmem
+    def track_peakmem_neighbors(self, *_):
+        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=50)
 
-    def peakmem_neighbors(self, *_):
-        rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=100)
diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
index 263bd50a..1f3900d1 100644
--- a/benchmarks/benchmarks/readwrite.py
+++ b/benchmarks/benchmarks/readwrite.py
@@ -22,43 +22,23 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
-import anndata
 import scanpy as sc
 
 from rapids_singlecell.get import anndata_to_GPU
-
-if TYPE_CHECKING:
-    from collections.abc import Callable
-    from pathlib import Path
-
-import pathlib
-
-sc.settings.datasetdir = pathlib.Path(__file__).parent.resolve() / "data"
-
-@dataclass
-class Dataset:
-    path: Path
-    get: Callable[[], anndata.AnnData]
-
-path="/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/pbmc3k_raw.h5ad"
-
+from .utils import track_peakmem
 
 class ToGPUSuite:
-    _data_dict = dict(pbmc3k=anndata.read_h5ad(path))
+    _data_dict = dict(obmc68k_reduced=sc.datasets.pbmc68k_reduced())
     params = _data_dict.keys()
     param_names = ["input_data"]
 
     def setup(self, input_data: str):
-        self.data = self._data_dict[input_data]
+        self.adata = self._data_dict[input_data]
 
     def time_to_gpu(self, *_):
-        anndata_to_GPU(self.data)
+        anndata_to_GPU(self.adata)
 
-    def peakmem_to_gpu(self, *_):
-        anndata_to_GPU(self.data)
+    @track_peakmem
+    def track_peakmem_to_gpu(self, *_):
+        anndata_to_GPU(self.adata)
 
-    def mem_to_gpu(self, *_):
-        anndata_to_GPU(self.data)
diff --git a/benchmarks/benchmarks/squidpy.py b/benchmarks/benchmarks/squidpy.py
index 134970b1..0500bb39 100644
--- a/benchmarks/benchmarks/squidpy.py
+++ b/benchmarks/benchmarks/squidpy.py
@@ -7,52 +7,58 @@
 
 from itertools import product
 
-import anndata as ad
+import scanpy as sc
 
 import rapids_singlecell as rsc
 
-import pathlib
+from .utils import track_peakmem
 
 class ToolsSuite:
     _data_dict = dict(
-        visium_sge=ad.read_h5ad("/p/project/training2406/team_scverse/gold2/rapids_singlecell/benchmarks/data/paul15.h5ad"),
+         pbmc68k_reduced=sc.datasets.pbmc68k_reduced(),
     )
     params = _data_dict.keys()
     param_names = ["input_data"]
 
     def setup(self, input_data):
-        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
+        self.cpu_adata = self._data_dict[input_data].copy()
+        self.gpu_adata = rsc.get.anndata_to_GPU(self.cpu_adata, copy=True)
 
     def time_ligrec(self, *_):
-        gene_ids = self.adata.var.index
+        gene_ids = self.cpu_adata.var.index
         interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
         rsc.gr.ligrec(
-            self.adata,
-            "leiden",
+            self.cpu_adata,
+            "louvain",
             interactions=interactions,
             n_perms=5,
             use_raw=False,
         )
 
-    def peakmem_ligrec(self, *_):
-        gene_ids = self.adata.var.index
+    @track_peakmem
+    def track_peakmem_ligrec(self, *_):
+        gene_ids = self.cpu_adata.var.index
         interactions = tuple(product(gene_ids[:5], gene_ids[:5]))
         rsc.gr.ligrec(
-            self.adata,
-            "leiden",
+            self.cpu_adata,
+            "louvain",
             interactions=interactions,
             n_perms=5,
             use_raw=False,
         )
 
+
     def time_autocorr_moran(self, *_):
-        rsc.gr.spatial_autocorr(self.adata, mode="moran")
+        rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
 
-    def peakmem_autocorr_moran(self, *_):
-        rsc.gr.spatial_autocorr(self.adata, mode="moran")
+    @track_peakmem
+    def track_peakmem_autocorr_moran(self, *_):
+        rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
 
     def time_autocorr_geary(self, *_):
-        rsc.gr.spatial_autocorr(self.adata, mode="geary")
+        rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
+
+    @track_peakmem
+    def track_peakmem_autocorr_geary(self, *_):
+        rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
 
-    def peakmem_autocorr_geary(self, *_):
-        rsc.gr.spatial_autocorr(self.adata, mode="geary")
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
index d0f780fa..2dbf1e68 100644
--- a/benchmarks/benchmarks/tools.py
+++ b/benchmarks/benchmarks/tools.py
@@ -9,7 +9,7 @@
 
 import rapids_singlecell as rsc
 
-import pathlib
+from .utils import track_peakmem
 
 
 class ToolsSuite:
@@ -21,28 +21,32 @@ class ToolsSuite:
 
     def setup(self, input_data):
         self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
-        assert "X_pca" in self.adata.obsm
 
     def time_umap(self, *_):
         rsc.tl.umap(self.adata)
 
-    def peakmem_umap(self, *_):
+    @track_peakmem
+    def track_peakmem_umap(self, *_):
         rsc.tl.umap(self.adata)
 
     def time_diffmap(self, *_):
         rsc.tl.diffmap(self.adata)
 
-    def peakmem_diffmap(self, *_):
+    @track_peakmem
+    def track_peakmem_diffmap(self, *_):
         rsc.tl.diffmap(self.adata)
 
     def time_leiden(self, *_):
         rsc.tl.leiden(self.adata)
 
-    def peakmem_leiden(self, *_):
+    @track_peakmem
+    def track_peakmem_leiden(self, *_):
         rsc.tl.leiden(self.adata)
 
     def time_embedding_denity(self, *_):
         rsc.tl.embedding_density(self.adata, basis="umap")
 
-    def peakmem_embedding_denity(self, *_):
+    @track_peakmem
+    def track_peakmem_embedding_denity(self, *_):
         rsc.tl.embedding_density(self.adata, basis="umap")
+
diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py
new file mode 100644
index 00000000..3245efcb
--- /dev/null
+++ b/benchmarks/benchmarks/utils.py
@@ -0,0 +1,74 @@
+# From https://github.com/rapidsai/benchmark/blob/570531ba4bc90c508245e943d2aaa11d68a24286/rapids_pytest_benchmark/rapids_pytest_benchmark/rmm_resource_analyzer.py#L29
+
+import os
+import csv
+import rmm
+import tempfile
+
+
+class RMMResourceAnalyzer:
+    """
+    Class to control enabling, disabling, & parsing RMM resource
+    logs.
+    """
+
+    def __init__(self, benchmark_name):
+        self.max_gpu_util = -1
+        self.max_gpu_mem_usage = 0
+        self.leaked_memory = 0
+        log_file_name = benchmark_name
+        self._log_file_prefix = os.path.join(tempfile.gettempdir(), log_file_name)
+
+    def enable_logging(self):
+        """
+        Enable RMM logging. RMM creates a CSV output file derived from
+        provided file name that looks like: log_file_prefix + ".devX", where
+        X is the GPU number.
+        """
+        rmm.enable_logging(log_file_name=self._log_file_prefix)
+
+    def disable_logging(self):
+        """
+        Disable RMM logging
+        """
+        log_output_files = rmm.get_log_filenames()
+        rmm.mr._flush_logs()
+        rmm.disable_logging()
+        # FIXME: potential improvement here would be to only parse the log files for
+        # the gpu ID that's passed in via --benchmark-gpu-device
+        self._parse_results(log_output_files)
+        for _, log_file in log_output_files.items():
+            os.remove(log_file)
+
+    def _parse_results(self, log_files):
+        """
+        Parse CSV results. CSV file has columns:
+        Thread,Time,Action,Pointer,Size,Stream
+        """
+        current_mem_usage = 0
+        for _, log_file in log_files.items():
+            with open(log_file, mode="r") as csv_file:
+                csv_reader = csv.DictReader(csv_file)
+                for row in csv_reader:
+                    row_action = row["Action"]
+                    row_size = int(row["Size"])
+
+                    if row_action == "allocate":
+                        current_mem_usage += row_size
+                        if current_mem_usage > self.max_gpu_mem_usage:
+                            self.max_gpu_mem_usage = current_mem_usage
+
+                    if row_action == "free":
+                        current_mem_usage -= row_size
+        self.leaked_memory = current_mem_usage
+
+def track_peakmem(fn):
+    from functools import wraps
+    @wraps(fn)
+    def wrapper(self, *args, **kwargs):
+        resource_analyzer = RMMResourceAnalyzer(benchmark_name=fn.__name__)
+        resource_analyzer.enable_logging()
+        fn(self, *args, **kwargs)
+        resource_analyzer.disable_logging()
+        return resource_analyzer.max_gpu_mem_usage
+    return wrapper
\ No newline at end of file

From 41d9227cf6db6f9a31433815213cf8f7b61c4bb7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 25 Apr 2024 09:43:53 +0000
Subject: [PATCH 10/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/benchmarks/preprocessing.py | 14 +++++---------
 benchmarks/benchmarks/readwrite.py     |  3 ++-
 benchmarks/benchmarks/squidpy.py       | 21 ++++++++++++++-------
 benchmarks/benchmarks/tools.py         |  5 +++--
 benchmarks/benchmarks/utils.py         | 13 +++++++++----
 5 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/benchmarks/benchmarks/preprocessing.py b/benchmarks/benchmarks/preprocessing.py
index 54681cfa..7d995902 100644
--- a/benchmarks/benchmarks/preprocessing.py
+++ b/benchmarks/benchmarks/preprocessing.py
@@ -18,20 +18,18 @@ class PreprocessingSuite:
     param_names = ["input_data"]
 
     def setup(self, input_data: str):
-        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
+        self.adata = rsc.get.anndata_to_GPU(
+            self._data_dict[input_data].copy(), copy=True
+        )
 
     def time_calculate_qc_metrics(self, *_):
         self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
-        rsc.pp.calculate_qc_metrics(
-            self.adata, qc_vars=["mt"], log1p=False
-        )
+        rsc.pp.calculate_qc_metrics(self.adata, qc_vars=["mt"], log1p=False)
 
     @track_peakmem
     def track_peakmem_calculate_qc_metrics(self, *_):
         self.adata.var["mt"] = self.adata.var_names.str.startswith("MT-")
-        rsc.pp.calculate_qc_metrics(
-            self.adata, qc_vars=["mt"], log1p=False
-        )
+        rsc.pp.calculate_qc_metrics(self.adata, qc_vars=["mt"], log1p=False)
 
     def time_filter_cells(self, *_):
         rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
@@ -40,7 +38,6 @@ def time_filter_cells(self, *_):
     def track_peakmem_filter_cells(self, *_):
         rsc.pp.filter_cells(self.adata, qc_var="n_counts", min_count=200)
 
-
     def time_filter_genes(self, *_):
         rsc.pp.filter_genes(self.adata, qc_var="n_counts", min_count=3)
 
@@ -100,4 +97,3 @@ def time_neighbors(self, *_):
     @track_peakmem
     def track_peakmem_neighbors(self, *_):
         rsc.pp.neighbors(self.adata, n_neighbors=15, n_pcs=50)
-
diff --git a/benchmarks/benchmarks/readwrite.py b/benchmarks/benchmarks/readwrite.py
index 1f3900d1..113ac9ba 100644
--- a/benchmarks/benchmarks/readwrite.py
+++ b/benchmarks/benchmarks/readwrite.py
@@ -25,8 +25,10 @@
 import scanpy as sc
 
 from rapids_singlecell.get import anndata_to_GPU
+
 from .utils import track_peakmem
 
+
 class ToGPUSuite:
     _data_dict = dict(obmc68k_reduced=sc.datasets.pbmc68k_reduced())
     params = _data_dict.keys()
@@ -41,4 +43,3 @@ def time_to_gpu(self, *_):
     @track_peakmem
     def track_peakmem_to_gpu(self, *_):
         anndata_to_GPU(self.adata)
-
diff --git a/benchmarks/benchmarks/squidpy.py b/benchmarks/benchmarks/squidpy.py
index 0500bb39..b1ce6742 100644
--- a/benchmarks/benchmarks/squidpy.py
+++ b/benchmarks/benchmarks/squidpy.py
@@ -13,9 +13,10 @@
 
 from .utils import track_peakmem
 
+
 class ToolsSuite:
     _data_dict = dict(
-         pbmc68k_reduced=sc.datasets.pbmc68k_reduced(),
+        pbmc68k_reduced=sc.datasets.pbmc68k_reduced(),
     )
     params = _data_dict.keys()
     param_names = ["input_data"]
@@ -47,18 +48,24 @@ def track_peakmem_ligrec(self, *_):
             use_raw=False,
         )
 
-
     def time_autocorr_moran(self, *_):
-        rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
+        rsc.gr.spatial_autocorr(
+            self.gpu_adata, mode="moran", connectivity_key="connectivities"
+        )
 
     @track_peakmem
     def track_peakmem_autocorr_moran(self, *_):
-        rsc.gr.spatial_autocorr(self.gpu_adata, mode="moran", connectivity_key="connectivities")
+        rsc.gr.spatial_autocorr(
+            self.gpu_adata, mode="moran", connectivity_key="connectivities"
+        )
 
     def time_autocorr_geary(self, *_):
-        rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
+        rsc.gr.spatial_autocorr(
+            self.gpu_adata, mode="geary", connectivity_key="connectivities"
+        )
 
     @track_peakmem
     def track_peakmem_autocorr_geary(self, *_):
-        rsc.gr.spatial_autocorr(self.gpu_adata, mode="geary", connectivity_key="connectivities")
-
+        rsc.gr.spatial_autocorr(
+            self.gpu_adata, mode="geary", connectivity_key="connectivities"
+        )
diff --git a/benchmarks/benchmarks/tools.py b/benchmarks/benchmarks/tools.py
index 2dbf1e68..cf65b23a 100644
--- a/benchmarks/benchmarks/tools.py
+++ b/benchmarks/benchmarks/tools.py
@@ -20,7 +20,9 @@ class ToolsSuite:
     param_names = ["input_data"]
 
     def setup(self, input_data):
-        self.adata = rsc.get.anndata_to_GPU(self._data_dict[input_data].copy(), copy=True)
+        self.adata = rsc.get.anndata_to_GPU(
+            self._data_dict[input_data].copy(), copy=True
+        )
 
     def time_umap(self, *_):
         rsc.tl.umap(self.adata)
@@ -49,4 +51,3 @@ def time_embedding_denity(self, *_):
     @track_peakmem
     def track_peakmem_embedding_denity(self, *_):
         rsc.tl.embedding_density(self.adata, basis="umap")
-
diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py
index 3245efcb..e0314959 100644
--- a/benchmarks/benchmarks/utils.py
+++ b/benchmarks/benchmarks/utils.py
@@ -1,10 +1,12 @@
 # From https://github.com/rapidsai/benchmark/blob/570531ba4bc90c508245e943d2aaa11d68a24286/rapids_pytest_benchmark/rapids_pytest_benchmark/rmm_resource_analyzer.py#L29
+from __future__ import annotations
 
-import os
 import csv
-import rmm
+import os
 import tempfile
 
+import rmm
+
 
 class RMMResourceAnalyzer:
     """
@@ -47,7 +49,7 @@ def _parse_results(self, log_files):
         """
         current_mem_usage = 0
         for _, log_file in log_files.items():
-            with open(log_file, mode="r") as csv_file:
+            with open(log_file) as csv_file:
                 csv_reader = csv.DictReader(csv_file)
                 for row in csv_reader:
                     row_action = row["Action"]
@@ -62,8 +64,10 @@ def _parse_results(self, log_files):
                         current_mem_usage -= row_size
         self.leaked_memory = current_mem_usage
 
+
 def track_peakmem(fn):
     from functools import wraps
+
     @wraps(fn)
     def wrapper(self, *args, **kwargs):
         resource_analyzer = RMMResourceAnalyzer(benchmark_name=fn.__name__)
@@ -71,4 +75,5 @@ def wrapper(self, *args, **kwargs):
         fn(self, *args, **kwargs)
         resource_analyzer.disable_logging()
         return resource_analyzer.max_gpu_mem_usage
-    return wrapper
\ No newline at end of file
+
+    return wrapper

From 8ffadbceb81bd21619486f78edd904db25b371ac Mon Sep 17 00:00:00 2001
From: ilan-gold <ilanbassgold@gmail.com>
Date: Mon, 29 Apr 2024 14:10:16 +0200
Subject: [PATCH 11/12] (fix): use conda for dependency resolution

---
 benchmarks/asv.conf.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 71d56a00..9c650e99 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -38,7 +38,7 @@
     // If missing or the empty string, the tool will be automatically
     // determined by looking for tools on the PATH environment
     // variable.
-    "environment_type": "mamba",
+    "environment_type": "conda",
     // timeout in seconds for installing any dependencies in environment
     // defaults to 10 min
     //"install_timeout": 600,
@@ -172,4 +172,4 @@
     //    "some_benchmark": 0.01,     // Threshold of 1%
     //    "another_benchmark": 0.5,   // Threshold of 50%
     // },
-}
+}
\ No newline at end of file

From 4a657fce61f99e5daa822283d86fb4c549082b0d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 29 Apr 2024 12:10:39 +0000
Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmarks/asv.conf.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
index 9c650e99..871ca00c 100644
--- a/benchmarks/asv.conf.json
+++ b/benchmarks/asv.conf.json
@@ -172,4 +172,4 @@
     //    "some_benchmark": 0.01,     // Threshold of 1%
     //    "another_benchmark": 0.5,   // Threshold of 50%
     // },
-}
\ No newline at end of file
+}