Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add n_observations function to read n_obs from AnnData and tiledbsoma #2097

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lamindb/core/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from lamindb_setup.core.upath import LocalPathClasses, UPath, infer_filesystem

from ._backed_access import AnnDataAccessor, BackedAccessor
from ._observations import n_observations
from ._tiledbsoma import save_tiledbsoma_experiment
from ._valid_suffixes import VALID_SUFFIXES
from .objects import infer_suffix, write_to_disk
Expand Down
60 changes: 60 additions & 0 deletions lamindb/core/storage/_observations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import h5py
from anndata._io.specs.registry import get_spec
from lamindb_setup.core.upath import LocalPathClasses, UPath, create_mapper

from ._tiledbsoma import _open_tiledbsoma


def _X_n_obs(X) -> int:
    """Return the number of observations (first dimension) of a stored ``X``.

    Args:
        X: An opened storage element exposing ``.attrs`` (a mapping) and,
            when ``attrs`` has no ``"shape"`` entry, a ``.shape`` sequence.
            Presumably an h5py/zarr dataset (dense) or group (sparse) taken
            from an AnnData store -- confirm against callers.

    Returns:
        The first entry of ``X.attrs["shape"]`` if that attribute exists,
        otherwise the first entry of ``X.shape``, as a builtin ``int``.
    """
    attrs = X.attrs
    n_obs = attrs["shape"][0] if "shape" in attrs else X.shape[0]
    # Attribute values read from array-backed stores can be NumPy integer
    # scalars; normalise so callers always receive a plain Python int.
    return int(n_obs)


def n_observations(storepath: UPath) -> int | None:
    """Return the number of observations stored at ``storepath``, if known.

    Three layouts are recognised:

    * a file with suffix ``.h5ad``, read through ``h5py``;
    * a directory containing a zarr metadata file (``.zarray`` / ``.zgroup``)
      whose root encoding type is ``"anndata"``;
    * a directory containing a tiledbsoma metadata entry
      (``__tiledb_group.tdb`` / ``__group`` / ``__meta``) that has an
      ``"obs"`` member.

    Args:
        storepath: Path of the store; local or remote.

    Returns:
        The number of observations, or ``None`` when the layout is not
        recognised, the store has no ``X`` (h5ad / zarr) or ``obs``
        (tiledbsoma) element, or the optional backend needed to read it
        cannot be imported.
    """
    if storepath.is_file():
        if storepath.suffix != ".h5ad":
            return None
        with storepath.open(mode="rb") as open_obj:
            with h5py.File(open_obj, mode="r") as storage:
                # A store without an "X" element cannot be sized this way;
                # report "unknown" instead of raising KeyError.
                if "X" not in storage:
                    return None
                return _X_n_obs(storage["X"])

    # Directory store: the first recognised metadata entry decides the type.
    zarr_meta = {".zarray", ".zgroup"}
    tdbsoma_meta = {"__tiledb_group.tdb", "__group", "__meta"}
    store_kind = None
    for path in storepath.iterdir():
        path_name = path.name
        if path_name in zarr_meta:
            store_kind = "zarr"
            break
        if path_name in tdbsoma_meta:
            store_kind = "tiledbsoma"
            break

    if store_kind == "zarr":
        try:
            import zarr
        except ImportError:
            # zarr is optional; without it the store cannot be inspected.
            return None
        storepath_str = storepath.as_posix()
        if isinstance(storepath, LocalPathClasses):
            open_obj = storepath_str
        else:
            # Non-local stores are accessed through a filesystem mapper.
            open_obj = create_mapper(storepath.fs, storepath_str, check=True)
        storage = zarr.open(open_obj, mode="r")
        if get_spec(storage).encoding_type != "anndata":
            return None
        # Same guard as for h5ad: no "X" element means the size is unknown.
        if "X" not in storage:
            return None
        return _X_n_obs(storage["X"])

    if store_kind == "tiledbsoma":
        try:
            with _open_tiledbsoma(storepath, mode="r") as storage:
                if "obs" in storage.keys():
                    return len(storage["obs"])
        except ImportError:
            # NOTE(review): presumably raised when tiledbsoma is not
            # installed -- confirm against _open_tiledbsoma.
            return None

    return None

Check warning on line 60 in lamindb/core/storage/_observations.py

View check run for this annotation

Codecov / codecov/patch

lamindb/core/storage/_observations.py#L58-L60

Added lines #L58 - L60 were not covered by tests
6 changes: 6 additions & 0 deletions tests/storage/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import tiledbsoma.io
import zarr
from lamindb.core.loaders import load_h5ad
from lamindb.core.storage import n_observations
from lamindb.core.storage._backed_access import (
AnnDataAccessor,
BackedAccessor,
Expand Down Expand Up @@ -50,6 +51,8 @@ def bad_adata_path():
def test_anndata_io():
test_file = ln.core.datasets.anndata_file_pbmc68k_test()

assert n_observations(test_file) == 30

adata = load_h5ad(test_file)

def callback(*args, **kwargs):
Expand All @@ -58,6 +61,8 @@ def callback(*args, **kwargs):
zarr_path = test_file.with_suffix(".zarr")
write_adata_zarr(adata, zarr_path, callback)

assert n_observations(zarr_path) == 30

adata = load_anndata_zarr(zarr_path)

assert adata.shape == (30, 200)
Expand Down Expand Up @@ -263,6 +268,7 @@ def test_write_read_tiledbsoma(storage):
assert artifact_soma._key_is_virtual
assert artifact_soma._accessor == "tiledbsoma"
assert artifact_soma.n_observations == adata.n_obs
assert n_observations(artifact_soma.path) == adata.n_obs

with artifact_soma.open() as store: # mode="r" by default
assert isinstance(store, tiledbsoma.Experiment)
Expand Down
Loading