Skip to content

Commit

Permalink
Make it possible to log custom components using rr.send_columns (#8163
Browse files Browse the repository at this point in the history
)

### What

This PR primarily overhauls `rr.AnyBatchValue()`, a wrapper around
pyarrow to package any data to what looks like a serialised custom
component:
- It publicly exposes it, including in the Python reference
documentation.
- It makes it compatible with `send_columns`.
- It adds the `.partition()` method.

Also in this PR:
- Makes `rr.AnyValues()` compatible with `send_columns` (you need to
call `values.as_component_batches()` though)
- Add a documentation section to explain all of the above, with a couple
of snippets

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/8163?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/8163?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/8163)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.

To deploy documentation changes immediately after merging this PR, add
the `deploy docs` label.
  • Loading branch information
abey79 authored Nov 22, 2024
1 parent 325ee0b commit 8992f8f
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 3 deletions.
13 changes: 12 additions & 1 deletion docs/content/howto/logging/send-columns.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,18 @@ snippet: archetypes/image_send_columns


### Using `send_columns` for logging points
Each row the in the component column can be a batch of data, e.g. a batch of positions.
Each row in the component column can be a batch of data, e.g. a batch of positions.
This lets you log the evolution of a point cloud over time efficiently.

snippet: archetypes/points3d_send_columns.py

### Using `send_columns` for logging custom components

An entire batch of a custom component can be logged at once using [`rr.AnyBatchValue`](https://ref.rerun.io/docs/python/0.20.0/common/custom_data/#rerun.AnyBatchValue?speculative-link) along with `send_columns`:

snippet: howto/any_batch_value_send_columns

The [`rr.AnyValues`](https://ref.rerun.io/docs/python/0.20.0/common/custom_data/#rerun.AnyValues) class can also be used to log multiple components at a time.
It does not support partitioning, so each component batch and the timeline must hold the same number of elements.

snippet: howto/any_values_send_columns
24 changes: 24 additions & 0 deletions docs/snippets/all/howto/any_batch_value_send_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Use `AnyBatchValue` and `send_column` to send an entire column of custom data to Rerun."""

from __future__ import annotations

import numpy as np
import rerun as rr

rr.init("rerun_example_any_batch_value_send_columns", spawn=True)

# Number of rows (time steps) in every column sent below.
N = 64

# One sequence index per row.
timestamps = np.arange(N)

# A single sine sample per step, and a cosine series holding ten samples per step.
single_values = np.sin(timestamps / 10.0)
multi_values = np.cos(np.arange(N * 10) / 100.0)

# The time column is built first, matching the order in which the call below consumes it.
step_column = rr.TimeSequenceColumn("step", timestamps)

# log one value per timestamp
single_column = rr.AnyBatchValue("custom_component_single", single_values)
# log ten values per timestamp
multi_column = rr.AnyBatchValue("custom_component_multi", multi_values).partition([10] * N)

rr.send_columns(
    "/",
    times=[step_column],
    components=[single_column, multi_column],
)
22 changes: 22 additions & 0 deletions docs/snippets/all/howto/any_values_send_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Use `AnyValues` and `send_column` to send entire columns of custom data to Rerun."""

from __future__ import annotations

import numpy as np
import rerun as rr

rr.init("rerun_example_any_values_send_columns", spawn=True)

# One sequence index per row; the same scaled values feed both components.
timestamps = np.arange(64)
phase = timestamps / 10.0

# Log two components, named "sin" and "cos", with the corresponding values
values = rr.AnyValues(sin=np.sin(phase), cos=np.cos(phase))

# Each component batch holds one element per entry of the time column.
time_columns = [rr.TimeSequenceColumn("step", timestamps)]
component_batches = values.as_component_batches()

rr.send_columns("/", times=time_columns, components=component_batches)
8 changes: 8 additions & 0 deletions docs/snippets/snippets.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ views = [
"archetypes/points3d_send_columns" = [
"rust", # Doesn't support partitioned component batches yet.
]
"howto/any_batch_value_send_columns" = [
"cpp", # Not implemented
"rust", # Not implemented
]
"howto/any_values_send_columns" = [
"cpp", # Not implemented
"rust", # Not implemented
]
"migration/log_line" = [ # Not a complete example -- just a single log line
"cpp",
"rust",
Expand Down
5 changes: 4 additions & 1 deletion rerun_py/docs/gen_common_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ class Section:
),
Section(
title="Custom Data",
class_list=["AnyValues"],
class_list=[
"AnyValues",
"AnyBatchValue",
],
),
################################################################################
# These are tables but don't need their own pages since they refer to types that
Expand Down
1 change: 1 addition & 0 deletions rerun_py/rerun_sdk/rerun/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
send_columns as send_columns,
)
from .any_value import (
AnyBatchValue as AnyBatchValue,
AnyValues as AnyValues,
)
from .archetypes import (
Expand Down
8 changes: 7 additions & 1 deletion rerun_py/rerun_sdk/rerun/_send_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from ._baseclasses import Archetype, ComponentBatchMixin, ComponentColumn
from ._log import IndicatorComponentBatch
from .any_value import AnyBatchValue
from .error_utils import catch_and_log_exceptions
from .recording_stream import RecordingStream

Expand Down Expand Up @@ -120,7 +121,7 @@ def as_arrow_array(self) -> pa.Array:
def send_columns(
entity_path: str,
times: Iterable[TimeColumnLike],
components: Iterable[Union[ComponentBatchMixin, ComponentColumn]],
components: Iterable[Union[ComponentBatchMixin, ComponentColumn, AnyBatchValue]],
recording: RecordingStream | None = None,
strict: bool | None = None,
) -> None:
Expand Down Expand Up @@ -227,6 +228,11 @@ def send_columns(
component_column = c
elif isinstance(c, ComponentBatchMixin):
component_column = c.partition([1] * len(c)) # type: ignore[arg-type]
elif isinstance(c, AnyBatchValue):
array = c.as_arrow_array()
if array is None:
raise ValueError(f"Expected a non-null value for component: {component_name}")
component_column = c.partition([1] * len(c.as_arrow_array())) # type: ignore[arg-type]
else:
raise TypeError(
f"Expected either a type that implements the `ComponentMixin` or a `ComponentColumn`, got: {type(c)}"
Expand Down
21 changes: 21 additions & 0 deletions rerun_py/rerun_sdk/rerun/any_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from typing import Any, Iterable

import numpy as np
import numpy.typing as npt
import pyarrow as pa

from . import ComponentColumn
from ._log import AsComponents, ComponentBatchLike
from .error_utils import catch_and_log_exceptions

Expand Down Expand Up @@ -92,6 +94,25 @@ def component_name(self) -> str:
def as_arrow_array(self) -> pa.Array | None:
return self.pa_array

def partition(self, lengths: npt.ArrayLike) -> ComponentColumn:
    """
    Split this component batch into consecutive sub-batches.

    The inner arrow array is wrapped in a `pyarrow.ListArray` whose lists have
    the given lengths. The lengths must sum to the total length of the
    component batch.

    Parameters
    ----------
    lengths : npt.ArrayLike
        The length of each sub-batch, in order.

    Returns
    -------
    The partitioned component, as a `ComponentColumn`.

    """
    column = ComponentColumn(self, lengths)
    return column


class AnyValues(AsComponents):
"""
Expand Down

0 comments on commit 8992f8f

Please sign in to comment.