diff --git a/docs/content/reference/dataframes.md b/docs/content/reference/dataframes.md index ed8bb533a69a..469c25ef79a1 100644 --- a/docs/content/reference/dataframes.md +++ b/docs/content/reference/dataframes.md @@ -3,4 +3,43 @@ title: Dataframes order: 300 --- -Incoming. +Rerun, at its core, is a database. As such, you can always get your data back in the form of tables (also known as dataframes, or records, or batches...). + +This can be achieved in three different ways, depending on your needs: +* using the dataframe API, currently available in [Python](https://ref.rerun.io/docs/python/stable/common/dataframe/) and [Rust](https://docs.rs/rerun/latest/rerun/dataframe/index.html), +* using the [blueprint API](../concepts/blueprint) to configure a [dataframe view](types/views/dataframe_view) from code, +* or simply by setting up a [dataframe view](types/views/dataframe_view) manually in the UI. + +This page is meant as a reference to get you up and running with these different solutions as quickly as possible. +For an in-depth introduction to the dataframe API and the possible workflows it enables, check out [our Getting Started guide](../getting-started/data-out) or one of the accompanying [How-Tos](../howto/dataframe-api). + + +> We'll need an RRD file to query. 
Either use one of yours, or grab one of the example recordings, e.g.:
+> ```
+> curl 'https://app.rerun.io/version/latest/examples/dna.rrd' -o - > /tmp/dna.rrd
+> ```
+
+### Using the dataframe API
+
+The following snippet demonstrates how to query the first 10 rows in a Rerun recording:
+
+snippet: reference/dataframe_query
+
+Check out the API reference to learn more about all the ways that data can be searched and filtered:
+* [🐍 Python API reference](https://ref.rerun.io/docs/python/stable/common/dataframe/)
+* [🐍 Python example](https://github.com/rerun-io/rerun/blob/c00a9f649fd4463f91620e8e2eac11355b245ac5/examples/python/dataframe_query/dataframe_query.py)
+* [🦀 Rust API reference](https://docs.rs/rerun/latest/rerun/dataframe/index.html)
+* [🦀 Rust example](https://github.com/rerun-io/rerun/blob/c00a9f649fd4463f91620e8e2eac11355b245ac5/examples/rust/dataframe_query/src/main.rs)
+
+
+### Using the blueprint API to configure a dataframe view
+
+TODO(cmc): incoming.
+
+Check out the blueprint API reference to learn more about all the ways that data can be searched and filtered:
+* [🐍 Python blueprint API reference](https://ref.rerun.io/docs/python/latest/common/blueprint_apis/)
+
+
+### Setting up a dataframe view manually in the UI
+
+TODO(cmc): incoming.
diff --git a/docs/snippets/all/reference/dataframe_query.py b/docs/snippets/all/reference/dataframe_query.py
new file mode 100644
index 000000000000..25597eb24e0d
--- /dev/null
+++ b/docs/snippets/all/reference/dataframe_query.py
@@ -0,0 +1,23 @@
+"""Query and display the first 10 rows of a recording."""
+
+import sys
+
+import rerun as rr
+
+path_to_rrd = sys.argv[1]
+
+recording = rr.dataframe.load_recording(path_to_rrd)
+view = recording.view(index="log_time", contents="/**")
+batches = view.select()
+
+for _ in range(10):
+    # NOTE(review): assumes `batches` is a pyarrow `RecordBatchReader`, whose `read_next_batch`
+    # raises `StopIteration` at the end of the stream instead of returning `None` -- confirm.
+    try:
+        row = batches.read_next_batch()
+    except StopIteration:
+        break
+    if row is None:
+        break
+    # Each row is a `RecordBatch`, which can be easily passed around across different data ecosystems.
+    print(row)
diff --git a/docs/snippets/all/reference/dataframe_query.rs b/docs/snippets/all/reference/dataframe_query.rs
new file mode 100644
index 000000000000..ed3002500955
--- /dev/null
+++ b/docs/snippets/all/reference/dataframe_query.rs
@@ -0,0 +1,45 @@
+//! Query and display the first 10 rows of a recording.
+
+#![allow(clippy::unwrap_used)]
+
+use rerun::{
+    dataframe::{QueryCache, QueryEngine, QueryExpression, SparseFillStrategy, Timeline},
+    ChunkStore, ChunkStoreConfig, VersionPolicy,
+};
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // The path to the recording is expected as the first command-line argument.
+    let args = std::env::args().collect::<Vec<_>>();
+
+    let path_to_rrd = &args[1];
+    let timeline = Timeline::log_time();
+
+    let stores = ChunkStore::from_rrd_filepath(
+        &ChunkStoreConfig::DEFAULT,
+        path_to_rrd,
+        VersionPolicy::Warn,
+    )?;
+    // Only the first store loaded from the file is queried.
+    let (_, store) = stores.first_key_value().unwrap();
+
+    let query_cache = QueryCache::new(store);
+    let query_engine = QueryEngine {
+        store,
+        cache: &query_cache,
+    };
+
+    let query = QueryExpression {
+        filtered_index: Some(timeline),
+        sparse_fill_strategy: SparseFillStrategy::LatestAtGlobal,
+        ..Default::default()
+    };
+
+    // `query` is not used again, so it is moved into the engine rather than cloned.
+    let query_handle = query_engine.query(query);
+    for row in query_handle.batch_iter().take(10) {
+        // Each row is a `RecordBatch`, which can be easily passed around across different data ecosystems.
+        println!("{row}");
+    }
+
+    Ok(())
+}