diff --git a/.github/workflows/contrib_rerun_py.yml b/.github/workflows/contrib_rerun_py.yml index 323ac0974400..25cfc2978be7 100644 --- a/.github/workflows/contrib_rerun_py.yml +++ b/.github/workflows/contrib_rerun_py.yml @@ -61,11 +61,11 @@ jobs: # this stops `re_web_viewer_server/build.rs` from running RERUN_IS_PUBLISHING: true run: | - cargo build \ + pixi run cargo build \ --locked \ -p rerun-cli \ --no-default-features \ - --features native_viewer,web_viewer \ + --features release \ --release \ --target x86_64-unknown-linux-gnu diff --git a/.github/workflows/reusable_build_and_upload_rerun_cli.yml b/.github/workflows/reusable_build_and_upload_rerun_cli.yml index f3279346b271..2316e6686bc6 100644 --- a/.github/workflows/reusable_build_and_upload_rerun_cli.yml +++ b/.github/workflows/reusable_build_and_upload_rerun_cli.yml @@ -176,11 +176,11 @@ jobs: # this stops `re_web_viewer_server/build.rs` from running RERUN_IS_PUBLISHING: true run: | - cargo build \ + pixi run cargo build \ --locked \ -p rerun-cli \ --no-default-features \ - --features native_viewer,web_viewer \ + --features release \ --release \ --target ${{ needs.set-config.outputs.TARGET }} diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index 66f80a8ca1f0..9b5cc068144c 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -542,7 +542,7 @@ pub struct QueryExpression { /// Examples: `Some(Timeline("frame"))`, `None` (only static data). // // TODO(cmc): this has to be a selector otherwise this is a horrible UX. - pub filtered_index: Option, + pub filtered_index: Option, /// The range of index values used to filter out _rows_ from the view contents. /// @@ -589,7 +589,7 @@ pub struct QueryExpression { /// Example: `ComponentColumnSelector("rerun.components.Position3D")`. 
// // TODO(cmc): multi-pov support - pub filtered_point_of_view: Option, + pub filtered_is_not_null: Option, /// Specifies how null values should be filled in the returned dataframe. /// @@ -792,7 +792,7 @@ impl ChunkStore { filtered_index_range: _, filtered_index_values: _, using_index_values: _, - filtered_point_of_view: _, + filtered_is_not_null: _, sparse_fill_strategy: _, selection: _, } = query; diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 4e5d1fc66a91..c4223d123425 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -445,11 +445,7 @@ impl QueryHandle<'_> { query: &RangeQuery, view_contents: &[ColumnDescriptor], ) -> (Option, Vec>) { - let mut view_pov_chunks_idx = self - .query - .filtered_point_of_view - .as_ref() - .map(|_| usize::MAX); + let mut view_pov_chunks_idx = self.query.filtered_is_not_null.as_ref().map(|_| usize::MAX); let view_chunks = view_contents .iter() @@ -462,7 +458,7 @@ impl QueryHandle<'_> { .fetch_chunks(query, &column.entity_path, [column.component_name]) .unwrap_or_default(); - if let Some(pov) = self.query.filtered_point_of_view.as_ref() { + if let Some(pov) = self.query.filtered_is_not_null.as_ref() { if pov.entity_path == column.entity_path && column.component_name.matches(&pov.component_name) { @@ -1196,7 +1192,7 @@ mod tests { // * [x] filtered_index_values // * [x] view_contents // * [x] selection - // * [x] filtered_point_of_view + // * [x] filtered_is_not_null // * [x] sparse_fill_strategy // * [x] using_index_values // @@ -1551,7 +1547,7 @@ mod tests { } #[test] - fn filtered_point_of_view() -> anyhow::Result<()> { + fn filtered_is_not_null() -> anyhow::Result<()> { re_log::setup_logging(); let store = create_nasty_store()?; @@ -1569,7 +1565,7 @@ mod tests { { let query = QueryExpression { filtered_index, - filtered_point_of_view: Some(ComponentColumnSelector { + filtered_is_not_null: Some(ComponentColumnSelector { 
entity_path: "no/such/entity".into(), component_name: MyPoint::name().to_string(), }), @@ -1598,7 +1594,7 @@ mod tests { { let query = QueryExpression { filtered_index, - filtered_point_of_view: Some(ComponentColumnSelector { + filtered_is_not_null: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: "AComponentColumnThatDoesntExist".into(), }), @@ -1627,7 +1623,7 @@ mod tests { { let query = QueryExpression { filtered_index, - filtered_point_of_view: Some(ComponentColumnSelector { + filtered_is_not_null: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyPoint::name().to_string(), }), @@ -1666,7 +1662,7 @@ mod tests { { let query = QueryExpression { filtered_index, - filtered_point_of_view: Some(ComponentColumnSelector { + filtered_is_not_null: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyColor::name().to_string(), }), @@ -2180,7 +2176,7 @@ mod tests { { let query = QueryExpression { filtered_index, - filtered_point_of_view: Some(ComponentColumnSelector { + filtered_is_not_null: Some(ComponentColumnSelector { entity_path: entity_path.clone(), component_name: MyPoint::name().to_string(), }), diff --git a/crates/store/re_video/Cargo.toml b/crates/store/re_video/Cargo.toml index f9eef572ba4e..02035987964c 100644 --- a/crates/store/re_video/Cargo.toml +++ b/crates/store/re_video/Cargo.toml @@ -30,9 +30,8 @@ av1 = ["dep:dav1d"] ## Enable faster native video decoding with assembly. ## You need to install [nasm](https://nasm.us/) to compile with this feature. -# TODO(#7671): this feature flag currently does nothing on Linux. 
nasm = [ - # The default feature set of our dav1d fork has asm enabled (except on Linux, see above) + # The default feature set of our dav1d fork has asm enabled "dav1d?/default", ] diff --git a/crates/store/re_video/src/decode/av1.rs b/crates/store/re_video/src/decode/av1.rs index 2409199753a9..0dc631ffe86a 100644 --- a/crates/store/re_video/src/decode/av1.rs +++ b/crates/store/re_video/src/decode/av1.rs @@ -37,14 +37,22 @@ impl SyncDav1dDecoder { pub fn new(debug_name: String) -> Result { re_tracing::profile_function!(); - // TODO(#7671): enable this warning again on Linux when the `nasm` feature actually does something - #[allow(clippy::overly_complex_bool_expr)] - if !cfg!(target_os = "linux") && !cfg!(feature = "nasm") { - re_log::warn_once!( - "NOTE: native AV1 video decoder is running extra slowly. \ - Speed it up by compiling Rerun with the `nasm` feature enabled. \ - You'll need to also install nasm: https://nasm.us/" - ); + if !cfg!(feature = "nasm") { + // The `nasm` feature makes AV1 decoding much faster. + // On Linux the difference is huge (~25x). + // On Windows, the difference was also pretty big (unsure how big). + // On an M3 Mac the difference is smaller (2-3x), + // and even without `nasm` emilk can play an 8k video at 2x speed. + + if cfg!(target_os = "macos") && cfg!(target_arch = "aarch64") { + re_log::warn_once!( + "The native AV1 video decoder is unnecessarily slow. \ + Speed it up by compiling Rerun with the `nasm` feature enabled." 
+ ); + } else { + // Better to return an error than to be perceived as being slow + return Err(Error::Dav1dWithoutNasm); + } } // See https://videolan.videolan.me/dav1d/structDav1dSettings.html for settings docs diff --git a/crates/store/re_video/src/decode/mod.rs b/crates/store/re_video/src/decode/mod.rs index 5d46985e4aa7..071c892948fe 100644 --- a/crates/store/re_video/src/decode/mod.rs +++ b/crates/store/re_video/src/decode/mod.rs @@ -100,6 +100,11 @@ pub enum Error { #[cfg(not(target_arch = "wasm32"))] #[error("dav1d: {0}")] Dav1d(#[from] dav1d::Error), + + #[cfg(feature = "av1")] + #[cfg(not(target_arch = "wasm32"))] + #[error("To enabled native AV1 decoding, compile Rerun with the `nasm` feature enabled.")] + Dav1dWithoutNasm, } pub type Result = std::result::Result; diff --git a/crates/store/re_video/src/lib.rs b/crates/store/re_video/src/lib.rs index 30785cc7e997..b007518fb2f4 100644 --- a/crates/store/re_video/src/lib.rs +++ b/crates/store/re_video/src/lib.rs @@ -1,102 +1,27 @@ //! Video decoding library. +mod time; + pub mod decode; pub mod demux; -pub use decode::{Chunk, Frame, PixelFormat}; -pub use demux::{Config, Sample, VideoData, VideoLoadError}; pub use re_mp4::{TrackId, TrackKind}; -/// A value in time units. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Time(i64); - -impl Time { - pub const ZERO: Self = Self(0); - pub const MAX: Self = Self(i64::MAX); - - /// Create a new value in _time units_. - /// - /// ⚠️ Don't use this for regular timestamps in seconds/milliseconds/etc., - /// use the proper constructors for those instead! - /// This only exists for cases where you already have a value expressed in time units, - /// such as those received from the `WebCodecs` APIs. 
- #[inline] - pub fn new(v: i64) -> Self { - Self(v) - } - - #[inline] - pub fn from_secs(v: f64, timescale: Timescale) -> Self { - Self((v * timescale.0 as f64).round() as i64) - } - - #[inline] - pub fn from_millis(v: f64, timescale: Timescale) -> Self { - Self::from_secs(v / 1e3, timescale) - } - - #[inline] - pub fn from_micros(v: f64, timescale: Timescale) -> Self { - Self::from_secs(v / 1e6, timescale) - } - - #[inline] - pub fn from_nanos(v: i64, timescale: Timescale) -> Self { - Self::from_secs(v as f64 / 1e9, timescale) - } - - /// Convert to a duration - #[inline] - pub fn duration(self, timescale: Timescale) -> std::time::Duration { - std::time::Duration::from_nanos(self.into_nanos(timescale) as _) - } - - #[inline] - pub fn into_secs(self, timescale: Timescale) -> f64 { - self.0 as f64 / timescale.0 as f64 - } +pub use self::{ + decode::{Chunk, Frame, PixelFormat}, + demux::{Config, Sample, VideoData, VideoLoadError}, + time::{Time, Timescale}, +}; - #[inline] - pub fn into_millis(self, timescale: Timescale) -> f64 { - self.into_secs(timescale) * 1e3 +/// Which features was this crate compiled with? +pub fn features() -> Vec<&'static str> { + // TODO(emilk): is there a helper crate for this? + let mut features = vec![]; + if cfg!(feature = "av1") { + features.push("av1"); } - - #[inline] - pub fn into_micros(self, timescale: Timescale) -> f64 { - self.into_secs(timescale) * 1e6 - } - - #[inline] - pub fn into_nanos(self, timescale: Timescale) -> i64 { - (self.into_secs(timescale) * 1e9).round() as i64 - } -} - -impl std::ops::Add for Time { - type Output = Self; - - #[inline] - fn add(self, rhs: Self) -> Self::Output { - Self(self.0.saturating_add(rhs.0)) - } -} - -impl std::ops::Sub for Time { - type Output = Self; - - #[inline] - fn sub(self, rhs: Self) -> Self::Output { - Self(self.0.saturating_sub(rhs.0)) - } -} - -/// The number of time units per second. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Timescale(u64); - -impl Timescale { - pub(crate) fn new(v: u64) -> Self { - Self(v) + if cfg!(feature = "nasm") { + features.push("nasm"); } + features } diff --git a/crates/store/re_video/src/time.rs b/crates/store/re_video/src/time.rs new file mode 100644 index 000000000000..fc0c36619d4b --- /dev/null +++ b/crates/store/re_video/src/time.rs @@ -0,0 +1,93 @@ +/// The number of time units per second. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Timescale(u64); + +impl Timescale { + pub(crate) fn new(v: u64) -> Self { + Self(v) + } +} + +/// A value in time units. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Time(pub i64); + +impl Time { + pub const ZERO: Self = Self(0); + pub const MAX: Self = Self(i64::MAX); + + /// Create a new value in _time units_. + /// + /// ⚠️ Don't use this for regular timestamps in seconds/milliseconds/etc., + /// use the proper constructors for those instead! + /// This only exists for cases where you already have a value expressed in time units, + /// such as those received from the `WebCodecs` APIs. 
+ #[inline] + pub fn new(v: i64) -> Self { + Self(v) + } + + #[inline] + pub fn from_secs(v: f64, timescale: Timescale) -> Self { + Self((v * timescale.0 as f64).round() as i64) + } + + #[inline] + pub fn from_millis(v: f64, timescale: Timescale) -> Self { + Self::from_secs(v / 1e3, timescale) + } + + #[inline] + pub fn from_micros(v: f64, timescale: Timescale) -> Self { + Self::from_secs(v / 1e6, timescale) + } + + #[inline] + pub fn from_nanos(v: i64, timescale: Timescale) -> Self { + Self::from_secs(v as f64 / 1e9, timescale) + } + + /// Convert to a duration + #[inline] + pub fn duration(self, timescale: Timescale) -> std::time::Duration { + std::time::Duration::from_nanos(self.into_nanos(timescale) as _) + } + + #[inline] + pub fn into_secs(self, timescale: Timescale) -> f64 { + self.0 as f64 / timescale.0 as f64 + } + + #[inline] + pub fn into_millis(self, timescale: Timescale) -> f64 { + self.into_secs(timescale) * 1e3 + } + + #[inline] + pub fn into_micros(self, timescale: Timescale) -> f64 { + self.into_secs(timescale) * 1e6 + } + + #[inline] + pub fn into_nanos(self, timescale: Timescale) -> i64 { + (self.into_secs(timescale) * 1e9).round() as i64 + } +} + +impl std::ops::Add for Time { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self(self.0.saturating_add(rhs.0)) + } +} + +impl std::ops::Sub for Time { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + Self(self.0.saturating_sub(rhs.0)) + } +} diff --git a/crates/top/rerun-cli/Cargo.toml b/crates/top/rerun-cli/Cargo.toml index 2cf4ac433a67..ce0c0f96bf98 100644 --- a/crates/top/rerun-cli/Cargo.toml +++ b/crates/top/rerun-cli/Cargo.toml @@ -37,13 +37,18 @@ path = "src/bin/rerun.rs" doc = false [features] -# The default is what the user gets when they call `cargo install rerun-cli --locked`, -# so wer have all the bells and wistles here +## The default is what the user gets when they call `cargo install rerun-cli --locked`, +## so we have 
all the bells and whistles here, except those that may require extra tools +## (like "nasm"). +## That is: `cargo install rerun-cli --locked` should work for _everyone_. default = ["native_viewer", "web_viewer"] +## The features we enable when we build the pre-built binaries during our releases. +## This may enable features that require extra build tools that not everyone has. +release = ["default", "nasm"] + ## Enable faster native video decoding with assembly. ## You need to install [nasm](https://nasm.us/) to compile with this feature. -# TODO(#7671): this feature flag currently does nothing on linux. nasm = ["rerun/nasm"] ## Support spawning a native viewer. diff --git a/crates/top/rerun/Cargo.toml b/crates/top/rerun/Cargo.toml index fb7214aad752..cc3ae783161a 100644 --- a/crates/top/rerun/Cargo.toml +++ b/crates/top/rerun/Cargo.toml @@ -73,7 +73,6 @@ log = ["dep:env_logger", "dep:log"] ## Enable faster native video decoding with assembly. ## You need to install [nasm](https://nasm.us/) to compile with this feature. -# TODO(#7671): this feature flag currently does nothing on linux. nasm = ["re_video/nasm"] ## Support spawning a native viewer. diff --git a/crates/top/rerun/src/commands/entrypoint.rs b/crates/top/rerun/src/commands/entrypoint.rs index 71b47617c9fe..6fe9e16c5b05 100644 --- a/crates/top/rerun/src/commands/entrypoint.rs +++ b/crates/top/rerun/src/commands/entrypoint.rs @@ -553,6 +553,7 @@ where if args.version { println!("{build_info}"); + println!("Video features: {}", re_video::features().join(" ")); return Ok(0); } diff --git a/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs b/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs index 60c8fb775938..56459abf6754 100644 --- a/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs +++ b/crates/viewer/re_space_view_dataframe/src/dataframe_ui.rs @@ -49,7 +49,7 @@ pub(crate) fn dataframe_ui( // salt. 
let table_id_salt = egui::Id::new("__dataframe__") .with(&selected_columns) - .with(&query_handle.query().filtered_point_of_view); + .with(&query_handle.query().filtered_is_not_null); // For the row expansion cache, we invalidate more aggressively for now. let row_expansion_id_salt = egui::Id::new("__dataframe_row_exp__") @@ -250,9 +250,13 @@ impl<'a> egui_table::TableDelegate for DataframeTableDelegate<'a> { // … but not so far to the right that it doesn't fit. pos.x = pos.x.at_most(ui.max_rect().right() - galley.size().x); - ui.put( + let response = ui.put( egui::Rect::from_min_size(pos, galley.size()), - egui::Label::new(galley), + egui::Button::new(galley), + ); + self.ctx.select_hovered_on_click( + &response, + re_viewer_context::Item::from(entity_path.clone()), ); } } else if cell.row_nr == 1 { diff --git a/crates/viewer/re_space_view_dataframe/src/space_view_class.rs b/crates/viewer/re_space_view_dataframe/src/space_view_class.rs index 08aa1e04ce80..3539e297d060 100644 --- a/crates/viewer/re_space_view_dataframe/src/space_view_class.rs +++ b/crates/viewer/re_space_view_dataframe/src/space_view_class.rs @@ -147,7 +147,7 @@ mode sets the default time range to _everything_. 
You can override this in the s view_contents: Some(view_contents), filtered_index: Some(view_query.timeline(ctx)?), filtered_index_range: Some(view_query.filter_by_range()?), - filtered_point_of_view: view_query.filter_is_not_null()?, + filtered_is_not_null: view_query.filter_is_not_null()?, sparse_fill_strategy, selection: None, diff --git a/crates/viewer/re_viewer_context/src/item.rs b/crates/viewer/re_viewer_context/src/item.rs index de435ebc701b..620159727e31 100644 --- a/crates/viewer/re_viewer_context/src/item.rs +++ b/crates/viewer/re_viewer_context/src/item.rs @@ -65,6 +65,13 @@ impl From for Item { } } +impl From for Item { + #[inline] + fn from(entity_path: EntityPath) -> Self { + Self::InstancePath(InstancePath::from(entity_path)) + } +} + impl From for Item { #[inline] fn from(instance_path: InstancePath) -> Self { diff --git a/docs/content/howto.md b/docs/content/howto.md index 9b7e8da9890d..b54cd37c34de 100644 --- a/docs/content/howto.md +++ b/docs/content/howto.md @@ -16,3 +16,4 @@ Guides for using Rerun in more advanced ways. - [By logging custom data](howto/extend/custom-data.md) - [By implementing custom visualizations (Rust only)](howto/extend/extend-ui.md) - [Efficiently log time series data using `send_columns`](howto/send_columns.md) + - [Get data out from Rerun with code](howto/dataframe-api.md) diff --git a/docs/content/howto/dataframe-api.md b/docs/content/howto/dataframe-api.md new file mode 100644 index 000000000000..6a5d766e549e --- /dev/null +++ b/docs/content/howto/dataframe-api.md @@ -0,0 +1,228 @@ +--- +title: Get data out from Rerun with code +order: 1600 +--- + +Rerun comes with a Dataframe API, which enables getting data out of Rerun from code. This page provides an overview of the API, as well as recipes to load the data in popular packages such as [Pandas](https://pandas.pydata.org), [Polars](https://pola.rs), and [DuckDB](https://duckdb.org). 
+ + + +## The dataframe API + +### Loading a recording + +A recording can be loaded from an RRD using the `load_recording()` function: + +```python +import rerun as rr + +recording = rr.dataframe.load_recording("/path/to/file.rrd") +``` + +Although RRD files generally contain a single recording, they may occasionally contain 2 or more. This can happen, for example, if the RRD includes a blueprint, which is stored as a recording that is separate from the data. + +For such RRDs, the `load_archive()` function can be used: + + + +```python +import rerun as rr + +archive = rr.dataframe.load_archive("/path/to/file.rrd") + +print(f"The archive contains {archive.num_recordings()} recordings.") + +for recording in archive.all_recordings(): + ... +``` + + +The overall content of the recording can be inspected using the `schema()` method: + +```python +schema = recording.schema() +schema.index_columns() # list of all index columns (timelines) +schema.component_columns() # list of all component columns +``` + + +### Creating a view + +The first step for getting data out of a recording is to create a view, which requires specifying an index column and what content to include. + +As of Rerun 0.19, views must have exactly one index column, which can be any of the recording timelines. +Each row of the view will correspond to a unique value of the index column. +If a row has a `null` in the returned index (time) column, it means that data was static. +In the future, it will be possible to have other kinds of column as index, and more than a single index column. + +The `contents` define which columns are included in the view and can be flexibly specified as an entity expression, +optionally providing a corresponding list of components. 
+ +These are all valid ways to specify view content: + +```python +# everything in the recording +view = recording.view(index="frame_nr", contents="/**") + +# everything in the recording, except the /world/robot subtree +view = recording.view(index="frame_nr", contents="/**\n- /world/robot/**") + +# all `Scalar` components in the recording +view = recording.view(index="frame_nr", contents={"/**": ["Scalar"]}) + +# some components in an entity subtree and a specific component +# of a specific entity +view = recording.view(index="frame_nr", contents={ + "/world/robot/**": ["Position3D", "Color"], + "/world/scene": ["Text"], +}) +``` + +### Filtering rows in a view + +A view has several APIs to further filter the rows it will return. + + + +**Filtering by time range** + +Rows may be filtered to keep only a given range of values from its index column: + +```python +# only keep rows for frames 0 to 10 +view = view.filter_range_sequence(0, 10) +``` + +This API exists for both temporal and sequence timelines, and for various units: +- `view.filter_range_sequence(start_frame, end_frame)` (takes `int` arguments) +- `view.filter_range_seconds(start_second, end_second)` (takes `float` arguments) +- `view.filter_range_nanos(start_nano, end_nano)` (takes `int` arguments) + +(all ranges include both the start and end values) + +**Filtering by index value** + +Rows may be filtered to keep only those whose index corresponds to a specific set of values: + +```python +view = view.filter_index_values([0, 5, 10]) +``` + +Note that a precise match is required. +Since Rerun internally stores times as `int64`, this method is only available for integer arguments (nanos or sequence number). +Floating point seconds would risk false mismatch due to numerical conversion. + + +**Filtering by column not null** + +Rows where a specific column has null values may be filtered out using the `filter_is_not_null()` method. 
When using this method, only rows for which a logging event exists for the provided column are returned. + +```python +# only keep rows where a position is available for the robot +view = view.filter_is_not_null(rr.dataframe.ComponentColumnSelector("/world/robot", "Position3D")) +``` + +### Specifying rows + +Instead of filtering rows based on the existing data, it is possible to specify exactly which rows must be returned by the view using the `using_index_values()` method: + +```python +# resample the first second of data at every millisecond +view = view.using_index_values(range(0, 1_000_000_000, 1_000_000)) +``` + +In this case, the view will return rows in multiples of 1e6 nanoseconds (i.e. for each millisecond) over a period of one second. +A precise match on the index value is required for data to be produced on the row. +For this reason, a floating point version of this method is not provided for this feature. + +Note that this feature is typically used in conjunction with `fill_latest_at()` (see next paragraph) to enable arbitrary resampling of the original data. + + +### Filling empty values with latest-at data + +By default, the rows returned by the view may be sparse and contain values only for the columns where a logging event actually occurred at the corresponding index value. +The view can optionally replace these empty cells using a latest-at query. This means that, for each such empty cell, the view traces back to find the last logged value and uses it instead. This is enabled by calling the `fill_latest_at()` method: + +```python +view = view.fill_latest_at() +``` + +### Reading the data + +Once the view is fully set up (possibly using the filtering features previously described), its content can be read using the `select()` method. 
This method optionally allows specifying which subset of columns should be produced: + + +```python +# select all columns +record_batches = view.select() + +# select only the specified columns +record_batches = view.select( + [ + rr.dataframe.IndexColumnSelector("frame_nr"), + rr.dataframe.ComponentColumnSelector("/world/robot", "Position3D"), + ], +) +``` + +The `select()` method returns a [`pyarrow.RecordBatchReader`](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html), which is essentially an iterator over a stream of [`pyarrow.RecordBatch`](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatch.html#pyarrow-recordbatch)es containing the actual data. See the [PyArrow documentation](https://arrow.apache.org/docs/python/index.html) for more information. + +For the rest of this page, we explore how these `RecordBatch`es can be ingested in some of the popular data science packages. + + +## Load data to a PyArrow `Table` + +The `RecordBatchReader` provides a [`read_all()`](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html#pyarrow.RecordBatchReader.read_all) method which directly produces a [`pyarrow.Table`](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table): + +```python +import rerun as rr + +recording = rr.dataframe.load_recording("/path/to/file.rrd") +view = recording.view(index="frame_nr", contents="/**") + +table = view.select().read_all() +``` + + +## Load data to a Pandas dataframe + +The `RecordBatchReader` provides a [`read_pandas()`](https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html#pyarrow.RecordBatchReader.read_pandas) method which returns a [Pandas dataframe](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html): + + +```python +import rerun as rr + +recording = rr.dataframe.load_recording("/path/to/file.rrd") +view = recording.view(index="frame_nr", contents="/**") + +df = view.select().read_pandas() 
+``` + +## Load data to a Polars dataframe + +A [Polars dataframe](https://docs.pola.rs/api/python/stable/reference/dataframe/index.html) can be created from a PyArrow table: + +```python +import rerun as rr +import polars as pl + +recording = rr.dataframe.load_recording("/path/to/file.rrd") +view = recording.view(index="frame_nr", contents="/**") + +df = pl.from_arrow(view.select().read_all()) +``` + + +## Load data to a DuckDB relation + +A [DuckDB](https://duckdb.org) relation can be created directly using the `pyarrow.RecordBatchReader` returned by `select()`: + +```python +import rerun as rr +import duckdb + +recording = rr.dataframe.load_recording("/path/to/file.rrd") +view = recording.view(index="frame_nr", contents="/**") + +rel = duckdb.arrow(view.select()) +``` diff --git a/rerun_py/Cargo.toml b/rerun_py/Cargo.toml index 992d5b5f5977..192c5f229b1e 100644 --- a/rerun_py/Cargo.toml +++ b/rerun_py/Cargo.toml @@ -30,7 +30,6 @@ extension-module = ["pyo3/extension-module"] ## Enable faster native video decoding with assembly. ## You need to install [nasm](https://nasm.us/) to compile with this feature. -# TODO(#7671): this feature flag currently does nothing on linux. nasm = ["re_video/nasm"] ## Support serving a web viewer over HTTP with `serve()`. diff --git a/rerun_py/rerun_bindings/rerun_bindings.pyi b/rerun_py/rerun_bindings/rerun_bindings.pyi index 11e1f61423e6..0dc62e0fc11f 100644 --- a/rerun_py/rerun_bindings/rerun_bindings.pyi +++ b/rerun_py/rerun_bindings/rerun_bindings.pyi @@ -43,15 +43,27 @@ class RecordingView: """ def filter_range_sequence(self, start: int, end: int) -> RecordingView: - """Filter the view to only include data between the given index sequence numbers.""" + """ + Filter the view to only include data between the given index sequence numbers. + + This is including both the value at the start and the value at the end. + """ ... 
def filter_range_seconds(self, start: float, end: float) -> RecordingView: - """Filter the view to only include data between the given index time values.""" + """ + Filter the view to only include data between the given index time values. + + This is including both the value at the start and the value at the end. + """ ... def filter_range_nanos(self, start: int, end: int) -> RecordingView: - """Filter the view to only include data between the given index time values.""" + """ + Filter the view to only include data between the given index time values. + + This is including both the value at the start and the value at the end. + """ ... def filter_index_values(self, values: IndexValuesLike) -> RecordingView: diff --git a/rerun_py/src/dataframe.rs b/rerun_py/src/dataframe.rs index 32a0aedd8c12..efb818100a45 100644 --- a/rerun_py/src/dataframe.rs +++ b/rerun_py/src/dataframe.rs @@ -570,7 +570,7 @@ impl PyRecordingView { let column = column.into_selector(); let mut query_expression = self.query_expression.clone(); - query_expression.filtered_point_of_view = Some(column); + query_expression.filtered_is_not_null = Some(column); Self { recording: self.recording.clone(), @@ -743,7 +743,7 @@ impl PyRecording { filtered_index_range: None, filtered_index_values: None, using_index_values: None, - filtered_point_of_view: None, + filtered_is_not_null: None, sparse_fill_strategy: SparseFillStrategy::None, selection: None, }; diff --git a/scripts/lint.py b/scripts/lint.py index ce9a83ec8392..ece47a151c5f 100755 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -3,7 +3,8 @@ """ Runs custom linting on our code. -Adding "NOLINT" to any line makes the linter ignore that line. +Adding "NOLINT" to any line makes the linter ignore that line. Adding a pair of "NOLINT_START" and "NOLINT_END" makes +the linter ignore these lines, as well as all lines in between. 
""" from __future__ import annotations @@ -687,8 +688,10 @@ def lint_workspace_lints(cargo_file_content: str) -> str | None: "ML", "Numpy", "nuScenes", - "Pixi", + "Pandas", "PDF", + "Pixi", + "Polars", "Python", "Q1", "Q2", @@ -849,7 +852,7 @@ def fix_enforced_upper_case(s: str) -> str: return "".join(new_words) -def lint_markdown(filepath: str, lines_in: list[str]) -> tuple[list[str], list[str]]: +def lint_markdown(filepath: str, source: SourceFile) -> tuple[list[str], list[str]]: """Only for .md files.""" errors = [] @@ -863,12 +866,12 @@ def lint_markdown(filepath: str, lines_in: list[str]) -> tuple[list[str], list[s in_code_of_conduct = filepath.endswith("CODE_OF_CONDUCT.md") if in_code_of_conduct: - return errors, lines_in + return errors, source.lines in_code_block = False in_frontmatter = False in_metadata = False - for line_nr, line in enumerate(lines_in): + for line_nr, line in enumerate(source.lines): line_nr = line_nr + 1 if line.strip().startswith("```"): @@ -881,7 +884,7 @@ def lint_markdown(filepath: str, lines_in: list[str]) -> tuple[list[str], list[s if in_metadata and line.startswith("-->"): in_metadata = False - if not in_code_block: + if not in_code_block and not source.should_ignore(line_nr): if not in_metadata: # Check the casing on markdown headers if m := re.match(r"(\#+ )(.*)", line): @@ -973,7 +976,19 @@ def _update_content(self) -> None: self.content = "".join(self.lines) # gather lines with a `NOLINT` marker - self.no_lints = {i for i, line in enumerate(self.lines) if "NOLINT" in line} + self.nolints = set() + is_in_nolint_block = False + for i, line in enumerate(self.lines): + if "NOLINT" in line: + self.nolints.add(i) + + if "NOLINT_START" in line: + is_in_nolint_block = True + + if is_in_nolint_block: + self.nolints.add(i) + if "NOLINT_END" in line: + is_in_nolint_block = False def rewrite(self, new_lines: list[str]) -> None: """Rewrite the contents of the file.""" @@ -993,7 +1008,7 @@ def should_ignore(self, from_line: int, to_line: 
int | None = None) -> bool: if to_line is None: to_line = from_line - return any(i in self.no_lints for i in range(from_line - 1, to_line + 1)) + return any(i in self.nolints for i in range(from_line - 1, to_line + 1)) def should_ignore_index(self, start_idx: int, end_idx: int | None = None) -> bool: """Same as `should_ignore` but takes 0-based indices instead of line numbers.""" @@ -1022,6 +1037,9 @@ def lint_file(filepath: str, args: Any) -> int: prev_line = None for line_nr, line in enumerate(source.lines): + if source.should_ignore(line_nr): + continue + if line == "" or line[-1] != "\n": error = "Missing newline at end of file" else: @@ -1049,7 +1067,7 @@ def lint_file(filepath: str, args: Any) -> int: source.rewrite(lines_out) if filepath.endswith(".md"): - errors, lines_out = lint_markdown(filepath, source.lines) + errors, lines_out = lint_markdown(filepath, source) for error in errors: print(source.error(error))