Skip to content

Commit

Permalink
update GC accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Nov 27, 2023
1 parent 7ecc49a commit 6828177
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 24 deletions.
2 changes: 0 additions & 2 deletions crates/re_arrow_store/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,6 @@ impl Default for IndexedBucketInner {
/// ```text
/// cargo test -p re_arrow_store -- --nocapture datastore_internal_repr
/// ```
//
// TODO(#1807): timeless should be row-id ordered too then
#[derive(Debug)]
pub struct PersistentIndexedTable {
/// The entity this table is related to, for debugging purposes.
Expand Down
35 changes: 13 additions & 22 deletions crates/re_arrow_store/src/store_gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,6 @@ impl DataStore {
}

/// For each `EntityPath`, `Timeline`, `Component` find the N latest [`RowId`]s.
///
/// These are the rows that must be protected so as not to impact a latest-at query.
/// Note that latest for Timeless is currently based on insertion-order rather than
/// tuid. [See: #1807](https://github.com/rerun-io/rerun/issues/1807)
//
// TODO(jleibs): More complex functionality might require expanding this to also
// *ignore* specific entities, components, timelines, etc. for this protection.
Expand Down Expand Up @@ -366,7 +362,6 @@ impl DataStore {
}

// Find all protected rows in timeless tables
// TODO(#1807): this is still based on insertion order.
for table in self.timeless_tables.values() {
let cluster_key = table.cluster_key;
let table = table.inner.read();
Expand Down Expand Up @@ -693,43 +688,39 @@ impl PersistentIndexedTable {
cluster_key: _,
inner,
} = self;

let inner = &mut *inner.write();
inner.sort();

let PersistentIndexedTableInner {
col_insert_id,
col_row_id,
col_num_instances,
columns,
is_sorted: _,
} = &mut *inner.write();
is_sorted,
} = inner;

let mut diff: Option<StoreDiff> = None;

// TODO(#1807): Timeless data isn't sorted, so we need to do a full scan here.
// Speed this up when we implement #1807.
if let Some(row_index) = col_row_id
.iter()
.enumerate()
.find(|(_, r)| **r == row_id)
.map(|(index, _)| index)
{
if let Ok(row_index) = col_row_id.binary_search(&row_id) {
*is_sorted = row_index.saturating_add(1) == col_row_id.len();

// col_row_id
// TODO(jleibs) Use swap_remove once we have a notion of sorted
let removed_row_id = col_row_id.remove(row_index);
let removed_row_id = col_row_id.swap_remove(row_index);
debug_assert_eq!(row_id, removed_row_id);
dropped_num_bytes += removed_row_id.total_size_bytes();

// col_insert_id (if present)
if !col_insert_id.is_empty() {
// TODO(jleibs) Use swap_remove once we have a notion of sorted
dropped_num_bytes += col_insert_id.remove(row_index).total_size_bytes();
dropped_num_bytes += col_insert_id.swap_remove(row_index).total_size_bytes();
}

// col_num_instances
// TODO(jleibs) Use swap_remove once we have a notion of sorted
dropped_num_bytes += col_num_instances.remove(row_index).total_size_bytes();
dropped_num_bytes += col_num_instances.swap_remove(row_index).total_size_bytes();

// each data column
for column in columns.values_mut() {
let cell = column.0.remove(row_index);
let cell = column.0.swap_remove(row_index);

// TODO(#1809): once datatype deduplication is in, we should really not count
// autogenerated keys as part of the memory stats (same on write path).
Expand Down

0 comments on commit 6828177

Please sign in to comment.