Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index: don't store commit ids in sorted lookup table to save disk space #3087

Merged
merged 1 commit into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lib/src/default_index/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,7 @@ impl MutableIndexSegment {
buf.extend_from_slice(entry.commit_id.as_bytes());
}

for (commit_id, LocalPosition(pos)) in &self.commit_lookup {
buf.extend_from_slice(commit_id.as_bytes());
for LocalPosition(pos) in self.commit_lookup.values() {
buf.extend(pos.to_le_bytes());
}

Expand Down
61 changes: 21 additions & 40 deletions lib/src/default_index/readonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl ReadonlyIndexLoadError {
}

/// Current format version of the index segment file.
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 5;
pub(crate) const INDEX_SEGMENT_FILE_FORMAT_VERSION: u32 = 6;

/// If set, the value is stored in the overflow table.
pub(crate) const OVERFLOW_FLAG: u32 = 0x8000_0000;
Expand Down Expand Up @@ -133,33 +133,13 @@ impl CommitGraphEntry<'_> {
u32::from_le_bytes(self.data[12..16].try_into().unwrap())
}

fn commit_id(&self) -> CommitId {
CommitId::from_bytes(&self.data[16..])
}
}

struct CommitLookupEntry<'a> {
data: &'a [u8],
commit_id_length: usize,
}

impl CommitLookupEntry<'_> {
fn size(commit_id_length: usize) -> usize {
commit_id_length + 4
}

fn commit_id(&self) -> CommitId {
CommitId::from_bytes(self.commit_id_bytes())
}

// might be better to add borrowed version of CommitId
fn commit_id_bytes(&self) -> &[u8] {
&self.data[0..self.commit_id_length]
}

fn local_pos(&self) -> LocalPosition {
let pos = u32::from_le_bytes(self.data[self.commit_id_length..][..4].try_into().unwrap());
LocalPosition(pos)
&self.data[16..]
}
}

Expand Down Expand Up @@ -188,7 +168,6 @@ impl CommitLookupEntry<'_> {
/// u32: change id position in the sorted change ids table
/// <commit id length number of bytes>: commit id
/// for each entry, sorted by commit id:
/// <commit id length number of bytes>: commit id
/// u32: local position in the graph entries table
/// for each entry, sorted by change id:
/// <change id length number of bytes>: change id
Expand Down Expand Up @@ -324,8 +303,7 @@ impl ReadonlyIndexSegment {

let commit_graph_entry_size = CommitGraphEntry::size(commit_id_length);
let graph_size = (num_local_commits as usize) * commit_graph_entry_size;
let commit_lookup_entry_size = CommitLookupEntry::size(commit_id_length);
let commit_lookup_size = (num_local_commits as usize) * commit_lookup_entry_size;
let commit_lookup_size = (num_local_commits as usize) * 4;
let change_id_table_size = (num_local_change_ids as usize) * change_id_length;
let change_pos_table_size = (num_local_change_ids as usize) * 4;
let parent_overflow_size = (num_parent_overflow_entries as usize) * 4;
Expand Down Expand Up @@ -389,14 +367,10 @@ impl ReadonlyIndexSegment {
}
}

fn commit_lookup_entry(&self, lookup_pos: u32) -> CommitLookupEntry {
fn commit_lookup_pos(&self, lookup_pos: u32) -> LocalPosition {
let table = &self.data[self.commit_lookup_base..self.change_id_table_base];
let entry_size = CommitLookupEntry::size(self.commit_id_length);
let offset = (lookup_pos as usize) * entry_size;
CommitLookupEntry {
data: &table[offset..][..entry_size],
commit_id_length: self.commit_id_length,
}
let offset = (lookup_pos as usize) * 4;
LocalPosition(u32::from_le_bytes(table[offset..][..4].try_into().unwrap()))
}

fn change_lookup_id(&self, lookup_pos: u32) -> ChangeId {
Expand Down Expand Up @@ -438,7 +412,8 @@ impl ReadonlyIndexSegment {
/// Binary searches commit id by `prefix`. Returns the lookup position.
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
binary_search_pos_by(self.num_local_commits, |pos| {
let entry = self.commit_lookup_entry(pos);
let local_pos = self.commit_lookup_pos(pos);
let entry = self.graph_entry(local_pos);
entry.commit_id_bytes().cmp(prefix)
})
}
Expand Down Expand Up @@ -470,24 +445,30 @@ impl IndexSegment for ReadonlyIndexSegment {
}

fn commit_id_to_pos(&self, commit_id: &CommitId) -> Option<LocalPosition> {
let lookup_pos = self
.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
.ok()?;
let entry = self.commit_lookup_entry(lookup_pos);
Some(entry.local_pos())
self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
.ok()
.map(|pos| self.commit_lookup_pos(pos))
}

fn resolve_neighbor_commit_ids(
&self,
commit_id: &CommitId,
) -> (Option<CommitId>, Option<CommitId>) {
self.commit_id_byte_prefix_to_lookup_pos(commit_id.as_bytes())
.map_neighbors(|pos| self.commit_lookup_entry(pos).commit_id())
.map_neighbors(|pos| {
let local_pos = self.commit_lookup_pos(pos);
let entry = self.graph_entry(local_pos);
entry.commit_id()
})
}

fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
.prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id())
.prefix_matches(prefix, |pos| {
let local_pos = self.commit_lookup_pos(pos);
let entry = self.graph_entry(local_pos);
entry.commit_id()
})
.map(|(id, _)| id)
}

Expand Down