Skip to content

Commit

Permalink
lib: memory optimization of RepoPathComponent
Browse files Browse the repository at this point in the history
Summary: As suggested by Yuya. This is actually a huge win in heap allocations
for repositories with a lot of files; according to the bucket statistics from
mimalloc, on gecko-dev, a repo of ~350,000 files, this reduces the number of
small allocations (8, 16, or 32 byte size class) from nearly 4 million to just
over 200k, a 20x allocation reduction, with a total peak heap size reduction of
over 50MiB.

Signed-off-by: Austin Seipp <[email protected]>
Change-Id: I4014145d740367ecec5be32c6b9e2de8
  • Loading branch information
thoughtpolice committed Nov 2, 2023
1 parent 1629e40 commit f338727
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 5 deletions.
35 changes: 35 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ chrono = { version = "0.4.31", default-features = false, features = [
"clock",
] }
config = { version = "0.13.2", default-features = false, features = ["toml"] }
compact_str = "0.7.1"
criterion = "0.5.1"
crossterm = { version = "0.26", default-features = false }
digest = "0.10.7"
Expand Down
3 changes: 2 additions & 1 deletion lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ harness = false
version_check = { workspace = true }

[dependencies]
async-trait = { workspace = true}
async-trait = { workspace = true }
backoff = { workspace = true }
blake2 = { workspace = true }
byteorder = { workspace = true }
bytes = { workspace = true }
chrono = { workspace = true }
config = { workspace = true }
compact_str = { workspace = true }
digest = { workspace = true }
futures = { workspace = true }
either = { workspace = true }
Expand Down
6 changes: 6 additions & 0 deletions lib/src/content_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ impl ContentHash for String {
}
}

// Content hashing for `CompactString`, the string type this commit starts
// using for `RepoPathComponent::value`.
impl ContentHash for compact_str::CompactString {
// Feeds the string's bytes (as returned by `as_bytes()`) into `state` by
// delegating to the `hash` method of the resulting byte slice.
// NOTE(review): presumably this yields the same digest as the `String`
// impl for equal contents, so existing hashes stay stable; the body of
// that impl is not visible in this hunk, so confirm.
fn hash(&self, state: &mut impl digest::Update) {
self.as_bytes().hash(state);
}
}

impl<T: ContentHash> ContentHash for Option<T> {
fn hash(&self, state: &mut impl digest::Update) {
match self {
Expand Down
11 changes: 7 additions & 4 deletions lib/src/repo_path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
use std::fmt::{Debug, Error, Formatter};
use std::path::{Component, Path, PathBuf};

use compact_str::CompactString;
use itertools::Itertools;
use thiserror::Error;

Expand All @@ -25,7 +26,7 @@ use crate::file_util;
content_hash! {
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
pub struct RepoPathComponent {
value: String,
value: CompactString,
}
}

Expand All @@ -48,7 +49,9 @@ impl From<&str> for RepoPathComponent {
impl From<String> for RepoPathComponent {
fn from(value: String) -> Self {
assert!(!value.contains('/'));
RepoPathComponent { value }
RepoPathComponent {
value: value.into(),
}
}
}

Expand Down Expand Up @@ -76,7 +79,7 @@ impl RepoPath {
let components = value
.split('/')
.map(|value| RepoPathComponent {
value: value.to_string(),
value: value.into(),
})
.collect();
RepoPath { components }
Expand Down Expand Up @@ -141,7 +144,7 @@ impl RepoPath {
let repo_path_len: usize = self.components.iter().map(|x| x.as_str().len() + 1).sum();
let mut result = PathBuf::with_capacity(base.as_os_str().len() + repo_path_len);
result.push(base);
result.extend(self.components.iter().map(|dir| &dir.value));
result.extend(self.components.iter().map(|dir| dir.value.as_str()));
result
}

Expand Down

0 comments on commit f338727

Please sign in to comment.