From f338727ac4978fe080da7e556286fc51657ae7c0 Mon Sep 17 00:00:00 2001 From: Austin Seipp Date: Wed, 1 Nov 2023 22:48:48 -0500 Subject: [PATCH] lib: memory optimization of `RepoPathComponent` Summary: As suggested by Yuya. This is actually a huge win in heap allocations for repositories with a lot of files; according to the bucket statistics from mimalloc, on gecko-dev, a repo of ~350,000 files, this reduces the number of small alloctions (8, 16, or 32 byte size class) from nearly ~4 million to just over ~200k, a 20x allocation reduction, with a total peak heap size reduction of over 50MiB. Signed-off-by: Austin Seipp Change-Id: I4014145d740367ecec5be32c6b9e2de8 --- Cargo.lock | 35 +++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + lib/Cargo.toml | 3 ++- lib/src/content_hash.rs | 6 ++++++ lib/src/repo_path.rs | 11 +++++++---- 5 files changed, 51 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0989ec05200..04956a30ba1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -302,6 +302,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a17ed5635fc8536268e5d4de1e22e81ac34419e5f052d4d51f4e01dcc263fcc" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.0.83" @@ -429,6 +438,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "compact_str" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f86b9c4c00838774a6d902ef931eff7470720c51d90c2e32cfe15dc304737b3f" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "ryu", + "static_assertions", +] + [[package]] name = "config" version = "0.13.3" @@ -1666,6 +1688,7 @@ dependencies = [ "byteorder", "bytes 1.5.0", "chrono", + "compact_str", "config", "criterion", "digest", @@ -2477,6 +2500,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "ryu" version = "1.0.15" @@ -2670,6 +2699,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index 333bd69a290..a511e516590 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ chrono = { version = "0.4.31", default-features = false, features = [ "clock", ] } config = { version = "0.13.2", default-features = false, features = ["toml"] } +compact_str = "0.7.1" criterion = "0.5.1" crossterm = { version = "0.26", default-features = false } digest = "0.10.7" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 8143259702d..593645efe7c 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -19,13 +19,14 @@ harness = false version_check = { workspace = true } [dependencies] -async-trait = { workspace = true} +async-trait = { workspace = true } backoff = { workspace = true } blake2 = { workspace = true } byteorder = { workspace = true } bytes = { workspace = true } chrono = { workspace = true } config = { workspace = true } +compact_str = { workspace = true } digest = { workspace = true } futures = { workspace = true } either = { workspace = true } diff --git a/lib/src/content_hash.rs b/lib/src/content_hash.rs index 8f5d67a3a71..56035224dd2 100644 --- a/lib/src/content_hash.rs +++ b/lib/src/content_hash.rs @@ -73,6 +73,12 @@ impl ContentHash for String { } } +impl ContentHash for compact_str::CompactString { + fn hash(&self, state: &mut impl digest::Update) { + self.as_bytes().hash(state); + } +} + impl ContentHash for Option { fn hash(&self, state: &mut impl digest::Update) { match self { diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index d3ae6addaa0..c755b1441f4 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -17,6 +17,7 @@ use std::fmt::{Debug, Error, Formatter}; use std::path::{Component, Path, PathBuf}; +use compact_str::CompactString; use itertools::Itertools; use thiserror::Error; @@ -25,7 +26,7 @@ use crate::file_util; content_hash! { #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] pub struct RepoPathComponent { - value: String, + value: CompactString, } } @@ -48,7 +49,9 @@ impl From<&str> for RepoPathComponent { impl From for RepoPathComponent { fn from(value: String) -> Self { assert!(!value.contains('/')); - RepoPathComponent { value } + RepoPathComponent { + value: value.into(), + } } } @@ -76,7 +79,7 @@ impl RepoPath { let components = value .split('/') .map(|value| RepoPathComponent { - value: value.to_string(), + value: value.into(), }) .collect(); RepoPath { components } @@ -141,7 +144,7 @@ impl RepoPath { let repo_path_len: usize = self.components.iter().map(|x| x.as_str().len() + 1).sum(); let mut result = PathBuf::with_capacity(base.as_os_str().len() + repo_path_len); result.push(base); - result.extend(self.components.iter().map(|dir| &dir.value)); + result.extend(self.components.iter().map(|dir| dir.value.as_str())); result }