Skip to content

Commit

Permalink
Add another Cache for accessing files in ArtifactBundles (#1354)
Browse files Browse the repository at this point in the history
When profiling (via `symbolicator-stress`) processing of javascript events, the majority of the time is spent extracting minified / sourcemap files from artifact bundles (which are essentially zip files).

We introduce yet another cache which can avoid this costly unzip process by keeping these unzipped files in memory.

For the above mentioned stresstest, this has lead to a throughput increase of 1000x. Though not surprisingly, the stresstest processes the exact same event over and over and has thus a 100% cache hit rate.

Co-authored-by: Sebastian Zivota <[email protected]>
  • Loading branch information
Swatinem and loewenheim authored Jan 31, 2024
1 parent 202922a commit 8218752
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 6 deletions.
9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
[workspace]
resolver = "2"
members = ["crates/*"]
default-members = ["crates/symbolicator", "crates/symbolicli", "crates/symsorter", "crates/wasm-split"]
default-members = [
"crates/symbolicator",
"crates/symbolicli",
"crates/symsorter",
"crates/wasm-split",
]

[profile.dev]
# For debug builds, we do want to optimize for both debug-ability, and fast iteration times.
Expand Down Expand Up @@ -47,4 +52,4 @@ cpp_demangle = { git = "https://github.com/getsentry/cpp_demangle", branch = "se
# symbolic = { path = "../../../symbolic/symbolic", features = ["common-serde", "debuginfo", "demangle", "minidump-serde", "symcache"] }
# See also https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html
# [patch.crates-io]
# symbolic = { path = "../symbolic/symbolic" }
# symbolic-debuginfo = { path = "../symbolic/symbolic-debuginfo" }
77 changes: 77 additions & 0 deletions crates/symbolicator-js/src/bundle_lookup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use std::fmt;
use std::time::Duration;

use symbolicator_sources::RemoteFileUri;

use crate::lookup::{CachedFileEntry, FileKey};

type FileInBundleCacheInner = moka::sync::Cache<(RemoteFileUri, FileKey), CachedFileEntry>;

/// A cache that memoizes looking up files in artifact bundles.
#[derive(Clone)]
pub struct FileInBundleCache {
cache: FileInBundleCacheInner,
}

impl std::fmt::Debug for FileInBundleCache {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("FileInBundleCache").finish()
}
}

impl FileInBundleCache {
/// Creates a new `FileInBundleCache` with a maximum size of 2GiB and
/// idle time of 1h.
pub fn new() -> Self {
// NOTE: We size the cache at 2 GiB which is quite an arbitrary pick.
// As all the files are living in memory, we return the size of the contents
// from the `weigher` which is responsible for this accounting.
const GIGS: u64 = 1 << 30;
let cache = FileInBundleCacheInner::builder()
.max_capacity(2 * GIGS)
.time_to_idle(Duration::from_secs(60 * 60))
.name("file-in-bundle")
.weigher(|_k, v| {
let content_size = v
.entry
.as_ref()
.map(|cached_file| cached_file.contents.len())
.unwrap_or_default();
(std::mem::size_of_val(v) + content_size)
.try_into()
.unwrap_or(u32::MAX)
})
.build();
Self { cache }
}

/// Tries to retrieve a file from the cache.
///
/// We look for the file under `(bundle_uri, key)` for `bundle_uri` in `bundle_uris`.
/// Retrieval is limited to a specific list of bundles so that e.g. files with the same
/// `abs_path` belonging to different events are disambiguated.
pub fn try_get(
&self,
bundle_uris: impl Iterator<Item = RemoteFileUri>,
mut key: FileKey,
) -> Option<CachedFileEntry> {
for bundle_uri in bundle_uris {
// XXX: this is a really horrible workaround for not being able to look up things via `(&A, &B)` instead of `&(A, B)`.
let lookup_key = (bundle_uri, key);
if let Some(file_entry) = self.cache.get(&lookup_key) {
return Some(file_entry);
}
key = lookup_key.1;
}
None
}

/// Inserts `file_entry` into the cache under `(bundle_uri, key)`.
///
/// Files are inserted under a specific bundle so that e.g. files with the same
/// `abs_path` belonging to different events are disambiguated.
pub fn insert(&self, bundle_uri: &RemoteFileUri, key: &FileKey, file_entry: &CachedFileEntry) {
let key = (bundle_uri.clone(), key.clone());
self.cache.insert(key, file_entry.clone())
}
}
1 change: 1 addition & 0 deletions crates/symbolicator-js/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod api_lookup;
mod bundle_index;
mod bundle_index_cache;
mod bundle_lookup;
pub mod interface;
mod lookup;
mod metrics;
Expand Down
27 changes: 23 additions & 4 deletions crates/symbolicator-js/src/lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ use symbolicator_service::utils::http::is_valid_origin;
use crate::api_lookup::{ArtifactHeaders, JsLookupResult, SentryLookupApi};
use crate::bundle_index::BundleIndex;
use crate::bundle_index_cache::BundleIndexCache;
use crate::bundle_lookup::FileInBundleCache;
use crate::interface::{
JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith,
SymbolicateJsStacktraces,
Expand Down Expand Up @@ -163,6 +164,7 @@ impl SourceMapLookup {
pub async fn new(service: SourceMapService, request: SymbolicateJsStacktraces) -> Self {
let SourceMapService {
objects,
files_in_bundles,
sourcefiles_cache,
bundle_index_cache,
sourcemap_caches,
Expand Down Expand Up @@ -213,6 +215,7 @@ impl SourceMapLookup {

let fetcher = ArtifactFetcher {
objects,
files_in_bundles,
sourcefiles_cache,
sourcemap_caches,
api_lookup,
Expand Down Expand Up @@ -559,6 +562,10 @@ struct ArtifactFetcher {
metrics: JsMetrics,

// other services:
/// Cache for looking up files in artifact bundles.
///
/// This cache is shared between all JS symbolication requests.
files_in_bundles: FileInBundleCache,
objects: ObjectsActor,
sourcefiles_cache: Arc<SourceFilesCache>,
sourcemap_caches: Arc<Cacher<FetchSourceMapCacheInternal>>,
Expand Down Expand Up @@ -973,6 +980,14 @@ impl ArtifactFetcher {
return None;
}

// First see if we have already cached this file for any of this event's bundles.
if let Some(file_entry) = self
.files_in_bundles
.try_get(self.artifact_bundles.keys().rev().cloned(), key.clone())
{
return Some(file_entry);
}

// If we have a `DebugId`, we try a lookup based on that.
if let Some(debug_id) = key.debug_id() {
let ty = key.as_type();
Expand All @@ -988,11 +1003,13 @@ impl ArtifactFetcher {
*bundle_resolved_with,
);
tracing::trace!(?key, "Found file in artifact bundles by debug-id");
return Some(CachedFileEntry {
let file_entry = CachedFileEntry {
uri: CachedFileUri::Bundled(bundle_uri.clone(), key.clone()),
entry: CachedFile::from_descriptor(key.abs_path(), descriptor),
resolved_with: ResolvedWith::DebugId,
});
};
self.files_in_bundles.insert(bundle_uri, key, &file_entry);
return Some(file_entry);
}
}
}
Expand All @@ -1012,11 +1029,13 @@ impl ArtifactFetcher {
*resolved_with,
);
tracing::trace!(?key, url, "Found file in artifact bundles by url");
return Some(CachedFileEntry {
let file_entry = CachedFileEntry {
uri: CachedFileUri::Bundled(bundle_uri.clone(), key.clone()),
entry: CachedFile::from_descriptor(Some(abs_path), descriptor),
resolved_with: *resolved_with,
});
};
self.files_in_bundles.insert(bundle_uri, key, &file_entry);
return Some(file_entry);
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions crates/symbolicator-js/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ use symbolicator_service::services::SharedServices;

use crate::api_lookup::SentryLookupApi;
use crate::bundle_index_cache::BundleIndexCache;
use crate::bundle_lookup::FileInBundleCache;
use crate::lookup::FetchSourceMapCacheInternal;

#[derive(Debug, Clone)]
pub struct SourceMapService {
pub(crate) objects: ObjectsActor,
pub(crate) files_in_bundles: FileInBundleCache,
pub(crate) sourcefiles_cache: Arc<SourceFilesCache>,
pub(crate) bundle_index_cache: Arc<BundleIndexCache>,
pub(crate) sourcemap_caches: Arc<Cacher<FetchSourceMapCacheInternal>>,
Expand Down Expand Up @@ -46,6 +48,7 @@ impl SourceMapService {

Self {
objects,
files_in_bundles: FileInBundleCache::new(),
sourcefiles_cache,
bundle_index_cache: Arc::new(bundle_index_cache),
sourcemap_caches: Arc::new(Cacher::new(caches.sourcemap_caches.clone(), shared_cache)),
Expand Down

0 comments on commit 8218752

Please sign in to comment.