From 8ce62ca7874504248f1a228f1d03b51f665a22f1 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 6 Oct 2023 12:10:19 +0200 Subject: [PATCH] Avoid using shared cache for internally stored files (#1320) --- .../symbolicator-js/src/bundle_index_cache.rs | 7 +++++++ .../src/caching/cache_error.rs | 2 +- .../src/caching/cache_key.rs | 2 +- .../symbolicator-service/src/caching/memory.rs | 18 ++++++++++++------ .../src/services/bitcode.rs | 4 ++++ .../src/services/il2cpp.rs | 4 ++++ .../src/services/objects/data_cache.rs | 4 ++++ .../src/services/objects/mod.rs | 5 +++++ crates/symbolicator-sources/src/remotefile.rs | 17 +++++++++++++++++ 9 files changed, 55 insertions(+), 8 deletions(-) diff --git a/crates/symbolicator-js/src/bundle_index_cache.rs b/crates/symbolicator-js/src/bundle_index_cache.rs index 337e0d254..e6b88695e 100644 --- a/crates/symbolicator-js/src/bundle_index_cache.rs +++ b/crates/symbolicator-js/src/bundle_index_cache.rs @@ -89,4 +89,11 @@ impl CacheItemRequest for BundleIndexRequest { fn weight(item: &Self::Item) -> u32 { item.0.max(std::mem::size_of::() as u32) } + + fn use_shared_cache(&self) -> bool { + // These change frequently, and are being downloaded from Sentry directly. + // Here again, the question is whether we want to waste a bit of storage on GCS vs doing + // more requests to Python. + true + } } diff --git a/crates/symbolicator-service/src/caching/cache_error.rs b/crates/symbolicator-service/src/caching/cache_error.rs index 5faa6e8fc..af4e6ebd1 100644 --- a/crates/symbolicator-service/src/caching/cache_error.rs +++ b/crates/symbolicator-service/src/caching/cache_error.rs @@ -152,7 +152,7 @@ impl CacheError { /// object could not be fetched or is otherwise unusable. pub type CacheEntry = Result; -/// Parses a [`CacheEntry`] from a [`ByteView`](ByteView). +/// Parses a [`CacheEntry`] from a [`ByteView`]. pub fn cache_entry_from_bytes(bytes: ByteView<'static>) -> CacheEntry> { CacheError::from_bytes(&bytes).map(Err).unwrap_or(Ok(bytes)) } diff --git a/crates/symbolicator-service/src/caching/cache_key.rs b/crates/symbolicator-service/src/caching/cache_key.rs index bfe56b6c4..6d5d9fbaa 100644 --- a/crates/symbolicator-service/src/caching/cache_key.rs +++ b/crates/symbolicator-service/src/caching/cache_key.rs @@ -76,7 +76,7 @@ impl CacheKey { /// A builder for [`CacheKey`]s. /// -/// This builder implements the [`Write`](std::fmt::Write) trait, and the intention of it is to +/// This builder implements the [`Write`] trait, and the intention of it is to /// accept human readable, but most importantly **stable**, input. /// This input in then being hashed to form the [`CacheKey`], and can also be serialized alongside /// the cache files to help debugging. diff --git a/crates/symbolicator-service/src/caching/memory.rs b/crates/symbolicator-service/src/caching/memory.rs index e141e9ca7..873174026 100644 --- a/crates/symbolicator-service/src/caching/memory.rs +++ b/crates/symbolicator-service/src/caching/memory.rs @@ -170,6 +170,13 @@ pub trait CacheItemRequest: 'static + Send + Sync + Clone { } impl Cacher { + fn shared_cache(&self, request: &T) -> Option<&SharedCacheService> { + request + .use_shared_cache() + .then(|| self.shared_cache.get()) + .flatten() + } + /// Compute an item. /// /// The item is computed using [`T::compute`](CacheItemRequest::compute), and saved in the cache @@ -182,9 +189,8 @@ impl Cacher { let cache_path = key.cache_path(T::VERSIONS.current); let mut temp_file = self.config.tempfile()?; - let shared_cache_hit = if !request.use_shared_cache() { - false - } else if let Some(shared_cache) = self.shared_cache.get() { + let shared_cache = self.shared_cache(&request); + let shared_cache_hit = if let Some(shared_cache) = shared_cache { let temp_fd = tokio::fs::File::from_std(temp_file.reopen()?); shared_cache.fetch(name, &cache_path, temp_fd).await } else { @@ -263,9 +269,9 @@ impl Cacher { // TODO: Not handling negative caches probably has a huge perf impact. Need to // figure out negative caches. Maybe put them in redis with a TTL? - if !shared_cache_hit && request.use_shared_cache() { + if !shared_cache_hit { if let Ok(byteview) = &entry { - if let Some(shared_cache) = self.shared_cache.get() { + if let Some(shared_cache) = shared_cache { shared_cache.store(name, &cache_path, byteview.clone(), CacheStoreReason::New); } } @@ -293,7 +299,7 @@ impl Cacher { let init = Box::pin(async { // cache_path is None when caching is disabled. if let Some(cache_dir) = self.config.cache_dir() { - let shared_cache = self.shared_cache.get(); + let shared_cache = self.shared_cache(&request); let versions = std::iter::once(T::VERSIONS.current) .chain(T::VERSIONS.fallbacks.iter().copied()); diff --git a/crates/symbolicator-service/src/services/bitcode.rs b/crates/symbolicator-service/src/services/bitcode.rs index 78a7fedd6..53ff3b738 100644 --- a/crates/symbolicator-service/src/services/bitcode.rs +++ b/crates/symbolicator-service/src/services/bitcode.rs @@ -132,6 +132,10 @@ impl CacheItemRequest for FetchFileRequest { data, })) } + + fn use_shared_cache(&self) -> bool { + self.file_source.worth_using_shared_cache() + } } #[derive(Debug, Clone)] diff --git a/crates/symbolicator-service/src/services/il2cpp.rs b/crates/symbolicator-service/src/services/il2cpp.rs index 0bcb6d552..ecdc9fbb9 100644 --- a/crates/symbolicator-service/src/services/il2cpp.rs +++ b/crates/symbolicator-service/src/services/il2cpp.rs @@ -92,6 +92,10 @@ impl CacheItemRequest for FetchFileRequest { data, }) } + + fn use_shared_cache(&self) -> bool { + self.file_source.worth_using_shared_cache() + } } #[derive(Debug, Clone)] diff --git a/crates/symbolicator-service/src/services/objects/data_cache.rs b/crates/symbolicator-service/src/services/objects/data_cache.rs index 501341237..a8202f2b0 100644 --- a/crates/symbolicator-service/src/services/objects/data_cache.rs +++ b/crates/symbolicator-service/src/services/objects/data_cache.rs @@ -233,6 +233,10 @@ impl CacheItemRequest for FetchFileDataRequest { Ok(Arc::new(object_handle)) } + + fn use_shared_cache(&self) -> bool { + self.0.file_source.worth_using_shared_cache() + } } #[cfg(test)] diff --git a/crates/symbolicator-service/src/services/objects/mod.rs b/crates/symbolicator-service/src/services/objects/mod.rs index 2e308c6ae..fe9a2a211 100644 --- a/crates/symbolicator-service/src/services/objects/mod.rs +++ b/crates/symbolicator-service/src/services/objects/mod.rs @@ -74,6 +74,11 @@ pub struct FindResult { #[derive(Clone, Debug)] pub struct ObjectsActor { + // FIXME(swatinem): Having a fully fledged filesystem and shared cache for these tiny file meta + // items is heavy handed and wasteful. However, we *do* want to have this in shared cache, as + // it is the primary thing that makes cold starts fast, as we do not need to fetch the whole + // objects, but just the derived caches. Some lighter weight solution, like Redis might be more + // appropriate at some point. meta_cache: Arc>, data_cache: Arc>, download_svc: Arc, diff --git a/crates/symbolicator-sources/src/remotefile.rs b/crates/symbolicator-sources/src/remotefile.rs index d1d408d2c..a2457b0bf 100644 --- a/crates/symbolicator-sources/src/remotefile.rs +++ b/crates/symbolicator-sources/src/remotefile.rs @@ -280,6 +280,23 @@ impl RemoteFile { RemoteFile::Sentry(source) => source.host(), } } + + /// Determines if this [`RemoteFile`] is worth caching in shared cache or not. + /// + /// This is `true` for all external symbol servers, but not for our internal source, as we would + /// just save the same bytes twice, with an extra roundtrip on fetches. + pub fn worth_using_shared_cache(&self) -> bool { + match self { + // This is our internal sentry source. It is debatable if we want to rather fetch + // the file through the Python->FileStore->GCS indirection, + // or rather pay to save it twice so we can access GCS (shared cache) directly. + RemoteFile::Sentry(_) => true, + // This is most likely our scraped apple symbols, which are hosted on GCS directly, no + // need to save them twice in shared cache. + RemoteFile::Gcs(source) if source.source.id.as_str().starts_with("sentry:") => false, + _ => true, + } + } } /// A URI representing an [`RemoteFile`].