Skip to content

Commit

Permalink
Avoid using shared cache for internally stored files (#1320)
Browse files Browse the repository at this point in the history
  • Loading branch information
Swatinem authored Oct 6, 2023
1 parent d8704ed commit 8ce62ca
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 8 deletions.
7 changes: 7 additions & 0 deletions crates/symbolicator-js/src/bundle_index_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,11 @@ impl CacheItemRequest for BundleIndexRequest {
fn weight(item: &Self::Item) -> u32 {
item.0.max(std::mem::size_of::<Self::Item>() as u32)
}

fn use_shared_cache(&self) -> bool {
// These change frequently, and are being downloaded from Sentry directly.
// Here again, the question is whether we want to waste a bit of storage on GCS vs doing
// more requests to Python.
true
}
}
2 changes: 1 addition & 1 deletion crates/symbolicator-service/src/caching/cache_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl CacheError {
/// object could not be fetched or is otherwise unusable.
pub type CacheEntry<T = ()> = Result<T, CacheError>;

/// Parses a [`CacheEntry`] from a [`ByteView`](ByteView).
/// Parses a [`CacheEntry`] from a [`ByteView`].
pub fn cache_entry_from_bytes(bytes: ByteView<'static>) -> CacheEntry<ByteView<'static>> {
CacheError::from_bytes(&bytes).map(Err).unwrap_or(Ok(bytes))
}
2 changes: 1 addition & 1 deletion crates/symbolicator-service/src/caching/cache_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl CacheKey {

/// A builder for [`CacheKey`]s.
///
/// This builder implements the [`Write`](std::fmt::Write) trait, and the intention of it is to
/// This builder implements the [`Write`] trait, and the intention of it is to
/// accept human readable, but most importantly **stable**, input.
/// This input in then being hashed to form the [`CacheKey`], and can also be serialized alongside
/// the cache files to help debugging.
Expand Down
18 changes: 12 additions & 6 deletions crates/symbolicator-service/src/caching/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,13 @@ pub trait CacheItemRequest: 'static + Send + Sync + Clone {
}

impl<T: CacheItemRequest> Cacher<T> {
fn shared_cache(&self, request: &T) -> Option<&SharedCacheService> {
request
.use_shared_cache()
.then(|| self.shared_cache.get())
.flatten()
}

/// Compute an item.
///
/// The item is computed using [`T::compute`](CacheItemRequest::compute), and saved in the cache
Expand All @@ -182,9 +189,8 @@ impl<T: CacheItemRequest> Cacher<T> {
let cache_path = key.cache_path(T::VERSIONS.current);
let mut temp_file = self.config.tempfile()?;

let shared_cache_hit = if !request.use_shared_cache() {
false
} else if let Some(shared_cache) = self.shared_cache.get() {
let shared_cache = self.shared_cache(&request);
let shared_cache_hit = if let Some(shared_cache) = shared_cache {
let temp_fd = tokio::fs::File::from_std(temp_file.reopen()?);
shared_cache.fetch(name, &cache_path, temp_fd).await
} else {
Expand Down Expand Up @@ -263,9 +269,9 @@ impl<T: CacheItemRequest> Cacher<T> {

// TODO: Not handling negative caches probably has a huge perf impact. Need to
// figure out negative caches. Maybe put them in redis with a TTL?
if !shared_cache_hit && request.use_shared_cache() {
if !shared_cache_hit {
if let Ok(byteview) = &entry {
if let Some(shared_cache) = self.shared_cache.get() {
if let Some(shared_cache) = shared_cache {
shared_cache.store(name, &cache_path, byteview.clone(), CacheStoreReason::New);
}
}
Expand Down Expand Up @@ -293,7 +299,7 @@ impl<T: CacheItemRequest> Cacher<T> {
let init = Box::pin(async {
// cache_path is None when caching is disabled.
if let Some(cache_dir) = self.config.cache_dir() {
let shared_cache = self.shared_cache.get();
let shared_cache = self.shared_cache(&request);
let versions = std::iter::once(T::VERSIONS.current)
.chain(T::VERSIONS.fallbacks.iter().copied());

Expand Down
4 changes: 4 additions & 0 deletions crates/symbolicator-service/src/services/bitcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ impl CacheItemRequest for FetchFileRequest {
data,
}))
}

fn use_shared_cache(&self) -> bool {
self.file_source.worth_using_shared_cache()
}
}

#[derive(Debug, Clone)]
Expand Down
4 changes: 4 additions & 0 deletions crates/symbolicator-service/src/services/il2cpp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ impl CacheItemRequest for FetchFileRequest {
data,
})
}

fn use_shared_cache(&self) -> bool {
self.file_source.worth_using_shared_cache()
}
}

#[derive(Debug, Clone)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ impl CacheItemRequest for FetchFileDataRequest {

Ok(Arc::new(object_handle))
}

fn use_shared_cache(&self) -> bool {
self.0.file_source.worth_using_shared_cache()
}
}

#[cfg(test)]
Expand Down
5 changes: 5 additions & 0 deletions crates/symbolicator-service/src/services/objects/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ pub struct FindResult {

#[derive(Clone, Debug)]
pub struct ObjectsActor {
// FIXME(swatinem): Having a fully fledged filesystem and shared cache for these tiny file meta
// items is heavy handed and wasteful. However, we *do* want to have this in shared cache, as
// it is the primary thing that makes cold starts fast, as we do not need to fetch the whole
// objects, but just the derived caches. Some lighter weight solution, like Redis might be more
// appropriate at some point.
meta_cache: Arc<Cacher<FetchFileMetaRequest>>,
data_cache: Arc<Cacher<FetchFileDataRequest>>,
download_svc: Arc<DownloadService>,
Expand Down
17 changes: 17 additions & 0 deletions crates/symbolicator-sources/src/remotefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,23 @@ impl RemoteFile {
RemoteFile::Sentry(source) => source.host(),
}
}

/// Determines if this [`RemoteFile`] is worth caching in shared cache or not.
///
/// This is `true` for all external symbol servers, but not for our internal source, as we would
/// just save the same bytes twice, with an extra roundtrip on fetches.
pub fn worth_using_shared_cache(&self) -> bool {
match self {
// This is our internal sentry source. It is debatable if we want to rather fetch
// the file through the Python->FileStore->GCS indirection,
// or rather pay to save it twice so we can access GCS (shared cache) directly.
RemoteFile::Sentry(_) => true,
// This is most likely our scraped apple symbols, which are hosted on GCS directly, no
// need to save them twice in shared cache.
RemoteFile::Gcs(source) if source.source.id.as_str().starts_with("sentry:") => false,
_ => true,
}
}
}

/// A URI representing an [`RemoteFile`].
Expand Down

0 comments on commit 8ce62ca

Please sign in to comment.