From 3498cecdd8bd1f536ef4b8f2b9216155951e6202 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 6 Feb 2024 17:30:16 +0100 Subject: [PATCH] Rewrite making the `contents` an `Option` --- crates/symbolicator-js/src/bundle_lookup.rs | 18 ++++--- crates/symbolicator-js/src/lookup.rs | 52 ++++++++++++------- crates/symbolicator-js/src/sourcemap_cache.rs | 32 ++++++------ crates/symbolicator-js/src/symbolication.rs | 4 +- 4 files changed, 59 insertions(+), 47 deletions(-) diff --git a/crates/symbolicator-js/src/bundle_lookup.rs b/crates/symbolicator-js/src/bundle_lookup.rs index 23196399c..4a4e46239 100644 --- a/crates/symbolicator-js/src/bundle_lookup.rs +++ b/crates/symbolicator-js/src/bundle_lookup.rs @@ -1,7 +1,6 @@ use std::fmt; use std::time::Duration; -use symbolicator_service::caches::ByteViewString; use symbolicator_sources::RemoteFileUri; use crate::interface::ResolvedWith; @@ -35,11 +34,12 @@ impl FileInBundleCache { .time_to_idle(Duration::from_secs(60 * 60)) .name("file-in-bundle") .weigher(|_k, v| { - let content_size = - v.0.entry - .as_ref() - .map(|cached_file| cached_file.contents.len()) - .unwrap_or_default(); + let content_size = v + .0 + .entry + .as_ref() + .map(|cached_file| cached_file.contents.as_deref().unwrap_or_default().len()) + .unwrap_or_default(); (std::mem::size_of_val(v) + content_size) .try_into() .unwrap_or(u32::MAX) @@ -83,8 +83,10 @@ impl FileInBundleCache { let mut file_entry = file_entry.clone(); if matches!(key, FileKey::SourceMap { .. }) { if let Ok(cached_file) = file_entry.entry.as_mut() { - cached_file.is_lazy = true; - cached_file.contents = ByteViewString::from(String::new()); + // SourceMaps are usually very large, and we only need them for + // `sourcemapcache` creation. So do not persist the actual contents here. + // Rather load them later in `sourcemapcache` generation if needed. + cached_file.contents = None; } } let key = (bundle_uri.clone(), key.clone()); diff --git a/crates/symbolicator-js/src/lookup.rs b/crates/symbolicator-js/src/lookup.rs index 88d237383..2160378d8 100644 --- a/crates/symbolicator-js/src/lookup.rs +++ b/crates/symbolicator-js/src/lookup.rs @@ -484,22 +484,21 @@ impl CachedFileEntry { /// the parts that we care about. #[derive(Clone)] pub struct CachedFile { - pub is_lazy: bool, - pub contents: ByteViewString, + pub contents: Option, sourcemap_url: Option>, } impl fmt::Debug for CachedFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let contents = if self.contents.len() > 64 { + let contents: &str = self.contents(); + let contents = if contents.len() > 64 { // its just `Debug` prints, but we would like the end of the file, as it may // have a `sourceMappingURL` - format!("...{}", &self.contents[self.contents.len() - 61..]) + format!("...{}", &contents[contents.len() - 61..]) } else { - self.contents.to_string() + contents.to_string() }; f.debug_struct("CachedFile") - .field("is_lazy", &self.is_lazy) .field("contents", &contents) .field("sourcemap_url", &self.sourcemap_url) .finish() @@ -528,15 +527,24 @@ impl CachedFile { .into_contents() .ok_or_else(|| CacheError::Malformed("descriptor should have `contents`".into()))? .into_owned(); - let contents = ByteViewString::from(contents); + let contents = Some(ByteViewString::from(contents)); Ok(Self { - is_lazy: false, contents, sourcemap_url: sourcemap_url.map(Arc::new), }) } + pub fn contents(&self) -> &str { + self.contents.as_deref().unwrap_or_default() + } + + pub fn owned_contents(&self) -> ByteViewString { + self.contents + .clone() + .unwrap_or(ByteViewString::from(String::new())) + } + /// Returns a string representation of a SourceMap URL if it was coming from a remote resource. pub fn sourcemap_url(&self) -> Option { self.sourcemap_url @@ -633,8 +641,7 @@ impl ArtifactFetcher { Some(SourceMapUrl::Data(data)) => CachedFileEntry { uri: CachedFileUri::Embedded, entry: Ok(CachedFile { - is_lazy: false, - contents: data.clone(), + contents: Some(data.clone()), sourcemap_url: None, }), resolved_with: minified_source.resolved_with, @@ -940,8 +947,7 @@ impl ArtifactFetcher { .push(JsScrapingAttempt::success(abs_path.to_owned())); Ok(CachedFile { - is_lazy: false, - contents, + contents: Some(contents), sourcemap_url, }) } @@ -971,6 +977,8 @@ impl ArtifactFetcher { .files_in_bundles .try_get(self.artifact_bundles.keys().rev().cloned(), key.clone()) { + // we would like to gather metrics for which method we used to resolve the artifact bundle + // containing the file. we should also be doing so if we got the file from the cache. if let Some(Ok((_, bundle_resolved_with))) = self.artifact_bundles.get(&bundle_uri) { self.metrics.record_file_found_in_bundle( key.as_type(), @@ -1086,8 +1094,7 @@ impl ArtifactFetcher { // Get the sourcemap reference from the artifact, either from metadata, or file contents let sourcemap_url = resolve_sourcemap_url(abs_path, &artifact.headers, &contents); CachedFile { - is_lazy: false, - contents, + contents: Some(contents), sourcemap_url: sourcemap_url.map(Arc::new), } }), @@ -1256,7 +1263,7 @@ impl ArtifactFetcher { let smcache = match &source.entry { Ok(source_entry) => match sourcemap.entry { Ok(sourcemap_entry) => { - let source_content = source_entry.contents.clone(); + let source_content = source_entry.owned_contents(); let cache_key = { let mut cache_key = CacheKey::scoped_builder(&self.scope); @@ -1271,8 +1278,7 @@ impl ArtifactFetcher { &mut cache_key, "sourcemap", &sourcemap.uri, - // NOTE: `write_cache_key` will never use the contents of a `lazy` sourcemap - sourcemap_entry.contents.as_ref(), + sourcemap_entry.contents().as_bytes(), ); cache_key.build() @@ -1282,7 +1288,11 @@ impl ArtifactFetcher { &self.artifact_bundles, &sourcemap.uri, sourcemap_entry, - ); + ) + .unwrap_or_else(|| { + tracing::error!("expected either a `Bundled` or `Resolved` SourceMap"); + SourceMapContents::Resolved(ByteViewString::from(String::new())) + }); let req = FetchSourceMapCacheInternal { source: source_content, @@ -1307,8 +1317,10 @@ impl ArtifactFetcher { } } -pub fn open_bundle(bundle: Arc) -> CacheEntry { - SelfCell::try_new(bundle, |handle| unsafe { +/// This opens `artifact_bundle` [`Object`], ensuring that it is a [`SourceBundle`](`Object::SourceBundle`), +/// and opening up a debug session to it. +pub fn open_bundle(artifact_bundle: Arc) -> CacheEntry { + SelfCell::try_new(artifact_bundle, |handle| unsafe { match (*handle).object() { Object::SourceBundle(source_bundle) => source_bundle .debug_session() diff --git a/crates/symbolicator-js/src/sourcemap_cache.rs b/crates/symbolicator-js/src/sourcemap_cache.rs index 201fef111..15471428e 100644 --- a/crates/symbolicator-js/src/sourcemap_cache.rs +++ b/crates/symbolicator-js/src/sourcemap_cache.rs @@ -20,8 +20,10 @@ use crate::utils::get_release_file_candidate_urls; #[derive(Clone, Debug)] pub enum SourceMapContents { - Lazy(Arc, FileKey), - Eager(ByteViewString), + /// The contents have to be fetched from the given [`ArtifactBundle`]. + FromBundle(Arc, FileKey), + /// The contents of the sourcemap have already been resolved. + Resolved(ByteViewString), } impl SourceMapContents { @@ -29,23 +31,19 @@ impl SourceMapContents { artifact_bundles: &ArtifactBundles, sourcemap_uri: &CachedFileUri, sourcemap: CachedFile, - ) -> Self { - if !sourcemap.is_lazy { - return Self::Eager(sourcemap.contents); + ) -> Option { + if let Some(contents) = sourcemap.contents { + return Some(Self::Resolved(contents)); } - let bundle = if let CachedFileUri::Bundled(bundle_uri, key) = sourcemap_uri { - artifact_bundles.get(bundle_uri).and_then(|bundle| { - let Ok((bundle, _)) = bundle else { return None }; - let bundle = bundle.owner().clone(); - let contents = Self::Lazy(bundle, key.clone()); - Some(contents) - }) - } else { - None + let CachedFileUri::Bundled(bundle_uri, key) = sourcemap_uri else { + return None; }; - bundle.unwrap_or(Self::Eager(sourcemap.contents)) + let (bundle, _) = artifact_bundles.get(bundle_uri)?.as_ref().ok()?; + let bundle = bundle.owner().clone(); + let contents = Self::FromBundle(bundle, key.clone()); + Some(contents) } } @@ -63,7 +61,7 @@ impl CacheItemRequest for FetchSourceMapCacheInternal { fn compute<'a>(&'a self, temp_file: &'a mut NamedTempFile) -> BoxFuture<'a, CacheEntry> { Box::pin(async move { let sourcemap = match &self.sourcemap { - SourceMapContents::Lazy(bundle, key) => { + SourceMapContents::FromBundle(bundle, key) => { let bundle = open_bundle(bundle.clone())?; let descriptor = get_descriptor_from_bundle(&bundle, key); @@ -75,7 +73,7 @@ impl CacheItemRequest for FetchSourceMapCacheInternal { .into_owned(); ByteViewString::from(contents) } - SourceMapContents::Eager(contents) => contents.clone(), + SourceMapContents::Resolved(contents) => contents.clone(), }; write_sourcemap_cache(temp_file.as_file_mut(), &self.source, &sourcemap) diff --git a/crates/symbolicator-js/src/symbolication.rs b/crates/symbolicator-js/src/symbolication.rs index af3e79029..ad9e690ff 100644 --- a/crates/symbolicator-js/src/symbolication.rs +++ b/crates/symbolicator-js/src/symbolication.rs @@ -121,7 +121,7 @@ async fn symbolicate_js_frame( match &module.minified_source.entry { Ok(minified_source) => { if should_apply_source_context { - apply_source_context(raw_frame, &minified_source.contents)? + apply_source_context(raw_frame, minified_source.contents())? } } Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { @@ -267,7 +267,7 @@ async fn symbolicate_js_frame( if source_file .as_ref() .map_err(|_| JsModuleErrorKind::MissingSource) - .and_then(|file| apply_source_context(&mut frame, &file.contents)) + .and_then(|file| apply_source_context(&mut frame, file.contents())) .is_err() { // It's arguable whether we should collect it, but this is what monolith does now,