diff --git a/crates/symbolicator-js/src/bundle_lookup.rs b/crates/symbolicator-js/src/bundle_lookup.rs index 8aa8dd83e..4a4e46239 100644 --- a/crates/symbolicator-js/src/bundle_lookup.rs +++ b/crates/symbolicator-js/src/bundle_lookup.rs @@ -3,9 +3,11 @@ use std::time::Duration; use symbolicator_sources::RemoteFileUri; +use crate::interface::ResolvedWith; use crate::lookup::{CachedFileEntry, FileKey}; -type FileInBundleCacheInner = moka::sync::Cache<(RemoteFileUri, FileKey), CachedFileEntry>; +type FileInBundleCacheInner = + moka::sync::Cache<(RemoteFileUri, FileKey), (CachedFileEntry, ResolvedWith)>; /// A cache that memoizes looking up files in artifact bundles. #[derive(Clone)] @@ -33,9 +35,10 @@ impl FileInBundleCache { .name("file-in-bundle") .weigher(|_k, v| { let content_size = v + .0 .entry .as_ref() - .map(|cached_file| cached_file.contents.len()) + .map(|cached_file| cached_file.contents.as_deref().unwrap_or_default().len()) .unwrap_or_default(); (std::mem::size_of_val(v) + content_size) .try_into() @@ -54,12 +57,12 @@ impl FileInBundleCache { &self, bundle_uris: impl Iterator<Item = RemoteFileUri>, mut key: FileKey, - ) -> Option<CachedFileEntry> { + ) -> Option<(RemoteFileUri, CachedFileEntry, ResolvedWith)> { for bundle_uri in bundle_uris { // XXX: this is a really horrible workaround for not being able to look up things via `(&A, &B)` instead of `&(A, B)`. let lookup_key = (bundle_uri, key); - if let Some(file_entry) = self.cache.get(&lookup_key) { - return Some(file_entry); + if let Some((file_entry, resolved_with)) = self.cache.get(&lookup_key) { + return Some((lookup_key.0, file_entry, resolved_with)); } key = lookup_key.1; } @@ -70,8 +73,23 @@ impl FileInBundleCache { /// /// Files are inserted under a specific bundle so that e.g. files with the same /// `abs_path` belonging to different events are disambiguated.
- pub fn insert(&self, bundle_uri: &RemoteFileUri, key: &FileKey, file_entry: &CachedFileEntry) { + pub fn insert( + &self, + bundle_uri: &RemoteFileUri, + key: &FileKey, + resolved_with: ResolvedWith, + file_entry: &CachedFileEntry, + ) { + let mut file_entry = file_entry.clone(); + if matches!(key, FileKey::SourceMap { .. }) { + if let Ok(cached_file) = file_entry.entry.as_mut() { + // SourceMaps are usually very large, and we only need them for + // `sourcemapcache` creation. So do not persist the actual contents here. + // Rather load them later in `sourcemapcache` generation if needed. + cached_file.contents = None; + } + } let key = (bundle_uri.clone(), key.clone()); - self.cache.insert(key, file_entry.clone()) + self.cache.insert(key, (file_entry, resolved_with)) } } diff --git a/crates/symbolicator-js/src/lib.rs b/crates/symbolicator-js/src/lib.rs index 673500a8d..1434b54e6 100644 --- a/crates/symbolicator-js/src/lib.rs +++ b/crates/symbolicator-js/src/lib.rs @@ -6,6 +6,7 @@ pub mod interface; mod lookup; mod metrics; mod service; +mod sourcemap_cache; mod symbolication; mod utils; diff --git a/crates/symbolicator-js/src/lookup.rs b/crates/symbolicator-js/src/lookup.rs index 24e2912ce..2160378d8 100644 --- a/crates/symbolicator-js/src/lookup.rs +++ b/crates/symbolicator-js/src/lookup.rs @@ -25,12 +25,9 @@ use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::{self, Write}; -use std::fs::File; -use std::io::{self, BufWriter}; use std::sync::Arc; use std::time::SystemTime; -use futures::future::BoxFuture; use reqwest::Url; use sha2::{Digest, Sha256}; use symbolic::common::{ByteView, DebugId, SelfCell}; @@ -39,18 +36,14 @@ use symbolic::debuginfo::sourcebundle::{ SourceBundleDebugSession, SourceFileDescriptor, SourceFileType, }; use symbolic::debuginfo::Object; -use symbolic::sourcemapcache::{SourceMapCache, SourceMapCacheWriter}; +use symbolic::sourcemapcache::SourceMapCache; use symbolicator_sources::{ 
HttpRemoteFile, ObjectType, RemoteFile, RemoteFileUri, SentryFileId, SentryRemoteFile, SentrySourceConfig, }; -use tempfile::NamedTempFile; -use symbolicator_service::caches::versions::SOURCEMAP_CACHE_VERSIONS; use symbolicator_service::caches::{ByteViewString, SourceFilesCache}; -use symbolicator_service::caching::{ - CacheEntry, CacheError, CacheItemRequest, CacheKey, CacheKeyBuilder, CacheVersions, Cacher, -}; +use symbolicator_service::caching::{CacheEntry, CacheError, CacheKey, CacheKeyBuilder, Cacher}; use symbolicator_service::download::DownloadService; use symbolicator_service::objects::{ObjectHandle, ObjectMetaHandle, ObjectsActor}; use symbolicator_service::types::{Scope, ScrapingConfig}; @@ -65,6 +58,7 @@ use crate::interface::{ SymbolicateJsStacktraces, }; use crate::metrics::JsMetrics; +use crate::sourcemap_cache::{FetchSourceMapCacheInternal, SourceMapContents}; use crate::utils::{ cache_busting_key, extract_file_stem, get_release_file_candidate_urls, join_paths, resolve_sourcemap_url, @@ -369,7 +363,7 @@ impl SourceMapUrl { } } -type ArtifactBundle = SelfCell<Arc<ObjectHandle>, SourceBundleDebugSession<'static>>; +pub type ArtifactBundle = SelfCell<Arc<ObjectHandle>, SourceBundleDebugSession<'static>>; /// The lookup key of an arbitrary file. #[derive(Debug, Hash, PartialEq, Eq, Clone)] @@ -395,7 +389,7 @@ impl FileKey { } /// Returns this key's debug id, if any. - fn debug_id(&self) -> Option<DebugId> { + pub fn debug_id(&self) -> Option<DebugId> { match self { FileKey::MinifiedSource { debug_id, .. } => *debug_id, FileKey::SourceMap { debug_id, .. } => *debug_id, @@ -413,7 +407,7 @@ impl FileKey { } /// Returns the type of the file this key represents. - fn as_type(&self) -> SourceFileType { + pub fn as_type(&self) -> SourceFileType { match self { FileKey::MinifiedSource { .. } => SourceFileType::MinifiedSource, FileKey::SourceMap { .. } => SourceFileType::SourceMap, @@ -490,18 +484,19 @@ impl CachedFileEntry { /// the parts that we care about.
#[derive(Clone)] pub struct CachedFile { - pub contents: ByteViewString, + pub contents: Option<ByteViewString>, sourcemap_url: Option<Arc<SourceMapUrl>>, } impl fmt::Debug for CachedFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let contents = if self.contents.len() > 64 { + let contents: &str = self.contents(); + let contents = if contents.len() > 64 { // its just `Debug` prints, but we would like the end of the file, as it may // have a `sourceMappingURL` - format!("...{}", &self.contents[self.contents.len() - 61..]) + format!("...{}", &contents[contents.len() - 61..]) } else { - self.contents.to_string() + contents.to_string() }; f.debug_struct("CachedFile") .field("contents", &contents) @@ -532,7 +527,7 @@ impl CachedFile { .into_contents() .ok_or_else(|| CacheError::Malformed("descriptor should have `contents`".into()))? .into_owned(); - let contents = ByteViewString::from(contents); + let contents = Some(ByteViewString::from(contents)); Ok(Self { contents, @@ -540,12 +535,22 @@ impl CachedFile { }) } + pub fn contents(&self) -> &str { + self.contents.as_deref().unwrap_or_default() + } + + pub fn owned_contents(&self) -> ByteViewString { + self.contents + .clone() + .unwrap_or(ByteViewString::from(String::new())) + } + /// Returns a string representation of a SourceMap URL if it was coming from a remote resource. pub fn sourcemap_url(&self) -> Option<String> { self.sourcemap_url .as_ref() .and_then(|sm| match sm.as_ref() { - SourceMapUrl::Remote(url) => Some(url.to_string()), + SourceMapUrl::Remote(url) => Some(url.clone()), SourceMapUrl::Data(_) => None, }) } @@ -558,6 +563,8 @@ struct IndividualArtifact { resolved_with: ResolvedWith, } +pub type ArtifactBundles = BTreeMap<RemoteFileUri, CacheEntry<(ArtifactBundle, ResolvedWith)>>; + struct ArtifactFetcher { metrics: JsMetrics, @@ -586,7 +593,7 @@ struct ArtifactFetcher { scraping: ScrapingConfig, /// The set of all the artifact bundles that we have downloaded so far.
- artifact_bundles: BTreeMap<RemoteFileUri, CacheEntry<(ArtifactBundle, ResolvedWith)>>, + artifact_bundles: ArtifactBundles, /// The set of individual artifacts, by their `url`. individual_artifacts: HashMap<String, IndividualArtifact>, @@ -634,7 +641,7 @@ impl ArtifactFetcher { Some(SourceMapUrl::Data(data)) => CachedFileEntry { uri: CachedFileUri::Embedded, entry: Ok(CachedFile { - contents: data.clone(), + contents: Some(data.clone()), sourcemap_url: None, }), resolved_with: minified_source.resolved_with, @@ -940,7 +947,7 @@ impl ArtifactFetcher { .push(JsScrapingAttempt::success(abs_path.to_owned())); Ok(CachedFile { - contents, + contents: Some(contents), sourcemap_url, }) } @@ -966,10 +973,20 @@ impl ArtifactFetcher { } // First see if we have already cached this file for any of this event's bundles. - if let Some(file_entry) = self + if let Some((bundle_uri, file_entry, resolved_with)) = self .files_in_bundles .try_get(self.artifact_bundles.keys().rev().cloned(), key.clone()) { + // we would like to gather metrics for which method we used to resolve the artifact bundle + // containing the file. we should also be doing so if we got the file from the cache.
+ if let Some(Ok((_, bundle_resolved_with))) = self.artifact_bundles.get(&bundle_uri) { + self.metrics.record_file_found_in_bundle( + key.as_type(), + resolved_with, + *bundle_resolved_with, + ); + } + return Some(file_entry); } @@ -993,7 +1010,12 @@ impl ArtifactFetcher { entry: CachedFile::from_descriptor(key.abs_path(), descriptor), resolved_with: ResolvedWith::DebugId, }; - self.files_in_bundles.insert(bundle_uri, key, &file_entry); + self.files_in_bundles.insert( + bundle_uri, + key, + ResolvedWith::DebugId, + &file_entry, + ); return Some(file_entry); } } @@ -1019,7 +1041,12 @@ impl ArtifactFetcher { entry: CachedFile::from_descriptor(Some(abs_path), descriptor), resolved_with: *resolved_with, }; - self.files_in_bundles.insert(bundle_uri, key, &file_entry); + self.files_in_bundles.insert( + bundle_uri, + key, + ResolvedWith::Url, + &file_entry, + ); return Some(file_entry); } } @@ -1067,7 +1094,7 @@ impl ArtifactFetcher { // Get the sourcemap reference from the artifact, either from metadata, or file contents let sourcemap_url = resolve_sourcemap_url(abs_path, &artifact.headers, &contents); CachedFile { - contents, + contents: Some(contents), sourcemap_url: sourcemap_url.map(Arc::new), } }), @@ -1202,18 +1229,7 @@ impl ArtifactFetcher { let object_handle = ObjectMetaHandle::for_scoped_file(self.scope.clone(), file); let fetched_bundle = self.objects.fetch(object_handle).await?; - - SelfCell::try_new(fetched_bundle, |handle| unsafe { - match (*handle).object() { - Object::SourceBundle(source_bundle) => source_bundle - .debug_session() - .map_err(CacheError::from_std_error), - obj => { - tracing::error!("expected a `SourceBundle`, got `{}`", obj.file_format()); - Err(CacheError::InternalError) - } - } - }) + open_bundle(fetched_bundle) } #[tracing::instrument(skip_all)] @@ -1247,8 +1263,7 @@ impl ArtifactFetcher { let smcache = match &source.entry { Ok(source_entry) => match sourcemap.entry { Ok(sourcemap_entry) => { - let source_content = 
source_entry.contents.clone(); - let sourcemap_content = sourcemap_entry.contents; + let source_content = source_entry.owned_contents(); let cache_key = { let mut cache_key = CacheKey::scoped_builder(&self.scope); @@ -1263,15 +1278,25 @@ impl ArtifactFetcher { &mut cache_key, "sourcemap", &sourcemap.uri, - sourcemap_content.as_ref(), + sourcemap_entry.contents().as_bytes(), ); cache_key.build() }; + let sourcemap = SourceMapContents::from_cachedfile( + &self.artifact_bundles, + &sourcemap.uri, + sourcemap_entry, + ) + .unwrap_or_else(|| { + tracing::error!("expected either a `Bundled` or `Resolved` SourceMap"); + SourceMapContents::Resolved(ByteViewString::from(String::new())) + }); + let req = FetchSourceMapCacheInternal { source: source_content, - sourcemap: sourcemap_content, + sourcemap, }; self.sourcemap_caches.compute_memoized(req, cache_key).await } @@ -1292,46 +1317,18 @@ } } -#[derive(Clone, Debug)] -pub struct FetchSourceMapCacheInternal { - source: ByteViewString, - sourcemap: ByteViewString, -} - -impl CacheItemRequest for FetchSourceMapCacheInternal { - type Item = OwnedSourceMapCache; - - const VERSIONS: CacheVersions = SOURCEMAP_CACHE_VERSIONS; - - fn compute<'a>(&'a self, temp_file: &'a mut NamedTempFile) -> BoxFuture<'a, CacheEntry> { - Box::pin(async move { - write_sourcemap_cache(temp_file.as_file_mut(), &self.source, &self.sourcemap) - }) - } - - fn load(&self, data: ByteView<'static>) -> CacheEntry<Self::Item> { - parse_sourcemap_cache_owned(data) - } -} - -fn parse_sourcemap_cache_owned(byteview: ByteView<'static>) -> CacheEntry<OwnedSourceMapCache> { - SelfCell::try_new(byteview, |p| unsafe { - SourceMapCache::parse(&*p).map_err(CacheError::from_std_error) +/// This opens `artifact_bundle` [`Object`], ensuring that it is a [`SourceBundle`](`Object::SourceBundle`), +/// and opening up a debug session to it.
+pub fn open_bundle(artifact_bundle: Arc<ObjectHandle>) -> CacheEntry<ArtifactBundle> { + SelfCell::try_new(artifact_bundle, |handle| unsafe { + match (*handle).object() { + Object::SourceBundle(source_bundle) => source_bundle + .debug_session() + .map_err(CacheError::from_std_error), + obj => { + tracing::error!("expected a `SourceBundle`, got `{}`", obj.file_format()); + Err(CacheError::InternalError) + } + } }) } - -/// Computes and writes the SourceMapCache. -#[tracing::instrument(skip_all)] -fn write_sourcemap_cache(file: &mut File, source: &str, sourcemap: &str) -> CacheEntry { - tracing::debug!("Converting SourceMap cache"); - - let smcache_writer = SourceMapCacheWriter::new(source, sourcemap) - .map_err(|err| CacheError::Malformed(err.to_string()))?; - - let mut writer = BufWriter::new(file); - smcache_writer.serialize(&mut writer)?; - let file = writer.into_inner().map_err(io::Error::from)?; - file.sync_all()?; - - Ok(()) -} diff --git a/crates/symbolicator-js/src/service.rs b/crates/symbolicator-js/src/service.rs index 4e5609f0e..65d4e65f1 100644 --- a/crates/symbolicator-js/src/service.rs +++ b/crates/symbolicator-js/src/service.rs @@ -11,7 +11,7 @@ use symbolicator_service::services::SharedServices; use crate::api_lookup::SentryLookupApi; use crate::bundle_index_cache::BundleIndexCache; use crate::bundle_lookup::FileInBundleCache; -use crate::lookup::FetchSourceMapCacheInternal; +use crate::sourcemap_cache::FetchSourceMapCacheInternal; #[derive(Debug, Clone)] pub struct SourceMapService { diff --git a/crates/symbolicator-js/src/sourcemap_cache.rs b/crates/symbolicator-js/src/sourcemap_cache.rs new file mode 100644 index 000000000..15471428e --- /dev/null +++ b/crates/symbolicator-js/src/sourcemap_cache.rs @@ -0,0 +1,130 @@ +use std::fs::File; +use std::io::{self, BufWriter}; +use std::sync::Arc; + +use futures::future::BoxFuture; +use symbolic::common::{ByteView, SelfCell}; +use symbolic::debuginfo::sourcebundle::SourceFileDescriptor; +use symbolic::sourcemapcache::{SourceMapCache,
SourceMapCacheWriter}; +use symbolicator_service::caches::versions::SOURCEMAP_CACHE_VERSIONS; +use symbolicator_service::caches::ByteViewString; +use symbolicator_service::caching::{CacheEntry, CacheError, CacheItemRequest, CacheVersions}; +use symbolicator_service::objects::ObjectHandle; +use tempfile::NamedTempFile; + +use crate::lookup::{ + open_bundle, ArtifactBundle, ArtifactBundles, CachedFile, CachedFileUri, FileKey, + OwnedSourceMapCache, +}; +use crate::utils::get_release_file_candidate_urls; + +#[derive(Clone, Debug)] +pub enum SourceMapContents { + /// The contents have to be fetched from the given [`ArtifactBundle`]. + FromBundle(Arc<ObjectHandle>, FileKey), + /// The contents of the sourcemap have already been resolved. + Resolved(ByteViewString), +} + +impl SourceMapContents { + pub fn from_cachedfile( + artifact_bundles: &ArtifactBundles, + sourcemap_uri: &CachedFileUri, + sourcemap: CachedFile, + ) -> Option<Self> { + if let Some(contents) = sourcemap.contents { + return Some(Self::Resolved(contents)); + } + + let CachedFileUri::Bundled(bundle_uri, key) = sourcemap_uri else { + return None; + }; + + let (bundle, _) = artifact_bundles.get(bundle_uri)?.as_ref().ok()?; + let bundle = bundle.owner().clone(); + let contents = Self::FromBundle(bundle, key.clone()); + Some(contents) + } +} + +#[derive(Clone, Debug)] +pub struct FetchSourceMapCacheInternal { + pub source: ByteViewString, + pub sourcemap: SourceMapContents, +} + +impl CacheItemRequest for FetchSourceMapCacheInternal { + type Item = OwnedSourceMapCache; + + const VERSIONS: CacheVersions = SOURCEMAP_CACHE_VERSIONS; + + fn compute<'a>(&'a self, temp_file: &'a mut NamedTempFile) -> BoxFuture<'a, CacheEntry> { + Box::pin(async move { + let sourcemap = match &self.sourcemap { + SourceMapContents::FromBundle(bundle, key) => { + let bundle = open_bundle(bundle.clone())?; + let descriptor = get_descriptor_from_bundle(&bundle, key); + + let contents = descriptor + .and_then(|d| d.into_contents()) + .ok_or_else(|| {
CacheError::Malformed("descriptor should have `contents`".into()) + })? + .into_owned(); + ByteViewString::from(contents) + } + SourceMapContents::Resolved(contents) => contents.clone(), + }; + + write_sourcemap_cache(temp_file.as_file_mut(), &self.source, &sourcemap) + }) + } + + fn load(&self, data: ByteView<'static>) -> CacheEntry<Self::Item> { + parse_sourcemap_cache_owned(data) + } +} + +fn parse_sourcemap_cache_owned(byteview: ByteView<'static>) -> CacheEntry<OwnedSourceMapCache> { + SelfCell::try_new(byteview, |p| unsafe { + SourceMapCache::parse(&*p).map_err(CacheError::from_std_error) + }) +} + +/// Computes and writes the SourceMapCache. +#[tracing::instrument(skip_all)] +fn write_sourcemap_cache(file: &mut File, source: &str, sourcemap: &str) -> CacheEntry { + tracing::debug!("Converting SourceMap cache"); + + let smcache_writer = SourceMapCacheWriter::new(source, sourcemap) + .map_err(|err| CacheError::Malformed(err.to_string()))?; + + let mut writer = BufWriter::new(file); + smcache_writer.serialize(&mut writer)?; + let file = writer.into_inner().map_err(io::Error::from)?; + file.sync_all()?; + + Ok(()) +} + +fn get_descriptor_from_bundle<'b>( + bundle: &'b ArtifactBundle, + key: &FileKey, +) -> Option<SourceFileDescriptor<'b>> { + let bundle = bundle.get(); + let ty = key.as_type(); + + if let Some(debug_id) = key.debug_id() { + if let Ok(Some(descriptor)) = bundle.source_by_debug_id(debug_id, ty) { + return Some(descriptor); + } + } + if let Some(abs_path) = key.abs_path() { + for url in get_release_file_candidate_urls(abs_path) { + if let Ok(Some(descriptor)) = bundle.source_by_url(&url) { + return Some(descriptor); + } + } + } + None +} diff --git a/crates/symbolicator-js/src/symbolication.rs b/crates/symbolicator-js/src/symbolication.rs index af3e79029..ad9e690ff 100644 --- a/crates/symbolicator-js/src/symbolication.rs +++ b/crates/symbolicator-js/src/symbolication.rs @@ -121,7 +121,7 @@ async fn symbolicate_js_frame( match &module.minified_source.entry { Ok(minified_source) => { if
should_apply_source_context { - apply_source_context(raw_frame, &minified_source.contents)? + apply_source_context(raw_frame, minified_source.contents())? } } Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { @@ -267,7 +267,7 @@ async fn symbolicate_js_frame( if source_file .as_ref() .map_err(|_| JsModuleErrorKind::MissingSource) - .and_then(|file| apply_source_context(&mut frame, &file.contents)) + .and_then(|file| apply_source_context(&mut frame, file.contents())) .is_err() { // It's arguable whether we should collect it, but this is what monolith does now,