diff --git a/Cargo.lock b/Cargo.lock index 9de607b96..c69a1b265 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4221,6 +4221,7 @@ dependencies = [ "serde_json", "symbolic", "symbolicator-crash", + "symbolicator-js", "symbolicator-service", "symbolicator-sources", "symbolicator-test", @@ -4246,6 +4247,32 @@ dependencies = [ "cmake", ] +[[package]] +name = "symbolicator-js" +version = "23.9.1" +dependencies = [ + "data-url", + "futures", + "humantime", + "insta", + "moka", + "once_cell", + "regex", + "reqwest", + "sentry", + "serde", + "serde_json", + "sha2", + "symbolic", + "symbolicator-service", + "symbolicator-sources", + "symbolicator-test", + "tempfile", + "tokio", + "tracing", + "url", +] + [[package]] name = "symbolicator-service" version = "23.9.1" @@ -4330,6 +4357,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "symbolicator-js", "symbolicator-service", "symbolicator-test", "tempfile", @@ -4368,6 +4396,7 @@ dependencies = [ "serde_json", "serde_yaml", "symbolic", + "symbolicator-js", "symbolicator-service", "symbolicator-sources", "tempfile", diff --git a/crates/symbolicator-js/Cargo.toml b/crates/symbolicator-js/Cargo.toml new file mode 100644 index 000000000..3e66f42c9 --- /dev/null +++ b/crates/symbolicator-js/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "symbolicator-js" +publish = false +version = "23.9.1" +authors = ["Sentry "] +edition = "2021" +license = "MIT" + +[dependencies] +data-url = "0.3.0" +futures = "0.3.12" +humantime = "2.1.0" +moka = { version = "0.12.1", features = ["future", "sync"] } +once_cell = "1.17.1" +regex = "1.5.5" +reqwest = { version = "0.11.0", features = ["gzip", "brotli", "deflate", "json", "stream", "trust-dns"] } +sentry = { version = "0.31.7", features = ["tracing"] } +serde = { version = "1.0.137", features = ["derive", "rc"] } +serde_json = "1.0.81" +sha2 = "0.10.6" +symbolic = { version = "12.4.0", features = ["common-serde", "sourcemapcache"] } +symbolicator-service = { path = "../symbolicator-service" } 
+symbolicator-sources = { path = "../symbolicator-sources" } +tempfile = "3.2.0" +tokio = { version = "1.24.2", features = ["rt", "macros", "fs"] } +tracing = "0.1.34" +url = { version = "2.2.0", features = ["serde"] } + +[dev-dependencies] +insta = { version = "1.18.0", features = ["redactions", "yaml"] } +symbolicator-test = { path = "../symbolicator-test" } diff --git a/crates/symbolicator-js/src/api_lookup.rs b/crates/symbolicator-js/src/api_lookup.rs new file mode 100644 index 000000000..122e83bda --- /dev/null +++ b/crates/symbolicator-js/src/api_lookup.rs @@ -0,0 +1,233 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt; +use std::sync::Arc; +use std::time::Duration; + +use sentry::types::DebugId; +use sentry::SentryFutureExt; +use serde::Deserialize; +use symbolicator_service::metric; +use symbolicator_service::services::download::retry; +use symbolicator_service::services::download::sentry::{SearchQuery, SentryDownloader}; +use url::Url; + +use symbolicator_service::caching::{CacheEntry, CacheError}; +use symbolicator_service::config::InMemoryCacheConfig; +use symbolicator_service::utils::futures::{m, measure, CancelOnDrop}; +use symbolicator_service::utils::http::DownloadTimeouts; +use symbolicator_sources::{RemoteFile, SentryFileId, SentryRemoteFile, SentrySourceConfig}; + +use crate::interface::ResolvedWith; + +#[derive(Clone, Debug, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum RawJsLookupResult { + Bundle { + id: SentryFileId, + url: Url, + #[serde(default)] + resolved_with: ResolvedWith, + }, + File { + id: SentryFileId, + url: Url, + abs_path: String, + #[serde(default)] + headers: ArtifactHeaders, + #[serde(default)] + resolved_with: ResolvedWith, + }, +} + +pub type ArtifactHeaders = BTreeMap; + +/// The Result of looking up JS Artifacts. +#[derive(Clone, Debug)] +pub enum JsLookupResult { + /// This is an `ArtifactBundle`. + ArtifactBundle { + /// The [`RemoteFile`] to download this bundle from. 
+ remote_file: RemoteFile, + resolved_with: ResolvedWith, + }, + /// This is an individual artifact file. + IndividualArtifact { + /// The [`RemoteFile`] to download this artifact from. + remote_file: RemoteFile, + /// The absolute path (also called `url`) of the artifact. + abs_path: String, + /// Arbitrary headers of this file, such as a `Sourcemap` reference. + headers: ArtifactHeaders, + resolved_with: ResolvedWith, + }, +} + +/// An LRU Cache for Sentry JS Artifact lookups. +type SentryJsCache = moka::future::Cache>>; + +pub struct SentryLookupApi { + client: reqwest::Client, + runtime: tokio::runtime::Handle, + js_cache: SentryJsCache, + timeouts: DownloadTimeouts, +} + +impl fmt::Debug for SentryLookupApi { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SentryLookupApi") + .field("js_cache", &self.js_cache.entry_count()) + .field("timeouts", &self.timeouts) + .finish() + } +} + +impl SentryLookupApi { + pub fn new( + client: reqwest::Client, + runtime: tokio::runtime::Handle, + timeouts: DownloadTimeouts, + in_memory: &InMemoryCacheConfig, + ) -> Self { + let js_cache = SentryJsCache::builder() + .max_capacity(in_memory.sentry_index_capacity) + .time_to_live(in_memory.sentry_index_ttl) + .build(); + Self { + client, + runtime, + js_cache, + timeouts, + } + } + + /// Look up a list of bundles or individual artifact files covering the + /// `debug_ids` and `file_stems` (using the `release` + `dist`). + pub async fn lookup_js_artifacts( + &self, + source: Arc, + debug_ids: BTreeSet, + file_stems: BTreeSet, + release: Option<&str>, + dist: Option<&str>, + ) -> CacheEntry> { + let mut lookup_url = source.url.clone(); + { + let mut query = lookup_url.query_pairs_mut(); + + if let Some(release) = release { + query.append_pair("release", release); + + // A `url` is only valid in combination with a `release`. 
+ for file_stem in file_stems { + query.append_pair("url", &file_stem); + } + } + if let Some(dist) = dist { + query.append_pair("dist", dist); + } + for debug_id in debug_ids { + query.append_pair("debug_id", &debug_id.to_string()); + } + } + + // NOTE: `http::Uri` has a hard limit defined, and reqwest unconditionally unwraps such + // errors, when converting between `Url` to `Uri`. To avoid a panic in that case, we + // duplicate the check here to gracefully error out. + if lookup_url.as_str().len() > (u16::MAX - 1) as usize { + return Err(CacheError::DownloadError("uri too long".into())); + } + + let query = SearchQuery { + index_url: lookup_url, + token: source.token.clone(), + }; + + metric!(counter("source.sentry.js_lookup.access") += 1); + + let init = Box::pin(async { + metric!(counter("source.sentry.js_lookup.computation") += 1); + tracing::debug!( + "Fetching list of Sentry JS artifacts from {}", + &query.index_url + ); + + let future = { + let client = self.client.clone(); + let query = query.clone(); + async move { retry(|| SentryDownloader::fetch_sentry_json(&client, &query)).await } + }; + + let future = + CancelOnDrop::new(self.runtime.spawn(future.bind_hub(sentry::Hub::current()))); + + let timeout = Duration::from_secs(30); + let future = tokio::time::timeout(timeout, future); + let future = measure( + "service.download.lookup_js_artifacts", + m::timed_result, + future, + ); + + future + .await + .map_err(|_| CacheError::Timeout(timeout))? + .map_err(|_| CacheError::InternalError)? 
+ }); + + let entries = self + .js_cache + .entry_by_ref(&query) + .or_insert_with_if(init, |entry| entry.is_err()) + .await + .into_value()?; + + let results = entries + .iter() + .map(|raw| match raw { + RawJsLookupResult::Bundle { + id, + url, + resolved_with, + } => JsLookupResult::ArtifactBundle { + remote_file: make_remote_file(&source, id, url), + resolved_with: *resolved_with, + }, + RawJsLookupResult::File { + id, + url, + abs_path, + headers, + resolved_with, + } => JsLookupResult::IndividualArtifact { + remote_file: make_remote_file(&source, id, url), + abs_path: abs_path.clone(), + headers: headers.clone(), + resolved_with: *resolved_with, + }, + }) + .collect(); + Ok(results) + } +} + +/// Transforms the given `url` into a [`RemoteFile`]. +/// +/// The problem here is being forward-compatible to a future in which the Sentry API returns +/// pre-authenticated Urls on some external file storage service. +/// Whereas right now, these files are still being served from a Sentry API endpoint, which +/// needs to be authenticated via a `token` that we do not want to leak to any public Url, as +/// well as using a restricted IP that is being blocked for arbitrary HTTP files. 
+fn make_remote_file( + source: &Arc, + file_id: &SentryFileId, + url: &Url, +) -> RemoteFile { + let use_credentials = url.as_str().starts_with(source.url.as_str()); + SentryRemoteFile::new( + Arc::clone(source), + use_credentials, + file_id.clone(), + Some(url.clone()), + ) + .into() +} diff --git a/crates/symbolicator-service/src/services/bundle_index.rs b/crates/symbolicator-js/src/bundle_index.rs similarity index 100% rename from crates/symbolicator-service/src/services/bundle_index.rs rename to crates/symbolicator-js/src/bundle_index.rs diff --git a/crates/symbolicator-service/src/services/caches/bundle_index.rs b/crates/symbolicator-js/src/bundle_index_cache.rs similarity index 87% rename from crates/symbolicator-service/src/services/caches/bundle_index.rs rename to crates/symbolicator-js/src/bundle_index_cache.rs index 2a7aa567a..337e0d254 100644 --- a/crates/symbolicator-service/src/services/caches/bundle_index.rs +++ b/crates/symbolicator-js/src/bundle_index_cache.rs @@ -1,17 +1,16 @@ use std::sync::Arc; use symbolic::common::ByteView; -use symbolicator_sources::RemoteFile; - -use crate::caching::{ +use symbolicator_service::caching::{ Cache, CacheEntry, CacheItemRequest, CacheKey, CacheVersions, Cacher, SharedCacheRef, }; -use crate::services::bundle_index::BundleIndex; -use crate::services::download::DownloadService; -use crate::services::fetch_file; -use crate::types::Scope; +use symbolicator_service::services::caches::versions::BUNDLE_INDEX_CACHE_VERSIONS; +use symbolicator_service::services::download::DownloadService; +use symbolicator_service::services::fetch_file; +use symbolicator_service::types::Scope; +use symbolicator_sources::RemoteFile; -use super::versions::BUNDLE_INDEX_CACHE_VERSIONS; +use crate::bundle_index::BundleIndex; type BundleIndexCacheItem = (u32, Arc); diff --git a/crates/symbolicator-js/src/interface.rs b/crates/symbolicator-js/src/interface.rs new file mode 100644 index 000000000..60a3a0b90 --- /dev/null +++ 
b/crates/symbolicator-js/src/interface.rs @@ -0,0 +1,273 @@ +use std::collections::HashSet; +use std::fmt; +use std::sync::Arc; + +use reqwest::Url; +use serde::{Deserialize, Serialize}; + +use symbolicator_service::caching::CacheError; +use symbolicator_service::services::ScrapingConfig; +use symbolicator_service::types::{RawObjectInfo, Scope}; +use symbolicator_sources::{SentryFileId, SentrySourceConfig}; + +#[derive(Debug, Clone)] +pub struct SymbolicateJsStacktraces { + pub scope: Scope, + pub source: Arc, + pub release: Option, + pub dist: Option, + pub debug_id_index: Option, + pub url_index: Option, + pub stacktraces: Vec, + pub modules: Vec, + pub scraping: ScrapingConfig, + /// Whether to apply source context for the stack frames. + pub apply_source_context: bool, +} + +// Some of the renames are there only to make it synchronized +// with the already existing monolith naming scheme. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type")] +pub enum JsModuleErrorKind { + InvalidLocation { line: u32, col: Option }, + InvalidAbsPath, + NoColumn, + MissingSourceContent { source: String, sourcemap: String }, + MissingSource, + MalformedSourcemap { url: String }, + MissingSourcemap, + InvalidBase64Sourcemap, + ScrapingDisabled, +} + +impl fmt::Display for JsModuleErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + JsModuleErrorKind::InvalidLocation { line, col } => { + write!(f, "Invalid source location")?; + match (line, col) { + (l, None) => write!(f, ": line:{l}")?, + (l, Some(c)) => write!(f, ": line:{l}, col:{c}")?, + } + Ok(()) + } + JsModuleErrorKind::InvalidAbsPath => write!(f, "Invalid absolute path"), + JsModuleErrorKind::NoColumn => write!(f, "No column information"), + JsModuleErrorKind::MissingSourceContent { source, sourcemap } => write!( + f, + "Missing source contents for source file {source} and sourcemap file {sourcemap}" 
+ ), + JsModuleErrorKind::MissingSource => write!(f, "Missing source file"), + JsModuleErrorKind::MalformedSourcemap { url } => { + write!(f, "Sourcemap file at {url} is malformed") + } + JsModuleErrorKind::MissingSourcemap => write!(f, "Missing sourcemap file"), + JsModuleErrorKind::InvalidBase64Sourcemap => write!(f, "Invalid base64 sourcemap"), + JsModuleErrorKind::ScrapingDisabled => { + write!(f, "Could not download file because scraping is disabled") + } + } + } +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +pub struct JsModuleError { + pub abs_path: String, + #[serde(flatten)] + pub kind: JsModuleErrorKind, +} + +/// An attempt to scrape a JS source or sourcemap file from the web. +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct JsScrapingAttempt { + /// The URL we attempted to scrape from. + pub url: String, + /// The outcome of the attempt. + #[serde(flatten)] + pub result: JsScrapingResult, +} + +impl JsScrapingAttempt { + pub fn success(url: String) -> Self { + Self { + url, + result: JsScrapingResult::Success, + } + } + pub fn not_attempted(url: String) -> Self { + Self { + url, + result: JsScrapingResult::NotAttempted, + } + } + + pub fn failure(url: String, reason: JsScrapingFailureReason, details: String) -> Self { + Self { + url, + result: JsScrapingResult::Failure { reason, details }, + } + } +} + +/// The outcome of a scraping attempt. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "status")] +pub enum JsScrapingResult { + /// We didn't actually attempt scraping because we already obtained the file + /// by another method. + NotAttempted, + /// The file was successfully scraped. + Success, + /// The file couldn't be scraped. + Failure { + /// The basic reason for the failure. + reason: JsScrapingFailureReason, + #[serde(skip_serializing_if = "String::is_empty")] + /// A more detailed explanation of the failure. 
+ details: String, + }, +} + +impl From for JsScrapingResult { + fn from(value: CacheError) -> Self { + let (reason, details) = match value { + CacheError::NotFound => (JsScrapingFailureReason::NotFound, String::new()), + CacheError::PermissionDenied(details) => { + (JsScrapingFailureReason::PermissionDenied, details) + } + CacheError::Timeout(duration) => ( + JsScrapingFailureReason::Timeout, + format!("Timeout after {}", humantime::format_duration(duration)), + ), + CacheError::DownloadError(details) => (JsScrapingFailureReason::DownloadError, details), + CacheError::Malformed(details) => (JsScrapingFailureReason::Other, details), + CacheError::InternalError => (JsScrapingFailureReason::Other, String::new()), + }; + + Self::Failure { reason, details } + } +} + +/// The basic reason a scraping attempt failed. +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum JsScrapingFailureReason { + /// The file was not found at the given URL. + NotFound, + /// Scraping was disabled. + Disabled, + /// The URL was not in the list of allowed hosts or had + /// an invalid scheme. + InvalidHost, + /// Permission to access the file was denied. + PermissionDenied, + /// The scraping attempt timed out. + Timeout, + /// There was a non-timeout error while downloading. + DownloadError, + /// Catchall case. + /// + /// This probably can't actually happen. 
+ Other, +} + +#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct JsFrame { + #[serde(skip_serializing_if = "Option::is_none")] + pub function: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub filename: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub module: Option, + + pub abs_path: String, + + pub lineno: u32, + + #[serde(skip_serializing_if = "Option::is_none")] + pub colno: Option, + + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub pre_context: Vec, + + #[serde(skip_serializing_if = "Option::is_none")] + pub context_line: Option, + + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub post_context: Vec, + + #[serde(skip_serializing)] + pub token_name: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub in_app: Option, + + #[serde(default, skip_serializing_if = "JsFrameData::is_empty")] + pub data: JsFrameData, +} + +#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct JsFrameData { + #[serde(skip_serializing_if = "Option::is_none")] + pub sourcemap: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub resolved_with: Option, + #[serde(default)] + pub symbolicated: bool, +} + +/// A marker indicating what a File was resolved with. +/// +/// This enum serves a double purpose, both marking how an individual file was found inside of a +/// bundle, as well as tracking through which method that bundle itself was found. 
+/// +#[derive(Debug, Default, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum ResolvedWith { + /// Both: Found in a Bundle via DebugId + /// And: Found the Bundle via API Lookup via DebugId / Database Index + DebugId, + /// Found in a Bundle via Url matching + Url, + /// Found the Bundle via API Lookup via Database Index + Index, + /// Found the File in a Flat File / Bundle Index + BundleIndex, + /// Found the Bundle via API Lookup as an ArtifactBundle + Release, + /// Found the Bundle via API Lookup as a ReleaseFile + ReleaseOld, + /// Scraped the File from the Web + Scraping, + /// Unknown + #[default] + Unknown, +} + +impl JsFrameData { + pub fn is_empty(&self) -> bool { + *self == Self::default() + } +} + +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct JsStacktrace { + pub frames: Vec, +} + +#[derive(Debug, Default, Clone, Deserialize, Serialize)] +pub struct CompletedJsSymbolicationResponse { + pub stacktraces: Vec, + pub raw_stacktraces: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub errors: Vec, + #[serde(skip_serializing_if = "HashSet::is_empty")] + pub used_artifact_bundles: HashSet, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub scraping_attempts: Vec, +} diff --git a/crates/symbolicator-js/src/lib.rs b/crates/symbolicator-js/src/lib.rs new file mode 100644 index 000000000..9172406bd --- /dev/null +++ b/crates/symbolicator-js/src/lib.rs @@ -0,0 +1,11 @@ +mod api_lookup; +mod bundle_index; +mod bundle_index_cache; +pub mod interface; +mod lookup; +mod metrics; +mod service; +mod symbolication; +mod utils; + +pub use service::SourceMapService; diff --git a/crates/symbolicator-service/src/services/sourcemap_lookup.rs b/crates/symbolicator-js/src/lookup.rs similarity index 75% rename from crates/symbolicator-service/src/services/sourcemap_lookup.rs rename to crates/symbolicator-js/src/lookup.rs index 29aa5df4b..606fc5d35 100644 --- 
a/crates/symbolicator-service/src/services/sourcemap_lookup.rs +++ b/crates/symbolicator-js/src/lookup.rs @@ -46,24 +46,30 @@ use symbolicator_sources::{ }; use tempfile::NamedTempFile; -use crate::caching::{ +use symbolicator_service::caching::{ CacheEntry, CacheError, CacheItemRequest, CacheKey, CacheKeyBuilder, CacheVersions, Cacher, }; -use crate::services::download::DownloadService; -use crate::services::objects::ObjectMetaHandle; -use crate::services::symbolication::ScrapingConfig; -use crate::types::{JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith, Scope}; -use crate::utils::http::is_valid_origin; - -use crate::js::JsMetrics; - -use super::bundle_index::BundleIndex; -use super::caches::versions::SOURCEMAP_CACHE_VERSIONS; -use super::caches::{BundleIndexCache, ByteViewString, SourceFilesCache}; -use super::download::sentry::{ArtifactHeaders, JsLookupResult}; -use super::objects::{ObjectHandle, ObjectsActor}; -use super::sourcemap::SourceMapService; -use super::symbolication::SymbolicateJsStacktraces; +use symbolicator_service::services::caches::versions::SOURCEMAP_CACHE_VERSIONS; +use symbolicator_service::services::caches::{ByteViewString, SourceFilesCache}; +use symbolicator_service::services::download::DownloadService; +use symbolicator_service::services::objects::{ObjectHandle, ObjectMetaHandle, ObjectsActor}; +use symbolicator_service::services::symbolication::ScrapingConfig; +use symbolicator_service::types::Scope; +use symbolicator_service::utils::http::is_valid_origin; + +use crate::api_lookup::{ArtifactHeaders, JsLookupResult, SentryLookupApi}; +use crate::bundle_index::BundleIndex; +use crate::bundle_index_cache::BundleIndexCache; +use crate::interface::{ + JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith, + SymbolicateJsStacktraces, +}; +use crate::metrics::JsMetrics; +use crate::utils::{ + cache_busting_key, extract_file_stem, get_release_file_candidate_urls, join_paths, + resolve_sourcemap_url, +}; 
+use crate::SourceMapService; pub type OwnedSourceMapCache = SelfCell, SourceMapCache<'static>>; @@ -162,6 +168,7 @@ impl SourceMapLookup { bundle_index_cache, sourcemap_caches, download_svc, + api_lookup, } = service; let SymbolicateJsStacktraces { @@ -209,6 +216,7 @@ impl SourceMapLookup { objects, sourcefiles_cache, sourcemap_caches, + api_lookup, download_svc, scope, @@ -319,74 +327,12 @@ impl SourceMapLookup { } } -/// Joins the `right` path to the `base` path, taking care of our special `~/` prefix that is treated just -/// like an absolute url. -pub fn join_paths(base: &str, right: &str) -> String { - if right.contains("://") || right.starts_with("webpack:") { - return right.into(); - } - - let (scheme, rest) = base.split_once("://").unwrap_or(("file", base)); - - let right = right.strip_prefix('~').unwrap_or(right); - // the right path is absolute: - if right.starts_with('/') { - if scheme == "file" { - return right.into(); - } - // a leading `//` means we are skipping the hostname - if let Some(right) = right.strip_prefix("//") { - return format!("{scheme}://{right}"); - } - let hostname = rest.split('/').next().unwrap_or(rest); - return format!("{scheme}://{hostname}{right}"); - } - - let mut final_path = String::new(); - - let mut left_iter = rest.split('/').peekable(); - // add the scheme/hostname - if scheme != "file" { - let hostname = left_iter.next().unwrap_or_default(); - write!(final_path, "{scheme}://{hostname}").unwrap(); - } else if left_iter.peek() == Some(&"") { - // pop a leading `/` - let _ = left_iter.next(); - } - - // pop the basename from the back - let _ = left_iter.next_back(); - - let mut segments: Vec<_> = left_iter.collect(); - let is_http = scheme == "http" || scheme == "https"; - let mut is_first_segment = true; - for right_segment in right.split('/') { - if right_segment == ".." && (segments.pop().is_some() || is_http) { - continue; - } - if right_segment == "." 
&& (is_http || is_first_segment) { - continue; - } - is_first_segment = false; - - segments.push(right_segment); - } - - for seg in segments { - // FIXME: do we want to skip all the `.` fragments as well? - if !seg.is_empty() { - write!(final_path, "/{seg}").unwrap(); - } - } - final_path -} - /// A URL to a sourcemap file. /// /// May either be a conventional URL or a data URL containing the sourcemap /// encoded as BASE64. #[derive(Clone, PartialEq)] -enum SourceMapUrl { +pub enum SourceMapUrl { Data(ByteViewString), Remote(String), } @@ -405,7 +351,7 @@ impl SourceMapUrl { /// /// If it starts with `"data:"`, it is parsed as a data-URL that is base64 or url-encoded. /// Otherwise, the string is joined to the `base` URL. - fn parse_with_prefix(base: &str, url_string: &str) -> CacheEntry { + pub fn parse_with_prefix(base: &str, url_string: &str) -> CacheEntry { if url_string.starts_with("data:") { let decoded = data_url::DataUrl::process(url_string) .map_err(|_| ()) @@ -618,6 +564,7 @@ struct ArtifactFetcher { sourcefiles_cache: Arc, sourcemap_caches: Arc>, download_svc: Arc, + api_lookup: Arc, // source config scope: Scope, @@ -1165,7 +1112,7 @@ impl ArtifactFetcher { self.metrics.api_requests += 1; let results = match self - .download_svc + .api_lookup .lookup_js_artifacts( self.source.clone(), debug_ids, @@ -1342,99 +1289,6 @@ impl ArtifactFetcher { } } -/// Strips the hostname (or leading tilde) from the `path` and returns the path following the -/// hostname, with a leading `/`. -pub fn strip_hostname(path: &str) -> &str { - if let Some(after_tilde) = path.strip_prefix('~') { - return after_tilde; - } - - if let Some((_scheme, rest)) = path.split_once("://") { - return rest.find('/').map(|idx| &rest[idx..]).unwrap_or(rest); - } - path -} - -/// Extracts a "file stem" from a path. -/// This is the `"/path/to/file"` in `"./path/to/file.min.js?foo=bar"`. 
-/// We use the most generic variant instead here, as server-side filtering is using a partial -/// match on the whole artifact path, thus `index.js` will be fetched no matter it's stored -/// as `~/index.js`, `~/index.js?foo=bar`, `http://example.com/index.js`, -/// or `http://example.com/index.js?foo=bar`. -// NOTE: We do want a leading slash to be included, eg. `/bundle/app.js` or `/index.js`, -// as it's not possible to use artifacts without proper host or `~/` wildcard. -fn extract_file_stem(path: &str) -> String { - let path = strip_hostname(path); - - path.rsplit_once('/') - .map(|(prefix, name)| { - // trim query strings and fragments - let name = name.split_once('?').map(|(name, _)| name).unwrap_or(name); - let name = name.split_once('#').map(|(name, _)| name).unwrap_or(name); - - // then, trim all the suffixes as often as they occurr - let name = trim_all_end_matches(name, FILE_SUFFIX_PATTERNS); - - format!("{prefix}/{name}") - }) - .unwrap_or(path.to_owned()) -} - -const FILE_SUFFIX_PATTERNS: &[&str] = &[ - ".min", ".js", ".map", ".cjs", ".mjs", ".ts", ".d", ".jsx", ".tsx", -]; - -/// Trims the different `patterns` from the end of the `input` string as often as possible. -fn trim_all_end_matches<'a>(mut input: &'a str, patterns: &[&str]) -> &'a str { - loop { - let mut trimmed = input; - for pattern in patterns { - trimmed = trimmed.trim_end_matches(pattern); - } - if trimmed == input { - return trimmed; - } - input = trimmed; - } -} - -/// Transforms a full absolute url into 2 or 4 generalized options. 
-// Based on `ReleaseFile.normalize`, see: -// https://github.com/getsentry/sentry/blob/master/src/sentry/models/releasefile.py -fn get_release_file_candidate_urls(url: &str) -> impl Iterator { - let url = url.split('#').next().unwrap_or(url); - let relative = strip_hostname(url); - - let urls = [ - // Absolute without fragment - Some(url.to_string()), - // Absolute without query - url.split_once('?').map(|s| s.0.to_string()), - // Relative without fragment - Some(format!("~{relative}")), - // Relative without query - relative.split_once('?').map(|s| format!("~{}", s.0)), - ]; - - urls.into_iter().flatten() -} - -/// Joins together frames `abs_path` and discovered sourcemap reference. -fn resolve_sourcemap_url( - abs_path: &str, - artifact_headers: &ArtifactHeaders, - artifact_source: &str, -) -> Option { - if let Some(header) = artifact_headers.get("sourcemap") { - SourceMapUrl::parse_with_prefix(abs_path, header).ok() - } else if let Some(header) = artifact_headers.get("x-sourcemap") { - SourceMapUrl::parse_with_prefix(abs_path, header).ok() - } else { - let sm_ref = discover_sourcemaps_location(artifact_source)?; - SourceMapUrl::parse_with_prefix(abs_path, sm_ref).ok() - } -} - #[derive(Clone, Debug)] pub struct FetchSourceMapCacheInternal { source: ByteViewString, @@ -1478,269 +1332,3 @@ fn write_sourcemap_cache(file: &mut File, source: &str, sourcemap: &str) -> Cach Ok(()) } - -/// This will truncate the `timestamp` to a multiple of `refresh_every`, using a stable offset -/// derived from `url` to avoid having the same cutoff for every single `url`. 
-fn cache_busting_key(url: &str, timestamp: u64, refresh_every: u64) -> u64 { - let url_hash = Sha256::digest(url); - let url_hash = - u64::from_le_bytes(<[u8; 8]>::try_from(&url_hash[..8]).expect("sha256 outputs >8 bytes")); - - let offset = url_hash % refresh_every; - ((timestamp - offset) / refresh_every * refresh_every) + offset -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cache_busting_key() { - // the hashed offset for this url is `39` - let url = "https://example.com/foo.js"; - - let timestamp = 1000; - let refresh_every = 100; - - let key = cache_busting_key(url, timestamp, refresh_every); - assert_eq!(key, 939); - let key = cache_busting_key(url, timestamp + 38, refresh_every); - assert_eq!(key, 939); - let key = cache_busting_key(url, timestamp + 40, refresh_every); - assert_eq!(key, 1039); - let key = cache_busting_key(url, timestamp + 100, refresh_every); - assert_eq!(key, 1039); - } - - #[test] - fn test_strip_hostname() { - assert_eq!(strip_hostname("/absolute/unix/path"), "/absolute/unix/path"); - assert_eq!(strip_hostname("~/with/tilde"), "/with/tilde"); - assert_eq!(strip_hostname("https://example.com/"), "/"); - assert_eq!( - strip_hostname("https://example.com/some/path/file.js"), - "/some/path/file.js" - ); - } - - #[test] - fn test_get_release_file_candidate_urls() { - let url = "https://example.com/assets/bundle.min.js"; - let expected = &[ - "https://example.com/assets/bundle.min.js", - "~/assets/bundle.min.js", - ]; - let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); - assert_eq!(&actual, expected); - - let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz"; - let expected = &[ - "https://example.com/assets/bundle.min.js?foo=1&bar=baz", - "https://example.com/assets/bundle.min.js", - "~/assets/bundle.min.js?foo=1&bar=baz", - "~/assets/bundle.min.js", - ]; - let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); - assert_eq!(&actual, expected); - - let url = 
"https://example.com/assets/bundle.min.js#wat"; - let expected = &[ - "https://example.com/assets/bundle.min.js", - "~/assets/bundle.min.js", - ]; - let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); - assert_eq!(&actual, expected); - - let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz#wat"; - let expected = &[ - "https://example.com/assets/bundle.min.js?foo=1&bar=baz", - "https://example.com/assets/bundle.min.js", - "~/assets/bundle.min.js?foo=1&bar=baz", - "~/assets/bundle.min.js", - ]; - let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); - assert_eq!(&actual, expected); - - let url = "app:///_next/server/pages/_error.js"; - let expected = &[ - "app:///_next/server/pages/_error.js", - "~/_next/server/pages/_error.js", - ]; - let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); - assert_eq!(&actual, expected); - } - - #[test] - fn test_extract_file_stem() { - let url = "https://example.com/bundle.js"; - assert_eq!(extract_file_stem(url), "/bundle"); - - let url = "https://example.com/bundle.min.js"; - assert_eq!(extract_file_stem(url), "/bundle"); - - let url = "https://example.com/assets/bundle.js"; - assert_eq!(extract_file_stem(url), "/assets/bundle"); - - let url = "https://example.com/assets/bundle.min.js"; - assert_eq!(extract_file_stem(url), "/assets/bundle"); - - let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz"; - assert_eq!(extract_file_stem(url), "/assets/bundle"); - - let url = "https://example.com/assets/bundle.min.js#wat"; - assert_eq!(extract_file_stem(url), "/assets/bundle"); - - let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz#wat"; - assert_eq!(extract_file_stem(url), "/assets/bundle"); - - // app:// urls - assert_eq!( - extract_file_stem("app:///_next/server/pages/_error.js"), - "/_next/server/pages/_error" - ); - assert_eq!( - extract_file_stem("app:///polyfills.e9f8f1606b76a9c9.js"), - "/polyfills.e9f8f1606b76a9c9" - ); - } - - #[test] - fn 
joining_paths() { - // (http) URLs - let base = "https://example.com/path/to/assets/bundle.min.js?foo=1&bar=baz#wat"; - - // relative - assert_eq!( - join_paths(base, "../sourcemaps/bundle.min.js.map"), - "https://example.com/path/to/sourcemaps/bundle.min.js.map" - ); - // absolute - assert_eq!(join_paths(base, "/foo.js"), "https://example.com/foo.js"); - // absolute with tilde - assert_eq!(join_paths(base, "~/foo.js"), "https://example.com/foo.js"); - - // dots - assert_eq!( - join_paths(base, ".././.././to/./sourcemaps/./bundle.min.js.map"), - "https://example.com/path/to/sourcemaps/bundle.min.js.map" - ); - - // file paths - let base = "/home/foo/bar/baz.js"; - - // relative - assert_eq!( - join_paths(base, "../sourcemaps/bundle.min.js.map"), - "/home/foo/sourcemaps/bundle.min.js.map" - ); - // absolute - assert_eq!(join_paths(base, "/foo.js"), "/foo.js"); - // absolute with tilde - assert_eq!(join_paths(base, "~/foo.js"), "/foo.js"); - - // absolute path with its own scheme - let path = "webpack:///../node_modules/scheduler/cjs/scheduler.production.min.js"; - assert_eq!(join_paths("http://example.com", path), path); - - // path with a dot in the middle - assert_eq!( - join_paths("http://example.com", "path/./to/file.min.js"), - "http://example.com/path/to/file.min.js" - ); - - assert_eq!( - join_paths("/playground/öut path/rollup/entrypoint1.js", "~/0.js.map"), - "/0.js.map" - ); - - // path with a leading dot - assert_eq!( - join_paths( - "app:///_next/static/chunks/pages/_app-569c402ef19f6d7b.js.map", - "./node_modules/@sentry/browser/esm/integrations/trycatch.js" - ), - "app:///_next/static/chunks/pages/node_modules/@sentry/browser/esm/integrations/trycatch.js" - ); - - // webpack with only a single slash - assert_eq!( - join_paths( - "app:///main-es2015.6216307eafb7335c4565.js.map", - "webpack:/node_modules/@angular/core/__ivy_ngcc__/fesm2015/core.js" - ), - "webpack:/node_modules/@angular/core/__ivy_ngcc__/fesm2015/core.js" - ); - - // double-slash in the 
middle - assert_eq!( - join_paths( - "https://foo.cloudfront.net/static//js/npm.sentry.d8b531aaf5202ddb7e90.js", - "npm.sentry.d8b531aaf5202ddb7e90.js.map" - ), - "https://foo.cloudfront.net/static/js/npm.sentry.d8b531aaf5202ddb7e90.js.map" - ); - - // tests ported from python: - // - assert_eq!( - join_paths("http://example.com/foo", "bar"), - "http://example.com/bar" - ); - assert_eq!( - join_paths("http://example.com/foo", "/bar"), - "http://example.com/bar" - ); - assert_eq!( - join_paths("https://example.com/foo", "/bar"), - "https://example.com/bar" - ); - assert_eq!( - join_paths("http://example.com/foo/baz", "bar"), - "http://example.com/foo/bar" - ); - assert_eq!( - join_paths("http://example.com/foo/baz", "/bar"), - "http://example.com/bar" - ); - assert_eq!( - join_paths("aps://example.com/foo", "/bar"), - "aps://example.com/bar" - ); - assert_eq!( - join_paths("apsunknown://example.com/foo", "/bar"), - "apsunknown://example.com/bar" - ); - assert_eq!( - join_paths("apsunknown://example.com/foo", "//aha/uhu"), - "apsunknown://aha/uhu" - ); - } - - #[test] - fn data_urls() { - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data"), - Ok(SourceMapUrl::Remote("/data".into())), - ); - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data:"), - Err(CacheError::Malformed("invalid `data:` url".into())), - ); - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data:,foo"), - Ok(SourceMapUrl::Data(String::from("foo").into())), - ); - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data:,Hello%2C%20World%21"), - Ok(SourceMapUrl::Data(String::from("Hello, World!").into())), - ); - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data:;base64,SGVsbG8sIFdvcmxkIQ=="), - Ok(SourceMapUrl::Data(String::from("Hello, World!").into())), - ); - assert_eq!( - SourceMapUrl::parse_with_prefix("/foo", "data:;base64,SGVsbG8sIFdvcmxkIQ="), - Err(CacheError::Malformed("invalid `data:` url".into())), - ); - } -} diff --git 
a/crates/symbolicator-service/src/js/metrics.rs b/crates/symbolicator-js/src/metrics.rs similarity index 77% rename from crates/symbolicator-service/src/js/metrics.rs rename to crates/symbolicator-js/src/metrics.rs index 913767835..11f8da3dd 100644 --- a/crates/symbolicator-service/src/js/metrics.rs +++ b/crates/symbolicator-js/src/metrics.rs @@ -1,6 +1,35 @@ +//! Metrics +//! +//! - `js.unsymbolicated_frames`: The number of unsymbolicated frames, per event. +//! Should be `0` in the best case, as we obviously should symbolicate :-) +//! +//! - `js.missing_sourcescontent`: The number of frames, per event, that have no embedded sources. +//! Should be `0` in the best case, as the SourceMaps we use should have embedded sources. +//! If they don’t, we have to fall back to applying source context from elsewhere. +//! +//! - `js.api_requests`: The number of (potentially cached) API requests, per event. +//! Should be `1` in the best case, as `prefetch_artifacts` should provide us with everything we need. +//! +//! - `js.queried_bundles` / `js.fetched_bundles`: The number of artifact bundles the API gave us, +//! and the ones we ended up using. +//! Should both be `1` in the best case, as a single bundle should ideally serve all our needs. +//! Otherwise `queried` and `fetched` should be the same, as a difference between the two means +//! that multiple API requests gave us duplicated bundles. +//! +//! - `js.queried_artifacts` / `js.fetched_artifacts`: The number of individual artifacts the API +//! gave us, and the ones we ended up using. +//! Should both be `0` as we should not be using individual artifacts but rather bundles. +//! Otherwise, `queried` should be close to `fetched`. If they differ, it means the API is sending +//! us a lot of candidate artifacts that we don’t end up using, or multiple API requests give us +//! duplicated artifacts. +//! +//! - `js.scraped_files`: The number of files that were scraped from the Web. +//! 
Should be `0`, as we should find/use files from within bundles or as individual artifacts. + use symbolic::debuginfo::sourcebundle::SourceFileType; +use symbolicator_service::metric; -use crate::types::ResolvedWith; +use crate::interface::ResolvedWith; /// Various metrics we want to capture *per-event* for JS events. #[derive(Debug, Default)] diff --git a/crates/symbolicator-js/src/service.rs b/crates/symbolicator-js/src/service.rs new file mode 100644 index 000000000..40f330038 --- /dev/null +++ b/crates/symbolicator-js/src/service.rs @@ -0,0 +1,56 @@ +//! Service for retrieving Artifacts and SourceMap. + +use std::sync::Arc; + +use symbolicator_service::caching::Cacher; +use symbolicator_service::services::caches::SourceFilesCache; +use symbolicator_service::services::download::DownloadService; +use symbolicator_service::services::objects::ObjectsActor; +use symbolicator_service::services::SharedServices; + +use crate::api_lookup::SentryLookupApi; +use crate::bundle_index_cache::BundleIndexCache; +use crate::lookup::FetchSourceMapCacheInternal; + +#[derive(Debug, Clone)] +pub struct SourceMapService { + pub(crate) objects: ObjectsActor, + pub(crate) sourcefiles_cache: Arc, + pub(crate) bundle_index_cache: Arc, + pub(crate) sourcemap_caches: Arc>, + pub(crate) download_svc: Arc, + pub(crate) api_lookup: Arc, +} + +impl SourceMapService { + pub fn new(services: &SharedServices) -> Self { + let caches = &services.caches; + let shared_cache = services.shared_cache.clone(); + let objects = services.objects.clone(); + let download_svc = services.download_svc.clone(); + let sourcefiles_cache = services.sourcefiles_cache.clone(); + + let bundle_index_cache = BundleIndexCache::new( + caches.bundle_index.clone(), + shared_cache.clone(), + download_svc.clone(), + ); + + let in_memory = &services.config.caches.in_memory; + let api_lookup = Arc::new(SentryLookupApi::new( + download_svc.trusted_client.clone(), + download_svc.runtime.clone(), + download_svc.timeouts, + 
in_memory, + )); + + Self { + objects, + sourcefiles_cache, + bundle_index_cache: Arc::new(bundle_index_cache), + sourcemap_caches: Arc::new(Cacher::new(caches.sourcemap_caches.clone(), shared_cache)), + download_svc, + api_lookup, + } + } +} diff --git a/crates/symbolicator-js/src/symbolication.rs b/crates/symbolicator-js/src/symbolication.rs new file mode 100644 index 000000000..55e64cb87 --- /dev/null +++ b/crates/symbolicator-js/src/symbolication.rs @@ -0,0 +1,304 @@ +use std::collections::BTreeSet; + +use symbolic::sourcemapcache::{ScopeLookupResult, SourcePosition}; +use symbolicator_service::caching::CacheError; +use symbolicator_service::metric; +use symbolicator_service::services::symbolication::source_context::get_context_lines; + +use crate::interface::{ + CompletedJsSymbolicationResponse, JsFrame, JsModuleError, JsModuleErrorKind, JsStacktrace, + SymbolicateJsStacktraces, +}; +use crate::lookup::SourceMapLookup; +use crate::utils::{ + fixup_webpack_filename, fold_function_name, generate_module, get_function_for_token, is_in_app, + join_paths, +}; +use crate::SourceMapService; + +impl SourceMapService { + #[tracing::instrument(skip_all)] + pub async fn symbolicate_js( + &self, + mut request: SymbolicateJsStacktraces, + ) -> CompletedJsSymbolicationResponse { + let mut raw_stacktraces = std::mem::take(&mut request.stacktraces); + let apply_source_context = request.apply_source_context; + let mut lookup = SourceMapLookup::new(self.clone(), request).await; + lookup.prepare_modules(&mut raw_stacktraces[..]); + + let mut unsymbolicated_frames = 0; + let mut missing_sourcescontent = 0; + + let num_stacktraces = raw_stacktraces.len(); + let mut stacktraces = Vec::with_capacity(num_stacktraces); + + let mut errors = BTreeSet::new(); + for raw_stacktrace in &mut raw_stacktraces { + let num_frames = raw_stacktrace.frames.len(); + let mut symbolicated_frames = Vec::with_capacity(num_frames); + let mut callsite_fn_name = None; + + for raw_frame in &mut 
raw_stacktrace.frames { + match symbolicate_js_frame( + &mut lookup, + raw_frame, + &mut errors, + std::mem::take(&mut callsite_fn_name), + apply_source_context, + &mut missing_sourcescontent, + ) + .await + { + Ok(mut frame) => { + std::mem::swap(&mut callsite_fn_name, &mut frame.token_name); + symbolicated_frames.push(frame); + } + Err(err) => { + unsymbolicated_frames += 1; + errors.insert(JsModuleError { + abs_path: raw_frame.abs_path.clone(), + kind: err, + }); + symbolicated_frames.push(raw_frame.clone()); + } + } + } + + stacktraces.push(JsStacktrace { + frames: symbolicated_frames, + }); + } + + lookup.record_metrics(); + metric!(time_raw("js.unsymbolicated_frames") = unsymbolicated_frames); + metric!(time_raw("js.missing_sourcescontent") = missing_sourcescontent); + + let (used_artifact_bundles, scraping_attempts) = lookup.into_records(); + + CompletedJsSymbolicationResponse { + stacktraces, + raw_stacktraces, + errors: errors.into_iter().collect(), + used_artifact_bundles, + scraping_attempts, + } + } +} + +async fn symbolicate_js_frame( + lookup: &mut SourceMapLookup, + raw_frame: &mut JsFrame, + errors: &mut BTreeSet, + callsite_fn_name: Option, + should_apply_source_context: bool, + missing_sourcescontent: &mut u64, +) -> Result { + // we check for a valid line (i.e. >= 1) first, as we want to avoid resolving / scraping the minified + // file in that case. we frequently saw 0 line/col values in combination with non-js files, + // and we want to avoid scraping a bunch of html files in that case. + let line = if raw_frame.lineno > 0 { + raw_frame.lineno + } else { + return Err(JsModuleErrorKind::InvalidLocation { + line: raw_frame.lineno, + col: raw_frame.colno, + }); + }; + + let col = raw_frame.colno.unwrap_or_default(); + + let module = lookup.get_module(&raw_frame.abs_path).await; + + tracing::trace!( + abs_path = &raw_frame.abs_path, + ?module, + "Module for `abs_path`" + ); + + // Apply source context to the raw frame. 
If it fails, we bail early, as it's not possible + // to construct a `SourceMapCache` without the minified source anyway. + match &module.minified_source.entry { + Ok(minified_source) => { + if should_apply_source_context { + apply_source_context(raw_frame, &minified_source.contents)? + } + } + Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { + return Err(JsModuleErrorKind::ScrapingDisabled); + } + Err(_) => return Err(JsModuleErrorKind::MissingSource), + } + + let sourcemap_label = &module + .minified_source + .entry + .as_ref() + .map(|entry| entry.sourcemap_url()) + .ok() + .flatten() + .unwrap_or_else(|| raw_frame.abs_path.clone()); + + let (smcache, resolved_with) = match &module.smcache { + Some(smcache) => match &smcache.entry { + Ok(entry) => (entry, smcache.resolved_with), + Err(CacheError::Malformed(_)) => { + // If we successfully resolved the sourcemap but it's broken somehow, + // We should still record that we resolved it. + raw_frame.data.resolved_with = Some(smcache.resolved_with); + return Err(JsModuleErrorKind::MalformedSourcemap { + url: sourcemap_label.to_owned(), + }); + } + Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { + return Err(JsModuleErrorKind::ScrapingDisabled); + } + Err(_) => return Err(JsModuleErrorKind::MissingSourcemap), + }, + // In case it's just a source file, with no sourcemap reference or any debug id, we bail. + None => return Ok(raw_frame.clone()), + }; + + let mut frame = raw_frame.clone(); + frame.data.sourcemap = Some(sourcemap_label.clone()); + frame.data.resolved_with = Some(resolved_with); + + let sp = SourcePosition::new(line - 1, col.saturating_sub(1)); + let token = smcache + .get() + .lookup(sp) + .ok_or(JsModuleErrorKind::InvalidLocation { + line, + col: Some(col), + })?; + + // We consider the frame successfully symbolicated if we can resolve the minified source position + // to a token. 
+ frame.data.symbolicated = true; + + // Store the resolved token name, which can be used for function name resolution in next frame. + // Refer to https://blog.sentry.io/2022/11/30/how-we-made-javascript-stack-traces-awesome/ + // for more details about "caller naming". + frame.token_name = token.name().map(|n| n.to_owned()); + + let function_name = match token.scope() { + ScopeLookupResult::NamedScope(name) => { + let scope_name = name.to_string(); + // Special case for Dart async function rewrites + // https://github.com/dart-lang/sdk/blob/fab753ea277c96c7699920852dabf977a7065fa5/pkg/compiler/lib/src/js_backend/namer.dart#L1845-L1866 + // ref: https://github.com/getsentry/symbolic/issues/791 + if name.starts_with("$async$") { + token.name().map_or_else(|| scope_name, |n| n.to_owned()) + } else { + scope_name + } + } + ScopeLookupResult::AnonymousScope => "".to_string(), + ScopeLookupResult::Unknown => { + // Fallback to minified function name + raw_frame + .function + .clone() + .unwrap_or("".to_string()) + } + }; + + frame.function = Some(fold_function_name(get_function_for_token( + raw_frame.function.as_deref(), + &function_name, + callsite_fn_name.as_deref(), + ))); + + if let Some(filename) = token.file_name() { + let mut filename = filename.to_string(); + frame.abs_path = module + .source_file_base() + .map(|base| join_paths(base, &filename)) + .unwrap_or_else(|| filename.clone()); + + if filename.starts_with("webpack:") { + filename = fixup_webpack_filename(&filename); + frame.module = Some(generate_module(&filename)); + } + + frame.in_app = is_in_app(&frame.abs_path, &filename); + + if frame.module.is_none() + && (frame.abs_path.starts_with("http:") + || frame.abs_path.starts_with("https:") + || frame.abs_path.starts_with("webpack:") + || frame.abs_path.starts_with("app:")) + { + frame.module = Some(generate_module(&frame.abs_path)); + } + + frame.filename = Some(filename); + } + + frame.lineno = token.line().saturating_add(1); + frame.colno = 
Some(token.column().saturating_add(1)); + + if !should_apply_source_context { + return Ok(frame); + } + + if let Some(file) = token.file() { + if let Some(file_source) = file.source() { + if let Err(err) = apply_source_context(&mut frame, file_source) { + errors.insert(JsModuleError { + abs_path: raw_frame.abs_path.clone(), + kind: err, + }); + } + } else { + *missing_sourcescontent += 1; + + // If we have no source context from within the `SourceMapCache`, + // fall back to applying the source context from a raw artifact file + let file_key = file + .name() + .and_then(|filename| module.source_file_key(filename)); + + let source_file = match &file_key { + Some(key) => &lookup.get_source_file(key.clone()).await.entry, + None => &Err(CacheError::NotFound), + }; + + if source_file + .as_ref() + .map_err(|_| JsModuleErrorKind::MissingSource) + .and_then(|file| apply_source_context(&mut frame, &file.contents)) + .is_err() + { + // It's arguable whether we should collect it, but this is what monolith does now, + // and it might be useful to indicate incorrect sentry-cli rewrite behavior. 
+ errors.insert(JsModuleError { + abs_path: raw_frame.abs_path.clone(), + kind: JsModuleErrorKind::MissingSourceContent { + source: file_key + .and_then(|key| key.abs_path().map(|path| path.to_string())) + .unwrap_or_default(), + sourcemap: sourcemap_label.clone(), + }, + }); + } + } + } + + Ok(frame) +} + +fn apply_source_context(frame: &mut JsFrame, source: &str) -> Result<(), JsModuleErrorKind> { + let lineno = frame.lineno as usize; + let column = frame.colno.map(|col| col as usize); + + if let Some((pre_context, context_line, post_context)) = + get_context_lines(source, lineno, column, None) + { + frame.pre_context = pre_context; + frame.context_line = Some(context_line); + frame.post_context = post_context; + } + + Ok(()) +} diff --git a/crates/symbolicator-js/src/utils.rs b/crates/symbolicator-js/src/utils.rs new file mode 100644 index 000000000..4cf4a99c9 --- /dev/null +++ b/crates/symbolicator-js/src/utils.rs @@ -0,0 +1,861 @@ +use std::fmt::Write; + +use once_cell::sync::Lazy; +use regex::Regex; +use sha2::{Digest, Sha256}; +use symbolic::debuginfo::js::discover_sourcemaps_location; + +use crate::api_lookup::ArtifactHeaders; +use crate::lookup::SourceMapUrl; + +static WEBPACK_NAMESPACE_RE: Lazy = + Lazy::new(|| Regex::new(r"^webpack://[a-zA-Z0-9_\-@\.]+/\./").unwrap()); +static NODE_MODULES_RE: Lazy = Lazy::new(|| Regex::new(r"\bnode_modules/").unwrap()); + +// Names that do not provide any reasonable value, and that can possibly obstruct +// better available names. In case we encounter one, we fallback to current frame fn name if available. +const USELESS_FN_NAMES: [&str; 3] = ["", "__webpack_require__", "__webpack_modules__"]; + +/// Get function name for a given frame based on the token resolved by symbolic. 
+/// It tries following paths in order: +/// - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list), +/// - return mapped name of the caller (previous frame) token if it had, +/// - return token function name, including filtered values if it mapped to anything in the first place, +/// - return current frames function name as a fallback +/// +// fn get_function_for_token(frame, token, previous_frame=None): +pub fn get_function_for_token<'a>( + frame_fn_name: Option<&'a str>, + token_fn_name: &'a str, + callsite_fn_name: Option<&'a str>, +) -> &'a str { + // Try to use the function name we got from sourcemap-cache, filtering useless names. + if !USELESS_FN_NAMES.contains(&token_fn_name) { + return token_fn_name; + } + + // If not found, ask the callsite (previous token) for function name if possible. + if let Some(token_name) = callsite_fn_name { + if !token_name.is_empty() { + return token_name; + } + } + + // If there was no minified name at all, return even useless, filtered one from the original token. + if frame_fn_name.is_none() { + return token_fn_name; + } + + // Otherwise fallback to the old, minified name. + frame_fn_name.unwrap_or("") +} + +/// Fold multiple consecutive occurences of the same property name into a single group, excluding the last component. +/// +/// foo | foo +/// foo.foo | foo.foo +/// foo.foo.foo | {foo#2}.foo +/// bar.foo.foo | bar.foo.foo +/// bar.foo.foo.foo | bar.{foo#2}.foo +/// bar.foo.foo.onError | bar.{foo#2}.onError +/// bar.bar.bar.foo.foo.onError | {bar#3}.{foo#2}.onError +/// bar.foo.foo.bar.bar.onError | bar.{foo#2}.{bar#2}.onError +pub fn fold_function_name(function_name: &str) -> String { + let mut parts: Vec<_> = function_name.split('.').collect(); + + if parts.len() == 1 { + return function_name.to_string(); + } + + // unwrap: `parts` has at least a single item. 
+ let tail = parts.pop().unwrap(); + let mut grouped: Vec> = vec![vec![]]; + + for part in parts { + // unwrap: we initialized `grouped` with at least a single slice. + let current_group = grouped.last_mut().unwrap(); + if current_group.is_empty() || current_group.last() == Some(&part) { + current_group.push(part); + } else { + grouped.push(vec![part]); + } + } + + let folded = grouped + .iter() + .map(|group| { + // unwrap: each group contains at least a single item. + if group.len() == 1 { + group.first().unwrap().to_string() + } else { + format!("{{{}#{}}}", group.first().unwrap(), group.len()) + } + }) + .collect::>() + .join("."); + + format!("{folded}.{tail}") +} + +pub fn fixup_webpack_filename(filename: &str) -> String { + if let Some((_, rest)) = filename.split_once("/~/") { + format!("~/{rest}") + } else if WEBPACK_NAMESPACE_RE.is_match(filename) { + WEBPACK_NAMESPACE_RE.replace(filename, "./").to_string() + } else if let Some(rest) = filename.strip_prefix("webpack:///") { + rest.to_string() + } else { + filename.to_string() + } +} + +pub fn is_in_app(abs_path: &str, filename: &str) -> Option { + if abs_path.starts_with("webpack:") { + Some(filename.starts_with("./") && !filename.contains("/node_modules/")) + } else if abs_path.starts_with("app:") { + Some(!NODE_MODULES_RE.is_match(filename)) + } else if abs_path.contains("/node_modules/") { + Some(false) + } else { + None + } +} + +// As a running joke, here you have a 8 year old comment from 2015: +// TODO(dcramer): replace CLEAN_MODULE_RE with tokenizer completely +static CLEAN_MODULE_RE: Lazy = Lazy::new(|| { + Regex::new( + r"(?ix) +^ +(?:/| # Leading slashes +(?: + (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?| # common folder prefixes + v?(?:\d+\.)*\d+| # version numbers, v1, 1.0.0 + [a-f0-9]{7,8}| # short sha + [a-f0-9]{32}| # md5 + [a-f0-9]{40} # sha1 +)/)+| +(?:[-\.][a-f0-9]{7,}$) # Ending in a commitish +", + ).unwrap() +}); + +/// Converts a url into a made-up 
module name by doing the following: +/// * Extract just the path name ignoring querystrings +/// * Trimming off the initial / +/// * Trimming off the file extension +/// * Removes off useless folder prefixes +/// e.g. `http://google.com/js/v1.0/foo/bar/baz.js` -> `foo/bar/baz` +pub fn generate_module(abs_path: &str) -> String { + let path = strip_hostname(abs_path); + let mut path = path.split(&['#', '?']).next().unwrap_or(path); + + if let Some((idx, ".")) = path.rmatch_indices(&['.', '/']).next() { + path = &path[..idx]; + } + + let path = path.strip_suffix(".min").unwrap_or(path); + + // return all the segments following a 32/40-char hash + let mut segments = path.split('/'); + while let Some(segment) = segments.next() { + if segment.len() == 32 + || segment.len() == 40 && segment.chars().all(|c| c.is_ascii_hexdigit()) + { + let mut s = String::new(); + for (i, seg) in segments.enumerate() { + if i > 0 { + s.push('/'); + } + s.push_str(seg); + } + return s; + } + } + + CLEAN_MODULE_RE.replace_all(path, "").into_owned() +} + +/// Joins the `right` path to the `base` path, taking care of our special `~/` prefix that is treated just +/// like an absolute url. 
+pub fn join_paths(base: &str, right: &str) -> String { + if right.contains("://") || right.starts_with("webpack:") { + return right.into(); + } + + let (scheme, rest) = base.split_once("://").unwrap_or(("file", base)); + + let right = right.strip_prefix('~').unwrap_or(right); + // the right path is absolute: + if right.starts_with('/') { + if scheme == "file" { + return right.into(); + } + // a leading `//` means we are skipping the hostname + if let Some(right) = right.strip_prefix("//") { + return format!("{scheme}://{right}"); + } + let hostname = rest.split('/').next().unwrap_or(rest); + return format!("{scheme}://{hostname}{right}"); + } + + let mut final_path = String::new(); + + let mut left_iter = rest.split('/').peekable(); + // add the scheme/hostname + if scheme != "file" { + let hostname = left_iter.next().unwrap_or_default(); + write!(final_path, "{scheme}://{hostname}").unwrap(); + } else if left_iter.peek() == Some(&"") { + // pop a leading `/` + let _ = left_iter.next(); + } + + // pop the basename from the back + let _ = left_iter.next_back(); + + let mut segments: Vec<_> = left_iter.collect(); + let is_http = scheme == "http" || scheme == "https"; + let mut is_first_segment = true; + for right_segment in right.split('/') { + if right_segment == ".." && (segments.pop().is_some() || is_http) { + continue; + } + if right_segment == "." && (is_http || is_first_segment) { + continue; + } + is_first_segment = false; + + segments.push(right_segment); + } + + for seg in segments { + // FIXME: do we want to skip all the `.` fragments as well? + if !seg.is_empty() { + write!(final_path, "/{seg}").unwrap(); + } + } + final_path +} + +/// Strips the hostname (or leading tilde) from the `path` and returns the path following the +/// hostname, with a leading `/`. 
+pub fn strip_hostname(path: &str) -> &str { + if let Some(after_tilde) = path.strip_prefix('~') { + return after_tilde; + } + + if let Some((_scheme, rest)) = path.split_once("://") { + return rest.find('/').map(|idx| &rest[idx..]).unwrap_or(rest); + } + path +} + +/// Extracts a "file stem" from a path. +/// This is the `"/path/to/file"` in `"./path/to/file.min.js?foo=bar"`. +/// We use the most generic variant instead here, as server-side filtering is using a partial +/// match on the whole artifact path, thus `index.js` will be fetched no matter it's stored +/// as `~/index.js`, `~/index.js?foo=bar`, `http://example.com/index.js`, +/// or `http://example.com/index.js?foo=bar`. +// NOTE: We do want a leading slash to be included, eg. `/bundle/app.js` or `/index.js`, +// as it's not possible to use artifacts without proper host or `~/` wildcard. +pub fn extract_file_stem(path: &str) -> String { + let path = strip_hostname(path); + + path.rsplit_once('/') + .map(|(prefix, name)| { + // trim query strings and fragments + let name = name.split_once('?').map(|(name, _)| name).unwrap_or(name); + let name = name.split_once('#').map(|(name, _)| name).unwrap_or(name); + + // then, trim all the suffixes as often as they occurr + let name = trim_all_end_matches(name, FILE_SUFFIX_PATTERNS); + + format!("{prefix}/{name}") + }) + .unwrap_or(path.to_owned()) +} + +const FILE_SUFFIX_PATTERNS: &[&str] = &[ + ".min", ".js", ".map", ".cjs", ".mjs", ".ts", ".d", ".jsx", ".tsx", +]; + +/// Trims the different `patterns` from the end of the `input` string as often as possible. +pub fn trim_all_end_matches<'a>(mut input: &'a str, patterns: &[&str]) -> &'a str { + loop { + let mut trimmed = input; + for pattern in patterns { + trimmed = trimmed.trim_end_matches(pattern); + } + if trimmed == input { + return trimmed; + } + input = trimmed; + } +} + +/// Transforms a full absolute url into 2 or 4 generalized options. 
+// Based on `ReleaseFile.normalize`, see: +// https://github.com/getsentry/sentry/blob/master/src/sentry/models/releasefile.py +pub fn get_release_file_candidate_urls(url: &str) -> impl Iterator { + let url = url.split('#').next().unwrap_or(url); + let relative = strip_hostname(url); + + let urls = [ + // Absolute without fragment + Some(url.to_string()), + // Absolute without query + url.split_once('?').map(|s| s.0.to_string()), + // Relative without fragment + Some(format!("~{relative}")), + // Relative without query + relative.split_once('?').map(|s| format!("~{}", s.0)), + ]; + + urls.into_iter().flatten() +} + +/// Joins together frames `abs_path` and discovered sourcemap reference. +pub fn resolve_sourcemap_url( + abs_path: &str, + artifact_headers: &ArtifactHeaders, + artifact_source: &str, +) -> Option { + if let Some(header) = artifact_headers.get("sourcemap") { + SourceMapUrl::parse_with_prefix(abs_path, header).ok() + } else if let Some(header) = artifact_headers.get("x-sourcemap") { + SourceMapUrl::parse_with_prefix(abs_path, header).ok() + } else { + let sm_ref = discover_sourcemaps_location(artifact_source)?; + SourceMapUrl::parse_with_prefix(abs_path, sm_ref).ok() + } +} + +/// This will truncate the `timestamp` to a multiple of `refresh_every`, using a stable offset +/// derived from `url` to avoid having the same cutoff for every single `url`. 
+pub fn cache_busting_key(url: &str, timestamp: u64, refresh_every: u64) -> u64 { + let url_hash = Sha256::digest(url); + let url_hash = + u64::from_le_bytes(<[u8; 8]>::try_from(&url_hash[..8]).expect("sha256 outputs >8 bytes")); + + let offset = url_hash % refresh_every; + ((timestamp - offset) / refresh_every * refresh_every) + offset +} + +#[cfg(test)] +mod tests { + use symbolicator_service::caching::CacheError; + + use super::*; + + #[test] + fn test_cache_busting_key() { + // the hashed offset for this url is `39` + let url = "https://example.com/foo.js"; + + let timestamp = 1000; + let refresh_every = 100; + + let key = cache_busting_key(url, timestamp, refresh_every); + assert_eq!(key, 939); + let key = cache_busting_key(url, timestamp + 38, refresh_every); + assert_eq!(key, 939); + let key = cache_busting_key(url, timestamp + 40, refresh_every); + assert_eq!(key, 1039); + let key = cache_busting_key(url, timestamp + 100, refresh_every); + assert_eq!(key, 1039); + } + + #[test] + fn test_strip_hostname() { + assert_eq!(strip_hostname("/absolute/unix/path"), "/absolute/unix/path"); + assert_eq!(strip_hostname("~/with/tilde"), "/with/tilde"); + assert_eq!(strip_hostname("https://example.com/"), "/"); + assert_eq!( + strip_hostname("https://example.com/some/path/file.js"), + "/some/path/file.js" + ); + } + + #[test] + fn test_get_release_file_candidate_urls() { + let url = "https://example.com/assets/bundle.min.js"; + let expected = &[ + "https://example.com/assets/bundle.min.js", + "~/assets/bundle.min.js", + ]; + let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); + assert_eq!(&actual, expected); + + let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz"; + let expected = &[ + "https://example.com/assets/bundle.min.js?foo=1&bar=baz", + "https://example.com/assets/bundle.min.js", + "~/assets/bundle.min.js?foo=1&bar=baz", + "~/assets/bundle.min.js", + ]; + let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); + 
assert_eq!(&actual, expected); + + let url = "https://example.com/assets/bundle.min.js#wat"; + let expected = &[ + "https://example.com/assets/bundle.min.js", + "~/assets/bundle.min.js", + ]; + let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); + assert_eq!(&actual, expected); + + let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz#wat"; + let expected = &[ + "https://example.com/assets/bundle.min.js?foo=1&bar=baz", + "https://example.com/assets/bundle.min.js", + "~/assets/bundle.min.js?foo=1&bar=baz", + "~/assets/bundle.min.js", + ]; + let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); + assert_eq!(&actual, expected); + + let url = "app:///_next/server/pages/_error.js"; + let expected = &[ + "app:///_next/server/pages/_error.js", + "~/_next/server/pages/_error.js", + ]; + let actual: Vec<_> = get_release_file_candidate_urls(url).collect(); + assert_eq!(&actual, expected); + } + + #[test] + fn test_extract_file_stem() { + let url = "https://example.com/bundle.js"; + assert_eq!(extract_file_stem(url), "/bundle"); + + let url = "https://example.com/bundle.min.js"; + assert_eq!(extract_file_stem(url), "/bundle"); + + let url = "https://example.com/assets/bundle.js"; + assert_eq!(extract_file_stem(url), "/assets/bundle"); + + let url = "https://example.com/assets/bundle.min.js"; + assert_eq!(extract_file_stem(url), "/assets/bundle"); + + let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz"; + assert_eq!(extract_file_stem(url), "/assets/bundle"); + + let url = "https://example.com/assets/bundle.min.js#wat"; + assert_eq!(extract_file_stem(url), "/assets/bundle"); + + let url = "https://example.com/assets/bundle.min.js?foo=1&bar=baz#wat"; + assert_eq!(extract_file_stem(url), "/assets/bundle"); + + // app:// urls + assert_eq!( + extract_file_stem("app:///_next/server/pages/_error.js"), + "/_next/server/pages/_error" + ); + assert_eq!( + extract_file_stem("app:///polyfills.e9f8f1606b76a9c9.js"), + 
"/polyfills.e9f8f1606b76a9c9" + ); + } + + #[test] + fn joining_paths() { + // (http) URLs + let base = "https://example.com/path/to/assets/bundle.min.js?foo=1&bar=baz#wat"; + + // relative + assert_eq!( + join_paths(base, "../sourcemaps/bundle.min.js.map"), + "https://example.com/path/to/sourcemaps/bundle.min.js.map" + ); + // absolute + assert_eq!(join_paths(base, "/foo.js"), "https://example.com/foo.js"); + // absolute with tilde + assert_eq!(join_paths(base, "~/foo.js"), "https://example.com/foo.js"); + + // dots + assert_eq!( + join_paths(base, ".././.././to/./sourcemaps/./bundle.min.js.map"), + "https://example.com/path/to/sourcemaps/bundle.min.js.map" + ); + + // file paths + let base = "/home/foo/bar/baz.js"; + + // relative + assert_eq!( + join_paths(base, "../sourcemaps/bundle.min.js.map"), + "/home/foo/sourcemaps/bundle.min.js.map" + ); + // absolute + assert_eq!(join_paths(base, "/foo.js"), "/foo.js"); + // absolute with tilde + assert_eq!(join_paths(base, "~/foo.js"), "/foo.js"); + + // absolute path with its own scheme + let path = "webpack:///../node_modules/scheduler/cjs/scheduler.production.min.js"; + assert_eq!(join_paths("http://example.com", path), path); + + // path with a dot in the middle + assert_eq!( + join_paths("http://example.com", "path/./to/file.min.js"), + "http://example.com/path/to/file.min.js" + ); + + assert_eq!( + join_paths("/playground/öut path/rollup/entrypoint1.js", "~/0.js.map"), + "/0.js.map" + ); + + // path with a leading dot + assert_eq!( + join_paths( + "app:///_next/static/chunks/pages/_app-569c402ef19f6d7b.js.map", + "./node_modules/@sentry/browser/esm/integrations/trycatch.js" + ), + "app:///_next/static/chunks/pages/node_modules/@sentry/browser/esm/integrations/trycatch.js" + ); + + // webpack with only a single slash + assert_eq!( + join_paths( + "app:///main-es2015.6216307eafb7335c4565.js.map", + "webpack:/node_modules/@angular/core/__ivy_ngcc__/fesm2015/core.js" + ), + 
"webpack:/node_modules/@angular/core/__ivy_ngcc__/fesm2015/core.js" + ); + + // double-slash in the middle + assert_eq!( + join_paths( + "https://foo.cloudfront.net/static//js/npm.sentry.d8b531aaf5202ddb7e90.js", + "npm.sentry.d8b531aaf5202ddb7e90.js.map" + ), + "https://foo.cloudfront.net/static/js/npm.sentry.d8b531aaf5202ddb7e90.js.map" + ); + + // tests ported from python: + // + assert_eq!( + join_paths("http://example.com/foo", "bar"), + "http://example.com/bar" + ); + assert_eq!( + join_paths("http://example.com/foo", "/bar"), + "http://example.com/bar" + ); + assert_eq!( + join_paths("https://example.com/foo", "/bar"), + "https://example.com/bar" + ); + assert_eq!( + join_paths("http://example.com/foo/baz", "bar"), + "http://example.com/foo/bar" + ); + assert_eq!( + join_paths("http://example.com/foo/baz", "/bar"), + "http://example.com/bar" + ); + assert_eq!( + join_paths("aps://example.com/foo", "/bar"), + "aps://example.com/bar" + ); + assert_eq!( + join_paths("apsunknown://example.com/foo", "/bar"), + "apsunknown://example.com/bar" + ); + assert_eq!( + join_paths("apsunknown://example.com/foo", "//aha/uhu"), + "apsunknown://aha/uhu" + ); + } + + #[test] + fn data_urls() { + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data"), + Ok(SourceMapUrl::Remote("/data".into())), + ); + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data:"), + Err(CacheError::Malformed("invalid `data:` url".into())), + ); + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data:,foo"), + Ok(SourceMapUrl::Data(String::from("foo").into())), + ); + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data:,Hello%2C%20World%21"), + Ok(SourceMapUrl::Data(String::from("Hello, World!").into())), + ); + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data:;base64,SGVsbG8sIFdvcmxkIQ=="), + Ok(SourceMapUrl::Data(String::from("Hello, World!").into())), + ); + assert_eq!( + SourceMapUrl::parse_with_prefix("/foo", "data:;base64,SGVsbG8sIFdvcmxkIQ="), + 
Err(CacheError::Malformed("invalid `data:` url".into())), + ); + } + + /// A faithful port of the monolith's in-app logic, for testing purposes. + fn is_in_app_faithful(abs_path: &str, filename: &str) -> Option { + let mut in_app = None; + if abs_path.starts_with("webpack:") { + if filename.starts_with("~/") + || filename.contains("/node_modules/") + || !filename.starts_with("./") + { + in_app = Some(false); + } else if filename.starts_with("./") { + in_app = Some(true); + } + } else if abs_path.contains("/node_modules/") { + in_app = Some(false); + } + + if abs_path.starts_with("app:") { + if NODE_MODULES_RE.is_match(filename) { + in_app = Some(false); + } else { + in_app = Some(true); + } + } + + in_app + } + + #[test] + fn test_get_function_name_valid_name() { + assert_eq!( + get_function_for_token(Some("original"), "lookedup", None), + "lookedup" + ); + } + #[test] + fn test_get_function_name_fallback_to_previous_frames_token_if_useless_name() { + assert_eq!( + get_function_for_token(None, "__webpack_require__", Some("previous_name")), + "previous_name" + ) + } + #[test] + fn test_get_function_name_fallback_to_useless_name() { + assert_eq!( + get_function_for_token(None, "__webpack_require__", None), + "__webpack_require__" + ) + } + #[test] + fn test_get_function_name_fallback_to_original_name() { + assert_eq!( + get_function_for_token(Some("original"), "__webpack_require__", None), + "original" + ) + } + + #[test] + fn test_fold_function_name() { + assert_eq!(fold_function_name("foo"), "foo"); + assert_eq!(fold_function_name("foo.foo"), "foo.foo"); + assert_eq!(fold_function_name("foo.foo.foo"), "{foo#2}.foo"); + assert_eq!(fold_function_name("bar.foo.foo"), "bar.foo.foo"); + assert_eq!(fold_function_name("bar.foo.foo.foo"), "bar.{foo#2}.foo"); + assert_eq!( + fold_function_name("bar.foo.foo.onError"), + "bar.{foo#2}.onError" + ); + assert_eq!( + fold_function_name("bar.bar.bar.foo.foo.onError"), + "{bar#3}.{foo#2}.onError" + ); + assert_eq!( + 
fold_function_name("bar.foo.foo.bar.bar.onError"), + "bar.{foo#2}.{bar#2}.onError" + ); + } + + #[test] + fn test_fixup_webpack_filename() { + let filename = "webpack:///../node_modules/@sentry/browser/esm/helpers.js"; + + assert_eq!( + fixup_webpack_filename(filename), + "../node_modules/@sentry/browser/esm/helpers.js" + ); + + let filename = "webpack:///./app/utils/requestError/createRequestError.tsx"; + + assert_eq!( + fixup_webpack_filename(filename), + "./app/utils/requestError/createRequestError.tsx" + ); + } + + #[test] + fn test_in_app_webpack() { + let abs_path = "webpack:///../node_modules/@sentry/browser/esm/helpers.js"; + let filename = "../node_modules/@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + + let abs_path = "webpack:///~/@sentry/browser/esm/helpers.js"; + let filename = "~/@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + + let abs_path = "webpack:///./@sentry/browser/esm/helpers.js"; + let filename = "./@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(true)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(true)); + + let abs_path = "webpack:///./node_modules/@sentry/browser/esm/helpers.js"; + let filename = "./node_modules/@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + } + + #[test] + fn test_in_app_app() { + let abs_path = "app:///../node_modules/@sentry/browser/esm/helpers.js"; + let filename = "../node_modules/@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + + let abs_path = "app:///../@sentry/browser/esm/helpers.js"; + let filename = 
"../@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(true)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(true)); + + let abs_path = "app:///node_modules/rxjs/internal/operators/switchMap.js"; + let filename = "node_modules/rxjs/internal/operators/switchMap.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + } + + #[test] + fn test_in_app_general() { + let abs_path = "file:///../node_modules/@sentry/browser/esm/helpers.js"; + let filename = "../node_modules/@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), Some(false)); + assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); + + let abs_path = "file:///../@sentry/browser/esm/helpers.js"; + let filename = "../@sentry/browser/esm/helpers.js"; + + assert_eq!(is_in_app(abs_path, filename), None); + assert_eq!(is_in_app_faithful(abs_path, filename), None); + } + + #[test] + fn test_generate_module() { + assert_eq!(generate_module("http://example.com/foo.js"), "foo"); + assert_eq!(generate_module("http://example.com/foo/bar.js"), "foo/bar"); + assert_eq!( + generate_module("http://example.com/js/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/javascript/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/1.0/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/v1/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/v1.0.0/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/_baz/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/1/2/3/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/abcdef0/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module( + "http://example.com/92cd589eca8235e7b373bf5ae94ebf898e3b949c/foo/bar.js" + ), + 
"foo/bar" + ); + assert_eq!( + generate_module("http://example.com/7d6d00eae0ceccdc7ee689659585d95f/foo/bar.js"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/foo/bar.coffee"), + "foo/bar" + ); + assert_eq!( + generate_module("http://example.com/foo/bar.js?v=1234"), + "foo/bar" + ); + assert_eq!(generate_module("/foo/bar.js"), "foo/bar"); + assert_eq!(generate_module("/foo/bar.ts"), "foo/bar"); + assert_eq!(generate_module("../../foo/bar.js"), "foo/bar"); + assert_eq!(generate_module("../../foo/bar.ts"), "foo/bar"); + assert_eq!(generate_module("../../foo/bar.awesome"), "foo/bar"); + assert_eq!(generate_module("../../foo/bar"), "foo/bar"); + assert_eq!( + generate_module("/foo/bar-7d6d00eae0ceccdc7ee689659585d95f.js"), + "foo/bar" + ); + assert_eq!(generate_module("/bower_components/foo/bar.js"), "foo/bar"); + assert_eq!(generate_module("/node_modules/foo/bar.js"), "foo/bar"); + assert_eq!( + generate_module( + "http://example.com/vendor.92cd589eca8235e7b373bf5ae94ebf898e3b949c.js", + ), + "vendor", + ); + assert_eq!( + generate_module( + "/a/javascripts/application-bundle-149360d3414c26adac3febdf6832e25c.min.js" + ), + "a/javascripts/application-bundle" + ); + assert_eq!( + generate_module("https://example.com/libs/libs-20150417171659.min.js"), + "libs/libs" + ); + assert_eq!( + generate_module("webpack:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), + "vendor" + ); + assert_eq!( + generate_module("webpack:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), + "vendor" + ); + assert_eq!( + generate_module("app:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), + "vendor" + ); + assert_eq!( + generate_module("app:///example/92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), + "vendor" + ); + assert_eq!( + generate_module("~/app/components/projectHeader/projectSelector.jsx"), + "app/components/projectHeader/projectSelector" + ); + } +} diff --git a/crates/symbolicator-js/tests/integration/main.rs 
b/crates/symbolicator-js/tests/integration/main.rs new file mode 100644 index 000000000..69176f3a8 --- /dev/null +++ b/crates/symbolicator-js/tests/integration/main.rs @@ -0,0 +1,4 @@ +mod sourcemap; +mod utils; + +pub use utils::*; diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__bundle_index.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__bundle_index.snap similarity index 91% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__bundle_index.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__bundle_index.snap index 19d0c6f50..90ecd54ac 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__bundle_index.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__bundle_index.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 694 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap similarity index 92% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap index aadf3f82e..2eb659bb6 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_multiple_smref_scraped.snap @@ -1,7 +1,6 @@ --- -source: 
crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 636 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap similarity index 93% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap index 30f7cefef..7d317f5de 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_node_debugid.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 549 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap similarity index 86% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap index 461571f8d..3c60003fc 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_react_native.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 613 -expression: 
response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap similarity index 91% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap index d3596c74f..9fb0fc2e3 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__e2e_source_no_header.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 579 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__fetch_error.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__fetch_error.snap similarity index 89% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__fetch_error.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__fetch_error.snap index 59272a40c..bd2c9bcc7 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__fetch_error.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__fetch_error.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 396 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: 
response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap similarity index 95% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap index 341c39b12..8110a6123 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__indexed_sourcemap_source_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 348 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap similarity index 89% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap index 61e6e8901..c6002a360 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__inlined_sources.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 266 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs 
+expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__invalid_location.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__invalid_location.snap similarity index 80% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__invalid_location.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__invalid_location.snap index 6eeedda90..13a886024 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__invalid_location.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__invalid_location.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 408 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap similarity index 75% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap index 8d71b46ac..7d01fb156 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__malformed_abs_path.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 347 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git 
a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__source_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__source_expansion.snap similarity index 90% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__source_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__source_expansion.snap index a318b7bda..cdfb987ac 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__source_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__source_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 240 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap similarity index 93% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap index 0c6807fd9..5ee0aafe5 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_embedded_source_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 209 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: 
- frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap similarity index 97% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap index ee4f200fb..180dbd509 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 118 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap similarity index 88% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap index ef6895493..8adffc4ed 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_nofiles_source_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 298 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs 
+expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap similarity index 93% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap index c3d6fbf2a..78027f041 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__sourcemap_source_expansion.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 169 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__webpack.snap b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__webpack.snap similarity index 95% rename from crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__webpack.snap rename to crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__webpack.snap index e5fc2fdb6..4428d61c7 100644 --- a/crates/symbolicator-service/tests/integration/snapshots/integration__sourcemap__webpack.snap +++ b/crates/symbolicator-js/tests/integration/snapshots/integration__sourcemap__webpack.snap @@ -1,7 +1,6 @@ --- -source: crates/symbolicator-service/tests/integration/sourcemap.rs -assertion_line: 480 -expression: response.unwrap() +source: crates/symbolicator-js/tests/integration/sourcemap.rs +expression: response --- stacktraces: - frames: diff --git 
a/crates/symbolicator-service/tests/integration/sourcemap.rs b/crates/symbolicator-js/tests/integration/sourcemap.rs similarity index 95% rename from crates/symbolicator-service/tests/integration/sourcemap.rs rename to crates/symbolicator-js/tests/integration/sourcemap.rs index 27a7be446..428f0616c 100644 --- a/crates/symbolicator-service/tests/integration/sourcemap.rs +++ b/crates/symbolicator-js/tests/integration/sourcemap.rs @@ -3,11 +3,9 @@ use std::sync::Arc; use reqwest::Url; use serde_json::json; +use symbolicator_js::interface::{JsFrame, JsStacktrace, SymbolicateJsStacktraces}; use symbolicator_service::services::ScrapingConfig; -use symbolicator_service::types::{JsFrame, RawObjectInfo, Scope}; -use symbolicator_service::{ - services::symbolication::SymbolicateJsStacktraces, types::JsStacktrace, -}; +use symbolicator_service::types::{RawObjectInfo, Scope}; use symbolicator_sources::{SentrySourceConfig, SourceId}; use crate::{assert_snapshot, setup_service}; @@ -115,7 +113,7 @@ async fn test_sourcemap_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -166,7 +164,7 @@ async fn test_sourcemap_source_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -206,7 +204,7 @@ async fn test_sourcemap_embedded_source_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -237,7 +235,7 @@ async fn test_source_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), 
None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -263,7 +261,7 @@ async fn test_inlined_sources() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -295,7 +293,7 @@ async fn test_sourcemap_nofiles_source_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -345,7 +343,7 @@ async fn test_indexed_sourcemap_source_expansion() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -364,7 +362,7 @@ async fn test_malformed_abs_path() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -393,7 +391,7 @@ async fn test_fetch_error() { request.scraping.enabled = true; let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -426,7 +424,7 @@ async fn test_invalid_location() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -477,7 +475,7 @@ async fn test_webpack() { let request = make_js_request(source, frames, "[]", String::from("release"), None); let response = 
symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -511,7 +509,7 @@ async fn test_dart_async_name() { let response = symbolication.symbolicate_js(request).await; assert_eq!( - response.unwrap().stacktraces[0].frames[0].function, + response.stacktraces[0].frames[0].function, // Without implemented workaround, it would yield `$async$be` here. // We want to assert that it uses token name instead of scope name in case of async rewrite. Some("main".into()) @@ -546,7 +544,7 @@ async fn e2e_node_debugid() { let request = make_js_request(source, frames, modules, None, None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -576,7 +574,7 @@ async fn e2e_source_no_header() { let request = make_js_request(source, frames, modules, String::from("some-release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -610,7 +608,7 @@ async fn e2e_react_native() { let request = make_js_request(source, frames, modules, String::from("some-release"), None); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -633,7 +631,7 @@ async fn e2e_multiple_smref_scraped() { let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } #[tokio::test] @@ -663,7 +661,7 @@ async fn sorted_bundles() { let response = symbolication.symbolicate_js(request).await; assert_eq!( - response.unwrap().stacktraces[0].frames[0].function, + response.stacktraces[0].frames[0].function, // The `01_wrong` bundle would yield `thisIsWrong` here. // We want to assert that bundles have stable sort order according to their `url`. 
// The `url` contains their `id` as it comes from sentry. This is the best we can do right now. @@ -691,7 +689,7 @@ async fn bundle_index() { let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } // A manually triggered test that can be used to locally debug monolith behavior. Requires a list @@ -736,5 +734,5 @@ async fn test_manual_processing() { let request = make_js_request(source, frames, modules, release, dist); let response = symbolication.symbolicate_js(request).await; - assert_snapshot!(response.unwrap()); + assert_snapshot!(response); } diff --git a/crates/symbolicator-js/tests/integration/utils.rs b/crates/symbolicator-js/tests/integration/utils.rs new file mode 100644 index 000000000..858d51683 --- /dev/null +++ b/crates/symbolicator-js/tests/integration/utils.rs @@ -0,0 +1,35 @@ +use symbolicator_js::SourceMapService; +use symbolicator_service::config::Config; +use symbolicator_service::services::SharedServices; +use symbolicator_test as test; + +pub use test::{assert_snapshot, fixture, read_fixture, source_config, symbol_server, Server}; + +/// Setup tests and create a test service. +/// +/// This function returns a tuple containing the service to test, and a temporary cache +/// directory. The directory is cleaned up when the [`TempDir`] instance is dropped. Keep it as +/// guard until the test has finished. +/// +/// The service is configured with `connect_to_reserved_ips = True`. This allows to use a local +/// symbol server to test object file downloads. +/// The `update_config` closure can modify any default configuration if needed before the server is +/// started. 
+pub fn setup_service(update_config: impl FnOnce(&mut Config)) -> (SourceMapService, test::TempDir) { + test::setup(); + + let cache_dir = test::tempdir(); + + let mut config = Config { + cache_dir: Some(cache_dir.path().to_owned()), + connect_to_reserved_ips: true, + ..Default::default() + }; + update_config(&mut config); + + let handle = tokio::runtime::Handle::current(); + let shared_services = SharedServices::new(config, handle).unwrap(); + let js = SourceMapService::new(&shared_services); + + (js, cache_dir) +} diff --git a/crates/symbolicator-service/src/caching/cache_error.rs b/crates/symbolicator-service/src/caching/cache_error.rs index f2cba64f4..5faa6e8fc 100644 --- a/crates/symbolicator-service/src/caching/cache_error.rs +++ b/crates/symbolicator-service/src/caching/cache_error.rs @@ -141,7 +141,7 @@ impl CacheError { } #[track_caller] - pub(crate) fn from_std_error(e: E) -> Self { + pub fn from_std_error(e: E) -> Self { let dynerr: &dyn std::error::Error = &e; // tracing expects a `&dyn Error` tracing::error!(error = dynerr); Self::InternalError diff --git a/crates/symbolicator-service/src/js/mod.rs b/crates/symbolicator-service/src/js/mod.rs deleted file mode 100644 index a461aa4f8..000000000 --- a/crates/symbolicator-service/src/js/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod metrics; - -pub use metrics::*; diff --git a/crates/symbolicator-service/src/lib.rs b/crates/symbolicator-service/src/lib.rs index f596601d8..4094fd5d4 100644 --- a/crates/symbolicator-service/src/lib.rs +++ b/crates/symbolicator-service/src/lib.rs @@ -6,7 +6,6 @@ pub mod metrics; pub mod caching; pub mod config; -mod js; pub mod services; pub mod types; pub mod utils; diff --git a/crates/symbolicator-service/src/services/caches/mod.rs b/crates/symbolicator-service/src/services/caches/mod.rs index bd08c8caa..b037ee744 100644 --- a/crates/symbolicator-service/src/services/caches/mod.rs +++ b/crates/symbolicator-service/src/services/caches/mod.rs @@ -1,8 +1,6 @@ //! 
The various caches used by the core Symbolication Service are placed here. -mod bundle_index; mod sourcefiles; pub mod versions; -pub use bundle_index::BundleIndexCache; pub use sourcefiles::{ByteViewString, SourceFilesCache}; diff --git a/crates/symbolicator-service/src/services/download/mod.rs b/crates/symbolicator-service/src/services/download/mod.rs index 999df6732..4e05f4101 100644 --- a/crates/symbolicator-service/src/services/download/mod.rs +++ b/crates/symbolicator-service/src/services/download/mod.rs @@ -3,14 +3,13 @@ //! The sources are described on //! -use std::collections::{BTreeSet, VecDeque}; +use std::collections::VecDeque; use std::convert::TryInto; use std::error::Error; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant, SystemTime}; -use ::sentry::types::DebugId; use ::sentry::SentryFutureExt; use futures::prelude::*; use reqwest::StatusCode; @@ -22,8 +21,7 @@ pub use symbolicator_sources::{ SourceFilters, SourceLocation, }; use symbolicator_sources::{ - FilesystemRemoteFile, GcsRemoteFile, HttpRemoteFile, S3RemoteFile, SentrySourceConfig, - SourceLocationIter, + FilesystemRemoteFile, GcsRemoteFile, HttpRemoteFile, S3RemoteFile, SourceLocationIter, }; use crate::caching::{CacheEntry, CacheError}; @@ -33,8 +31,6 @@ use crate::utils::gcs::GcsError; use crate::utils::http::DownloadTimeouts; use crate::utils::sentry::ConfigureScope; -use self::sentry::JsLookupResult; - mod filesystem; mod gcs; mod http; @@ -213,8 +209,9 @@ impl HostDenyList { /// rate limits and the concurrency it uses. 
#[derive(Debug)] pub struct DownloadService { - runtime: tokio::runtime::Handle, - timeouts: DownloadTimeouts, + pub runtime: tokio::runtime::Handle, + pub timeouts: DownloadTimeouts, + pub trusted_client: reqwest::Client, sentry: sentry::SentryDownloader, http: http::HttpDownloader, s3: s3::S3Downloader, @@ -235,6 +232,7 @@ impl DownloadService { Arc::new(Self { runtime: runtime.clone(), timeouts, + trusted_client: trusted_client.clone(), sentry: sentry::SentryDownloader::new(trusted_client, runtime, timeouts, in_memory), http: http::HttpDownloader::new(restricted_client.clone(), timeouts), s3: s3::S3Downloader::new(timeouts, in_memory.s3_client_capacity), @@ -426,21 +424,6 @@ impl DownloadService { remote_files } - /// Look up a list of bundles or individual artifact files covering the - /// `debug_ids` and `file_stems` (using the `release` + `dist`). - pub async fn lookup_js_artifacts( - &self, - source: Arc, - debug_ids: BTreeSet, - file_stems: BTreeSet, - release: Option<&str>, - dist: Option<&str>, - ) -> CacheEntry> { - self.sentry - .lookup_js_artifacts(source, debug_ids, file_stems, release, dist) - .await - } - /// Whether this download service is allowed to connect to reserved ip addresses. pub fn can_connect_to_reserved_ips(&self) -> bool { self.connect_to_reserved_ips diff --git a/crates/symbolicator-service/src/services/download/sentry.rs b/crates/symbolicator-service/src/services/download/sentry.rs index a171c4c52..d2da4b64c 100644 --- a/crates/symbolicator-service/src/services/download/sentry.rs +++ b/crates/symbolicator-service/src/services/download/sentry.rs @@ -2,12 +2,10 @@ //! //! This allows to fetch files which were directly uploaded to Sentry itself. 
-use std::collections::{BTreeMap, BTreeSet}; use std::fmt; use std::sync::Arc; use std::time::Duration; -use sentry::types::DebugId; use sentry::SentryFutureExt; use serde::de::DeserializeOwned; use serde::Deserialize; @@ -21,7 +19,6 @@ use symbolicator_sources::{ use super::{FileType, USER_AGENT}; use crate::caching::{CacheEntry, CacheError}; use crate::config::InMemoryCacheConfig; -use crate::types::ResolvedWith; use crate::utils::futures::{m, measure, CancelOnDrop}; use crate::utils::http::DownloadTimeouts; @@ -76,66 +73,19 @@ impl SentryFileType { } } -#[derive(Clone, Debug, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -enum RawJsLookupResult { - Bundle { - id: SentryFileId, - url: Url, - #[serde(default)] - resolved_with: ResolvedWith, - }, - File { - id: SentryFileId, - url: Url, - abs_path: String, - #[serde(default)] - headers: ArtifactHeaders, - #[serde(default)] - resolved_with: ResolvedWith, - }, -} - -pub type ArtifactHeaders = BTreeMap; - -/// The Result of looking up JS Artifacts. -#[derive(Clone, Debug)] -pub enum JsLookupResult { - /// This is an `ArtifactBundle`. - ArtifactBundle { - /// The [`RemoteFile`] to download this bundle from. - remote_file: RemoteFile, - resolved_with: ResolvedWith, - }, - /// This is an individual artifact file. - IndividualArtifact { - /// The [`RemoteFile`] to download this artifact from. - remote_file: RemoteFile, - /// The absolute path (also called `url`) of the artifact. - abs_path: String, - /// Arbitrary headers of this file, such as a `Sourcemap` reference. - headers: ArtifactHeaders, - resolved_with: ResolvedWith, - }, -} - #[derive(Clone, Debug, PartialEq, Eq, Hash)] -struct SearchQuery { - index_url: Url, - token: String, +pub struct SearchQuery { + pub index_url: Url, + pub token: String, } /// An LRU Cache for Sentry DIF (Native Debug Files) lookups. type SentryDifCache = moka::future::Cache>>; -/// An LRU Cache for Sentry JS Artifact lookups. 
-type SentryJsCache = moka::future::Cache>>; - pub struct SentryDownloader { client: reqwest::Client, runtime: tokio::runtime::Handle, dif_cache: SentryDifCache, - js_cache: SentryJsCache, timeouts: DownloadTimeouts, } @@ -143,7 +93,6 @@ impl fmt::Debug for SentryDownloader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SentryDownloader") .field("dif_cache", &self.dif_cache.entry_count()) - .field("js_cache", &self.js_cache.entry_count()) .field("timeouts", &self.timeouts) .finish() } @@ -160,22 +109,17 @@ impl SentryDownloader { .max_capacity(in_memory.sentry_index_capacity) .time_to_live(in_memory.sentry_index_ttl) .build(); - let js_cache = SentryJsCache::builder() - .max_capacity(in_memory.sentry_index_capacity) - .time_to_live(in_memory.sentry_index_ttl) - .build(); Self { client, runtime, dif_cache, - js_cache, timeouts, } } /// Make a request to sentry, parse the result as a JSON SearchResult list. #[tracing::instrument(skip_all)] - async fn fetch_sentry_json( + pub async fn fetch_sentry_json( client: &reqwest::Client, query: &SearchQuery, ) -> CacheEntry> @@ -289,115 +233,6 @@ impl SentryDownloader { Ok(file_ids) } - /// Look up a list of bundles or individual artifact files covering the - /// `debug_ids` and `file_stems` (using the `release` + `dist`). - pub async fn lookup_js_artifacts( - &self, - source: Arc, - debug_ids: BTreeSet, - file_stems: BTreeSet, - release: Option<&str>, - dist: Option<&str>, - ) -> CacheEntry> { - let mut lookup_url = source.url.clone(); - { - let mut query = lookup_url.query_pairs_mut(); - - if let Some(release) = release { - query.append_pair("release", release); - - // A `url` is only valid in combination with a `release`. 
- for file_stem in file_stems { - query.append_pair("url", &file_stem); - } - } - if let Some(dist) = dist { - query.append_pair("dist", dist); - } - for debug_id in debug_ids { - query.append_pair("debug_id", &debug_id.to_string()); - } - } - - // NOTE: `http::Uri` has a hard limit defined, and reqwest unconditionally unwraps such - // errors, when converting between `Url` to `Uri`. To avoid a panic in that case, we - // duplicate the check here to gracefully error out. - if lookup_url.as_str().len() > (u16::MAX - 1) as usize { - return Err(CacheError::DownloadError("uri too long".into())); - } - - let query = SearchQuery { - index_url: lookup_url, - token: source.token.clone(), - }; - - metric!(counter("source.sentry.js_lookup.access") += 1); - - let init = Box::pin(async { - metric!(counter("source.sentry.js_lookup.computation") += 1); - tracing::debug!( - "Fetching list of Sentry JS artifacts from {}", - &query.index_url - ); - - let future = { - let client = self.client.clone(); - let query = query.clone(); - async move { super::retry(|| Self::fetch_sentry_json(&client, &query)).await } - }; - - let future = - CancelOnDrop::new(self.runtime.spawn(future.bind_hub(sentry::Hub::current()))); - - let timeout = Duration::from_secs(30); - let future = tokio::time::timeout(timeout, future); - let future = measure( - "service.download.lookup_js_artifacts", - m::timed_result, - future, - ); - - future - .await - .map_err(|_| CacheError::Timeout(timeout))? - .map_err(|_| CacheError::InternalError)? 
- }); - - let entries = self - .js_cache - .entry_by_ref(&query) - .or_insert_with_if(init, |entry| entry.is_err()) - .await - .into_value()?; - - let results = entries - .iter() - .map(|raw| match raw { - RawJsLookupResult::Bundle { - id, - url, - resolved_with, - } => JsLookupResult::ArtifactBundle { - remote_file: make_remote_file(&source, id, url), - resolved_with: *resolved_with, - }, - RawJsLookupResult::File { - id, - url, - abs_path, - headers, - resolved_with, - } => JsLookupResult::IndividualArtifact { - remote_file: make_remote_file(&source, id, url), - abs_path: abs_path.clone(), - headers: headers.clone(), - resolved_with: *resolved_with, - }, - }) - .collect(); - Ok(results) - } - /// Downloads a source hosted on Sentry. pub async fn download_source( &self, @@ -417,28 +252,6 @@ impl SentryDownloader { } } -/// Transforms the given `url` into a [`RemoteFile`]. -/// -/// The problem here is being forward-compatible to a future in which the Sentry API returns -/// pre-authenticated Urls on some external file storage service. -/// Whereas right now, these files are still being served from a Sentry API endpoint, which -/// needs to be authenticated via a `token` that we do not want to leak to any public Url, as -/// well as using a restricted IP that is being blocked for arbitrary HTTP files. -fn make_remote_file( - source: &Arc, - file_id: &SentryFileId, - url: &Url, -) -> RemoteFile { - let use_credentials = url.as_str().starts_with(source.url.as_str()); - SentryRemoteFile::new( - Arc::clone(source), - use_credentials, - file_id.clone(), - Some(url.clone()), - ) - .into() -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/symbolicator-service/src/services/mod.rs b/crates/symbolicator-service/src/services/mod.rs index e203a211c..8928765b4 100644 --- a/crates/symbolicator-service/src/services/mod.rs +++ b/crates/symbolicator-service/src/services/mod.rs @@ -1,10 +1,8 @@ -//! 
Provides the internal Symbolicator services and a way to initialize them. +//! Provides the internal shared Symbolicator services and a way to initialize them. //! //! Symbolicator operates a number of independent services defined in this module for downloading, -//! cache management, and symbolication. -//! The main [`create_service`] fn creates all these internal services according to the provided -//! [`Config`] and returns a [`SymbolicationActor`] as the main Symbolicator interface, and an -//! [`ObjectsActor`] which abstracts object access. +//! cache management, and file access. +//! [`SharedServices`] initializes all these internal services according to the provided [`Config`]. //! //! The internal services require a separate asynchronous runtimes dedicated for I/O-intensive work, //! such as downloads and access to the shared cache. @@ -13,11 +11,10 @@ use std::sync::Arc; use anyhow::{Context, Result}; -use crate::caching::{Caches, SharedCacheService}; +use crate::caching::{Caches, SharedCacheRef, SharedCacheService}; use crate::config::Config; pub mod bitcode; -mod bundle_index; pub mod caches; pub mod cficaches; pub mod derived; @@ -28,92 +25,56 @@ mod minidump; mod module_lookup; pub mod objects; pub mod ppdb_caches; -pub mod sourcemap; -mod sourcemap_lookup; pub mod symbolication; pub mod symcaches; -use self::bitcode::BitcodeService; -use self::caches::{BundleIndexCache, SourceFilesCache}; -use self::cficaches::CfiCacheActor; +use self::caches::SourceFilesCache; use self::download::DownloadService; -use self::il2cpp::Il2cppService; use self::objects::ObjectsActor; -use self::ppdb_caches::PortablePdbCacheActor; -use self::sourcemap::SourceMapService; -use self::symbolication::SymbolicationActor; -use self::symcaches::SymCacheActor; pub use self::symbolication::ScrapingConfig; pub use fetch_file::fetch_file; -pub fn create_service( - config: &Config, - io_pool: tokio::runtime::Handle, -) -> Result<(SymbolicationActor, ObjectsActor)> { - let caches = 
Caches::from_config(config).context("failed to create local caches")?; - caches - .clear_tmp(config) - .context("failed to clear tmp caches")?; - - let downloader = DownloadService::new(config, io_pool.clone()); - - let shared_cache = SharedCacheService::new(config.shared_cache.clone(), io_pool); - - let sourcefiles_cache = Arc::new(SourceFilesCache::new( - caches.sourcefiles, - shared_cache.clone(), - downloader.clone(), - )); - - let objects = ObjectsActor::new( - caches.object_meta, - caches.objects, - shared_cache.clone(), - downloader.clone(), - ); - - let bitcode = BitcodeService::new(caches.auxdifs, shared_cache.clone(), downloader.clone()); - - let il2cpp = Il2cppService::new(caches.il2cpp, shared_cache.clone(), downloader.clone()); - - let symcaches = SymCacheActor::new( - caches.symcaches, - shared_cache.clone(), - objects.clone(), - bitcode, - il2cpp, - ); - - let cficaches = CfiCacheActor::new(caches.cficaches, shared_cache.clone(), objects.clone()); - - let ppdb_caches = - PortablePdbCacheActor::new(caches.ppdb_caches, shared_cache.clone(), objects.clone()); - - let bundle_index_cache = BundleIndexCache::new( - caches.bundle_index, - shared_cache.clone(), - downloader.clone(), - ); - - let sourcemaps = SourceMapService::new( - objects.clone(), - sourcefiles_cache.clone(), - bundle_index_cache, - caches.sourcemap_caches, - shared_cache, - downloader, - ); - - let symbolication = SymbolicationActor::new( - objects.clone(), - symcaches, - cficaches, - ppdb_caches, - caches.diagnostics, - sourcemaps, - sourcefiles_cache, - ); +pub struct SharedServices { + pub config: Config, + pub caches: Caches, + pub download_svc: Arc, + pub shared_cache: SharedCacheRef, + pub objects: ObjectsActor, + pub sourcefiles_cache: Arc, +} - Ok((symbolication, objects)) +impl SharedServices { + pub fn new(config: Config, io_pool: tokio::runtime::Handle) -> Result { + let caches = Caches::from_config(&config).context("failed to create local caches")?; + caches + 
.clear_tmp(&config) + .context("failed to clear tmp caches")?; + + let download_svc = DownloadService::new(&config, io_pool.clone()); + + let shared_cache = SharedCacheService::new(config.shared_cache.clone(), io_pool); + + let sourcefiles_cache = Arc::new(SourceFilesCache::new( + caches.sourcefiles.clone(), + shared_cache.clone(), + download_svc.clone(), + )); + + let objects = ObjectsActor::new( + caches.object_meta.clone(), + caches.objects.clone(), + shared_cache.clone(), + download_svc.clone(), + ); + + Ok(Self { + config, + caches, + download_svc, + shared_cache, + objects, + sourcefiles_cache, + }) + } } diff --git a/crates/symbolicator-service/src/services/sourcemap.rs b/crates/symbolicator-service/src/services/sourcemap.rs deleted file mode 100644 index c5c587fc9..000000000 --- a/crates/symbolicator-service/src/services/sourcemap.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Service for retrieving Artifacts and SourceMap. - -use crate::caching::{Cache, Cacher, SharedCacheRef}; -use crate::services::download::DownloadService; -use std::sync::Arc; - -use super::caches::{BundleIndexCache, SourceFilesCache}; -use super::objects::ObjectsActor; -use super::sourcemap_lookup::FetchSourceMapCacheInternal; - -#[derive(Debug, Clone)] -pub struct SourceMapService { - pub(crate) objects: ObjectsActor, - pub(crate) sourcefiles_cache: Arc, - pub(crate) bundle_index_cache: Arc, - pub(crate) sourcemap_caches: Arc>, - pub(crate) download_svc: Arc, -} - -impl SourceMapService { - pub fn new( - objects: ObjectsActor, - sourcefiles_cache: Arc, - bundle_index_cache: BundleIndexCache, - sourcemap_cache: Cache, - shared_cache: SharedCacheRef, - download_svc: Arc, - ) -> Self { - Self { - objects, - sourcefiles_cache, - bundle_index_cache: Arc::new(bundle_index_cache), - sourcemap_caches: Arc::new(Cacher::new(sourcemap_cache, shared_cache)), - download_svc, - } - } -} diff --git a/crates/symbolicator-service/src/services/symbolication/js.rs 
b/crates/symbolicator-service/src/services/symbolication/js.rs deleted file mode 100644 index 07ba0439e..000000000 --- a/crates/symbolicator-service/src/services/symbolication/js.rs +++ /dev/null @@ -1,786 +0,0 @@ -//! Symbolication of JS/SourceMap requests. -//! -//! # Metrics -//! -//! - `js.unsymbolicated_frames`: The number of unsymbolicated frames, per event. -//! Should be `0` in the best case, as we obviously should symbolicate :-) -//! -//! - `js.missing_sourcescontent`: The number of frames, per event, that have no embedded sources. -//! Should be `0` in the best case, as the SourceMaps we use should have embedded sources. -//! If they don’t, we have to fall back to applying source context from elsewhere. -//! -//! - `js.api_requests`: The number of (potentially cached) API requests, per event. -//! Should be `1` in the best case, as `prefetch_artifacts` should provide us with everything we need. -//! -//! - `js.queried_bundles` / `js.fetched_bundles`: The number of artifact bundles the API gave us, -//! and the ones we ended up using. -//! Should both be `1` in the best case, as a single bundle should ideally serve all our needs. -//! Otherwise `queried` and `fetched` should be the same, as a difference between the two means -//! that multiple API requests gave us duplicated bundles. -//! -//! - `js.queried_artifacts` / `js.fetched_artifacts`: The number of individual artifacts the API -//! gave us, and the ones we ended up using. -//! Should both be `0` as we should not be using individual artifacts but rather bundles. -//! Otherwise, `queried` should be close to `fetched`. If they differ, it means the API is sending -//! us a lot of candidate artifacts that we don’t end up using, or multiple API requests give us -//! duplicated artifacts. -//! -//! - `js.scraped_files`: The number of files that were scraped from the Web. -//! Should be `0`, as we should find/use files from within bundles or as individual artifacts. 
- -use std::collections::BTreeSet; -use std::sync::Arc; - -use once_cell::sync::Lazy; -use regex::Regex; -use reqwest::Url; -use symbolic::sourcemapcache::{ScopeLookupResult, SourcePosition}; -use symbolicator_sources::SentrySourceConfig; - -use crate::caching::CacheError; -use crate::services::sourcemap_lookup::{join_paths, strip_hostname, SourceMapLookup}; -use crate::services::ScrapingConfig; -use crate::types::{ - CompletedJsSymbolicationResponse, JsFrame, JsModuleError, JsModuleErrorKind, JsStacktrace, - RawObjectInfo, Scope, -}; - -use super::source_context::get_context_lines; -use super::SymbolicationActor; - -static WEBPACK_NAMESPACE_RE: Lazy = - Lazy::new(|| Regex::new(r"^webpack://[a-zA-Z0-9_\-@\.]+/\./").unwrap()); -static NODE_MODULES_RE: Lazy = Lazy::new(|| Regex::new(r"\bnode_modules/").unwrap()); - -#[derive(Debug, Clone)] -pub struct SymbolicateJsStacktraces { - pub scope: Scope, - pub source: Arc, - pub release: Option, - pub dist: Option, - pub debug_id_index: Option, - pub url_index: Option, - pub stacktraces: Vec, - pub modules: Vec, - pub scraping: ScrapingConfig, - /// Whether to apply source context for the stack frames. 
- pub apply_source_context: bool, -} - -impl SymbolicationActor { - #[tracing::instrument(skip_all)] - pub async fn symbolicate_js( - &self, - mut request: SymbolicateJsStacktraces, - ) -> anyhow::Result { - let mut raw_stacktraces = std::mem::take(&mut request.stacktraces); - let apply_source_context = request.apply_source_context; - let mut lookup = SourceMapLookup::new(self.sourcemaps.clone(), request).await; - lookup.prepare_modules(&mut raw_stacktraces[..]); - - let mut unsymbolicated_frames = 0; - let mut missing_sourcescontent = 0; - - let num_stacktraces = raw_stacktraces.len(); - let mut stacktraces = Vec::with_capacity(num_stacktraces); - - let mut errors = BTreeSet::new(); - for raw_stacktrace in &mut raw_stacktraces { - let num_frames = raw_stacktrace.frames.len(); - let mut symbolicated_frames = Vec::with_capacity(num_frames); - let mut callsite_fn_name = None; - - for raw_frame in &mut raw_stacktrace.frames { - match symbolicate_js_frame( - &mut lookup, - raw_frame, - &mut errors, - std::mem::take(&mut callsite_fn_name), - apply_source_context, - &mut missing_sourcescontent, - ) - .await - { - Ok(mut frame) => { - std::mem::swap(&mut callsite_fn_name, &mut frame.token_name); - symbolicated_frames.push(frame); - } - Err(err) => { - unsymbolicated_frames += 1; - errors.insert(JsModuleError { - abs_path: raw_frame.abs_path.clone(), - kind: err, - }); - symbolicated_frames.push(raw_frame.clone()); - } - } - } - - stacktraces.push(JsStacktrace { - frames: symbolicated_frames, - }); - } - - lookup.record_metrics(); - metric!(time_raw("js.unsymbolicated_frames") = unsymbolicated_frames); - metric!(time_raw("js.missing_sourcescontent") = missing_sourcescontent); - - let (used_artifact_bundles, scraping_attempts) = lookup.into_records(); - - Ok(CompletedJsSymbolicationResponse { - stacktraces, - raw_stacktraces, - errors: errors.into_iter().collect(), - used_artifact_bundles, - scraping_attempts, - }) - } -} - -async fn symbolicate_js_frame( - lookup: &mut 
SourceMapLookup, - raw_frame: &mut JsFrame, - errors: &mut BTreeSet, - callsite_fn_name: Option, - should_apply_source_context: bool, - missing_sourcescontent: &mut u64, -) -> Result { - // we check for a valid line (i.e. >= 1) first, as we want to avoid resolving / scraping the minified - // file in that case. we frequently saw 0 line/col values in combination with non-js files, - // and we want to avoid scraping a bunch of html files in that case. - let line = if raw_frame.lineno > 0 { - raw_frame.lineno - } else { - return Err(JsModuleErrorKind::InvalidLocation { - line: raw_frame.lineno, - col: raw_frame.colno, - }); - }; - - let col = raw_frame.colno.unwrap_or_default(); - - let module = lookup.get_module(&raw_frame.abs_path).await; - - tracing::trace!( - abs_path = &raw_frame.abs_path, - ?module, - "Module for `abs_path`" - ); - - // Apply source context to the raw frame. If it fails, we bail early, as it's not possible - // to construct a `SourceMapCache` without the minified source anyway. - match &module.minified_source.entry { - Ok(minified_source) => { - if should_apply_source_context { - apply_source_context(raw_frame, &minified_source.contents)? - } - } - Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { - return Err(JsModuleErrorKind::ScrapingDisabled); - } - Err(_) => return Err(JsModuleErrorKind::MissingSource), - } - - let sourcemap_label = &module - .minified_source - .entry - .as_ref() - .map(|entry| entry.sourcemap_url()) - .ok() - .flatten() - .unwrap_or_else(|| raw_frame.abs_path.clone()); - - let (smcache, resolved_with) = match &module.smcache { - Some(smcache) => match &smcache.entry { - Ok(entry) => (entry, smcache.resolved_with), - Err(CacheError::Malformed(_)) => { - // If we successfully resolved the sourcemap but it's broken somehow, - // We should still record that we resolved it. 
- raw_frame.data.resolved_with = Some(smcache.resolved_with); - return Err(JsModuleErrorKind::MalformedSourcemap { - url: sourcemap_label.to_owned(), - }); - } - Err(CacheError::DownloadError(msg)) if msg == "Scraping disabled" => { - return Err(JsModuleErrorKind::ScrapingDisabled); - } - Err(_) => return Err(JsModuleErrorKind::MissingSourcemap), - }, - // In case it's just a source file, with no sourcemap reference or any debug id, we bail. - None => return Ok(raw_frame.clone()), - }; - - let mut frame = raw_frame.clone(); - frame.data.sourcemap = Some(sourcemap_label.clone()); - frame.data.resolved_with = Some(resolved_with); - - let sp = SourcePosition::new(line - 1, col.saturating_sub(1)); - let token = smcache - .get() - .lookup(sp) - .ok_or(JsModuleErrorKind::InvalidLocation { - line, - col: Some(col), - })?; - - // We consider the frame successfully symbolicated if we can resolve the minified source position - // to a token. - frame.data.symbolicated = true; - - // Store the resolved token name, which can be used for function name resolution in next frame. - // Refer to https://blog.sentry.io/2022/11/30/how-we-made-javascript-stack-traces-awesome/ - // for more details about "caller naming". 
- frame.token_name = token.name().map(|n| n.to_owned()); - - let function_name = match token.scope() { - ScopeLookupResult::NamedScope(name) => { - let scope_name = name.to_string(); - // Special case for Dart async function rewrites - // https://github.com/dart-lang/sdk/blob/fab753ea277c96c7699920852dabf977a7065fa5/pkg/compiler/lib/src/js_backend/namer.dart#L1845-L1866 - // ref: https://github.com/getsentry/symbolic/issues/791 - if name.starts_with("$async$") { - token.name().map_or_else(|| scope_name, |n| n.to_owned()) - } else { - scope_name - } - } - ScopeLookupResult::AnonymousScope => "".to_string(), - ScopeLookupResult::Unknown => { - // Fallback to minified function name - raw_frame - .function - .clone() - .unwrap_or("".to_string()) - } - }; - - frame.function = Some(fold_function_name(get_function_for_token( - raw_frame.function.as_deref(), - &function_name, - callsite_fn_name.as_deref(), - ))); - - if let Some(filename) = token.file_name() { - let mut filename = filename.to_string(); - frame.abs_path = module - .source_file_base() - .map(|base| join_paths(base, &filename)) - .unwrap_or_else(|| filename.clone()); - - if filename.starts_with("webpack:") { - filename = fixup_webpack_filename(&filename); - frame.module = Some(generate_module(&filename)); - } - - frame.in_app = is_in_app(&frame.abs_path, &filename); - - if frame.module.is_none() - && (frame.abs_path.starts_with("http:") - || frame.abs_path.starts_with("https:") - || frame.abs_path.starts_with("webpack:") - || frame.abs_path.starts_with("app:")) - { - frame.module = Some(generate_module(&frame.abs_path)); - } - - frame.filename = Some(filename); - } - - frame.lineno = token.line().saturating_add(1); - frame.colno = Some(token.column().saturating_add(1)); - - if !should_apply_source_context { - return Ok(frame); - } - - if let Some(file) = token.file() { - if let Some(file_source) = file.source() { - if let Err(err) = apply_source_context(&mut frame, file_source) { - errors.insert(JsModuleError 
{ - abs_path: raw_frame.abs_path.clone(), - kind: err, - }); - } - } else { - *missing_sourcescontent += 1; - - // If we have no source context from within the `SourceMapCache`, - // fall back to applying the source context from a raw artifact file - let file_key = file - .name() - .and_then(|filename| module.source_file_key(filename)); - - let source_file = match &file_key { - Some(key) => &lookup.get_source_file(key.clone()).await.entry, - None => &Err(CacheError::NotFound), - }; - - if source_file - .as_ref() - .map_err(|_| JsModuleErrorKind::MissingSource) - .and_then(|file| apply_source_context(&mut frame, &file.contents)) - .is_err() - { - // It's arguable whether we should collect it, but this is what monolith does now, - // and it might be useful to indicate incorrect sentry-cli rewrite behavior. - errors.insert(JsModuleError { - abs_path: raw_frame.abs_path.clone(), - kind: JsModuleErrorKind::MissingSourceContent { - source: file_key - .and_then(|key| key.abs_path().map(|path| path.to_string())) - .unwrap_or_default(), - sourcemap: sourcemap_label.clone(), - }, - }); - } - } - } - - Ok(frame) -} - -fn apply_source_context(frame: &mut JsFrame, source: &str) -> Result<(), JsModuleErrorKind> { - let lineno = frame.lineno as usize; - let column = frame.colno.map(|col| col as usize); - - if let Some((pre_context, context_line, post_context)) = - get_context_lines(source, lineno, column, None) - { - frame.pre_context = pre_context; - frame.context_line = Some(context_line); - frame.post_context = post_context; - } - - Ok(()) -} - -// Names that do not provide any reasonable value, and that can possibly obstruct -// better available names. In case we encounter one, we fallback to current frame fn name if available. -const USELESS_FN_NAMES: [&str; 3] = ["", "__webpack_require__", "__webpack_modules__"]; - -/// Get function name for a given frame based on the token resolved by symbolic. 
-/// It tries following paths in order: -/// - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list), -/// - return mapped name of the caller (previous frame) token if it had, -/// - return token function name, including filtered values if it mapped to anything in the first place, -/// - return current frames function name as a fallback -/// -// fn get_function_for_token(frame, token, previous_frame=None): -fn get_function_for_token<'a>( - frame_fn_name: Option<&'a str>, - token_fn_name: &'a str, - callsite_fn_name: Option<&'a str>, -) -> &'a str { - // Try to use the function name we got from sourcemap-cache, filtering useless names. - if !USELESS_FN_NAMES.contains(&token_fn_name) { - return token_fn_name; - } - - // If not found, ask the callsite (previous token) for function name if possible. - if let Some(token_name) = callsite_fn_name { - if !token_name.is_empty() { - return token_name; - } - } - - // If there was no minified name at all, return even useless, filtered one from the original token. - if frame_fn_name.is_none() { - return token_fn_name; - } - - // Otherwise fallback to the old, minified name. - frame_fn_name.unwrap_or("") -} - -/// Fold multiple consecutive occurences of the same property name into a single group, excluding the last component. -/// -/// foo | foo -/// foo.foo | foo.foo -/// foo.foo.foo | {foo#2}.foo -/// bar.foo.foo | bar.foo.foo -/// bar.foo.foo.foo | bar.{foo#2}.foo -/// bar.foo.foo.onError | bar.{foo#2}.onError -/// bar.bar.bar.foo.foo.onError | {bar#3}.{foo#2}.onError -/// bar.foo.foo.bar.bar.onError | bar.{foo#2}.{bar#2}.onError -fn fold_function_name(function_name: &str) -> String { - let mut parts: Vec<_> = function_name.split('.').collect(); - - if parts.len() == 1 { - return function_name.to_string(); - } - - // unwrap: `parts` has at least a single item. 
- let tail = parts.pop().unwrap(); - let mut grouped: Vec> = vec![vec![]]; - - for part in parts { - // unwrap: we initialized `grouped` with at least a single slice. - let current_group = grouped.last_mut().unwrap(); - if current_group.is_empty() || current_group.last() == Some(&part) { - current_group.push(part); - } else { - grouped.push(vec![part]); - } - } - - let folded = grouped - .iter() - .map(|group| { - // unwrap: each group contains at least a single item. - if group.len() == 1 { - group.first().unwrap().to_string() - } else { - format!("{{{}#{}}}", group.first().unwrap(), group.len()) - } - }) - .collect::>() - .join("."); - - format!("{folded}.{tail}") -} - -fn fixup_webpack_filename(filename: &str) -> String { - if let Some((_, rest)) = filename.split_once("/~/") { - format!("~/{rest}") - } else if WEBPACK_NAMESPACE_RE.is_match(filename) { - WEBPACK_NAMESPACE_RE.replace(filename, "./").to_string() - } else if let Some(rest) = filename.strip_prefix("webpack:///") { - rest.to_string() - } else { - filename.to_string() - } -} - -fn is_in_app(abs_path: &str, filename: &str) -> Option { - if abs_path.starts_with("webpack:") { - Some(filename.starts_with("./") && !filename.contains("/node_modules/")) - } else if abs_path.starts_with("app:") { - Some(!NODE_MODULES_RE.is_match(filename)) - } else if abs_path.contains("/node_modules/") { - Some(false) - } else { - None - } -} - -// As a running joke, here you have a 8 year old comment from 2015: -// TODO(dcramer): replace CLEAN_MODULE_RE with tokenizer completely -static CLEAN_MODULE_RE: Lazy = Lazy::new(|| { - Regex::new( - r"(?ix) -^ -(?:/| # Leading slashes -(?: - (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?| # common folder prefixes - v?(?:\d+\.)*\d+| # version numbers, v1, 1.0.0 - [a-f0-9]{7,8}| # short sha - [a-f0-9]{32}| # md5 - [a-f0-9]{40} # sha1 -)/)+| -(?:[-\.][a-f0-9]{7,}$) # Ending in a commitish -", - ).unwrap() -}); - -/// Converts a url into a made-up module name 
by doing the following: -/// * Extract just the path name ignoring querystrings -/// * Trimming off the initial / -/// * Trimming off the file extension -/// * Removes off useless folder prefixes -/// e.g. `http://google.com/js/v1.0/foo/bar/baz.js` -> `foo/bar/baz` -fn generate_module(abs_path: &str) -> String { - let path = strip_hostname(abs_path); - let mut path = path.split(&['#', '?']).next().unwrap_or(path); - - if let Some((idx, ".")) = path.rmatch_indices(&['.', '/']).next() { - path = &path[..idx]; - } - - let path = path.strip_suffix(".min").unwrap_or(path); - - // return all the segments following a 32/40-char hash - let mut segments = path.split('/'); - while let Some(segment) = segments.next() { - if segment.len() == 32 - || segment.len() == 40 && segment.chars().all(|c| c.is_ascii_hexdigit()) - { - let mut s = String::new(); - for (i, seg) in segments.enumerate() { - if i > 0 { - s.push('/'); - } - s.push_str(seg); - } - return s; - } - } - - CLEAN_MODULE_RE.replace_all(path, "").into_owned() -} - -#[cfg(test)] -mod tests { - - use super::*; - - /// A faithful port of the monolith's in-app logic, for testing purposes. 
- fn is_in_app_faithful(abs_path: &str, filename: &str) -> Option { - let mut in_app = None; - if abs_path.starts_with("webpack:") { - if filename.starts_with("~/") - || filename.contains("/node_modules/") - || !filename.starts_with("./") - { - in_app = Some(false); - } else if filename.starts_with("./") { - in_app = Some(true); - } - } else if abs_path.contains("/node_modules/") { - in_app = Some(false); - } - - if abs_path.starts_with("app:") { - if NODE_MODULES_RE.is_match(filename) { - in_app = Some(false); - } else { - in_app = Some(true); - } - } - - in_app - } - - #[test] - fn test_get_function_name_valid_name() { - assert_eq!( - get_function_for_token(Some("original"), "lookedup", None), - "lookedup" - ); - } - #[test] - fn test_get_function_name_fallback_to_previous_frames_token_if_useless_name() { - assert_eq!( - get_function_for_token(None, "__webpack_require__", Some("previous_name")), - "previous_name" - ) - } - #[test] - fn test_get_function_name_fallback_to_useless_name() { - assert_eq!( - get_function_for_token(None, "__webpack_require__", None), - "__webpack_require__" - ) - } - #[test] - fn test_get_function_name_fallback_to_original_name() { - assert_eq!( - get_function_for_token(Some("original"), "__webpack_require__", None), - "original" - ) - } - - #[test] - fn test_fold_function_name() { - assert_eq!(fold_function_name("foo"), "foo"); - assert_eq!(fold_function_name("foo.foo"), "foo.foo"); - assert_eq!(fold_function_name("foo.foo.foo"), "{foo#2}.foo"); - assert_eq!(fold_function_name("bar.foo.foo"), "bar.foo.foo"); - assert_eq!(fold_function_name("bar.foo.foo.foo"), "bar.{foo#2}.foo"); - assert_eq!( - fold_function_name("bar.foo.foo.onError"), - "bar.{foo#2}.onError" - ); - assert_eq!( - fold_function_name("bar.bar.bar.foo.foo.onError"), - "{bar#3}.{foo#2}.onError" - ); - assert_eq!( - fold_function_name("bar.foo.foo.bar.bar.onError"), - "bar.{foo#2}.{bar#2}.onError" - ); - } - - #[test] - fn test_fixup_webpack_filename() { - let filename = 
"webpack:///../node_modules/@sentry/browser/esm/helpers.js"; - - assert_eq!( - fixup_webpack_filename(filename), - "../node_modules/@sentry/browser/esm/helpers.js" - ); - - let filename = "webpack:///./app/utils/requestError/createRequestError.tsx"; - - assert_eq!( - fixup_webpack_filename(filename), - "./app/utils/requestError/createRequestError.tsx" - ); - } - - #[test] - fn test_in_app_webpack() { - let abs_path = "webpack:///../node_modules/@sentry/browser/esm/helpers.js"; - let filename = "../node_modules/@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - - let abs_path = "webpack:///~/@sentry/browser/esm/helpers.js"; - let filename = "~/@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - - let abs_path = "webpack:///./@sentry/browser/esm/helpers.js"; - let filename = "./@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(true)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(true)); - - let abs_path = "webpack:///./node_modules/@sentry/browser/esm/helpers.js"; - let filename = "./node_modules/@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - } - - #[test] - fn test_in_app_app() { - let abs_path = "app:///../node_modules/@sentry/browser/esm/helpers.js"; - let filename = "../node_modules/@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - - let abs_path = "app:///../@sentry/browser/esm/helpers.js"; - let filename = "../@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(true)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(true)); - - let 
abs_path = "app:///node_modules/rxjs/internal/operators/switchMap.js"; - let filename = "node_modules/rxjs/internal/operators/switchMap.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - } - - #[test] - fn test_in_app_general() { - let abs_path = "file:///../node_modules/@sentry/browser/esm/helpers.js"; - let filename = "../node_modules/@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), Some(false)); - assert_eq!(is_in_app_faithful(abs_path, filename), Some(false)); - - let abs_path = "file:///../@sentry/browser/esm/helpers.js"; - let filename = "../@sentry/browser/esm/helpers.js"; - - assert_eq!(is_in_app(abs_path, filename), None); - assert_eq!(is_in_app_faithful(abs_path, filename), None); - } - - #[test] - fn test_generate_module() { - assert_eq!(generate_module("http://example.com/foo.js"), "foo"); - assert_eq!(generate_module("http://example.com/foo/bar.js"), "foo/bar"); - assert_eq!( - generate_module("http://example.com/js/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/javascript/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/1.0/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/v1/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/v1.0.0/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/_baz/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/1/2/3/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/abcdef0/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - generate_module( - "http://example.com/92cd589eca8235e7b373bf5ae94ebf898e3b949c/foo/bar.js" - ), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/7d6d00eae0ceccdc7ee689659585d95f/foo/bar.js"), - "foo/bar" - ); - assert_eq!( - 
generate_module("http://example.com/foo/bar.coffee"), - "foo/bar" - ); - assert_eq!( - generate_module("http://example.com/foo/bar.js?v=1234"), - "foo/bar" - ); - assert_eq!(generate_module("/foo/bar.js"), "foo/bar"); - assert_eq!(generate_module("/foo/bar.ts"), "foo/bar"); - assert_eq!(generate_module("../../foo/bar.js"), "foo/bar"); - assert_eq!(generate_module("../../foo/bar.ts"), "foo/bar"); - assert_eq!(generate_module("../../foo/bar.awesome"), "foo/bar"); - assert_eq!(generate_module("../../foo/bar"), "foo/bar"); - assert_eq!( - generate_module("/foo/bar-7d6d00eae0ceccdc7ee689659585d95f.js"), - "foo/bar" - ); - assert_eq!(generate_module("/bower_components/foo/bar.js"), "foo/bar"); - assert_eq!(generate_module("/node_modules/foo/bar.js"), "foo/bar"); - assert_eq!( - generate_module( - "http://example.com/vendor.92cd589eca8235e7b373bf5ae94ebf898e3b949c.js", - ), - "vendor", - ); - assert_eq!( - generate_module( - "/a/javascripts/application-bundle-149360d3414c26adac3febdf6832e25c.min.js" - ), - "a/javascripts/application-bundle" - ); - assert_eq!( - generate_module("https://example.com/libs/libs-20150417171659.min.js"), - "libs/libs" - ); - assert_eq!( - generate_module("webpack:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), - "vendor" - ); - assert_eq!( - generate_module("webpack:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), - "vendor" - ); - assert_eq!( - generate_module("app:///92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), - "vendor" - ); - assert_eq!( - generate_module("app:///example/92cd589eca8235e7b373bf5ae94ebf898e3b949c/vendor.js"), - "vendor" - ); - assert_eq!( - generate_module("~/app/components/projectHeader/projectSelector.jsx"), - "app/components/projectHeader/projectSelector" - ); - } -} diff --git a/crates/symbolicator-service/src/services/symbolication/mod.rs b/crates/symbolicator-service/src/services/symbolication/mod.rs index 094c4e1c0..c6b1ba64e 100644 --- 
a/crates/symbolicator-service/src/services/symbolication/mod.rs +++ b/crates/symbolicator-service/src/services/symbolication/mod.rs @@ -16,7 +16,6 @@ use crate::services::cficaches::CfiCacheActor; use crate::services::module_lookup::{CacheFileEntry, CacheLookupResult, ModuleLookup}; use crate::services::objects::ObjectsActor; use crate::services::ppdb_caches::PortablePdbCacheActor; -use crate::services::sourcemap::SourceMapService; use crate::services::symcaches::SymCacheActor; use crate::types::{ CompleteObjectInfo, CompleteStacktrace, CompletedSymbolicationResponse, FrameStatus, @@ -26,13 +25,14 @@ use crate::types::{ use crate::utils::hex::HexValue; use crate::utils::http::is_valid_origin; +use super::bitcode::BitcodeService; +use super::il2cpp::Il2cppService; +use super::SharedServices; + mod apple; -mod js; mod process_minidump; pub mod source_context; -pub use js::SymbolicateJsStacktraces; - /// Whether a frame's instruction address needs to be "adjusted" by subtracting a word. #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum AdjustInstructionAddr { @@ -89,20 +89,42 @@ pub struct SymbolicationActor { cficaches: CfiCacheActor, ppdb_caches: PortablePdbCacheActor, diagnostics_cache: Cache, - sourcemaps: SourceMapService, sourcefiles_cache: Arc, } impl SymbolicationActor { - pub fn new( - objects: ObjectsActor, - symcaches: SymCacheActor, - cficaches: CfiCacheActor, - ppdb_caches: PortablePdbCacheActor, - diagnostics_cache: Cache, - sourcemaps: SourceMapService, - sourcefiles_cache: Arc, - ) -> Self { + pub fn new(services: &SharedServices) -> Self { + let caches = &services.caches; + let shared_cache = services.shared_cache.clone(); + let objects = services.objects.clone(); + let download_svc = services.download_svc.clone(); + let sourcefiles_cache = services.sourcefiles_cache.clone(); + + let bitcode = BitcodeService::new( + caches.auxdifs.clone(), + shared_cache.clone(), + download_svc.clone(), + ); + + let il2cpp = Il2cppService::new(caches.il2cpp.clone(), 
shared_cache.clone(), download_svc); + + let symcaches = SymCacheActor::new( + caches.symcaches.clone(), + shared_cache.clone(), + objects.clone(), + bitcode, + il2cpp, + ); + + let cficaches = CfiCacheActor::new( + caches.cficaches.clone(), + shared_cache.clone(), + objects.clone(), + ); + + let ppdb_caches = + PortablePdbCacheActor::new(caches.ppdb_caches.clone(), shared_cache, objects.clone()); + let demangle_cache = DemangleCache::builder() .max_capacity(10 * 1024 * 1024) // 10 MiB, considering key and value: .weigher(|k, v| (k.0.len() + v.len()).try_into().unwrap_or(u32::MAX)) @@ -114,8 +136,7 @@ impl SymbolicationActor { symcaches, cficaches, ppdb_caches, - diagnostics_cache, - sourcemaps, + diagnostics_cache: caches.diagnostics.clone(), sourcefiles_cache, } } diff --git a/crates/symbolicator-service/src/types/mod.rs b/crates/symbolicator-service/src/types/mod.rs index f7f2c1011..2c5b73a81 100644 --- a/crates/symbolicator-service/src/types/mod.rs +++ b/crates/symbolicator-service/src/types/mod.rs @@ -4,16 +4,15 @@ //! HTTP API. Its messy and things probably need a better place and different way to signal //! they are part of the public API. -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::fmt; use std::sync::Arc; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use symbolic::common::{Arch, CodeId, DebugId, Language}; -use symbolicator_sources::{ObjectType, SentryFileId}; +use symbolicator_sources::ObjectType; -use crate::caching::CacheError; use crate::utils::addr::AddrMode; use crate::utils::hex::HexValue; @@ -501,28 +500,6 @@ impl From for CompleteObjectInfo { } } -/// A wrapper around possible completed endpoint responses. -/// -/// This allows us to support multiple independent types of symbolication. 
-#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(untagged)] -pub enum CompletedResponse { - NativeSymbolication(CompletedSymbolicationResponse), - JsSymbolication(CompletedJsSymbolicationResponse), -} - -impl From for CompletedResponse { - fn from(response: CompletedSymbolicationResponse) -> Self { - Self::NativeSymbolication(response) - } -} - -impl From for CompletedResponse { - fn from(response: CompletedJsSymbolicationResponse) -> Self { - Self::JsSymbolication(response) - } -} - /// The symbolicated crash data. /// /// It contains the symbolicated stack frames, module information as well as other @@ -575,167 +552,6 @@ pub struct CompletedSymbolicationResponse { pub modules: Vec, } -// Some of the renames are there only to make it synchronized -// with the already existing monolith naming scheme. -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] -#[serde(rename_all = "snake_case")] -#[serde(tag = "type")] -pub enum JsModuleErrorKind { - InvalidLocation { line: u32, col: Option }, - InvalidAbsPath, - NoColumn, - MissingSourceContent { source: String, sourcemap: String }, - MissingSource, - MalformedSourcemap { url: String }, - MissingSourcemap, - InvalidBase64Sourcemap, - ScrapingDisabled, -} - -impl fmt::Display for JsModuleErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - JsModuleErrorKind::InvalidLocation { line, col } => { - write!(f, "Invalid source location")?; - match (line, col) { - (l, None) => write!(f, ": line:{l}")?, - (l, Some(c)) => write!(f, ": line:{l}, col:{c}")?, - } - Ok(()) - } - JsModuleErrorKind::InvalidAbsPath => write!(f, "Invalid absolute path"), - JsModuleErrorKind::NoColumn => write!(f, "No column information"), - JsModuleErrorKind::MissingSourceContent { source, sourcemap } => write!( - f, - "Missing source contents for source file {source} and sourcemap file {sourcemap}" - ), - JsModuleErrorKind::MissingSource => write!(f, "Missing source file"), - 
JsModuleErrorKind::MalformedSourcemap { url } => { - write!(f, "Sourcemap file at {url} is malformed") - } - JsModuleErrorKind::MissingSourcemap => write!(f, "Missing sourcemap file"), - JsModuleErrorKind::InvalidBase64Sourcemap => write!(f, "Invalid base64 sourcemap"), - JsModuleErrorKind::ScrapingDisabled => { - write!(f, "Could not download file because scraping is disabled") - } - } - } -} - -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] -pub struct JsModuleError { - pub abs_path: String, - #[serde(flatten)] - pub kind: JsModuleErrorKind, -} - -/// An attempt to scrape a JS source or sourcemap file from the web. -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct JsScrapingAttempt { - /// The URL we attempted to scrape from. - pub url: String, - /// The outcome of the attempt. - #[serde(flatten)] - pub result: JsScrapingResult, -} - -impl JsScrapingAttempt { - pub fn success(url: String) -> Self { - Self { - url, - result: JsScrapingResult::Success, - } - } - pub fn not_attempted(url: String) -> Self { - Self { - url, - result: JsScrapingResult::NotAttempted, - } - } - - pub fn failure(url: String, reason: JsScrapingFailureReason, details: String) -> Self { - Self { - url, - result: JsScrapingResult::Failure { reason, details }, - } - } -} - -/// The outcome of a scraping attempt. -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -#[serde(tag = "status")] -pub enum JsScrapingResult { - /// We didn't actually attempt scraping because we already obtained the file - /// by another method. - NotAttempted, - /// The file was succesfully scraped. - Success, - /// The file couldn't be scraped. - Failure { - /// The basic reason for the failure. - reason: JsScrapingFailureReason, - #[serde(skip_serializing_if = "String::is_empty")] - /// A more detailed explanation of the failure. 
- details: String, - }, -} - -impl From for JsScrapingResult { - fn from(value: CacheError) -> Self { - let (reason, details) = match value { - CacheError::NotFound => (JsScrapingFailureReason::NotFound, String::new()), - CacheError::PermissionDenied(details) => { - (JsScrapingFailureReason::PermissionDenied, details) - } - CacheError::Timeout(duration) => ( - JsScrapingFailureReason::Timeout, - format!("Timeout after {}", humantime::format_duration(duration)), - ), - CacheError::DownloadError(details) => (JsScrapingFailureReason::DownloadError, details), - CacheError::Malformed(details) => (JsScrapingFailureReason::Other, details), - CacheError::InternalError => (JsScrapingFailureReason::Other, String::new()), - }; - - Self::Failure { reason, details } - } -} - -/// The basic reason a scraping attempt failed. -#[derive(Debug, Clone, Copy, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum JsScrapingFailureReason { - /// The file was not found at the given URL. - NotFound, - /// Scraping was disabled. - Disabled, - /// The URL was not in the list of allowed hosts or had - /// an invalid scheme. - InvalidHost, - /// Permission to access the file was denied. - PermissionDenied, - /// The scraping attempt timed out. - Timeout, - /// There was a non-timeout error while downloading. - DownloadError, - /// Catchall case. - /// - /// This probably can't actually happen. - Other, -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize)] -pub struct CompletedJsSymbolicationResponse { - pub stacktraces: Vec, - pub raw_stacktraces: Vec, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub errors: Vec, - #[serde(skip_serializing_if = "HashSet::is_empty")] - pub used_artifact_bundles: HashSet, - #[serde(skip_serializing_if = "Vec::is_empty")] - pub scraping_attempts: Vec, -} - /// Information about the operating system. 
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] pub struct SystemInfo { @@ -754,89 +570,3 @@ pub struct SystemInfo { /// Device model name pub device_model: String, } - -#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub struct JsFrame { - #[serde(skip_serializing_if = "Option::is_none")] - pub function: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub filename: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub module: Option, - - pub abs_path: String, - - pub lineno: u32, - - #[serde(skip_serializing_if = "Option::is_none")] - pub colno: Option, - - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub pre_context: Vec, - - #[serde(skip_serializing_if = "Option::is_none")] - pub context_line: Option, - - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub post_context: Vec, - - #[serde(skip_serializing)] - pub token_name: Option, - - #[serde(skip_serializing_if = "Option::is_none")] - pub in_app: Option, - - #[serde(default, skip_serializing_if = "JsFrameData::is_empty")] - pub data: JsFrameData, -} - -#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq)] -pub struct JsFrameData { - #[serde(skip_serializing_if = "Option::is_none")] - pub sourcemap: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub resolved_with: Option, - #[serde(default)] - pub symbolicated: bool, -} - -/// A marker indicating what a File was resolved with. -/// -/// This enum serves a double purpose, both marking how an individual file was found inside of a -/// bundle, as well as tracking through which method that bundle itself was found. 
-/// -#[derive(Debug, Default, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] -#[serde(rename_all = "kebab-case")] -pub enum ResolvedWith { - /// Both: Found in a Bundle via DebugId - /// And: Found the Bundle via API Lookup via DebugId / Database Index - DebugId, - /// Found in a Bundle via Url matching - Url, - /// Found the Bundle via API Lookup via Database Index - Index, - /// Found the File in a Flat File / Bundle Index - BundleIndex, - /// Found the Bundle via API Lookup as an ArtifactBundle - Release, - /// Found the Bundle via API Lookup as a ReleaseFile - ReleaseOld, - /// Scraped the File from the Web - Scraping, - /// Unknown - #[default] - Unknown, -} - -impl JsFrameData { - pub fn is_empty(&self) -> bool { - *self == Self::default() - } -} - -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -pub struct JsStacktrace { - pub frames: Vec, -} diff --git a/crates/symbolicator-service/tests/integration/main.rs b/crates/symbolicator-service/tests/integration/main.rs index ef4e1abca..381064da2 100644 --- a/crates/symbolicator-service/tests/integration/main.rs +++ b/crates/symbolicator-service/tests/integration/main.rs @@ -4,7 +4,6 @@ pub mod e2e; pub mod process_minidump; pub mod public_sources; pub mod source_errors; -pub mod sourcemap; pub mod symbolication; pub mod utils; diff --git a/crates/symbolicator-service/tests/integration/snapshots/CAUTION.md b/crates/symbolicator-service/tests/integration/snapshots/CAUTION.md deleted file mode 100644 index b3c3e4c01..000000000 --- a/crates/symbolicator-service/tests/integration/snapshots/CAUTION.md +++ /dev/null @@ -1 +0,0 @@ -When moving this directory, update `/dangerfile.js` with the new location. 
diff --git a/crates/symbolicator-service/tests/integration/utils.rs b/crates/symbolicator-service/tests/integration/utils.rs index 3dd577cbe..161752377 100644 --- a/crates/symbolicator-service/tests/integration/utils.rs +++ b/crates/symbolicator-service/tests/integration/utils.rs @@ -1,10 +1,10 @@ use std::sync::Arc; use symbolicator_service::config::Config; -use symbolicator_service::services::create_service; use symbolicator_service::services::symbolication::{ StacktraceOrigin, SymbolicateStacktraces, SymbolicationActor, }; +use symbolicator_service::services::SharedServices; use symbolicator_service::types::RawObjectInfo; use symbolicator_sources::SourceConfig; use symbolicator_test as test; @@ -36,7 +36,8 @@ pub fn setup_service( update_config(&mut config); let handle = tokio::runtime::Handle::current(); - let (symbolication, _objects) = create_service(&config, handle).unwrap(); + let shared_services = SharedServices::new(config, handle).unwrap(); + let symbolication = SymbolicationActor::new(&shared_services); (symbolication, cache_dir) } diff --git a/crates/symbolicator-stress/Cargo.toml b/crates/symbolicator-stress/Cargo.toml index 62aef1171..88451f81d 100644 --- a/crates/symbolicator-stress/Cargo.toml +++ b/crates/symbolicator-stress/Cargo.toml @@ -14,6 +14,7 @@ sentry = { version = "0.31.7", features = ["anyhow", "debug-images", "tracing"] serde = { version = "1.0.137", features = ["derive"] } serde_json = "1.0.81" serde_yaml = "0.9.14" +symbolicator-js = { path = "../symbolicator-js" } symbolicator-service = { path = "../symbolicator-service" } symbolicator-test = { path = "../symbolicator-test" } tempfile = "3.2.0" diff --git a/crates/symbolicator-stress/src/stresstest.rs b/crates/symbolicator-stress/src/stresstest.rs index 50fdcb123..0dcc9a99a 100644 --- a/crates/symbolicator-stress/src/stresstest.rs +++ b/crates/symbolicator-stress/src/stresstest.rs @@ -5,7 +5,10 @@ use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use 
sentry::SentryFutureExt; +use symbolicator_js::SourceMapService; use symbolicator_service::config::Config as SymbolicatorConfig; +use symbolicator_service::services::symbolication::SymbolicationActor; +use symbolicator_service::services::SharedServices; use symbolicator_service::types::Scope; use tokio::sync::Semaphore; @@ -18,10 +21,12 @@ pub async fn perform_stresstest( ) -> Result<()> { // start symbolicator service let runtime = tokio::runtime::Handle::current(); - let (symbolication, _objects) = - symbolicator_service::services::create_service(&service_config, runtime) - .context("failed starting symbolication service")?; - let symbolication = Arc::new(symbolication); + let shared_services = SharedServices::new(service_config, runtime) + .context("failed to start symbolication service")?; + let native = SymbolicationActor::new(&shared_services); + let js = SourceMapService::new(&shared_services); + let symbolication = Arc::new((native, js)); + let service_config = shared_services.config; // initialize workloads let workloads: Vec<_> = workloads diff --git a/crates/symbolicator-stress/src/workloads.rs b/crates/symbolicator-stress/src/workloads.rs index a5ce1b243..a3ec944d6 100644 --- a/crates/symbolicator-stress/src/workloads.rs +++ b/crates/symbolicator-stress/src/workloads.rs @@ -5,11 +5,13 @@ use std::sync::Arc; use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; +use symbolicator_js::interface::{JsStacktrace, SymbolicateJsStacktraces}; +use symbolicator_js::SourceMapService; use symbolicator_service::services::download::SourceConfig; use symbolicator_service::services::symbolication::{ - StacktraceOrigin, SymbolicateJsStacktraces, SymbolicateStacktraces, SymbolicationActor, + StacktraceOrigin, SymbolicateStacktraces, SymbolicationActor, }; -use symbolicator_service::types::{JsStacktrace, RawObjectInfo, RawStacktrace, Scope}; +use symbolicator_service::types::{RawObjectInfo, RawStacktrace, Scope}; #[derive(Debug, Deserialize, Serialize)] 
pub struct WorkloadsConfig { @@ -135,7 +137,10 @@ pub fn read_json(path: impl AsRef) -> T { serde_json::from_reader(reader).unwrap() } -pub async fn process_payload(symbolication: &SymbolicationActor, workload: &ParsedPayload) { +pub async fn process_payload( + symbolication: &(SymbolicationActor, SourceMapService), + workload: &ParsedPayload, +) { match workload { ParsedPayload::Minidump(payload) => { let MinidumpPayload { @@ -159,6 +164,7 @@ pub async fn process_payload(symbolication: &SymbolicationActor, workload: &Pars .unwrap(); symbolication + .0 .process_minidump( scope.clone(), temp_path, @@ -169,10 +175,10 @@ pub async fn process_payload(symbolication: &SymbolicationActor, workload: &Pars .unwrap(); } ParsedPayload::Event(payload) => { - symbolication.symbolicate(payload.clone()).await.unwrap(); + symbolication.0.symbolicate(payload.clone()).await.unwrap(); } ParsedPayload::Js(_srv, payload) => { - symbolication.symbolicate_js(payload.clone()).await.unwrap(); + symbolication.1.symbolicate_js(payload.clone()).await; } }; } diff --git a/crates/symbolicator/Cargo.toml b/crates/symbolicator/Cargo.toml index 299322222..84e3b1ea1 100644 --- a/crates/symbolicator/Cargo.toml +++ b/crates/symbolicator/Cargo.toml @@ -22,6 +22,7 @@ serde = { version = "1.0.137", features = ["derive", "rc"] } serde_json = "1.0.81" symbolic = "12.4.0" symbolicator-crash = { path = "../symbolicator-crash", optional = true } +symbolicator-js = { path = "../symbolicator-js" } symbolicator-service = { path = "../symbolicator-service" } symbolicator-sources = { path = "../symbolicator-sources" } tempfile = "3.2.0" diff --git a/crates/symbolicator/src/endpoints/symbolicate_js.rs b/crates/symbolicator/src/endpoints/symbolicate_js.rs index 001faaca5..69a446e1c 100644 --- a/crates/symbolicator/src/endpoints/symbolicate_js.rs +++ b/crates/symbolicator/src/endpoints/symbolicate_js.rs @@ -3,14 +3,14 @@ use std::sync::Arc; use axum::extract; use axum::response::Json; use serde::{Deserialize, 
Serialize}; -use symbolicator_service::services::symbolication::SymbolicateJsStacktraces; +use symbolicator_js::interface::{JsStacktrace, SymbolicateJsStacktraces}; use symbolicator_service::services::ScrapingConfig; use symbolicator_service::types::RawObjectInfo; use symbolicator_sources::SentrySourceConfig; use url::Url; use crate::endpoints::symbolicate::SymbolicationRequestQueryParams; -use crate::service::{JsStacktrace, RequestService, SymbolicationResponse}; +use crate::service::{RequestService, SymbolicationResponse}; use crate::utils::sentry::ConfigureScope; use super::ResponseError; diff --git a/crates/symbolicator/src/service.rs b/crates/symbolicator/src/service.rs index d22678059..cfe7b81b9 100644 --- a/crates/symbolicator/src/service.rs +++ b/crates/symbolicator/src/service.rs @@ -24,7 +24,9 @@ use futures::{channel::oneshot, FutureExt as _}; use sentry::protocol::SessionStatus; use sentry::SentryFutureExt; use serde::{Deserialize, Deserializer, Serialize}; -use symbolicator_service::services::ScrapingConfig; +use symbolicator_js::interface::{CompletedJsSymbolicationResponse, SymbolicateJsStacktraces}; +use symbolicator_js::SourceMapService; +use symbolicator_service::services::{ScrapingConfig, SharedServices}; use tempfile::TempPath; use uuid::Uuid; @@ -33,7 +35,7 @@ use symbolicator_service::config::Config; use symbolicator_service::metric; use symbolicator_service::services::objects::ObjectsActor; use symbolicator_service::services::symbolication::SymbolicationActor; -use symbolicator_service::types::{CompletedResponse, CompletedSymbolicationResponse}; +use symbolicator_service::types::CompletedSymbolicationResponse; use symbolicator_service::utils::futures::CallOnDrop; use symbolicator_service::utils::futures::{m, measure}; use symbolicator_sources::SourceConfig; @@ -41,10 +43,8 @@ use symbolicator_sources::SourceConfig; pub use symbolicator_service::services::objects::{ FindObject, FindResult, ObjectHandle, ObjectMetaHandle, ObjectPurpose, }; -pub 
use symbolicator_service::services::symbolication::{ - StacktraceOrigin, SymbolicateJsStacktraces, SymbolicateStacktraces, -}; -pub use symbolicator_service::types::{JsStacktrace, RawObjectInfo, RawStacktrace, Scope, Signal}; +pub use symbolicator_service::services::symbolication::{StacktraceOrigin, SymbolicateStacktraces}; +pub use symbolicator_service::types::{RawObjectInfo, RawStacktrace, Scope, Signal}; /// Symbolication task identifier. #[derive(Debug, Clone, Copy, Serialize, Ord, PartialOrd, Eq, PartialEq)] @@ -101,6 +101,13 @@ pub enum SymbolicationResponse { InternalError, } +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(untagged)] +pub enum CompletedResponse { + NativeSymbolication(CompletedSymbolicationResponse), + JsSymbolication(CompletedJsSymbolicationResponse), +} + /// Common options for all symbolication API requests. /// /// These options control some features which control the symbolication and general request @@ -159,7 +166,8 @@ type ComputationMap = Arc>>; struct RequestServiceInner { config: Config, - symbolication: SymbolicationActor, + native: SymbolicationActor, + js: SourceMapService, objects: ObjectsActor, cpu_pool: tokio::runtime::Handle, @@ -183,8 +191,12 @@ impl RequestService { config.caches.in_memory.sentry_index_ttl = Duration::ZERO; } - let (symbolication, objects) = - symbolicator_service::services::create_service(&config, io_pool.clone())?; + let shared_services = SharedServices::new(config, io_pool.clone())?; + let native = SymbolicationActor::new(&shared_services); + let js = SourceMapService::new(&shared_services); + let SharedServices { + objects, config, .. 
+ } = shared_services; let symbolication_taskmon = tokio_metrics::TaskMonitor::new(); { @@ -202,7 +214,8 @@ impl RequestService { let inner = RequestServiceInner { config, - symbolication, + native, + js, objects, cpu_pool, @@ -246,7 +259,10 @@ impl RequestService { ) -> Result { let slf = self.inner.clone(); self.create_symbolication_request("symbolicate", options, async move { - slf.symbolication.symbolicate(request).await.map(Into::into) + slf.native + .symbolicate(request) + .await + .map(CompletedResponse::NativeSymbolication) }) } @@ -256,10 +272,9 @@ impl RequestService { ) -> Result { let slf = self.inner.clone(); self.create_symbolication_request("symbolicate_js", RequestOptions::default(), async move { - slf.symbolication - .symbolicate_js(request) - .await - .map(Into::into) + Ok(CompletedResponse::JsSymbolication( + slf.js.symbolicate_js(request).await, + )) }) } @@ -277,10 +292,10 @@ impl RequestService { ) -> Result { let slf = self.inner.clone(); self.create_symbolication_request("minidump_stackwalk", options, async move { - slf.symbolication + slf.native .process_minidump(scope, minidump_file, sources, scraping) .await - .map(Into::into) + .map(CompletedResponse::NativeSymbolication) }) } @@ -298,10 +313,10 @@ impl RequestService { ) -> Result { let slf = self.inner.clone(); self.create_symbolication_request("parse_apple_crash_report", options, async move { - slf.symbolication + slf.native .process_apple_crash_report(scope, apple_crash_report, sources, scraping) .await - .map(Into::into) + .map(CompletedResponse::NativeSymbolication) }) } diff --git a/crates/symbolicli/Cargo.toml b/crates/symbolicli/Cargo.toml index e93f6d2fb..1b6a444c5 100644 --- a/crates/symbolicli/Cargo.toml +++ b/crates/symbolicli/Cargo.toml @@ -15,6 +15,7 @@ serde = { version = "1.0.137", features = ["derive", "rc"] } serde_json = "1.0.81" serde_yaml = "0.9.14" symbolic = "12.4.0" +symbolicator-js = { path = "../symbolicator-js" } symbolicator-service = { path = 
"../symbolicator-service" } symbolicator-sources = { path = "../symbolicator-sources" } tempfile = "3.3.0" diff --git a/crates/symbolicli/src/main.rs b/crates/symbolicli/src/main.rs index dd651712c..c1a53a7bf 100644 --- a/crates/symbolicli/src/main.rs +++ b/crates/symbolicli/src/main.rs @@ -8,7 +8,10 @@ use output::{print_compact, print_pretty}; use remote::EventKey; use settings::Mode; -use symbolicator_service::types::{CompletedResponse, Scope}; +use symbolicator_js::SourceMapService; +use symbolicator_service::services::symbolication::SymbolicationActor; +use symbolicator_service::services::SharedServices; +use symbolicator_service::types::Scope; use symbolicator_sources::{ CommonSourceConfig, DirectoryLayout, DirectoryLayoutType, FilesystemSourceConfig, SentrySourceConfig, SourceConfig, SourceId, @@ -20,6 +23,8 @@ use tempfile::{NamedTempFile, TempPath}; use tracing_subscriber::filter; use tracing_subscriber::prelude::*; +use crate::output::CompletedResponse; + mod output; mod settings; @@ -45,9 +50,12 @@ async fn main() -> Result<()> { .init(); let runtime = tokio::runtime::Handle::current(); - let (symbolication, _objects) = - symbolicator_service::services::create_service(&symbolicator_config, runtime) - .context("failed to start symbolication service")?; + + let shared_services = SharedServices::new(symbolicator_config, runtime) + .context("failed to start symbolication service")?; + let native = SymbolicationActor::new(&shared_services); + let js = SourceMapService::new(&shared_services); + let symbolicator_config = shared_services.config; let scope = match mode { Mode::Online { ref project, .. } => Scope::Scoped(Arc::from(project.as_str())), @@ -117,10 +125,7 @@ async fn main() -> Result<()> { tracing::info!("symbolicating event"); - symbolication - .symbolicate_js(request) - .await - .map(CompletedResponse::from)? 
+ CompletedResponse::JsSymbolication(js.symbolicate_js(request).await) } _ => { @@ -162,26 +167,22 @@ async fn main() -> Result<()> { } let dsym_sources = Arc::from(dsym_sources.into_boxed_slice()); - match payload { + CompletedResponse::NativeSymbolication(match payload { Payload::Event(event) => { let request = create_native_symbolication_request(scope, dsym_sources, event) .context("Event cannot be symbolicated")?; tracing::info!("symbolicating event"); - symbolication - .symbolicate(request) - .await - .map(CompletedResponse::from)? + native.symbolicate(request).await? } Payload::Minidump(minidump_path) => { tracing::info!("symbolicating minidump"); - symbolication + native .process_minidump(scope, minidump_path, dsym_sources, Default::default()) - .await - .map(CompletedResponse::from)? + .await? } - } + }) } }; @@ -406,13 +407,13 @@ mod event { use anyhow::bail; use serde::Deserialize; use symbolic::common::Language; - use symbolicator_service::services::symbolication::{ - StacktraceOrigin, SymbolicateJsStacktraces, SymbolicateStacktraces, + use symbolicator_js::interface::{ + JsFrame, JsFrameData, JsStacktrace, SymbolicateJsStacktraces, }; + use symbolicator_service::services::symbolication::{StacktraceOrigin, SymbolicateStacktraces}; use symbolicator_service::services::ScrapingConfig; use symbolicator_service::types::{ - CompleteObjectInfo, FrameTrust, JsFrame, JsFrameData, JsStacktrace, RawFrame, - RawObjectInfo, RawStacktrace, Scope, Signal, + CompleteObjectInfo, FrameTrust, RawFrame, RawObjectInfo, RawStacktrace, Scope, Signal, }; use symbolicator_service::utils::{addr::AddrMode, hex::HexValue}; use symbolicator_sources::{SentrySourceConfig, SourceConfig}; diff --git a/crates/symbolicli/src/output.rs b/crates/symbolicli/src/output.rs index e20e0e303..8c98ffd93 100644 --- a/crates/symbolicli/src/output.rs +++ b/crates/symbolicli/src/output.rs @@ -2,11 +2,17 @@ use std::{collections::HashMap, iter::Peekable, vec::IntoIter}; use prettytable::{cell, 
format::consts::FORMAT_CLEAN, row, Row, Table}; use symbolic::common::split_path; +use symbolicator_js::interface::{CompletedJsSymbolicationResponse, JsFrame}; use symbolicator_service::types::{ - CompleteObjectInfo, CompletedJsSymbolicationResponse, CompletedResponse, - CompletedSymbolicationResponse, FrameTrust, JsFrame, SymbolicatedFrame, + CompleteObjectInfo, CompletedSymbolicationResponse, FrameTrust, SymbolicatedFrame, }; +#[derive(Debug, Clone)] +pub enum CompletedResponse { + NativeSymbolication(CompletedSymbolicationResponse), + JsSymbolication(CompletedJsSymbolicationResponse), +} + pub fn print_compact(response: CompletedResponse) { match response { CompletedResponse::NativeSymbolication(response) => print_compact_native(response), diff --git a/dangerfile.js b/dangerfile.js index 7e86a2962..03f54a95b 100644 --- a/dangerfile.js +++ b/dangerfile.js @@ -50,58 +50,6 @@ async function checkChangelog() { } } -function getSnapshotDetails() { - return ` -
-Instructions for snapshot changes - -Sentry runs a symbolicator integration test suite located at [\`tests/symbolicator/\`](https://github.com/getsentry/sentry/tree/master/tests/symbolicator). Changes in this PR will likely result in snapshot diffs in Sentry, which will break the master branch and in-progress PRs. - -Follow these steps to update snapshots in Sentry: - -1. Check out latest Sentry \`master\` and enable the virtualenv. -2. Enable symbolicator (\`symbolicator: true\`) in sentry via \`~/.sentry/config.yml\`. -3. Make sure your other devservices are running via \`sentry devservices up --exclude symbolicator\`. If -they're already running, stop symbolicator with \`sentry devservices down symbolicator\`. You want to use your -own development symbolicator to update the snapshots. -4. Run your development symbolicator on port \`3021\`, or whatever port symbolicator is configured to use -in \`~/.sentry/config.yml\`. -5. Export \`SENTRY_SNAPSHOTS_WRITEBACK=1\` to automatically update the existing snapshots with your new -results and run symbolicator tests with pytest (\`pytest tests/symbolicator\`). -6. Review snapshot changes locally, then create a PR to Sentry. -7. Merge the Symbolicator PR, then merge the Sentry PR. - -
- `; -} - -async function checkSnapshots() { - const SNAPSHOT_LOCATION = "crates/symbolicator-service/tests/integration/snapshots/"; - - // Sanity check that the snapshot directory exists - let contents = await danger.github.utils.fileContents( - SNAPSHOT_LOCATION + "CAUTION.md" - ); - if (!contents) { - fail( - "The snapshot directory has moved to a new location. Please update SNAPSHOT_LOCATION in /dangerfile.js." - ); - return; - } - - const changesSnapshots = danger.git.modified_files.some((f) => - f.startsWith(SNAPSHOT_LOCATION) - ); - - if (changesSnapshots) { - warn( - "Snapshot changes likely affect Sentry tests. If the Sentry-Symbolicator Integration Tests in CI are " + - "failing for your PR, please check the symbolicator test suite in Sentry and update snapshots as needed." - ); - markdown(getSnapshotDetails()); - } -} - async function checkAll() { // See: https://spectrum.chat/danger/javascript/support-for-github-draft-prs~82948576-ce84-40e7-a043-7675e5bf5690 const isDraft = danger.github.pr.mergeable_state === "draft"; @@ -111,7 +59,6 @@ async function checkAll() { } await checkChangelog(); - await checkSnapshots(); } schedule(checkAll);