Skip to content

Commit

Permalink
feat: Return "scraping attempts" from JS symbolication (#1311)
Browse files Browse the repository at this point in the history
  • Loading branch information
loewenheim authored Oct 2, 2023
1 parent 3820fae commit 2bde872
Show file tree
Hide file tree
Showing 18 changed files with 283 additions and 40 deletions.
106 changes: 80 additions & 26 deletions crates/symbolicator-service/src/services/sourcemap_lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use crate::caching::{
use crate::services::download::DownloadService;
use crate::services::objects::ObjectMetaHandle;
use crate::services::symbolication::ScrapingConfig;
use crate::types::{JsStacktrace, ResolvedWith, Scope};
use crate::types::{JsScrapingAttempt, JsScrapingFailureReason, JsStacktrace, ResolvedWith, Scope};
use crate::utils::http::is_valid_origin;

use crate::js::JsMetrics;
Expand Down Expand Up @@ -227,6 +227,8 @@ impl SourceMapLookup {
used_artifact_bundles: Default::default(),

metrics: Default::default(),

scraping_attempts: Default::default(),
};

Self {
Expand Down Expand Up @@ -307,8 +309,13 @@ impl SourceMapLookup {
self.fetcher.record_metrics();
}

pub fn into_used_artifact_bundles(self) -> HashSet<SentryFileId> {
self.fetcher.used_artifact_bundles
/// Consumes `self` and returns the artifact bundles that were used and
/// the scraping attempts that were made.
pub fn into_records(self) -> (HashSet<SentryFileId>, Vec<JsScrapingAttempt>) {
(
self.fetcher.used_artifact_bundles,
self.fetcher.scraping_attempts,
)
}
}

Expand Down Expand Up @@ -631,6 +638,8 @@ struct ArtifactFetcher {
individual_artifacts: HashMap<String, IndividualArtifact>,

used_artifact_bundles: HashSet<SentryFileId>,

scraping_attempts: Vec<JsScrapingAttempt>,
}

impl ArtifactFetcher {
Expand Down Expand Up @@ -714,30 +723,36 @@ impl ArtifactFetcher {
/// (because multiple files can share one [`DebugId`]).
#[tracing::instrument(skip(self))]
pub async fn get_file(&mut self, key: &FileKey) -> CachedFileEntry {
if let Some(file) = self.try_get_file_using_index(key).await {
return file;
}
let mut file = self.try_get_file_using_index(key).await;

// Try looking up the file in one of the artifact bundles that we know about.
if let Some(file) = self.try_get_file_from_bundles(key) {
return file;
if file.is_none() {
// Try looking up the file in one of the artifact bundles that we know about.
file = self.try_get_file_from_bundles(key);
}

// Otherwise, try to get the file from an individual artifact.
if let Some(file) = self.try_fetch_file_from_artifacts(key).await {
return file;
if file.is_none() {
// Otherwise, try to get the file from an individual artifact.
file = self.try_fetch_file_from_artifacts(key).await;
}

// Otherwise: Do a (cached) API lookup for the `abs_path` + `DebugId`
if self.query_sentry_for_file(key).await {
// At this point, *one* of our known artifacts includes the file we are looking for.
// So we do the whole dance yet again.
if let Some(file) = self.try_get_file_from_bundles(key) {
return file;
if file.is_none() {
// Otherwise: Do a (cached) API lookup for the `abs_path` + `DebugId`
if self.query_sentry_for_file(key).await {
// At this point, *one* of our known artifacts includes the file we are looking for.
// So we do the whole dance yet again.
file = self.try_get_file_from_bundles(key);
if file.is_none() {
file = self.try_fetch_file_from_artifacts(key).await;
}
}
if let Some(file) = self.try_fetch_file_from_artifacts(key).await {
return file;
}

if let Some(file) = file {
if let Some(url) = key.abs_path() {
self.scraping_attempts
.push(JsScrapingAttempt::not_attempted(url.to_owned()));
}
return file;
}

// Otherwise, fall back to scraping from the Web.
Expand Down Expand Up @@ -879,28 +894,53 @@ impl ArtifactFetcher {
};

if !self.scraping.enabled {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::Disabled,
String::new(),
));
return make_error("Scraping disabled".to_string());
}

// Only scrape from http sources
let scheme = url.scheme();
if !["http", "https"].contains(&scheme) {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("`{scheme}` is not an allowed download scheme"),
));
return make_error(format!("`{scheme}` is not an allowed download scheme"));
}

if !is_valid_origin(&url, &self.scraping.allowed_origins) {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("{abs_path} is not an allowed download origin"),
));
return make_error(format!("{abs_path} is not an allowed download origin"));
}

let host_string = match url.host_str() {
None => {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
String::new(),
));
return make_error("Invalid host".to_string());
}
Some(host @ ("localhost" | "127.0.0.1")) => {
// NOTE: the reserved IPs cover a lot more than just localhost.
if self.download_svc.can_connect_to_reserved_ips() {
host
} else {
self.scraping_attempts.push(JsScrapingAttempt::failure(
abs_path.to_owned(),
JsScrapingFailureReason::InvalidHost,
format!("Can't connect to restricted host {host}"),
));
return make_error("Invalid host".to_string());
}
}
Expand Down Expand Up @@ -947,21 +987,35 @@ impl ArtifactFetcher {
.await;

transaction.finish();
CachedFileEntry {
uri,
entry: scraped_file.map(|contents| {
let entry = match scraped_file {
Ok(contents) => {
self.metrics.record_file_scraped(key.as_type());
tracing::trace!(?key, "Found file by scraping the web");

let sourcemap_url = discover_sourcemaps_location(&contents)
.and_then(|sm_ref| SourceMapUrl::parse_with_prefix(abs_path, sm_ref).ok())
.map(Arc::new);

CachedFile {
self.scraping_attempts
.push(JsScrapingAttempt::success(abs_path.to_owned()));

Ok(CachedFile {
contents,
sourcemap_url,
}
}),
})
}
Err(e) => {
self.scraping_attempts.push(JsScrapingAttempt {
url: abs_path.to_owned(),
result: e.clone().into(),
});

Err(e)
}
};
CachedFileEntry {
uri,
entry,
resolved_with: ResolvedWith::Scraping,
}
}
Expand Down
3 changes: 2 additions & 1 deletion crates/symbolicator-service/src/services/symbolication/js.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,14 @@ impl SymbolicationActor {
metric!(time_raw("js.unsymbolicated_frames") = unsymbolicated_frames);
metric!(time_raw("js.missing_sourcescontent") = missing_sourcescontent);

let used_artifact_bundles = lookup.into_used_artifact_bundles();
let (used_artifact_bundles, scraping_attempts) = lookup.into_records();

Ok(CompletedJsSymbolicationResponse {
stacktraces,
raw_stacktraces,
errors: errors.into_iter().collect(),
used_artifact_bundles,
scraping_attempts,
})
}
}
Expand Down
98 changes: 98 additions & 0 deletions crates/symbolicator-service/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize};
use symbolic::common::{Arch, CodeId, DebugId, Language};
use symbolicator_sources::{ObjectType, SentryFileId};

use crate::caching::CacheError;
use crate::utils::addr::AddrMode;
use crate::utils::hex::HexValue;

Expand Down Expand Up @@ -628,6 +629,101 @@ pub struct JsModuleError {
pub kind: JsModuleErrorKind,
}

/// An attempt to scrape a JS source or sourcemap file from the web.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct JsScrapingAttempt {
    /// The URL we attempted to scrape from.
    pub url: String,
    /// The outcome of the attempt.
    ///
    /// Flattened into this struct on (de)serialization, so the payload carries
    /// `url` alongside the result's own fields (e.g. its `status` tag) at the
    /// same level rather than under a nested key.
    #[serde(flatten)]
    pub result: JsScrapingResult,
}

impl JsScrapingAttempt {
pub fn success(url: String) -> Self {
Self {
url,
result: JsScrapingResult::Success,
}
}
pub fn not_attempted(url: String) -> Self {
Self {
url,
result: JsScrapingResult::NotAttempted,
}
}

pub fn failure(url: String, reason: JsScrapingFailureReason, details: String) -> Self {
Self {
url,
result: JsScrapingResult::Failure { reason, details },
}
}
}

/// The outcome of a scraping attempt.
///
/// Serialized as an internally tagged enum: the variant name is written in
/// `snake_case` under the `status` key (e.g. `status: not_attempted`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
#[serde(tag = "status")]
pub enum JsScrapingResult {
    /// We didn't actually attempt scraping because we already obtained the file
    /// by another method.
    NotAttempted,
    /// The file was successfully scraped.
    Success,
    /// The file couldn't be scraped.
    Failure {
        /// The basic reason for the failure.
        reason: JsScrapingFailureReason,
        #[serde(skip_serializing_if = "String::is_empty")]
        /// A more detailed explanation of the failure.
        ///
        /// Omitted from the serialized form when empty.
        details: String,
    },
}

impl From<CacheError> for JsScrapingResult {
fn from(value: CacheError) -> Self {
let (reason, details) = match value {
CacheError::NotFound => (JsScrapingFailureReason::NotFound, String::new()),
CacheError::PermissionDenied(details) => {
(JsScrapingFailureReason::PermissionDenied, details)
}
CacheError::Timeout(duration) => (
JsScrapingFailureReason::Timeout,
format!("Timeout after {}", humantime::format_duration(duration)),
),
CacheError::DownloadError(details) => (JsScrapingFailureReason::DownloadError, details),
CacheError::Malformed(details) => (JsScrapingFailureReason::Other, details),
CacheError::InternalError => (JsScrapingFailureReason::Other, String::new()),
};

Self::Failure { reason, details }
}
}

/// The basic reason a scraping attempt failed.
///
/// Serialized as a `snake_case` string (e.g. `not_found`).
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum JsScrapingFailureReason {
    /// The file was not found at the given URL.
    NotFound,
    /// Scraping was disabled via configuration.
    Disabled,
    /// The URL was not in the list of allowed hosts or had
    /// an invalid scheme.
    InvalidHost,
    /// Permission to access the file was denied.
    PermissionDenied,
    /// The scraping attempt timed out.
    Timeout,
    /// There was a non-timeout error while downloading.
    DownloadError,
    /// Catchall case.
    ///
    /// This probably can't actually happen.
    Other,
}

#[derive(Debug, Default, Clone, Deserialize, Serialize)]
pub struct CompletedJsSymbolicationResponse {
pub stacktraces: Vec<JsStacktrace>,
Expand All @@ -636,6 +732,8 @@ pub struct CompletedJsSymbolicationResponse {
pub errors: Vec<JsModuleError>,
#[serde(skip_serializing_if = "HashSet::is_empty")]
pub used_artifact_bundles: HashSet<SentryFileId>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub scraping_attempts: Vec<JsScrapingAttempt>,
}

/// Information about the operating system.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,9 @@ raw_stacktraces:
- "//# sourceMappingURL=test.min.js.map"
used_artifact_bundles:
- 02_correct.zip
scraping_attempts:
- url: "http://example.com/test.min.js"
status: not_attempted
- url: "http://example.com/test.min.js.map"
status: not_attempted

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 595
assertion_line: 636
expression: response.unwrap()
---
stacktraces:
Expand Down Expand Up @@ -39,4 +39,9 @@ raw_stacktraces:
- "//# sourceMappingURL=a.different.map"
- ""
- //@ sourceMappingURL=another.different.map
scraping_attempts:
- url: "http://localhost:<port>/files/app.js"
status: success
- url: "http://localhost:<port>/files/app.js.map"
status: success

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 508
assertion_line: 549
expression: response.unwrap()
---
stacktraces:
Expand Down Expand Up @@ -40,4 +40,9 @@ raw_stacktraces:
- exports.main = main;
- "//# sourceMappingURL=entrypoint1.js.map"
- ""
scraping_attempts:
- url: /Users/lucaforstner/code/github/getsentry/sentry-javascript-bundler-plugins/packages/playground/öut path/rollup/entrypoint1.js
status: not_attempted
- url: /Users/lucaforstner/code/github/getsentry/sentry-javascript-bundler-plugins/packages/playground/öut path/rollup/entrypoint1.js.map
status: not_attempted

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: crates/symbolicator-service/tests/integration/sourcemap.rs
assertion_line: 572
assertion_line: 613
expression: response.unwrap()
---
stacktraces:
Expand All @@ -26,4 +26,9 @@ raw_stacktraces:
- abs_path: "app:///index.android.bundle"
lineno: 1
colno: 11940
scraping_attempts:
- url: "app:///index.android.bundle"
status: not_attempted
- url: "app:///index.android.bundle.map"
status: not_attempted

Loading

0 comments on commit 2bde872

Please sign in to comment.