Skip to content

Commit

Permalink
Split JS processing into its own crate (#1316)
Browse files Browse the repository at this point in the history
  • Loading branch information
Swatinem authored Oct 5, 2023
1 parent 51260f4 commit d8704ed
Show file tree
Hide file tree
Showing 55 changed files with 2,143 additions and 2,047 deletions.
29 changes: 29 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions crates/symbolicator-js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
[package]
name = "symbolicator-js"
publish = false
version = "23.9.1"
authors = ["Sentry <[email protected]>"]
edition = "2021"
license = "MIT"

[dependencies]
data-url = "0.3.0"
futures = "0.3.12"
humantime = "2.1.0"
moka = { version = "0.12.1", features = ["future", "sync"] }
once_cell = "1.17.1"
regex = "1.5.5"
reqwest = { version = "0.11.0", features = ["gzip", "brotli", "deflate", "json", "stream", "trust-dns"] }
sentry = { version = "0.31.7", features = ["tracing"] }
serde = { version = "1.0.137", features = ["derive", "rc"] }
serde_json = "1.0.81"
sha2 = "0.10.6"
symbolic = { version = "12.4.0", features = ["common-serde", "sourcemapcache"] }
symbolicator-service = { path = "../symbolicator-service" }
symbolicator-sources = { path = "../symbolicator-sources" }
tempfile = "3.2.0"
tokio = { version = "1.24.2", features = ["rt", "macros", "fs"] }
tracing = "0.1.34"
url = { version = "2.2.0", features = ["serde"] }

[dev-dependencies]
insta = { version = "1.18.0", features = ["redactions", "yaml"] }
symbolicator-test = { path = "../symbolicator-test" }
233 changes: 233 additions & 0 deletions crates/symbolicator-js/src/api_lookup.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fmt;
use std::sync::Arc;
use std::time::Duration;

use sentry::types::DebugId;
use sentry::SentryFutureExt;
use serde::Deserialize;
use symbolicator_service::metric;
use symbolicator_service::services::download::retry;
use symbolicator_service::services::download::sentry::{SearchQuery, SentryDownloader};
use url::Url;

use symbolicator_service::caching::{CacheEntry, CacheError};
use symbolicator_service::config::InMemoryCacheConfig;
use symbolicator_service::utils::futures::{m, measure, CancelOnDrop};
use symbolicator_service::utils::http::DownloadTimeouts;
use symbolicator_sources::{RemoteFile, SentryFileId, SentryRemoteFile, SentrySourceConfig};

use crate::interface::ResolvedWith;

/// The raw, deserialized form of a single entry of a Sentry JS artifact lookup response.
///
/// Entries are internally tagged: the `type` field of each entry selects the variant, with
/// the variant names spelled in `snake_case` (`bundle` / `file`).
///
/// These raw entries are what gets stored in the in-memory cache; they are converted into
/// [`JsLookupResult`]s on every lookup.
#[derive(Clone, Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum RawJsLookupResult {
/// An artifact bundle.
Bundle {
/// The Sentry file id of the bundle.
id: SentryFileId,
/// The url the bundle is reported to be available at.
url: Url,
/// Falls back to `ResolvedWith::default()` when the field is missing.
#[serde(default)]
resolved_with: ResolvedWith,
},
/// An individual artifact file.
File {
/// The Sentry file id of the artifact.
id: SentryFileId,
/// The url the artifact is reported to be available at.
url: Url,
/// The absolute path (also called `url`) of the artifact.
abs_path: String,
/// Headers of the artifact; an empty map when the field is missing.
#[serde(default)]
headers: ArtifactHeaders,
/// Falls back to `ResolvedWith::default()` when the field is missing.
#[serde(default)]
resolved_with: ResolvedWith,
},
}

/// Arbitrary string key/value headers attached to an individual artifact file.
pub type ArtifactHeaders = BTreeMap<String, String>;

/// The Result of looking up JS Artifacts.
///
/// This is the public counterpart of the raw lookup entries: instead of a bare file id and
/// url, each variant carries a ready-to-use [`RemoteFile`].
#[derive(Clone, Debug)]
pub enum JsLookupResult {
/// This is an `ArtifactBundle`.
ArtifactBundle {
/// The [`RemoteFile`] to download this bundle from.
remote_file: RemoteFile,
/// The `resolved_with` value copied from the raw lookup entry.
resolved_with: ResolvedWith,
},
/// This is an individual artifact file.
IndividualArtifact {
/// The [`RemoteFile`] to download this artifact from.
remote_file: RemoteFile,
/// The absolute path (also called `url`) of the artifact.
abs_path: String,
/// Arbitrary headers of this file, such as a `Sourcemap` reference.
headers: ArtifactHeaders,
/// The `resolved_with` value copied from the raw lookup entry.
resolved_with: ResolvedWith,
},
}

/// An in-memory cache for Sentry JS Artifact lookups.
///
/// Keyed by the full [`SearchQuery`] (lookup url plus token); the value is the outcome of the
/// lookup, i.e. either the shared list of raw entries or the error the lookup failed with.
///
// NOTE(review): this was previously described as an "LRU Cache". The builder below only sets
// a max capacity and a TTL; moka's default eviction policy is documented as TinyLFU — confirm
// before relying on strict LRU semantics.
type SentryJsCache = moka::future::Cache<SearchQuery, CacheEntry<Arc<[RawJsLookupResult]>>>;

/// Client for the Sentry JS artifact lookup API, with an in-memory cache of lookup responses.
pub struct SentryLookupApi {
/// The HTTP client used to perform the lookup requests.
client: reqwest::Client,
/// Handle of the runtime onto which the lookup requests are spawned.
runtime: tokio::runtime::Handle,
/// In-memory cache of lookup responses, keyed by the search query.
js_cache: SentryJsCache,
/// Download timeouts handed in at construction.
// NOTE(review): within the code visible in this file, this is only read by the `Debug`
// impl; the lookup itself uses a fixed timeout — confirm whether that is intended.
timeouts: DownloadTimeouts,
}

/// Debug output showing the number of cached lookups and the configured timeouts.
///
/// The `client` and `runtime` fields are intentionally left out, which is signalled by the
/// trailing `..` in the output.
impl fmt::Debug for SentryLookupApi {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Use this type's own name: the previous "SentryDownloader" label was a leftover that
        // made debug output point at the wrong type.
        f.debug_struct("SentryLookupApi")
            // Only the entry count is printed, not the cached entries themselves.
            .field("js_cache", &self.js_cache.entry_count())
            .field("timeouts", &self.timeouts)
            .finish_non_exhaustive()
    }
}

impl SentryLookupApi {
    /// Creates a new lookup API client.
    ///
    /// The in-memory cache of lookup responses is bounded by
    /// `in_memory.sentry_index_capacity`, and its entries live for at most
    /// `in_memory.sentry_index_ttl`.
    pub fn new(
        client: reqwest::Client,
        runtime: tokio::runtime::Handle,
        timeouts: DownloadTimeouts,
        in_memory: &InMemoryCacheConfig,
    ) -> Self {
        Self {
            client,
            runtime,
            js_cache: SentryJsCache::builder()
                .max_capacity(in_memory.sentry_index_capacity)
                .time_to_live(in_memory.sentry_index_ttl)
                .build(),
            timeouts,
        }
    }

    /// Queries the lookup endpoint of `source` for artifact bundles or individual artifact
    /// files that cover the given `debug_ids` and `file_stems`.
    ///
    /// The `file_stems` are only sent along when a `release` is given. Responses are cached
    /// in memory, keyed by the complete lookup url and the token of the `source`. A cached
    /// error does not stick: it is replaced by a fresh lookup on the next call.
    ///
    /// # Errors
    ///
    /// - [`CacheError::DownloadError`] if the resulting lookup url is too long.
    /// - [`CacheError::Timeout`] if the lookup did not finish within 30 seconds.
    /// - [`CacheError::InternalError`] if the spawned lookup task did not run to completion.
    /// - Whatever error the lookup request itself resulted in.
    pub async fn lookup_js_artifacts(
        &self,
        source: Arc<SentrySourceConfig>,
        debug_ids: BTreeSet<DebugId>,
        file_stems: BTreeSet<String>,
        release: Option<&str>,
        dist: Option<&str>,
    ) -> CacheEntry<Vec<JsLookupResult>> {
        // NOTE: `http::Uri` has a hard limit defined, and reqwest unconditionally unwraps such
        // errors, when converting between `Url` to `Uri`. To avoid a panic in that case, the
        // check is duplicated further down to gracefully error out.
        const MAX_URI_LEN: usize = (u16::MAX - 1) as usize;

        let mut index_url = source.url.clone();
        {
            let mut pairs = index_url.query_pairs_mut();

            if let Some(release) = release {
                pairs.append_pair("release", release);

                // A `url` is only valid in combination with a `release`.
                for stem in &file_stems {
                    pairs.append_pair("url", stem);
                }
            }
            if let Some(dist) = dist {
                pairs.append_pair("dist", dist);
            }
            for id in &debug_ids {
                pairs.append_pair("debug_id", &id.to_string());
            }
        }

        if index_url.as_str().len() > MAX_URI_LEN {
            return Err(CacheError::DownloadError("uri too long".into()));
        }

        let query = SearchQuery {
            index_url,
            token: source.token.clone(),
        };

        metric!(counter("source.sentry.js_lookup.access") += 1);

        // This future is only driven by the cache when the lookup actually has to be performed.
        let init = Box::pin(async {
            metric!(counter("source.sentry.js_lookup.computation") += 1);
            tracing::debug!(
                "Fetching list of Sentry JS artifacts from {}",
                &query.index_url
            );

            // The spawned task needs to own everything it uses.
            let client = self.client.clone();
            let owned_query = query.clone();
            let fetch = async move {
                retry(|| SentryDownloader::fetch_sentry_json(&client, &owned_query)).await
            };

            let task =
                CancelOnDrop::new(self.runtime.spawn(fetch.bind_hub(sentry::Hub::current())));

            let timeout = Duration::from_secs(30);
            let timed = measure(
                "service.download.lookup_js_artifacts",
                m::timed_result,
                tokio::time::timeout(timeout, task),
            );

            match timed.await {
                // The timeout elapsed before the task finished.
                Err(_) => Err(CacheError::Timeout(timeout)),
                // The spawned task itself did not run to completion.
                Ok(Err(_)) => Err(CacheError::InternalError),
                Ok(Ok(entry)) => entry,
            }
        });

        let entries = self
            .js_cache
            .entry_by_ref(&query)
            .or_insert_with_if(init, |cached| cached.is_err())
            .await
            .into_value()?;

        let mut results = Vec::with_capacity(entries.len());
        for raw in entries.iter() {
            let result = match raw {
                RawJsLookupResult::Bundle {
                    id,
                    url,
                    resolved_with,
                } => JsLookupResult::ArtifactBundle {
                    remote_file: make_remote_file(&source, id, url),
                    resolved_with: *resolved_with,
                },
                RawJsLookupResult::File {
                    id,
                    url,
                    abs_path,
                    headers,
                    resolved_with,
                } => JsLookupResult::IndividualArtifact {
                    remote_file: make_remote_file(&source, id, url),
                    abs_path: abs_path.clone(),
                    headers: headers.clone(),
                    resolved_with: *resolved_with,
                },
            };
            results.push(result);
        }
        Ok(results)
    }
}

/// Builds the [`RemoteFile`] for a file with the given `file_id` that lives at `url`.
///
/// Right now, files are served from a Sentry API endpoint, which needs to be authenticated
/// via a `token`, and which uses a restricted IP that is being blocked for arbitrary HTTP
/// files. To stay forward-compatible with a future in which the Sentry API hands out
/// pre-authenticated Urls on some external file storage service, credentials are only used
/// when `url` starts with the Url of the `source` itself. That way, the `token` is never
/// leaked to any public Url.
fn make_remote_file(
    source: &Arc<SentrySourceConfig>,
    file_id: &SentryFileId,
    url: &Url,
) -> RemoteFile {
    let source_prefix = source.url.as_str();
    let served_by_source = url.as_str().starts_with(source_prefix);

    let remote_file = SentryRemoteFile::new(
        Arc::clone(source),
        served_by_source,
        file_id.clone(),
        Some(url.clone()),
    );
    remote_file.into()
}
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
use std::sync::Arc;

use symbolic::common::ByteView;
use symbolicator_sources::RemoteFile;

use crate::caching::{
use symbolicator_service::caching::{
Cache, CacheEntry, CacheItemRequest, CacheKey, CacheVersions, Cacher, SharedCacheRef,
};
use crate::services::bundle_index::BundleIndex;
use crate::services::download::DownloadService;
use crate::services::fetch_file;
use crate::types::Scope;
use symbolicator_service::services::caches::versions::BUNDLE_INDEX_CACHE_VERSIONS;
use symbolicator_service::services::download::DownloadService;
use symbolicator_service::services::fetch_file;
use symbolicator_service::types::Scope;
use symbolicator_sources::RemoteFile;

use super::versions::BUNDLE_INDEX_CACHE_VERSIONS;
use crate::bundle_index::BundleIndex;

type BundleIndexCacheItem = (u32, Arc<BundleIndex>);

Expand Down
Loading

0 comments on commit d8704ed

Please sign in to comment.