From 3cfaf3cb0cbd30bd3d3df9469a9672dddfdb7b7f Mon Sep 17 00:00:00 2001 From: Tobias Wilfert <36408720+tobias-wilfert@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:32:18 +0200 Subject: [PATCH] feat(downloader): Avoid downloading login pages (#1533) --- .../symbolicator-service/src/download/http.rs | 27 ++++++++++++ .../symbolicator-service/src/download/mod.rs | 10 +++++ crates/symbolicator-service/src/utils/http.rs | 43 ++++++++++++++++++- 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/crates/symbolicator-service/src/download/http.rs b/crates/symbolicator-service/src/download/http.rs index d69c70c8c..42a00b53f 100644 --- a/crates/symbolicator-service/src/download/http.rs +++ b/crates/symbolicator-service/src/download/http.rs @@ -119,4 +119,31 @@ mod tests { assert_eq!(download_status, Err(CacheError::NotFound)); } + + #[tokio::test] + async fn test_download_azure_file() { + test::setup(); + + let tmpfile = tempfile::NamedTempFile::new().unwrap(); + let dest = tmpfile.path(); + + let file_source = + HttpRemoteFile::from_url("https://dev.azure.com/foo/bar.cs".parse().unwrap(), true); + + let restricted_client = crate::utils::http::create_client(&Default::default(), true, false); + let no_ssl_client = crate::utils::http::create_client(&Default::default(), true, true); + + let downloader = HttpDownloader::new(restricted_client, no_ssl_client, Default::default()); + let mut destination = tokio::fs::File::create(&dest).await.unwrap(); + let download_status = downloader + .download_source("", &file_source, &mut destination) + .await; + + assert_eq!( + download_status, + Err(CacheError::PermissionDenied( + "Potential login page detected".into() + )) + ); + } } diff --git a/crates/symbolicator-service/src/download/mod.rs b/crates/symbolicator-service/src/download/mod.rs index f1eae9a32..4e5051c16 100644 --- a/crates/symbolicator-service/src/download/mod.rs +++ b/crates/symbolicator-service/src/download/mod.rs @@ -576,6 +576,16 @@ async fn download_reqwest( ); Err(CacheError::NotFound) + } else if status == StatusCode::FOUND { + tracing::debug!( + "Potential login page detected when downloading from `{}`: {}", + source, + status + ); + + Err(CacheError::PermissionDenied( + "Potential login page detected".to_string(), + )) } else { tracing::debug!("Unexpected status code from `{}`: {}", source, status); diff --git a/crates/symbolicator-service/src/utils/http.rs b/crates/symbolicator-service/src/utils/http.rs index db0361905..713398f8e 100644 --- a/crates/symbolicator-service/src/utils/http.rs +++ b/crates/symbolicator-service/src/utils/http.rs @@ -4,7 +4,7 @@ use std::time::Duration; use ipnetwork::Ipv4Network; use once_cell::sync::Lazy; -use reqwest::Url; +use reqwest::{redirect, StatusCode, Url}; use crate::config::Config; @@ -95,6 +95,8 @@ impl Default for DownloadTimeouts { /// connect to reserved IPs (as defined in `RESERVED_IP_BLOCKS`). /// * `accept_invalid_certs` determines whether the client accepts invalid /// SSL certificates. +/// * Uses a custom redirect policy that limits redirects from certain hosts +/// to avoid fetching login pages. pub fn create_client( timeouts: &DownloadTimeouts, connect_to_reserved_ips: bool, @@ -106,7 +108,27 @@ pub fn create_client( .connect_timeout(timeouts.connect) .timeout(timeouts.max_download) .pool_idle_timeout(Duration::from_secs(30)) - .danger_accept_invalid_certs(accept_invalid_certs); + .danger_accept_invalid_certs(accept_invalid_certs) + .redirect(redirect::Policy::custom(|attempt: redirect::Attempt| { + // The default redirect policy allows to follow up to 10 redirects. This is problematic + // when symbolicator tries to fetch native source files from a web source, as a redirect + // might land us on a login page, which is then used for source context. + // To avoid this, symbolicator's redirect policy is to not follow temporary redirects + // on hosts that are known to redirect to login pages. + + if attempt.status() == StatusCode::FOUND { + let is_from_azure = attempt + .previous() + .last() + .and_then(|url| url.host_str()) + .map_or(false, |host| host == "dev.azure.com"); + + if is_from_azure { + return attempt.stop(); + } + } + redirect::Policy::default().redirect(attempt) + })); if !connect_to_reserved_ips { builder = builder.ip_filter(is_external_ip); @@ -273,6 +295,23 @@ mod tests { assert_eq!(text, "OK"); } + #[tokio::test] + async fn test_client_redirect_policy() { + let client = create_client(&Default::default(), false, false); + + let response = client + .get("https://dev.azure.com/foo/bar.cs") + .send() + .await + .unwrap(); + + let status = response.status(); + + assert_eq!(status.as_u16(), 302); + assert!(status.is_redirection()); + assert!(!status.is_success()); + } + fn is_valid_origin(origin: &str, allowed: &[&str]) -> bool { let allowed: Vec<_> = allowed.iter().map(|s| s.to_string()).collect(); super::is_valid_origin(&origin.parse().unwrap(), &allowed)