From d4952590e4707ef6fe6e69d2ed151ed9475ee362 Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Thu, 5 Dec 2024 15:47:35 +0900 Subject: [PATCH 1/5] Explicitly disable gzip on reqwest client used by object_store --- object_store/src/client/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/object_store/src/client/mod.rs b/object_store/src/client/mod.rs index 76d1c1f22f58..73d89128af35 100644 --- a/object_store/src/client/mod.rs +++ b/object_store/src/client/mod.rs @@ -671,6 +671,8 @@ impl ClientOptions { builder = builder.danger_accept_invalid_certs(true) } + builder = builder.no_gzip(); + builder .https_only(!self.allow_http.get()?) .build() From 9aac0258f68ba3fd2c93ed2903fa5411c81c5888 Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Fri, 6 Dec 2024 10:15:39 +0900 Subject: [PATCH 2/5] Add comment --- object_store/src/client/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/object_store/src/client/mod.rs b/object_store/src/client/mod.rs index 73d89128af35..1b7ce5aa7a78 100644 --- a/object_store/src/client/mod.rs +++ b/object_store/src/client/mod.rs @@ -671,6 +671,8 @@ impl ClientOptions { builder = builder.danger_accept_invalid_certs(true) } + // Reqwest will remove the `Content-Length` header if it is configured to + // transparently decompress the body via the non-default `gzip` feature. builder = builder.no_gzip(); builder From 796f399ac4068229c65a7f3cc3865760b1bad0c9 Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Fri, 6 Dec 2024 10:34:27 +0900 Subject: [PATCH 3/5] Add integration test for checking reqwest gzip feature --- object_store/Cargo.toml | 1 + object_store/tests/http.rs | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 object_store/tests/http.rs diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index a04733625135..e147e53b1d17 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -77,6 +77,7 @@ http-body-util = "0.1" rand = "0.8" tempfile = "3.1.0" regex = "1.11.1" +reqwest = { version = "0.12", features = ["gzip"] } http = "1.1.0" [[test]] diff --git a/object_store/tests/http.rs b/object_store/tests/http.rs new file mode 100644 index 000000000000..e248dc165e24 --- /dev/null +++ b/object_store/tests/http.rs @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests the HTTP store implementation + +#[cfg(feature = "http")] +use object_store::{http::HttpBuilder, path::Path, GetOptions, GetRange, ObjectStore}; + +/// Tests that even when reqwest has the `gzip` feature enabled, the HTTP store +/// does not error on a missing `Content-Length` header. +#[tokio::test] +#[cfg(feature = "http")] +async fn test_http_store_gzip() { + let http_store = HttpBuilder::new() + .with_url("https://raw.githubusercontent.com/apache/arrow-rs/refs/heads/main") + .build() + .unwrap(); + + let object = http_store + .get_opts( + &Path::parse("LICENSE.txt").unwrap(), + GetOptions { + range: Some(GetRange::Bounded(0..100)), + ..Default::default() + }, + ) + .await + .unwrap(); +} From 31526937e634ad54650393db1d51605a96bf827e Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Fri, 6 Dec 2024 10:53:48 +0900 Subject: [PATCH 4/5] Fix lint --- object_store/tests/http.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object_store/tests/http.rs b/object_store/tests/http.rs index e248dc165e24..a9b3145bb660 100644 --- a/object_store/tests/http.rs +++ b/object_store/tests/http.rs @@ -30,7 +30,7 @@ async fn test_http_store_gzip() { .build() .unwrap(); - let object = http_store + let _ = http_store .get_opts( &Path::parse("LICENSE.txt").unwrap(), GetOptions { From e2f8c2ece772f952aedc56e33e56ced0f074461b Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc Date: Wed, 11 Dec 2024 16:58:21 +0900 Subject: [PATCH 5/5] Add comment explaining why gzip feature is enabled --- object_store/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index e147e53b1d17..bcc8e0b92243 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -77,6 +77,7 @@ http-body-util = "0.1" rand = "0.8" tempfile = "3.1.0" regex = "1.11.1" +# The "gzip" feature for reqwest is enabled for an integration test. reqwest = { version = "0.12", features = ["gzip"] } http = "1.1.0"