diff --git a/.github/workflows/ci_core.yml b/.github/workflows/ci_core.yml index aa4c581b369e..b8afc9ef201a 100644 --- a/.github/workflows/ci_core.yml +++ b/.github/workflows/ci_core.yml @@ -80,8 +80,8 @@ jobs: check_msrv: runs-on: ubuntu-latest env: - # OpenDAL's MSRV is 1.67. - OPENDAL_MSRV: "1.67" + # OpenDAL's MSRV is 1.75. + OPENDAL_MSRV: "1.75" steps: - uses: actions/checkout@v4 - name: Setup msrv of rust diff --git a/bin/oay/Cargo.toml b/bin/oay/Cargo.toml index 8b651855b16c..79c1a60110fd 100644 --- a/bin/oay/Cargo.toml +++ b/bin/oay/Cargo.toml @@ -26,7 +26,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.41.1" [features] diff --git a/bin/ofs/Cargo.toml b/bin/ofs/Cargo.toml index c9f8da56374a..38f526e47af3 100644 --- a/bin/ofs/Cargo.toml +++ b/bin/ofs/Cargo.toml @@ -27,7 +27,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [dependencies] anyhow = "1" diff --git a/bin/oli/Cargo.toml b/bin/oli/Cargo.toml index 950be3d87588..2c1b6493e327 100644 --- a/bin/oli/Cargo.toml +++ b/bin/oli/Cargo.toml @@ -26,7 +26,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.41.1" [features] diff --git a/bindings/c/Cargo.toml b/bindings/c/Cargo.toml index 725326e180d7..48dd75220b13 100644 --- a/bindings/c/Cargo.toml +++ b/bindings/c/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.44.3" [lib] diff --git a/bindings/cpp/Cargo.toml b/bindings/cpp/Cargo.toml index ab8c2caf97c5..10b9cc938e40 100644 --- a/bindings/cpp/Cargo.toml +++ b/bindings/cpp/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.44.3" [lib] diff --git a/bindings/dotnet/Cargo.toml b/bindings/dotnet/Cargo.toml index d1848d8d8bf6..7e40769c60b2 100644 --- a/bindings/dotnet/Cargo.toml +++ b/bindings/dotnet/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [lib] crate-type = ["cdylib"] diff --git a/bindings/haskell/Cargo.toml b/bindings/haskell/Cargo.toml index 26fdf5ae68d5..9b4824ddb091 100644 --- a/bindings/haskell/Cargo.toml +++ b/bindings/haskell/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.44.3" [lib] diff --git a/bindings/java/Cargo.toml b/bindings/java/Cargo.toml index ffcd629524d7..2df78b8f6e7d 100644 --- a/bindings/java/Cargo.toml +++ b/bindings/java/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.45.1" [lib] diff --git a/bindings/lua/Cargo.toml b/bindings/lua/Cargo.toml index 0dceaa2f7d32..6a920bc3484e 100644 --- a/bindings/lua/Cargo.toml +++ b/bindings/lua/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [features] default = ["mlua/lua52"] diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index 9821a9fefc2a..9344c6b4f9db 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.45.1" [features] diff --git a/bindings/nodejs/generated.d.ts b/bindings/nodejs/generated.d.ts index 5d196b2589c2..a0c5329afeea 100644 --- a/bindings/nodejs/generated.d.ts +++ b/bindings/nodejs/generated.d.ts @@ -563,6 +563,8 @@ export class Reader { * > &mut self in async napi methods should be marked as unsafe * * Read bytes from this reader into given buffer. + * + * TODO: change api into stream based. */ read(buf: Buffer): Promise } diff --git a/bindings/nodejs/src/lib.rs b/bindings/nodejs/src/lib.rs index 212c4fa967fc..4a0a9e188d99 100644 --- a/bindings/nodejs/src/lib.rs +++ b/bindings/nodejs/src/lib.rs @@ -27,7 +27,6 @@ use std::time::Duration; use futures::TryStreamExt; use napi::bindgen_prelude::*; use opendal::raw::oio::BlockingRead; -use opendal::raw::oio::ReadExt; #[napi] pub struct Operator(opendal::Operator); @@ -665,9 +664,14 @@ impl Reader { /// > &mut self in async napi methods should be marked as unsafe /// /// Read bytes from this reader into given buffer. + /// + /// TODO: change api into stream based. #[napi] pub async unsafe fn read(&mut self, mut buf: Buffer) -> Result { - self.0.read(buf.as_mut()).await.map_err(format_napi_error) + let buf = buf.as_mut(); + let bs = self.0.read(buf.len()).await.map_err(format_napi_error)?; + buf[..bs.len()].copy_from_slice(&bs); + Ok(bs.len()) } } diff --git a/bindings/ocaml/Cargo.toml b/bindings/ocaml/Cargo.toml index 01448cc92c92..ac5e6e3cea28 100644 --- a/bindings/ocaml/Cargo.toml +++ b/bindings/ocaml/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [lib] crate-type = ["staticlib", "cdylib"] diff --git a/bindings/php/Cargo.toml b/bindings/php/Cargo.toml index e4b5e268d5c2..108923eb0d70 100644 --- a/bindings/php/Cargo.toml +++ b/bindings/php/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [lib] crate-type = ["cdylib"] diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 96132f42ea32..5d416c9cff79 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.45.1" [features] diff --git a/bindings/python/src/file.rs b/bindings/python/src/file.rs index 552903b48975..806306ca5a55 100644 --- a/bindings/python/src/file.rs +++ b/bindings/python/src/file.rs @@ -25,7 +25,6 @@ use std::io::Write; use std::ops::DerefMut; use std::sync::Arc; -use futures::AsyncReadExt; use futures::AsyncSeekExt; use futures::AsyncWriteExt; use pyo3::exceptions::PyIOError; @@ -243,12 +242,11 @@ impl AsyncFile { let buffer = match size { Some(size) => { - let mut buffer = vec![0; size]; - reader - .read_exact(&mut buffer) + let buffer = reader + .read_exact(size) .await .map_err(|err| PyIOError::new_err(err.to_string()))?; - buffer + buffer.to_vec() } None => { let mut buffer = Vec::new(); diff --git a/bindings/ruby/Cargo.toml b/bindings/ruby/Cargo.toml index 435d20136de6..99fa817b7741 100644 --- a/bindings/ruby/Cargo.toml +++ b/bindings/ruby/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [lib] crate-type = ["cdylib"] diff --git a/core/Cargo.lock b/core/Cargo.lock index 2dfab9bbafe1..5986d5cae1fd 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -886,11 +886,11 @@ dependencies = [ [[package]] name = "backon" -version = "0.4.1" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c1a6197b2120bb2185a267f6515038558b019e92b832bb0320e96d66268dcf9" +checksum = "c491fa80d69c03084223a4e73c378dd9f9a1e612eb54051213f88b2d5249b458" dependencies = [ - "fastrand 1.9.0", + "fastrand 2.0.1", "futures-core", "pin-project", "tokio", diff --git a/core/Cargo.toml b/core/Cargo.toml index 451280d71c4c..6d7a8d25b0db 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -27,7 +27,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.45.1" [package.metadata.docs.rs] @@ -235,7 +235,7 @@ async-tls = { version = "0.13.0", optional = true } # Required dependencies anyhow = { version = "1.0.30", features = ["std"] } async-trait = "0.1.68" -backon = "0.4.1" +backon = "0.4.3" base64 = "0.21" bytes = "1.4" chrono = { version = "0.4.28", default-features = false, features = [ @@ -256,7 +256,7 @@ reqwest = { version = "0.11.18", features = [ ], default-features = false } serde = { version = "1", features = ["derive"] } serde_json = "1" -tokio = { version = "1.27", features = ["sync"] } +tokio = { version = "1.27", features = ["sync", "io-util"] } uuid = { version = "1", features = ["serde", "v4"] } # Test only dependencies diff --git a/core/benches/ops/read.rs b/core/benches/ops/read.rs index 8171daebdd9f..401e72ff7b80 100644 --- a/core/benches/ops/read.rs +++ b/core/benches/ops/read.rs @@ -17,7 +17,6 @@ use criterion::Criterion; use futures::io; -use futures::AsyncReadExt; use opendal::raw::tests::init_test_service; use opendal::raw::tests::TEST_RUNTIME; use opendal::Operator; @@ -112,25 +111,24 @@ fn bench_read_parallel(c: &mut Criterion, name: &str, op: Operator) { let content = gen_bytes(&mut rng, (size.bytes() * 2) as usize); let path = uuid::Uuid::new_v4().to_string(); let offset = (size.bytes() / 2) as u64; - let buf = vec![0; size.bytes() as usize]; + let buf_size = size.bytes() as usize; let temp_data = TempData::generate(op.clone(), &path, content.clone()); for parallel in [1, 2, 4, 8, 16] { group.throughput(criterion::Throughput::Bytes(parallel * size.bytes() as u64)); group.bench_with_input( format!("{}x{}", parallel, size.to_string()), - &(op.clone(), &path, buf.clone()), - |b, (op, path, buf)| { + &(op.clone(), &path, buf_size), + |b, (op, path, buf_size)| { b.to_async(&*TEST_RUNTIME).iter(|| async { let futures = (0..parallel) .map(|_| async { - let mut buf = buf.clone(); let mut r = op .reader_with(path) .range(offset..=offset + size.bytes() as u64) .await .unwrap(); - r.read_exact(&mut buf).await.unwrap(); + r.read_exact(*buf_size).await.unwrap(); let mut d = 0; // mock same little cpu work diff --git a/core/benches/vs_fs/Cargo.toml b/core/benches/vs_fs/Cargo.toml index 291989dacff9..89ebab6df509 100644 --- a/core/benches/vs_fs/Cargo.toml +++ b/core/benches/vs_fs/Cargo.toml @@ -21,7 +21,7 @@ edition = "2021" license = "Apache-2.0" name = "opendal-benchmark-vs-fs" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [dependencies] diff --git a/core/benches/vs_s3/Cargo.toml b/core/benches/vs_s3/Cargo.toml index 4aa2ff45cb3c..de072134b65e 100644 --- a/core/benches/vs_s3/Cargo.toml +++ b/core/benches/vs_s3/Cargo.toml @@ -21,7 +21,7 @@ edition = "2021" license = "Apache-2.0" name = "opendal-benchmark-vs-s3" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [dependencies] diff --git a/core/edge/file_write_on_full_disk/Cargo.toml b/core/edge/file_write_on_full_disk/Cargo.toml index 5e1c7f7c7a12..ccd408ac7fca 100644 --- a/core/edge/file_write_on_full_disk/Cargo.toml +++ b/core/edge/file_write_on_full_disk/Cargo.toml @@ -20,7 +20,7 @@ edition = "2021" license = "Apache-2.0" name = "edge_test_file_write_on_full_disk" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [dependencies] diff --git a/core/edge/s3_aws_assume_role_with_web_identity/Cargo.toml b/core/edge/s3_aws_assume_role_with_web_identity/Cargo.toml index b6c1fd1c88f6..a35d639835f3 100644 --- a/core/edge/s3_aws_assume_role_with_web_identity/Cargo.toml +++ b/core/edge/s3_aws_assume_role_with_web_identity/Cargo.toml @@ -20,7 +20,7 @@ edition = "2021" license = "Apache-2.0" name = "edge_test_aws_s3_assume_role_with_web_identity" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [dependencies] diff --git a/core/edge/s3_read_on_wasm/Cargo.toml b/core/edge/s3_read_on_wasm/Cargo.toml index 4f9b1e028c73..0a2389e5975b 100644 --- a/core/edge/s3_read_on_wasm/Cargo.toml +++ b/core/edge/s3_read_on_wasm/Cargo.toml @@ -20,7 +20,7 @@ edition = "2021" license = "Apache-2.0" name = "edge_test_s3_read_on_wasm" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [lib] diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml index 75713ab92248..8463239fae2c 100644 --- a/core/fuzz/Cargo.toml +++ b/core/fuzz/Cargo.toml @@ -20,7 +20,7 @@ edition = "2021" license = "Apache-2.0" name = "opendal-fuzz" publish = false -rust-version = "1.67" +rust-version = "1.75" version = "0.0.0" [package.metadata] diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index 2d9c0efa846a..c18a348cef84 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -96,27 +96,25 @@ impl Arbitrary<'_> for FuzzInput { let mut actions = vec![]; for _ in 0..count { - let action = match u.int_in_range(0..=4)? { + let action = match u.int_in_range(0..=3)? { // Read 0 => { let size = u.int_in_range(0..=total_size * 2)?; ReadAction::Read(size) } - // Next - 1 => ReadAction::Next, // Seek Start - 2 => { + 1 => { // NOTE: seek out of the end of file is valid. let offset = u.int_in_range(0..=total_size * 2)?; ReadAction::Seek(SeekFrom::Start(offset as u64)) } // Seek Current - 3 => { + 2 => { let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?; ReadAction::Seek(SeekFrom::Current(offset)) } // Seek End - 4 => { + 3 => { let offset = u.int_in_range(-(total_size as i64)..=(total_size as i64))?; ReadAction::Seek(SeekFrom::End(offset)) } diff --git a/core/src/layers/blocking.rs b/core/src/layers/blocking.rs index bf2c0e540a26..3476dc6e90ba 100644 --- a/core/src/layers/blocking.rs +++ b/core/src/layers/blocking.rs @@ -17,11 +17,10 @@ use async_trait::async_trait; use bytes; -use bytes::Bytes; +use bytes::{BufMut, Bytes}; use futures::future::poll_fn; use tokio::runtime::Handle; -use crate::raw::oio::ReadExt; use crate::raw::*; use crate::*; @@ -289,8 +288,11 @@ impl BlockingWrapper { } impl oio::BlockingRead for BlockingWrapper { - fn read(&mut self, buf: &mut [u8]) -> Result { - self.handle.block_on(self.inner.read(buf)) + fn read(&mut self, mut buf: &mut [u8]) -> Result { + let bs = self.handle.block_on(self.inner.read(buf.len())); + let bs = bs?; + buf.put_slice(&bs); + Ok(bs.len()) } fn seek(&mut self, pos: std::io::SeekFrom) -> Result { @@ -298,7 +300,12 @@ impl oio::BlockingRead for BlockingWrapper { } fn next(&mut self) -> Option> { - self.handle.block_on(self.inner.next()) + let bs = self.handle.block_on(self.inner.read(4 * 1024 * 1024)); + match bs { + Ok(bs) if bs.is_empty() => None, + Ok(bs) => Some(Ok(bs)), + Err(err) => Some(Err(err)), + } } } diff --git a/core/src/layers/chaos.rs b/core/src/layers/chaos.rs index 0907503fd822..223e1e256abd 100644 --- a/core/src/layers/chaos.rs +++ b/core/src/layers/chaos.rs @@ -16,8 +16,6 @@ // under the License. use std::io; -use std::task::Context; -use std::task::Poll; use async_trait::async_trait; use bytes::Bytes; @@ -176,27 +174,19 @@ impl ChaosReader { } impl oio::Read for ChaosReader { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> Result { if self.i_feel_lucky() { - self.inner.poll_read(cx, buf) + self.inner.read(size).await } else { - Poll::Ready(Err(Self::unexpected_eof())) - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - if self.i_feel_lucky() { - self.inner.poll_seek(cx, pos) - } else { - Poll::Ready(Err(Self::unexpected_eof())) + Err(Self::unexpected_eof()) } } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { if self.i_feel_lucky() { - self.inner.poll_next(cx) + self.inner.seek(pos).await } else { - Poll::Ready(Some(Err(Self::unexpected_eof()))) + Err(Self::unexpected_eof()) } } } diff --git a/core/src/layers/concurrent_limit.rs b/core/src/layers/concurrent_limit.rs index e37b74ce043c..c313eaafcc26 100644 --- a/core/src/layers/concurrent_limit.rs +++ b/core/src/layers/concurrent_limit.rs @@ -257,16 +257,12 @@ impl ConcurrentLimitWrapper { } impl oio::Read for ConcurrentLimitWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf) + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + async fn read(&mut self, size: usize) -> Result { + self.inner.read(size).await } } diff --git a/core/src/layers/dtrace.rs b/core/src/layers/dtrace.rs index 0b794cac7e39..e32fe1b07d8f 100644 --- a/core/src/layers/dtrace.rs +++ b/core/src/layers/dtrace.rs @@ -344,24 +344,25 @@ impl DtraceLayerWrapper { } impl oio::Read for DtraceLayerWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> Result { let c_path = CString::new(self.path.clone()).unwrap(); probe_lazy!(opendal, reader_read_start, c_path.as_ptr()); - self.inner.poll_read(cx, buf).map(|res| match res { - Ok(n) => { - probe_lazy!(opendal, reader_read_ok, c_path.as_ptr(), n); - Ok(n) + match self.inner.read(size).await { + Ok(bs) => { + probe_lazy!(opendal, reader_read_ok, c_path.as_ptr(), bs.len()); + Ok(bs) } Err(e) => { probe_lazy!(opendal, reader_read_error, c_path.as_ptr()); Err(e) } - }) + } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { + + async fn seek(&mut self, pos: io::SeekFrom) -> Result { let c_path = CString::new(self.path.clone()).unwrap(); probe_lazy!(opendal, reader_seek_start, c_path.as_ptr()); - self.inner.poll_seek(cx, pos).map(|res| match res { + match self.inner.seek(pos).await { Ok(n) => { probe_lazy!(opendal, reader_seek_ok, c_path.as_ptr(), n); Ok(n) @@ -370,26 +371,7 @@ impl oio::Read for DtraceLayerWrapper { probe_lazy!(opendal, reader_seek_error, c_path.as_ptr()); Err(e) } - }) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let c_path = CString::new(self.path.clone()).unwrap(); - probe_lazy!(opendal, reader_next_start, c_path.as_ptr()); - self.inner.poll_next(cx).map(|res| match res { - Some(Ok(bytes)) => { - probe_lazy!(opendal, reader_next_ok, c_path.as_ptr(), bytes.len()); - Some(Ok(bytes)) - } - Some(Err(e)) => { - probe_lazy!(opendal, reader_next_error, c_path.as_ptr()); - Some(Err(e)) - } - None => { - probe_lazy!(opendal, reader_next_end, c_path.as_ptr()); - None - } - }) + } } } diff --git a/core/src/layers/error_context.rs b/core/src/layers/error_context.rs index 9a3a0130ef8a..3cf0036bd906 100644 --- a/core/src/layers/error_context.rs +++ b/core/src/layers/error_context.rs @@ -349,25 +349,16 @@ pub struct ErrorContextWrapper { } impl oio::Read for ErrorContextWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf).map_err(|err| { - err.with_operation(ReadOperation::Read) - .with_context("service", self.scheme) - .with_context("path", &self.path) - .with_context("read_buf", buf.len().to_string()) - }) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos).map_err(|err| { + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await.map_err(|err| { err.with_operation(ReadOperation::Seek) .with_context("service", self.scheme) .with_context("path", &self.path) }) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx).map_err(|err| { + async fn read(&mut self, size: usize) -> Result { + self.inner.read(size).await.map_err(|err| { err.with_operation(ReadOperation::Next) .with_context("service", self.scheme) .with_context("path", &self.path) diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index 5dab01287e8c..45d6c266a6f3 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -991,153 +991,66 @@ impl Drop for LoggingReader { } impl oio::Read for LoggingReader { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let buf_size = buf.len(); - - match self.inner.poll_read(cx, buf) { - Poll::Ready(res) => match res { - Ok(n) => { - self.read += n as u64; - trace!( - target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> buf size: {}B, read {}B ", - self.ctx.scheme, - ReadOperation::Read, - self.path, - self.read, - buf_size, - n - ); - Poll::Ready(Ok(n)) - } - Err(err) => { - if let Some(lvl) = self.ctx.error_level(&err) { - log!( - target: LOGGING_TARGET, - lvl, - "service={} operation={} path={} read={} -> read failed: {}", - self.ctx.scheme, - ReadOperation::Read, - self.path, - self.read, - self.ctx.error_print(&err) - ) - } - Poll::Ready(Err(err)) - } - }, - Poll::Pending => { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + match self.inner.seek(pos).await { + Ok(n) => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> buf size: {}B, read pending", + "service={} operation={} path={} read={} -> seek to {pos:?}, current offset {n}", self.ctx.scheme, - ReadOperation::Read, + ReadOperation::Seek, self.path, self.read, - buf_size ); - Poll::Pending + Ok(n) } - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - match self.inner.poll_seek(cx, pos) { - Poll::Ready(res) => match res { - Ok(n) => { - trace!( + Err(err) => { + if let Some(lvl) = self.ctx.error_level(&err) { + log!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> seek to {pos:?}, current offset {n}", + lvl, + "service={} operation={} path={} read={} -> seek to {pos:?} failed: {}", self.ctx.scheme, ReadOperation::Seek, self.path, self.read, - ); - Poll::Ready(Ok(n)) - } - Err(err) => { - if let Some(lvl) = self.ctx.error_level(&err) { - log!( - target: LOGGING_TARGET, - lvl, - "service={} operation={} path={} read={} -> seek to {pos:?} failed: {}", - self.ctx.scheme, - ReadOperation::Seek, - self.path, - self.read, - self.ctx.error_print(&err), - ) - } - Poll::Ready(Err(err)) + self.ctx.error_print(&err), + ) } - }, - Poll::Pending => { + Err(err) + } + } + } + + async fn read(&mut self, size: usize) -> Result { + match self.inner.read(size).await { + Ok(bs) => { + self.read += bs.len() as u64; trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> seek to {pos:?} pending", + "service={} operation={} path={} read={} -> next returns {}B", self.ctx.scheme, - ReadOperation::Seek, + ReadOperation::Next, self.path, - self.read + self.read, + bs.len() ); - Poll::Pending + Ok(bs) } - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self.inner.poll_next(cx) { - Poll::Ready(res) => match res { - Some(Ok(bs)) => { - self.read += bs.len() as u64; - trace!( - target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next returns {}B", - self.ctx.scheme, - ReadOperation::Next, - self.path, - self.read, - bs.len() - ); - Poll::Ready(Some(Ok(bs))) - } - Some(Err(err)) => { - if let Some(lvl) = self.ctx.error_level(&err) { - log!( - target: LOGGING_TARGET, - lvl, - "service={} operation={} path={} read={} -> next failed: {}", - self.ctx.scheme, - ReadOperation::Next, - self.path, - self.read, - self.ctx.error_print(&err), - ) - } - Poll::Ready(Some(Err(err))) - } - None => { - trace!( + Err(err) => { + if let Some(lvl) = self.ctx.error_level(&err) { + log!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next returns None", + lvl, + "service={} operation={} path={} read={} -> next failed: {}", self.ctx.scheme, ReadOperation::Next, self.path, self.read, - ); - Poll::Ready(None) + self.ctx.error_print(&err), + ) } - }, - Poll::Pending => { - trace!( - target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next returns pending", - self.ctx.scheme, - ReadOperation::Next, - self.path, - self.read - ); - Poll::Pending + Err(err) } } } diff --git a/core/src/layers/madsim.rs b/core/src/layers/madsim.rs index 3d68bff90ea9..d4135251a3a1 100644 --- a/core/src/layers/madsim.rs +++ b/core/src/layers/madsim.rs @@ -22,6 +22,7 @@ #![allow(dead_code)] use std::any::Any; +use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::io::Result; @@ -262,28 +263,20 @@ pub struct MadsimReader { } impl oio::Read for MadsimReader { - fn poll_read(&mut self, _: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> crate::Result { if let Some(ref data) = self.data { - let len = data.len(); - buf[..len].copy_from_slice(data); - Poll::Ready(Ok(len)) + let size = min(size, data.len()); + Ok(data.clone().split_to(size)) } else { - Poll::Ready(Ok(0)) + Ok(Bytes::new()) } } - fn poll_seek(&mut self, _: &mut Context<'_>, _: SeekFrom) -> Poll> { - Poll::Ready(Err(Error::new( - ErrorKind::Unsupported, - "will be supported in the future", - ))) - } - - fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { - Poll::Ready(Some(Err(Error::new( + async fn seek(&mut self, _: SeekFrom) -> crate::Result { + Err(Error::new( ErrorKind::Unsupported, "will be supported in the future", - )))) + )) } } diff --git a/core/src/layers/metrics.rs b/core/src/layers/metrics.rs index 4e18512cc0cd..7cb3dcf9f6de 100644 --- a/core/src/layers/metrics.rs +++ b/core/src/layers/metrics.rs @@ -773,41 +773,27 @@ impl Drop for MetricWrapper { } impl oio::Read for MetricWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf).map(|res| match res { + async fn read(&mut self, size: usize) -> Result { + match self.inner.read(size).await { Ok(bytes) => { - self.bytes += bytes as u64; + self.bytes += bytes.len() as u64; Ok(bytes) } Err(e) => { self.handle.increment_errors_total(self.op, e.kind()); Err(e) } - }) + } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos).map(|res| match res { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + match self.inner.seek(pos).await { Ok(n) => Ok(n), Err(e) => { self.handle.increment_errors_total(self.op, e.kind()); Err(e) } - }) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx).map(|res| match res { - Some(Ok(bytes)) => { - self.bytes += bytes.len() as u64; - Some(Ok(bytes)) - } - Some(Err(e)) => { - self.handle.increment_errors_total(self.op, e.kind()); - Some(Err(e)) - } - None => None, - }) + } } } diff --git a/core/src/layers/minitrace.rs b/core/src/layers/minitrace.rs index b49ad74cf706..596a3b0a18e8 100644 --- a/core/src/layers/minitrace.rs +++ b/core/src/layers/minitrace.rs @@ -297,22 +297,14 @@ impl MinitraceWrapper { } impl oio::Read for MinitraceWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let _g = self.span.set_local_parent(); - let _span = LocalSpan::enter_with_local_parent(ReadOperation::Read.into_static()); - self.inner.poll_read(cx, buf) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - let _g = self.span.set_local_parent(); - let _span = LocalSpan::enter_with_local_parent(ReadOperation::Seek.into_static()); - self.inner.poll_seek(cx, pos) + #[trace(enter_on_poll = true)] + async fn read(&mut self, size: usize) -> Result { + self.inner.read(size).await } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _g = self.span.set_local_parent(); - let _span = LocalSpan::enter_with_local_parent(ReadOperation::Next.into_static()); - self.inner.poll_next(cx) + #[trace(enter_on_poll = true)] + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + self.inner.seek(pos).await } } diff --git a/core/src/layers/oteltrace.rs b/core/src/layers/oteltrace.rs index 3f8754c48d5e..1c3825e5a588 100644 --- a/core/src/layers/oteltrace.rs +++ b/core/src/layers/oteltrace.rs @@ -278,16 +278,12 @@ impl OtelTraceWrapper { } impl oio::Read for OtelTraceWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf) + async fn read(&mut self, size: usize) -> Result { + self.inner.read(size).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + self.inner.seek(pos).await } } diff --git a/core/src/layers/prometheus.rs b/core/src/layers/prometheus.rs index d5426c883990..68a74c03419b 100644 --- a/core/src/layers/prometheus.rs +++ b/core/src/layers/prometheus.rs @@ -686,57 +686,35 @@ impl PrometheusMetricWrapper { } impl oio::Read for PrometheusMetricWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> Result { let labels = self.stats.generate_metric_label( self.scheme.into_static(), Operation::Read.into_static(), &self.path, ); - self.inner.poll_read(cx, buf).map(|res| match res { + match self.inner.read(size).await { Ok(bytes) => { self.stats .bytes_total .with_label_values(&labels) - .observe(bytes as f64); + .observe(bytes.len() as f64); Ok(bytes) } Err(e) => { self.stats.increment_errors_total(self.op, e.kind()); Err(e) } - }) + } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos).map(|res| match res { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + match self.inner.seek(pos).await { Ok(n) => Ok(n), Err(e) => { self.stats.increment_errors_total(self.op, e.kind()); Err(e) } - }) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let labels = self.stats.generate_metric_label( - self.scheme.into_static(), - Operation::Read.into_static(), - &self.path, - ); - self.inner.poll_next(cx).map(|res| match res { - Some(Ok(bytes)) => { - self.stats - .bytes_total - .with_label_values(&labels) - .observe(bytes.len() as f64); - Some(Ok(bytes)) - } - Some(Err(e)) => { - self.stats.increment_errors_total(self.op, e.kind()); - Some(Err(e)) - } - None => None, - }) + } } } diff --git a/core/src/layers/prometheus_client.rs b/core/src/layers/prometheus_client.rs index eb44d8b4abd0..7f298dc4917e 100644 --- a/core/src/layers/prometheus_client.rs +++ b/core/src/layers/prometheus_client.rs @@ -539,10 +539,10 @@ impl PrometheusMetricWrapper { } impl oio::Read for PrometheusMetricWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf).map(|res| match res { + async fn read(&mut self, size: usize) -> Result { + match self.inner.read(size).await { Ok(bytes) => { - self.bytes_total += bytes; + self.bytes_total += bytes.len(); Ok(bytes) } Err(e) => { @@ -550,33 +550,18 @@ impl oio::Read for PrometheusMetricWrapper { .increment_errors_total(self.scheme, self.op, e.kind()); Err(e) } - }) + } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos).map(|res| match res { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + match self.inner.seek(pos).await { Ok(n) => Ok(n), Err(e) => { self.metrics .increment_errors_total(self.scheme, self.op, e.kind()); Err(e) } - }) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx).map(|res| match res { - Some(Ok(bytes)) => { - self.bytes_total += bytes.len(); - Some(Ok(bytes)) - } - Some(Err(e)) => { - self.metrics - .increment_errors_total(self.scheme, self.op, e.kind()); - Some(Err(e)) - } - None => None, - }) + } } } diff --git a/core/src/layers/retry.rs b/core/src/layers/retry.rs index abef99312863..bb5256e7ebb0 100644 --- a/core/src/layers/retry.rs +++ b/core/src/layers/retry.rs @@ -647,7 +647,7 @@ impl LayeredAccessor for RetryAccessor { } pub struct RetryWrapper { - inner: R, + inner: Option, notify: Arc, path: String, @@ -659,7 +659,7 @@ pub struct RetryWrapper { impl RetryWrapper { fn new(inner: R, notify: Arc, path: &str, backoff: ExponentialBuilder) -> Self { Self { - inner, + inner: Some(inner), notify, path: path.to_string(), @@ -671,152 +671,74 @@ impl RetryWrapper { } impl oio::Read for RetryWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - if let Some(sleep) = self.sleep.as_mut() { - ready!(sleep.poll_unpin(cx)); - self.sleep = None; - } - - match ready!(self.inner.poll_read(cx, buf)) { - Ok(v) => { - self.current_backoff = None; - Poll::Ready(Ok(v)) - } - Err(err) if !err.is_temporary() => { - self.current_backoff = None; - Poll::Ready(Err(err)) - } - Err(err) => { - let backoff = match self.current_backoff.as_mut() { - Some(backoff) => backoff, - None => { - self.current_backoff = Some(self.builder.build()); - self.current_backoff.as_mut().unwrap() - } - }; - - match backoff.next() { - None => { - self.current_backoff = None; - Poll::Ready(Err(err)) - } - Some(dur) => { - self.notify.intercept( - &err, - dur, - &[ - ("operation", ReadOperation::Read.into_static()), - ("path", &self.path), - ], - ); - self.sleep = Some(Box::pin(tokio::time::sleep(dur))); - self.poll_read(cx, buf) - } - } - } - } - } + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + use backon::RetryableWithContext; - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - if let Some(sleep) = self.sleep.as_mut() { - ready!(sleep.poll_unpin(cx)); - self.sleep = None; - } + let inner = self.inner.take().expect("inner must be valid"); - match ready!(self.inner.poll_seek(cx, pos)) { - Ok(v) => { - self.current_backoff = None; - Poll::Ready(Ok(v)) - } - Err(err) if !err.is_temporary() => { - self.current_backoff = None; - Poll::Ready(Err(err)) - } - Err(err) => { - let backoff = match self.current_backoff.as_mut() { - Some(backoff) => backoff, - None => { - self.current_backoff = Some(self.builder.build()); - self.current_backoff.as_mut().unwrap() - } - }; + let (inner, res) = { + |mut r: R| async move { + let res = r.seek(pos).await; - match backoff.next() { - None => { - self.current_backoff = None; - Poll::Ready(Err(err)) - } - Some(dur) => { - self.notify.intercept( - &err, - dur, - &[ - ("operation", ReadOperation::Seek.into_static()), - ("path", &self.path), - ], - ); - self.sleep = Some(Box::pin(tokio::time::sleep(dur))); - self.poll_seek(cx, pos) - } - } + (r, res) } } + .retry(&self.builder) + .context(inner) + .when(|e| e.is_temporary()) + .notify(|err, dur| { + self.notify.intercept( + err, + dur, + &[ + ("operation", ReadOperation::Seek.into_static()), + ("path", &self.path), + ], + ) + }) + .map(|(r, res)| (r, res.map_err(|err| err.set_persistent()))) + .await; + + self.inner = Some(inner); + res } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - if let Some(sleep) = self.sleep.as_mut() { - ready!(sleep.poll_unpin(cx)); - self.sleep = None; - } + async fn read(&mut self, size: usize) -> Result { + use backon::RetryableWithContext; - match ready!(self.inner.poll_next(cx)) { - None => { - self.current_backoff = None; - Poll::Ready(None) - } - Some(Ok(v)) => { - self.current_backoff = None; - Poll::Ready(Some(Ok(v))) - } - Some(Err(err)) if !err.is_temporary() => { - self.current_backoff = None; - Poll::Ready(Some(Err(err))) - } - Some(Err(err)) => { - let backoff = match self.current_backoff.as_mut() { - Some(backoff) => backoff, - None => { - self.current_backoff = Some(self.builder.build()); - self.current_backoff.as_mut().unwrap() - } - }; + let inner = self.inner.take().expect("inner must be valid"); - match backoff.next() { - None => { - self.current_backoff = None; - Poll::Ready(Some(Err(err))) - } - Some(dur) => { - self.notify.intercept( - &err, - dur, - &[ - ("operation", ReadOperation::Next.into_static()), - ("path", &self.path), - ], - ); - self.sleep = Some(Box::pin(tokio::time::sleep(dur))); - self.poll_next(cx) - } - } + let (inner, res) = { + |mut r: R| async move { + let res = r.read(size).await; + + (r, res) } } + .retry(&self.builder) + .when(|e| e.is_temporary()) + .context(inner) + .notify(|err, dur| { + self.notify.intercept( + err, + dur, + &[ + ("operation", ReadOperation::Next.into_static()), + ("path", &self.path), + ], + ) + }) + .map(|(r, res)| (r, res.map_err(|err| err.set_persistent()))) + .await; + + self.inner = Some(inner); + res } } impl oio::BlockingRead for RetryWrapper { fn read(&mut self, buf: &mut [u8]) -> Result { - { || self.inner.read(buf) } + { || self.inner.as_mut().unwrap().read(buf) } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -834,7 +756,7 @@ impl oio::BlockingRead for RetryWrapp } fn seek(&mut self, pos: io::SeekFrom) -> Result { - { || self.inner.seek(pos) } + { || self.inner.as_mut().unwrap().seek(pos) } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -852,7 +774,7 @@ impl oio::BlockingRead for RetryWrapp } fn next(&mut self) -> Option> { - { || self.inner.next().transpose() } + { || self.inner.as_mut().unwrap().next().transpose() } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -878,7 +800,7 @@ impl oio::Write for RetryWrapper { self.sleep = None; } - match ready!(self.inner.poll_write(cx, bs)) { + match ready!(self.inner.as_mut().unwrap().poll_write(cx, bs)) { Ok(v) => { self.current_backoff = None; Poll::Ready(Ok(v)) @@ -924,7 +846,7 @@ impl oio::Write for RetryWrapper { self.sleep = None; } - match ready!(self.inner.poll_abort(cx)) { + match ready!(self.inner.as_mut().unwrap().poll_abort(cx)) { Ok(v) => { self.current_backoff = None; Poll::Ready(Ok(v)) @@ -970,7 +892,7 @@ impl oio::Write for RetryWrapper { self.sleep = None; } - match ready!(self.inner.poll_close(cx)) { + match ready!(self.inner.as_mut().unwrap().poll_close(cx)) { Ok(v) => { self.current_backoff = None; Poll::Ready(Ok(v)) @@ -1013,7 +935,7 @@ impl oio::Write for RetryWrapper { impl oio::BlockingWrite for RetryWrapper { fn write(&mut self, bs: &dyn oio::WriteBuf) -> Result { - { || self.inner.write(bs) } + { || self.inner.as_mut().unwrap().write(bs) } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -1031,7 +953,7 @@ impl oio::BlockingWrite for RetryWra } fn close(&mut self) -> Result<()> { - { || self.inner.close() } + { || self.inner.as_mut().unwrap().close() } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -1058,7 +980,7 @@ impl oio::List for RetryWrapper { self.sleep = None; } - match ready!(self.inner.poll_next(cx)) { + match ready!(self.inner.as_mut().unwrap().poll_next(cx)) { Ok(v) => { self.current_backoff = None; Poll::Ready(Ok(v)) @@ -1101,7 +1023,7 @@ impl oio::List for RetryWrapper { impl oio::BlockingList for RetryWrapper { fn next(&mut self) -> Result> { - { || self.inner.next() } + { || self.inner.as_mut().unwrap().next() } .retry(&self.builder) .when(|e| e.is_temporary()) .notify(|err, dur| { @@ -1130,7 +1052,6 @@ mod tests { use async_trait::async_trait; use bytes::Bytes; - use futures::AsyncReadExt; use futures::TryStreamExt; use super::*; @@ -1263,50 +1184,39 @@ mod tests { } impl oio::Read for MockReader { - fn poll_read(&mut self, _: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + self.pos = match pos { + io::SeekFrom::Current(n) => (self.pos as i64 + n) as u64, + io::SeekFrom::Start(n) => n, + io::SeekFrom::End(n) => (13 + n) as u64, + }; + + Ok(self.pos) + } + + async fn read(&mut self, _: usize) -> Result { let mut attempt = self.attempt.lock().unwrap(); *attempt += 1; - Poll::Ready(match *attempt { + match *attempt { 1 => Err( Error::new(ErrorKind::Unexpected, "retryable_error from reader") .set_temporary(), ), 2 => { - buf[..7].copy_from_slice("Hello, ".as_bytes()); self.pos += 7; - Ok(7) + Ok(Bytes::copy_from_slice("Hello, ".as_bytes())) } 3 => Err( Error::new(ErrorKind::Unexpected, "retryable_error from reader") .set_temporary(), ), 4 => { - buf[..6].copy_from_slice("World!".as_bytes()); self.pos += 6; - Ok(6) + Ok(Bytes::copy_from_slice("World!".as_bytes())) } - 5 => Ok(0), + 5 => Ok(Bytes::new()), _ => unreachable!(), - }) - } - - fn poll_seek(&mut self, _: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.pos = match pos { - io::SeekFrom::Current(n) => (self.pos as i64 + n) as u64, - io::SeekFrom::Start(n) => n, - io::SeekFrom::End(n) => (13 + n) as u64, - }; - - Poll::Ready(Ok(self.pos)) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let mut bs = vec![0; 1]; - match ready!(self.poll_read(cx, &mut bs)) { - Ok(0) => Poll::Ready(None), - Ok(v) => Poll::Ready(Some(Ok(Bytes::from(bs[..v].to_vec())))), - Err(err) => Poll::Ready(Some(Err(err))), } } } diff --git a/core/src/layers/throttle.rs b/core/src/layers/throttle.rs index 8d4b93755c38..55b13959ebe8 100644 --- a/core/src/layers/throttle.rs +++ b/core/src/layers/throttle.rs @@ -185,17 +185,13 @@ impl ThrottleWrapper { } impl oio::Read for ThrottleWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> Result { // TODO: How can we handle buffer reads with a limiter? - self.inner.poll_read(cx, buf) + self.inner.read(size).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await } } diff --git a/core/src/layers/timeout.rs b/core/src/layers/timeout.rs index 6f322251ef00..8b1e5d3b21f1 100644 --- a/core/src/layers/timeout.rs +++ b/core/src/layers/timeout.rs @@ -286,6 +286,20 @@ impl TimeoutWrapper { } } + #[inline] + async fn io_timeout>, T>( + timeout: Duration, + op: &'static str, + fut: F, + ) -> Result { + tokio::time::timeout(timeout, fut).await.map_err(|_| { + Error::new(ErrorKind::Unexpected, "io operation timeout reached") + .with_operation(op) + .with_context("timeout", timeout.as_secs_f64().to_string()) + .set_temporary() + })? + } + #[inline] fn poll_timeout(&mut self, cx: &mut Context<'_>, op: &'static str) -> Result<()> { let sleep = self @@ -308,28 +322,14 @@ impl TimeoutWrapper { } impl oio::Read for TimeoutWrapper { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.poll_timeout(cx, ReadOperation::Read.into_static())?; - - let v = ready!(self.inner.poll_read(cx, buf)); - self.sleep = None; - Poll::Ready(v) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - self.poll_timeout(cx, ReadOperation::Seek.into_static())?; - - let v = ready!(self.inner.poll_seek(cx, pos)); - self.sleep = None; - Poll::Ready(v) + async fn seek(&mut self, pos: SeekFrom) -> Result { + let fut = self.inner.seek(pos); + Self::io_timeout(self.timeout, ReadOperation::Seek.into_static(), fut).await } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.poll_timeout(cx, ReadOperation::Next.into_static())?; - - let v = ready!(self.inner.poll_next(cx)); - self.sleep = None; - Poll::Ready(v) + async fn read(&mut self, size: usize) -> Result { + let fut = self.inner.read(size); + Self::io_timeout(self.timeout, ReadOperation::Next.into_static(), fut).await } } @@ -371,6 +371,7 @@ impl oio::List for TimeoutWrapper { #[cfg(test)] mod tests { + use std::future::{pending, Future}; use std::io::SeekFrom; use std::sync::Arc; use std::task::Context; @@ -385,7 +386,6 @@ mod tests { use crate::layers::TimeoutLayer; use crate::layers::TypeEraseLayer; - use crate::raw::oio::ReadExt; use crate::raw::*; use crate::*; @@ -434,16 +434,12 @@ mod tests { struct MockReader; impl oio::Read for MockReader { - fn poll_read(&mut self, _: &mut Context<'_>, _: &mut [u8]) -> Poll> { - Poll::Pending - } - - fn poll_seek(&mut self, _: &mut Context<'_>, _: SeekFrom) -> Poll> { - Poll::Pending + fn seek(&mut self, _: SeekFrom) -> impl Future> { + pending() } - fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { - Poll::Pending + fn read(&mut self, _: usize) -> impl Future> { + pending() } } @@ -483,7 +479,7 @@ mod tests { let mut reader = op.reader("test").await.unwrap(); - let res = reader.read(&mut [0; 4]).await; + let res = reader.read(4).await; assert!(res.is_err()); let err = res.unwrap_err(); assert_eq!(err.kind(), ErrorKind::Unexpected); diff --git a/core/src/layers/tracing.rs b/core/src/layers/tracing.rs index 377b1e2a89f6..4dc435994654 100644 --- a/core/src/layers/tracing.rs +++ b/core/src/layers/tracing.rs @@ -272,24 +272,16 @@ impl oio::Read for TracingWrapper { parent = &self.span, level = "trace", skip_all)] - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf) + async fn read(&mut self, size: usize) -> Result { + self.inner.read(size).await } #[tracing::instrument( parent = &self.span, level = "trace", skip_all)] - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos) - } - - #[tracing::instrument( - parent = &self.span, - level = "trace", - skip_all)] - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + self.inner.seek(pos).await } } diff --git a/core/src/raw/enum_utils.rs b/core/src/raw/enum_utils.rs index 518ccc674087..ca6ec68c2f6d 100644 --- a/core/src/raw/enum_utils.rs +++ b/core/src/raw/enum_utils.rs @@ -58,24 +58,17 @@ pub enum TwoWays { } impl oio::Read for TwoWays { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn seek(&mut self, pos: SeekFrom) -> Result { match self { - Self::One(v) => v.poll_read(cx, buf), - Self::Two(v) => v.poll_read(cx, buf), + Self::One(v) => v.seek(pos).await, + Self::Two(v) => v.seek(pos).await, } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + async fn read(&mut self, size: usize) -> Result { match self { - Self::One(v) => v.poll_seek(cx, pos), - Self::Two(v) => v.poll_seek(cx, pos), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self { - Self::One(v) => v.poll_next(cx), - Self::Two(v) => v.poll_next(cx), + Self::One(v) => v.read(size).await, + Self::Two(v) => v.read(size).await, } } } @@ -139,27 +132,19 @@ pub enum ThreeWays { } impl oio::Read for ThreeWays { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + async fn seek(&mut self, pos: SeekFrom) -> Result { match self { - Self::One(v) => v.poll_read(cx, buf), - Self::Two(v) => v.poll_read(cx, buf), - Self::Three(v) => v.poll_read(cx, buf), + Self::One(v) => v.seek(pos).await, + Self::Two(v) => v.seek(pos).await, + Self::Three(v) => v.seek(pos).await, } } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + async fn read(&mut self, size: usize) -> Result { match self { - Self::One(v) => v.poll_seek(cx, pos), - Self::Two(v) => v.poll_seek(cx, pos), - Self::Three(v) => v.poll_seek(cx, pos), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self { - Self::One(v) => v.poll_next(cx), - Self::Two(v) => v.poll_next(cx), - Self::Three(v) => v.poll_next(cx), + Self::One(v) => v.read(size).await, + Self::Two(v) => v.read(size).await, + Self::Three(v) => v.read(size).await, } } } @@ -241,30 +226,21 @@ where THREE: oio::Read, FOUR: oio::Read, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match self { - Self::One(v) => v.poll_read(cx, buf), - Self::Two(v) => v.poll_read(cx, buf), - Self::Three(v) => v.poll_read(cx, buf), - Self::Four(v) => v.poll_read(cx, buf), - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + async fn seek(&mut self, pos: SeekFrom) -> Result { match self { - Self::One(v) => v.poll_seek(cx, pos), - Self::Two(v) => v.poll_seek(cx, pos), - Self::Three(v) => v.poll_seek(cx, pos), - Self::Four(v) => v.poll_seek(cx, pos), + Self::One(v) => v.seek(pos).await, + Self::Two(v) => v.seek(pos).await, + Self::Three(v) => v.seek(pos).await, + Self::Four(v) => v.seek(pos).await, } } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + async fn read(&mut self, size: usize) -> Result { match self { - Self::One(v) => v.poll_next(cx), - Self::Two(v) => v.poll_next(cx), - Self::Three(v) => v.poll_next(cx), - Self::Four(v) => v.poll_next(cx), + Self::One(v) => v.read(size).await, + Self::Two(v) => v.read(size).await, + Self::Three(v) => v.read(size).await, + Self::Four(v) => v.read(size).await, } } } diff --git a/core/src/raw/futures_util.rs b/core/src/raw/futures_util.rs index 390d263ccb4f..2e467b8a2c82 100644 --- a/core/src/raw/futures_util.rs +++ b/core/src/raw/futures_util.rs @@ -29,9 +29,17 @@ use futures::StreamExt; /// /// We will switch to [`futures::future::LocalBoxFuture`] on wasm32 target. #[cfg(not(target_arch = "wasm32"))] -pub type BoxedFuture = futures::future::BoxFuture<'static, T>; +pub type BoxedFuture<'a, T> = futures::future::BoxFuture<'a, T>; #[cfg(target_arch = "wasm32")] -pub type BoxedFuture = futures::future::LocalBoxFuture<'static, T>; +pub type BoxedFuture<'a, T> = futures::future::LocalBoxFuture<'a, T>; + +/// BoxedStaticFuture is the type alias of [`futures::future::BoxFuture`]. +/// +/// We will switch to [`futures::future::LocalBoxFuture`] on wasm32 target. +#[cfg(not(target_arch = "wasm32"))] +pub type BoxedStaticFuture = futures::future::BoxFuture<'static, T>; +#[cfg(target_arch = "wasm32")] +pub type BoxedStaticFuture = futures::future::LocalBoxFuture<'static, T>; /// CONCURRENT_LARGE_THRESHOLD is the threshold to determine whether to use /// [`FuturesOrdered`] or not. diff --git a/core/src/raw/http_util/body.rs b/core/src/raw/http_util/body.rs index c73aaf374122..ac0174090db8 100644 --- a/core/src/raw/http_util/body.rs +++ b/core/src/raw/http_util/body.rs @@ -18,13 +18,11 @@ use std::cmp::min; use std::cmp::Ordering; use std::io; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Buf; use bytes::BufMut; use bytes::Bytes; +use futures::StreamExt; use crate::raw::*; use crate::*; @@ -61,7 +59,7 @@ pub struct IncomingAsyncBody { inner: oio::Streamer, size: Option, consumed: u64, - chunk: Option, + chunk: Bytes, } impl IncomingAsyncBody { @@ -71,7 +69,7 @@ impl IncomingAsyncBody { inner: s, size, consumed: 0, - chunk: None, + chunk: Bytes::new(), } } @@ -82,20 +80,23 @@ impl IncomingAsyncBody { inner: Box::new(()), size: Some(0), consumed: 0, - chunk: None, + chunk: Bytes::new(), } } /// Consume the entire body. pub async fn consume(mut self) -> Result<()> { - use oio::ReadExt; + use oio::Read; - while let Some(bs) = self.next().await { - bs.map_err(|err| { + loop { + let buf = self.read(4 * 1024 * 1024).await.map_err(|err| { Error::new(ErrorKind::Unexpected, "fetch bytes from stream") .with_operation("http_util::IncomingAsyncBody::consume") .set_source(err) })?; + if buf.is_empty() { + break; + } } Ok(()) @@ -105,20 +106,18 @@ impl IncomingAsyncBody { /// /// This code is inspired from hyper's [`to_bytes`](https://docs.rs/hyper/0.14.23/hyper/body/fn.to_bytes.html). pub async fn bytes(mut self) -> Result { - use oio::ReadExt; + use oio::Read; // If there's only 1 chunk, we can just return Buf::to_bytes() - let first = if let Some(buf) = self.next().await { - buf? - } else { - return Ok(Bytes::new()); - }; + let first = self.read(4 * 1024 * 1024).await?; + if first.is_empty() { + return Ok(first); + } - let second = if let Some(buf) = self.next().await { - buf? - } else { + let second = self.read(4 * 1024 * 1024).await?; + if second.is_empty() { return Ok(first); - }; + } // With more than 1 buf, we gotta flatten into a Vec first. let cap = if let Some(size) = self.size { @@ -134,8 +133,13 @@ impl IncomingAsyncBody { vec.put(first); vec.put(second); - while let Some(buf) = self.next().await { - vec.put(buf?); + // TODO: we can tune the io size here. + loop { + let buf = self.read(4 * 1024 * 1024).await?; + if buf.is_empty() { + break; + } + vec.put(buf); } Ok(vec.into()) @@ -160,78 +164,36 @@ impl IncomingAsyncBody { } impl oio::Read for IncomingAsyncBody { - fn poll_read(&mut self, cx: &mut Context<'_>, mut buf: &mut [u8]) -> Poll> { - if buf.is_empty() || self.size == Some(0) { - return Poll::Ready(Ok(0)); + async fn read(&mut self, size: usize) -> Result { + if self.size == Some(0) { + return Ok(Bytes::new()); } - // Avoid extra poll of next if we already have chunks. - let mut bs = if let Some(chunk) = self.chunk.take() { - chunk - } else { - loop { - match ready!(self.inner.poll_next(cx)) { - // It's possible for underlying stream to return empty bytes, we should continue - // to fetch next one. - Some(Ok(bs)) if bs.is_empty() => continue, - Some(Ok(bs)) => { - self.consumed += bs.len() as u64; - break bs; - } - Some(Err(err)) => return Poll::Ready(Err(err)), - None => { - if let Some(size) = self.size { - Self::check(size, self.consumed)?; - } - return Poll::Ready(Ok(0)); + if self.chunk.is_empty() { + self.chunk = match self.inner.next().await.transpose()? { + Some(bs) => bs, + None => { + if let Some(size) = self.size { + Self::check(size, self.consumed)? } - } - } - }; - let amt = min(bs.len(), buf.len()); - buf.put_slice(&bs[..amt]); - bs.advance(amt); - if !bs.is_empty() { - self.chunk = Some(bs); + return Ok(Bytes::new()); + } + }; } - Poll::Ready(Ok(amt)) + let size = min(size, self.chunk.len()); + self.consumed += size as u64; + let bs = self.chunk.split_to(size); + Ok(bs) } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - let (_, _) = (cx, pos); + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + let _ = pos; - Poll::Ready(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support seeking", - ))) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - if self.size == Some(0) { - return Poll::Ready(None); - } - - if let Some(bs) = self.chunk.take() { - return Poll::Ready(Some(Ok(bs))); - } - - let res = match ready!(self.inner.poll_next(cx)) { - Some(Ok(bs)) => { - self.consumed += bs.len() as u64; - Some(Ok(bs)) - } - Some(Err(err)) => Some(Err(err)), - None => { - if let Some(size) = self.size { - Self::check(size, self.consumed)?; - } - - None - } - }; - - Poll::Ready(res) + )) } } diff --git a/core/src/raw/mod.rs b/core/src/raw/mod.rs index 2783dddf9ab2..e58d6e9ced70 100644 --- a/core/src/raw/mod.rs +++ b/core/src/raw/mod.rs @@ -71,6 +71,7 @@ pub use std_io_util::*; mod futures_util; pub use futures_util::BoxedFuture; +pub use futures_util::BoxedStaticFuture; pub use futures_util::ConcurrentFutures; mod enum_utils; diff --git a/core/src/raw/oio/cursor.rs b/core/src/raw/oio/cursor.rs index a4b7caee3731..a8a82a0a57a9 100644 --- a/core/src/raw/oio/cursor.rs +++ b/core/src/raw/oio/cursor.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::cmp::min; use std::io::Read; use std::io::SeekFrom; use std::task::Context; @@ -71,17 +72,7 @@ impl From> for Cursor { } impl oio::Read for Cursor { - fn poll_read(&mut self, _: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let n = Read::read(&mut self.remaining_slice(), buf).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from Cursor") - .with_context("source", "Cursor") - .set_source(err) - })?; - self.pos += n as u64; - Poll::Ready(Ok(n)) - } - - fn poll_seek(&mut self, _: &mut Context<'_>, pos: SeekFrom) -> Poll> { + async fn seek(&mut self, pos: SeekFrom) -> Result { let (base, amt) = match pos { SeekFrom::Start(n) => (0, n as i64), SeekFrom::End(n) => (self.inner.len() as i64, n), @@ -91,24 +82,25 @@ impl oio::Read for Cursor { let n = match base.checked_add(amt) { Some(n) if n >= 0 => n as u64, _ => { - return Poll::Ready(Err(Error::new( + return Err(Error::new( ErrorKind::InvalidInput, "invalid seek to a negative or overflowing position", - ))) + )) } }; self.pos = n; - Poll::Ready(Ok(n)) + Ok(n) } - fn poll_next(&mut self, _: &mut Context<'_>) -> Poll>> { + async fn read(&mut self, size: usize) -> Result { if self.is_empty() { - Poll::Ready(None) + Ok(Bytes::new()) } else { // The clone here is required as we don't want to change it. - let bs = self.inner.clone().split_off(self.pos as usize); + let mut bs = self.inner.clone().split_off(self.pos as usize); + let bs = bs.split_to(min(bs.len(), size)); self.pos += bs.len() as u64; - Poll::Ready(Some(Ok(bs))) + Ok(bs) } } } diff --git a/core/src/raw/oio/list/flat_list.rs b/core/src/raw/oio/list/flat_list.rs index acc2749f2584..2b948408c1de 100644 --- a/core/src/raw/oio/list/flat_list.rs +++ b/core/src/raw/oio/list/flat_list.rs @@ -25,7 +25,7 @@ use crate::raw::*; use crate::*; /// ListFuture is the future returned while calling async list. -type ListFuture = BoxedFuture<(A, oio::Entry, Result<(RpList, L)>)>; +type ListFuture = BoxedStaticFuture<(A, oio::Entry, Result<(RpList, L)>)>; /// FlatLister will walk dir in bottom up way: /// diff --git a/core/src/raw/oio/list/page_list.rs b/core/src/raw/oio/list/page_list.rs index f36b9c9bc216..b793a6b63f0f 100644 --- a/core/src/raw/oio/list/page_list.rs +++ b/core/src/raw/oio/list/page_list.rs @@ -72,7 +72,7 @@ pub struct PageLister { enum State { Idle(Option<(L, PageContext)>), - Fetch(BoxedFuture<((L, PageContext), Result<()>)>), + Fetch(BoxedStaticFuture<((L, PageContext), Result<()>)>), } /// # Safety diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index f6175f436045..d7f681fe96ec 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -18,15 +18,13 @@ use std::fmt::Display; use std::fmt::Formatter; use std::io; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; +use std::ops::DerefMut; use bytes::Bytes; use futures::Future; use tokio::io::ReadBuf; +use crate::raw::BoxedFuture; use crate::*; /// PageOperation is the name for APIs of lister. @@ -76,7 +74,7 @@ impl From for &'static str { } /// Reader is a type erased [`Read`]. -pub type Reader = Box; +pub type Reader = Box; /// Read is the trait that OpenDAL returns to callers. /// @@ -89,213 +87,77 @@ pub type Reader = Box; /// `AsyncRead` is required to be implemented, `AsyncSeek` and `Stream` /// is optional. We use `Read` to make users life easier. pub trait Read: Unpin + Send + Sync { - /// Read bytes asynchronously. - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll>; + /// Fetch more bytes from underlying reader. + /// + /// `size` is used to hint the data that user want to read at most. Implementer + /// MUST NOT return more than `size` bytes. However, implementer can decide + /// whether to split or merge the read requests underground. + /// + /// Returning `bytes`'s `length == 0` means: + /// + /// - This reader has reached its “end of file” and will likely no longer be able to produce bytes. + /// - The `size` specified was `0`. + #[cfg(not(target_arch = "wasm32"))] + fn read(&mut self, size: usize) -> impl Future> + Send; + #[cfg(target_arch = "wasm32")] + fn read(&mut self, size: usize) -> impl Future>; /// Seek asynchronously. /// /// Returns `Unsupported` error if underlying reader doesn't support seek. - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll>; - - /// Stream [`Bytes`] from underlying reader. - /// - /// Returns `Unsupported` error if underlying reader doesn't support stream. - /// - /// This API exists for avoiding bytes copying inside async runtime. - /// Users can poll bytes from underlying reader and decide when to - /// read/consume them. - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>>; + #[cfg(not(target_arch = "wasm32"))] + fn seek(&mut self, pos: io::SeekFrom) -> impl Future> + Send; + #[cfg(target_arch = "wasm32")] + fn seek(&mut self, pos: io::SeekFrom) -> impl Future>; } impl Read for () { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let (_, _) = (cx, buf); + async fn read(&mut self, size: usize) -> Result { + let _ = size; - unimplemented!("poll_read is required to be implemented for oio::Read") - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - let (_, _) = (cx, pos); - - Poll::Ready(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - ))) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; - - Poll::Ready(Some(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support streaming", - )))) - } -} - -/// `Box` won't implement `Read` automatically. To make Reader -/// work as expected, we must add this impl. -impl Read for Box { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - (**self).poll_read(cx, buf) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - (**self).poll_seek(cx, pos) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - (**self).poll_next(cx) - } -} - -impl futures::AsyncRead for dyn Read { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut [u8], - ) -> Poll> { - let this: &mut dyn Read = &mut *self; - this.poll_read(cx, buf).map_err(format_io_error) - } -} - -impl futures::AsyncSeek for dyn Read { - fn poll_seek( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - pos: io::SeekFrom, - ) -> Poll> { - let this: &mut dyn Read = &mut *self; - this.poll_seek(cx, pos).map_err(format_io_error) + )) } -} -impl futures::Stream for dyn Read { - type Item = Result; + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + let _ = pos; - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this: &mut dyn Read = &mut *self; - this.poll_next(cx) + Err(Error::new( + ErrorKind::Unsupported, + "output reader doesn't support seeking", + )) } } -/// Impl ReadExt for all T: Read -impl ReadExt for T {} - -/// Extension of [`Read`] to make it easier for use. -pub trait ReadExt: Read { - /// Build a future for `poll_read`. - fn read<'a>(&'a mut self, buf: &'a mut [u8]) -> ReadFuture<'a, Self> { - ReadFuture { reader: self, buf } - } - - /// Build a future for `poll_seek`. - fn seek(&mut self, pos: io::SeekFrom) -> SeekFuture<'_, Self> { - SeekFuture { reader: self, pos } - } - - /// Build a future for `poll_next` - fn next(&mut self) -> NextFuture<'_, Self> { - NextFuture { reader: self } - } - - /// Build a future for `read_to_end`. - fn read_to_end<'a>(&'a mut self, buf: &'a mut Vec) -> ReadToEndFuture<'a, Self> { - ReadToEndFuture { reader: self, buf } - } -} +pub trait ReadDyn: Unpin + Send + Sync { + fn read_dyn(&mut self, size: usize) -> BoxedFuture>; -pub struct ReadFuture<'a, R: Read + Unpin + ?Sized> { - reader: &'a mut R, - buf: &'a mut [u8], + fn seek_dyn(&mut self, pos: io::SeekFrom) -> BoxedFuture>; } -impl Future for ReadFuture<'_, R> -where - R: Read + Unpin + ?Sized, -{ - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - this.reader.poll_read(cx, this.buf) +impl ReadDyn for T { + fn read_dyn(&mut self, size: usize) -> BoxedFuture> { + Box::pin(self.read(size)) } -} - -pub struct SeekFuture<'a, R: Read + Unpin + ?Sized> { - reader: &'a mut R, - pos: io::SeekFrom, -} - -impl Future for SeekFuture<'_, R> -where - R: Read + Unpin + ?Sized, -{ - type Output = Result; - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - this.reader.poll_seek(cx, this.pos) + fn seek_dyn(&mut self, pos: io::SeekFrom) -> BoxedFuture> { + Box::pin(self.seek(pos)) } } -pub struct NextFuture<'a, R: Read + Unpin + ?Sized> { - reader: &'a mut R, -} - -impl Future for NextFuture<'_, R> -where - R: Read + Unpin + ?Sized, -{ - type Output = Option>; - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll>> { - self.reader.poll_next(cx) +/// # NOTE +/// +/// Take care about the `deref_mut()` here. This makes sure that we are calling functions +/// upon `&mut T` instead of `&mut Box`. The later could result in infinite recursion. +impl Read for Box { + async fn read(&mut self, size: usize) -> Result { + self.deref_mut().read_dyn(size).await } -} - -pub struct ReadToEndFuture<'a, R: Read + Unpin + ?Sized> { - reader: &'a mut R, - buf: &'a mut Vec, -} -impl Future for ReadToEndFuture<'_, R> -where - R: Read + Unpin + ?Sized, -{ - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let this = self.get_mut(); - let start_len = this.buf.len(); - - loop { - if this.buf.len() == this.buf.capacity() { - this.buf.reserve(32); // buf is full, need more space - } - - let spare = this.buf.spare_capacity_mut(); - let mut read_buf: ReadBuf = ReadBuf::uninit(spare); - - // SAFETY: These bytes were initialized but not filled in the previous loop - unsafe { - read_buf.assume_init(read_buf.capacity()); - } - - match ready!(this.reader.poll_read(cx, read_buf.initialize_unfilled())) { - Ok(0) => { - return Poll::Ready(Ok(this.buf.len() - start_len)); - } - Ok(n) => { - // SAFETY: Read API makes sure that returning `n` is correct. - unsafe { - this.buf.set_len(this.buf.len() + n); - } - } - Err(e) => return Poll::Ready(Err(e)), - } - } + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + self.deref_mut().seek_dyn(pos).await } } @@ -409,46 +271,3 @@ impl BlockingRead for Box { (**self).next() } } - -impl io::Read for dyn BlockingRead { - #[inline] - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let this: &mut dyn BlockingRead = &mut *self; - this.read(buf).map_err(format_io_error) - } -} - -impl io::Seek for dyn BlockingRead { - #[inline] - fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - let this: &mut dyn BlockingRead = &mut *self; - this.seek(pos).map_err(format_io_error) - } -} - -impl Iterator for dyn BlockingRead { - type Item = Result; - - #[inline] - fn next(&mut self) -> Option { - let this: &mut dyn BlockingRead = &mut *self; - this.next() - } -} - -/// helper functions to format `Error` into `io::Error`. -/// -/// This function is added privately by design and only valid in current -/// context (i.e. `oio` crate). We don't want to expose this function to -/// users. -#[inline] -fn format_io_error(err: Error) -> io::Error { - let kind = match err.kind() { - ErrorKind::NotFound => io::ErrorKind::NotFound, - ErrorKind::PermissionDenied => io::ErrorKind::PermissionDenied, - ErrorKind::InvalidInput => io::ErrorKind::InvalidInput, - _ => io::ErrorKind::Interrupted, - }; - - io::Error::new(kind, err) -} diff --git a/core/src/raw/oio/read/buffer_reader.rs b/core/src/raw/oio/read/buffer_reader.rs index 3d96b93c1270..d6d723fa7bb4 100644 --- a/core/src/raw/oio/read/buffer_reader.rs +++ b/core/src/raw/oio/read/buffer_reader.rs @@ -17,9 +17,6 @@ use std::cmp::min; use std::io::SeekFrom; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::BufMut; use bytes::Bytes; @@ -35,6 +32,7 @@ pub struct BufferReader { r: R, cur: u64, + /// TODO: maybe we can use chunked bytes here? buf: Vec, filled: usize, pos: usize, @@ -100,7 +98,7 @@ impl BufferReader where R: oio::Read, { - fn poll_fill_buf(&mut self, cx: &mut Context<'_>) -> Poll> { + async fn fill_buf(&mut self) -> Result<&[u8]> { // If we've reached the end of our internal buffer then we need to fetch // some more data from the underlying reader. // Branch using `>=` instead of the more correct `==` @@ -114,22 +112,23 @@ where let mut buf = ReadBuf::uninit(dst); unsafe { buf.assume_init(cap) }; - let n = ready!(self.r.poll_read(cx, buf.initialized_mut()))?; - unsafe { self.buf.set_len(n) } + let bs = self.r.read(cap).await?; + buf.put_slice(&bs); + unsafe { self.buf.set_len(bs.len()) } self.pos = 0; - self.filled = n; + self.filled = bs.len(); } - Poll::Ready(Ok(&self.buf[self.pos..self.filled])) + Ok(&self.buf[self.pos..self.filled]) } - fn poll_inner_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let cur = ready!(self.r.poll_seek(cx, pos))?; + async fn inner_seek(&mut self, pos: SeekFrom) -> Result { + let cur = self.r.seek(pos).await?; self.discard_buffer(); self.cur = cur; - Poll::Ready(Ok(cur)) + Ok(cur) } } @@ -137,69 +136,59 @@ impl oio::Read for BufferReader where R: oio::Read, { - fn poll_read(&mut self, cx: &mut Context<'_>, mut dst: &mut [u8]) -> Poll> { - // Sanity check for normal cases. - if dst.is_empty() { - return Poll::Ready(Ok(0)); - } - - // If we don't have any buffered data and we're doing a massive read - // (larger than our internal buffer), bypass our internal buffer - // entirely. - if self.pos == self.filled && dst.len() >= self.capacity() { - let res = ready!(self.r.poll_read(cx, dst)); - self.discard_buffer(); - return match res { - Ok(nread) => { - self.cur += nread as u64; - Poll::Ready(Ok(nread)) - } - Err(err) => Poll::Ready(Err(err)), - }; - } - - let rem = ready!(self.poll_fill_buf(cx))?; - let amt = min(rem.len(), dst.len()); - dst.put(&rem[..amt]); - self.consume(amt); - Poll::Ready(Ok(amt)) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + async fn seek(&mut self, pos: SeekFrom) -> Result { match pos { SeekFrom::Start(new_pos) => { // TODO(weny): Check the overflowing. let Some(offset) = (new_pos as i64).checked_sub(self.cur as i64) else { - return self.poll_inner_seek(cx, pos); + return self.inner_seek(pos).await; }; match self.seek_relative(offset) { - Some(cur) => Poll::Ready(Ok(cur)), - None => self.poll_inner_seek(cx, pos), + Some(cur) => Ok(cur), + None => self.inner_seek(pos).await, } } SeekFrom::Current(offset) => match self.seek_relative(offset) { - Some(cur) => Poll::Ready(Ok(cur)), - None => self - .poll_inner_seek(cx, SeekFrom::Current(offset - self.unconsumed_buffer_len())), + Some(cur) => Ok(cur), + None => { + self.inner_seek(SeekFrom::Current(offset - self.unconsumed_buffer_len())) + .await + } }, - SeekFrom::End(_) => self.poll_inner_seek(cx, pos), + SeekFrom::End(_) => self.inner_seek(pos).await, } } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match ready!(self.poll_fill_buf(cx)) { - Ok(bytes) => { - if bytes.is_empty() { - return Poll::Ready(None); + async fn read(&mut self, size: usize) -> Result { + if size == 0 { + return Ok(Bytes::new()); + } + + // If we don't have any buffered data and we're doing a massive read + // (larger than our internal buffer), bypass our internal buffer + // entirely. + if self.pos == self.filled && size >= self.capacity() { + let res = self.r.read(size).await; + self.discard_buffer(); + return match res { + Ok(bs) => { + self.cur += bs.len() as u64; + Ok(bs) } + Err(err) => Err(err), + }; + } - let bytes = Bytes::copy_from_slice(bytes); - self.consume(bytes.len()); - Poll::Ready(Some(Ok(bytes))) - } - Err(err) => Poll::Ready(Some(Err(err))), + let bytes = self.fill_buf().await?; + + if bytes.is_empty() { + return Ok(Bytes::new()); } + let size = min(bytes.len(), size); + let bytes = Bytes::copy_from_slice(&bytes[..size]); + self.consume(bytes.len()); + Ok(bytes) } } @@ -207,7 +196,7 @@ impl BufferReader where R: BlockingRead, { - fn fill_buf(&mut self) -> Result<&[u8]> { + fn blocking_fill_buf(&mut self) -> Result<&[u8]> { // If we've reached the end of our internal buffer then we need to fetch // some more data from the underlying reader. // Branch using `>=` instead of the more correct `==` @@ -231,7 +220,7 @@ where Ok(&self.buf[self.pos..self.filled]) } - fn inner_seek(&mut self, pos: SeekFrom) -> Result { + fn blocking_inner_seek(&mut self, pos: SeekFrom) -> Result { let cur = self.r.seek(pos)?; self.discard_buffer(); self.cur = cur; @@ -265,7 +254,7 @@ where }; } - let rem = self.fill_buf()?; + let rem = self.blocking_fill_buf()?; let amt = min(rem.len(), dst.len()); dst.put(&rem[..amt]); self.consume(amt); @@ -277,24 +266,25 @@ where SeekFrom::Start(new_pos) => { // TODO(weny): Check the overflowing. let Some(offset) = (new_pos as i64).checked_sub(self.cur as i64) else { - return self.inner_seek(pos); + return self.blocking_inner_seek(pos); }; match self.seek_relative(offset) { Some(cur) => Ok(cur), - None => self.inner_seek(pos), + None => self.blocking_inner_seek(pos), } } SeekFrom::Current(offset) => match self.seek_relative(offset) { Some(cur) => Ok(cur), - None => self.inner_seek(SeekFrom::Current(offset - self.unconsumed_buffer_len())), + None => self + .blocking_inner_seek(SeekFrom::Current(offset - self.unconsumed_buffer_len())), }, - SeekFrom::End(_) => self.inner_seek(pos), + SeekFrom::End(_) => self.blocking_inner_seek(pos), } } fn next(&mut self) -> Option> { - match self.fill_buf() { + match self.blocking_fill_buf() { Ok(bytes) => { if bytes.is_empty() { return None; @@ -316,8 +306,6 @@ mod tests { use async_trait::async_trait; use bytes::Bytes; - use futures::AsyncReadExt; - use futures::AsyncSeekExt; use rand::prelude::*; use sha2::Digest; use sha2::Sha256; @@ -394,21 +382,17 @@ mod tests { } impl oio::Read for MockReader { - fn poll_read(&mut self, cx: &mut Context, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf) - } + async fn seek(&mut self, pos: SeekFrom) -> Result { + let _ = pos; - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (_, _) = (cx, pos); - - Poll::Ready(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support seeking", - ))) + )) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + async fn read(&mut self, size: usize) -> Result { + oio::Read::read(&mut self.inner, size).await } } @@ -437,22 +421,20 @@ mod tests { let r = Box::new(RangeReader::new(acc, "x", OpRead::default())) as oio::Reader; let buf_cap = 10; - let mut r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; - let mut dst = [0u8; 5]; + let r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; + let mut r = Reader::new(r); - let nread = r.read(&mut dst).await?; - assert_eq!(nread, dst.len()); - assert_eq!(&dst, b"Hello"); + let bs = r.read(5).await?; + assert_eq!(bs.len(), 5); + assert_eq!(bs.as_ref(), b"Hello"); - let mut dst = [0u8; 5]; - let nread = r.read(&mut dst).await?; - assert_eq!(nread, dst.len()); - assert_eq!(&dst, b", Wor"); + let bs = r.read(5).await?; + assert_eq!(bs.len(), 5); + assert_eq!(bs.as_ref(), b", Wor"); - let mut dst = [0u8; 3]; - let nread = r.read(&mut dst).await?; - assert_eq!(nread, dst.len()); - assert_eq!(&dst, b"ld!"); + let bs = r.read(3).await?; + assert_eq!(bs.len(), 3); + assert_eq!(bs.as_ref(), b"ld!"); Ok(()) } @@ -464,36 +446,33 @@ mod tests { let r = Box::new(RangeReader::new(acc, "x", OpRead::default())) as oio::Reader; let buf_cap = 10; - let mut r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; + let r = Box::new(BufferReader::new(r, buf_cap)) as oio::Reader; + let mut r = Reader::new(r); // The underlying reader buffers the b"Hello, Wor". - let mut dst = [0u8; 5]; - let nread = r.read(&mut dst).await?; - assert_eq!(nread, dst.len()); - assert_eq!(&dst, b"Hello"); + let buf = r.read(5).await?; + assert_eq!(buf.len(), 5); + assert_eq!(buf.as_ref(), b"Hello"); let pos = r.seek(SeekFrom::Start(7)).await?; assert_eq!(pos, 7); - let mut dst = [0u8; 5]; - let nread = r.read(&mut dst).await?; - assert_eq!(&dst[..nread], &bs[7..10]); - assert_eq!(nread, 3); + let buf = r.read(5).await?; + assert_eq!(&buf, &bs[7..10]); + assert_eq!(buf.len(), 3); // Should perform a relative seek. let pos = r.seek(SeekFrom::Start(0)).await?; assert_eq!(pos, 0); - let mut dst = [0u8; 9]; - let nread = r.read(&mut dst).await?; - assert_eq!(&dst[..nread], &bs[0..9]); - assert_eq!(nread, 9); + let buf = r.read(9).await?; + assert_eq!(&buf, &bs[0..9]); + assert_eq!(buf.len(), 9); // Should perform a non-relative seek. let pos = r.seek(SeekFrom::Start(11)).await?; assert_eq!(pos, 11); - let mut dst = [0u8; 9]; - let nread = r.read(&mut dst).await?; - assert_eq!(&dst[..nread], &bs[11..13]); - assert_eq!(nread, 2); + let buf = r.read(9).await?; + assert_eq!(&buf, &bs[11..13]); + assert_eq!(buf.len(), 2); Ok(()) } @@ -509,7 +488,8 @@ mod tests { OpRead::default().with_range(BytesRange::from(..)), )) as oio::Reader; - let mut r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; + let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; + let mut r = Reader::new(r); let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; @@ -546,13 +526,13 @@ mod tests { "x", OpRead::default().with_range(BytesRange::from(..)), )) as oio::Reader; - let mut r = Box::new(BufferReader::new(r, 10)) as oio::Reader; + let r = Box::new(BufferReader::new(r, 10)) as oio::Reader; + let mut r = Reader::new(r); let mut cur = 0; for _ in 0..3 { - let mut dst = [0u8; 5]; - let nread = r.read(&mut dst).await?; - assert_eq!(nread, 5); + let bs = r.read(5).await?; + assert_eq!(bs.len(), 5); cur += 5; } @@ -573,13 +553,13 @@ mod tests { "x", OpRead::default().with_range(BytesRange::from(..)), )) as oio::Reader; - let mut r = Box::new(BufferReader::new(r, 5)) as oio::Reader; + let r = Box::new(BufferReader::new(r, 5)) as oio::Reader; + let mut r = Reader::new(r); let mut cur = 0; for _ in 0..3 { - let mut dst = [0u8; 6]; - let nread = r.read(&mut dst).await?; - assert_eq!(nread, 6); + let bs = r.read(6).await?; + assert_eq!(bs.len(), 6); cur += 6; } @@ -599,7 +579,8 @@ mod tests { "x", OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), )) as oio::Reader; - let mut r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; + let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::Reader; + let mut r = Reader::new(r); let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; @@ -625,8 +606,7 @@ mod tests { let n = r.seek(SeekFrom::Start(1024)).await?; assert_eq!(1024, n, "seek to 1024"); - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; + let buf = r.read_exact(1024).await?; assert_eq!( format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), format!("{:x}", Sha256::digest(&buf)), @@ -636,8 +616,7 @@ mod tests { let n = r.seek(SeekFrom::Current(1024)).await?; assert_eq!(3072, n, "seek to 3072"); - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; + let buf = r.read_exact(1024).await?; assert_eq!( format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), format!("{:x}", Sha256::digest(&buf)), @@ -652,7 +631,8 @@ mod tests { let bs = Bytes::copy_from_slice(&b"Hello, World!"[..]); let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; let buf_cap = 10; - let mut r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); let mut dst = [0u8; 5]; let nread = r.read(&mut dst)?; @@ -677,7 +657,8 @@ mod tests { let bs = Bytes::copy_from_slice(&b"Hello, World!"[..]); let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; let buf_cap = 10; - let mut r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, buf_cap)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); // The underlying reader buffers the b"Hello, Wor". let mut dst = [0u8; 5]; @@ -715,7 +696,8 @@ mod tests { async fn test_blocking_read_all() -> anyhow::Result<()> { let (bs, _) = gen_bytes(); let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let mut r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); let mut buf = Vec::new(); r.read_to_end(&mut buf)?; @@ -747,7 +729,8 @@ mod tests { &b"Hello, World! I'm going to tests a seek relative related bug!"[..], ); let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let mut r = Box::new(BufferReader::new(r, 10)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, 10)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); let mut cur = 0; for _ in 0..3 { @@ -769,7 +752,8 @@ mod tests { &b"Hello, World! I'm going to tests a seek relative related bug!"[..], ); let r = Box::new(oio::Cursor::from(bs.clone())) as oio::BlockingReader; - let mut r = Box::new(BufferReader::new(r, 5)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, 5)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); let mut cur = 0; for _ in 0..3 { @@ -796,7 +780,8 @@ mod tests { "x", OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), )) as oio::BlockingReader; - let mut r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; + let r = Box::new(BufferReader::new(r, 4096 * 1024)) as oio::BlockingReader; + let mut r = BlockingReader::new(r); let mut buf = Vec::new(); BlockingRead::read_to_end(&mut r, &mut buf)?; diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index 8db1c7cf7dca..bb43e7be700f 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -17,14 +17,9 @@ use std::cmp; use std::io::SeekFrom; -use std::pin::Pin; use std::sync::Arc; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; -use futures::Future; use crate::raw::*; use crate::*; @@ -44,25 +39,12 @@ pub struct FileReader { size: Option, cur: u64, + reader: Option, buf: oio::AdaptiveBuf, - state: State, /// Do we need to reset our cursor? seek_dirty: bool, } -enum State { - Idle, - Send(BoxedFuture>), - Read(R), -} - -/// # Safety -/// -/// wasm32 is a special target that we only have one event-loop for this state. -unsafe impl Send for State {} -/// Safety: State will only be accessed under &mut. -unsafe impl Sync for State {} - impl FileReader where A: Accessor, @@ -82,7 +64,7 @@ where size: None, cur: 0, buf: oio::AdaptiveBuf::default(), - state: State::::Idle, + reader: None, seek_dirty: false, } } @@ -93,80 +75,65 @@ where A: Accessor, R: oio::Read, { - fn read_future(&self) -> BoxedFuture> { - let acc = self.acc.clone(); - let path = self.path.clone(); - - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - - Box::pin(async move { acc.read(&path, op).await }) - } - /// calculate_offset will make sure that the offset has been set. - fn poll_offset( - cx: &mut Context<'_>, - r: &mut R, - range: BytesRange, - ) -> Poll, Option)>> { + async fn offset(r: &mut R, range: BytesRange) -> Result<(Option, Option)> { let (offset, size) = match (range.offset(), range.size()) { (None, None) => (0, None), (None, Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::End(-(size as i64))))?; + let start = r.seek(SeekFrom::End(-(size as i64))).await?; (start, Some(size)) } (Some(offset), None) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + let start = r.seek(SeekFrom::Start(offset)).await?; (start, None) } (Some(offset), Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + let start = r.seek(SeekFrom::Start(offset)).await?; (start, Some(size)) } }; - Poll::Ready(Ok((Some(offset), size))) + Ok((Some(offset), size)) } - fn poll_seek_inner( - cx: &mut Context<'_>, + async fn seek_inner( r: &mut R, offset: Option, size: Option, cur: u64, pos: SeekFrom, - ) -> Poll> { + ) -> Result { let offset = offset.expect("offset should be set for calculate_position"); match pos { SeekFrom::Start(n) => { // It's valid for user to seek outsides end of the file. - r.poll_seek(cx, SeekFrom::Start(offset + n)) + r.seek(SeekFrom::Start(offset + n)).await } SeekFrom::End(n) => { let size = size.expect("size should be set for calculate_position when seek with end"); if size as i64 + n < 0 { - return Poll::Ready(Err(Error::new( + return Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", ) - .with_context("position", format!("{pos:?}")))); + .with_context("position", format!("{pos:?}"))); } // size is known, we can convert SeekFrom::End into SeekFrom::Start. let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); - r.poll_seek(cx, pos) + r.seek(pos).await } SeekFrom::Current(n) => { if cur as i64 + n < 0 { - return Poll::Ready(Err(Error::new( + return Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", ) - .with_context("position", format!("{pos:?}")))); + .with_context("position", format!("{pos:?}"))); } let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); - r.poll_seek(cx, pos) + r.seek(pos).await } } } @@ -198,7 +165,7 @@ where Ok((Some(offset), size)) } - fn seek_inner( + fn blocking_seek_inner( r: &mut R, offset: Option, size: Option, @@ -246,149 +213,55 @@ where A: Accessor, R: oio::Read, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_read(cx, buf) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If send future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_read(cx, buf) - } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; - } - - let size = if let Some(size) = self.size { - // Sanity check. - if self.cur >= size { - return Poll::Ready(Ok(0)); - } - cmp::min(buf.len(), (size - self.cur) as usize) - } else { - buf.len() - }; - - match ready!(r.poll_read(cx, &mut buf[..size])) { - Ok(0) => Poll::Ready(Ok(0)), - Ok(n) => { - self.cur += n as u64; - Poll::Ready(Ok(n)) - } - // We don't need to reset state here since it's ok to poll the same reader. - Err(err) => Poll::Ready(Err(err)), - } - } + async fn seek(&mut self, pos: SeekFrom) -> Result { + if self.reader.is_none() { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + let (_, r) = self.acc.read(&self.path, op).await?; + self.reader = Some(r); } - } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_seek(cx, pos) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If send future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_seek(cx, pos) - } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; - } + let r = self.reader.as_mut().expect("reader must be valid"); - // Fetch size when seek end. - let current_offset = self.offset.unwrap() + self.cur; - if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let size = ready!(r.poll_seek(cx, SeekFrom::End(0)))?; - self.size = Some(size - self.offset.unwrap()); - self.seek_dirty = true; - } - if self.seek_dirty { - // Reset cursor. - ready!(r.poll_seek(cx, SeekFrom::Start(current_offset)))?; - self.seek_dirty = false; - } + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::offset(r, self.op.range()).await?; + } - let pos = ready!(Self::poll_seek_inner( - cx, - r, - self.offset, - self.size, - self.cur, - pos - ))?; - self.cur = pos - self.offset.unwrap(); - Poll::Ready(Ok(self.cur)) - } + // Fetch size when seek end. + let current_offset = self.offset.unwrap() + self.cur; + if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let size = r.seek(SeekFrom::End(0)).await?; + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { + // Reset cursor. + r.seek(SeekFrom::Start(current_offset)).await?; + self.seek_dirty = false; } + + let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos).await?; + self.cur = pos - self.offset.unwrap(); + Ok(self.cur) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_next(cx) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If send future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_next(cx) - } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; - } + async fn read(&mut self, size: usize) -> Result { + if self.reader.is_none() { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + let (_, r) = self.acc.read(&self.path, op).await?; + self.reader = Some(r); + } - self.buf.reserve(); - - let mut buf = self.buf.initialized_mut(); - let buf = buf.initialized_mut(); - - let size = if let Some(size) = self.size { - // Sanity check. - if self.cur >= size { - return Poll::Ready(None); - } - cmp::min(buf.len(), (size - self.cur) as usize) - } else { - buf.len() - }; - - match ready!(r.poll_read(cx, &mut buf[..size])) { - Ok(0) => Poll::Ready(None), - Ok(n) => { - self.cur += n as u64; - self.buf.record(n); - Poll::Ready(Some(Ok(self.buf.split(n)))) - } - // We don't need to reset state here since it's ok to poll the same reader. - Err(err) => Poll::Ready(Some(Err(err))), - } - } + let r = self.reader.as_mut().expect("reader must be valid"); + + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::offset(r, self.op.range()).await?; } + + r.read(size).await } } @@ -398,144 +271,118 @@ where R: oio::BlockingRead, { fn read(&mut self, buf: &mut [u8]) -> Result { - match &mut self.state { - State::Idle => { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - - let (_, r) = self.acc.blocking_read(&self.path, op)?; - self.state = State::Read(r); - self.read(buf) - } + if self.reader.is_none() { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.reader = Some(r); + } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; - } + let r = self.reader.as_mut().expect("reader must be valid"); - let size = if let Some(size) = self.size { - // Sanity check. - if self.cur >= size { - return Ok(0); - } - cmp::min(buf.len(), (size - self.cur) as usize) - } else { - buf.len() - }; - - match r.read(&mut buf[..size]) { - Ok(0) => Ok(0), - Ok(n) => { - self.cur += n as u64; - Ok(n) - } - // We don't need to reset state here since it's ok to poll the same reader. - Err(err) => Err(err), - } + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; + } + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Ok(0); } - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => Ok(0), + Ok(n) => { + self.cur += n as u64; + Ok(n) } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Err(err), } } fn seek(&mut self, pos: SeekFrom) -> Result { - match &mut self.state { - State::Idle => { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - - let (_, r) = self.acc.blocking_read(&self.path, op)?; - self.state = State::Read(r); - self.seek(pos) - } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; - } - // Fetch size when seek end. - let current_offset = self.offset.unwrap() + self.cur; - if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let size = r.seek(SeekFrom::End(0))?; - self.size = Some(size - self.offset.unwrap()); - self.seek_dirty = true; - } - if self.seek_dirty { - // Reset cursor. - r.seek(SeekFrom::Start(current_offset))?; - self.seek_dirty = false; - } + if self.reader.is_none() { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.reader = Some(r); + } - let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos)?; - self.cur = pos - self.offset.unwrap(); - Ok(self.cur) - } - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) - } + let r = self.reader.as_mut().expect("reader must be valid"); + + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; } + // Fetch size when seek end. + let current_offset = self.offset.unwrap() + self.cur; + if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let size = r.seek(SeekFrom::End(0))?; + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { + // Reset cursor. + r.seek(SeekFrom::Start(current_offset))?; + self.seek_dirty = false; + } + + let pos = Self::blocking_seek_inner(r, self.offset, self.size, self.cur, pos)?; + self.cur = pos - self.offset.unwrap(); + Ok(self.cur) } fn next(&mut self) -> Option> { - match &mut self.state { - State::Idle => { - // FileReader doesn't support range, we will always use full range to open a file. - let op = self.op.clone().with_range(BytesRange::from(..)); - - let r = match self.acc.blocking_read(&self.path, op) { - Ok((_, r)) => r, - Err(err) => return Some(Err(err)), - }; - self.state = State::Read(r); - self.next() + if self.reader.is_none() { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + let (_, r) = match self.acc.blocking_read(&self.path, op) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + }; + self.reader = Some(r); + } + + let r = self.reader.as_mut().expect("reader must be valid"); + + // We should know where to start read the data. + if self.offset.is_none() { + (self.offset, self.size) = match Self::calculate_offset(r, self.op.range()) { + Ok(v) => v, + Err(err) => return Some(Err(err)), } + } - State::Read(r) => { - // We should know where to start read the data. - if self.offset.is_none() { - (self.offset, self.size) = match Self::calculate_offset(r, self.op.range()) { - Ok(v) => v, - Err(err) => return Some(Err(err)), - } - } + self.buf.reserve(); - self.buf.reserve(); - - let mut buf = self.buf.initialized_mut(); - let buf = buf.initialized_mut(); - - let size = if let Some(size) = self.size { - // Sanity check. - if self.cur >= size { - return None; - } - cmp::min(buf.len(), (size - self.cur) as usize) - } else { - buf.len() - }; - - match r.read(&mut buf[..size]) { - Ok(0) => None, - Ok(n) => { - self.cur += n as u64; - self.buf.record(n); - Some(Ok(self.buf.split(n))) - } - // We don't need to reset state here since it's ok to poll the same reader. - Err(err) => Some(Err(err)), - } + let mut buf = self.buf.initialized_mut(); + let buf = buf.initialized_mut(); + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return None; } - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => None, + Ok(n) => { + self.cur += n as u64; + self.buf.record(n); + Some(Ok(self.buf.split(n))) } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Some(Err(err)), } } } diff --git a/core/src/raw/oio/read/futures_read.rs b/core/src/raw/oio/read/futures_read.rs index 8b3bd3999d9d..81f53447a0c7 100644 --- a/core/src/raw/oio/read/futures_read.rs +++ b/core/src/raw/oio/read/futures_read.rs @@ -16,13 +16,13 @@ // under the License. use std::io::SeekFrom; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; use futures::AsyncRead; +use futures::AsyncReadExt; use futures::AsyncSeek; +use futures::AsyncSeekExt; +use tokio::io::ReadBuf; use crate::raw::*; use crate::*; @@ -30,12 +30,16 @@ use crate::*; /// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. pub struct FuturesReader { inner: R, + buf: Vec, } impl FuturesReader { /// Create a new futures reader. pub fn new(inner: R) -> Self { - Self { inner } + Self { + inner, + buf: Vec::with_capacity(64 * 1024), + } } } @@ -43,28 +47,38 @@ impl oio::Read for FuturesReader where R: AsyncRead + AsyncSeek + Unpin + Send + Sync, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Read) - .with_context("source", "FuturesReader") - }) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - Pin::new(&mut self.inner).poll_seek(cx, pos).map_err(|err| { + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await.map_err(|err| { new_std_io_error(err) .with_operation(oio::ReadOperation::Seek) .with_context("source", "FuturesReader") }) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; + async fn read(&mut self, size: usize) -> Result { + // Make sure buf has enough space. + if self.buf.capacity() < size { + self.buf.reserve(size); + } + let buf = self.buf.spare_capacity_mut(); + let mut read_buf: ReadBuf = ReadBuf::uninit(buf); + + // SAFETY: Read at most `size` bytes into `read_buf`. + unsafe { + read_buf.assume_init(size); + } + + let n = self + .inner + .read(read_buf.initialized_mut()) + .await + .map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "FuturesReader") + })?; + read_buf.set_filled(n); - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "FuturesReader doesn't support poll_next", - )))) + Ok(Bytes::copy_from_slice(read_buf.filled())) } } diff --git a/core/src/raw/oio/read/into_read_from_stream.rs b/core/src/raw/oio/read/into_read_from_stream.rs index 9bb734eda2f5..a130de2ee429 100644 --- a/core/src/raw/oio/read/into_read_from_stream.rs +++ b/core/src/raw/oio/read/into_read_from_stream.rs @@ -15,11 +15,9 @@ // specific language governing permissions and limitations // under the License. +use std::cmp::min; use std::io::SeekFrom; -use std::task::Context; -use std::task::Poll; -use bytes::Buf; use bytes::Bytes; use futures::StreamExt; @@ -45,43 +43,22 @@ where S: futures::Stream> + Send + Sync + Unpin + 'static, T: Into, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - if !self.buf.is_empty() { - let len = std::cmp::min(buf.len(), self.buf.len()); - buf[..len].copy_from_slice(&self.buf[..len]); - self.buf.advance(len); - return Poll::Ready(Ok(len)); - } - - match futures::ready!(self.inner.poll_next_unpin(cx)) { - Some(Ok(bytes)) => { - let bytes = bytes.into(); - let len = std::cmp::min(buf.len(), bytes.len()); - buf[..len].copy_from_slice(&bytes[..len]); - self.buf = bytes.slice(len..); - Poll::Ready(Ok(len)) - } - Some(Err(err)) => Poll::Ready(Err(err)), - None => Poll::Ready(Ok(0)), - } - } - - fn poll_seek(&mut self, _: &mut Context<'_>, _: SeekFrom) -> Poll> { - Poll::Ready(Err(Error::new( + async fn seek(&mut self, _: SeekFrom) -> Result { + Err(Error::new( ErrorKind::Unsupported, "FromStreamReader can't support operation", - ))) + )) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - if !self.buf.is_empty() { - return Poll::Ready(Some(Ok(std::mem::take(&mut self.buf)))); + async fn read(&mut self, size: usize) -> Result { + if self.buf.is_empty() { + self.buf = match self.inner.next().await.transpose()? { + Some(v) => v.into(), + None => return Ok(Bytes::new()), + }; } - match futures::ready!(self.inner.poll_next_unpin(cx)) { - Some(Ok(bytes)) => Poll::Ready(Some(Ok(bytes.into()))), - Some(Err(err)) => Poll::Ready(Some(Err(err))), - None => Poll::Ready(None), - } + let bs = self.buf.split_to(min(size, self.buf.len())); + Ok(bs) } } diff --git a/core/src/raw/oio/read/into_streamable_read.rs b/core/src/raw/oio/read/into_streamable_read.rs index c3f90e24d585..890b129d716e 100644 --- a/core/src/raw/oio/read/into_streamable_read.rs +++ b/core/src/raw/oio/read/into_streamable_read.rs @@ -15,10 +15,8 @@ // specific language governing permissions and limitations // under the License. +use std::cmp::min; use std::io::SeekFrom; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; use tokio::io::ReadBuf; @@ -43,27 +41,24 @@ pub struct StreamableReader { } impl oio::Read for StreamableReader { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.r.poll_read(cx, buf) + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.r.seek(pos).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - self.r.poll_seek(cx, pos) - } + async fn read(&mut self, size: usize) -> Result { + let size = min(self.buf.capacity(), size); - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { let dst = self.buf.spare_capacity_mut(); let mut buf = ReadBuf::uninit(dst); - unsafe { buf.assume_init(self.cap) }; - match ready!(self.r.poll_read(cx, buf.initialized_mut())) { - Err(err) => Poll::Ready(Some(Err(err))), - Ok(0) => Poll::Ready(None), - Ok(n) => { - buf.set_filled(n); - Poll::Ready(Some(Ok(Bytes::from(buf.filled().to_vec())))) - } - } + // SAFETY: Read at most `size` bytes into `read_buf`. + unsafe { buf.assume_init(size) }; + + let bs = self.r.read(size).await?; + buf.put_slice(&bs); + buf.set_filled(bs.len()); + + Ok(Bytes::from(buf.filled().to_vec())) } } @@ -94,6 +89,7 @@ impl oio::BlockingRead for StreamableReader { #[cfg(test)] mod tests { + use crate::raw::oio::Read; use bytes::BufMut; use bytes::BytesMut; use rand::prelude::*; @@ -102,8 +98,6 @@ mod tests { #[tokio::test] async fn test_into_stream() { - use oio::ReadExt; - let mut rng = ThreadRng::default(); // Generate size between 1B..16MB. let size = rng.gen_range(1..16 * 1024 * 1024); @@ -116,8 +110,11 @@ mod tests { let mut s = into_streamable_read(Box::new(r) as oio::Reader, cap); let mut bs = BytesMut::new(); - while let Some(b) = s.next().await { - let b = b.expect("read must success"); + loop { + let b = s.read(4 * 1024 * 1024).await.expect("read must success"); + if b.is_empty() { + break; + } bs.put_slice(&b); } assert_eq!(bs.freeze().to_vec(), content) diff --git a/core/src/raw/oio/read/lazy_read.rs b/core/src/raw/oio/read/lazy_read.rs index 84ba5b55a59b..b39929d3a5df 100644 --- a/core/src/raw/oio/read/lazy_read.rs +++ b/core/src/raw/oio/read/lazy_read.rs @@ -16,14 +16,9 @@ // under the License. use std::io::SeekFrom; -use std::pin::Pin; use std::sync::Arc; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; -use futures::Future; use crate::raw::*; use crate::*; @@ -35,22 +30,9 @@ pub struct LazyReader { acc: Arc, path: Arc, op: OpRead, - state: State, + reader: Option, } -enum State { - Idle, - Send(BoxedFuture>), - Read(R), -} - -/// # Safety -/// -/// wasm32 is a special target that we only have one event-loop for this state. -unsafe impl Send for State {} -/// Safety: State will only be accessed under &mut. -unsafe impl Sync for State {} - impl LazyReader where A: Accessor, @@ -62,7 +44,7 @@ where path: Arc::new(path.to_string()), op, - state: State::::Idle, + reader: None, } } } @@ -72,12 +54,13 @@ where A: Accessor, R: oio::Read, { - fn read_future(&self) -> BoxedFuture> { - let acc = self.acc.clone(); - let path = self.path.clone(); - let op = self.op.clone(); + async fn reader(&mut self) -> Result<&mut R> { + if self.reader.is_none() { + let (_, r) = self.acc.read(&self.path, self.op.clone()).await?; + self.reader = Some(r); + } - Box::pin(async move { acc.read(&path, op).await }) + Ok(self.reader.as_mut().expect("reader must be valid")) } } @@ -86,64 +69,28 @@ where A: Accessor, R: oio::Read, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_read(cx, buf) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_read(cx, buf) - } - State::Read(r) => r.poll_read(cx, buf), - } + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.reader().await?.seek(pos).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_seek(cx, pos) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_seek(cx, pos) - } - State::Read(r) => r.poll_seek(cx, pos), - } + async fn read(&mut self, size: usize) -> Result { + let r = self.reader().await?; + r.read(size).await } +} - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match &mut self.state { - State::Idle => { - self.state = State::Send(self.read_future()); - self.poll_next(cx) - } - State::Send(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - self.state = State::Read(r); - self.poll_next(cx) - } - State::Read(r) => r.poll_next(cx), +impl LazyReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn blocking_reader(&mut self) -> Result<&mut R> { + if self.reader.is_none() { + let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; + self.reader = Some(r); } + + Ok(self.reader.as_mut().expect("reader must be valid")) } } @@ -153,53 +100,19 @@ where R: oio::BlockingRead, { fn read(&mut self, buf: &mut [u8]) -> Result { - match &mut self.state { - State::Idle => { - let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; - self.state = State::Read(r); - self.read(buf) - } - State::Read(r) => r.read(buf), - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) - } - } + self.blocking_reader()?.read(buf) } fn seek(&mut self, pos: SeekFrom) -> Result { - match &mut self.state { - State::Idle => { - let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; - self.state = State::Read(r); - self.seek(pos) - } - State::Read(r) => r.seek(pos), - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) - } - } + self.blocking_reader()?.seek(pos) } fn next(&mut self) -> Option> { - match &mut self.state { - State::Idle => { - let r = match self.acc.blocking_read(&self.path, self.op.clone()) { - Ok((_, r)) => r, - Err(err) => return Some(Err(err)), - }; - self.state = State::Read(r); - self.next() - } - State::Read(r) => r.next(), - State::Send(_) => { - unreachable!( - "It's invalid to go into State::Send for BlockingRead, please report this bug" - ) - } - } + let r = match self.blocking_reader() { + Ok(r) => r, + Err(err) => return Some(Err(err)), + }; + + r.next() } } diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index d0edf682dc4c..d05b4593c8b1 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -19,7 +19,6 @@ mod api; pub use api::BlockingRead; pub use api::BlockingReader; pub use api::Read; -pub use api::ReadExt; pub use api::ReadOperation; pub use api::Reader; diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index 5ffacf2ec8a7..d557cf0e3d5d 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -15,13 +15,8 @@ // specific language governing permissions and limitations // under the License. -use std::future::Future; use std::io::SeekFrom; -use std::pin::Pin; use std::sync::Arc; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; @@ -45,23 +40,9 @@ pub struct RangeReader { offset: Option, size: Option, cur: u64, - state: State, + reader: Option, } -enum State { - Idle, - SendStat(BoxedFuture>), - SendRead(BoxedFuture>), - Read(R), -} - -/// # Safety -/// -/// wasm32 is a special target that we only have one event-loop for this state. -unsafe impl Send for State {} -/// Safety: State will only be accessed under &mut. -unsafe impl Sync for State {} - impl RangeReader where A: Accessor, @@ -95,7 +76,7 @@ where offset, size, cur: 0, - state: State::::Idle, + reader: None, } } @@ -106,7 +87,7 @@ where if size > total_size { // If returns an error, we should reset // state to Idle so that we can retry it. - self.state = State::Idle; + self.reader = None; return Err(Error::new( ErrorKind::InvalidInput, "read to a negative or overflowing position is invalid", @@ -195,10 +176,7 @@ where A: Accessor, R: oio::Read, { - fn read_future(&self) -> BoxedFuture> { - let acc = self.acc.clone(); - let path = self.path.clone(); - + async fn read_future(&self) -> Result<(RpRead, R)> { let mut op = self.op.clone(); // cur != 0 means we have read some data out, we should convert // the op into deterministic to avoid ETag changes. @@ -208,13 +186,10 @@ where // Alter OpRead with correct calculated range. op = op.with_range(self.calculate_range()); - Box::pin(async move { acc.read(&path, op).await }) + self.acc.read(&self.path, op).await } - fn stat_future(&self) -> BoxedFuture> { - let acc = self.acc.clone(); - let path = self.path.clone(); - + async fn stat_future(&self) -> Result { // Handle if-match and if-none-match correctly. let mut args = OpStat::default(); // TODO: stat should support range to check if ETag matches. @@ -227,7 +202,7 @@ where } } - Box::pin(async move { acc.stat(&path, args).await }) + self.acc.stat(&self.path, args).await } } @@ -277,192 +252,81 @@ where A: Accessor, R: oio::Read, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - // Sanity check for normal cases. - if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { - return Poll::Ready(Ok(0)); + async fn seek(&mut self, pos: SeekFrom) -> Result { + // There is an optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. + if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { + return Ok(self.cur); } - match &mut self.state { - State::Idle => { - self.state = if self.offset.is_none() { - // Offset is none means we are doing tailing reading. - // we should stat first to get the correct offset. - State::SendStat(self.stat_future()) - } else { - State::SendRead(self.read_future()) - }; - - self.poll_read(cx, buf) - } - State::SendStat(fut) => { - let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If stat future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; - - self.state = State::Idle; - self.poll_read(cx, buf) - } - State::SendRead(fut) => { - let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; + // We are seeking to other places, let's drop existing reader. + self.reader = None; - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::Current(n) => (self.cur as i64, n), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + let rp = self.stat_future().await?; + let length = rp.into_metadata().content_length(); + self.ensure_offset(length)?; - self.state = State::Read(r); - self.poll_read(cx, buf) - } - State::Read(r) => match ready!(Pin::new(r).poll_read(cx, buf)) { - Ok(0) => { - // Reset state to Idle after all data has been consumed. - self.state = State::Idle; - Poll::Ready(Ok(0)) + (length as i64, n) } - Ok(n) => { - self.cur += n as u64; - Poll::Ready(Ok(n)) - } - Err(e) => { - self.state = State::Idle; - Poll::Ready(Err(e)) - } - }, - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match &mut self.state { - State::Idle => { - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::Current(n) => (self.cur as i64, n), - SeekFrom::End(n) => { - if let Some(size) = self.size { - (size as i64, n) - } else { - self.state = State::SendStat(self.stat_future()); - return self.poll_seek(cx, pos); - } - } - }; - - let seek_pos = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - ))) - } - }; - - self.cur = seek_pos; - Poll::Ready(Ok(self.cur)) } - State::SendStat(fut) => { - let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If stat future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; + }; - self.state = State::Idle; - self.poll_seek(cx, pos) - } - State::SendRead(_) => { - // It's impossible for us to go into this state while - // poll_seek. We can just drop this future and check state. - self.state = State::Idle; - self.poll_seek(cx, pos) + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + )) } - State::Read(_) => { - // There is an optimization here that we can calculate if users trying to seek - // the same position, for example, `reader.seek(SeekFrom::Current(0))`. - // In this case, we can just return current position without dropping reader. - if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { - return Poll::Ready(Ok(self.cur)); - } + }; - self.state = State::Idle; - self.poll_seek(cx, pos) - } - } + self.cur = seek_pos; + Ok(self.cur) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + async fn read(&mut self, size: usize) -> Result { // Sanity check for normal cases. if self.cur >= self.size.unwrap_or(u64::MAX) { - return Poll::Ready(None); + return Ok(Bytes::new()); } - match &mut self.state { - State::Idle => { - self.state = if self.offset.is_none() { - // Offset is none means we are doing tailing reading. - // we should stat first to get the correct offset. - State::SendStat(self.stat_future()) - } else { - State::SendRead(self.read_future()) - }; - - self.poll_next(cx) - } - State::SendStat(fut) => { - let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If stat future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; + if self.offset.is_none() { + let rp = match self.stat_future().await { + Ok(v) => v, + Err(err) => return Err(err), + }; + let length = rp.into_metadata().content_length(); + self.ensure_offset(length)? + } + if self.reader.is_none() { + let (rp, r) = match self.read_future().await { + Ok((rp, r)) => (rp, r), + Err(err) => return Err(err), + }; + + self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); + self.reader = Some(r); + } - self.state = State::Idle; - self.poll_next(cx) + let r = self.reader.as_mut().expect("reader must be valid"); + match r.read(size).await { + Ok(bs) => { + self.cur += bs.len() as u64; + Ok(bs) } - State::SendRead(fut) => { - let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { - // If read future returns an error, we should reset - // state to Idle so that we can retry it. - self.state = State::Idle; - err - })?; - - // Set size if read returns size hint. - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); - - self.state = State::Read(r); - self.poll_next(cx) + Err(err) => { + self.reader = None; + Err(err) } - State::Read(r) => match ready!(Pin::new(r).poll_next(cx)) { - Some(Ok(bs)) => { - self.cur += bs.len() as u64; - Poll::Ready(Some(Ok(bs))) - } - Some(Err(err)) => { - self.state = State::Idle; - Poll::Ready(Some(Err(err))) - } - None => { - self.state = State::Idle; - Poll::Ready(None) - } - }, } } } @@ -478,155 +342,116 @@ where return Ok(0); } - match &mut self.state { - State::Idle => { - // Offset is none means we are doing tailing reading. - // we should stat first to get the correct offset. - if self.offset.is_none() { - let rp = self.stat_action()?; - - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; - } - - let (rp, r) = self.read_action()?; + if self.offset.is_none() { + let rp = self.stat_action()?; + let length = rp.into_metadata().content_length(); + self.ensure_offset(length)?; + } + if self.reader.is_none() { + let (rp, r) = self.read_action()?; - // Set size if read returns size hint. - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); + self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); + self.reader = Some(r); + } - self.state = State::Read(r); - self.read(buf) - } - State::Read(r) => { - match r.read(buf) { - Ok(0) => { - // Reset state to Idle after all data has been consumed. - self.state = State::Idle; - Ok(0) - } - Ok(n) => { - self.cur += n as u64; - Ok(n) - } - Err(e) => { - self.state = State::Idle; - Err(e) - } - } + let r = self.reader.as_mut().expect("reader must be valid"); + match r.read(buf) { + Ok(0) => { + // Reset state to Idle after all data has been consumed. + self.reader = None; + Ok(0) } - State::SendStat(_) => { - unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + Ok(n) => { + self.cur += n as u64; + Ok(n) } - State::SendRead(_) => { - unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + Err(e) => { + self.reader = None; + Err(e) } } } fn seek(&mut self, pos: SeekFrom) -> Result { - match &mut self.state { - State::Idle => { - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::End(n) => { - if let Some(size) = self.size { - (size as i64, n) - } else { - let rp = self.stat_action()?; - let length = rp.into_metadata().content_length(); - self.ensure_offset(length)?; - - let size = self.size.expect("size must be valid after fill_range"); - (size as i64, n) - } - } - SeekFrom::Current(n) => (self.cur as i64, n), - }; - - let seek_pos = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )); - } - }; - - self.cur = seek_pos; - Ok(self.cur) - } - State::Read(_) => { - // There is an optimization here that we can calculate if users trying to seek - // the same position, for example, `reader.seek(SeekFrom::Current(0))`. - // In this case, we can just return current position without dropping reader. - if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { - return Ok(self.cur); - } + // There is an optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. + if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { + return Ok(self.cur); + } - self.state = State::Idle; - self.seek(pos) - } - State::SendStat(_) => { - unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + // We are seeking to other places, let's drop existing reader. + self.reader = None; + + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::Current(n) => (self.cur as i64, n), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + let rp = self.stat_action()?; + let length = rp.into_metadata().content_length(); + self.ensure_offset(length)?; + + (length as i64, n) + } } - State::SendRead(_) => { - unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + }; + + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + )) } - } + }; + + self.cur = seek_pos; + Ok(self.cur) } fn next(&mut self) -> Option> { - match &mut self.state { - State::Idle => { - // Sanity check for normal cases. - if self.cur >= self.size.unwrap_or(u64::MAX) { - return None; - } - - // Offset is none means we are doing tailing reading. - // we should stat first to get the correct offset. - if self.offset.is_none() { - let rp = match self.stat_action() { - Ok(rp) => rp, - Err(err) => return Some(Err(err)), - }; + // Sanity check for normal cases. + if self.cur >= self.size.unwrap_or(u64::MAX) { + return None; + } - let length = rp.into_metadata().content_length(); - if let Err(err) = self.ensure_offset(length) { - return Some(Err(err)); - } - } + if self.offset.is_none() { + let rp = match self.stat_action() { + Ok(rp) => rp, + Err(err) => return Some(Err(err)), + }; + let length = rp.into_metadata().content_length(); + if let Err(err) = self.ensure_offset(length) { + return Some(Err(err)); + } + } + if self.reader.is_none() { + let (rp, r) = match self.read_action() { + Ok((rp, r)) => (rp, r), + Err(err) => return Some(Err(err)), + }; + + self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); + self.reader = Some(r); + } - let r = match self.read_action() { - Ok((rp, r)) => { - self.ensure_size(rp.range().unwrap_or_default().size(), rp.size()); - r - } - Err(err) => return Some(Err(err)), - }; - self.state = State::Read(r); - self.next() + let r = self.reader.as_mut().expect("reader must be valid"); + match r.next() { + Some(Ok(bs)) => { + self.cur += bs.len() as u64; + Some(Ok(bs)) } - State::Read(r) => match r.next() { - Some(Ok(bs)) => { - self.cur += bs.len() as u64; - Some(Ok(bs)) - } - Some(Err(err)) => { - self.state = State::Idle; - Some(Err(err)) - } - None => { - self.state = State::Idle; - None - } - }, - State::SendStat(_) => { - unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + Some(Err(err)) => { + self.reader = None; + Some(Err(err)) } - State::SendRead(_) => { - unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") + None => { + self.reader = None; + None } } } @@ -638,9 +463,7 @@ mod tests { use async_trait::async_trait; use bytes::Bytes; - use futures::AsyncRead; use futures::AsyncReadExt; - use futures::AsyncSeekExt; use rand::prelude::*; use sha2::Digest; use sha2::Sha256; @@ -706,32 +529,24 @@ mod tests { } impl oio::Read for MockReader { - fn poll_read(&mut self, cx: &mut Context, buf: &mut [u8]) -> Poll> { - Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) - }) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (_, _) = (cx, pos); + async fn seek(&mut self, pos: SeekFrom) -> Result { + let _ = pos; - Poll::Ready(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support seeking", - ))) + )) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let mut bs = vec![0; 4 * 1024]; - let n = ready!(Pin::new(&mut self.inner) - .poll_read(cx, &mut bs) - .map_err( - |err| Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) - )?); + async fn read(&mut self, size: usize) -> Result { + let mut bs = vec![0; size]; + let n = self.inner.read(&mut bs).await.map_err(|err| { + Error::new(ErrorKind::Unexpected, "read data from mock").set_source(err) + })?; if n == 0 { - Poll::Ready(None) + Ok(Bytes::new()) } else { - Poll::Ready(Some(Ok(Bytes::from(bs[..n].to_vec())))) + Ok(Bytes::from(bs[..n].to_vec())) } } } @@ -741,11 +556,12 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); - let mut r = Box::new(RangeReader::new( + let r = Box::new(RangeReader::new( acc, "x", OpRead::default().with_range(BytesRange::from(..)), )) as oio::Reader; + let mut r = Reader::new(r); let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; @@ -776,11 +592,12 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); - let mut r = Box::new(RangeReader::new( + let r = Box::new(RangeReader::new( acc, "x", OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), )) as oio::Reader; + let mut r = Reader::new(r); let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; @@ -806,8 +623,7 @@ mod tests { let n = r.seek(SeekFrom::Start(1024)).await?; assert_eq!(1024, n, "seek to 1024"); - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; + let buf = r.read_exact(1024).await?; assert_eq!( format!("{:x}", Sha256::digest(&bs[4096 + 1024..4096 + 2048])), format!("{:x}", Sha256::digest(&buf)), @@ -817,8 +633,7 @@ mod tests { let n = r.seek(SeekFrom::Current(1024)).await?; assert_eq!(3072, n, "seek to 3072"); - let mut buf = vec![0; 1024]; - r.read_exact(&mut buf).await?; + let buf = r.read_exact(1024).await?; assert_eq!( format!("{:x}", Sha256::digest(&bs[4096 + 3072..4096 + 3072 + 1024])), format!("{:x}", Sha256::digest(&buf)), diff --git a/core/src/raw/oio/read/tokio_read.rs b/core/src/raw/oio/read/tokio_read.rs index 37aabc14732b..71d5340e2065 100644 --- a/core/src/raw/oio/read/tokio_read.rs +++ b/core/src/raw/oio/read/tokio_read.rs @@ -16,14 +16,12 @@ // under the License. use std::io::SeekFrom; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; use tokio::io::AsyncRead; +use tokio::io::AsyncReadExt; use tokio::io::AsyncSeek; +use tokio::io::AsyncSeekExt; use tokio::io::ReadBuf; use crate::raw::*; @@ -32,8 +30,7 @@ use crate::*; /// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. pub struct TokioReader { inner: R, - - seek_pos: Option, + buf: Vec, } impl TokioReader { @@ -41,7 +38,7 @@ impl TokioReader { pub fn new(inner: R) -> Self { Self { inner, - seek_pos: None, + buf: Vec::with_capacity(64 * 1024), } } } @@ -50,44 +47,38 @@ impl oio::Read for TokioReader where R: AsyncRead + AsyncSeek + Unpin + Send + Sync, { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let mut buf = ReadBuf::new(buf); - - ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf)).map_err(|err| { + async fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).await.map_err(|err| { new_std_io_error(err) - .with_operation(oio::ReadOperation::Read) + .with_operation(oio::ReadOperation::Seek) .with_context("source", "TokioReader") - })?; - - Poll::Ready(Ok(buf.filled().len())) + }) } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - if self.seek_pos != Some(pos) { - Pin::new(&mut self.inner).start_seek(pos).map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Seek) - .with_context("source", "TokioReader") - })?; - self.seek_pos = Some(pos) + async fn read(&mut self, size: usize) -> Result { + // Make sure buf has enough space. + if self.buf.capacity() < size { + self.buf.reserve(size); } + let buf = self.buf.spare_capacity_mut(); + let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - // NOTE: don't return error by `?` here, we need to reset seek_pos. - let pos = ready!(Pin::new(&mut self.inner).poll_complete(cx).map_err(|err| { - new_std_io_error(err) - .with_operation(oio::ReadOperation::Seek) - .with_context("source", "TokioReader") - })); - self.seek_pos = None; - Poll::Ready(pos) - } + // SAFETY: Read at most `size` bytes into `read_buf`. + unsafe { + read_buf.assume_init(size); + } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; + let n = self + .inner + .read(read_buf.initialized_mut()) + .await + .map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "TokioReader") + })?; + read_buf.set_filled(n); - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "TokioReader doesn't support poll_next", - )))) + Ok(Bytes::copy_from_slice(read_buf.filled())) } } diff --git a/core/src/raw/oio/stream/api.rs b/core/src/raw/oio/stream/api.rs index fe95bb656ac9..7a672d2ff2a0 100644 --- a/core/src/raw/oio/stream/api.rs +++ b/core/src/raw/oio/stream/api.rs @@ -54,11 +54,11 @@ impl Stream for Box { } } -impl Stream for dyn raw::oio::Read { - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - raw::oio::Read::poll_next(self, cx) - } -} +// impl Stream for T { +// fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { +// raw::oio::Read::poll_next(self, cx) +// } +// } impl futures::Stream for dyn Stream { type Item = Result; diff --git a/core/src/raw/oio/write/append_write.rs b/core/src/raw/oio/write/append_write.rs index caa8fa677575..3e20429f510e 100644 --- a/core/src/raw/oio/write/append_write.rs +++ b/core/src/raw/oio/write/append_write.rs @@ -65,8 +65,8 @@ pub struct AppendWriter { enum State { Idle(Option), - Offset(BoxedFuture<(W, Result)>), - Append(BoxedFuture<(W, Result)>), + Offset(BoxedStaticFuture<(W, Result)>), + Append(BoxedStaticFuture<(W, Result)>), } /// # Safety diff --git a/core/src/raw/oio/write/block_write.rs b/core/src/raw/oio/write/block_write.rs index af4e79a05853..bcdae527f67d 100644 --- a/core/src/raw/oio/write/block_write.rs +++ b/core/src/raw/oio/write/block_write.rs @@ -93,7 +93,7 @@ pub trait BlockWrite: Send + Sync + Unpin + 'static { /// The error part will carries input `(block_id, bytes, err)` so caller can retry them. type WriteBlockResult = std::result::Result; -struct WriteBlockFuture(BoxedFuture); +struct WriteBlockFuture(BoxedStaticFuture); /// # Safety /// @@ -144,8 +144,8 @@ pub struct BlockWriter { enum State { Idle, - Close(BoxedFuture>), - Abort(BoxedFuture>), + Close(BoxedStaticFuture>), + Abort(BoxedStaticFuture>), } /// # Safety diff --git a/core/src/raw/oio/write/multipart_write.rs b/core/src/raw/oio/write/multipart_write.rs index 46d215ed124f..9d61911735e4 100644 --- a/core/src/raw/oio/write/multipart_write.rs +++ b/core/src/raw/oio/write/multipart_write.rs @@ -121,7 +121,7 @@ pub struct MultipartPart { /// The error part will carries input `(part_number, bytes, err)` so caller can retry them. type WritePartResult = std::result::Result; -struct WritePartFuture(BoxedFuture); +struct WritePartFuture(BoxedStaticFuture); /// # Safety /// @@ -177,9 +177,9 @@ pub struct MultipartWriter { enum State { Idle, - Init(BoxedFuture>), - Close(BoxedFuture>), - Abort(BoxedFuture>), + Init(BoxedStaticFuture>), + Close(BoxedStaticFuture>), + Abort(BoxedStaticFuture>), } /// # Safety diff --git a/core/src/raw/oio/write/one_shot_write.rs b/core/src/raw/oio/write/one_shot_write.rs index 4531317678db..915918d01147 100644 --- a/core/src/raw/oio/write/one_shot_write.rs +++ b/core/src/raw/oio/write/one_shot_write.rs @@ -47,7 +47,7 @@ pub struct OneShotWriter { enum State { Idle(Option), - Write(BoxedFuture<(W, Result<()>)>), + Write(BoxedStaticFuture<(W, Result<()>)>), } /// # Safety diff --git a/core/src/raw/oio/write/range_write.rs b/core/src/raw/oio/write/range_write.rs index 496722266b40..431d08b2298b 100644 --- a/core/src/raw/oio/write/range_write.rs +++ b/core/src/raw/oio/write/range_write.rs @@ -99,7 +99,7 @@ pub trait RangeWrite: Send + Sync + Unpin + 'static { /// The error part will carries input `(offset, bytes, err)` so caller can retry them. type WriteRangeResult = std::result::Result<(), (u64, oio::ChunkedBytes, Error)>; -struct WriteRangeFuture(BoxedFuture); +struct WriteRangeFuture(BoxedStaticFuture); /// # Safety /// @@ -153,9 +153,9 @@ pub struct RangeWriter { enum State { Idle, - Init(BoxedFuture>), - Complete(BoxedFuture>), - Abort(BoxedFuture>), + Init(BoxedStaticFuture>), + Complete(BoxedStaticFuture>), + Abort(BoxedStaticFuture>), } /// # Safety diff --git a/core/src/raw/std_io_util.rs b/core/src/raw/std_io_util.rs index a36e1e47f6cc..1efe5d98c095 100644 --- a/core/src/raw/std_io_util.rs +++ b/core/src/raw/std_io_util.rs @@ -16,6 +16,7 @@ // under the License. use crate::*; +use std::io; /// Parse std io error into opendal::Error. /// @@ -46,3 +47,20 @@ pub fn new_std_io_error(err: std::io::Error) -> Error { err } + +/// helper functions to format `Error` into `io::Error`. +/// +/// This function is added privately by design and only valid in current +/// context (i.e. `raw` mod). We don't want to expose this function to +/// users. +#[inline] +pub(crate) fn format_std_io_error(err: Error) -> io::Error { + let kind = match err.kind() { + ErrorKind::NotFound => io::ErrorKind::NotFound, + ErrorKind::PermissionDenied => io::ErrorKind::PermissionDenied, + ErrorKind::InvalidInput => io::ErrorKind::InvalidInput, + _ => io::ErrorKind::Interrupted, + }; + + io::Error::new(kind, err) +} diff --git a/core/src/raw/tests/read.rs b/core/src/raw/tests/read.rs index 6c6a3b3d22fa..49b44b3682e9 100644 --- a/core/src/raw/tests/read.rs +++ b/core/src/raw/tests/read.rs @@ -41,8 +41,6 @@ pub enum ReadAction { /// /// It's valid that seek outside of the file's end. Seek(SeekFrom), - /// Next represents a next action. - Next, } /// ReadChecker is used to check the correctness of the read process. @@ -155,56 +153,21 @@ impl ReadChecker { self.cur = expected as usize; } - /// check_next checks the correctness of the read process after a next action. - fn check_next(&mut self, output: Option) { - if self.cur < self.ranged_data.len() { - let Some(bs) = output else { - panic!("check next failed: there are remaining bytes, but next output is None",); - }; - - assert_eq!( - format!("{:x}", Sha256::digest(&bs)), - format!( - "{:x}", - Sha256::digest(&self.ranged_data[self.cur..self.cur + bs.len()]) - ), - "check next failed: output bs is different with expected bs, current: {}, output length: {}", - self.cur, bs.len(), - ); - - // update the current position - self.cur += bs.len(); - } else { - assert!( - output.is_none(), - "check next failed: there are no remaining bytes, the next output is Some", - ); - } - } - /// Check will check the correctness of the read process via given actions. /// /// Check will panic if any check failed. pub async fn check(&mut self, mut r: Reader, actions: &[ReadAction]) { - use oio::ReadExt; - for action in actions { match action { ReadAction::Read(size) => { - let mut buf = vec![0; *size]; - let n = r.read(&mut buf).await.expect("read must success"); - self.check_read(*size, &buf[..n]); + let bs = r.read(*size).await.expect("read must success"); + self.check_read(*size, &bs); } ReadAction::Seek(pos) => { let res = r.seek(*pos).await; self.check_seek(*pos, res); } - - ReadAction::Next => { - let res = r.next().await.transpose().expect("next must success"); - self.check_next(res); - } } } } @@ -229,11 +192,6 @@ impl ReadChecker { let res = r.seek(*pos); self.check_seek(*pos, res); } - - ReadAction::Next => { - let res = r.next().transpose().expect("next must success"); - self.check_next(res); - } } } } diff --git a/core/src/services/dbfs/reader.rs b/core/src/services/dbfs/reader.rs index e9fe13de3f7e..bac1c4be73a9 100644 --- a/core/src/services/dbfs/reader.rs +++ b/core/src/services/dbfs/reader.rs @@ -15,18 +15,12 @@ // specific language governing permissions and limitations // under the License. -use std::cmp; use std::io::SeekFrom; use std::sync::Arc; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use base64::engine::general_purpose; use base64::Engine; -use bytes::BufMut; use bytes::Bytes; -use futures::future::BoxFuture; use serde::Deserialize; use super::core::DbfsCore; @@ -35,10 +29,11 @@ use crate::*; // The number of bytes to read starting from the offset. This has a limit of 1 MB // Reference: https://docs.databricks.com/api/azure/workspace/dbfs/read -const DBFS_READ_LIMIT: usize = 1024 * 1024; +// const DBFS_READ_LIMIT: usize = 1024 * 1024; +#[allow(dead_code)] pub struct DbfsReader { - state: State, + core: Arc, path: String, offset: u64, has_filled: u64, @@ -47,7 +42,7 @@ pub struct DbfsReader { impl DbfsReader { pub fn new(core: Arc, op: OpRead, path: String) -> Self { DbfsReader { - state: State::Reading(Some(core)), + core, path, offset: op.range().offset().unwrap_or(0), has_filled: 0, @@ -55,10 +50,12 @@ impl DbfsReader { } #[inline] + #[allow(dead_code)] fn set_offset(&mut self, offset: u64) { self.offset = offset; } + #[allow(dead_code)] fn serde_json_decode(&self, bs: &Bytes) -> Result { let response_body = match serde_json::from_slice::(bs) { Ok(v) => v, @@ -83,70 +80,28 @@ impl DbfsReader { } } -enum State { - Reading(Option>), - Finalize(BoxFuture<'static, (Arc, Result)>), -} - /// # Safety /// /// We will only take `&mut Self` reference for DbfsReader. unsafe impl Sync for DbfsReader {} impl oio::Read for DbfsReader { - fn poll_read(&mut self, cx: &mut Context<'_>, mut buf: &mut [u8]) -> Poll> { - while self.has_filled as usize != buf.len() { - match &mut self.state { - State::Reading(core) => { - let core = core.take().expect("DbfsReader must be initialized"); - - let path = self.path.clone(); - let offset = self.offset; - let len = cmp::min(buf.len(), DBFS_READ_LIMIT); - - let fut = async move { - let resp = async { core.dbfs_read(&path, offset, len as u64).await }.await; - let body = match resp { - Ok(resp) => resp.into_body(), - Err(err) => { - return (core, Err(err)); - } - }; - let bs = async { body.bytes().await }.await; - (core, bs) - }; - self.state = State::Finalize(Box::pin(fut)); - } - State::Finalize(fut) => { - let (core, bs) = ready!(fut.as_mut().poll(cx)); - let data = self.serde_json_decode(&bs?)?; - - buf.put_slice(&data[..]); - self.set_offset(self.offset + data.len() as u64); - self.has_filled += data.len() as u64; - self.state = State::Reading(Some(core)); - } - } - } - Poll::Ready(Ok(self.has_filled as usize)) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (_, _) = (cx, pos); + async fn read(&mut self, size: usize) -> Result { + let _ = size; - Poll::Ready(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "output reader doesn't support seeking", - ))) + )) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; + async fn seek(&mut self, pos: SeekFrom) -> Result { + let _ = pos; - Poll::Ready(Some(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, - "output reader doesn't support iterating", - )))) + "output reader doesn't support seeking", + )) } } diff --git a/core/src/services/ftp/util.rs b/core/src/services/ftp/util.rs index 15d3ced8047e..3e46abe513c7 100644 --- a/core/src/services/ftp/util.rs +++ b/core/src/services/ftp/util.rs @@ -16,16 +16,13 @@ // under the License. use std::io; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; use bb8::PooledConnection; -use futures::future::BoxFuture; +use bytes::Bytes; use futures::AsyncRead; -use futures::FutureExt; +use futures::AsyncReadExt; use suppaftp::Status; +use tokio::io::ReadBuf; use super::backend::Manager; use crate::raw::*; @@ -35,91 +32,74 @@ use crate::*; /// Wrapper for ftp data stream and command stream. pub struct FtpReader { reader: Box, - state: State, + conn: Option>, + buf: Vec, } unsafe impl Sync for FtpReader {} -pub enum State { - Reading(Option>), - Finalize(BoxFuture<'static, Result<()>>), -} - impl FtpReader { /// Create an instance of FtpReader. pub fn new( - r: Box, - c: PooledConnection<'static, Manager>, + reader: Box, + conn: PooledConnection<'static, Manager>, ) -> Self { Self { - reader: r, - state: State::Reading(Some(c)), + reader, + conn: Some(conn), + buf: Vec::with_capacity(64 * 1024), } } } impl oio::Read for FtpReader { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let data = Pin::new(&mut self.reader).poll_read(cx, buf); + async fn read(&mut self, size: usize) -> Result { + if self.conn.is_none() { + return Err(Error::new( + ErrorKind::Unexpected, + "ftp reader is already closed", + )); + } - match &mut self.state { - // Reading state, try to poll some data. - State::Reading(stream) => { - match stream { - Some(_) => { - // when hit Err or EOF, consume ftpstream, change state to Finalize and send fut. - if let Poll::Ready(Err(_)) | Poll::Ready(Ok(0)) = data { - let mut ft = stream.take().unwrap(); + // Make sure buf has enough space. + if self.buf.capacity() < size { + self.buf.reserve(size); + } + let buf = self.buf.spare_capacity_mut(); + let mut read_buf: ReadBuf = ReadBuf::uninit(buf); - let fut = async move { - ft.read_response_in(&[ - Status::ClosingDataConnection, - Status::RequestedFileActionOk, - ]) - .await - .map_err(parse_error)?; + // SAFETY: Read at most `size` bytes into `read_buf`. + unsafe { + read_buf.assume_init(size); + } - Ok(()) - }; - self.state = State::Finalize(Box::pin(fut)); - } else { - // Otherwise, exit and return data. - return data.map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from ftp data stream") - .set_source(err) - }); - } + let data = self.reader.read(read_buf.initialize_unfilled()).await; - self.poll_read(cx, buf) - } - // We could never reach this branch because we will change to Finalize state once we consume ftp stream. - None => unreachable!(), - } + // Data read with success, copy and return it. + if let Ok(n) = data { + if n > 0 { + read_buf.set_filled(n); + return Ok(Bytes::copy_from_slice(&self.buf[..n])); } - - // Finalize state, wait for finalization of stream. - State::Finalize(fut) => match ready!(Pin::new(fut).poll_unpin(cx)) { - Ok(_) => Poll::Ready(Ok(0)), - Err(e) => Poll::Ready(Err(e)), - }, } - } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - let (_, _) = (cx, pos); - - Poll::Ready(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support seeking", - ))) + // While hitting Error or EOF, we should end this ftp stream. + let _ = self + .conn + .take() + .expect("connection must be valid during read") + .read_response_in(&[Status::ClosingDataConnection, Status::RequestedFileActionOk]) + .await + .map_err(parse_error)?; + Ok(Bytes::new()) } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; + async fn seek(&mut self, pos: io::SeekFrom) -> Result { + let _ = pos; - Poll::Ready(Some(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, - "output reader doesn't support seeking", - )))) + "ftp reader doesn't support seeking", + )) } } diff --git a/core/src/services/hdfs_native/reader.rs b/core/src/services/hdfs_native/reader.rs index 784d0678df4b..c661d4522998 100644 --- a/core/src/services/hdfs_native/reader.rs +++ b/core/src/services/hdfs_native/reader.rs @@ -16,8 +16,6 @@ // under the License. use std::io::SeekFrom; -use std::task::Context; -use std::task::Poll; use bytes::Bytes; use hdfs_native::file::FileReader; @@ -36,25 +34,18 @@ impl HdfsNativeReader { } impl Read for HdfsNativeReader { - fn poll_read(&mut self, _cx: &mut Context<'_>, _buf: &mut [u8]) -> Poll> { + async fn read(&mut self, size: usize) -> Result { + let _ = size; + todo!() } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (_, _) = (cx, pos); + async fn seek(&mut self, pos: SeekFrom) -> Result { + let _ = pos; - Poll::Ready(Err(Error::new( + Err(Error::new( ErrorKind::Unsupported, "HdfsNativeReader doesn't support seeking", - ))) - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; - - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "HdfsNativeReader doesn't support iterating", - )))) + )) } } diff --git a/core/src/types/list.rs b/core/src/types/list.rs index 295726556a32..fd0f2485ed9b 100644 --- a/core/src/types/list.rs +++ b/core/src/types/list.rs @@ -97,7 +97,7 @@ enum StatTask { /// Stating is used to store the join handle of spawned task. /// /// TODO: Replace with static future type after rust supported. - Stating(BoxedFuture<(String, Result)>), + Stating(BoxedStaticFuture<(String, Result)>), /// Known is used to store the entry that already contains the required metakey. Known(Option<(String, Metadata)>), } diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index d12096606bca..8f332bfc2e49 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -27,7 +27,6 @@ use futures::TryStreamExt; use super::BlockingOperator; use crate::operator_futures::*; -use crate::raw::oio::ReadExt; use crate::raw::oio::WriteExt; use crate::raw::*; use crate::*; @@ -540,9 +539,10 @@ impl Operator { (range.size().unwrap(), range) }; - let (_, mut s) = inner.read(&path, args.with_range(range)).await?; + let (_, r) = inner.read(&path, args.with_range(range)).await?; + let mut r = Reader::new(r); let mut buf = Vec::with_capacity(size_hint as usize); - s.read_to_end(&mut buf).await?; + r.read_to_end(&mut buf).await?; Ok(buf) }, diff --git a/core/src/types/reader.rs b/core/src/types/reader.rs index 5c6d97b30dc5..20236bed47e9 100644 --- a/core/src/types/reader.rs +++ b/core/src/types/reader.rs @@ -16,15 +16,15 @@ // under the License. use std::io; +use std::io::SeekFrom; use std::pin::Pin; use std::task::ready; use std::task::Context; use std::task::Poll; -use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncSeek; +use bytes::{BufMut, Bytes, BytesMut}; use futures::Stream; +use tokio::io::ReadBuf; use crate::raw::*; use crate::*; @@ -54,11 +54,17 @@ use crate::*; /// Besides, `Stream` **COULD** reduce an extra copy if underlying reader is /// stream based (like services s3, azure which based on HTTP). pub struct Reader { - inner: oio::Reader, - seek_state: SeekState, + state: State, } impl Reader { + /// Create a new reader from an `oio::Reader`. + pub(crate) fn new(r: oio::Reader) -> Self { + Reader { + state: State::Idle(Some(r)), + } + } + /// Create a new reader. /// /// Create will use internal information to decide the most suitable @@ -70,43 +76,217 @@ impl Reader { let (_, r) = acc.read(path, op).await?; Ok(Reader { - inner: r, - seek_state: SeekState::Init, + state: State::Idle(Some(r)), }) } -} -impl oio::Read for Reader { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - self.inner.poll_read(cx, buf) + /// Convert [`Reader`] into an [`futures::AsyncRead`] and [`futures::AsyncSeek`] + /// + /// `Reader` itself implements [`futures::AsyncRead`], this function is used to + /// make sure that `Reader` is used as an `AsyncRead` only. + /// + /// The returning type also implements `Send`, `Sync` and `Unpin`, so users can use it + /// as `Box` and calling `poll_read_unpin` on it. + #[inline] + #[cfg(not(target_arch = "wasm32"))] + pub fn into_futures_read( + self, + ) -> impl futures::AsyncRead + futures::AsyncSeek + Send + Sync + Unpin { + self + } + + /// Convert [`Reader`] into an [`tokio::io::AsyncRead`] and [`tokio::io::AsyncSeek`] + /// + /// `Reader` itself implements [`tokio::io::AsyncRead`], this function is used to + /// make sure that `Reader` is used as an [`tokio::io::AsyncRead`] only. + /// + /// The returning type also implements `Send`, `Sync` and `Unpin`, so users can use it + /// as `Box` and calling `poll_read_unpin` on it. + #[inline] + #[cfg(not(target_arch = "wasm32"))] + pub fn into_tokio_read( + self, + ) -> impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Send + Sync + Unpin { + self + } + + /// Seek to the position of `pos` of reader. + #[inline] + pub async fn seek(&mut self, pos: SeekFrom) -> Result { + let State::Idle(Some(r)) = &mut self.state else { + return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); + }; + r.seek_dyn(pos).await } - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: io::SeekFrom) -> Poll> { - self.inner.poll_seek(cx, pos) + /// Read at most `size` bytes of data from reader. + #[inline] + pub async fn read(&mut self, size: usize) -> Result { + let State::Idle(Some(r)) = &mut self.state else { + return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); + }; + r.read_dyn(size).await } - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - self.inner.poll_next(cx) + /// Read exact `size` bytes of data from reader. + pub async fn read_exact(&mut self, size: usize) -> Result { + let State::Idle(Some(r)) = &mut self.state else { + return Err(Error::new(ErrorKind::Unexpected, "reader must be valid")); + }; + + // Lucky path. + let bs1 = r.read_dyn(size).await?; + debug_assert!( + bs1.len() <= size, + "read should not return more bytes than expected" + ); + if bs1.len() == size { + return Ok(bs1); + } + if bs1.is_empty() { + return Err( + Error::new(ErrorKind::ContentIncomplete, "reader got too little data") + .with_context("expect", size.to_string()), + ); + } + + let mut bs = BytesMut::with_capacity(size); + bs.put_slice(&bs1); + + let mut remaining = size - bs.len(); + + loop { + let tmp = r.read_dyn(remaining).await?; + if tmp.is_empty() { + return Err( + Error::new(ErrorKind::ContentIncomplete, "reader got too little data") + .with_context("expect", size.to_string()) + .with_context("actual", bs.len().to_string()), + ); + } + bs.put_slice(&tmp); + debug_assert!( + tmp.len() <= remaining, + "read should not return more bytes than expected" + ); + + remaining -= tmp.len(); + if remaining == 0 { + break; + } + } + + Ok(bs.freeze()) } + + /// Reads all bytes until EOF in this source, placing them into buf. + pub async fn read_to_end(&mut self, buf: &mut Vec) -> Result { + let start_len = buf.len(); + + loop { + if buf.len() == buf.capacity() { + buf.reserve(32); // buf is full, need more space + } + + let spare = buf.spare_capacity_mut(); + let mut read_buf: ReadBuf = ReadBuf::uninit(spare); + + // SAFETY: These bytes were initialized but not filled in the previous loop + unsafe { + read_buf.assume_init(read_buf.capacity()); + } + + match self.read(read_buf.initialize_unfilled().len()).await { + Ok(bs) if bs.is_empty() => { + return Ok(buf.len() - start_len); + } + Ok(bs) => { + read_buf.initialize_unfilled()[..bs.len()].copy_from_slice(&bs); + // SAFETY: Read API makes sure that returning `n` is correct. + unsafe { + buf.set_len(buf.len() + bs.len()); + } + } + Err(e) => return Err(e), + } + } + } +} + +enum State { + Idle(Option), + Reading(BoxedStaticFuture<(oio::Reader, Result)>), + Seeking(BoxedStaticFuture<(oio::Reader, Result)>), } -impl AsyncRead for Reader { +/// # Safety +/// +/// Reader will only be used with `&mut self`. +unsafe impl Sync for State {} + +impl futures::AsyncRead for Reader { fn poll_read( mut self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut [u8], ) -> Poll> { - Pin::new(&mut self.inner).poll_read(cx, buf) + use oio::Read; + + match &mut self.state { + State::Idle(r) => { + let mut r = r.take().expect("reader must be valid"); + let size = buf.len(); + let fut = async move { + let res = r.read(size).await; + (r, res) + }; + self.state = State::Reading(Box::pin(fut)); + self.poll_read(cx, buf) + } + State::Reading(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + self.state = State::Idle(Some(r)); + let bs = res.map_err(format_std_io_error)?; + let n = bs.len(); + buf[..n].copy_from_slice(&bs); + Poll::Ready(Ok(n)) + } + State::Seeking(_) => Poll::Ready(Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + ))), + } } } -impl AsyncSeek for Reader { +impl futures::AsyncSeek for Reader { fn poll_seek( mut self: Pin<&mut Self>, cx: &mut Context<'_>, pos: io::SeekFrom, ) -> Poll> { - Pin::new(&mut self.inner).poll_seek(cx, pos) + use oio::Read; + + match &mut self.state { + State::Idle(r) => { + let mut r = r.take().expect("reader must be valid"); + let fut = async move { + let res = r.seek(pos).await; + (r, res) + }; + self.state = State::Seeking(Box::pin(fut)); + self.poll_seek(cx, pos) + } + State::Seeking(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + self.state = State::Idle(Some(r)); + Poll::Ready(res.map_err(format_std_io_error)) + } + State::Reading(_) => Poll::Ready(Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + ))), + } } } @@ -116,62 +296,117 @@ impl tokio::io::AsyncRead for Reader { cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>, ) -> Poll> { - // Safety: We make sure that we will set filled correctly. - unsafe { buf.assume_init(buf.remaining()) } - let n = ready!(self.inner.poll_read(cx, buf.initialize_unfilled()))?; - buf.advance(n); - Poll::Ready(Ok(())) + use oio::Read; + + loop { + match &mut self.state { + State::Idle(r) => { + // Safety: We make sure that we will set filled correctly. + unsafe { buf.assume_init(buf.remaining()) } + let size = buf.initialize_unfilled().len(); + + let mut r = r.take().expect("reader must be valid"); + let fut = async move { + let res = r.read(size).await; + (r, res) + }; + self.state = State::Reading(Box::pin(fut)); + } + State::Reading(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + self.state = State::Idle(Some(r)); + let bs = res.map_err(format_std_io_error)?; + let n = bs.len(); + buf.initialize_unfilled()[..n].copy_from_slice(&bs); + buf.advance(n); + return Poll::Ready(Ok(())); + } + State::Seeking(_) => { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + ))) + } + } + } } } impl tokio::io::AsyncSeek for Reader { - fn start_seek(self: Pin<&mut Self>, pos: io::SeekFrom) -> io::Result<()> { - let this = self.get_mut(); - if let SeekState::Start(_) = this.seek_state { - return Err(io::Error::new( - io::ErrorKind::Other, - "another search is in progress.", - )); + fn start_seek(mut self: Pin<&mut Self>, pos: io::SeekFrom) -> io::Result<()> { + use oio::Read; + + match &mut self.state { + State::Idle(r) => { + let mut r = r.take().expect("reader must be valid"); + let fut = async move { + let res = r.seek(pos).await; + (r, res) + }; + self.state = State::Seeking(Box::pin(fut)); + Ok(()) + } + State::Seeking(_) | State::Reading(_) => Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + )), } - this.seek_state = SeekState::Start(pos); - Ok(()) } fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let state = self.seek_state; - match state { - SeekState::Init => { + match &mut self.state { + State::Idle(_) => { // AsyncSeek recommends calling poll_complete before start_seek. // We don't have to guarantee that the value returned by // poll_complete called without start_seek is correct, // so we'll return 0. Poll::Ready(Ok(0)) } - SeekState::Start(pos) => { - let n = ready!(self.inner.poll_seek(cx, pos))?; - self.get_mut().seek_state = SeekState::Init; - Poll::Ready(Ok(n)) + State::Seeking(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + self.state = State::Idle(Some(r)); + Poll::Ready(res.map_err(format_std_io_error)) } + State::Reading(_) => Poll::Ready(Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + ))), } } } -#[derive(Debug, Clone, Copy)] -/// SeekState is used to track the tokio seek state of Reader. -enum SeekState { - /// start_seek has not been called. - Init, - /// start_seek has been called, but poll_complete has not yet been called. - Start(io::SeekFrom), -} - impl Stream for Reader { type Item = io::Result; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.inner) - .poll_next(cx) - .map_err(|err| io::Error::new(io::ErrorKind::Interrupted, err)) + use oio::Read; + + match &mut self.state { + State::Idle(r) => { + let mut r = r.take().expect("reader must be valid"); + let fut = async move { + // TODO: should allow user to tune this value. + let res = r.read(4 * 1024 * 1024).await; + (r, res) + }; + self.state = State::Reading(Box::pin(fut)); + self.poll_next(cx) + } + State::Reading(fut) => { + let (r, res) = ready!(fut.as_mut().poll(cx)); + self.state = State::Idle(Some(r)); + let bs = res.map_err(format_std_io_error)?; + if bs.is_empty() { + Poll::Ready(None) + } else { + Poll::Ready(Some(Ok(bs))) + } + } + State::Seeking(_) => Poll::Ready(Some(Err(io::Error::new( + io::ErrorKind::Interrupted, + "another io operation is in progress", + )))), + } } } @@ -194,6 +429,12 @@ impl BlockingReader { Ok(BlockingReader { inner: r }) } + + /// Create a new reader from an `oio::BlockingReader`. + #[cfg(test)] + pub(crate) fn new(r: oio::BlockingReader) -> Self { + BlockingReader { inner: r } + } } impl oio::BlockingRead for BlockingReader { @@ -216,14 +457,14 @@ impl oio::BlockingRead for BlockingReader { impl io::Read for BlockingReader { #[inline] fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.read(buf) + self.inner.read(buf).map_err(format_std_io_error) } } impl io::Seek for BlockingReader { #[inline] fn seek(&mut self, pos: io::SeekFrom) -> io::Result { - self.inner.seek(pos) + self.inner.seek(pos).map_err(format_std_io_error) } } @@ -243,8 +484,6 @@ mod tests { use rand::rngs::ThreadRng; use rand::Rng; use rand::RngCore; - use tokio::io::AsyncReadExt; - use tokio::io::AsyncSeekExt; use crate::services; use crate::Operator; diff --git a/core/tests/behavior/async_fuzz.rs b/core/tests/behavior/async_fuzz.rs index abe335a7ddb2..6113f0ee72d0 100644 --- a/core/tests/behavior/async_fuzz.rs +++ b/core/tests/behavior/async_fuzz.rs @@ -118,7 +118,7 @@ pub async fn test_fuzz_issue_2717(op: Operator) -> Result<()> { pub async fn test_fuzz_pr_3395_case_1(op: Operator) -> Result<()> { let actions = [ ReadAction::Seek(SeekFrom::Current(1)), - ReadAction::Next, + ReadAction::Read(1024), ReadAction::Seek(SeekFrom::End(-1)), ]; test_fuzz_read(op, 1, 0.., &actions).await @@ -138,9 +138,9 @@ pub async fn test_fuzz_pr_3395_case_1(op: Operator) -> Result<()> { /// ``` pub async fn test_fuzz_pr_3395_case_2(op: Operator) -> Result<()> { let actions = [ - ReadAction::Next, + ReadAction::Read(1024), ReadAction::Seek(SeekFrom::Current(1)), - ReadAction::Next, + ReadAction::Read(1024), ReadAction::Seek(SeekFrom::End(0)), ]; test_fuzz_read(op, 1, 0.., &actions).await diff --git a/core/tests/behavior/async_read.rs b/core/tests/behavior/async_read.rs index b6fa51531865..adbbeafa5897 100644 --- a/core/tests/behavior/async_read.rs +++ b/core/tests/behavior/async_read.rs @@ -18,8 +18,6 @@ use std::str::FromStr; use std::time::Duration; -use futures::AsyncReadExt; -use futures::AsyncSeekExt; use http::StatusCode; use log::warn; use reqwest::Url; @@ -613,7 +611,7 @@ pub async fn test_read_with_invalid_seek(op: Operator) -> anyhow::Result<()> { assert_eq!( res.unwrap_err().kind(), - std::io::ErrorKind::InvalidInput, + ErrorKind::InvalidInput, "seeking a negative position should return a InvalidInput error" ); diff --git a/integrations/dav-server/Cargo.toml b/integrations/dav-server/Cargo.toml index d1d82c990342..1fa278106106 100644 --- a/integrations/dav-server/Cargo.toml +++ b/integrations/dav-server/Cargo.toml @@ -25,7 +25,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" [dependencies] anyhow = "1" diff --git a/integrations/object_store/Cargo.toml b/integrations/object_store/Cargo.toml index 740f274c0fa3..1eb27b7b8e5f 100644 --- a/integrations/object_store/Cargo.toml +++ b/integrations/object_store/Cargo.toml @@ -24,7 +24,7 @@ edition = "2021" homepage = "https://opendal.apache.org/" license = "Apache-2.0" repository = "https://github.com/apache/opendal" -rust-version = "1.67" +rust-version = "1.75" version = "0.43.0" [dependencies] @@ -38,4 +38,4 @@ tokio = { version = "1", default-features = false } [dev-dependencies] tokio = { version = "1", features = ["fs", "macros", "rt-multi-thread"] } -opendal = { version = "0.45.1", path = "../../core", features = ["services-memory"] } \ No newline at end of file +opendal = { version = "0.45.1", path = "../../core", features = ["services-memory"] } diff --git a/integrations/object_store/src/lib.rs b/integrations/object_store/src/lib.rs index df257386b510..11f4e4f4f33e 100644 --- a/integrations/object_store/src/lib.rs +++ b/integrations/object_store/src/lib.rs @@ -347,10 +347,9 @@ struct OpendalReader { impl Stream for OpendalReader { type Item = Result; - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - use opendal::raw::oio::Read; - - self.inner + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let inner = Pin::new(&mut self.get_mut().inner); + inner .poll_next(cx) .map_err(|err| object_store::Error::Generic { store: "IoError",