From 305696073ccd5fd5e7f5efaacc090479076a8f53 Mon Sep 17 00:00:00 2001 From: hoslo Date: Sun, 4 Feb 2024 10:12:49 +0800 Subject: [PATCH] fix(services/fs,hdfs): fix poll_close when retry --- .github/workflows/test_edge.yml | 25 +++++++++ core/Cargo.lock | 10 ++++ .../Cargo.toml | 30 +++++++++++ .../README.md | 14 +++++ .../src/main.rs | 48 +++++++++++++++++ core/src/services/fs/writer.rs | 51 ++++++++++++++----- core/src/services/hdfs/writer.rs | 22 +++++--- 7 files changed, 180 insertions(+), 20 deletions(-) create mode 100644 core/edge/file_close_with_retry_on_full_disk/Cargo.toml create mode 100644 core/edge/file_close_with_retry_on_full_disk/README.md create mode 100644 core/edge/file_close_with_retry_on_full_disk/src/main.rs diff --git a/.github/workflows/test_edge.yml b/.github/workflows/test_edge.yml index a09f1de8b6dd..c8888ee1e1b8 100644 --- a/.github/workflows/test_edge.yml +++ b/.github/workflows/test_edge.yml @@ -33,6 +33,31 @@ on: - ".github/workflows/edge_test.yml" jobs: + test_file_close_with_retry_on_full_disk: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Create disk image + run: | + fallocate -l 512K disk.img + mkfs disk.img + + - name: Mount disk image + run: | + mkdir /tmp/test_dir + sudo mount -o loop disk.img /tmp/test_dir + + - name: Set permissions + run: sudo chmod a+wr /tmp/test_dir + + - name: Test + working-directory: core/edge/file_close_with_retry_on_full_disk + run: cargo run + env: + OPENDAL_FS_ROOT: /tmp/test_dir test_file_write_on_full_disk: runs-on: ubuntu-latest diff --git a/core/Cargo.lock b/core/Cargo.lock index fbaaf3f2a8a9..e087911ad03e 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -2003,6 +2003,16 @@ dependencies = [ "signature 1.6.4", ] +[[package]] +name = "edge_file_close_with_retry_on_full_disk" +version = "0.0.0" +dependencies = [ + "futures", + "opendal", + "rand 0.8.5", + "tokio", +] + [[package]] name = "edge_test_aws_s3_assume_role_with_web_identity" version = "0.0.0" diff --git a/core/edge/file_close_with_retry_on_full_disk/Cargo.toml b/core/edge/file_close_with_retry_on_full_disk/Cargo.toml new file mode 100644 index 000000000000..bdc2a648b85c --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/Cargo.toml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +edition = "2021" +license = "Apache-2.0" +name = "edge_file_close_with_retry_on_full_disk" +publish = false +rust-version = "1.67" +version = "0.0.0" + +[dependencies] +futures = "0.3" +opendal = { path = "../../" } +rand = "0.8" +tokio = { version = "1", features = ["full"] } diff --git a/core/edge/file_close_with_retry_on_full_disk/README.md b/core/edge/file_close_with_retry_on_full_disk/README.md new file mode 100644 index 000000000000..51cc9641afd2 --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/README.md @@ -0,0 +1,14 @@ +# File Close with Retry on Full Disk + +Reported by [fs::Writer::poll_close can't be retried multiple times when error occurs](https://github.com/apache/opendal/issues/4058). + +Setup: + +```shell +fallocate -l 512K disk.img +mkfs disk.img +mkdir ./td +sudo mount -o loop td.img ./td +chmod a+wr ./td +``` + diff --git a/core/edge/file_close_with_retry_on_full_disk/src/main.rs b/core/edge/file_close_with_retry_on_full_disk/src/main.rs new file mode 100644 index 000000000000..1748dc21985e --- /dev/null +++ b/core/edge/file_close_with_retry_on_full_disk/src/main.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::env; + +use opendal::layers::RetryLayer; +use opendal::services::Fs; +use opendal::Operator; +use opendal::Result; +use rand::prelude::*; + +#[tokio::main] +async fn main() -> Result<()> { + let mut builder = Fs::default(); + builder.root(&env::var("OPENDAL_FS_ROOT").expect("root must be set for this test")); + let op = Operator::new(builder)? + .layer(RetryLayer::new().with_max_times(3)) + .finish(); + + let size = thread_rng().gen_range(512 * 1024 + 1..4 * 1024 * 1024); + let mut bs = vec![0; size]; + thread_rng().fill_bytes(&mut bs); + + let mut w = op.writer("/test").await?; + w.write(bs).await?; + let result = w.close().await; + // Write file with size > 512KB should fail + assert!( + result.is_err(), + "write file on full disk should return error" + ); + + Ok(()) +} diff --git a/core/src/services/fs/writer.rs b/core/src/services/fs/writer.rs index 12e5a4fffe77..a8ebb4fd3c39 100644 --- a/core/src/services/fs/writer.rs +++ b/core/src/services/fs/writer.rs @@ -35,7 +35,7 @@ pub struct FsWriter { tmp_path: Option, f: Option, - fut: Option>>, + fut: Option)>>, } impl FsWriter { @@ -69,23 +69,35 @@ impl oio::Write for FsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } let mut f = self.f.take().expect("FsWriter must be initialized"); let tmp_path = self.tmp_path.clone(); let target_path = self.target_path.clone(); self.fut = Some(Box::pin(async move { - f.flush().await.map_err(new_std_io_error)?; - f.sync_all().await.map_err(new_std_io_error)?; + if let Err(e) = f.flush().await.map_err(new_std_io_error) { + // Reserve the original error for retry. + return (f, Err(e)); + } + if let Err(e) = f.sync_all().await.map_err(new_std_io_error) { + return (f, Err(e)); + } if let Some(tmp_path) = &tmp_path { - tokio::fs::rename(tmp_path, &target_path) + if let Err(e) = tokio::fs::rename(tmp_path, &target_path) .await - .map_err(new_std_io_error)?; + .map_err(new_std_io_error) + { + return (f, Err(e)); + } } - Ok(()) + (f, Ok(())) })); } } @@ -95,21 +107,32 @@ impl oio::Write for FsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } - let _ = self.f.take().expect("FsWriter must be initialized"); + let f = self.f.take().expect("FsWriter must be initialized"); let tmp_path = self.tmp_path.clone(); self.fut = Some(Box::pin(async move { if let Some(tmp_path) = &tmp_path { - tokio::fs::remove_file(tmp_path) + if let Err(e) = tokio::fs::remove_file(tmp_path) .await .map_err(new_std_io_error) + { + return (f, Err(e)); + } + (f, Ok(())) } else { - Err(Error::new( - ErrorKind::Unsupported, - "Fs doesn't support abort if atomic_write_dir is not set", - )) + ( + f, + Err(Error::new( + ErrorKind::Unsupported, + "Fs doesn't support abort if atomic_write_dir is not set", + )), + ) } })); } diff --git a/core/src/services/hdfs/writer.rs b/core/src/services/hdfs/writer.rs index 6c77097d842f..535e97c38b5c 100644 --- a/core/src/services/hdfs/writer.rs +++ b/core/src/services/hdfs/writer.rs @@ -36,7 +36,7 @@ pub struct HdfsWriter { tmp_path: Option, f: Option, client: Arc, - fut: Option>>, + fut: Option)>>, } /// # Safety @@ -76,7 +76,11 @@ impl oio::Write for HdfsWriter { if let Some(fut) = self.fut.as_mut() { let res = ready!(fut.poll_unpin(cx)); self.fut = None; - return Poll::Ready(res); + if let Err(e) = res.1 { + self.f = Some(res.0); + return Poll::Ready(Err(e)); + } + return Poll::Ready(Ok(())); } let mut f = self.f.take().expect("HdfsWriter must be initialized"); @@ -86,15 +90,21 @@ impl oio::Write for HdfsWriter { let client = self.client.clone(); self.fut = Some(Box::pin(async move { - f.close().await.map_err(new_std_io_error)?; + if let Err(e) = f.close().await.map_err(new_std_io_error) { + // Reserve the original error for retry. + return (f, Err(e)); + } if let Some(tmp_path) = tmp_path { - client + if let Err(e) = client .rename_file(&tmp_path, &target_path) - .map_err(new_std_io_error)?; + .map_err(new_std_io_error) + { + return (f, Err(e)); + } } - Ok(()) + (f, Ok(())) })); } }