From 0ecf5b43d8046a119cf236c972b55208df3c6520 Mon Sep 17 00:00:00 2001 From: "Nathan (Blaise) Bruer" Date: Fri, 30 Aug 2024 23:30:03 -0500 Subject: [PATCH] S3 store will now retry more aggresively (#1302) S3Store was not retrying under some conditions if the s3 side of the stream closed. This will make S3Store a bit more stable. --- nativelink-store/src/s3_store.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/nativelink-store/src/s3_store.rs b/nativelink-store/src/s3_store.rs index cec5dda37..4bce1227b 100644 --- a/nativelink-store/src/s3_store.rs +++ b/nativelink-store/src/s3_store.rs @@ -40,7 +40,11 @@ use hyper::client::connect::{Connected, Connection, HttpConnector}; use hyper::service::Service; use hyper::Uri; use hyper_rustls::{HttpsConnector, MaybeHttpsStream}; -use nativelink_error::{make_err, make_input_err, Code, Error, ResultExt}; +// Note: S3 store should be very careful about the error codes it returns +// when in a retryable wrapper. Always prefer Code::Aborted or another +// retryable code over Code::InvalidArgument or make_input_err!(). +// ie: Don't import make_input_err!() to help prevent this. +use nativelink_error::{make_err, Code, Error, ResultExt}; use nativelink_metric::MetricsComponent; use nativelink_util::buf_channel::{ make_buf_channel_pair, DropCloserReadHalf, DropCloserWriteHalf, @@ -732,7 +736,8 @@ where } if let Err(e) = writer.send(bytes).await { return Some(( - RetryResult::Err(make_input_err!( + RetryResult::Err(make_err!( + Code::Aborted, "Error sending bytes to consumer in S3: {e}" )), writer, @@ -741,7 +746,8 @@ where } Err(e) => { return Some(( - RetryResult::Retry(make_input_err!( + RetryResult::Retry(make_err!( + Code::Aborted, "Bad bytestream element in S3: {e}" )), writer, @@ -751,7 +757,8 @@ where } if let Err(e) = writer.send_eof() { return Some(( - RetryResult::Err(make_input_err!( + RetryResult::Err(make_err!( + Code::Aborted, "Failed to send EOF to consumer in S3: {e}" )), writer,