From 5f1e0075e13f6240e92e68355557aea4534f77a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= Date: Wed, 13 Nov 2024 12:41:18 +0100 Subject: [PATCH] spec_exec: fix can_be_ignored function Previous implementation did not make much sense. We would prematurely return from speculative execution for some errors which could be ignored. I adjusted the function, so we use `#[deny(clippy::wildcard_enum_match_arm)]` lint. It will force the developer to think before adjusting this function if new QueryError variant is introduced. I categorized the errors (see the comments in the code) and justified why each of them needs to be ignored or not. --- scylla/src/transport/speculative_execution.rs | 80 +++++++++++++++++-- 1 file changed, 75 insertions(+), 5 deletions(-) diff --git a/scylla/src/transport/speculative_execution.rs b/scylla/src/transport/speculative_execution.rs index 10b60c4b75..d66a9bb6f7 100644 --- a/scylla/src/transport/speculative_execution.rs +++ b/scylla/src/transport/speculative_execution.rs @@ -2,6 +2,7 @@ use futures::{ future::FutureExt, stream::{FuturesUnordered, StreamExt}, }; +use scylla_cql::frame::response::error::DbError; use std::{future::Future, sync::Arc, time::Duration}; use tracing::{trace_span, warn, Instrument}; @@ -81,14 +82,83 @@ impl SpeculativeExecutionPolicy for PercentileSpeculativeExecutionPolicy { } } -// checks if a result created in a speculative execution branch can be ignored +/// Checks if a result created in a speculative execution branch can be ignored. +/// +/// We should ignore errors such that their presence when executing the request +/// on one node, does not imply that the same error will appear during retry on some other node. fn can_be_ignored(result: &Result) -> bool { match result { Ok(_) => false, - Err(QueryError::BrokenConnection(_)) => true, - Err(QueryError::ConnectionPoolError(_)) => true, - Err(QueryError::TimeoutError) => true, - _ => false, + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] + Err(e) => match e { + // Errors that will almost certainly appear for other nodes as well + QueryError::BadQuery(_) + | QueryError::CqlRequestSerialization(_) + | QueryError::BodyExtensionsParseError(_) + | QueryError::CqlResultParseError(_) + | QueryError::CqlErrorParseError(_) + | QueryError::ProtocolError(_) => false, + + // EmptyPlan is not returned by `Session::execute_query`. + // It is represented by None, which is then transformed + // to QueryError::EmptyPlan by the caller + // (either here is speculative_execution module, or for non-speculative execution). + // I believe this should not be ignored, since we do not expect it here. + QueryError::EmptyPlan => false, + + // Errors that should not appear here, thus should not be ignored + QueryError::TimeoutError + | QueryError::RequestTimeout(_) + | QueryError::MetadataError(_) => false, + + // Errors that can be ignored + QueryError::BrokenConnection(_) + | QueryError::UnableToAllocStreamId + | QueryError::ConnectionPoolError(_) => true, + + // Handle DbErrors + QueryError::DbError(db_error, _) => { + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] + match db_error { + // Errors that will almost certainly appear on other nodes as well + DbError::SyntaxError + | DbError::Invalid + | DbError::AlreadyExists { .. } + | DbError::Unauthorized + | DbError::ProtocolError => false, + + // Errors that should not appear there - thus, should not be ignored. + DbError::AuthenticationError | DbError::Other(_) => false, + + // For now, let's assume that UDF failure is not transient - don't ignore it + // TODO: investigate + DbError::FunctionFailure { .. } => false, + + // Not sure when these can appear - don't ignore them + // TODO: Investigate these errors + DbError::ConfigError | DbError::TruncateError => false, + + // Errors that we can ignore and perform a retry on some other node + DbError::Unavailable { .. } + | DbError::Overloaded + | DbError::IsBootstrapping + | DbError::ReadTimeout { .. } + | DbError::WriteTimeout { .. } + | DbError::ReadFailure { .. } + | DbError::WriteFailure { .. } + // Preparation may succeed on some other node. + | DbError::Unprepared { .. } + | DbError::ServerError + | DbError::RateLimitReached { .. } => true, + } + } + }, } }