From 72a82cf47752069a6d1d2fbfa17e0c2bef32b5d0 Mon Sep 17 00:00:00 2001 From: Alexey Bashtanov Date: Tue, 3 Dec 2024 11:04:39 +0000 Subject: [PATCH 1/4] features/snapshot: figure out state for features missing in snapshot Depending on the agreed version in the snapshot, a feature is known to be either not yet available, or retired. Use this logic when applying a snapshot. --- src/v/features/feature_table_snapshot.cc | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/v/features/feature_table_snapshot.cc b/src/v/features/feature_table_snapshot.cc index 7113d5f4755d2..564aaeba46436 100644 --- a/src/v/features/feature_table_snapshot.cc +++ b/src/v/features/feature_table_snapshot.cc @@ -54,13 +54,25 @@ void feature_table_snapshot::apply(feature_table& ft) const { }); if (snap_state_iter == states.end()) { // The feature table refers to a feature name that the snapshot - // doesn't mention: this is normal on upgrade. The feature will - // remain in its default-initialized state. + // doesn't mention: this is normal on upgrade. + if (spec.require_version <= version) { + // The feature was introduced no later than the agreed version, + // which is no later than the version of the broker that took + // the snapshot. So it only can be missing because it has been + // retired and thus deemed active. + cur_state._state = feature_state::state::active; + } else { + // Otherwise the feature was introduced after the agreed + // version, so it can only be disabled before we reach it. + cur_state._state = feature_state::state::unavailable; + } vlog( featureslog.debug, - "No state for feature '{}' in snapshot, upgrade in progress?", - spec.name); - continue; + "No state for feature '{}' in snapshot v{}, upgrade in progress? " + "Assuming the feature state is {}", + spec.name, + version, + cur_state._state); } else { if ( spec.require_version From 7c63a297db228c4601c49f412d1202d0e4b9756f Mon Sep 17 00:00:00 2001 From: Alexey Bashtanov Date: Wed, 4 Dec 2024 16:23:12 +0000 Subject: [PATCH 2/4] features/table: improve comments Warn that it is not safe to retire a feature together with its checks in the same version. --- src/v/cluster/feature_backend.h | 2 +- src/v/features/feature_table.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/v/cluster/feature_backend.h b/src/v/cluster/feature_backend.h index b9d8f51c4487d..231bb631409d7 100644 --- a/src/v/cluster/feature_backend.h +++ b/src/v/cluster/feature_backend.h @@ -41,7 +41,7 @@ class feature_backend { ss::future<> fill_snapshot(controller_snapshot&) const; ss::future<> apply_snapshot(model::offset, const controller_snapshot&); - /// this functions deal with the snapshot stored in local kvstore (in + /// these functions deal with the snapshot stored in local kvstore (in /// contrast to fill/apply_snapshot which deal with the feature table data /// in the replicated controller snapshot). bool has_local_snapshot(); diff --git a/src/v/features/feature_table.h b/src/v/features/feature_table.h index b9e08df0fe9c2..db3d8ad010f6e 100644 --- a/src/v/features/feature_table.h +++ b/src/v/features/feature_table.h @@ -82,8 +82,13 @@ enum class feature : std::uint64_t { // controller messages for unknown features (unexpected), and controller // messages that refer to features that have been retired. // -// retired does *not* mean the functionality is gone: it just means it +// Retired does *not* mean the functionality is gone: it just means it // is no longer guarded by a feature flag. +// +// All feature checks need to be removed one version before the feature is +// retired. That's because during upgrade, when a mixed version cluster is +// running, the older version nodes may read a snaphot from the newer version +// and get the feature automatically enabled. inline const std::unordered_set retired_features = { "central_config", "consumer_offsets", From 02ec796b0cc82657a7ad2ec393840c94415f78b8 Mon Sep 17 00:00:00 2001 From: Alexey Bashtanov Date: Thu, 5 Dec 2024 17:23:13 +0000 Subject: [PATCH 3/4] features/tests: create a constant for test version --- src/v/features/feature_table.cc | 6 +++--- src/v/features/feature_table.h | 2 ++ src/v/features/tests/feature_table_test.cc | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/v/features/feature_table.cc b/src/v/features/feature_table.cc index 3b47fa2990565..346b3813ae2b8 100644 --- a/src/v/features/feature_table.cc +++ b/src/v/features/feature_table.cc @@ -197,7 +197,7 @@ bool is_major_version_upgrade( static std::array test_extra_schema{ // For testing, a feature that does not auto-activate feature_spec{ - cluster::cluster_version{2001}, + TEST_VERSION, "__test_alpha", feature::test_alpha, feature_spec::available_policy::explicit_only, @@ -205,7 +205,7 @@ static std::array test_extra_schema{ // For testing, a feature that auto-activates feature_spec{ - cluster::cluster_version{2001}, + TEST_VERSION, "__test_bravo", feature::test_bravo, feature_spec::available_policy::always, @@ -213,7 +213,7 @@ static std::array test_extra_schema{ // For testing, a feature that auto-activates feature_spec{ - cluster::cluster_version{2001}, + TEST_VERSION, "__test_charlie", feature::test_charlie, feature_spec::available_policy::new_clusters_only, diff --git a/src/v/features/feature_table.h b/src/v/features/feature_table.h index db3d8ad010f6e..cef32382f01ff 100644 --- a/src/v/features/feature_table.h +++ b/src/v/features/feature_table.h @@ -163,6 +163,8 @@ constexpr cluster::cluster_version to_cluster_version(release_version rv) { vassert(false, "Invalid release_version"); } +constexpr cluster::cluster_version TEST_VERSION{2001}; + bool is_major_version_upgrade( cluster::cluster_version from, cluster::cluster_version to); diff --git a/src/v/features/tests/feature_table_test.cc b/src/v/features/tests/feature_table_test.cc index 77174d346a691..31803e7c9ed56 100644 --- a/src/v/features/tests/feature_table_test.cc +++ b/src/v/features/tests/feature_table_test.cc @@ -121,10 +121,10 @@ FIXTURE_TEST(feature_table_basic, feature_table_fixture) { ft.get_state(feature::test_alpha).get_state() == feature_state::state::unavailable); - // The dummy test features requires version 2001. The feature + // The dummy test features requires version TEST_VERSION. The feature // should go available, but not any further: the feature table // relies on external stimulus to actually activate features. - set_active_version(cluster_version{2001}); + set_active_version(TEST_VERSION); BOOST_REQUIRE( ft.get_state(feature::test_alpha).get_state() @@ -274,7 +274,7 @@ FIXTURE_TEST(feature_uniqueness, feature_table_fixture) { * but also activates elegible features. */ FIXTURE_TEST(feature_table_bootstrap, feature_table_fixture) { - bootstrap_active_version(cluster_version{2001}); + bootstrap_active_version(TEST_VERSION); // A non-auto-activating feature should remain in available state: // explicit_only features always require explicit activation, even From 8e53b6a65f94ecca4b0bfb5a2c16d9632fd851ff Mon Sep 17 00:00:00 2001 From: Alexey Bashtanov Date: Thu, 5 Dec 2024 17:27:10 +0000 Subject: [PATCH 4/4] features/test: check behavior on missing features in snapshot --- src/v/features/tests/feature_table_test.cc | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/v/features/tests/feature_table_test.cc b/src/v/features/tests/feature_table_test.cc index 31803e7c9ed56..aea777e63580e 100644 --- a/src/v/features/tests/feature_table_test.cc +++ b/src/v/features/tests/feature_table_test.cc @@ -334,6 +334,40 @@ FIXTURE_TEST(feature_table_old_snapshot, feature_table_fixture) { == feature_state::state::active); } +// Test that applying an old snapshot disables features that we only enabled in +// this version. +FIXTURE_TEST(feature_table_old_snapshot_missing, feature_table_fixture) { + bootstrap_active_version(TEST_VERSION); + + features::feature_table_snapshot snapshot; + snapshot.version = cluster::cluster_version{ft.get_active_version() - 1}; + snapshot.states = {}; + snapshot.apply(ft); + + // A feature with explicit available_policy should be activated by the + // snapshot. + BOOST_CHECK( + ft.get_state(feature::test_alpha).get_state() + == feature_state::state::unavailable); +} + +// Test that applying a snapshot of the same version with a missing feature +// enables it, as we assume it was retired in the next version. +FIXTURE_TEST(feature_table_new_snapshot_missing, feature_table_fixture) { + bootstrap_active_version(TEST_VERSION); + + features::feature_table_snapshot snapshot; + snapshot.version = cluster::cluster_version{ft.get_active_version()}; + snapshot.states = {}; + snapshot.apply(ft); + + // A feature with explicit available_policy should be activated by the + // snapshot. + BOOST_CHECK( + ft.get_state(feature::test_alpha).get_state() + == feature_state::state::active); +} + FIXTURE_TEST(feature_table_trial_license_test, feature_table_fixture) { const char* sample_valid_license = std::getenv("REDPANDA_SAMPLE_LICENSE"); if (sample_valid_license == nullptr) {