diff --git a/Cargo.lock b/Cargo.lock index 902329a691..db34d0bb0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1146,7 +1146,7 @@ dependencies = [ [[package]] name = "clickward" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/clickward?rev=ceec762e6a87d2a22bf56792a3025e145caa095e#ceec762e6a87d2a22bf56792a3025e145caa095e" +source = "git+https://github.com/oxidecomputer/clickward?rev=4ee0f74db55f440d589232256458c0750f6a641e#4ee0f74db55f440d589232256458c0750f6a641e" dependencies = [ "anyhow", "camino", @@ -5092,6 +5092,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", + "clickhouse-admin-types", "db-macros", "derive-where", "diesel", @@ -5385,6 +5386,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "clickhouse-admin-types", "debug-ignore", "expectorate", "gateway-client", @@ -5553,8 +5555,10 @@ dependencies = [ "api_identity", "async-trait", "base64 0.22.1", + "camino", "chrono", "clap", + "clickhouse-admin-types", "cookie 0.18.1", "derive-where", "derive_more", @@ -6449,6 +6453,7 @@ dependencies = [ "cfg-if", "chrono", "clap", + "clickhouse-admin-types", "crucible-agent-client", "derive_more", "display-error-chain", diff --git a/Cargo.toml b/Cargo.toml index b424862dd6..4629b14dac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "certificates", "clickhouse-admin", "clickhouse-admin/api", + "clickhouse-admin/types", "clients/bootstrap-agent-client", "clients/cockroach-admin-client", "clients/ddm-admin-client", @@ -315,7 +316,7 @@ ciborium = "0.2.2" clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } clickhouse-admin-api = { path = "clickhouse-admin/api" } clickhouse-admin-types = { path = "clickhouse-admin/types" } -clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" } +clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "4ee0f74db55f440d589232256458c0750f6a641e" } cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } cockroach-admin-types = { path = "cockroach-admin/types" } diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 2e99d389a0..3e86f28b60 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -14,6 +14,7 @@ omicron-rpaths.workspace = true anyhow.workspace = true camino.workspace = true chrono.workspace = true +clickhouse-admin-types.workspace = true derive-where.workspace = true diesel = { workspace = true, features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } hex.workspace = true diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index b4c60e12ef..8c0ae74543 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -8,9 +8,9 @@ use crate::inventory::ZoneType; use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::schema::{ - blueprint, bp_omicron_physical_disk, bp_omicron_zone, bp_omicron_zone_nic, - bp_sled_omicron_physical_disks, bp_sled_omicron_zones, bp_sled_state, - bp_target, + blueprint, bp_clickhouse_cluster_config, bp_omicron_physical_disk, + bp_omicron_zone, bp_omicron_zone_nic, bp_sled_omicron_physical_disks, + bp_sled_omicron_zones, bp_sled_state, bp_target, }; use crate::typed_uuid::DbTypedUuid; use crate::{ @@ -21,7 +21,6 @@ use anyhow::{anyhow, bail, Context, Result}; use chrono::{DateTime, Utc}; use ipnetwork::IpNetwork; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use 
nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZonesConfig; @@ -30,6 +29,7 @@ use nexus_types::deployment::{ blueprint_zone_type, BlueprintPhysicalDisksConfig, }; use nexus_types::deployment::{BlueprintPhysicalDiskConfig, BlueprintZoneType}; +use nexus_types::deployment::{BlueprintTarget, ClickhouseClusterConfig}; use nexus_types::deployment::{ OmicronZoneExternalFloatingAddr, OmicronZoneExternalFloatingIp, OmicronZoneExternalSnatIp, @@ -259,6 +259,9 @@ pub struct BpOmicronZone { pub external_ip_id: Option>, pub filesystem_pool: Option>, + + clickhouse_keeper_id: Option<i64>, + clickhouse_server_id: Option<i64>, } impl BpOmicronZone { @@ -308,6 +311,8 @@ impl BpOmicronZone { snat_ip: None, snat_first_port: None, snat_last_port: None, + clickhouse_keeper_id: None, + clickhouse_server_id: None, }; match &blueprint_zone.zone_type { @@ -350,18 +355,40 @@ impl BpOmicronZone { bp_omicron_zone.set_zpool_name(dataset); } BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { address, dataset }, + blueprint_zone_type::ClickhouseKeeper { + keeper_id, + address, + dataset, + }, ) => { // Set the common fields bp_omicron_zone.set_primary_service_ip_and_port(address); bp_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + bp_omicron_zone.clickhouse_keeper_id = Some( + keeper_id + .0 + .try_into() + .expect("no more than 2^63 keeper IDs please"), + ); } BlueprintZoneType::ClickhouseServer( - blueprint_zone_type::ClickhouseServer { address, dataset }, + blueprint_zone_type::ClickhouseServer { + server_id, + address, + dataset, + }, ) => { // Set the common fields bp_omicron_zone.set_primary_service_ip_and_port(address); bp_omicron_zone.set_zpool_name(dataset); + + // Set the zone specific fields + bp_omicron_zone.clickhouse_server_id = + Some(server_id.0.try_into().expect( + "no more than 2^63 clickhouse server IDs please", + )); } BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, dataset }, @@ -588,12 +615,22 @@ impl BpOmicronZone { } ZoneType::ClickhouseKeeper => BlueprintZoneType::ClickhouseKeeper( blueprint_zone_type::ClickhouseKeeper { + keeper_id: clickhouse_admin_types::KeeperId( + self.clickhouse_keeper_id.ok_or_else(|| { + anyhow!("missing clickhouse_keeper_id") + })? as u64, + ), address: primary_address, dataset: dataset?, }, ), ZoneType::ClickhouseServer => BlueprintZoneType::ClickhouseServer( blueprint_zone_type::ClickhouseServer { + server_id: clickhouse_admin_types::ServerId( + self.clickhouse_server_id.ok_or_else(|| { + anyhow!("missing clickhouse_server_id") + })? 
as u64, + ), address: primary_address, dataset: dataset?, }, @@ -803,6 +840,64 @@ impl From for OmicronZoneNic { } } +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_clickhouse_cluster_config)] +pub struct BpClickhouseClusterConfig { + blueprint_id: Uuid, + generation: Generation, + max_used_server_id: i64, + max_used_keeper_id: i64, + cluster_name: String, + cluster_secret: String, +} + +impl BpClickhouseClusterConfig { + pub fn new( + blueprint_id: Uuid, + config: &ClickhouseClusterConfig, + ) -> anyhow::Result<Self> { + Ok(BpClickhouseClusterConfig { + blueprint_id, + generation: Generation(config.generation), + max_used_server_id: config + .max_used_server_id + .0 + .try_into() + .context("more than 2^63 IDs in use")?, + max_used_keeper_id: config + .max_used_keeper_id + .0 + .try_into() + .context("more than 2^63 IDs in use")?, + cluster_name: config.cluster_name.clone(), + cluster_secret: config.cluster_secret.clone(), + }) + } +} + +impl TryFrom<BpClickhouseClusterConfig> for ClickhouseClusterConfig { + type Error = anyhow::Error; + fn try_from(value: BpClickhouseClusterConfig) -> Result<Self, Self::Error> { + Ok(ClickhouseClusterConfig { + generation: value.generation.0, + max_used_server_id: clickhouse_admin_types::ServerId( + value + .max_used_server_id + .try_into() + .context("negative ID in database?")?, + ), + max_used_keeper_id: clickhouse_admin_types::KeeperId( + value + .max_used_keeper_id + .try_into() + .context("negative ID in database?")?, + ), + cluster_name: value.cluster_name.clone(), + cluster_secret: value.cluster_secret.clone(), + }) + } +} + mod diesel_util { use crate::{ schema::bp_omicron_zone::disposition, to_db_bp_zone_disposition, diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 5d9b3da78f..2fe38517eb 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1512,6 +1512,7 @@ table! { cockroachdb_fingerprint -> Text, cockroachdb_setting_preserve_downgrade -> Nullable, + } } @@ -1595,6 +1596,10 @@ table! { disposition -> crate::DbBpZoneDispositionEnum, external_ip_id -> Nullable, filesystem_pool -> Nullable, + + clickhouse_keeper_id -> Nullable<Int8>, + clickhouse_server_id -> Nullable<Int8> + } } @@ -1612,6 +1617,18 @@ table! { } } +table! { + bp_clickhouse_cluster_config (blueprint_id) { + blueprint_id -> Uuid, + + generation -> Int8, + max_used_server_id -> Int8, + max_used_keeper_id -> Int8, + cluster_name -> Text, + cluster_secret -> Text, + } +} + table! 
{ cockroachdb_zone_id_to_node_id (omicron_zone_id, crdb_node_id) { omicron_zone_id -> Uuid, diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index d413f9507a..679a043c75 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -49,6 +49,7 @@ use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::ClickhouseClusterConfig; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::external_api::views::SledState; use omicron_common::api::external::DataPageParams; @@ -632,6 +633,7 @@ impl DataStore { external_dns_version, cockroachdb_fingerprint, cockroachdb_setting_preserve_downgrade, + clickhouse_cluster_config: None, time_created, creator, comment, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 8888f2caaa..647cad1d6e 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2087,6 +2087,8 @@ impl DataStore { rot_pages_found, sled_agents, omicron_zones, + // TODO: fill this in once we collect it + clickhouse_keeper_cluster_membership: BTreeMap::new(), }) } } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 3b6b4bc007..7c743ebe83 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -147,6 +147,8 @@ impl CollectionBuilder { rot_pages_found: self.rot_pages_found, sled_agents: self.sleds, omicron_zones: self.omicron_zones, + // TODO: Fill when we actually gather keeper inventory + clickhouse_keeper_cluster_membership: BTreeMap::new(), } } diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 3ccc0dcb06..5248be8cce 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] anyhow.workspace = true chrono.workspace = true +clickhouse-admin-types.workspace = true debug-ignore.workspace = true gateway-client.workspace = true indexmap.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 2d8a7c9598..781fe5a659 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -23,6 +23,8 @@ use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::ClickhouseClusterConfig; +use nexus_types::deployment::ClickhouseIdAllocator; use nexus_types::deployment::CockroachDbPreserveDowngrade; use nexus_types::deployment::DiskFilter; use nexus_types::deployment::OmicronZoneExternalFloatingIp; @@ -34,6 +36,7 @@ use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolFilter; use nexus_types::deployment::ZpoolName; use nexus_types::external_api::views::SledState; +use nexus_types::inventory::Collection; use omicron_common::address::get_internal_dns_server_addresses; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; @@ -69,6 +72,7 @@ use 
std::net::SocketAddrV6; use thiserror::Error; use typed_rng::TypedUuidRng; use typed_rng::UuidRng; +use uuid::Uuid; use super::external_networking::BuilderExternalNetworking; use super::external_networking::ExternalNetworkingChoice; @@ -204,6 +208,7 @@ pub struct BlueprintBuilder<'a> { disks: BlueprintDisksBuilder<'a>, sled_state: BTreeMap, cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, + clickhouse_id_allocator: ClickhouseIdAllocator, creator: String, operations: Vec, @@ -259,6 +264,9 @@ impl<'a> BlueprintBuilder<'a> { .copied() .map(|sled_id| (sled_id, SledState::Active)) .collect(); + let cluster_name = format!("cluster-{}", Uuid::new_v4()); + let clickhouse_cluster_config = + ClickhouseClusterConfig::new(cluster_name); Blueprint { id: rng.blueprint_rng.next(), blueprint_zones, @@ -270,6 +278,7 @@ impl<'a> BlueprintBuilder<'a> { cockroachdb_fingerprint: String::new(), cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify, + clickhouse_cluster_config, time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("starting blueprint with {num_sleds} empty sleds"), @@ -328,6 +337,9 @@ impl<'a> BlueprintBuilder<'a> { sled_state, cockroachdb_setting_preserve_downgrade: parent_blueprint .cockroachdb_setting_preserve_downgrade, + clickhouse_id_allocator: ClickhouseIdAllocator::from( + &parent_blueprint.clickhouse_cluster_config, + ), creator: creator.to_owned(), operations: Vec::new(), comments: Vec::new(), @@ -360,6 +372,14 @@ impl<'a> BlueprintBuilder<'a> { let blueprint_disks = self .disks .into_disks_map(self.input.all_sled_ids(SledFilter::InService)); + + // Copy the `ClickhouseClusterConfig` from the parent blueprint + // and update it if any allocations or expungements have occurred. + let mut clickhouse_cluster_config = + self.parent_blueprint.clickhouse_cluster_config.clone(); + clickhouse_cluster_config + .update_configuration(&self.clickhouse_id_allocator); + Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, @@ -375,6 +395,7 @@ impl<'a> BlueprintBuilder<'a> { .clone(), cockroachdb_setting_preserve_downgrade: self .cockroachdb_setting_preserve_downgrade, + clickhouse_cluster_config, time_created: now_db_precision(), creator: self.creator, comment: self @@ -927,6 +948,119 @@ impl<'a> BlueprintBuilder<'a> { Ok(EnsureMultiple::Changed { added: num_crdb_to_add, removed: 0 }) } + pub fn sled_ensure_zone_multiple_clickhouse_server( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many clickhouse server zones do we want to add? 
+ let clickhouse_server_count = self.sled_num_running_zones_of_kind( + sled_id, + ZoneKind::ClickhouseServer, + ); + let num_clickhouse_servers_to_add = + match desired_zone_count.checked_sub(clickhouse_server_count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a ClickhouseServer zone not yet supported \ + (sled {sled_id} has {clickhouse_server_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + for _ in 0..num_clickhouse_servers_to_add { + let zone_id = self.rng.zone_rng.next(); + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let pool_name = + self.sled_select_zpool(sled_id, ZoneKind::ClickhouseServer)?; + let port = omicron_common::address::CLICKHOUSE_HTTP_PORT; + let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone_type = BlueprintZoneType::ClickhouseServer( + blueprint_zone_type::ClickhouseServer { + server_id: self.clickhouse_id_allocator.next_server_id(), + address, + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + }, + ); + let filesystem_pool = pool_name; + + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: zone_id, + underlay_address: underlay_ip, + filesystem_pool: Some(filesystem_pool), + zone_type, + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Changed { + added: num_clickhouse_servers_to_add, + removed: 0, + }) + } + + pub fn sled_ensure_zone_multiple_clickhouse_keeper( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many clickhouse keeper zones do we want to add? + let clickhouse_keeper_count = self.sled_num_running_zones_of_kind( + sled_id, + ZoneKind::ClickhouseKeeper, + ); + let num_clickhouse_keepers_to_add = + match desired_zone_count.checked_sub(clickhouse_keeper_count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a ClickhouseKeeper zone not yet supported \ + (sled {sled_id} has {clickhouse_keeper_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + + for _ in 0..num_clickhouse_keepers_to_add { + let zone_id = self.rng.zone_rng.next(); + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let pool_name = + self.sled_select_zpool(sled_id, ZoneKind::ClickhouseKeeper)?; + let port = omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; + let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone_type = BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { + keeper_id: self.clickhouse_id_allocator.next_keeper_id(), + address, + dataset: OmicronZoneDataset { + pool_name: pool_name.clone(), + }, + }, + ); + let filesystem_pool = pool_name; + + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: zone_id, + underlay_address: underlay_ip, + filesystem_pool: Some(filesystem_pool), + zone_type, + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Changed { + added: num_clickhouse_keepers_to_add, + removed: 0, + }) + } + pub fn sled_promote_internal_ntp_to_boundary_ntp( &mut self, sled_id: SledUuid, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs new file mode 100644 index 0000000000..3c394d1984 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_builder/clickhouse.rs @@ -0,0 +1,252 @@ +// This Source Code Form is subject to the terms of the 
Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A mechanism for allocating clickhouse keeper nodes for clustered clickhouse setups + +use clickhouse_admin_types::KeeperId; +use nexus_types::deployment::{ + blueprint_zone_type, Blueprint, BlueprintZoneFilter, BlueprintZoneType, + PlanningInput, +}; +use nexus_types::inventory::Collection; +use omicron_uuid_kinds::OmicronZoneUuid; +use slog::{error, Logger}; +use std::collections::{BTreeMap, BTreeSet}; +use thiserror::Error; + +struct KeeperZones { + in_service: + BTreeMap<OmicronZoneUuid, blueprint_zone_type::ClickhouseKeeper>, + expunged: BTreeMap<OmicronZoneUuid, blueprint_zone_type::ClickhouseKeeper>, +} + +/// Only a single keeper may be added or removed from the clickhouse keeper +/// cluster at a time. If an addition or removal is in progress, we must wait to +/// change our desired state of the cluster. +pub struct KeeperAllocator<'a> { + log: Logger, + parent_blueprint: &'a Blueprint, + input: &'a PlanningInput, + inventory: &'a Collection, +} + +/// Errors encountered when trying to plan keeper deployments +#[derive(Debug, Error)] +pub enum KeeperAllocationError { + #[error("a clickhouse cluster configuration has not been created")] + NoConfig, + #[error("failed to retrieve clickhouse keeper membership from inventory")] + NoInventory, + #[error("cannot add more than one keeper at a time: {added_keepers:?}")] + BadMembershipChange { added_keepers: BTreeSet<KeeperId> }, +} + +impl<'a> KeeperAllocator<'a> { + /// Generate the next configuration of our keeper cluster membership + /// + /// If there is a configuration change in progress or the configuration + /// matches our desired state then the membership will not change, and we'll + /// report `Ok(None)`. A configuration matches our desired state when all + /// in-service keeper zones have keepers in the clickhouse keeper cluster + /// membership and all expunged zones do not have keepers in the clickhouse + /// keeper cluster membership. + pub fn plan( + &self, + ) -> Result<Option<BTreeSet<KeeperId>>, KeeperAllocationError> { + let parent_keeper_zones = self.all_keeper_zones_in_parent_blueprint(); + let parent_keeper_membership = self + .desired_keeper_membership_in_parent_blueprint( + &parent_keeper_zones, + )?; + + // Does inventory reflect our desired keeper membership? If not, the + // executor will keep trying until it does. Note that this may end up + // requiring support, but if so it's because of a bug in clickhouse and + // so trying to automate that away at this stage is probably not worth + // it. + let Some((_, inventory_membership)) = + self.inventory.latest_clickhouse_keeper_membership() + else { + // We can't get inventory so we assume that a reconfiguration must + // be in progress for safety purposes. + return Err(KeeperAllocationError::NoInventory); + }; + + let mut desired_membership: BTreeSet<_> = + parent_keeper_membership.keys().cloned().collect(); + + if desired_membership != inventory_membership.raft_config { + // We're still trying to reach our desired state. However, we want + // to ensure that if we are currently trying to add a node, we + // have not expunged the zone of the keeper that we are trying to + // add. This can happen for a number of reasons, and without this + // check we would not be able to make forward progress. 
+ let added_keepers: BTreeSet<_> = desired_membership + .difference(&inventory_membership.raft_config) + .cloned() + .collect(); + + // We should only be adding or removing 1 keeper at a time + if added_keepers.len() > 1 { + error!( + self.log, + concat!( + "Keeper membership error: attempting to add ", + "more than one keeper to cluster: {:?}" + ), + added_keepers + ); + return Err(KeeperAllocationError::BadMembershipChange { + added_keepers, + }); + } + + // If we are not adding a keeper then we are done. + // The executor is trying to remove one from the cluster. + // + // This should always succeed eventually, barring a bug in + // clickhouse. + // + // TODO: Should we ensure that we don't have a planner bug + // by checking to see if we have more than one keeper to remove? + if added_keepers.is_empty() { + return Ok(None); + } + + // We must be adding a keeper. Let's make sure that the zone + // for this keeper has not already been expunged. + // + // Unwraps are safe because we know there is one added keeper and we + // generated it from the keys in `parent_keeper_membership`. + let zone_id = parent_keeper_membership + .get(added_keepers.first().unwrap()) + .unwrap(); + if parent_keeper_zones.expunged.contains_key(zone_id) { + // We need to change our desired state. A node we were trying + // to add was expunged. Our desired state is equivalent to + // the inventory so let's set it to that and go through one + // more round of planning. If the node was added to the keeper + // cluster in the meantime this will allow it to get removed in + // the next execution. + return Ok(Some(inventory_membership.raft_config.clone())); + } else { + // We are still trying to add this keeper + return Ok(None); + } + } + + // Our desired membership from the parent blueprint matches the + // inventory. + // + // Do we need to add or remove any nodes? We expunge first, because + // those nodes are already gone and don't help our quorum. + for keeper_zone in parent_keeper_zones.expunged.values() { + if desired_membership.contains(&keeper_zone.keeper_id) { + // Let's expunge the first match we see + desired_membership.remove(&keeper_zone.keeper_id); + return Ok(Some(desired_membership)); + } + } + + for keeper_zone in parent_keeper_zones.in_service.values() { + if !desired_membership.contains(&keeper_zone.keeper_id) { + // Let's add the first match we see + desired_membership.insert(keeper_zone.keeper_id); + return Ok(Some(desired_membership)); + } + } + + // We have reached our desired state and there are no actions + // to take. 
+ Ok(None) + } + + /// Get all `ClickhouseKeeper` zones from the parent blueprint + fn all_keeper_zones_in_parent_blueprint(&self) -> KeeperZones { + let in_service: BTreeMap<_, _> = self + .parent_blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|(_, bp_zone_config)| { + if let BlueprintZoneType::ClickhouseKeeper(keeper_zone_type) = + &bp_zone_config.zone_type + { + Some((bp_zone_config.id, keeper_zone_type.clone())) + } else { + None + } + }) + .collect(); + let expunged: BTreeMap<_, _> = self + .parent_blueprint + .all_omicron_zones(BlueprintZoneFilter::Expunged) + .filter_map(|(_, bp_zone_config)| { + if let BlueprintZoneType::ClickhouseKeeper(keeper_zone_type) = + &bp_zone_config.zone_type + { + Some((bp_zone_config.id, keeper_zone_type.clone())) + } else { + None + } + }) + .collect(); + KeeperZones { in_service, expunged } + } + + /// Get all `KeeperId`s that we wanted to be part of the clickhouse keeper + /// cluster membership in the parent blueprint. + /// + /// This is our desired state, reflected in the parent blueprint, not what + /// the actual membership is as reflected by the inventory. + /// + /// Also note that a zone may already be expunged, and yet we may still be + /// trying to add it as a keeper member in the executor. We'll handle this in the + /// `plan` method above. + /// + /// Panics if there is a clickhouse keeper zone id in + /// `self.clickhouse_cluster_config.zones_with_keepers`, but not a + /// corresponding omicron zone in `all_keeper_zones`. + fn desired_keeper_membership_in_parent_blueprint( + &self, + all_keeper_zones: &KeeperZones, + ) -> Result<BTreeMap<KeeperId, OmicronZoneUuid>, KeeperAllocationError> + { + // Get our current clickhouse cluster configuration if there is one + let Some(clickhouse_cluster_config) = + &self.parent_blueprint.clickhouse_cluster_config + else { + return Err(KeeperAllocationError::NoConfig); + }; + + Ok(clickhouse_cluster_config + .zones_with_keepers + .iter() + .map(|zone_id| { + if let Some(keeper) = all_keeper_zones.in_service.get(zone_id) { + (keeper.keeper_id, *zone_id) + } else { + // It is an invariant violation/programmer error if there is + // not a keeper zone for each keeper id. + // + // We panic here as planning is broken at this point. + if let Some(keeper) = all_keeper_zones.expunged.get(zone_id) + { + (keeper.keeper_id, *zone_id) + } else { + let msg = format!( + concat!( + "Failed to find zone id {} ", + "when looking up keeper id" + ), + zone_id + ); + error!(self.log, "{msg}"); + panic!("{msg}"); + } + } + }) + .collect()) + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs index 99d3b41772..fe410b95fe 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs @@ -5,6 +5,7 @@ //! 
Low-level facility for generating Blueprints mod builder; +mod clickhouse; mod external_networking; mod zones; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 3bd1b8757e..04447aaf1c 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -357,6 +357,8 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb, DiscretionaryOmicronZone::Nexus, + DiscretionaryOmicronZone::ClickhouseServer, + DiscretionaryOmicronZone::ClickhouseKeeper, ] { let num_zones_to_add = self.num_additional_zones_needed(zone_kind); if num_zones_to_add == 0 { @@ -437,6 +439,14 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::Nexus => { self.input.target_nexus_zone_count() } + DiscretionaryOmicronZone::ClickhouseServer => { + self.input.target_clickhouse_server_zone_count() + } + DiscretionaryOmicronZone::ClickhouseKeeper => { + // TODO: Should we instead return 1 if the existing count is below the target count? + // Reconfiguration only allows adding one keeper node at a time + self.input.target_clickhouse_keeper_zone_count() + } }; // TODO-correctness What should we do if we have _too many_ @@ -522,6 +532,18 @@ impl<'a> Planner<'a> { new_total_zone_count, )? } + DiscretionaryOmicronZone::ClickhouseServer => { + self.blueprint.sled_ensure_zone_multiple_clickhouse_server( + sled_id, + new_total_zone_count, + )? + } + DiscretionaryOmicronZone::ClickhouseKeeper => { + self.blueprint.sled_ensure_zone_multiple_clickhouse_keeper( + sled_id, + new_total_zone_count, + )? + } }; match result { EnsureMultiple::Changed { added, removed } => { diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index 2fb60e66f8..c3f000df1f 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -17,7 +17,8 @@ pub(crate) enum DiscretionaryOmicronZone { BoundaryNtp, CockroachDb, Nexus, - // TODO expand this enum as we start to place more services + ClickhouseServer, + ClickhouseKeeper, } impl DiscretionaryOmicronZone { @@ -28,10 +29,14 @@ impl DiscretionaryOmicronZone { BlueprintZoneType::BoundaryNtp(_) => Some(Self::BoundaryNtp), BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb), BlueprintZoneType::Nexus(_) => Some(Self::Nexus), + BlueprintZoneType::ClickhouseKeeper(_) => { + Some(Self::ClickhouseKeeper) + } + BlueprintZoneType::ClickhouseServer(_) => { + Some(Self::ClickhouseServer) + } // Zones that we should place but don't yet. 
BlueprintZoneType::Clickhouse(_) - | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::ClickhouseServer(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::InternalDns(_) @@ -51,6 +56,12 @@ impl From for ZoneKind { DiscretionaryOmicronZone::BoundaryNtp => Self::BoundaryNtp, DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb, DiscretionaryOmicronZone::Nexus => Self::Nexus, + DiscretionaryOmicronZone::ClickhouseServer => { + Self::ClickhouseServer + } + DiscretionaryOmicronZone::ClickhouseKeeper => { + Self::ClickhouseKeeper + } } } } diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 6b31013d49..1d66563880 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -11,8 +11,10 @@ workspace = true anyhow.workspace = true async-trait.workspace = true base64.workspace = true +camino.workspace = true chrono.workspace = true clap.workspace = true +clickhouse-admin-types.workspace = true cookie.workspace = true derive-where.workspace = true derive_more.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 96de893fa3..2c00bc0f6c 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -42,11 +42,14 @@ use uuid::Uuid; mod blueprint_diff; mod blueprint_display; +mod clickhouse; mod network_resources; mod planning_input; mod tri_map; mod zone_type; +pub use clickhouse::ClickhouseClusterConfig; +pub use clickhouse::ClickhouseIdAllocator; pub use network_resources::AddNetworkResourceError; pub use network_resources::OmicronZoneExternalFloatingAddr; pub use network_resources::OmicronZoneExternalFloatingIp; @@ -162,6 +165,10 @@ pub struct Blueprint { /// Whether to set `cluster.preserve_downgrade_option` and what to set it to pub cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, + /// Configuration for Clickhouse Clusters derived from the parent blueprint + /// and current blueprint zones + pub clickhouse_cluster_config: Option, + /// when this blueprint was generated (for debugging) pub time_created: chrono::DateTime, /// identity of the component that generated the blueprint (for debugging) diff --git a/nexus/types/src/deployment/clickhouse.rs b/nexus/types/src/deployment/clickhouse.rs new file mode 100644 index 0000000000..4dea78652c --- /dev/null +++ b/nexus/types/src/deployment/clickhouse.rs @@ -0,0 +1,181 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types used in blueprints related to clickhouse configuration + +use clickhouse_admin_types::{KeeperId, ServerId}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::OmicronZoneUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; +use uuid::Uuid; + +/// A mechanism used by the `BlueprintBuilder` to update clickhouse server and +/// keeper ids +/// +/// Also stores whether any zones have been expunged so that we can bump the +/// generation in the `ClickhouseClusterConfig` even if no new servers have +/// been added. +#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +pub struct ClickhouseIdAllocator { + /// Have any zones been expunged? + zones_expunged: bool, + + /// Clickhouse Server ids must be unique and are handed out monotonically. Keep track + /// of the last used one. 
max_used_server_id: ServerId, + /// Clickhouse Keeper ids must be unique and are handed out monotonically. Keep track + /// of the last used one. + max_used_keeper_id: KeeperId, +} + +impl ClickhouseIdAllocator { + pub fn new( + max_used_server_id: ServerId, + max_used_keeper_id: KeeperId, + ) -> ClickhouseIdAllocator { + ClickhouseIdAllocator { + zones_expunged: false, + max_used_server_id, + max_used_keeper_id, + } + } + + pub fn expunge_zone(&mut self) { + self.zones_expunged = true; + } + + pub fn next_server_id(&mut self) -> ServerId { + self.max_used_server_id += 1.into(); + self.max_used_server_id + } + pub fn next_keeper_id(&mut self) -> KeeperId { + self.max_used_keeper_id += 1.into(); + self.max_used_keeper_id + } + + pub fn max_used_server_id(&self) -> ServerId { + self.max_used_server_id + } + pub fn max_used_keeper_id(&self) -> KeeperId { + self.max_used_keeper_id + } +} + +impl From<&ClickhouseClusterConfig> for ClickhouseIdAllocator { + fn from(value: &ClickhouseClusterConfig) -> Self { + ClickhouseIdAllocator::new( + value.max_used_server_id, + value.max_used_keeper_id, + ) + } +} + +/// Global configuration for all clickhouse servers (replicas) and keepers +#[derive(Clone, Debug, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +pub struct ClickhouseClusterConfig { + /// The last update to the clickhouse cluster configuration + /// + /// This is used by `clickhouse-admin` in the clickhouse server and keeper + /// zones to discard old configurations. + pub generation: Generation, + /// Clickhouse Server ids must be unique and are handed out monotonically. Keep track + /// of the last used one. + pub max_used_server_id: ServerId, + /// Clickhouse Keeper ids must be unique and are handed out monotonically. Keep track + /// of the last used one. + pub max_used_keeper_id: KeeperId, + /// An arbitrary name for the Clickhouse cluster shared by all nodes + pub cluster_name: String, + /// An arbitrary string shared by all nodes used at runtime to determine whether + /// nodes are part of the same cluster. + pub cluster_secret: String, + + /// The desired state of the clickhouse keeper cluster + /// + /// We decouple deployment of zones that should contain clickhouse keeper + /// processes from actually starting or stopping those processes, adding or + /// removing them to/from the keeper cluster, and reconfiguring other keeper and + /// clickhouse server nodes to reflect the new configuration. + /// + /// As part of this decoupling, we keep track of the intended zone + /// deployment in the blueprint, but that is not enough to track the desired + /// state of the keeper cluster. We are only allowed to add or remove one + /// keeper node at a time, and therefore we must track the desired state of + /// the keeper cluster which may change multiple times until the keepers in + /// the cluster match the deployed zones. An example may help: + /// + /// 1. We start with 3 keeper nodes in 3 deployed keeper zones and need to + /// add two to reach our desired policy of 5 keepers + /// 2. The planner adds 2 new keeper zones to the blueprint + /// 3. The planner will also add **one** new keeper process that matches one + /// of the deployed zones to the desired keeper cluster. + /// 4. The executor will start the new keeper process, attempt to add it + /// to the keeper cluster by pushing configuration updates to the other + /// keepers, and then update the clickhouse server configurations to know + /// about the new keeper. + /// 5. 
If the keeper is successfully added, as reflected in inventory, then + /// steps 3 and 4 above will be retried for the next keeper process. + /// 6. If the keeper addition to the cluster has failed, as reflected in + /// inventory, then the planner will create a new desired state that + /// expunges the keeper zone for the failed keeper, and adds a new + /// keeper zone to replace it. The process will then repeat with the + /// new zone. This is necessary because we must uniquely identify each + /// keeper process with an integer id, and once we allocate a process + /// to a zone we don't want to change the mapping. While changing the + /// mapping *may* be fine, we play it safe and just try to add a node + /// with a new id to the keeper cluster so that keeper reconfiguration + /// never gets stuck. + /// + /// Note that because we must discover the `KeeperId` from the + /// `BlueprintZoneType` of the omicron zone as stored in the blueprint, + /// we cannot remove the expunged zones from the blueprint until we have + /// also successfully removed the keepers for those zones from the keeper + /// cluster. + pub zones_with_keepers: BTreeSet, +} + +impl ClickhouseClusterConfig { + pub fn new(cluster_name: String) -> ClickhouseClusterConfig { + ClickhouseClusterConfig { + generation: Generation::new(), + max_used_server_id: 0.into(), + max_used_keeper_id: 0.into(), + cluster_name, + cluster_secret: Uuid::new_v4().to_string(), + zones_with_keepers: BTreeSet::new(), + } + } + + /// If new IDs have been allocated or any zones expunged, then update the + /// internal state and return true, otherwise return false. + pub fn update_configuration( + &mut self, + allocator: &ClickhouseIdAllocator, + ) -> bool { + let mut updated = allocator.zones_expunged; + if self.max_used_server_id < allocator.max_used_server_id() { + self.max_used_server_id = allocator.max_used_server_id(); + updated = true; + } + if self.max_used_keeper_id < allocator.max_used_keeper_id() { + self.max_used_keeper_id = allocator.max_used_keeper_id(); + updated = true; + } + if updated { + self.generation = self.generation.next(); + } + updated + } + + pub fn has_configuration_changed( + &self, + allocator: &ClickhouseIdAllocator, + ) -> bool { + allocator.zones_expunged + || self.max_used_server_id != allocator.max_used_server_id() + || self.max_used_keeper_id != allocator.max_used_keeper_id() + } +} diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index dabb47066e..4017adfaa0 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -99,6 +99,22 @@ impl PlanningInput { self.policy.target_cockroachdb_zone_count } + pub fn target_clickhouse_server_zone_count(&self) -> usize { + self.policy + .clickhouse_policy + .as_ref() + .map(|policy| policy.target_servers) + .unwrap_or(0) + } + + pub fn target_clickhouse_keeper_zone_count(&self) -> usize { + self.policy + .clickhouse_policy + .as_ref() + .map(|policy| policy.target_keepers) + .unwrap_or(0) + } + pub fn target_cockroachdb_cluster_version( &self, ) -> CockroachDbClusterVersion { diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index e4958fc3c3..9db1b9d411 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -125,6 +125,42 @@ impl BlueprintZoneType { } } + /// Identifies whether this is a ClickhouseServer zone + pub fn is_clickhouse_server(&self) -> bool { + match self { + 
BlueprintZoneType::ClickhouseServer(_) => true, + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Nexus(_) + | BlueprintZoneType::Oximeter(_) => false, + } + } + + /// Identifies whether this is a ClickhouseKeeper zone + pub fn is_clickhouse_keeper(&self) -> bool { + match self { + BlueprintZoneType::ClickhouseKeeper(_) => true, + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseServer(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Nexus(_) + | BlueprintZoneType::Oximeter(_) => false, + } + } + /// Returns the durable dataset associated with this zone, if any exists. pub fn durable_dataset(&self) -> Option> { let (dataset, kind, &address) = match self { @@ -132,10 +168,14 @@ impl BlueprintZoneType { blueprint_zone_type::Clickhouse { dataset, address }, ) => (dataset, DatasetKind::Clickhouse, address), BlueprintZoneType::ClickhouseKeeper( - blueprint_zone_type::ClickhouseKeeper { dataset, address }, + blueprint_zone_type::ClickhouseKeeper { + dataset, address, .. + }, ) => (dataset, DatasetKind::ClickhouseKeeper, address), BlueprintZoneType::ClickhouseServer( - blueprint_zone_type::ClickhouseServer { dataset, address }, + blueprint_zone_type::ClickhouseServer { + dataset, address, .. 
+ }, ) => (dataset, DatasetKind::ClickhouseServer, address), BlueprintZoneType::CockroachDb( blueprint_zone_type::CockroachDb { address, dataset }, @@ -301,6 +341,7 @@ pub mod blueprint_zone_type { Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, )] pub struct ClickhouseKeeper { + pub keeper_id: clickhouse_admin_types::KeeperId, pub address: SocketAddrV6, pub dataset: OmicronZoneDataset, } @@ -310,6 +351,7 @@ pub mod blueprint_zone_type { Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, )] pub struct ClickhouseServer { + pub server_id: clickhouse_admin_types::ServerId, pub address: SocketAddrV6, pub dataset: OmicronZoneDataset, } diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 0ec2f6fbdb..d8cfe1300e 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -13,6 +13,7 @@ use crate::external_api::params::PhysicalDiskKind; use crate::external_api::params::UninitializedSledId; use chrono::DateTime; use chrono::Utc; +use clickhouse_admin_types::KeeperId; pub use gateway_client::types::PowerState; pub use gateway_client::types::RotImageError; pub use gateway_client::types::RotSlot; @@ -28,6 +29,7 @@ pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; pub use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use serde::{Deserialize, Serialize}; @@ -115,6 +117,12 @@ pub struct Collection { /// Omicron zones found, by *sled* id pub omicron_zones: BTreeMap, + + /// The raft configuration (cluster membership) of the clickhouse keeper + /// cluster as returned from each available keeper via `clickhouse-admin` in + /// the `ClickhouseKeeper` zone + pub clickhouse_keeper_cluster_membership: + BTreeMap<OmicronZoneUuid, ClickhouseKeeperClusterMembership>, } impl Collection { @@ -152,6 +160,27 @@ impl Collection { .filter(|(_, inventory)| inventory.sled_role == SledRole::Scrimlet) .map(|(sled_id, _)| *sled_id) } + + /// Return the latest clickhouse keeper configuration in this collection, if there is one. + pub fn latest_clickhouse_keeper_membership( + &self, + ) -> Option<(OmicronZoneUuid, ClickhouseKeeperClusterMembership)> { + let mut latest = None; + for (zone_id, membership) in &self.clickhouse_keeper_cluster_membership + { + match &latest { + None => latest = Some((*zone_id, membership.clone())), + Some((_, latest_membership)) => { + if membership.leader_committed_log_index + > latest_membership.leader_committed_log_index + { + latest = Some((*zone_id, membership.clone())); + } + } + } + } + latest + } } /// A unique baseboard id found during a collection @@ -424,3 +453,17 @@ pub struct OmicronZonesFound { pub sled_id: SledUuid, pub zones: OmicronZonesConfig, } + +/// The configuration of the clickhouse keeper raft cluster returned from a +/// single keeper node +/// +/// Each keeper is asked for its known raft configuration via `clickhouse-admin` +/// dropshot servers running in `ClickhouseKeeper` zones. We include the +/// leader committed log index known to the current keeper node (whether or not +/// it is the leader) to determine which configuration is newest. 
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct ClickhouseKeeperClusterMembership { + pub queried_keeper: KeeperId, + pub leader_committed_log_index: u64, + pub raft_config: BTreeSet<KeeperId>, +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e851d2ed6b..d837cc7552 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3549,6 +3549,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( -- Eventually, that nullability should be removed. filesystem_pool UUID, + -- Clickhouse related node identifiers for a clickhouse cluster deployment + -- These only apply to `ClickhouseKeeper` or `ClickhouseServer` zones. + clickhouse_keeper_id INT8, + clickhouse_server_id INT8, + PRIMARY KEY (blueprint_id, id) ); @@ -3566,6 +3571,26 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( PRIMARY KEY (blueprint_id, id) ); +--- Blueprint information related to clickhouse cluster management +CREATE TABLE IF NOT EXISTS omicron.public.bp_clickhouse_cluster_config ( + -- Foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + -- Generation number to track changes to the cluster state. + -- Used for optimistic concurrency control. + generation INT8 NOT NULL, + + -- Clickhouse server and keeper ids can never be reused. We hand them out + -- monotonically and keep track of the last one used here. + max_used_server_id INT8 NOT NULL, + max_used_keeper_id INT8 NOT NULL, + + -- Each clickhouse cluster has a unique name and secret value. These are set + -- once and shared among all nodes for the lifetime of the fleet. + cluster_name TEXT NOT NULL, + cluster_secret TEXT NOT NULL +); + + -- Mapping of Omicron zone ID to CockroachDB node ID. This isn't directly used -- by the blueprint tables above, but is used by the more general Reconfigurator -- system along with them (e.g., to decommission expunged CRDB nodes). diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 2aefd8f464..ba6ac8cf41 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -22,6 +22,7 @@ cancel-safe-futures.workspace = true cfg-if.workspace = true chrono.workspace = true clap.workspace = true +clickhouse-admin-types.workspace = true # Only used by the simulated sled agent. crucible-agent-client.workspace = true derive_more.workspace = true diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 7bf3a7a875..9f3c1e3478 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -739,13 +739,13 @@ impl Plan { // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // Temporary linter rule until replicated Clickhouse is enabled #[allow(clippy::reversed_empty_ranges)] - for _ in 0..CLICKHOUSE_SERVER_COUNT { + for id in 0..CLICKHOUSE_SERVER_COUNT as u64 { let sled = { let which_sled = sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = OmicronZoneUuid::new_v4(); + let zone_id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); // TODO: This may need to be a different port if/when to have single node // and replicated running side by side as per stage 1 of RFD 468. 
@@ -753,7 +753,7 @@ impl Plan { let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( - id, + zone_id, ip, ServiceName::ClickhouseServer, port, @@ -764,10 +764,11 @@ impl Plan { let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, - id, + id: zone_id, underlay_address: ip, zone_type: BlueprintZoneType::ClickhouseServer( blueprint_zone_type::ClickhouseServer { + server_id: id.into(), address, dataset: OmicronZoneDataset { pool_name: dataset_name.pool().clone(), @@ -782,19 +783,19 @@ impl Plan { // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // Temporary linter rule until replicated Clickhouse is enabled #[allow(clippy::reversed_empty_ranges)] - for _ in 0..CLICKHOUSE_KEEPER_COUNT { + for id in 0..CLICKHOUSE_KEEPER_COUNT as u64 { let sled = { let which_sled = sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = OmicronZoneUuid::new_v4(); + let zone_id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_KEEPER_TCP_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); dns_builder .host_zone_with_one_backend( - id, + zone_id, ip, ServiceName::ClickhouseKeeper, port, @@ -805,10 +806,11 @@ impl Plan { let filesystem_pool = Some(dataset_name.pool().clone()); sled.request.zones.push(BlueprintZoneConfig { disposition: BlueprintZoneDisposition::InService, - id, + id: zone_id, underlay_address: ip, zone_type: BlueprintZoneType::ClickhouseKeeper( blueprint_zone_type::ClickhouseKeeper { + keeper_id: id.into(), address, dataset: OmicronZoneDataset { pool_name: dataset_name.pool().clone(), diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3f73e55d0f..07b93654b8 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -91,7 +91,7 @@ use nexus_sled_agent_shared::inventory::{ }; use nexus_types::deployment::{ blueprint_zone_type, Blueprint, BlueprintZoneType, BlueprintZonesConfig, - CockroachDbPreserveDowngrade, + ClickhouseClusterConfig, CockroachDbPreserveDowngrade, }; use nexus_types::external_api::views::SledState; use omicron_common::address::get_sled_address; @@ -692,6 +692,7 @@ impl ServiceInner { .map_err(SetupServiceError::ConvertPlanToBlueprint)?; // ... and use that to derive the initial blueprint from our plan. 
let blueprint = build_initial_blueprint_from_plan( + &sled_plan.rack_id, &sled_configs_by_id, service_plan, ) @@ -1388,6 +1389,7 @@ fn build_sled_configs_by_id( // Build an initial blueprint fn build_initial_blueprint_from_plan( + rack_id: &Uuid, sled_configs_by_id: &BTreeMap, service_plan: &ServicePlan, ) -> anyhow::Result { @@ -1396,6 +1398,7 @@ fn build_initial_blueprint_from_plan( .context("invalid internal dns version")?; let blueprint = build_initial_blueprint_from_sled_configs( + rack_id, sled_configs_by_id, internal_dns_version, ); @@ -1404,6 +1407,7 @@ fn build_initial_blueprint_from_plan( } pub(crate) fn build_initial_blueprint_from_sled_configs( + rack_id: &Uuid, sled_configs_by_id: &BTreeMap, internal_dns_version: Generation, ) -> Blueprint { @@ -1449,6 +1453,9 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( cockroachdb_fingerprint: String::new(), cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify, + clickhouse_cluster_config: ClickhouseClusterConfig::new( + rack_id.to_string(), + ), time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index b546025654..4c63f0cfe7 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -255,12 +255,12 @@ async fn handoff_to_nexus( log: &Logger, config: &Config, request: &NexusTypes::RackInitializationRequest, + rack_id: &Uuid, ) -> Result<(), anyhow::Error> { let nexus_client = NexusClient::new( &format!("http://{}", config.nexus_address), log.new(o!("component" => "NexusClient")), ); - let rack_id = uuid::uuid!("c19a698f-c6f9-4a17-ae30-20d711b8f7dc"); let notify_nexus = || async { nexus_client @@ -548,8 +548,10 @@ pub async fn run_standalone_server( SledConfig { disks, zones }, ); + let rack_id = uuid::uuid!("c19a698f-c6f9-4a17-ae30-20d711b8f7dc"); let rack_init_request = NexusTypes::RackInitializationRequest { blueprint: build_initial_blueprint_from_sled_configs( + &rack_id, &sled_configs, internal_dns_version, ), @@ -576,7 +578,7 @@ pub async fn run_standalone_server( allowed_source_ips: NexusTypes::AllowedSourceIps::Any, }; - handoff_to_nexus(&log, &config, &rack_init_request).await?; + handoff_to_nexus(&log, &config, &rack_init_request, &rack_id).await?; info!(log, "Handoff to Nexus is complete"); server.wait_for_finish().await
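A note for reviewers, since the generation handling above is easy to misread: `BlueprintBuilder` derives a `ClickhouseIdAllocator` from the parent blueprint's `ClickhouseClusterConfig`, hands out IDs while placing zones, and then folds the allocator back in via `update_configuration`, which bumps the generation at most once per blueprint. The sketch below is illustrative only and is not part of this diff; it assumes the workspace crates and the APIs exactly as introduced here (`ClickhouseClusterConfig::new`, `ClickhouseIdAllocator::from`, `next_server_id`, `next_keeper_id`, `has_configuration_changed`, `update_configuration`).

```rust
// Illustrative sketch only; mirrors how the builder is expected to drive the
// new types introduced in this PR.
use nexus_types::deployment::{ClickhouseClusterConfig, ClickhouseIdAllocator};

fn main() {
    // Initial config, as RSS would create it (the cluster name is arbitrary).
    let mut config =
        ClickhouseClusterConfig::new("cluster-example".to_string());
    let parent_generation = config.generation;

    // A child blueprint derives an allocator from the parent's config ...
    let mut allocator = ClickhouseIdAllocator::from(&config);

    // ... and hands out fresh, never-reused IDs while placing zones.
    let server_id = allocator.next_server_id();
    let keeper_id = allocator.next_keeper_id();
    assert_eq!(server_id.0, 1);
    assert_eq!(keeper_id.0, 1);

    // Folding the allocator back in records the new maxima and bumps the
    // generation exactly once, even though two IDs were allocated.
    assert!(config.has_configuration_changed(&allocator));
    assert!(config.update_configuration(&allocator));
    assert_eq!(config.generation, parent_generation.next());

    // Folding in the same allocator again is a no-op.
    assert!(!config.update_configuration(&allocator));
}
```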
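Similarly, `Collection::latest_clickhouse_keeper_membership` simply picks whichever keeper reported the highest `leader_committed_log_index`. Because a real `Collection` has many unrelated fields, the sketch below applies the same rule to a bare map of the new `ClickhouseKeeperClusterMembership` values; it is illustrative only, and the `latest` helper is hypothetical (on ties it keeps an arbitrary entry, whereas the method in this diff keeps the first one seen).

```rust
// Illustrative, standalone restatement of the selection rule used by
// `Collection::latest_clickhouse_keeper_membership`.
use std::collections::{BTreeMap, BTreeSet};

use clickhouse_admin_types::KeeperId;
use nexus_types::inventory::ClickhouseKeeperClusterMembership;
use omicron_uuid_kinds::OmicronZoneUuid;

fn latest(
    memberships: &BTreeMap<OmicronZoneUuid, ClickhouseKeeperClusterMembership>,
) -> Option<(OmicronZoneUuid, ClickhouseKeeperClusterMembership)> {
    memberships
        .iter()
        .max_by_key(|(_, m)| m.leader_committed_log_index)
        .map(|(zone_id, m)| (*zone_id, m.clone()))
}

fn main() {
    let raft_config: BTreeSet<KeeperId> =
        [KeeperId(1), KeeperId(2), KeeperId(3)].into_iter().collect();

    let mut memberships = BTreeMap::new();
    for (keeper, index) in [(1u64, 100u64), (2, 250), (3, 175)] {
        memberships.insert(
            OmicronZoneUuid::new_v4(),
            ClickhouseKeeperClusterMembership {
                queried_keeper: KeeperId(keeper),
                leader_committed_log_index: index,
                raft_config: raft_config.clone(),
            },
        );
    }

    // Keeper 2 reported the highest committed index, so its view wins.
    let (_zone_id, newest) = latest(&memberships).unwrap();
    assert_eq!(newest.queried_keeper, KeeperId(2));
}
```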