diff --git a/Cargo.lock b/Cargo.lock index 3060a8fae7..88e9afd8c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5224,6 +5224,36 @@ dependencies = [ "thiserror", ] +[[package]] +name = "omicron-cockroach-admin" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap", + "csv", + "dropshot", + "http 0.2.12", + "illumos-utils", + "nexus-test-utils", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "pq-sys", + "schemars", + "serde", + "slog", + "slog-async", + "slog-dtrace", + "slog-error-chain", + "thiserror", + "tokio", + "toml 0.8.13", + "url", +] + [[package]] name = "omicron-common" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index a350f59f0a..e6b0ffb099 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ members = [ "clients/oximeter-client", "clients/sled-agent-client", "clients/wicketd-client", + "cockroach-admin", "common", "dev-tools/crdb-seed", "dev-tools/omdb", @@ -96,6 +97,7 @@ default-members = [ "clients/oximeter-client", "clients/sled-agent-client", "clients/wicketd-client", + "cockroach-admin", "common", "dev-tools/crdb-seed", "dev-tools/omdb", @@ -338,6 +340,7 @@ nexus-test-utils = { path = "nexus/test-utils" } nexus-types = { path = "nexus/types" } num-integer = "0.1.46" num = { version = "0.4.3", default-features = false, features = [ "libm" ] } +omicron-cockroach-admin = { path = "cockroach-admin" } omicron-common = { path = "common" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } @@ -483,6 +486,7 @@ typed-rng = { path = "typed-rng" } unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } +url = "2.5.0" usdt = "0.5.0" uuid = { version = "1.8.0", features = ["serde", "v4"] } uzers = "0.11" diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml new file mode 100644 index 0000000000..e0c02493c2 --- /dev/null +++ b/cockroach-admin/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "omicron-cockroach-admin" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +csv.workspace = true +dropshot.workspace = true +http.workspace = true +illumos-utils.workspace = true +omicron-common.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +schemars.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-dtrace.workspace = true +slog-error-chain.workspace = true +serde.workspace = true +thiserror.workspace = true +tokio.workspace = true +toml.workspace = true + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +nexus-test-utils.workspace = true +omicron-test-utils.workspace = true +url.workspace = true + +[lints] +workspace = true diff --git a/cockroach-admin/build.rs b/cockroach-admin/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/cockroach-admin/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/cockroach-admin/src/bin/cockroach-admin.rs b/cockroach-admin/src/bin/cockroach-admin.rs new file mode 100644 index 0000000000..eb28082faa --- /dev/null +++ b/cockroach-admin/src/bin/cockroach-admin.rs @@ -0,0 +1,79 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Executable program to run the Omicron CockroachDb admin interface (not to be +//! confused with CockroachDb's built-in HTTP API) + +use anyhow::anyhow; +use camino::Utf8PathBuf; +use clap::Parser; +use omicron_cockroach_admin::CockroachCli; +use omicron_cockroach_admin::Config; +use omicron_common::cmd::fatal; +use omicron_common::cmd::CmdError; +use std::net::SocketAddr; +use std::net::SocketAddrV6; + +#[derive(Debug, Parser)] +#[clap(name = "cockroach-admin", about = "Omicron CRDB cluster admin server")] +enum Args { + /// Print the OpenAPI Spec document and exit + Openapi, + + /// Start the CRDB admin server + Run { + /// Path to the `cockroach` CLI + #[clap(long, action)] + path_to_cockroach_binary: Utf8PathBuf, + + /// Socket address for a running cockroach server instance + #[clap(long, action)] + cockroach_address: SocketAddrV6, + + /// Address on which this server should run + #[clap(long, action)] + http_address: SocketAddrV6, + + /// Path to the server config file + #[clap(long, action)] + config_file_path: Utf8PathBuf, + }, +} + +#[tokio::main] +async fn main() { + if let Err(err) = main_impl().await { + fatal(err); + } +} + +async fn main_impl() -> Result<(), CmdError> { + let args = Args::parse(); + + match args { + Args::Openapi => omicron_cockroach_admin::run_openapi() + .map_err(|e| CmdError::Failure(anyhow!(e))), + Args::Run { + path_to_cockroach_binary, + cockroach_address, + http_address, + config_file_path, + } => { + let cockroach_cli = + CockroachCli::new(path_to_cockroach_binary, cockroach_address); + let mut config = Config::from_file(&config_file_path) + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + config.dropshot.bind_address = SocketAddr::V6(http_address); + let server = + omicron_cockroach_admin::start_server(cockroach_cli, config) + .await + .map_err(|err| CmdError::Failure(anyhow!(err)))?; + server.await.map_err(|err| { + CmdError::Failure(anyhow!( + "server failed after starting: {err}" + )) + }) + } + } +} diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs new file mode 100644 index 0000000000..5b3958546f --- /dev/null +++ b/cockroach-admin/src/cockroach_cli.rs @@ -0,0 +1,434 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8PathBuf; +use chrono::DateTime; +use chrono::NaiveDateTime; +use chrono::Utc; +use dropshot::HttpError; +use illumos_utils::output_to_exec_error; +use illumos_utils::ExecutionError; +use schemars::JsonSchema; +use serde::de; +use serde::Deserialize; +use serde::Serialize; +use slog_error_chain::InlineErrorChain; +use slog_error_chain::SlogInlineError; +use std::io; +use std::net::SocketAddr; +use std::net::SocketAddrV6; +use tokio::process::Command; + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum CockroachCliError { + #[error("failed to invoke `cockroach {subcommand}`")] + InvokeCli { + subcommand: &'static str, + #[source] + err: io::Error, + }, + #[error(transparent)] + ExecutionError(#[from] ExecutionError), + #[error( + "failed to parse `cockroach {subcommand}` output \ + (stdout: {stdout}, stderr: {stderr})" + )] + ParseOutput { + subcommand: &'static str, + stdout: String, + stderr: String, + #[source] + err: csv::Error, + }, +} + +impl From for HttpError { + fn from(err: CockroachCliError) -> Self { + match err { + CockroachCliError::InvokeCli { .. } + | CockroachCliError::ExecutionError(_) + | CockroachCliError::ParseOutput { .. } => { + let message = InlineErrorChain::new(&err).to_string(); + HttpError { + status_code: http::StatusCode::INTERNAL_SERVER_ERROR, + error_code: Some(String::from("Internal")), + external_message: message.clone(), + internal_message: message, + } + } + } + } +} + +#[derive(Debug)] +pub struct CockroachCli { + path_to_cockroach_binary: Utf8PathBuf, + cockroach_address: SocketAddrV6, +} + +impl CockroachCli { + pub fn new( + path_to_cockroach_binary: Utf8PathBuf, + cockroach_address: SocketAddrV6, + ) -> Self { + Self { path_to_cockroach_binary, cockroach_address } + } + + pub async fn node_status( + &self, + ) -> Result, CockroachCliError> { + let mut command = Command::new(&self.path_to_cockroach_binary); + command + .arg("node") + .arg("status") + .arg("--host") + .arg(&format!("{}", self.cockroach_address)) + .arg("--insecure") + .arg("--format") + .arg("csv"); + let output = command.output().await.map_err(|err| { + CockroachCliError::InvokeCli { subcommand: "node status", err } + })?; + if !output.status.success() { + return Err(output_to_exec_error(command.as_std(), &output).into()); + } + NodeStatus::parse_from_csv(io::Cursor::new(&output.stdout)).map_err( + |err| CockroachCliError::ParseOutput { + subcommand: "node status", + stdout: String::from_utf8_lossy(&output.stdout).to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + err, + }, + ) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeStatus { + pub node_id: String, + pub address: SocketAddr, + pub sql_address: SocketAddr, + pub build: String, + pub started_at: DateTime, + pub updated_at: DateTime, + pub locality: String, + pub is_available: bool, + pub is_live: bool, +} + +// Slightly different `NodeStatus` that matches what we get from `cockroach`: +// +// * `id` column instead of `node_id` +// * timestamps are a fixed format with no timezone, so we have a custom +// deserializer +#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] +struct CliNodeStatus { + id: String, + address: SocketAddr, + sql_address: SocketAddr, + build: String, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + started_at: DateTime, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + updated_at: DateTime, + locality: String, + is_available: bool, + is_live: bool, +} + +impl From for NodeStatus { + fn from(cli: CliNodeStatus) -> Self { + Self { + node_id: cli.id, + address: cli.address, + sql_address: cli.sql_address, + build: cli.build, + started_at: cli.started_at, + updated_at: cli.updated_at, + locality: cli.locality, + is_available: cli.is_available, + is_live: cli.is_live, + } + } +} + +fn parse_cockroach_cli_timestamp<'de, D>( + d: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + struct CockroachTimestampVisitor; + impl<'de> de::Visitor<'de> for CockroachTimestampVisitor { + type Value = DateTime; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter.write_str("a Cockroach CLI timestamp") + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f") + .map_err(E::custom)?; + Ok(DateTime::from_naive_utc_and_offset(dt, Utc)) + } + } + + d.deserialize_str(CockroachTimestampVisitor) +} + +impl NodeStatus { + pub fn parse_from_csv(reader: R) -> Result, csv::Error> + where + R: io::Read, + { + let mut statuses = Vec::new(); + let mut reader = csv::Reader::from_reader(reader); + for result in reader.deserialize() { + let record: CliNodeStatus = result?; + statuses.push(record.into()); + } + Ok(statuses) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDate; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + use url::Url; + + #[test] + fn test_node_status_parse_single_line_from_csv() { + let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live +1,[::1]:42021,[::1]:42021,v22.1.9,2024-05-21 15:19:50.523796,2024-05-21 16:31:28.050069,,true,true"#; + let expected = NodeStatus { + node_id: "1".to_string(), + address: "[::1]:42021".parse().unwrap(), + sql_address: "[::1]:42021".parse().unwrap(), + build: "v22.1.9".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 19, 50, 523796) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(16, 31, 28, 50069) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }; + + let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input)) + .expect("parsed input"); + assert_eq!(statuses, vec![expected]); + } + + #[test] + fn test_node_status_parse_multiple_lines_from_csv() { + let input = r#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live +1,[fd00:1122:3344:109::3]:32221,[fd00:1122:3344:109::3]:32221,v22.1.9-dirty,2024-05-18 19:18:00.597145,2024-05-21 15:22:34.290434,,true,true +2,[fd00:1122:3344:105::3]:32221,[fd00:1122:3344:105::3]:32221,v22.1.9-dirty,2024-05-18 19:17:01.796714,2024-05-21 15:22:34.901268,,true,true +3,[fd00:1122:3344:10b::3]:32221,[fd00:1122:3344:10b::3]:32221,v22.1.9-dirty,2024-05-18 19:18:52.37564,2024-05-21 15:22:36.341146,,true,true +4,[fd00:1122:3344:107::3]:32221,[fd00:1122:3344:107::3]:32221,v22.1.9-dirty,2024-05-18 19:16:22.788276,2024-05-21 15:22:34.897047,,true,true +5,[fd00:1122:3344:108::3]:32221,[fd00:1122:3344:108::3]:32221,v22.1.9-dirty,2024-05-18 19:18:09.196634,2024-05-21 15:22:35.168738,,true,true"#; + let expected = vec![ + NodeStatus { + node_id: "1".to_string(), + address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:109::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 0, 597145) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 290434) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "2".to_string(), + address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:105::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 17, 1, 796714) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 901268) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "3".to_string(), + address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:10b::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 52, 375640) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 36, 341146) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "4".to_string(), + address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:107::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 16, 22, 788276) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 34, 897047) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + NodeStatus { + node_id: "5".to_string(), + address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:108::3]:32221".parse().unwrap(), + build: "v22.1.9-dirty".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 18) + .unwrap() + .and_hms_micro_opt(19, 18, 9, 196634) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2024, 5, 21) + .unwrap() + .and_hms_micro_opt(15, 22, 35, 168738) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + }, + ]; + + let statuses = NodeStatus::parse_from_csv(io::Cursor::new(input)) + .expect("parsed input"); + assert_eq!(statuses.len(), expected.len()); + for (status, expected) in statuses.iter().zip(&expected) { + assert_eq!(status, expected); + } + } + + // Ensure that if `cockroach node status` changes in a future CRDB version + // bump, we have a test that will fail to force us to check whether our + // current parsing is still valid. + #[tokio::test] + async fn test_node_status_compatibility() { + let logctx = dev::test_setup_log("test_node_status_compatibility"); + let mut db = test_setup_database(&logctx.log).await; + let db_url = db.listen_url().to_string(); + + let expected_headers = "id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live"; + + // Manually run cockroach node status to grab just the CSV header line + // (which the `csv` crate normally eats on our behalf) and check it's + // exactly what we expect. + let mut command = Command::new("cockroach"); + command + .arg("node") + .arg("status") + .arg("--url") + .arg(&db_url) + .arg("--format") + .arg("csv"); + let output = + command.output().await.expect("ran `cockroach node status`"); + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut lines = stdout.lines(); + let headers = lines.next().expect("header line"); + assert_eq!( + headers, expected_headers, + "`cockroach node status --format csv` headers may have changed?" + ); + + // We should also be able to run our wrapper against this cockroach. + let url: Url = db_url.parse().expect("valid url"); + let cockroach_address: SocketAddrV6 = format!( + "{}:{}", + url.host().expect("url has host"), + url.port().expect("url has port") + ) + .parse() + .expect("valid SocketAddrV6"); + let cli = CockroachCli::new("cockroach".into(), cockroach_address); + let status = cli.node_status().await.expect("got node status"); + + // We can't check all the fields exactly, but some we know based on the + // fact that our test database is a single node. + assert_eq!(status.len(), 1); + assert_eq!(status[0].node_id, "1"); + assert_eq!(status[0].address, SocketAddr::V6(cockroach_address)); + assert_eq!(status[0].sql_address, SocketAddr::V6(cockroach_address)); + assert_eq!(status[0].is_available, true); + assert_eq!(status[0].is_live, true); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/cockroach-admin/src/config.rs b/cockroach-admin/src/config.rs new file mode 100644 index 0000000000..77a624835c --- /dev/null +++ b/cockroach-admin/src/config.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use camino::Utf8Path; +use camino::Utf8PathBuf; +use dropshot::ConfigDropshot; +use dropshot::ConfigLogging; +use serde::Deserialize; +use serde::Serialize; +use slog_error_chain::SlogInlineError; +use std::io; + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct Config { + pub dropshot: ConfigDropshot, + pub log: ConfigLogging, +} +impl Config { + /// Load a `Config` from the given TOML file + pub fn from_file(path: &Utf8Path) -> Result { + let contents = std::fs::read_to_string(path) + .map_err(|err| LoadError::Read { path: path.to_owned(), err })?; + toml::de::from_str(&contents) + .map_err(|err| LoadError::Parse { path: path.to_owned(), err }) + } +} + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum LoadError { + #[error("failed to read {path}")] + Read { + path: Utf8PathBuf, + #[source] + err: io::Error, + }, + #[error("failed to parse {path} as TOML")] + Parse { + path: Utf8PathBuf, + #[source] + err: toml::de::Error, + }, +} diff --git a/cockroach-admin/src/context.rs b/cockroach-admin/src/context.rs new file mode 100644 index 0000000000..b3f39f463a --- /dev/null +++ b/cockroach-admin/src/context.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::CockroachCli; + +pub struct ServerContext { + pub cockroach_cli: CockroachCli, +} diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs new file mode 100644 index 0000000000..24d36c9823 --- /dev/null +++ b/cockroach-admin/src/http_entrypoints.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::cockroach_cli::NodeStatus; +use crate::context::ServerContext; +use dropshot::endpoint; +use dropshot::HttpError; +use dropshot::HttpResponseOk; +use dropshot::RequestContext; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::sync::Arc; + +type CrdbApiDescription = dropshot::ApiDescription>; + +pub fn api() -> CrdbApiDescription { + fn register_endpoints(api: &mut CrdbApiDescription) -> Result<(), String> { + api.register(node_status)?; + Ok(()) + } + + let mut api = CrdbApiDescription::new(); + if let Err(err) = register_endpoints(&mut api) { + panic!("failed to register entrypoints: {}", err); + } + api +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct ClusterNodeStatus { + pub all_nodes: Vec, +} + +/// Get the status of all nodes in the CRDB cluster +#[endpoint { + method = GET, + path = "/node/status", +}] +async fn node_status( + rqctx: RequestContext>, +) -> Result, HttpError> { + let ctx = rqctx.context(); + let all_nodes = + ctx.cockroach_cli.node_status().await.map_err(HttpError::from)?; + Ok(HttpResponseOk(ClusterNodeStatus { all_nodes })) +} diff --git a/cockroach-admin/src/lib.rs b/cockroach-admin/src/lib.rs new file mode 100644 index 0000000000..d6c53c8dc6 --- /dev/null +++ b/cockroach-admin/src/lib.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use context::ServerContext; +use omicron_common::FileKv; +use slog::debug; +use slog::error; +use slog::Drain; +use slog_dtrace::ProbeRegistration; +use slog_error_chain::SlogInlineError; +use std::error::Error; +use std::io; +use std::sync::Arc; + +mod cockroach_cli; +mod config; +mod context; +mod http_entrypoints; + +pub use cockroach_cli::CockroachCli; +pub use cockroach_cli::CockroachCliError; +pub use config::Config; + +/// Run the OpenAPI generator for the API; this emits the OpenAPI spec to +/// stdout. +pub fn run_openapi() -> Result<(), String> { + http_entrypoints::api() + .openapi("Oxide CockroachDb Cluster Admin API", "0.0.1") + .description( + "API for interacting with the Oxide \ + control plane's CockroachDb cluster", + ) + .contact_url("https://oxide.computer") + .contact_email("api@oxide.computer") + .write(&mut std::io::stdout()) + .map_err(|e| e.to_string()) +} + +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum StartError { + #[error("failed to initialize logger")] + InitializeLogger(#[source] io::Error), + #[error("failed to register dtrace probes: {0}")] + RegisterDtraceProbes(String), + #[error("failed to initialize HTTP server")] + InitializeHttpServer(#[source] Box), +} + +pub type Server = dropshot::HttpServer>; + +/// Start the dropshot server +pub async fn start_server( + cockroach_cli: CockroachCli, + server_config: Config, +) -> Result { + let (drain, registration) = slog_dtrace::with_drain( + server_config + .log + .to_logger("cockroach-admin") + .map_err(StartError::InitializeLogger)?, + ); + let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); + match registration { + ProbeRegistration::Success => { + debug!(log, "registered DTrace probes"); + } + ProbeRegistration::Failed(err) => { + let err = StartError::RegisterDtraceProbes(err); + error!(log, "failed to register DTrace probes"; &err); + return Err(err); + } + } + + let context = ServerContext { cockroach_cli }; + let http_server_starter = dropshot::HttpServerStarter::new( + &server_config.dropshot, + http_entrypoints::api(), + Arc::new(context), + &log.new(slog::o!("component" => "dropshot")), + ) + .map_err(StartError::InitializeHttpServer)?; + + Ok(http_server_starter.start()) +} diff --git a/common/src/address.rs b/common/src/address.rs index eddfb996c4..b246f8f392 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -46,6 +46,7 @@ pub const DNS_HTTP_PORT: u16 = 5353; pub const SLED_AGENT_PORT: u16 = 12345; pub const COCKROACH_PORT: u16 = 32221; +pub const COCKROACH_ADMIN_PORT: u16 = 32222; pub const CRUCIBLE_PORT: u16 = 32345; pub const CLICKHOUSE_PORT: u16 = 8123; pub const CLICKHOUSE_KEEPER_PORT: u16 = 9181; diff --git a/package-manifest.toml b/package-manifest.toml index 7f80dacf7c..bffd5be7dc 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -204,6 +204,7 @@ only_for_targets.image = "standard" source.type = "composite" source.packages = [ "cockroachdb-service.tar.gz", + "omicron-cockroach-admin.tar.gz", "internal-dns-cli.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" @@ -224,6 +225,20 @@ output.type = "zone" output.intermediate_only = true setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries" +[package.omicron-cockroach-admin] +service_name = "cockroach-admin" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["cockroach-admin"] +source.rust.release = true +source.paths = [ + { from = "smf/cockroach-admin/manifest.xml", to = "/var/svc/manifest/site/cockroach-admin/manifest.xml" }, + { from = "smf/cockroach-admin/config.toml", to = "/opt/oxide/lib/svc/cockroach-admin/config.toml" }, + { from = "smf/cockroach-admin/method_script.sh", to = "/opt/oxide/lib/svc/manifest/cockroach-admin.sh" }, +] +output.type = "zone" +output.intermediate_only = true + [package.internal-dns-cli] service_name = "internal-dns-cli" only_for_targets.image = "standard" diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs index 1addbca4c9..33e30d1d7b 100644 --- a/sled-agent/src/profile.rs +++ b/sled-agent/src/profile.rs @@ -183,7 +183,12 @@ impl PropertyGroupBuilder { } } - pub fn add_property(mut self, name: &str, ty: &str, value: &str) -> Self { + pub fn add_property>( + mut self, + name: &str, + ty: &str, + value: S, + ) -> Self { // The data structures here are oriented around a few goals: // // - Properties will be written out in the order that they were added. @@ -209,7 +214,7 @@ impl PropertyGroupBuilder { .property_values .entry(name.to_string()) .or_insert_with(Vec::new); - values.push(value.to_string()); + values.push(value.into()); self } } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index ff10d4aed7..7df9f06d53 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -61,7 +61,6 @@ use illumos_utils::{execute, PFEXEC}; use internal_dns::resolver::Resolver; use itertools::Itertools; use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; -use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT; use omicron_common::address::CLICKHOUSE_KEEPER_PORT; use omicron_common::address::CLICKHOUSE_PORT; use omicron_common::address::COCKROACH_PORT; @@ -78,6 +77,7 @@ use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; use omicron_common::address::{AZ_PREFIX, OXIMETER_PORT}; +use omicron_common::address::{BOOTSTRAP_ARTIFACT_PORT, COCKROACH_ADMIN_PORT}; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, @@ -1406,7 +1406,7 @@ impl ServiceManager { match domain { Some(d) => { dns_config_builder = - dns_config_builder.add_property("domain", "astring", &d) + dns_config_builder.add_property("domain", "astring", d) } None => (), } @@ -1423,10 +1423,11 @@ impl ServiceManager { fn zone_network_setup_install( gw_addr: &Ipv6Addr, zone: &InstalledZone, - static_addr: &String, + static_addr: &Ipv6Addr, ) -> Result { let datalink = zone.get_control_vnic_name(); let gateway = &gw_addr.to_string(); + let static_addr = &static_addr.to_string(); let mut config_builder = PropertyGroupBuilder::new("config"); config_builder = config_builder @@ -1593,7 +1594,7 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address.to_string(); + let listen_addr = underlay_address; let listen_port = &CLICKHOUSE_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( @@ -1605,7 +1606,11 @@ impl ServiceManager { let dns_service = Self::dns_install(info, None, &None).await?; let config = PropertyGroupBuilder::new("config") - .add_property("listen_addr", "astring", listen_addr) + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) .add_property("listen_port", "astring", listen_port) .add_property("store", "astring", "/data"); let clickhouse_service = @@ -1642,7 +1647,7 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address.to_string(); + let listen_addr = underlay_address; let listen_port = &CLICKHOUSE_KEEPER_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( @@ -1654,7 +1659,11 @@ impl ServiceManager { let dns_service = Self::dns_install(info, None, &None).await?; let config = PropertyGroupBuilder::new("config") - .add_property("listen_addr", "astring", listen_addr) + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) .add_property("listen_port", "astring", listen_port) .add_property("store", "astring", "/data"); let clickhouse_keeper_service = @@ -1694,25 +1703,27 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let address = SocketAddr::new( - IpAddr::V6(*underlay_address), - COCKROACH_PORT, - ); - let listen_addr = &address.ip().to_string(); - let listen_port = &address.port().to_string(); + let crdb_listen_ip = *underlay_address; + let crdb_address = + SocketAddr::new(IpAddr::V6(crdb_listen_ip), COCKROACH_PORT) + .to_string(); + let admin_address = SocketAddr::new( + IpAddr::V6(crdb_listen_ip), + COCKROACH_ADMIN_PORT, + ) + .to_string(); let nw_setup_service = Self::zone_network_setup_install( &info.underlay_address, &installed_zone, - listen_addr, + &crdb_listen_ip, )?; let dns_service = Self::dns_install(info, None, &None).await?; // Configure the CockroachDB service. let cockroachdb_config = PropertyGroupBuilder::new("config") - .add_property("listen_addr", "astring", listen_addr) - .add_property("listen_port", "astring", listen_port) + .add_property("listen_addr", "astring", &crdb_address) .add_property("store", "astring", "/data"); let cockroachdb_service = ServiceBuilder::new("oxide/cockroachdb").add_instance( @@ -1720,10 +1731,26 @@ impl ServiceManager { .add_property_group(cockroachdb_config), ); + // Configure the Omicron cockroach-admin service. + let cockroach_admin_config = + PropertyGroupBuilder::new("config") + .add_property( + "cockroach_address", + "astring", + crdb_address, + ) + .add_property("http_address", "astring", admin_address); + let cockroach_admin_service = + ServiceBuilder::new("oxide/cockroach-admin").add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(cockroach_admin_config), + ); + let profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) .add_service(disabled_ssh_service) .add_service(cockroachdb_service) + .add_service(cockroach_admin_service) .add_service(dns_service) .add_service(enabled_dns_client_service); profile @@ -1747,7 +1774,7 @@ impl ServiceManager { let Some(info) = self.inner.sled_info.get() else { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address.to_string(); + let listen_addr = &underlay_address; let listen_port = &CRUCIBLE_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( @@ -1764,7 +1791,11 @@ impl ServiceManager { let uuid = &Uuid::new_v4().to_string(); let config = PropertyGroupBuilder::new("config") .add_property("dataset", "astring", &dataset_name) - .add_property("listen_addr", "astring", listen_addr) + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) .add_property("listen_port", "astring", listen_port) .add_property("uuid", "astring", uuid) .add_property("store", "astring", "/data"); @@ -1802,7 +1833,7 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let listen_addr = &underlay_address.to_string(); + let listen_addr = &underlay_address; let listen_port = &CRUCIBLE_PANTRY_PORT.to_string(); let nw_setup_service = Self::zone_network_setup_install( @@ -1812,7 +1843,11 @@ impl ServiceManager { )?; let config = PropertyGroupBuilder::new("config") - .add_property("listen_addr", "astring", listen_addr) + .add_property( + "listen_addr", + "astring", + listen_addr.to_string(), + ) .add_property("listen_port", "astring", listen_port); let profile = ProfileBuilder::new("omicron") @@ -1853,12 +1888,10 @@ impl ServiceManager { OXIMETER_PORT, ); - let listen_addr = &address.ip().to_string(); - let nw_setup_service = Self::zone_network_setup_install( &info.underlay_address, &installed_zone, - listen_addr, + underlay_address, )?; let oximeter_config = PropertyGroupBuilder::new("config") @@ -1896,12 +1929,10 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let static_addr = underlay_address.to_string(); - let nw_setup_service = Self::zone_network_setup_install( &info.underlay_address, &installed_zone, - &static_addr.clone(), + underlay_address, )?; // Like Nexus, we need to be reachable externally via @@ -1925,7 +1956,8 @@ impl ServiceManager { })?; let opte_ip = port.ip(); - let http_addr = format!("[{}]:{}", static_addr, DNS_HTTP_PORT); + let http_addr = + format!("[{}]:{}", underlay_address, DNS_HTTP_PORT); let dns_addr = format!("{}:{}", opte_ip, DNS_PORT); let external_dns_config = PropertyGroupBuilder::new("config") @@ -1985,12 +2017,10 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let static_addr = underlay_address.to_string(); - let nw_setup_service = Self::zone_network_setup_install( &info.underlay_address, &installed_zone, - &static_addr.clone(), + underlay_address, )?; let is_boundary = matches!( @@ -2083,7 +2113,7 @@ impl ServiceManager { let nw_setup_service = Self::zone_network_setup_install( gz_address, &installed_zone, - &underlay_address.to_string(), + underlay_address, )?; // Internal DNS zones require a special route through @@ -2163,12 +2193,10 @@ impl ServiceManager { return Err(Error::SledAgentNotReady); }; - let static_addr = underlay_address.to_string(); - let nw_setup_service = Self::zone_network_setup_install( &info.underlay_address, &installed_zone, - &static_addr.clone(), + underlay_address, )?; // While Nexus will be reachable via `external_ip`, it diff --git a/smf/cockroach-admin/config.toml b/smf/cockroach-admin/config.toml new file mode 100644 index 0000000000..86ee2c5d4b --- /dev/null +++ b/smf/cockroach-admin/config.toml @@ -0,0 +1,10 @@ +[dropshot] +# 1 MiB; we don't expect any requests of more than nominal size. +request_body_max_bytes = 1048576 + +[log] +# Show log messages of this level and more severe +level = "info" +mode = "file" +path = "/dev/stdout" +if_exists = "append" diff --git a/smf/cockroach-admin/manifest.xml b/smf/cockroach-admin/manifest.xml new file mode 100644 index 0000000000..1d6f7c4861 --- /dev/null +++ b/smf/cockroach-admin/manifest.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/cockroach-admin/method_script.sh b/smf/cockroach-admin/method_script.sh new file mode 100755 index 0000000000..c5f924223d --- /dev/null +++ b/smf/cockroach-admin/method_script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -x +set -o errexit +set -o pipefail + +. /lib/svc/share/smf_include.sh + +COCKROACH_ADDR="$(svcprop -c -p config/cockroach_address "${SMF_FMRI}")" +HTTP_ADDR="$(svcprop -c -p config/http_address "${SMF_FMRI}")" + +args=( + 'run' + '--config-file-path' "/opt/oxide/lib/svc/cockroach-admin/config.toml" + '--path-to-cockroach-binary' "/opt/oxide/cockroachdb/bin/cockroach" + '--cockroach-address' "$COCKROACH_ADDR" + '--http-address' "$HTTP_ADDR" +) + +exec /opt/oxide/cockroach-admin/bin/cockroach-admin "${args[@]}" & diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml index 3a9b1a7cb8..67ddbe48b8 100644 --- a/smf/cockroachdb/manifest.xml +++ b/smf/cockroachdb/manifest.xml @@ -29,7 +29,6 @@ - diff --git a/smf/cockroachdb/method_script.sh b/smf/cockroachdb/method_script.sh index e8b02eb1eb..1d33ef94a6 100755 --- a/smf/cockroachdb/method_script.sh +++ b/smf/cockroachdb/method_script.sh @@ -7,7 +7,6 @@ set -o pipefail . /lib/svc/share/smf_include.sh LISTEN_ADDR="$(svcprop -c -p config/listen_addr "${SMF_FMRI}")" -LISTEN_PORT="$(svcprop -c -p config/listen_port "${SMF_FMRI}")" DATASTORE="$(svcprop -c -p config/store "${SMF_FMRI}")" # We need to tell CockroachDB the DNS names or IP addresses of the other nodes @@ -25,7 +24,7 @@ fi args=( '--insecure' - '--listen-addr' "[$LISTEN_ADDR]:$LISTEN_PORT" + '--listen-addr' "$LISTEN_ADDR" '--http-addr' '127.0.0.1:8080' '--store' "$DATASTORE" '--join' "$JOIN_ADDRS" diff --git a/tufaceous-lib/Cargo.toml b/tufaceous-lib/Cargo.toml index e448ed6db5..61224e6080 100644 --- a/tufaceous-lib/Cargo.toml +++ b/tufaceous-lib/Cargo.toml @@ -36,7 +36,7 @@ tar.workspace = true tokio.workspace = true toml.workspace = true tough.workspace = true -url = "2.5.0" +url.workspace = true zip.workspace = true omicron-workspace-hack.workspace = true