From 9706689dc87035f8ac97105256ec7664249267df Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 1 Aug 2024 18:58:33 -0700 Subject: [PATCH] [spr] initial version Created using spr 1.3.6-beta.1 --- Cargo.lock | 89 ++- Cargo.toml | 11 + README.adoc | 2 +- dev-tools/cert-dev/Cargo.toml | 23 + dev-tools/cert-dev/src/main.rs | 91 +++ dev-tools/cert-dev/tests/test-cert-dev.rs | 54 ++ dev-tools/ch-dev/Cargo.toml | 24 + dev-tools/ch-dev/build.rs | 10 + dev-tools/ch-dev/src/main.rs | 230 +++++++ dev-tools/db-dev/Cargo.toml | 30 + dev-tools/db-dev/build.rs | 10 + dev-tools/db-dev/src/main.rs | 229 ++++++ .../output/cmd-db-dev-populate-noargs-stderr} | 2 +- .../output/cmd-db-dev-populate-noargs-stdout} | 0 .../output/cmd-db-dev-wipe-noargs-stderr} | 2 +- .../output/cmd-db-dev-wipe-noargs-stdout} | 0 dev-tools/db-dev/tests/test-db-dev.rs | 303 ++++++++ dev-tools/mgs-dev/Cargo.toml | 19 + dev-tools/mgs-dev/src/main.rs | 69 ++ dev-tools/omicron-dev-lib/Cargo.toml | 14 + dev-tools/omicron-dev-lib/src/lib.rs | 7 + dev-tools/omicron-dev-lib/src/test_utils.rs | 43 ++ dev-tools/omicron-dev/Cargo.toml | 25 +- dev-tools/omicron-dev/src/bin/omicron-dev.rs | 649 ------------------ dev-tools/omicron-dev/src/main.rs | 152 ++++ .../output/cmd-omicron-dev-noargs-stderr | 15 +- .../omicron-dev/tests/test-omicron-dev.rs | 224 ++++++ .../omicron-dev/tests/test_omicron_dev.rs | 585 ---------------- docs/how-to-run-simulated.adoc | 24 +- docs/how-to-run.adoc | 2 +- docs/repo.adoc | 2 +- nexus/src/populate.rs | 2 +- test-utils/src/dev/db.rs | 2 +- wicket/README.md | 9 +- 34 files changed, 1656 insertions(+), 1297 deletions(-) create mode 100644 dev-tools/cert-dev/Cargo.toml create mode 100644 dev-tools/cert-dev/src/main.rs create mode 100644 dev-tools/cert-dev/tests/test-cert-dev.rs create mode 100644 dev-tools/ch-dev/Cargo.toml create mode 100644 dev-tools/ch-dev/build.rs create mode 100644 dev-tools/ch-dev/src/main.rs create mode 100644 dev-tools/db-dev/Cargo.toml create mode 100644 
dev-tools/db-dev/build.rs create mode 100644 dev-tools/db-dev/src/main.rs rename dev-tools/{omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr => db-dev/tests/output/cmd-db-dev-populate-noargs-stderr} (69%) rename dev-tools/{omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout => db-dev/tests/output/cmd-db-dev-populate-noargs-stdout} (100%) rename dev-tools/{omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr => db-dev/tests/output/cmd-db-dev-wipe-noargs-stderr} (67%) rename dev-tools/{omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout => db-dev/tests/output/cmd-db-dev-wipe-noargs-stdout} (100%) create mode 100644 dev-tools/db-dev/tests/test-db-dev.rs create mode 100644 dev-tools/mgs-dev/Cargo.toml create mode 100644 dev-tools/mgs-dev/src/main.rs create mode 100644 dev-tools/omicron-dev-lib/Cargo.toml create mode 100644 dev-tools/omicron-dev-lib/src/lib.rs create mode 100644 dev-tools/omicron-dev-lib/src/test_utils.rs delete mode 100644 dev-tools/omicron-dev/src/bin/omicron-dev.rs create mode 100644 dev-tools/omicron-dev/src/main.rs create mode 100644 dev-tools/omicron-dev/tests/test-omicron-dev.rs delete mode 100644 dev-tools/omicron-dev/tests/test_omicron_dev.rs diff --git a/Cargo.lock b/Cargo.lock index efa316e3bb..64d0d47e6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -913,6 +913,23 @@ version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +[[package]] +name = "cert-dev" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "camino-tempfile", + "clap", + "libc", + "omicron-test-utils", + "omicron-workspace-hack", + "openssl", + "rcgen", + "subprocess", + "tokio", +] + [[package]] name = "cexpr" version = "0.6.0" @@ -944,6 +961,23 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" 
+[[package]] +name = "ch-dev" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "dropshot", + "futures", + "libc", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "pq-sys", + "signal-hook-tokio", + "tokio", +] + [[package]] name = "chacha20" version = "0.9.1" @@ -1632,6 +1666,27 @@ dependencies = [ "walkdir", ] +[[package]] +name = "db-dev" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "clap", + "expectorate", + "futures", + "libc", + "omicron-dev-lib", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "pq-sys", + "signal-hook-tokio", + "subprocess", + "tokio", + "tokio-postgres", +] + [[package]] name = "db-macros" version = "0.1.0" @@ -4375,6 +4430,21 @@ dependencies = [ "slog", ] +[[package]] +name = "mgs-dev" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "futures", + "gateway-messages", + "gateway-test-utils", + "libc", + "omicron-workspace-hack", + "signal-hook-tokio", + "tokio", +] + [[package]] name = "mime" version = "0.3.17" @@ -5514,28 +5584,21 @@ name = "omicron-dev" version = "0.1.0" dependencies = [ "anyhow", - "camino", - "camino-tempfile", "clap", "dropshot", "expectorate", "futures", - "gateway-messages", - "gateway-test-utils", "libc", "nexus-config", "nexus-test-interface", "nexus-test-utils", - "omicron-common", + "omicron-dev-lib", "omicron-nexus", "omicron-rpaths", "omicron-test-utils", "omicron-workspace-hack", - "openssl", "oxide-client", "pq-sys", - "rcgen", - "signal-hook", "signal-hook-tokio", "subprocess", "tokio", @@ -5543,6 +5606,16 @@ dependencies = [ "toml 0.8.19", ] +[[package]] +name = "omicron-dev-lib" +version = "0.1.0" +dependencies = [ + "libc", + "omicron-test-utils", + "omicron-workspace-hack", + "subprocess", +] + [[package]] name = "omicron-gateway" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 3599a5f74c..e58bac82e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,10 +19,15 @@ members = [ "cockroach-admin/api", 
"cockroach-admin/types", "common", + "dev-tools/cert-dev", + "dev-tools/ch-dev", "dev-tools/crdb-seed", + "dev-tools/db-dev", "dev-tools/downloader", + "dev-tools/mgs-dev", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", @@ -122,10 +127,15 @@ default-members = [ "cockroach-admin/api", "cockroach-admin/types", "common", + "dev-tools/cert-dev", + "dev-tools/ch-dev", "dev-tools/crdb-seed", + "dev-tools/db-dev", "dev-tools/downloader", + "dev-tools/mgs-dev", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/omicron-dev-lib", "dev-tools/openapi-manager", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", @@ -405,6 +415,7 @@ num = { version = "0.4.3", default-features = false, features = [ "libm" ] } omicron-certificates = { path = "certificates" } omicron-cockroach-admin = { path = "cockroach-admin" } omicron-common = { path = "common" } +omicron-dev-lib = { path = "dev-tools/omicron-dev-lib" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } omicron-omdb = { path = "dev-tools/omdb" } diff --git a/README.adoc b/README.adoc index 4979411d73..6b24821c6e 100644 --- a/README.adoc +++ b/README.adoc @@ -288,7 +288,7 @@ The server also accepts command-line flags for overriding the values of the conf parameters. The packages downloaded by `cargo xtask download clickhouse` include a `config.xml` file with them. 
-You should probably run ClickHouse via the `omicron-dev` tool, but if you decide to run it +You should probably run ClickHouse via the `ch-dev` tool, but if you decide to run it manually, you can start the server with: [source,text] diff --git a/dev-tools/cert-dev/Cargo.toml b/dev-tools/cert-dev/Cargo.toml new file mode 100644 index 0000000000..a4704e0556 --- /dev/null +++ b/dev-tools/cert-dev/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "cert-dev" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +clap.workspace = true +libc.workspace = true +omicron-workspace-hack.workspace = true +tokio.workspace = true +rcgen.workspace = true + +[dev-dependencies] +camino-tempfile.workspace = true +omicron-test-utils.workspace = true +openssl.workspace = true +subprocess.workspace = true diff --git a/dev-tools/cert-dev/src/main.rs b/dev-tools/cert-dev/src/main.rs new file mode 100644 index 0000000000..8d8d013d03 --- /dev/null +++ b/dev-tools/cert-dev/src/main.rs @@ -0,0 +1,91 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::{io::Write, os::unix::fs::OpenOptionsExt}; + +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::{Parser, Subcommand}; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = CertDevApp::parse(); + args.exec().await +} + +/// Utilities for working with certificates. 
+#[derive(Parser)] +struct CertDevApp { + #[clap(subcommand)] + command: CertDevCmd, +} + +impl CertDevApp { + async fn exec(self) -> anyhow::Result<()> { + match self.command { + CertDevCmd::Create(args) => args.exec().await, + } + } +} + +#[derive(Subcommand)] +enum CertDevCmd { + /// Create a self-signed certificate for use with Omicron + Create(CertCreateArgs), +} + +#[derive(Clone, Debug, Parser)] +pub struct CertCreateArgs { + /// path to where the generated certificate and key files should go + /// (e.g., "out/initial-" would cause the files to be called + /// "out/initial-cert.pem" and "out/initial-key.pem") + #[clap(action)] + output_base: Utf8PathBuf, + + /// DNS names that the certificate claims to be valid for (subject + /// alternative names) + #[clap(action, required = true)] + server_names: Vec, +} + +impl CertCreateArgs { + pub async fn exec(&self) -> Result<(), anyhow::Error> { + let cert = + rcgen::generate_simple_self_signed(self.server_names.clone()) + .context("generating certificate")?; + let cert_pem = + cert.serialize_pem().context("serializing certificate as PEM")?; + let key_pem = cert.serialize_private_key_pem(); + + let cert_path = + Utf8PathBuf::from(format!("{}cert.pem", self.output_base)); + write_private_file(&cert_path, cert_pem.as_bytes()) + .context("writing certificate file")?; + println!("wrote certificate to {}", cert_path); + + let key_path = + Utf8PathBuf::from(format!("{}key.pem", self.output_base)); + write_private_file(&key_path, key_pem.as_bytes()) + .context("writing private key file")?; + println!("wrote private key to {}", key_path); + + Ok(()) + } +} + +#[cfg_attr(not(target_os = "macos"), allow(clippy::useless_conversion))] +fn write_private_file( + path: &Utf8Path, + contents: &[u8], +) -> Result<(), anyhow::Error> { + // The file should be readable and writable by the user only. 
+ let perms = libc::S_IRUSR | libc::S_IWUSR; + let mut file = std::fs::OpenOptions::new() + .write(true) + .create_new(true) + .mode(perms.into()) // into() needed on mac only + .open(path) + .with_context(|| format!("open {:?} for writing", path))?; + file.write_all(contents).with_context(|| format!("write to {:?}", path)) +} diff --git a/dev-tools/cert-dev/tests/test-cert-dev.rs b/dev-tools/cert-dev/tests/test-cert-dev.rs new file mode 100644 index 0000000000..7d07b4685c --- /dev/null +++ b/dev-tools/cert-dev/tests/test-cert-dev.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests for cert-dev. + +use std::path::PathBuf; + +use anyhow::Context; +use omicron_test_utils::dev::test_cmds::{ + assert_exit_code, path_to_executable, run_command, EXIT_SUCCESS, +}; +use subprocess::Exec; + +const CMD_CERT_DEV: &str = env!("CARGO_BIN_EXE_cert-dev"); + +fn path_to_cert_dev() -> PathBuf { + path_to_executable(CMD_CERT_DEV) +} + +#[test] +fn test_cert_create() { + let tmpdir = camino_tempfile::tempdir().unwrap(); + println!("tmpdir: {}", tmpdir.path()); + let output_base = format!("{}/test-", tmpdir.path()); + let exec = Exec::cmd(path_to_cert_dev()) + .arg("create") + .arg(output_base) + .arg("foo.example") + .arg("bar.example"); + let (exit_status, _, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); + let cert_path = tmpdir.path().join("test-cert.pem"); + let key_path = tmpdir.path().join("test-key.pem"); + let cert_contents = std::fs::read(&cert_path) + .with_context(|| format!("reading certificate path {:?}", cert_path)) + .unwrap(); + let key_contents = std::fs::read(&key_path) + .with_context(|| format!("reading private key path: {:?}", key_path)) + .unwrap(); + let certs_pem = openssl::x509::X509::stack_from_pem(&cert_contents) + 
.context("parsing certificate") + .unwrap(); + let private_key = openssl::pkey::PKey::private_key_from_pem(&key_contents) + .context("parsing private key") + .unwrap(); + assert!(certs_pem + .iter() + .last() + .unwrap() + .public_key() + .unwrap() + .public_eq(&private_key)); +} diff --git a/dev-tools/ch-dev/Cargo.toml b/dev-tools/ch-dev/Cargo.toml new file mode 100644 index 0000000000..e2c08c04f5 --- /dev/null +++ b/dev-tools/ch-dev/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "ch-dev" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +clap.workspace = true +dropshot.workspace = true +futures.workspace = true +libc.workspace = true +omicron-test-utils.workspace = true +omicron-workspace-hack.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +signal-hook-tokio.workspace = true +tokio.workspace = true diff --git a/dev-tools/ch-dev/build.rs b/dev-tools/ch-dev/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/dev-tools/ch-dev/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/dev-tools/ch-dev/src/main.rs b/dev-tools/ch-dev/src/main.rs new file mode 100644 index 0000000000..4c527fe27c --- /dev/null +++ b/dev-tools/ch-dev/src/main.rs @@ -0,0 +1,230 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::path::PathBuf; + +use anyhow::{bail, Context}; +use clap::{Args, Parser, Subcommand}; +use dropshot::test_util::LogContext; +use futures::StreamExt; +use libc::SIGINT; +use omicron_test_utils::dev; +use signal_hook_tokio::Signals; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = ChDevApp::parse(); + args.exec().await +} + +#[derive(Clone, Debug, Parser)] +pub struct ChDevApp { + #[clap(subcommand)] + command: ChDevCmd, +} + +impl ChDevApp { + pub async fn exec(&self) -> Result<(), anyhow::Error> { + match &self.command { + ChDevCmd::Run(args) => args.exec().await, + } + } +} + +#[derive(Clone, Debug, Subcommand)] +pub(crate) enum ChDevCmd { + /// Run a ClickHouse server + Run(ChRunArgs), +} + +#[derive(Clone, Debug, Args)] +pub(crate) struct ChRunArgs { + /// The HTTP port on which the server will listen + #[clap(short, long, default_value = "8123", action)] + port: u16, + /// Starts a ClickHouse replicated cluster of 2 replicas and 3 keeper nodes + #[clap(long, conflicts_with = "port", action)] + replicated: bool, +} + +impl ChRunArgs { + pub(crate) async fn exec(&self) -> Result<(), anyhow::Error> { + let logctx = LogContext::new( + "ch-dev", + &dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + ); + if self.replicated { + start_replicated_cluster(&logctx).await?; + } else { + start_single_node(&logctx, self.port).await?; + } + Ok(()) + } +} + +async fn start_single_node( + logctx: &LogContext, + port: u16, +) -> Result<(), anyhow::Error> { + // Start a stream listening for SIGINT + let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); + let mut signal_stream = signals.fuse(); + + // Start the database server process, possibly on a specific port + let mut db_instance = + dev::clickhouse::ClickHouseInstance::new_single_node(logctx, port) + .await?; + println!( + "ch-dev: running ClickHouse with full command:\n\"clickhouse {}\"", + db_instance.cmdline().join(" ") + ); 
+ println!( + "ch-dev: ClickHouse is running with PID {}", + db_instance + .pid() + .expect("Failed to get process PID, it may not have started") + ); + println!( + "ch-dev: ClickHouse HTTP server listening on port {}", + db_instance.port() + ); + println!( + "ch-dev: using {} for ClickHouse data storage", + db_instance.data_path() + ); + + // Wait for the DB to exit itself (an error), or for SIGINT + tokio::select! { + _ = db_instance.wait_for_shutdown() => { + db_instance.cleanup().await.context("clean up after shutdown")?; + bail!("ch-dev: ClickHouse shutdown unexpectedly"); + } + caught_signal = signal_stream.next() => { + assert_eq!(caught_signal.unwrap(), SIGINT); + + // As above, we don't need to explicitly kill the DB process, since + // the shell will have delivered the signal to the whole process group. + eprintln!( + "ch-dev: caught signal, shutting down and removing \ + temporary directory" + ); + + // Remove the data directory. + db_instance + .wait_for_shutdown() + .await + .context("clean up after SIGINT shutdown")?; + } + } + Ok(()) +} + +async fn start_replicated_cluster( + logctx: &LogContext, +) -> Result<(), anyhow::Error> { + // Start a stream listening for SIGINT + let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); + let mut signal_stream = signals.fuse(); + + // Start the database server and keeper processes + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let replica_config = manifest_dir + .as_path() + .join("../../oximeter/db/src/configs/replica_config.xml"); + let keeper_config = manifest_dir + .as_path() + .join("../../oximeter/db/src/configs/keeper_config.xml"); + + let mut cluster = dev::clickhouse::ClickHouseCluster::new( + logctx, + replica_config, + keeper_config, + ) + .await?; + println!( + "ch-dev: running ClickHouse cluster with configuration files:\n \ + replicas: {}\n keepers: {}", + cluster.replica_config_path().display(), + cluster.keeper_config_path().display() + ); + let 
pid_error_msg = "Failed to get process PID, it may not have started"; + println!( + "ch-dev: ClickHouse cluster is running with: server PIDs = [{}, {}] \ + and keeper PIDs = [{}, {}, {}]", + cluster.replica_1.pid().expect(pid_error_msg), + cluster.replica_2.pid().expect(pid_error_msg), + cluster.keeper_1.pid().expect(pid_error_msg), + cluster.keeper_2.pid().expect(pid_error_msg), + cluster.keeper_3.pid().expect(pid_error_msg), + ); + println!( + "ch-dev: ClickHouse HTTP servers listening on ports: {}, {}", + cluster.replica_1.port(), + cluster.replica_2.port() + ); + println!( + "ch-dev: using {} and {} for ClickHouse data storage", + cluster.replica_1.data_path(), + cluster.replica_2.data_path() + ); + + // Wait for the replicas and keepers to exit themselves (an error), or for SIGINT + tokio::select! { + _ = cluster.replica_1.wait_for_shutdown() => { + cluster.replica_1.cleanup().await.context( + format!("clean up {} after shutdown", cluster.replica_1.data_path()) + )?; + bail!("ch-dev: ClickHouse replica 1 shutdown unexpectedly"); + } + _ = cluster.replica_2.wait_for_shutdown() => { + cluster.replica_2.cleanup().await.context( + format!("clean up {} after shutdown", cluster.replica_2.data_path()) + )?; + bail!("ch-dev: ClickHouse replica 2 shutdown unexpectedly"); + } + _ = cluster.keeper_1.wait_for_shutdown() => { + cluster.keeper_1.cleanup().await.context( + format!("clean up {} after shutdown", cluster.keeper_1.data_path()) + )?; + bail!("ch-dev: ClickHouse keeper 1 shutdown unexpectedly"); + } + _ = cluster.keeper_2.wait_for_shutdown() => { + cluster.keeper_2.cleanup().await.context( + format!("clean up {} after shutdown", cluster.keeper_2.data_path()) + )?; + bail!("ch-dev: ClickHouse keeper 2 shutdown unexpectedly"); + } + _ = cluster.keeper_3.wait_for_shutdown() => { + cluster.keeper_3.cleanup().await.context( + format!("clean up {} after shutdown", cluster.keeper_3.data_path()) + )?; + bail!("ch-dev: ClickHouse keeper 3 shutdown unexpectedly"); + } + 
caught_signal = signal_stream.next() => { + assert_eq!(caught_signal.unwrap(), SIGINT); + eprintln!( + "ch-dev: caught signal, shutting down and removing \ + temporary directories" + ); + + // Remove the data directories. + let mut instances = vec![ + cluster.replica_1, + cluster.replica_2, + cluster.keeper_1, + cluster.keeper_2, + cluster.keeper_3, + ]; + for instance in instances.iter_mut() { + instance + .wait_for_shutdown() + .await + .context(format!("clean up {} after SIGINT shutdown", instance.data_path()))?; + }; + } + } + Ok(()) +} diff --git a/dev-tools/db-dev/Cargo.toml b/dev-tools/db-dev/Cargo.toml new file mode 100644 index 0000000000..9f402e21bb --- /dev/null +++ b/dev-tools/db-dev/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "db-dev" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +clap.workspace = true +futures.workspace = true +libc.workspace = true +omicron-test-utils.workspace = true +omicron-workspace-hack.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +signal-hook-tokio.workspace = true +tokio.workspace = true +tokio-postgres.workspace = true + +[dev-dependencies] +expectorate.workspace = true +omicron-dev-lib.workspace = true +subprocess.workspace = true diff --git a/dev-tools/db-dev/build.rs b/dev-tools/db-dev/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/dev-tools/db-dev/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. 
+fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/dev-tools/db-dev/src/main.rs b/dev-tools/db-dev/src/main.rs new file mode 100644 index 0000000000..5f5798b041 --- /dev/null +++ b/dev-tools/db-dev/src/main.rs @@ -0,0 +1,229 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Developer tool for operating on Nexus databases. + +use anyhow::{bail, Context, Result}; +use camino::Utf8PathBuf; +use clap::{Args, Parser, Subcommand}; +use futures::stream::StreamExt; +use libc::SIGINT; +use omicron_test_utils::dev; +use signal_hook_tokio::Signals; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = DbDevApp::parse(); + args.exec().await +} + +#[derive(Clone, Debug, Parser)] +pub struct DbDevApp { + #[clap(subcommand)] + command: DbDevCmd, +} + +impl DbDevApp { + pub async fn exec(&self) -> Result<()> { + match &self.command { + DbDevCmd::Run(args) => args.exec().await, + DbDevCmd::Populate(args) => args.exec().await, + DbDevCmd::Wipe(args) => args.exec().await, + } + } +} + +#[derive(Clone, Debug, Subcommand)] +pub(crate) enum DbDevCmd { + /// Run a CockroachDB server + Run(DbRunArgs), + /// Populate a database with schema + Populate(DbPopulateArgs), + /// Wipe a database + Wipe(DbWipeArgs), +} + +#[derive(Clone, Debug, Args)] +pub(crate) struct DbRunArgs { + /// Path to store database data (default: temp dir cleaned up on exit) + #[clap(long, action)] + store_dir: Option, + + /// Database (SQL) listen port. Use `0` to request any available port. + // We choose an arbitrary default port that's different from the default + // CockroachDB port to avoid conflicting. We don't use 0 because this port + // is specified in a few other places, like the default Nexus config file. + // TODO We could load that file at compile time and use the value there. 
+ #[clap(long, default_value = "32221", action)] + listen_port: u16, + + // This unusual clap configuration makes "populate" default to true, + // allowing a --no-populate override on the CLI. + /// Do not populate the database with any schema + #[clap(long = "no-populate", action(clap::ArgAction::SetFalse))] + populate: bool, +} + +impl DbRunArgs { + pub(crate) async fn exec(&self) -> Result<()> { + // Set ourselves up to wait for SIGINT. It's important to do this early, + // before we've created resources that we want to have cleaned up on SIGINT + // (e.g., the temporary directory created by the database starter). + let signals = + Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); + let mut signal_stream = signals.fuse(); + + // Now start CockroachDB. This process looks bureaucratic (create arg + // builder, then create starter, then start it) because we want to be able + // to print what's happening before we do it. + let mut db_arg_builder = dev::db::CockroachStarterBuilder::new() + .listen_port(self.listen_port); + + // NOTE: The stdout strings here are not intended to be stable, but they are + // used by the test suite. 
+ + if let Some(store_dir) = &self.store_dir { + println!( + "db-dev: using user-provided path for database store: {}", + store_dir, + ); + db_arg_builder = db_arg_builder.store_dir(store_dir); + } else { + println!( + "db-dev: using temporary directory for database store \ + (cleaned up on clean exit)" + ); + } + + let db_starter = db_arg_builder.build()?; + println!( + "db-dev: will run this to start CockroachDB:\n{}", + db_starter.cmdline() + ); + println!("db-dev: environment:"); + for (k, v) in db_starter.environment() { + println!(" {}={}", k, v); + } + println!( + "db-dev: temporary directory: {}", + db_starter.temp_dir().display() + ); + + let mut db_instance = db_starter.start().await?; + println!("\ndb-dev: child process: pid {}", db_instance.pid()); + println!( + "db-dev: CockroachDB listening at: {}", + db_instance.listen_url() + ); + + if self.populate { + // Populate the database with our schema. + let start = tokio::time::Instant::now(); + println!("db-dev: populating database"); + db_instance.populate().await.context("populating database")?; + let end = tokio::time::Instant::now(); + let duration = end.duration_since(start); + println!( + "db-dev: populated database in {}.{} seconds", + duration.as_secs(), + duration.subsec_millis() + ); + } + + // Wait for either the child process to shut down on its own or for us to + // receive SIGINT. + tokio::select! { + _ = db_instance.wait_for_shutdown() => { + db_instance.cleanup().await.context("clean up after shutdown")?; + bail!( + "db-dev: database shut down unexpectedly \ + (see error output above)" + ); + } + caught_signal = signal_stream.next() => { + assert_eq!(caught_signal.unwrap(), SIGINT); + + /* + * We don't have to do anything to trigger shutdown because the + * shell will have delivered the same SIGINT that we got to the + * cockroach process as well. 
+ */ + eprintln!( + "db-dev: caught signal, shutting down and removing \ + temporary directory" + ); + + db_instance + .wait_for_shutdown() + .await + .context("clean up after SIGINT shutdown")?; + } + } + + Ok(()) + } +} + +#[derive(Clone, Debug, Args)] +pub(crate) struct DbPopulateArgs { + /// URL for connecting to the database (postgresql:///...) + #[clap(long, action)] + database_url: String, + + /// Wipe any existing schema (and data!) before populating + #[clap(long, action)] + wipe: bool, +} + +impl DbPopulateArgs { + pub(crate) async fn exec(&self) -> Result<()> { + let config = + self.database_url.parse::().with_context( + || format!("parsing database URL {:?}", self.database_url), + )?; + let client = dev::db::Client::connect(&config, tokio_postgres::NoTls) + .await + .with_context(|| { + format!("connecting to {:?}", self.database_url) + })?; + + if self.wipe { + println!("db-dev: wiping any existing database"); + dev::db::wipe(&client).await?; + } + + println!("db-dev: populating database"); + dev::db::populate(&client).await?; + println!("db-dev: populated database"); + client.cleanup().await.expect("connection failed"); + Ok(()) + } +} + +#[derive(Clone, Debug, Args)] +pub(crate) struct DbWipeArgs { + /// URL for connecting to the database (postgresql:///...) 
+ #[clap(long, action)] + database_url: String, +} + +impl DbWipeArgs { + pub(crate) async fn exec(&self) -> Result<()> { + let config = + self.database_url.parse::().with_context( + || format!("parsing database URL {:?}", self.database_url), + )?; + let client = dev::db::Client::connect(&config, tokio_postgres::NoTls) + .await + .with_context(|| { + format!("connecting to {:?}", self.database_url) + })?; + + println!("db-dev: wiping any existing database"); + dev::db::wipe(&client).await?; + println!("db-dev: wiped"); + client.cleanup().await.expect("connection failed"); + Ok(()) + } +} diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr b/dev-tools/db-dev/tests/output/cmd-db-dev-populate-noargs-stderr similarity index 69% rename from dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr rename to dev-tools/db-dev/tests/output/cmd-db-dev-populate-noargs-stderr index 6c8af1aa47..e4da7152a2 100644 --- a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stderr +++ b/dev-tools/db-dev/tests/output/cmd-db-dev-populate-noargs-stderr @@ -1,6 +1,6 @@ error: the following required arguments were not provided: --database-url -Usage: omicron-dev db-wipe --database-url +Usage: db-dev populate --database-url For more information, try '--help'. 
diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout b/dev-tools/db-dev/tests/output/cmd-db-dev-populate-noargs-stdout similarity index 100% rename from dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stdout rename to dev-tools/db-dev/tests/output/cmd-db-dev-populate-noargs-stdout diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr b/dev-tools/db-dev/tests/output/cmd-db-dev-wipe-noargs-stderr similarity index 67% rename from dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr rename to dev-tools/db-dev/tests/output/cmd-db-dev-wipe-noargs-stderr index f8276da168..9f6da4b9c4 100644 --- a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-populate-noargs-stderr +++ b/dev-tools/db-dev/tests/output/cmd-db-dev-wipe-noargs-stderr @@ -1,6 +1,6 @@ error: the following required arguments were not provided: --database-url -Usage: omicron-dev db-populate --database-url +Usage: db-dev wipe --database-url For more information, try '--help'. diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout b/dev-tools/db-dev/tests/output/cmd-db-dev-wipe-noargs-stdout similarity index 100% rename from dev-tools/omicron-dev/tests/output/cmd-omicron-dev-db-wipe-noargs-stdout rename to dev-tools/db-dev/tests/output/cmd-db-dev-wipe-noargs-stdout diff --git a/dev-tools/db-dev/tests/test-db-dev.rs b/dev-tools/db-dev/tests/test-db-dev.rs new file mode 100644 index 0000000000..1f2dc27dcf --- /dev/null +++ b/dev-tools/db-dev/tests/test-db-dev.rs @@ -0,0 +1,303 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+use std::{io::BufRead, path::PathBuf};
+
+use anyhow::Context;
+use expectorate::assert_contents;
+use omicron_dev_lib::test_utils::verify_graceful_exit;
+use omicron_test_utils::dev::{
+    db::has_omicron_schema,
+    process_running,
+    test_cmds::{
+        assert_exit_code, path_to_executable, run_command, EXIT_USAGE,
+    },
+};
+use subprocess::{Exec, ExitStatus, Redirection};
+
+/// Path to the `db-dev` binary under test, as provided by Cargo at compile
+/// time through the `CARGO_BIN_EXE_db-dev` environment variable.
+const CMD_DB_DEV: &str = env!("CARGO_BIN_EXE_db-dev");
+
+/// Returns the path used to invoke `db-dev`, obtained by passing
+/// [`CMD_DB_DEV`] through `path_to_executable`.
+fn path_to_db_dev() -> PathBuf {
+    path_to_executable(CMD_DB_DEV)
+}
+
+/// Encapsulates the information we need from a running `db-dev run` command.
+#[derive(Debug)]
+struct DbDevRun {
+    /// Handle to the spawned command.
+    subproc: subprocess::Popen,
+    /// Pid of the spawned command itself (`subproc.pid()`).
+    cmd_pid: u32,
+    /// Pid parsed from the "db-dev: child process: pid " line of the command's
+    /// stdout.
+    db_pid: u32,
+    /// URL parsed from the "db-dev: CockroachDB listening at: " line of the
+    /// command's stdout.
+    listen_config_url: String,
+    /// `listen_config_url`, parsed into a connection configuration.
+    listen_config: tokio_postgres::Config,
+    /// Path parsed from the "db-dev: temporary directory: " line of the
+    /// command's stdout.
+    temp_dir: PathBuf,
+}
+
+/// Starts the "db-dev run" command and runs it for long enough to parse the
+/// child pid, listen URL, and temporary directory. Returns these, along with
+/// a handle to the child process. TODO-robustness It would be great to put a
+/// timeout on this.
+///
+/// If `wait_for_populate` is true, this additionally keeps reading until the
+/// command reports "db-dev: populated database".
+///
+/// # Panics
+///
+/// Panics if the command cannot be started, if its stdout reaches EOF or
+/// fails to read before all expected lines were seen, if the reported pid or
+/// listen URL cannot be parsed, or if either process is not running once the
+/// expected output has been seen.
+fn run_db_dev_run(exec: Exec, wait_for_populate: bool) -> DbDevRun {
+    let cmdline = exec.to_cmdline_lossy();
+    eprintln!("will run: {}", cmdline);
+
+    let subproc = exec
+        .stdout(Redirection::Pipe)
+        .popen()
+        .expect("failed to start command");
+    let mut subproc_out =
+        std::io::BufReader::new(subproc.stdout.as_ref().unwrap());
+    let cmd_pid = subproc.pid().unwrap();
+    let (mut db_pid, mut listen_config_url, mut temp_dir) = (None, None, None);
+    let mut populated = false;
+
+    eprintln!("waiting for stdout from child process");
+    while db_pid.is_none()
+        || listen_config_url.is_none()
+        || temp_dir.is_none()
+        || (wait_for_populate && !populated)
+    {
+        let mut buf = String::with_capacity(80);
+        match subproc_out.read_line(&mut buf) {
+            Ok(0) => {
+                panic!("unexpected EOF from child process stdout");
+            }
+            Err(e) => {
+                panic!("unexpected error reading child process stdout: {}", e);
+            }
+            Ok(_) => {
+                print!("subproc stdout: {}", buf);
+            }
+        }
+
+        // `buf` still carries its trailing newline, which is why the
+        // diagnostics below use `eprint!` and the parsed values are trimmed.
+        if let Some(s) = buf.strip_prefix("db-dev: temporary directory: ") {
+            eprint!("found temporary directory: {}", s);
+            temp_dir = Some(PathBuf::from(s.trim_end()));
+            continue;
+        }
+
+        if let Some(s) = buf.strip_prefix("db-dev: child process: pid ") {
+            eprint!("found database pid: {}", s);
+            db_pid = Some(s.trim_end().parse().expect("pid was not a u32"));
+            continue;
+        }
+
+        if let Some(s) = buf.strip_prefix("db-dev: CockroachDB listening at: ")
+        {
+            eprint!("found postgres listen URL: {}", s);
+            listen_config_url = Some(s.trim_end().to_string());
+            continue;
+        }
+
+        if buf.contains("db-dev: populated database") {
+            eprintln!("found database populated");
+            populated = true;
+            continue;
+        }
+    }
+
+    assert!(process_running(cmd_pid));
+    assert!(process_running(db_pid.unwrap()));
+
+    // Name the parse target explicitly; it is the type of
+    // `DbDevRun::listen_config`.
+    let listen_config = listen_config_url
+        .as_ref()
+        .unwrap()
+        .parse::<tokio_postgres::Config>()
+        .expect("invalid PostgreSQL URL");
+
+    DbDevRun {
+        subproc,
+        cmd_pid,
+        db_pid: db_pid.unwrap(),
+        listen_config_url: listen_config_url.unwrap(),
+        listen_config,
+        temp_dir: temp_dir.unwrap(),
+    }
+}
+
+// `db-dev populate` without arguments must fail with a usage error and emit
+// exactly the recorded stdout/stderr.
+#[test]
+fn test_db_dev_populate_no_args() {
+    let exec = Exec::cmd(path_to_db_dev()).arg("populate");
+    let (exit_status, stdout_text, stderr_text) = run_command(exec);
+    assert_exit_code(exit_status, EXIT_USAGE, &stderr_text);
+    assert_contents(
+        "tests/output/cmd-db-dev-populate-noargs-stdout",
+        &stdout_text,
+    );
+    assert_contents(
+        "tests/output/cmd-db-dev-populate-noargs-stderr",
+        &stderr_text,
+    );
+}
+
+// `db-dev wipe` without arguments must fail with a usage error and emit
+// exactly the recorded stdout/stderr.
+#[test]
+fn test_db_dev_wipe_no_args() {
+    let exec = Exec::cmd(path_to_db_dev()).arg("wipe");
+    let (exit_status, stdout_text, stderr_text) = run_command(exec);
+    assert_exit_code(exit_status, EXIT_USAGE, &stderr_text);
+    assert_contents("tests/output/cmd-db-dev-wipe-noargs-stdout", &stdout_text);
+    assert_contents("tests/output/cmd-db-dev-wipe-noargs-stderr", &stderr_text);
+}
+
+// Exercises the normal use case of `db-dev run`: the database starts up, we
+// can connect to it and query it, then we simulate the user typing ^C at the
+// shell, and then it cleans up its temporary directory.
+#[tokio::test]
+async fn test_db_run() {
+    let cmd_path = path_to_db_dev();
+
+    // Rather than invoke the command directly, we'll use the shell to run the
+    // command in a subshell with monitor mode active. This puts the child
+    // process into a separate process group, which allows us to send the whole
+    // group SIGINT, which simulates what would happen if this were run
+    // interactively from the shell (which is what we want to test). Maybe
+    // there's a better way to do this. (Ideally, we would fork, use
+    // setpgid(2) in the child, then exec our command. The standard library
+    // does not provide facilities to do this. Maybe we could use the `libc`
+    // crate to do this?)
+    //
+    // Note that it's not a good test to just send SIGINT to the CockroachDB
+    // process. In the real-world case we're trying to test, db-dev gets
+    // SIGINT as well. If it doesn't handle it explicitly, the process will be
+    // terminated and temporary directories will be leaked. However, the test
+    // would pass because in the test case db-dev would never have gotten
+    // the SIGINT.
+    //
+    // We also redirect stderr to stdout. Originally this was so that the output
+    // doesn't get dumped to the user's terminal during regular `cargo test`
+    // runs, though with nextest this is less of an issue.
+    //
+    // Finally, we set listen-port=0 to avoid conflicting with concurrent
+    // invocations.
+    //
+    // The `&& true` looks redundant but it prevents recent versions of bash
+    // from optimising away the fork() and causing cargo itself to receive
+    // the ^C that we send during testing.
+    let cmdstr = format!(
+        "( set -o monitor; {} run --listen-port 0 && true )",
+        cmd_path.display()
+    );
+    let exec =
+        Exec::cmd("bash").arg("-c").arg(cmdstr).stderr(Redirection::Merge);
+    let dbrun = run_db_dev_run(exec, true);
+
+    // The checks run inside an async block so that a failure is captured in
+    // `res` and the child processes are still shut down below before the
+    // failure is reported.
+    let test_task = async {
+        let (client, connection) = dbrun
+            .listen_config
+            .connect(tokio_postgres::NoTls)
+            .await
+            .context("failed to connect to newly setup database")?;
+        let conn_task = tokio::spawn(connection);
+
+        anyhow::ensure!(has_omicron_schema(&client).await);
+
+        // Now run db-dev populate.
+        eprintln!("running db-dev populate");
+        let populate_result = Exec::cmd(&cmd_path)
+            .arg("populate")
+            .arg("--database-url")
+            .arg(&dbrun.listen_config_url)
+            .stdout(Redirection::Pipe)
+            .stderr(Redirection::Pipe)
+            .capture()
+            .context("failed to run populate")?;
+        eprintln!("exit status: {:?}", populate_result.exit_status);
+        eprintln!("stdout: {:?}", populate_result.stdout_str());
+        eprintln!("stderr: {:?}", populate_result.stderr_str());
+        anyhow::ensure!(has_omicron_schema(&client).await);
+
+        // Try again, but with the --wipe flag.
+        eprintln!("running db-dev populate --wipe");
+        let populate_result = Exec::cmd(&cmd_path)
+            .arg("populate")
+            .arg("--wipe")
+            .arg("--database-url")
+            .arg(&dbrun.listen_config_url)
+            .capture()
+            .context("failed to run populate --wipe")?;
+        anyhow::ensure!(matches!(
+            populate_result.exit_status,
+            ExitStatus::Exited(0)
+        ));
+        anyhow::ensure!(has_omicron_schema(&client).await);
+
+        // Now run db-dev wipe. This should work.
+        eprintln!("running db-dev wipe");
+        let wipe_result = Exec::cmd(&cmd_path)
+            .arg("wipe")
+            .arg("--database-url")
+            .arg(&dbrun.listen_config_url)
+            .capture()
+            .context("failed to run wipe")?;
+        anyhow::ensure!(matches!(
+            wipe_result.exit_status,
+            ExitStatus::Exited(0)
+        ));
+        anyhow::ensure!(!has_omicron_schema(&client).await);
+
+        // The rest of the populate()/wipe() behavior is tested elsewhere.
+
+        drop(client);
+        conn_task
+            .await
+            .context("failed to join on connection")?
+            .context("connection failed with an error")?;
+        eprintln!("cleaned up connection");
+        Ok(())
+    };
+    let res = test_task.await;
+
+    // Figure out what process group our child processes are in. (That won't be
+    // the child's pid because the immediate shell will be in our process group,
+    // and it's the db-dev command that's the process group leader.)
+    //
+    // SAFETY: getpgid(2) takes a plain integer pid and touches no memory owned
+    // by this process; failure is reported through the -1 return value, which
+    // is checked immediately below.
+    let pgid = unsafe { libc::getpgid(dbrun.db_pid as libc::pid_t) };
+    assert_ne!(pgid, -1);
+
+    // Send SIGINT to that process group. This simulates an interactive session
+    // where the user hits ^C. Make sure everything is cleaned up gracefully.
+    eprintln!("sending SIGINT to process group {}", pgid);
+    // SAFETY: kill(2) takes plain integers only; the negated pgid addresses
+    // the process group looked up above, and the return value is asserted.
+    assert_eq!(0, unsafe { libc::kill(-pgid, libc::SIGINT) });
+
+    let wait = verify_graceful_exit(
+        dbrun.subproc,
+        dbrun.cmd_pid,
+        dbrun.db_pid,
+        &dbrun.temp_dir,
+    );
+    eprintln!("wait result: {:?}", wait);
+    assert!(matches!(wait, subprocess::ExitStatus::Exited(0)));
+    res.expect("test task failed");
+}
+
+// Exercises the unusual case of `db-dev run` where the database shuts
+// down unexpectedly.
+#[tokio::test]
+async fn test_db_killed() {
+    // Redirect stderr to stdout just so that it doesn't get dumped to the
+    // user's terminal during regular `cargo test` runs.
+    let exec = Exec::cmd(path_to_db_dev())
+        .arg("run")
+        .arg("--listen-port=0")
+        .stderr(Redirection::Merge);
+    // Although it doesn't seem necessary, we wait for "db-dev run" to finish
+    // populating the database before we kill CockroachDB. The main reason is
+    // that we're trying to verify that if CockroachDB exits under normal
+    // conditions, then db-dev run notices. If we don't wait for populate() to
+    // finish, then we might fail during populate(), and that's a different
+    // failure path. In particular, that path does _not_ necessarily wait for
+    // CockroachDB to exit. It arguably should, but this is considerably more
+    // of an edge case than we're testing here.
+    let dbrun = run_db_dev_run(exec, true);
+    // SAFETY: kill(2) takes plain integers only and touches no memory owned
+    // by this process; its return value is asserted.
+    assert_eq!(0, unsafe {
+        libc::kill(dbrun.db_pid as libc::pid_t, libc::SIGKILL)
+    });
+    let wait = verify_graceful_exit(
+        dbrun.subproc,
+        dbrun.cmd_pid,
+        dbrun.db_pid,
+        &dbrun.temp_dir,
+    );
+    eprintln!("wait result: {:?}", wait);
+    // With the database killed out from under it, db-dev is expected to exit
+    // with status 1 rather than 0.
+    assert!(matches!(wait, subprocess::ExitStatus::Exited(1)));
+}
diff --git a/dev-tools/mgs-dev/Cargo.toml b/dev-tools/mgs-dev/Cargo.toml
new file mode 100644
index 0000000000..d5f61f4b96
--- /dev/null
+++ b/dev-tools/mgs-dev/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "mgs-dev"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow.workspace = true
+clap.workspace = true
+futures.workspace = true
+gateway-messages.workspace = true
+gateway-test-utils.workspace = true
+libc.workspace = true
+omicron-workspace-hack.workspace = true
+signal-hook-tokio.workspace = true
+tokio.workspace = true
diff --git a/dev-tools/mgs-dev/src/main.rs b/dev-tools/mgs-dev/src/main.rs
new file mode 100644
index 0000000000..e4e7859b70
--- /dev/null
+++ b/dev-tools/mgs-dev/src/main.rs
@@ -0,0 +1,69 @@
+// This
Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Developer tool for running MGS.
+
+use clap::{Args, Parser, Subcommand};
+use futures::StreamExt;
+use libc::SIGINT;
+use signal_hook_tokio::Signals;
+
+// Entry point: parse the command line and run the selected subcommand.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let args = MgsDevApp::parse();
+    args.exec().await
+}
+
+// Top-level command-line parser for `mgs-dev`.
+//
+// NOTE: plain `//` comments are used on the clap-derived items in this file
+// on purpose: clap's derive macros turn `///` doc comments into help text, so
+// adding them would change the program's output.
+#[derive(Clone, Debug, Parser)]
+pub struct MgsDevApp {
+    // The subcommand selected on the command line.
+    #[clap(subcommand)]
+    command: MgsDevCmd,
+}
+
+impl MgsDevApp {
+    /// Runs the subcommand that was selected on the command line and returns
+    /// its result.
+    pub async fn exec(&self) -> Result<(), anyhow::Error> {
+        match &self.command {
+            MgsDevCmd::Run(args) => args.exec().await,
+        }
+    }
+}
+
+// Subcommands understood by `mgs-dev`.
+#[derive(Clone, Debug, Subcommand)]
+pub(crate) enum MgsDevCmd {
+    // Start MGS and keep it running until interrupted; see `MgsRunArgs::exec`.
+    Run(MgsRunArgs),
+}
+
+// Arguments for `mgs-dev run`; there are currently none.
+#[derive(Clone, Debug, Args)]
+pub(crate) struct MgsRunArgs {}
+
+impl MgsRunArgs {
+    /// Sets up MGS through `gateway_test_utils::setup::test_setup`, prints
+    /// the address it is bound to, then waits for SIGINT and tears the setup
+    /// down again.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the SIGINT handler cannot be installed, or if the signal
+    /// stream ends or yields anything other than SIGINT.
+    pub(crate) async fn exec(&self) -> Result<(), anyhow::Error> {
+        // Start a stream listening for SIGINT
+        let signals =
+            Signals::new(&[SIGINT]).expect("failed to wait for SIGINT");
+        let mut signal_stream = signals.fuse();
+
+        println!("mgs-dev: setting up MGS ... ");
+        let gwtestctx = gateway_test_utils::setup::test_setup(
+            "mgs-dev",
+            gateway_messages::SpPort::One,
+        )
+        .await;
+        println!("mgs-dev: MGS is running.");
+
+        let addr = gwtestctx.client.bind_address;
+        println!("mgs-dev: MGS API: http://{:?}", addr);
+
+        // Wait for a signal.
+        let caught_signal = signal_stream.next().await;
+        // Only SIGINT was registered above, so anything else here is a bug.
+        assert_eq!(caught_signal.unwrap(), SIGINT);
+        eprintln!(
+            "mgs-dev: caught signal, shutting down and removing \
+            temporary directory"
+        );
+
+        gwtestctx.teardown().await;
+        Ok(())
+    }
+}
diff --git a/dev-tools/omicron-dev-lib/Cargo.toml b/dev-tools/omicron-dev-lib/Cargo.toml
new file mode 100644
index 0000000000..4d32ddb65b
--- /dev/null
+++ b/dev-tools/omicron-dev-lib/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "omicron-dev-lib"
+version = "0.1.0"
+edition = "2021"
+license = "MPL-2.0"
+
+[lints]
+workspace = true
+
+[dependencies]
+libc.workspace = true
+omicron-test-utils.workspace = true
+omicron-workspace-hack.workspace = true
+subprocess.workspace = true
diff --git a/dev-tools/omicron-dev-lib/src/lib.rs b/dev-tools/omicron-dev-lib/src/lib.rs
new file mode 100644
index 0000000000..637a1381d4
--- /dev/null
+++ b/dev-tools/omicron-dev-lib/src/lib.rs
@@ -0,0 +1,7 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Common code shared by the various -dev tools.
+
+pub mod test_utils;
diff --git a/dev-tools/omicron-dev-lib/src/test_utils.rs b/dev-tools/omicron-dev-lib/src/test_utils.rs
new file mode 100644
index 0000000000..114c97bcd6
--- /dev/null
+++ b/dev-tools/omicron-dev-lib/src/test_utils.rs
@@ -0,0 +1,43 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Test utilities for omicron-dev.
+
+use std::{path::Path, time::Duration};
+
+use omicron_test_utils::dev::process_running;
+
+/// Upper bound on how long we wait for operations that should finish quickly.
+const TIMEOUT: Duration = Duration::from_secs(30);
+
+/// Waits (up to [`TIMEOUT`]) for `subproc` to exit and returns its exit
+/// status.
+///
+/// The caller is expected to have already arranged for the processes to
+/// terminate. After the wait, this asserts that neither `cmd_pid` nor
+/// `db_pid` is still running and that `temp_dir` no longer exists.
+///
+/// # Panics
+///
+/// Panics if waiting fails or times out, if either process is still running,
+/// or if looking up `temp_dir` does anything other than fail with `ENOENT`.
+pub fn verify_graceful_exit(
+    mut subproc: subprocess::Popen,
+    cmd_pid: u32,
+    db_pid: u32,
+    temp_dir: &Path,
+) -> subprocess::ExitStatus {
+    let waited = subproc
+        .wait_timeout(TIMEOUT)
+        .expect("failed to wait for process to exit");
+    let exit_status = match waited {
+        Some(status) => status,
+        None => {
+            panic!("timed out waiting {:?} for process to exit", &TIMEOUT)
+        }
+    };
+
+    // Both the command and the database it started must be gone.
+    for pid in [cmd_pid, db_pid] {
+        assert!(!process_running(pid));
+    }
+
+    // The temporary directory must be gone as well: the lookup has to fail,
+    // and specifically with "no such file or directory".
+    let lookup_error = std::fs::metadata(temp_dir)
+        .expect_err("temporary directory still exists");
+    assert_eq!(libc::ENOENT, lookup_error.raw_os_error().unwrap());
+
+    exit_status
+}
diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index 1dcc4eada7..df43e85a54 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -12,40 +12,25 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true -camino.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true -gateway-messages.workspace = true -gateway-test-utils.workspace = true libc.workspace = true nexus-config.workspace = true -nexus-test-utils = { workspace = true, features = ["omicron-dev"] } nexus-test-interface.workspace = true -omicron-common.workspace = true +nexus-test-utils = { workspace = true, features = ["omicron-dev"] } omicron-nexus.workspace = true -omicron-test-utils.workspace = true +omicron-workspace-hack.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency.
pq-sys = "*" -rcgen.workspace = true -signal-hook.workspace = true signal-hook-tokio.workspace = true -tokio = { workspace = true, features = [ "full" ] } -tokio-postgres.workspace = true +tokio.workspace = true toml.workspace = true -omicron-workspace-hack.workspace = true [dev-dependencies] -camino-tempfile.workspace = true expectorate.workspace = true -libc.workspace = true +omicron-dev-lib.workspace = true omicron-test-utils.workspace = true -openssl.workspace = true oxide-client.workspace = true subprocess.workspace = true - -# Disable doc builds by default for our binaries to work around issue -# rust-lang/cargo#8373. These docs would not be very useful anyway. -[[bin]] -name = "omicron-dev" -doc = false +tokio-postgres.workspace = true diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs deleted file mode 100644 index 2c26d03369..0000000000 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ /dev/null @@ -1,649 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Developer tool for easily running bits of Omicron - -use anyhow::{bail, Context}; -use camino::Utf8Path; -use camino::Utf8PathBuf; -use clap::Args; -use clap::Parser; -use dropshot::test_util::LogContext; -use futures::stream::StreamExt; -use nexus_config::NexusConfig; -use nexus_test_interface::NexusServer; -use omicron_common::cmd::fatal; -use omicron_common::cmd::CmdError; -use omicron_test_utils::dev; -use signal_hook::consts::signal::SIGINT; -use signal_hook_tokio::Signals; -use std::io::Write; -use std::os::unix::prelude::OpenOptionsExt; -use std::path::PathBuf; - -#[tokio::main] -async fn main() -> Result<(), anyhow::Error> { - let subcmd = OmicronDb::parse(); - let result = match subcmd { - OmicronDb::DbRun { ref args } => cmd_db_run(args).await, - OmicronDb::DbPopulate { ref args } => cmd_db_populate(args).await, - OmicronDb::DbWipe { ref args } => cmd_db_wipe(args).await, - OmicronDb::ChRun { ref args } => cmd_clickhouse_run(args).await, - OmicronDb::MgsRun { ref args } => cmd_mgs_run(args).await, - OmicronDb::RunAll { ref args } => cmd_run_all(args).await, - OmicronDb::CertCreate { ref args } => cmd_cert_create(args).await, - }; - if let Err(error) = result { - fatal(CmdError::Failure(error)); - } - Ok(()) -} - -/// Tools for working with a local Omicron deployment -#[derive(Debug, Parser)] -#[clap(version)] -enum OmicronDb { - /// Start a CockroachDB cluster for development - DbRun { - #[clap(flatten)] - args: DbRunArgs, - }, - - /// Populate an existing CockroachDB cluster with the Omicron schema - DbPopulate { - #[clap(flatten)] - args: DbPopulateArgs, - }, - - /// Wipe the Omicron schema (and all data) from an existing CockroachDB - /// cluster - DbWipe { - #[clap(flatten)] - args: DbWipeArgs, - }, - - /// Run a ClickHouse database server for development - ChRun { - #[clap(flatten)] - args: ChRunArgs, - }, - - /// Run a simulated Management Gateway Service for development - MgsRun { - #[clap(flatten)] - args: MgsRunArgs, - }, - - /// Run a full 
simulated control plane - RunAll { - #[clap(flatten)] - args: RunAllArgs, - }, - - /// Create a self-signed certificate for use with Omicron - CertCreate { - #[clap(flatten)] - args: CertCreateArgs, - }, -} - -#[derive(Clone, Debug, Args)] -struct DbRunArgs { - /// Path to store database data (default: temp dir cleaned up on exit) - #[clap(long, action)] - store_dir: Option, - - /// Database (SQL) listen port. Use `0` to request any available port. - // We choose an arbitrary default port that's different from the default - // CockroachDB port to avoid conflicting. We don't use 0 because this port - // is specified in a few other places, like the default Nexus config file. - // TODO We could load that file at compile time and use the value there. - #[clap(long, default_value = "32221", action)] - listen_port: u16, - - // This unusual clap configuration makes "populate" default to true, - // allowing a --no-populate override on the CLI. - /// Do not populate the database with any schema - #[clap(long = "no-populate", action(clap::ArgAction::SetFalse))] - populate: bool, -} - -async fn cmd_db_run(args: &DbRunArgs) -> Result<(), anyhow::Error> { - // Set ourselves up to wait for SIGINT. It's important to do this early, - // before we've created resources that we want to have cleaned up on SIGINT - // (e.g., the temporary directory created by the database starter). - let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); - let mut signal_stream = signals.fuse(); - - // Now start CockroachDB. This process looks bureaucratic (create arg - // builder, then create starter, then start it) because we want to be able - // to print what's happening before we do it. - let mut db_arg_builder = - dev::db::CockroachStarterBuilder::new().listen_port(args.listen_port); - - // NOTE: The stdout strings here are not intended to be stable, but they are - // used by the test suite. 
- - if let Some(store_dir) = &args.store_dir { - println!( - "omicron-dev: using user-provided path for database store: {}", - store_dir.display() - ); - db_arg_builder = db_arg_builder.store_dir(store_dir); - } else { - println!( - "omicron-dev: using temporary directory for database store \ - (cleaned up on clean exit)" - ); - } - - let db_starter = db_arg_builder.build()?; - println!( - "omicron-dev: will run this to start CockroachDB:\n{}", - db_starter.cmdline() - ); - println!("omicron-dev: environment:"); - for (k, v) in db_starter.environment() { - println!(" {}={}", k, v); - } - println!( - "omicron-dev: temporary directory: {}", - db_starter.temp_dir().display() - ); - - let mut db_instance = db_starter.start().await?; - println!("\nomicron-dev: child process: pid {}", db_instance.pid()); - println!( - "omicron-dev: CockroachDB listening at: {}", - db_instance.listen_url() - ); - - if args.populate { - // Populate the database with our schema. - let start = tokio::time::Instant::now(); - println!("omicron-dev: populating database"); - db_instance.populate().await.context("populating database")?; - let end = tokio::time::Instant::now(); - let duration = end.duration_since(start); - println!( - "omicron-dev: populated database in {}.{} seconds", - duration.as_secs(), - duration.subsec_millis() - ); - } - - // Wait for either the child process to shut down on its own or for us to - // receive SIGINT. - tokio::select! { - _ = db_instance.wait_for_shutdown() => { - db_instance.cleanup().await.context("clean up after shutdown")?; - bail!( - "omicron-dev: database shut down unexpectedly \ - (see error output above)" - ); - } - caught_signal = signal_stream.next() => { - assert_eq!(caught_signal.unwrap(), SIGINT); - - /* - * We don't have to do anything to trigger shutdown because the - * shell will have delivered the same SIGINT that we got to the - * cockroach process as well. 
- */ - eprintln!( - "omicron-dev: caught signal, shutting down and removing \ - temporary directory" - ); - - db_instance - .wait_for_shutdown() - .await - .context("clean up after SIGINT shutdown")?; - } - } - - Ok(()) -} - -#[derive(Debug, Args)] -struct DbPopulateArgs { - /// URL for connecting to the database (postgresql:///...) - #[clap(long, action)] - database_url: String, - - /// Wipe any existing schema (and data!) before populating - #[clap(long, action)] - wipe: bool, -} - -async fn cmd_db_populate(args: &DbPopulateArgs) -> Result<(), anyhow::Error> { - let config = - args.database_url.parse::().with_context( - || format!("parsing database URL {:?}", args.database_url), - )?; - let client = dev::db::Client::connect(&config, tokio_postgres::NoTls) - .await - .with_context(|| format!("connecting to {:?}", args.database_url))?; - - if args.wipe { - println!("omicron-dev: wiping any existing database"); - dev::db::wipe(&client).await?; - } - - println!("omicron-dev: populating database"); - dev::db::populate(&client).await?; - println!("omicron-dev: populated database"); - client.cleanup().await.expect("connection failed"); - Ok(()) -} - -#[derive(Debug, Args)] -struct DbWipeArgs { - /// URL for connecting to the database (postgresql:///...) 
- #[clap(long, action)] - database_url: String, -} - -async fn cmd_db_wipe(args: &DbWipeArgs) -> Result<(), anyhow::Error> { - let config = - args.database_url.parse::().with_context( - || format!("parsing database URL {:?}", args.database_url), - )?; - let client = dev::db::Client::connect(&config, tokio_postgres::NoTls) - .await - .with_context(|| format!("connecting to {:?}", args.database_url))?; - - println!("omicron-dev: wiping any existing database"); - dev::db::wipe(&client).await?; - println!("omicron-dev: wiped"); - client.cleanup().await.expect("connection failed"); - Ok(()) -} - -#[derive(Clone, Debug, Args)] -struct ChRunArgs { - /// The HTTP port on which the server will listen - #[clap(short, long, default_value = "8123", action)] - port: u16, - /// Starts a ClickHouse replicated cluster of 2 replicas and 3 keeper nodes - #[clap(long, conflicts_with = "port", action)] - replicated: bool, -} - -async fn cmd_clickhouse_run(args: &ChRunArgs) -> Result<(), anyhow::Error> { - let logctx = LogContext::new( - "omicron-dev", - &dropshot::ConfigLogging::StderrTerminal { - level: dropshot::ConfigLoggingLevel::Info, - }, - ); - if args.replicated { - start_replicated_cluster(&logctx).await?; - } else { - start_single_node(&logctx, args.port).await?; - } - Ok(()) -} - -async fn start_single_node( - logctx: &LogContext, - port: u16, -) -> Result<(), anyhow::Error> { - // Start a stream listening for SIGINT - let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); - let mut signal_stream = signals.fuse(); - - // Start the database server process, possibly on a specific port - let mut db_instance = - dev::clickhouse::ClickHouseInstance::new_single_node(logctx, port) - .await?; - println!( - "omicron-dev: running ClickHouse with full command:\n\"clickhouse {}\"", - db_instance.cmdline().join(" ") - ); - println!( - "omicron-dev: ClickHouse is running with PID {}", - db_instance - .pid() - .expect("Failed to get process PID, it may not have 
started") - ); - println!( - "omicron-dev: ClickHouse HTTP server listening on port {}", - db_instance.port() - ); - println!( - "omicron-dev: using {} for ClickHouse data storage", - db_instance.data_path() - ); - - // Wait for the DB to exit itself (an error), or for SIGINT - tokio::select! { - _ = db_instance.wait_for_shutdown() => { - db_instance.cleanup().await.context("clean up after shutdown")?; - bail!("omicron-dev: ClickHouse shutdown unexpectedly"); - } - caught_signal = signal_stream.next() => { - assert_eq!(caught_signal.unwrap(), SIGINT); - - // As above, we don't need to explicitly kill the DB process, since - // the shell will have delivered the signal to the whole process group. - eprintln!( - "omicron-dev: caught signal, shutting down and removing \ - temporary directory" - ); - - // Remove the data directory. - db_instance - .wait_for_shutdown() - .await - .context("clean up after SIGINT shutdown")?; - } - } - Ok(()) -} - -async fn start_replicated_cluster( - logctx: &LogContext, -) -> Result<(), anyhow::Error> { - // Start a stream listening for SIGINT - let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); - let mut signal_stream = signals.fuse(); - - // Start the database server and keeper processes - let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let replica_config = manifest_dir - .as_path() - .join("../../oximeter/db/src/configs/replica_config.xml"); - let keeper_config = manifest_dir - .as_path() - .join("../../oximeter/db/src/configs/keeper_config.xml"); - - let mut cluster = dev::clickhouse::ClickHouseCluster::new( - logctx, - replica_config, - keeper_config, - ) - .await?; - println!( - "omicron-dev: running ClickHouse cluster with configuration files:\n \ - replicas: {}\n keepers: {}", - cluster.replica_config_path().display(), - cluster.keeper_config_path().display() - ); - let pid_error_msg = "Failed to get process PID, it may not have started"; - println!( - "omicron-dev: ClickHouse cluster is 
running with: server PIDs = [{}, {}] \ - and keeper PIDs = [{}, {}, {}]", - cluster.replica_1 - .pid() - .expect(pid_error_msg), - cluster.replica_2 - .pid() - .expect(pid_error_msg), - cluster.keeper_1 - .pid() - .expect(pid_error_msg), - cluster.keeper_2 - .pid() - .expect(pid_error_msg), - cluster.keeper_3 - .pid() - .expect(pid_error_msg), - ); - println!( - "omicron-dev: ClickHouse HTTP servers listening on ports: {}, {}", - cluster.replica_1.port(), - cluster.replica_2.port() - ); - println!( - "omicron-dev: using {} and {} for ClickHouse data storage", - cluster.replica_1.data_path(), - cluster.replica_2.data_path() - ); - - // Wait for the replicas and keepers to exit themselves (an error), or for SIGINT - tokio::select! { - _ = cluster.replica_1.wait_for_shutdown() => { - cluster.replica_1.cleanup().await.context( - format!("clean up {} after shutdown", cluster.replica_1.data_path()) - )?; - bail!("omicron-dev: ClickHouse replica 1 shutdown unexpectedly"); - } - _ = cluster.replica_2.wait_for_shutdown() => { - cluster.replica_2.cleanup().await.context( - format!("clean up {} after shutdown", cluster.replica_2.data_path()) - )?; - bail!("omicron-dev: ClickHouse replica 2 shutdown unexpectedly"); - } - _ = cluster.keeper_1.wait_for_shutdown() => { - cluster.keeper_1.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_1.data_path()) - )?; - bail!("omicron-dev: ClickHouse keeper 1 shutdown unexpectedly"); - } - _ = cluster.keeper_2.wait_for_shutdown() => { - cluster.keeper_2.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_2.data_path()) - )?; - bail!("omicron-dev: ClickHouse keeper 2 shutdown unexpectedly"); - } - _ = cluster.keeper_3.wait_for_shutdown() => { - cluster.keeper_3.cleanup().await.context( - format!("clean up {} after shutdown", cluster.keeper_3.data_path()) - )?; - bail!("omicron-dev: ClickHouse keeper 3 shutdown unexpectedly"); - } - caught_signal = signal_stream.next() => { - 
assert_eq!(caught_signal.unwrap(), SIGINT); - eprintln!( - "omicron-dev: caught signal, shutting down and removing \ - temporary directories" - ); - - // Remove the data directories. - let mut instances = vec![ - cluster.replica_1, - cluster.replica_2, - cluster.keeper_1, - cluster.keeper_2, - cluster.keeper_3, - ]; - for instance in instances.iter_mut() { - instance - .wait_for_shutdown() - .await - .context(format!("clean up {} after SIGINT shutdown", instance.data_path()))?; - }; - } - } - Ok(()) -} - -#[derive(Clone, Debug, Args)] -struct RunAllArgs { - /// Nexus external API listen port. Use `0` to request any available port. - #[clap(long, action)] - nexus_listen_port: Option, -} - -async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { - // Start a stream listening for SIGINT - let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); - let mut signal_stream = signals.fuse(); - - // Read configuration. - let config_str = include_str!("../../../../nexus/examples/config.toml"); - let mut config: NexusConfig = - toml::from_str(config_str).context("parsing example config")?; - config.pkg.log = dropshot::ConfigLogging::File { - // See LogContext::new(), - path: "UNUSED".to_string().into(), - level: dropshot::ConfigLoggingLevel::Trace, - if_exists: dropshot::ConfigLoggingIfExists::Fail, - }; - - if let Some(p) = args.nexus_listen_port { - config.deployment.dropshot_external.dropshot.bind_address.set_port(p); - } - - println!("omicron-dev: setting up all services ... "); - let cptestctx = nexus_test_utils::omicron_dev_setup_with_config::< - omicron_nexus::Server, - >(&mut config) - .await - .context("error setting up services")?; - println!("omicron-dev: services are running."); - - // Print out basic information about what was started. - // NOTE: The stdout strings here are not intended to be stable, but they are - // used by the test suite. 
- let addr = cptestctx.external_client.bind_address; - println!("omicron-dev: nexus external API: {:?}", addr); - println!( - "omicron-dev: nexus internal API: {:?}", - cptestctx.server.get_http_server_internal_address().await, - ); - println!( - "omicron-dev: cockroachdb pid: {}", - cptestctx.database.pid(), - ); - println!( - "omicron-dev: cockroachdb URL: {}", - cptestctx.database.pg_config() - ); - println!( - "omicron-dev: cockroachdb directory: {}", - cptestctx.database.temp_dir().display() - ); - println!( - "omicron-dev: internal DNS HTTP: http://{}", - cptestctx.internal_dns.dropshot_server.local_addr() - ); - println!( - "omicron-dev: internal DNS: {}", - cptestctx.internal_dns.dns_server.local_address() - ); - println!( - "omicron-dev: external DNS name: {}", - cptestctx.external_dns_zone_name, - ); - println!( - "omicron-dev: external DNS HTTP: http://{}", - cptestctx.external_dns.dropshot_server.local_addr() - ); - println!( - "omicron-dev: external DNS: {}", - cptestctx.external_dns.dns_server.local_address() - ); - println!( - "omicron-dev: e.g. `dig @{} -p {} {}.sys.{}`", - cptestctx.external_dns.dns_server.local_address().ip(), - cptestctx.external_dns.dns_server.local_address().port(), - cptestctx.silo_name, - cptestctx.external_dns_zone_name, - ); - for (location, gateway) in &cptestctx.gateway { - println!( - "omicron-dev: management gateway: http://{} ({})", - gateway.client.bind_address, location, - ); - } - println!("omicron-dev: silo name: {}", cptestctx.silo_name,); - println!( - "omicron-dev: privileged user name: {}", - cptestctx.user_name.as_ref(), - ); - - // Wait for a signal. 
- let caught_signal = signal_stream.next().await; - assert_eq!(caught_signal.unwrap(), SIGINT); - eprintln!( - "omicron-dev: caught signal, shutting down and removing \ - temporary directory" - ); - - cptestctx.teardown().await; - Ok(()) -} - -#[derive(Clone, Debug, Args)] -struct CertCreateArgs { - /// path to where the generated certificate and key files should go - /// (e.g., "out/initial-" would cause the files to be called - /// "out/initial-cert.pem" and "out/initial-key.pem") - #[clap(action)] - output_base: Utf8PathBuf, - - /// DNS names that the certificate claims to be valid for (subject - /// alternative names) - #[clap(action, required = true)] - server_names: Vec, -} - -async fn cmd_cert_create(args: &CertCreateArgs) -> Result<(), anyhow::Error> { - let cert = rcgen::generate_simple_self_signed(args.server_names.clone()) - .context("generating certificate")?; - let cert_pem = - cert.serialize_pem().context("serializing certificate as PEM")?; - let key_pem = cert.serialize_private_key_pem(); - - let cert_path = Utf8PathBuf::from(format!("{}cert.pem", args.output_base)); - write_private_file(&cert_path, cert_pem.as_bytes()) - .context("writing certificate file")?; - println!("wrote certificate to {}", cert_path); - - let key_path = Utf8PathBuf::from(format!("{}key.pem", args.output_base)); - write_private_file(&key_path, key_pem.as_bytes()) - .context("writing private key file")?; - println!("wrote private key to {}", key_path); - - Ok(()) -} - -#[cfg_attr(not(target_os = "macos"), allow(clippy::useless_conversion))] -fn write_private_file( - path: &Utf8Path, - contents: &[u8], -) -> Result<(), anyhow::Error> { - // The file should be readable and writable by the user only. 
- let perms = libc::S_IRUSR | libc::S_IWUSR; - let mut file = std::fs::OpenOptions::new() - .write(true) - .create_new(true) - .mode(perms.into()) // into() needed on mac only - .open(path) - .with_context(|| format!("open {:?} for writing", path))?; - file.write_all(contents).with_context(|| format!("write to {:?}", path)) -} - -#[derive(Clone, Debug, Args)] -struct MgsRunArgs {} - -async fn cmd_mgs_run(_args: &MgsRunArgs) -> Result<(), anyhow::Error> { - // Start a stream listening for SIGINT - let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); - let mut signal_stream = signals.fuse(); - - println!("omicron-dev: setting up MGS ... "); - let gwtestctx = gateway_test_utils::setup::test_setup( - "omicron-dev", - gateway_messages::SpPort::One, - ) - .await; - println!("omicron-dev: MGS is running."); - - let addr = gwtestctx.client.bind_address; - println!("omicron-dev: MGS API: http://{:?}", addr); - - // Wait for a signal. - let caught_signal = signal_stream.next().await; - assert_eq!(caught_signal.unwrap(), SIGINT); - eprintln!( - "omicron-dev: caught signal, shutting down and removing \ - temporary directory" - ); - - gwtestctx.teardown().await; - Ok(()) -} diff --git a/dev-tools/omicron-dev/src/main.rs b/dev-tools/omicron-dev/src/main.rs new file mode 100644 index 0000000000..391ec3cfab --- /dev/null +++ b/dev-tools/omicron-dev/src/main.rs @@ -0,0 +1,152 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use anyhow::Context; +use clap::{Args, Parser, Subcommand}; +use futures::StreamExt; +use libc::SIGINT; +use nexus_config::NexusConfig; +use nexus_test_interface::NexusServer; +use signal_hook_tokio::Signals; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = OmicronDevApp::parse(); + args.exec().await +} + +#[derive(Clone, Debug, Parser)] +pub struct OmicronDevApp { + #[clap(subcommand)] + command: OmicronDevCmd, +} + +impl OmicronDevApp { + pub async fn exec(&self) -> Result<(), anyhow::Error> { + match &self.command { + OmicronDevCmd::RunAll(args) => args.exec().await, + } + } +} + +#[derive(Clone, Debug, Subcommand)] +pub(crate) enum OmicronDevCmd { + /// Run a full simulated control plane + RunAll(RunAllArgs), +} + +#[derive(Clone, Debug, Args)] +pub(crate) struct RunAllArgs { + /// Nexus external API listen port. Use `0` to request any available port. + #[clap(long, action)] + nexus_listen_port: Option, +} + +impl RunAllArgs { + pub(crate) async fn exec(&self) -> Result<(), anyhow::Error> { + // Start a stream listening for SIGINT + let signals = + Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); + let mut signal_stream = signals.fuse(); + + // Read configuration. + let config_str = include_str!("../../../nexus/examples/config.toml"); + let mut config: NexusConfig = + toml::from_str(config_str).context("parsing example config")?; + config.pkg.log = dropshot::ConfigLogging::File { + // See LogContext::new(), + path: "UNUSED".to_string().into(), + level: dropshot::ConfigLoggingLevel::Trace, + if_exists: dropshot::ConfigLoggingIfExists::Fail, + }; + + if let Some(p) = self.nexus_listen_port { + config + .deployment + .dropshot_external + .dropshot + .bind_address + .set_port(p); + } + + println!("omicron-dev: setting up all services ... 
"); + let cptestctx = nexus_test_utils::omicron_dev_setup_with_config::< + omicron_nexus::Server, + >(&mut config) + .await + .context("error setting up services")?; + println!("omicron-dev: services are running."); + + // Print out basic information about what was started. + // NOTE: The stdout strings here are not intended to be stable, but they are + // used by the test suite. + let addr = cptestctx.external_client.bind_address; + println!("omicron-dev: nexus external API: {:?}", addr); + println!( + "omicron-dev: nexus internal API: {:?}", + cptestctx.server.get_http_server_internal_address().await, + ); + println!( + "omicron-dev: cockroachdb pid: {}", + cptestctx.database.pid(), + ); + println!( + "omicron-dev: cockroachdb URL: {}", + cptestctx.database.pg_config() + ); + println!( + "omicron-dev: cockroachdb directory: {}", + cptestctx.database.temp_dir().display() + ); + println!( + "omicron-dev: internal DNS HTTP: http://{}", + cptestctx.internal_dns.dropshot_server.local_addr() + ); + println!( + "omicron-dev: internal DNS: {}", + cptestctx.internal_dns.dns_server.local_address() + ); + println!( + "omicron-dev: external DNS name: {}", + cptestctx.external_dns_zone_name, + ); + println!( + "omicron-dev: external DNS HTTP: http://{}", + cptestctx.external_dns.dropshot_server.local_addr() + ); + println!( + "omicron-dev: external DNS: {}", + cptestctx.external_dns.dns_server.local_address() + ); + println!( + "omicron-dev: e.g. 
`dig @{} -p {} {}.sys.{}`", + cptestctx.external_dns.dns_server.local_address().ip(), + cptestctx.external_dns.dns_server.local_address().port(), + cptestctx.silo_name, + cptestctx.external_dns_zone_name, + ); + for (location, gateway) in &cptestctx.gateway { + println!( + "omicron-dev: management gateway: http://{} ({})", + gateway.client.bind_address, location, + ); + } + println!("omicron-dev: silo name: {}", cptestctx.silo_name,); + println!( + "omicron-dev: privileged user name: {}", + cptestctx.user_name.as_ref(), + ); + + // Wait for a signal. + let caught_signal = signal_stream.next().await; + assert_eq!(caught_signal.unwrap(), SIGINT); + eprintln!( + "omicron-dev: caught signal, shutting down and removing \ + temporary directory" + ); + + cptestctx.teardown().await; + Ok(()) + } +} diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr index ac1c87e165..49f770eb21 100644 --- a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr +++ b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr @@ -1,17 +1,8 @@ -Tools for working with a local Omicron deployment - Usage: omicron-dev Commands: - db-run Start a CockroachDB cluster for development - db-populate Populate an existing CockroachDB cluster with the Omicron schema - db-wipe Wipe the Omicron schema (and all data) from an existing CockroachDB cluster - ch-run Run a ClickHouse database server for development - mgs-run Run a simulated Management Gateway Service for development - run-all Run a full simulated control plane - cert-create Create a self-signed certificate for use with Omicron - help Print this message or the help of the given subcommand(s) + run-all Run a full simulated control plane + help Print this message or the help of the given subcommand(s) Options: - -h, --help Print help - -V, --version Print version + -h, --help Print help diff --git 
a/dev-tools/omicron-dev/tests/test-omicron-dev.rs b/dev-tools/omicron-dev/tests/test-omicron-dev.rs new file mode 100644 index 0000000000..927b600283 --- /dev/null +++ b/dev-tools/omicron-dev/tests/test-omicron-dev.rs @@ -0,0 +1,224 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Smoke tests for the omicron-dev command-line tool + +use anyhow::Context; +use expectorate::assert_contents; +use omicron_dev_lib::test_utils::verify_graceful_exit; +use omicron_test_utils::dev::db::has_omicron_schema; +use omicron_test_utils::dev::process_running; +use omicron_test_utils::dev::test_cmds::assert_exit_code; +use omicron_test_utils::dev::test_cmds::path_to_executable; +use omicron_test_utils::dev::test_cmds::run_command; +use omicron_test_utils::dev::test_cmds::EXIT_USAGE; +use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; +use oxide_client::ClientHiddenExt; +use std::io::BufRead; +use std::path::PathBuf; +use subprocess::Exec; +use subprocess::Redirection; + +/// name of the "omicron-dev" executable +const CMD_OMICRON_DEV: &str = env!("CARGO_BIN_EXE_omicron-dev"); + +fn path_to_omicron_dev() -> PathBuf { + path_to_executable(CMD_OMICRON_DEV) +} + +/// Encapsulates the information we need from a running `omicron-dev run-all` +/// command. 
+#[derive(Debug)] +struct RunAll { + subproc: subprocess::Popen, + cmd_pid: u32, + db_pid: u32, + postgres_config: tokio_postgres::Config, + temp_dir: PathBuf, + external_url: String, +} + +/// Like `run_db_run()`, but for the `run-all` command +fn run_run_all(exec: Exec) -> RunAll { + let cmdline = exec.to_cmdline_lossy(); + eprintln!("will run: {}", cmdline); + + let subproc = exec + .stdout(Redirection::Pipe) + .popen() + .expect("failed to start command"); + let mut subproc_out = + std::io::BufReader::new(subproc.stdout.as_ref().unwrap()); + let cmd_pid = subproc.pid().unwrap(); + let (mut db_pid, mut external_url, mut postgres_url, mut temp_dir) = + (None, None, None, None); + + eprintln!("waiting for stdout from child process"); + while db_pid.is_none() + || external_url.is_none() + || postgres_url.is_none() + || temp_dir.is_none() + { + let mut buf = String::with_capacity(80); + match subproc_out.read_line(&mut buf) { + Ok(0) => { + panic!("unexpected EOF from child process stdout"); + } + Err(e) => { + panic!("unexpected error reading child process stdout: {}", e); + } + Ok(_) => { + print!("subproc stdout: {}", buf); + } + } + + if let Some(s) = + buf.strip_prefix("omicron-dev: cockroachdb directory: ") + { + eprint!("found cockroachdb directory: {}", s); + temp_dir = Some(PathBuf::from(s.trim().to_string())); + continue; + } + + if let Some(s) = buf.strip_prefix("omicron-dev: nexus external API: ") { + eprint!("found Nexus external API: {}", s); + external_url = Some(s.trim().to_string()); + continue; + } + + if let Some(s) = buf.strip_prefix("omicron-dev: cockroachdb pid: ") { + eprint!("found cockroachdb pid: {}", s); + db_pid = + Some(s.trim().to_string().parse().expect("pid was not a u32")); + continue; + } + + if let Some(s) = buf.strip_prefix("omicron-dev: cockroachdb URL: ") { + eprint!("found postgres listen URL: {}", s); + postgres_url = Some(s.trim().to_string()); + continue; + } + } + + assert!(process_running(cmd_pid)); + + let postgres_config 
= postgres_url + .as_ref() + .unwrap() + .parse::() + .expect("invalid PostgreSQL URL"); + + RunAll { + subproc, + cmd_pid, + db_pid: db_pid.unwrap(), + external_url: external_url.unwrap(), + postgres_config, + temp_dir: temp_dir.unwrap(), + } +} + +// Exercises the normal use case of `omicron-dev run-all`: everything starts up, +// we can connect to Nexus and CockroachDB and query them, then we simulate the +// user typing ^C at the shell, and then it cleans up its temporary directory. +// +// This mirrors the `test_db_run()` test. +#[tokio::test] +async fn test_run_all() { + // Ensure that the CRDB_SEED_TAR environment variable is not set. We want to + // simulate a user running omicron-dev without the test environment. + // Check if CRDB_SEED_TAR_ENV is set and panic if it is + if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) { + panic!( + "CRDB_SEED_TAR_ENV should not be set here, but is set to {}", + val + ); + } + + let cmd_path = path_to_omicron_dev(); + + let cmdstr = format!( + "( set -o monitor; {} run-all --nexus-listen-port 0 && true )", + cmd_path.display() + ); + let exec = + Exec::cmd("bash").arg("-c").arg(cmdstr).stderr(Redirection::Merge); + let runall = run_run_all(exec); + + let test_task = async { + // Make sure we can connect to CockroachDB. + let (client, connection) = runall + .postgres_config + .connect(tokio_postgres::NoTls) + .await + .context("failed to connect to newly setup database")?; + let conn_task = tokio::spawn(connection); + anyhow::ensure!(has_omicron_schema(&client).await); + drop(client); + conn_task + .await + .context("failed to join on connection")? + .context("connection failed with an error")?; + eprintln!("cleaned up connection"); + + // Make sure we can connect to Nexus. 
+ let client = oxide_client::Client::new(&format!( + "http://{}", + runall.external_url + )); + let _ = + client.logout().send().await.context( + "Unexpectedly failed to reach Nexus at logout endpoint", + )?; + Ok(()) + }; + let res = test_task.await; + + // Figure out what process group our child processes are in. (That won't be + // the child's pid because the immediate shell will be in our process group, + // and it's the omicron-dev command that's the process group leader.) + let pgid = unsafe { libc::getpgid(runall.db_pid as libc::pid_t) }; + assert_ne!(pgid, -1); + + // Send SIGINT to that process group. This simulates an interactive session + // where the user hits ^C. Make sure everything is cleaned up gracefully. + eprintln!("sending SIGINT to process group {}", pgid); + assert_eq!(0, unsafe { libc::kill(-pgid, libc::SIGINT) }); + + let wait = verify_graceful_exit( + runall.subproc, + runall.cmd_pid, + runall.db_pid, + &runall.temp_dir, + ); + eprintln!("wait result: {:?}", wait); + assert!(matches!(wait, subprocess::ExitStatus::Exited(0))); + + // Unwrap the caught errors we are actually trying to test. 
+ res.expect("failed to run test"); +} + +#[test] +fn test_omicron_dev_no_args() { + let exec = Exec::cmd(path_to_omicron_dev()); + let (exit_status, stdout_text, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_USAGE, &stderr_text); + assert_contents("tests/output/cmd-omicron-dev-noargs-stdout", &stdout_text); + assert_contents("tests/output/cmd-omicron-dev-noargs-stderr", &stderr_text); +} + +#[test] +fn test_omicron_dev_bad_cmd() { + let exec = Exec::cmd(path_to_omicron_dev()).arg("bogus-command"); + let (exit_status, stdout_text, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_USAGE, &stderr_text); + assert_contents( + "tests/output/cmd-omicron-dev-bad-cmd-stdout", + &stdout_text, + ); + assert_contents( + "tests/output/cmd-omicron-dev-bad-cmd-stderr", + &stderr_text, + ); +} diff --git a/dev-tools/omicron-dev/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs deleted file mode 100644 index 7e78e5dc5a..0000000000 --- a/dev-tools/omicron-dev/tests/test_omicron_dev.rs +++ /dev/null @@ -1,585 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Smoke tests for the omicron-dev command-line tool - -use anyhow::Context; -use expectorate::assert_contents; -use omicron_test_utils::dev::db::has_omicron_schema; -use omicron_test_utils::dev::process_running; -use omicron_test_utils::dev::test_cmds::assert_exit_code; -use omicron_test_utils::dev::test_cmds::path_to_executable; -use omicron_test_utils::dev::test_cmds::run_command; -use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; -use omicron_test_utils::dev::test_cmds::EXIT_USAGE; -use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; -use oxide_client::ClientHiddenExt; -use std::io::BufRead; -use std::path::Path; -use std::path::PathBuf; -use std::time::Duration; -use subprocess::Exec; -use subprocess::ExitStatus; -use subprocess::Redirection; - -/// name of the "omicron-dev" executable -const CMD_OMICRON_DEV: &str = env!("CARGO_BIN_EXE_omicron-dev"); - -/// timeout used for various things that should be pretty quick -const TIMEOUT: Duration = Duration::from_secs(30); - -fn path_to_omicron_dev() -> PathBuf { - path_to_executable(CMD_OMICRON_DEV) -} - -/// Encapsulates the information we need from a running `omicron-dev db-run` -/// command. -#[derive(Debug)] -struct DbRun { - subproc: subprocess::Popen, - cmd_pid: u32, - db_pid: u32, - listen_config_url: String, - listen_config: tokio_postgres::Config, - temp_dir: PathBuf, -} - -/// Starts the "omicron-dev db-run" command and runs it for long enough to parse -/// the child pid, listen URL, and temporary directory. Returns these, along -/// with a handle to the child process. -/// TODO-robustness It would be great to put a timeout on this. 
-fn run_db_run(exec: Exec, wait_for_populate: bool) -> DbRun { - let cmdline = exec.to_cmdline_lossy(); - eprintln!("will run: {}", cmdline); - - let subproc = exec - .stdout(Redirection::Pipe) - .popen() - .expect("failed to start command"); - let mut subproc_out = - std::io::BufReader::new(subproc.stdout.as_ref().unwrap()); - let cmd_pid = subproc.pid().unwrap(); - let (mut db_pid, mut listen_config_url, mut temp_dir) = (None, None, None); - let mut populated = false; - - eprintln!("waiting for stdout from child process"); - while db_pid.is_none() - || listen_config_url.is_none() - || temp_dir.is_none() - || (wait_for_populate && !populated) - { - let mut buf = String::with_capacity(80); - match subproc_out.read_line(&mut buf) { - Ok(0) => { - panic!("unexpected EOF from child process stdout"); - } - Err(e) => { - panic!("unexpected error reading child process stdout: {}", e); - } - Ok(_) => { - print!("subproc stdout: {}", buf); - } - } - - if let Some(s) = buf.strip_prefix("omicron-dev: temporary directory: ") - { - eprint!("found temporary directory: {}", s); - temp_dir = Some(PathBuf::from(s.trim_end().to_string())); - continue; - } - - if let Some(s) = buf.strip_prefix("omicron-dev: child process: pid ") { - eprint!("found database pid: {}", s); - db_pid = Some( - s.trim_end().to_string().parse().expect("pid was not a u32"), - ); - continue; - } - - if let Some(s) = - buf.strip_prefix("omicron-dev: CockroachDB listening at: ") - { - eprint!("found postgres listen URL: {}", s); - listen_config_url = Some(s.trim_end().to_string()); - continue; - } - - if buf.contains("omicron-dev: populated database") { - eprintln!("found database populated"); - populated = true; - continue; - } - } - - assert!(process_running(cmd_pid)); - assert!(process_running(db_pid.unwrap())); - - let listen_config = listen_config_url - .as_ref() - .unwrap() - .parse::() - .expect("invalid PostgreSQL URL"); - - DbRun { - subproc, - cmd_pid, - db_pid: db_pid.unwrap(), - listen_config_url: 
listen_config_url.unwrap(), - listen_config, - temp_dir: temp_dir.unwrap(), - } -} - -/// Encapsulates the information we need from a running `omicron-dev run-all` -/// command. -#[derive(Debug)] -struct RunAll { - subproc: subprocess::Popen, - cmd_pid: u32, - db_pid: u32, - postgres_config: tokio_postgres::Config, - temp_dir: PathBuf, - external_url: String, -} - -/// Like `run_db_run()`, but for the `run-all` command -fn run_run_all(exec: Exec) -> RunAll { - let cmdline = exec.to_cmdline_lossy(); - eprintln!("will run: {}", cmdline); - - let subproc = exec - .stdout(Redirection::Pipe) - .popen() - .expect("failed to start command"); - let mut subproc_out = - std::io::BufReader::new(subproc.stdout.as_ref().unwrap()); - let cmd_pid = subproc.pid().unwrap(); - let (mut db_pid, mut external_url, mut postgres_url, mut temp_dir) = - (None, None, None, None); - - eprintln!("waiting for stdout from child process"); - while db_pid.is_none() - || external_url.is_none() - || postgres_url.is_none() - || temp_dir.is_none() - { - let mut buf = String::with_capacity(80); - match subproc_out.read_line(&mut buf) { - Ok(0) => { - panic!("unexpected EOF from child process stdout"); - } - Err(e) => { - panic!("unexpected error reading child process stdout: {}", e); - } - Ok(_) => { - print!("subproc stdout: {}", buf); - } - } - - if let Some(s) = - buf.strip_prefix("omicron-dev: cockroachdb directory: ") - { - eprint!("found cockroachdb directory: {}", s); - temp_dir = Some(PathBuf::from(s.trim().to_string())); - continue; - } - - if let Some(s) = buf.strip_prefix("omicron-dev: nexus external API: ") { - eprint!("found Nexus external API: {}", s); - external_url = Some(s.trim().to_string()); - continue; - } - - if let Some(s) = buf.strip_prefix("omicron-dev: cockroachdb pid: ") { - eprint!("found cockroachdb pid: {}", s); - db_pid = - Some(s.trim().to_string().parse().expect("pid was not a u32")); - continue; - } - - if let Some(s) = buf.strip_prefix("omicron-dev: cockroachdb URL: 
") { - eprint!("found postgres listen URL: {}", s); - postgres_url = Some(s.trim().to_string()); - continue; - } - } - - assert!(process_running(cmd_pid)); - - let postgres_config = postgres_url - .as_ref() - .unwrap() - .parse::() - .expect("invalid PostgreSQL URL"); - - RunAll { - subproc, - cmd_pid, - db_pid: db_pid.unwrap(), - external_url: external_url.unwrap(), - postgres_config, - temp_dir: temp_dir.unwrap(), - } -} - -/// Waits for the subprocess to exit and returns status information -/// -/// This assumes the caller has arranged for the processes to terminate. This -/// function verifies that both the omicron-dev and CockroachDB processes are -/// gone and that the temporary directory has been cleaned up. -fn verify_graceful_exit( - mut subproc: subprocess::Popen, - cmd_pid: u32, - db_pid: u32, - temp_dir: &Path, -) -> subprocess::ExitStatus { - let wait_result = subproc - .wait_timeout(TIMEOUT) - .expect("failed to wait for process to exit") - .unwrap_or_else(|| { - panic!("timed out waiting {:?} for process to exit", &TIMEOUT) - }); - - assert!(!process_running(cmd_pid)); - assert!(!process_running(db_pid)); - assert_eq!( - libc::ENOENT, - std::fs::metadata(temp_dir) - .expect_err("temporary directory still exists") - .raw_os_error() - .unwrap() - ); - - wait_result -} - -// Exercises the normal use case of `omicron-dev db-run`: the database starts -// up, we can connect to it and query it, then we simulate the user typing ^C at -// the shell, and then it cleans up its temporary directory. -#[tokio::test] -async fn test_db_run() { - let cmd_path = path_to_omicron_dev(); - - // Rather than invoke the command directly, we'll use the shell to run the - // command in a subshell with monitor mode active. This puts the child - // process into a separate process group, which allows us to send the whole - // group SIGINT, which simulates what would happen if this were run - // interactively from the shell (which is what we want to test). 
Maybe - // there's a better way to do this. (Ideally, we would fork, use - // setpgid(2) in the child, then exec our command. The standard library - // does not provide facilities to do this. Maybe we could use the `libc` - // crate to do this?) - // - // Note that it's not a good test to just send SIGINT to the CockroachDB - // process. In the real-world case we're trying to test, omicron-dev gets - // SIGINT as well. If it doesn't handle it explicitly, the process will be - // terminated and temporary directories will be leaked. However, the test - // would pass because in the test case omicron-dev would never have gotten - // the SIGINT. - // - // We also redirect stderr to stdout. Originally this was so that the output - // doesn't get dumped to the user's terminal during regular `cargo test` - // runs, though with nextest this is less of an issue. - // - // Finally, we set listen-port=0 to avoid conflicting with concurrent - // invocations. - // - // The `&& true` looks redundant but it prevents recent versions of bash - // from optimising away the fork() and causing cargo itself to receive - // the ^C that we send during testing. - let cmdstr = format!( - "( set -o monitor; {} db-run --listen-port 0 && true )", - cmd_path.display() - ); - let exec = - Exec::cmd("bash").arg("-c").arg(cmdstr).stderr(Redirection::Merge); - let dbrun = run_db_run(exec, true); - let test_task = async { - let (client, connection) = dbrun - .listen_config - .connect(tokio_postgres::NoTls) - .await - .context("failed to connect to newly setup database")?; - let conn_task = tokio::spawn(connection); - - anyhow::ensure!(has_omicron_schema(&client).await); - - // Now run db-populate. 
- eprintln!("running db-populate"); - let populate_result = Exec::cmd(&cmd_path) - .arg("db-populate") - .arg("--database-url") - .arg(&dbrun.listen_config_url) - .stdout(Redirection::Pipe) - .stderr(Redirection::Pipe) - .capture() - .context("failed to run db-populate")?; - eprintln!("exit status: {:?}", populate_result.exit_status); - eprintln!("stdout: {:?}", populate_result.stdout_str()); - eprintln!("stdout: {:?}", populate_result.stderr_str()); - anyhow::ensure!(has_omicron_schema(&client).await); - - // Try again, but with the --wipe flag. - eprintln!("running db-populate --wipe"); - let populate_result = Exec::cmd(&cmd_path) - .arg("db-populate") - .arg("--wipe") - .arg("--database-url") - .arg(&dbrun.listen_config_url) - .capture() - .context("failed to run db-populate")?; - anyhow::ensure!(matches!( - populate_result.exit_status, - ExitStatus::Exited(0) - )); - anyhow::ensure!(has_omicron_schema(&client).await); - - // Now run db-wipe. This should work. - eprintln!("running db-wipe"); - let wipe_result = Exec::cmd(&cmd_path) - .arg("db-wipe") - .arg("--database-url") - .arg(&dbrun.listen_config_url) - .capture() - .context("failed to run db-wipe")?; - anyhow::ensure!(matches!( - wipe_result.exit_status, - ExitStatus::Exited(0) - )); - anyhow::ensure!(!has_omicron_schema(&client).await); - - // The rest of the populate()/wipe() behavior is tested elsewhere. - - drop(client); - conn_task - .await - .context("failed to join on connection")? - .context("connection failed with an error")?; - eprintln!("cleaned up connection"); - Ok(()) - }; - let res = test_task.await; - - // Figure out what process group our child processes are in. (That won't be - // the child's pid because the immediate shell will be in our process group, - // and it's the omicron-dev command that's the process group leader.) - let pgid = unsafe { libc::getpgid(dbrun.db_pid as libc::pid_t) }; - assert_ne!(pgid, -1); - - // Send SIGINT to that process group. 
This simulates an interactive session - // where the user hits ^C. Make sure everything is cleaned up gracefully. - eprintln!("sending SIGINT to process group {}", pgid); - assert_eq!(0, unsafe { libc::kill(-pgid, libc::SIGINT) }); - - let wait = verify_graceful_exit( - dbrun.subproc, - dbrun.cmd_pid, - dbrun.db_pid, - &dbrun.temp_dir, - ); - eprintln!("wait result: {:?}", wait); - assert!(matches!(wait, subprocess::ExitStatus::Exited(0))); - res.expect("test task failed"); -} - -// Exercises the normal use case of `omicron-dev run-all`: everything starts up, -// we can connect to Nexus and CockroachDB and query them, then we simulate the -// user typing ^C at the shell, and then it cleans up its temporary directory. -// -// This mirrors the `test_db_run()` test. -#[tokio::test] -async fn test_run_all() { - // Ensure that the CRDB_SEED_TAR environment variable is not set. We want to - // simulate a user running omicron-dev without the test environment. - // Check if CRDB_SEED_TAR_ENV is set and panic if it is - if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) { - panic!( - "CRDB_SEED_TAR_ENV should not be set here, but is set to {}", - val - ); - } - - let cmd_path = path_to_omicron_dev(); - - let cmdstr = format!( - "( set -o monitor; {} run-all --nexus-listen-port 0 && true )", - cmd_path.display() - ); - let exec = - Exec::cmd("bash").arg("-c").arg(cmdstr).stderr(Redirection::Merge); - let runall = run_run_all(exec); - - let test_task = async { - // Make sure we can connect to CockroachDB. - let (client, connection) = runall - .postgres_config - .connect(tokio_postgres::NoTls) - .await - .context("failed to connect to newly setup database")?; - let conn_task = tokio::spawn(connection); - anyhow::ensure!(has_omicron_schema(&client).await); - drop(client); - conn_task - .await - .context("failed to join on connection")? - .context("connection failed with an error")?; - eprintln!("cleaned up connection"); - - // Make sure we can connect to Nexus. 
- let client = oxide_client::Client::new(&format!( - "http://{}", - runall.external_url - )); - let _ = - client.logout().send().await.context( - "Unexpectedly failed to reach Nexus at logout endpoint", - )?; - Ok(()) - }; - let res = test_task.await; - - // Figure out what process group our child processes are in. (That won't be - // the child's pid because the immediate shell will be in our process group, - // and it's the omicron-dev command that's the process group leader.) - let pgid = unsafe { libc::getpgid(runall.db_pid as libc::pid_t) }; - assert_ne!(pgid, -1); - - // Send SIGINT to that process group. This simulates an interactive session - // where the user hits ^C. Make sure everything is cleaned up gracefully. - eprintln!("sending SIGINT to process group {}", pgid); - assert_eq!(0, unsafe { libc::kill(-pgid, libc::SIGINT) }); - - let wait = verify_graceful_exit( - runall.subproc, - runall.cmd_pid, - runall.db_pid, - &runall.temp_dir, - ); - eprintln!("wait result: {:?}", wait); - assert!(matches!(wait, subprocess::ExitStatus::Exited(0))); - - // Unwrap the caught errors we are actually trying to test. - res.expect("failed to run test"); -} - -// Exercises the unusual case of `omicron-dev db-run` where the database shuts -// down unexpectedly. -#[tokio::test] -async fn test_db_killed() { - // Redirect stderr to stdout just so that it doesn't get dumped to the - // user's terminal during regular `cargo test` runs. - let exec = Exec::cmd(&path_to_omicron_dev()) - .arg("db-run") - .arg("--listen-port=0") - .stderr(Redirection::Merge); - // Although it doesn't seem necessary, we wait for "db-run" to finish - // populating the database before we kill CockroachDB. The main reason is - // that we're trying to verify that if CockroachDB exits under normal - // conditions, then db-run notices. If we don't wait for populate() to - // finish, then we might fail during populate(), and that's a different - // failure path. 
In particular, that path does _not_ necessarily wait for - // CockroachDB to exit. It arguably should, but this is considerably more - // of an edge case than we're testing here. - let dbrun = run_db_run(exec, true); - assert_eq!(0, unsafe { - libc::kill(dbrun.db_pid as libc::pid_t, libc::SIGKILL) - }); - let wait = verify_graceful_exit( - dbrun.subproc, - dbrun.cmd_pid, - dbrun.db_pid, - &dbrun.temp_dir, - ); - eprintln!("wait result: {:?}", wait); - assert!(matches!(wait, subprocess::ExitStatus::Exited(1),)); -} - -#[test] -fn test_omicron_dev_no_args() { - let exec = Exec::cmd(path_to_omicron_dev()); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_USAGE, &stderr_text); - assert_contents("tests/output/cmd-omicron-dev-noargs-stdout", &stdout_text); - assert_contents("tests/output/cmd-omicron-dev-noargs-stderr", &stderr_text); -} - -#[test] -fn test_omicron_dev_bad_cmd() { - let exec = Exec::cmd(path_to_omicron_dev()).arg("bogus-command"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_USAGE, &stderr_text); - assert_contents( - "tests/output/cmd-omicron-dev-bad-cmd-stdout", - &stdout_text, - ); - assert_contents( - "tests/output/cmd-omicron-dev-bad-cmd-stderr", - &stderr_text, - ); -} - -#[test] -fn test_omicron_dev_db_populate_no_args() { - let exec = Exec::cmd(path_to_omicron_dev()).arg("db-populate"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_USAGE, &stderr_text); - assert_contents( - "tests/output/cmd-omicron-dev-db-populate-noargs-stdout", - &stdout_text, - ); - assert_contents( - "tests/output/cmd-omicron-dev-db-populate-noargs-stderr", - &stderr_text, - ); -} - -#[test] -fn test_omicron_dev_db_wipe_no_args() { - let exec = Exec::cmd(path_to_omicron_dev()).arg("db-wipe"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_USAGE, 
&stderr_text); - assert_contents( - "tests/output/cmd-omicron-dev-db-wipe-noargs-stdout", - &stdout_text, - ); - assert_contents( - "tests/output/cmd-omicron-dev-db-wipe-noargs-stderr", - &stderr_text, - ); -} - -#[test] -fn test_cert_create() { - let tmpdir = camino_tempfile::tempdir().unwrap(); - println!("tmpdir: {}", tmpdir.path()); - let output_base = format!("{}/test-", tmpdir.path()); - let exec = Exec::cmd(path_to_omicron_dev()) - .arg("cert-create") - .arg(output_base) - .arg("foo.example") - .arg("bar.example"); - let (exit_status, _, stderr_text) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - let cert_path = tmpdir.path().join("test-cert.pem"); - let key_path = tmpdir.path().join("test-key.pem"); - let cert_contents = std::fs::read(&cert_path) - .with_context(|| format!("reading certificate path {:?}", cert_path)) - .unwrap(); - let key_contents = std::fs::read(&key_path) - .with_context(|| format!("reading private key path: {:?}", key_path)) - .unwrap(); - let certs_pem = openssl::x509::X509::stack_from_pem(&cert_contents) - .context("parsing certificate") - .unwrap(); - let private_key = openssl::pkey::PKey::private_key_from_pem(&key_contents) - .context("parsing private key") - .unwrap(); - assert!(certs_pem - .iter() - .last() - .unwrap() - .public_key() - .unwrap() - .public_eq(&private_key)); -} diff --git a/docs/how-to-run-simulated.adoc b/docs/how-to-run-simulated.adoc index 86f7a0915b..c9723b27d1 100644 --- a/docs/how-to-run-simulated.adoc +++ b/docs/how-to-run-simulated.adoc @@ -52,8 +52,8 @@ You don't need to do this again if you just did it. But you'll need to do it ea To **run Omicron** you need to run several programs: -* a CockroachDB cluster. For development, you can use the `omicron-dev` tool in this repository to start a single-node CockroachDB cluster **that will delete the database when you shut it down.** -* a ClickHouse server. 
You should use the `omicron-dev` tool for this as well, see below, and as with CockroachDB, +* a CockroachDB cluster. For development, you can use the `db-dev` tool in this repository to start a single-node CockroachDB cluster **that will delete the database when you shut it down.** +* a ClickHouse server. You should use the `ch-dev` tool for this, see below, and as with CockroachDB, the database files will be deleted when you stop the program. * `nexus`: the guts of the control plane * `sled-agent-sim`: a simulator for the component that manages a single sled @@ -68,7 +68,7 @@ You can run these by hand, but it's easier to use `omicron-dev run-all`. See be + [source,text] ---- -$ omicron-dev run-all +$ cargo run --bin omicron-dev -- run-all omicron-dev: setting up all services ... log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.4647.0.log note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.4647.0.log" @@ -98,13 +98,13 @@ There are many reasons it's useful to run the pieces of the stack by hand, espec CAUTION: This process does not currently work. See https://github.com/oxidecomputer/omicron/issues/4421[omicron#4421] for details. The pieces here may still be useful for reference. -. Start CockroachDB using `omicron-dev db-run`: +. 
Start CockroachDB using `db-dev run`: + [source,text] ---- -$ cargo run --bin=omicron-dev -- db-run +$ cargo run --bin db-dev -- run Finished dev [unoptimized + debuginfo] target(s) in 0.15s - Running `target/debug/omicron-dev db-run` + Running `target/debug/db-dev run` omicron-dev: using temporary directory for database store (cleaned up on clean exit) omicron-dev: will run this to start CockroachDB: cockroach start-single-node --insecure --http-addr=:0 --store /var/tmp/omicron_tmp/.tmpM8KpTf/data --listen-addr 127.0.0.1:32221 --listening-url-file /var/tmp/omicron_tmp/.tmpM8KpTf/listen-url @@ -157,7 +157,7 @@ Note that as the output indicates, this cluster will be available to anybody tha + [source,text] ---- -$ cargo run --bin omicron-dev -- ch-run +$ cargo run --bin ch-dev run Finished dev [unoptimized + debuginfo] target(s) in 0.47s Running `target/debug/omicron-dev ch-run` omicron-dev: running ClickHouse (PID: 2463), full command is "clickhouse server --log-file /var/folders/67/2tlym22x1r3d2kwbh84j298w0000gn/T/.tmpJ5nhot/clickhouse-server.log --errorlog-file /var/folders/67/2tlym22x1r3d2kwbh84j298w0000gn/T/.tmpJ5nhot/clickhouse-server.errlog -- --http_port 8123 --path /var/folders/67/2tlym22x1r3d2kwbh84j298w0000gn/T/.tmpJ5nhot" @@ -167,7 +167,7 @@ omicron-dev: using /var/folders/67/2tlym22x1r3d2kwbh84j298w0000gn/T/.tmpJ5nhot f If you wish to start a ClickHouse replicated cluster instead of a single node, run the following instead: [source,text] --- -$ cargo run --bin omicron-dev -- ch-run --replicated +$ cargo run --bin ch-dev run --replicated Finished dev [unoptimized + debuginfo] target(s) in 0.31s Running `target/debug/omicron-dev ch-run --replicated` omicron-dev: running ClickHouse cluster with configuration files: @@ -221,7 +221,7 @@ To do this, first run `omicron-dev run-all`: [source,text] ---- -$ cargo run --bin=omicron-dev -- run-all +$ cargo run --bin omicron-dev run-all Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.95s Running 
`target/debug/omicron-dev run-all` omicron-dev: setting up all services ... @@ -312,16 +312,16 @@ Once everything is up and running, you can use the system in a few ways: When you run the above, you will wind up with Nexus listening on HTTP (with no TLS) on its external address. This is convenient for debugging, but not representative of a real system. If you want to run it with TLS, you need to tweak the above procedure slightly: 1. You'll need to use the "Running the pieces by hand" section. `omicron-dev run-all` does not currently provide a way to do this (because it doesn't have a way to specify a certificate to be used during rack initialization). -2. Acquire a TLS certificate. The easiest approach is to use `omicron-dev cert-create` to create a self-signed certificate. However you get one, it should be valid for the domain corresponding to your recovery Silo. When you run the pieces by hand, this would be `demo-silo.sys.oxide-dev.test`. If you want a certificate you can use for multiple Silos, make it a wildcard certificate. Here's an example: +2. Acquire a TLS certificate. The easiest approach is to use `cert-dev create` to create a self-signed certificate. However you get one, it should be valid for the domain corresponding to your recovery Silo. When you run the pieces by hand, this would be `demo-silo.sys.oxide-dev.test`. If you want a certificate you can use for multiple Silos, make it a wildcard certificate. Here's an example: + [source,text] ---- -$ cargo run --bin=omicron-dev -- cert-create demo- '*.sys.oxide-dev.test' +$ cargo run --bin cert-dev create demo- '*.sys.oxide-dev.test' wrote certificate to demo-cert.pem wrote private key to demo-key.pem ---- 3. Modify your Nexus configuration file to include `tls = true`. See `./nexus/examples/config.toml` for an example. This property is present but commented-out in that file. 
If you're running on standard port 80 (which is not usually the case in development), you may also want to change the `deployment.dropshot_external.bind_address` port to 443. -4. When you run `sled-agent-sim`, pass the `--rss-tls-cert` and `--rss-tls-key` options as well. These should refer to the files created by `omicron-dev cert-create` above. (They can be any PEM-formatted x509 certificate and associated private key.) +4. When you run `sled-agent-sim`, pass the `--rss-tls-cert` and `--rss-tls-key` options as well. These should refer to the files created by `cert-dev create` above. (They can be any PEM-formatted x509 certificate and associated private key.) 5. Usually at this point you'll be using a self-signed certificate for a domain that's not publicly resolvable with DNS. This makes it hard to use standard clients. Fortunately, `curl` does have flags to make this easy. Continuing with this example, assuming your Nexus HTTPS server is listening on 127.0.0.1:12220 and your Silo's DNS name is `demo-silo.sys.oxide-dev.test`: + [source,text] diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 996ee446f2..98d86f6d8a 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -244,7 +244,7 @@ You can skip this step. In that case, the externally-facing services (API and c You can generate a self-signed TLS certificate chain with: ---- -$ cargo run --bin=omicron-dev -- cert-create ./smf/sled-agent/$MACHINE/initial-tls- '*.sys.oxide.test' +$ cargo run --bin cert-dev create ./smf/sled-agent/$MACHINE/initial-tls- '*.sys.oxide.test' ---- === Rack setup configuration diff --git a/docs/repo.adoc b/docs/repo.adoc index 22fc07ae54..815af74d1d 100644 --- a/docs/repo.adoc +++ b/docs/repo.adoc @@ -15,7 +15,7 @@ To help the development process, we seek: * to have clear and up-to-date https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run-simulated.adoc#installing-prerequisites[instructions] for setting up a development environment from scratch. 
Most of this is automated. CI uses the same automation to go from a bare environment to one that builds and tests Omicron, so this automation is tested regularly with the rest of the repo. * to have clear instructions for basic activities like formatting code, running clippy, running tests, etc. These should be consistent across components and across local development vs. CI. -* to prioritize debugging and fixing flaky tests so that developers can always expect the tests to pass. Failures don't necessarily need to be reproducible to debug them. The test suite preserves trace-level log files and database contents from failed test runs. You can inspect the database contents using `omicron-dev db-run` to spin up a transient database instance pointed at the saved database contents. +* to prioritize debugging and fixing flaky tests so that developers can always expect the tests to pass. Failures don't necessarily need to be reproducible to debug them. The test suite preserves trace-level log files and database contents from failed test runs. You can inspect the database contents using `db-dev run` to spin up a transient database instance pointed at the saved database contents. * to ensure that a fresh clone and build of the repo should produce equivalent software to any other clone, including the CI environment. If tests pass for one developer on the tip of "main", they should pass for other developers as well as CI. We use rust-toolchain and Cargo.lock to ensure that developers are getting a consistent toolchain and packages as each other and CI. Omicron houses many related components in one repo: diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index 724b25162d..4fcb126356 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -2,7 +2,7 @@ //! //! Initial populating of the CockroachDB database happens in two different ways: //! -//! 1. During "rack setup" (or during `omicron-dev db-run` or test suite +//! 1. 
During "rack setup" (or during `db-dev run` or test suite //! initialization), we create the omicron database, schema, and the *bare //! minimum* data that needs to be there. //! 2. Every time Nexus starts up, we attempts to insert a bunch of built-in diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index fcb14a4f15..a3cc0e58b6 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -653,7 +653,7 @@ impl Drop for CockroachInstance { "WARN: temporary directory leaked: {path:?}\n\ \tIf you would like to access the database for debugging, run the following:\n\n\ \t# Run the database\n\ - \tcargo run --bin omicron-dev db-run --no-populate --store-dir {data_path:?}\n\ + \tcargo run --bin db-dev run --no-populate --store-dir {data_path:?}\n\ \t# Access the database. Note the port may change if you run multiple databases.\n\ \tcockroach sql --host=localhost:32221 --insecure", data_path = path.join("data"), diff --git a/wicket/README.md b/wicket/README.md index fc1c93fe83..89515662eb 100644 --- a/wicket/README.md +++ b/wicket/README.md @@ -127,13 +127,14 @@ Making this simpler is tracked in The easiest way to do this is to run: ``` -cargo run -p omicron-dev mgs-run +cargo run --bin mgs-dev run ``` -This will print out a line similar to `omicron-dev: MGS API: http://[::1]:12225`. Note the address for use below. +This will print out a line similar to `mgs-dev: MGS API: http://[::1]:12225`. Note the address for use below. -Another option, which may lead to quicker iteration cycles if you're modifying -MGS or sp-sim, is to run the services by hand from the root of omicron: +#### Running sp-sim and MGS by hand + +If you need to run sp-sim and MGS separately, you can do so with: ``` cargo run --bin sp-sim -- sp-sim/examples/config.toml