From 767228971c3a0f224efc97e52196c251ca29d239 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 9 Feb 2024 20:09:36 +0000 Subject: [PATCH] Remove thing-flinger Thing-flinger was used to deploy multinode omicron clusters running on commodity hardware for early testing needs. This was primarily used for early experimentation with trust-quorum secret sharing and other preliminary multi-node experiments. It deployed via SSH and was never intended to be used forever. It was a stop-gap that nobody has used in a while now that we have real hardware with wicket and the a4x2 falcon testbed. Fixes #5034 --- Cargo.lock | 39 - Cargo.toml | 2 - dev-tools/thing-flinger/.gitignore | 1 - dev-tools/thing-flinger/Cargo.toml | 21 - dev-tools/thing-flinger/README.adoc | 222 ---- .../src/bin/deployment-example.toml | 36 - .../thing-flinger/src/bin/thing-flinger.rs | 968 ------------------ 7 files changed, 1289 deletions(-) delete mode 100644 dev-tools/thing-flinger/.gitignore delete mode 100644 dev-tools/thing-flinger/Cargo.toml delete mode 100644 dev-tools/thing-flinger/README.adoc delete mode 100644 dev-tools/thing-flinger/src/bin/deployment-example.toml delete mode 100644 dev-tools/thing-flinger/src/bin/thing-flinger.rs diff --git a/Cargo.lock b/Cargo.lock index 5814dd101a..b9f1ee2594 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1235,20 +1235,6 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216" -[[package]] -name = "crossbeam" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" -dependencies = [ - "cfg-if", - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - [[package]] name = "crossbeam-channel" version = "0.5.8" @@ -1283,16 +1269,6 @@ dependencies = [ "scopeguard", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.16" @@ -4915,21 +4891,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "omicron-deploy" -version = "0.1.0" -dependencies = [ - "anyhow", - "camino", - "clap 4.4.3", - "crossbeam", - "omicron-package", - "omicron-workspace-hack", - "serde", - "serde_derive", - "thiserror", -] - [[package]] name = "omicron-dev" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 65197da650..3498f385d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ members = [ "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/oxlog", - "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", "end-to-end-tests", @@ -96,7 +95,6 @@ default-members = [ "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/oxlog", - "dev-tools/thing-flinger", # Do not include xtask in the list of default members, because this causes # hakari to not work as well and build times to be longer. # See omicron#4392. diff --git a/dev-tools/thing-flinger/.gitignore b/dev-tools/thing-flinger/.gitignore deleted file mode 100644 index ea8c4bf7f3..0000000000 --- a/dev-tools/thing-flinger/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target diff --git a/dev-tools/thing-flinger/Cargo.toml b/dev-tools/thing-flinger/Cargo.toml deleted file mode 100644 index a427685871..0000000000 --- a/dev-tools/thing-flinger/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "omicron-deploy" -description = "Tools for deploying Omicron software to target machines" -version = "0.1.0" -edition = "2021" -license = "MPL-2.0" - -[dependencies] -anyhow.workspace = true -camino.workspace = true -clap.workspace = true -crossbeam.workspace = true -omicron-package.workspace = true -serde.workspace = true -serde_derive.workspace = true -thiserror.workspace = true -omicron-workspace-hack.workspace = true - -[[bin]] -name = "thing-flinger" -doc = false diff --git a/dev-tools/thing-flinger/README.adoc b/dev-tools/thing-flinger/README.adoc deleted file mode 100644 index 9966a7b747..0000000000 --- a/dev-tools/thing-flinger/README.adoc +++ /dev/null @@ -1,222 +0,0 @@ -Omicron is a complex piece of software consisting of many build and install-time dependencies. It's -intended to run primarily on illumos based systems, and as such is built to use runtime facilities -of illumos, such as https://illumos.org/man/5/smf[SMF]. Furthermore, Omicron is fundamentally a -distributed system, with its components intended to run on multiple servers communicating over the -network. In order to secure the system, certain cryptographic primitives, such as asymmetric key -pairs and shared secrets are required. Due to the nature of these cryptographic primitives, there is -a requirement for the distribution or creation of files unique to a specific server, such that no -other server has access to those files. Examples of this are private keys, and threshold key -shares, although other non-cryptographic unique files may also become necessary over time. - -In order to satisfy the above requirements of building and deploying a complex distributed system -consisting of unique, private files, two CLI tools have been created: - - . link:src/bin/omicron-package.rs[omicron-package] - build, package, install on local machine - . link:src/bin/thing-flinger.rs[thing-flinger] - build, package, deploy to remote machines - - -If a user is working on their local illumos based machine, and only wants to run -omicron in single node mode, they should follow the install instruction in -the link:../README.adoc[Omicron README] and use `omicron-package`. If the user -wishes for a more complete workflow, where they can code on their local laptop, -use a remote build machine, and install to multiple machines for a more realistic -deployment, they should use `thing-flinger`. - -The remainder of this document will describe a typical workflow for using -thing-flinger, pointing out room for improvement. - -== Environment and Configuration - - - +------------------+ +------------------+ - | | | | - | | | | - | Client |----------------> Builder | - | | | | - | | | | - +------------------+ +------------------+ - | - | - | - | - +---------------------------+--------------------------+ - | | | - | | | - | | | - +--------v---------+ +---------v--------+ +---------v--------+ - | | | | | | - | | | | | | - | Deployed Server | | Deployed Server | | Deployed Server | - | | | | | | - | | | | | | - +------------------+ +------------------+ +------------------+ - - -`thing-flinger` defines three types of nodes: - - * Client - Where a user typically edits their code and runs thing-flinger. This can run any OS. - * Builder - A Helios box where Omicron is built and packaged - * Deployed Server - Helios machines where Omicron will be installed and run - -It's not at all necessary for these to be separate nodes. For example, a client and builder can be -the same machine, as long as it's a Helios box. Same goes for Builder and a deployment server. The -benefit of this separation though, is that it allows editing on something like a laptop, without -having to worry about setting up a development environment on an illumos based host. - -Machine topology is configured in a `TOML` file that is passed on the command line. All illumos -machines are listed under `servers`, and just the names are used for configuring a builder and -deployment servers. An link:src/bin/deployment-example.toml[example] is provided. - -Thing flinger works over SSH, and so the user must have the public key of their client configured -for their account on all servers. SSH agent forwarding is used to prevent the need for the keys of -the builder to also be on the other servers, thus minimizing needed server configuration. - -== Typical Workflow - -=== Prerequisites - -Ensure you have an account on all illumos boxes, with the client public key in -`~/.ssh/authorized_keys`. - -.The build machine must have Rust and cargo installed, as well as -all the dependencies for Omicron installed. Following the *prerequisites* in the -https://github.com/oxidecomputer/omicron/#build-and-run[Build and run] section of the main Omicron -README is probably a good idea. - -==== Update `config-rss.toml` - -Currently rack setup is driven by a configuration file that lives at -`smf/sled-agent/non-gimlet/config-rss.toml` in the root of this repository. The committed -configuration of that file contains a single `requests` entry (with many -services inside it), which means it will start services on only one sled. To -start services (e.g., nexus) on multiple sleds, add additional entries to that -configuration file before proceeding. - -=== Command Based Workflow - -==== sync -Copy your source code to the builder. - -`+cargo run --bin thing-flinger -- -c sync+` - -==== Install Prerequisites -Install necessary build and runtime dependencies (including downloading prebuilt -binaries like Clickhouse and CockroachDB) on the builder and all deployment -targets. This step only needs to be performed once, absent any changes to the -dependencies, but is idempotent so may be run multiple times. - -`+cargo run --bin thing-flinger -- -c install-prereqs+` - -==== check (optional) -Run `cargo check` on the builder against the copy of `omicron` that was sync'd -to it in the previous step. - -`+cargo run --bin thing-flinger -- -c build check+` - -==== package -Build and package omicron using `omicron-package` on the builder. - -`+cargo run --bin thing-flinger -- -c build package+` - -==== overlay -Create files that are unique to each deployment server. - -`+cargo run --bin thing-flinger -- -c overlay+` - -==== install -Install omicron to all machines, in parallel. This consists of copying the packaged omicron tarballs -along with overlay files, and omicron-package and its manifest to a `staging` directory on each -deployment server, and then running omicron-package, installing overlay files, and restarting -services. - -`+cargo run --bin thing-flinger -- -c deploy install+` - -==== uninstall -Uninstall omicron from all machines. - -`+cargo run --bin thing-flinger -- -c deploy uninstall+` - -=== Current Limitations - -`thing-flinger` is an early prototype. It has served so far to demonstrate that unique files, -specifically secret shares, can be created and distributed over ssh, and that omicron can be -installed remotely using `omicron-package`. It is not currently complete enough to fully test a -distributed omicron setup, as the underlying dependencies are not configured yet. Specifically, -`CockroachDB` and perhaps `Clickhouse`, need to be configured to run in multiple server mode. It's -anticipated that the `overlay` feature of `thing-flinger` can be used to generate and distribute -configs for this. - -=== Design rationale - -`thing-flinger` is a command line program written in rust. It was written this way to build upon -`omicron-package`, which is also in rust, as that is our default language of choice at Oxide. -`thing-flinger` is based around SSH, as that is the minimal viable requirement for a test tool such -as this. Additionally, it provides for the most straightforward implementation, and takes the least -effort to use securely. This particular implementation wraps the openssh ssh client via -`std::process::Command`, rather than using the `ssh2` crate, because ssh2, as a wrapper around -`libssh`, does not support agent-forwarding. - -== Notes on Using VMs as Deployed Servers on a Linux Host - -TODO: This section should be fleshed out more and potentially lifted to its own -document; for now this is a collection of rough notes. - ---- - -It's possible to use a Linux libvirt host running multiple helios VMs as the -builder/deployment server targets, but it requires some additional setup beyond -`https://github.com/oxidecomputer/helios-engvm[helios-engvm]`. - -`thing-flinger` does not have any support for running the -`tools/create_virtual_hardware.sh` script; this will need to be done by hand on -each VM. - ---- - -To enable communication between the VMs over their IPv6 bootstrap networks: - -1. Enable IPv6 and DHCP on the virtual network libvirt uses for the VMs; e.g., - -```xml - - - - - -``` - -After booting the VMs with this enabled, they should be able to ping each other -over their acquired IPv6 addresses, but connecting to each other over the -`bootstrap6` interface that sled-agent creates will fail. - -2. Explicitly add routes in the Linux host for the `bootstrap6` addresses, -specifying the virtual interface libvirt created that is used by the VMs. - -``` -bash% sudo ip -6 route add fdb0:5254:13:7331::1/64 dev virbr1 -bash% sudo ip -6 route add fdb0:5254:f0:acfd::1/64 dev virbr1 -``` - -3. Once the sled-agents advance sufficiently to set up `sled6` interfaces, -routes need to be added for them both in the Linux host and in the Helios VMs. -Assuming two sleds with these interfaces: - -``` -# VM 1 -vioif0/sled6 static ok fd00:1122:3344:1::1/64 -# VM 2 -vioif0/sled6 static ok fd00:1122:3344:2::1/64 -``` - -The Linux host needs to be told to route that subnet to the appropriate virtual -interface: - -``` -bash% ip -6 route add fd00:1122:3344::1/48 dev virbr1 -``` - -and each Helios VM needs to be told to route that subnet to the host gateway: - -``` -vm% pfexec route add -inet6 fd00:1122:3344::/48 $IPV6_HOST_GATEWAY_ADDR -``` diff --git a/dev-tools/thing-flinger/src/bin/deployment-example.toml b/dev-tools/thing-flinger/src/bin/deployment-example.toml deleted file mode 100644 index 6d85de2ba6..0000000000 --- a/dev-tools/thing-flinger/src/bin/deployment-example.toml +++ /dev/null @@ -1,36 +0,0 @@ -# This manifest describes the servers that omicron will be installed to, along -# with any ancillary information specific to a given server. -# -# It is ingested by the `thing-flinger` tool. - -# This must be an absolute path. It refers to the path to Omicron on the -# machine where thing-flinger is being executed. -omicron_path = "/local/path/to/omicron" - -[builder] -# `server` must refer to one of the `servers` in the servers table -server = "foo" -# This must be an absolute path. It refers to the path to Omicron on the -# builder server. -omicron_path = "/remote/path/to/omicron" - -[deployment] -# which server is responsible for running the rack setup service; must -# refer to one of the `servers` in the servers table -rss_server = "foo" -# Location where files to install will be placed before running -# `omicron-package install` -# -# This must be an absolute path -# We specifically allow for $HOME in validating the absolute path -staging_dir = "$HOME/omicron_staging" -# which servers to deploy -servers = ["foo", "bar"] - -[servers.foo] -username = "me" -addr = "foo" - -[servers.bar] -username = "me" -addr = "bar" diff --git a/dev-tools/thing-flinger/src/bin/thing-flinger.rs b/dev-tools/thing-flinger/src/bin/thing-flinger.rs deleted file mode 100644 index 43b137790d..0000000000 --- a/dev-tools/thing-flinger/src/bin/thing-flinger.rs +++ /dev/null @@ -1,968 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Utility for deploying Omicron to remote machines - -use omicron_package::{parse, BuildCommand, DeployCommand}; - -use camino::{Utf8Path, Utf8PathBuf}; -use std::collections::{BTreeMap, BTreeSet}; -use std::process::Command; - -use anyhow::{Context, Result}; -use clap::{Parser, Subcommand}; -use crossbeam::thread::{self, ScopedJoinHandle}; -use serde_derive::Deserialize; -use thiserror::Error; - -// A server on which omicron source should be compiled into packages. -#[derive(Deserialize, Debug)] -struct Builder { - server: String, - omicron_path: Utf8PathBuf, -} - -// A server on which an omicron package is deployed. -#[derive(Deserialize, Debug, Eq, PartialEq)] -struct Server { - username: String, - addr: String, -} - -#[derive(Deserialize, Debug)] -struct Deployment { - rss_server: String, - staging_dir: Utf8PathBuf, - servers: BTreeSet, -} - -#[derive(Debug, Deserialize)] -struct Config { - omicron_path: Utf8PathBuf, - builder: Builder, - servers: BTreeMap, - deployment: Deployment, - - #[serde(default)] - rss_config_path: Option, - - #[serde(default)] - debug: bool, -} - -impl Config { - fn release_arg(&self) -> &str { - if self.debug { - "" - } else { - "--release" - } - } - - fn deployment_servers(&self) -> impl Iterator { - self.servers.iter().filter_map(|(name, s)| { - if self.deployment.servers.contains(name) { - Some(s) - } else { - None - } - }) - } -} - -fn parse_into_set(src: &str) -> Result, &'static str> { - Ok(src.split_whitespace().map(|s| s.to_owned()).collect()) -} - -#[derive(Debug, Subcommand)] -enum SubCommand { - /// Run the given command on the given servers, or all servers if none are - /// specified. - /// - /// Be careful! - Exec { - /// The command to run - #[clap(short, long, action)] - cmd: String, - - /// The servers to run the command on - #[clap(short, long, value_parser = parse_into_set)] - servers: Option>, - }, - - /// Install necessary prerequisites on the "builder" server and all "deploy" - /// servers. - InstallPrereqs, - - /// Sync our local source to the build host - Sync, - - /// Runs a command on the "builder" server. - #[clap(name = "build", subcommand)] - Builder(BuildCommand), - - /// Runs a command on all the "deploy" servers. - #[clap(subcommand)] - Deploy(DeployCommand), - - /// Create an overlay directory tree for each deployment server - /// - /// Each directory tree contains unique files for the given server that will - /// be populated in the svc/pkg dir. - /// - /// This is a separate subcommand so that we can reconstruct overlays - /// without rebuilding or repackaging. - Overlay, -} - -#[derive(Debug, Parser)] -#[clap( - name = "thing-flinger", - about = "A tool for synchronizing packages and configs between machines" -)] -struct Args { - /// The path to the deployment manifest TOML file - #[clap( - short, - long, - help = "Path to deployment manifest toml file", - action - )] - config: Utf8PathBuf, - - #[clap( - short, - long, - help = "The name of the build target to use for this command" - )] - target: String, - - /// The output directory, where artifacts should be built and staged - #[clap(long = "artifacts", default_value = "out/")] - artifact_dir: Utf8PathBuf, - - #[clap(subcommand)] - subcommand: SubCommand, -} - -/// Errors which can be returned when executing subcommands -#[derive(Error, Debug)] -enum FlingError { - #[error("Servers not listed in configuration: {0:?}")] - InvalidServers(Vec), - - /// Failed to rsync omicron to build host - #[error("Failed to sync {src} with {dst}")] - FailedSync { src: String, dst: String }, - - /// The given path must be absolute - #[error("Path for {field} must be absolute")] - NotAbsolutePath { field: &'static str }, -} - -// How should `ssh_exec` be run? -enum SshStrategy { - // Forward agent and source .profile - Forward, - - // Don't forward agent, but source .profile - NoForward, - - // Don't forward agent and don't source .profile - NoForwardNoProfile, -} - -impl SshStrategy { - fn forward_agent(&self) -> bool { - match self { - SshStrategy::Forward => true, - _ => false, - } - } - - fn source_profile(&self) -> bool { - match self { - SshStrategy::Forward | &SshStrategy::NoForward => true, - _ => false, - } - } -} - -// TODO: run in parallel when that option is given -fn do_exec( - config: &Config, - cmd: String, - servers: Option>, -) -> Result<()> { - if let Some(ref servers) = servers { - validate_servers(servers, &config.servers)?; - - for name in servers { - let server = &config.servers[name]; - ssh_exec(&server, &cmd, SshStrategy::NoForward)?; - } - } else { - for (_, server) in config.servers.iter() { - ssh_exec(&server, &cmd, SshStrategy::NoForward)?; - } - } - Ok(()) -} - -// start an `rsync` command with args common to all our uses -fn rsync_common() -> Command { - let mut cmd = Command::new("rsync"); - cmd.arg("-az") - .arg("-e") - .arg("ssh -o StrictHostKeyChecking=no") - .arg("--delete") - .arg("--progress") - .arg("--out-format") - .arg("File changed: %o %t %f"); - cmd -} - -fn do_sync(config: &Config) -> Result<()> { - let builder = - config.servers.get(&config.builder.server).ok_or_else(|| { - FlingError::InvalidServers(vec![config.builder.server.clone()]) - })?; - - // For rsync to copy from the source appropriately we must guarantee a - // trailing slash. - let src = format!( - "{}/", - config.omicron_path.canonicalize_utf8().with_context(|| format!( - "could not canonicalize {}", - config.omicron_path - ))? - ); - let dst = format!( - "{}@{}:{}", - builder.username, builder.addr, config.builder.omicron_path - ); - - println!("Synchronizing source files to: {}", dst); - let mut cmd = rsync_common(); - - // exclude build and development environment artifacts - cmd.arg("--exclude") - .arg("target/") - .arg("--exclude") - .arg("*.vdev") - .arg("--exclude") - .arg("*.swp") - .arg("--exclude") - .arg(".git/") - .arg("--exclude") - .arg("out/"); - - // exclude `config-rss.toml`, which needs to be sent to only one target - // system. we handle this in `do_overlay` below. - cmd.arg("--exclude").arg("**/config-rss.toml"); - - // finish with src/dst - cmd.arg(&src).arg(&dst); - let status = - cmd.status().context(format!("Failed to run command: ({:?})", cmd))?; - if !status.success() { - return Err(FlingError::FailedSync { src, dst }.into()); - } - - Ok(()) -} - -fn copy_to_deployment_staging_dir( - config: &Config, - src: String, - description: &str, -) -> Result<()> { - let partial_cmd = || { - let mut cmd = rsync_common(); - cmd.arg("--relative"); - cmd.arg(&src); - cmd - }; - - // A function for each deployment server to run in parallel - let fns = config.deployment_servers().map(|server| { - || { - let dst = format!( - "{}@{}:{}", - server.username, server.addr, config.deployment.staging_dir - ); - let mut cmd = partial_cmd(); - cmd.arg(&dst); - let status = cmd - .status() - .context(format!("Failed to run command: ({:?})", cmd))?; - if !status.success() { - return Err( - FlingError::FailedSync { src: src.clone(), dst }.into() - ); - } - Ok(()) - } - }); - - let named_fns = config.deployment.servers.iter().zip(fns); - run_in_parallel(description, named_fns); - - Ok(()) -} - -fn rsync_config_needed_for_tools(config: &Config) -> Result<()> { - let src = format!( - // the `./` here is load-bearing; it interacts with `--relative` to tell - // rsync to create `smf/sled-agent` but none of its parents - "{}/./smf/sled-agent/", - config.omicron_path.canonicalize_utf8().with_context(|| format!( - "could not canonicalize {}", - config.omicron_path - ))? - ); - - copy_to_deployment_staging_dir(config, src, "Copy smf/sled-agent dir") -} - -fn rsync_tools_dir_to_deployment_servers(config: &Config) -> Result<()> { - // we need to rsync `./tools/*` to each of the deployment targets (the - // "builder" already has it via `do_sync()`), and then run `pfexec - // tools/install_prerequisites.sh` on each system. - let src = format!( - // the `./` here is load-bearing; it interacts with `--relative` to tell - // rsync to create `tools` but none of its parents - "{}/./tools/", - config.omicron_path.canonicalize_utf8().with_context(|| format!( - "could not canonicalize {}", - config.omicron_path - ))? - ); - copy_to_deployment_staging_dir(config, src, "Copy tools dir") -} - -fn do_install_prereqs(config: &Config) -> Result<()> { - rsync_config_needed_for_tools(config)?; - rsync_tools_dir_to_deployment_servers(config)?; - install_rustup_on_deployment_servers(config); - create_virtual_hardware_on_deployment_servers(config); - create_external_tls_cert_on_builder(config)?; - - // Create a set of servers to install prereqs to - let builder = &config.servers[&config.builder.server]; - let build_server = (builder, &config.builder.omicron_path); - let all_servers = std::iter::once(build_server).chain( - config.deployment_servers().filter_map(|server| { - // Don't duplicate the builder - if server.addr != builder.addr { - Some((server, &config.deployment.staging_dir)) - } else { - None - } - }), - ); - - let server_names = std::iter::once(&config.builder.server).chain( - config - .deployment - .servers - .iter() - .filter(|s| **s != config.builder.server), - ); - - // Install functions to run in parallel on each server - let fns = all_servers.map(|(server, root_path)| { - || { - // -y: assume yes instead of prompting - // -p: skip check that deps end up in $PATH - let (script, script_type) = if *server == *builder { - ("install_builder_prerequisites.sh -y -p", "builder") - } else { - ("install_runner_prerequisites.sh -y", "runner") - }; - - let cmd = format!( - "cd {} && mkdir -p out && pfexec ./tools/{}", - root_path.clone(), - script - ); - println!( - "Install {} prerequisites on {}", - script_type, server.addr - ); - ssh_exec(server, &cmd, SshStrategy::NoForward) - } - }); - - let named_fns = server_names.zip(fns); - run_in_parallel("Install prerequisites", named_fns); - - Ok(()) -} - -fn create_external_tls_cert_on_builder(config: &Config) -> Result<()> { - let builder = &config.servers[&config.builder.server]; - let cmd = format!( - "cd {} && ./tools/create_self_signed_cert.sh", - config.builder.omicron_path, - ); - ssh_exec(&builder, &cmd, SshStrategy::NoForward) -} - -fn create_virtual_hardware_on_deployment_servers(config: &Config) { - let cmd = format!( - "cd {} && pfexec ./tools/create_virtual_hardware.sh", - config.deployment.staging_dir - ); - let fns = config.deployment_servers().map(|server| { - || { - println!("Create virtual hardware on {}", server.addr); - ssh_exec(server, &cmd, SshStrategy::NoForward) - } - }); - - let named_fns = config.deployment.servers.iter().zip(fns); - run_in_parallel("Create virtual hardware", named_fns); -} - -fn install_rustup_on_deployment_servers(config: &Config) { - let cmd = "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y"; - let fns = config.deployment_servers().map(|server| { - || ssh_exec(server, cmd, SshStrategy::NoForwardNoProfile) - }); - - let named_fns = config.deployment.servers.iter().zip(fns); - run_in_parallel("Install rustup", named_fns); -} - -// Build omicron-package and omicron-deploy on the builder -// -// We need to build omicron-deploy for overlay file generation -fn do_build_minimal(config: &Config) -> Result<()> { - let server = &config.servers[&config.builder.server]; - let cmd = format!( - "cd {} && cargo build {} -p {} -p {}", - config.builder.omicron_path, - config.release_arg(), - "omicron-package", - "omicron-deploy" - ); - ssh_exec(&server, &cmd, SshStrategy::NoForward) -} - -fn do_package(config: &Config, artifact_dir: Utf8PathBuf) -> Result<()> { - let builder = &config.servers[&config.builder.server]; - - // We use a bash login shell to get a proper environment, so we have a path to - // postgres, and $DEP_PQ_LIBDIRS is filled in. This is required for building - // nexus. - // - // See https://github.com/oxidecomputer/omicron/blob/8757ec542ea4ffbadd6f26094ed4ba357715d70d/rpaths/src/lib.rs - let cmd = format!( - "bash -lc \ - 'cd {} && \ - cargo run {} --bin omicron-package -- package --out {}'", - config.builder.omicron_path, - config.release_arg(), - artifact_dir, - ); - - ssh_exec(&builder, &cmd, SshStrategy::NoForward) -} - -fn do_dot(_config: &Config) -> Result<()> { - anyhow::bail!("\"dot\" command is not supported for thing-flinger"); -} - -fn do_check(config: &Config) -> Result<()> { - let builder = &config.servers[&config.builder.server]; - - let cmd = format!( - "bash -lc \ - 'cd {} && \ - cargo run {} --bin omicron-package -- check'", - config.builder.omicron_path, - config.release_arg(), - ); - - ssh_exec(&builder, &cmd, SshStrategy::NoForward) -} - -fn do_uninstall(config: &Config) -> Result<()> { - let builder = &config.servers[&config.builder.server]; - for server in config.deployment_servers() { - copy_omicron_package_binary_to_staging(config, builder, server)?; - - // Run `omicron-package uninstall` on the deployment server - let cmd = format!( - "cd {} && pfexec ./omicron-package uninstall", - config.deployment.staging_dir, - ); - println!("$ {}", cmd); - ssh_exec(&server, &cmd, SshStrategy::Forward)?; - } - Ok(()) -} - -fn do_clean( - config: &Config, - artifact_dir: Utf8PathBuf, - install_dir: Utf8PathBuf, -) -> Result<()> { - let mut deployment_src = Utf8PathBuf::from(&config.deployment.staging_dir); - deployment_src.push(&artifact_dir); - let builder = &config.servers[&config.builder.server]; - for server in config.deployment_servers() { - copy_omicron_package_binary_to_staging(config, builder, server)?; - - // Run `omicron-package uninstall` on the deployment server - let cmd = format!( - "cd {} && pfexec ./omicron-package clean --in {} --out {}", - config.deployment.staging_dir, deployment_src, install_dir, - ); - println!("$ {}", cmd); - ssh_exec(&server, &cmd, SshStrategy::Forward)?; - } - Ok(()) -} - -fn run_in_parallel<'a, F>(op: &str, cmds: impl Iterator) -where - F: FnOnce() -> Result<()> + Send, -{ - thread::scope(|s| { - let named_handles: Vec<(_, ScopedJoinHandle<'_, Result<()>>)> = cmds - .map(|(server_name, f)| (server_name, s.spawn(|_| f()))) - .collect(); - - // Join all the handles and print the install status - for (server_name, handle) in named_handles { - match handle.join() { - Ok(Ok(())) => { - println!("{} completed for server: {}", op, server_name) - } - Ok(Err(e)) => { - println!( - "{} failed for server: {} with error: {}", - op, server_name, e - ) - } - Err(_) => { - println!( - "{} failed for server: {}. Thread panicked.", - op, server_name - ) - } - } - } - }) - .unwrap(); -} - -fn do_install( - config: &Config, - artifact_dir: &Utf8Path, - install_dir: &Utf8Path, -) { - let builder = &config.servers[&config.builder.server]; - let mut pkg_dir = Utf8PathBuf::from(&config.builder.omicron_path); - pkg_dir.push(artifact_dir); - - let fns = config.deployment.servers.iter().map(|server_name| { - (server_name, || { - single_server_install( - config, - &artifact_dir, - &install_dir, - pkg_dir.as_str(), - builder, - server_name, - ) - }) - }); - - run_in_parallel("Install", fns); -} - -fn do_overlay(config: &Config) -> Result<()> { - let builder = &config.servers[&config.builder.server]; - let mut root_path = Utf8PathBuf::from(&config.builder.omicron_path); - // TODO: This needs to match the artifact_dir in `package` - root_path.push("out/overlay"); - - // Build a list of directories for each server to be deployed and tag which - // one is the server to run RSS; e.g., for servers ["foo", "bar", "baz"] - // with root_path "/my/path", we produce - // [ - // "/my/path/foo/sled-agent/pkg", - // "/my/path/bar/sled-agent/pkg", - // "/my/path/baz/sled-agent/pkg", - // ] - // As we're doing so, record which directory is the one for the server that - // will run RSS. - let mut rss_server_dir = None; - - for server_name in &config.deployment.servers { - let mut dir = root_path.clone(); - dir.push(server_name); - dir.push("sled-agent/pkg"); - if *server_name == config.deployment.rss_server { - rss_server_dir = Some(dir.clone()); - break; - } - } - - // we know exactly one of the servers matches `rss_server` from our config - // validation, so we can unwrap here - let rss_server_dir = rss_server_dir.unwrap(); - - overlay_rss_config(builder, config, &rss_server_dir)?; - - Ok(()) -} - -fn overlay_rss_config( - builder: &Server, - config: &Config, - rss_server_dir: &Utf8Path, -) -> Result<()> { - // Sync `config-rss.toml` to the directory for the RSS server on the - // builder. - let src = if let Some(src) = &config.rss_config_path { - src.clone() - } else { - config.omicron_path.join("smf/sled-agent/non-gimlet/config-rss.toml") - }; - let dst = format!( - "{}@{}:{}/config-rss.toml", - builder.username, builder.addr, rss_server_dir - ); - - let mut cmd = rsync_common(); - cmd.arg(&src).arg(&dst); - - let status = - cmd.status().context(format!("Failed to run command: ({:?})", cmd))?; - if !status.success() { - return Err(FlingError::FailedSync { src: src.to_string(), dst }.into()); - } - - Ok(()) -} - -fn single_server_install( - config: &Config, - artifact_dir: &Utf8Path, - install_dir: &Utf8Path, - pkg_dir: &str, - builder: &Server, - server_name: &str, -) -> Result<()> { - let server = &config.servers[server_name]; - - println!( - "COPYING packages from builder ({}) -> deploy server ({})", - builder.addr, server_name - ); - copy_package_artifacts_to_staging(config, pkg_dir, builder, server)?; - - println!( - "COPYING deploy tool from builder ({}) -> deploy server ({})", - builder.addr, server_name - ); - copy_omicron_package_binary_to_staging(config, builder, server)?; - - println!( - "COPYING manifest from builder ({}) -> deploy server ({})", - builder.addr, server_name - ); - copy_package_manifest_to_staging(config, builder, server)?; - - println!("UNPACKING packages on deploy server ({})", server_name); - run_omicron_package_unpack_from_staging( - config, - server, - &artifact_dir, - &install_dir, - )?; - - println!( - "COPYING overlay files from builder ({}) -> deploy server ({})", - builder.addr, server_name - ); - copy_overlay_files_to_staging( - config, - pkg_dir, - builder, - server, - server_name, - )?; - - println!("INSTALLING overlay files into the install directory of the deploy server ({})", server_name); - install_overlay_files_from_staging(config, server, &install_dir)?; - - println!("STARTING services on the deploy server ({})", server_name); - run_omicron_package_activate_from_staging(config, server, &install_dir) -} - -// Copy package artifacts as a result of `omicron-package package` from the -// builder to the deployment server staging directory. -// -// This staging directory acts as an intermediate location where -// packages may reside prior to being installed. -fn copy_package_artifacts_to_staging( - config: &Config, - pkg_dir: &str, - builder: &Server, - destination: &Server, -) -> Result<()> { - let cmd = format!( - "rsync -avz -e 'ssh -o StrictHostKeyChecking=no' \ - --include 'out/' \ - --include 'out/*.tar' \ - --include 'out/*.tar.gz' \ - --exclude '*' \ - {} {}@{}:{}", - pkg_dir, - destination.username, - destination.addr, - config.deployment.staging_dir - ); - println!("$ {}", cmd); - ssh_exec(builder, &cmd, SshStrategy::Forward) -} - -fn copy_omicron_package_binary_to_staging( - config: &Config, - builder: &Server, - destination: &Server, -) -> Result<()> { - let mut bin_path = Utf8PathBuf::from(&config.builder.omicron_path); - bin_path.push(format!( - "target/{}/omicron-package", - if config.debug { "debug" } else { "release" } - )); - let cmd = format!( - "rsync -avz {} {}@{}:{}", - bin_path, - destination.username, - destination.addr, - config.deployment.staging_dir - ); - println!("$ {}", cmd); - ssh_exec(builder, &cmd, SshStrategy::Forward) -} - -fn copy_package_manifest_to_staging( - config: &Config, - builder: &Server, - destination: &Server, -) -> Result<()> { - let mut path = Utf8PathBuf::from(&config.builder.omicron_path); - path.push("package-manifest.toml"); - let cmd = format!( - "rsync {} {}@{}:{}", - path, - destination.username, - destination.addr, - config.deployment.staging_dir - ); - println!("$ {}", cmd); - ssh_exec(builder, &cmd, SshStrategy::Forward) -} - -fn run_omicron_package_activate_from_staging( - config: &Config, - destination: &Server, - install_dir: &Utf8Path, -) -> Result<()> { - // Run `omicron-package activate` on the deployment server - let cmd = format!( - "cd {} && pfexec ./omicron-package activate --out {}", - config.deployment.staging_dir, install_dir, - ); - - println!("$ {}", cmd); - ssh_exec(destination, &cmd, SshStrategy::Forward) -} - -fn run_omicron_package_unpack_from_staging( - config: &Config, - destination: &Server, - artifact_dir: &Utf8Path, - install_dir: &Utf8Path, -) -> Result<()> { - let mut deployment_src = Utf8PathBuf::from(&config.deployment.staging_dir); - deployment_src.push(&artifact_dir); - - // Run `omicron-package unpack` on the deployment server - let cmd = format!( - "cd {} && pfexec ./omicron-package unpack --in {} --out {}", - config.deployment.staging_dir, deployment_src, install_dir, - ); - - println!("$ {}", cmd); - ssh_exec(destination, &cmd, SshStrategy::Forward) -} - -fn copy_overlay_files_to_staging( - config: &Config, - pkg_dir: &str, - builder: &Server, - destination: &Server, - destination_name: &str, -) -> Result<()> { - let cmd = format!( - "rsync -avz {}/overlay/{}/ {}@{}:{}/overlay/", - pkg_dir, - destination_name, - destination.username, - destination.addr, - config.deployment.staging_dir - ); - println!("$ {}", cmd); - ssh_exec(builder, &cmd, SshStrategy::Forward) -} - -fn install_overlay_files_from_staging( - config: &Config, - destination: &Server, - install_dir: &Utf8Path, -) -> Result<()> { - let cmd = format!( - "pfexec cp -r {}/overlay/* {}", - config.deployment.staging_dir, install_dir - ); - println!("$ {}", cmd); - ssh_exec(&destination, &cmd, SshStrategy::NoForward) -} - -fn ssh_exec( - server: &Server, - remote_cmd: &str, - strategy: SshStrategy, -) -> Result<()> { - let remote_cmd = if strategy.source_profile() { - // Source .profile, so we have access to cargo. Rustup installs knowledge - // about the cargo path here. - String::from(". $HOME/.profile && ") + remote_cmd - } else { - remote_cmd.into() - }; - - let mut cmd = Command::new("ssh"); - if strategy.forward_agent() { - cmd.arg("-A"); - } - cmd.arg("-o") - .arg("StrictHostKeyChecking=no") - .arg("-l") - .arg(&server.username) - .arg(&server.addr) - .arg(&remote_cmd); - - // If the builder is the same as the client, this will likely not be set, - // as the keys will reside on the builder. - if let Some(auth_sock) = std::env::var_os("SSH_AUTH_SOCK") { - cmd.env("SSH_AUTH_SOCK", auth_sock); - } - let exit_status = cmd - .status() - .context(format!("Failed to run {} on {}", remote_cmd, server.addr))?; - if !exit_status.success() { - anyhow::bail!("Command failed: {}", exit_status); - } - - Ok(()) -} - -fn validate_servers( - chosen: &BTreeSet, - all: &BTreeMap, -) -> Result<(), FlingError> { - let all = all.keys().cloned().collect(); - let diff: Vec = chosen.difference(&all).cloned().collect(); - if !diff.is_empty() { - Err(FlingError::InvalidServers(diff)) - } else { - Ok(()) - } -} - -fn validate_absolute_path( - path: &Utf8Path, - field: &'static str, -) -> Result<(), FlingError> { - if path.is_absolute() || path.starts_with("$HOME") { - Ok(()) - } else { - Err(FlingError::NotAbsolutePath { field }) - } -} - -fn validate(config: &Config) -> Result<(), FlingError> { - validate_absolute_path(&config.omicron_path, "omicron_path")?; - validate_absolute_path( - &config.builder.omicron_path, - "builder.omicron_path", - )?; - validate_absolute_path( - &config.deployment.staging_dir, - "deployment.staging_dir", - )?; - - validate_servers( - &BTreeSet::from([ - config.builder.server.clone(), - config.deployment.rss_server.clone(), - ]), - &config.servers, - ) -} - -fn main() -> Result<()> { - let args = Args::try_parse()?; - let config = parse::<_, Config>(args.config)?; - - validate(&config)?; - - match args.subcommand { - SubCommand::Exec { cmd, servers } => { - do_exec(&config, cmd, servers)?; - } - SubCommand::Sync => do_sync(&config)?, - SubCommand::InstallPrereqs => do_install_prereqs(&config)?, - SubCommand::Builder(BuildCommand::Target { .. }) => { - todo!("Setting target not supported through thing-flinger") - } - SubCommand::Builder(BuildCommand::Package { .. }) => { - do_package(&config, args.artifact_dir)?; - } - SubCommand::Builder(BuildCommand::Stamp { .. }) => { - anyhow::bail!("Distributed package stamping not supported") - } - SubCommand::Builder(BuildCommand::Check) => do_check(&config)?, - SubCommand::Builder(BuildCommand::Dot) => { - do_dot(&config)?; - } - SubCommand::Deploy(DeployCommand::Install { install_dir }) => { - do_build_minimal(&config)?; - do_install(&config, &args.artifact_dir, &install_dir); - } - SubCommand::Deploy(DeployCommand::Uninstall) => { - do_build_minimal(&config)?; - do_uninstall(&config)?; - } - SubCommand::Deploy(DeployCommand::Clean { install_dir }) => { - do_build_minimal(&config)?; - do_clean(&config, args.artifact_dir, install_dir)?; - } - // TODO: It doesn't really make sense to allow the user direct access - // to these low level operations in thing-flinger. Should we not use - // the DeployCommand from omicron-package directly? - SubCommand::Deploy(_) => anyhow::bail!("Unsupported action"), - SubCommand::Overlay => do_overlay(&config)?, - } - Ok(()) -}