From f27b1126c624e49e0fee2d86a3f733176ae4dc73 Mon Sep 17 00:00:00 2001
From: Benjamin Naecker
Date: Sat, 20 Jan 2024 01:21:32 +0000
Subject: [PATCH] Groundwork for oximeter instance vCPU metrics

- Adds the silo and project IDs to the instance-ensure request from
  Nexus to the sled-agent. These are used as fields on the
  instance-related statistics. This metadata is currently unused, but
  will be forwarded to Propolis in the instance-ensure request once the
  server is updated to accept it.
- Defines a `VirtualMachine` oximeter target and `VcpuUsage` metric. The
  latter has a `state` field which corresponds to the named kstats
  published by the hypervisor that accumulate the time spent in a number
  of vCPU microstates. The combination of these should allow us to
  aggregate or break down vCPU usage by silo, project, instance, vCPU
  ID, and CPU state. Adds some simple mocks and tests for these.
- Adds more fine-grained feature flags to the `oximeter-instruments`
  crate.
---
 Cargo.lock                                    |   4 +-
 nexus/src/app/instance.rs                     |  16 +
 openapi/sled-agent.json                       |  27 ++
 oximeter/instruments/Cargo.toml               |  48 +-
 oximeter/instruments/src/kstat/mod.rs         |   5 +-
 .../instruments/src/kstat/virtual_machine.rs  | 424 ++++++++++++++++++
 oximeter/instruments/src/lib.rs               |   2 +-
 .../tests/output/virtual-machine-schema.json  |  34 ++
 oximeter/producer/src/lib.rs                  |  22 +-
 sled-agent/src/http_entrypoints.rs            |   1 +
 sled-agent/src/instance.rs                    |  15 +-
 sled-agent/src/instance_manager.rs            |   5 +
 sled-agent/src/metrics.rs                     | 101 +++--
 sled-agent/src/params.rs                      |  10 +
 sled-agent/src/sim/http_entrypoints.rs        |   1 +
 sled-agent/src/sim/sled_agent.rs              |   5 +-
 sled-agent/src/sled_agent.rs                  |  95 ++--
 17 files changed, 704 insertions(+), 111 deletions(-)
 create mode 100644 oximeter/instruments/src/kstat/virtual_machine.rs
 create mode 100644 oximeter/instruments/tests/output/virtual-machine-schema.json

diff --git a/Cargo.lock b/Cargo.lock
index 45d1d47199a..c31348a7810 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1924,7 +1924,7 @@ dependencies = [
 [[package]]
 name = "dropshot"
 version = "0.9.1-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#29ae98d1f909c6832661408a4c03f929e8afa6e9"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -1970,7 +1970,7 @@ dependencies = [
 [[package]]
 name = "dropshot_endpoint"
 version = "0.9.1-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#29ae98d1f909c6832661408a4c03f929e8afa6e9"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index f9246535259..57592664306 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -1146,6 +1146,21 @@ impl super::Nexus {
             .map(|ssh_key| ssh_key.public_key)
             .collect::<Vec<String>>();
 
+        // Construct instance metadata used to track its statistics.
+        //
+        // This currently means fetching just the silo ID, since we have all
+        // the other metadata already.
+        let silo_id = self
+            .current_silo_lookup(opctx)?
+            .lookup_for(authz::Action::Read)
+            .await?
+            .0
+            .id();
+        let metadata = sled_agent_client::types::InstanceMetadata {
+            silo_id,
+            project_id: db_instance.project_id,
+        };
+
         // Ask the sled agent to begin the state change. Then update the
         // database to reflect the new intermediate state. If this update is
         // not the newest one, that's fine.
That might just mean the sled agent @@ -1189,6 +1204,7 @@ impl super::Nexus { PROPOLIS_PORT, ) .to_string(), + metadata, }, ) .await diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 7b9a3efcdad..ccebbd273b1 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4589,6 +4589,14 @@ } ] }, + "metadata": { + "description": "Metadata used to track instance statistics.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceMetadata" + } + ] + }, "propolis_addr": { "description": "The address at which this VMM should serve a Propolis server API.", "type": "string" @@ -4610,6 +4618,7 @@ "required": [ "hardware", "instance_runtime", + "metadata", "propolis_addr", "propolis_id", "vmm_runtime" @@ -4741,6 +4750,24 @@ "snapshot_id" ] }, + "InstanceMetadata": { + "description": "Metadata used to track statistics about an instance.", + "type": "object", + "properties": { + "project_id": { + "type": "string", + "format": "uuid" + }, + "silo_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "project_id", + "silo_id" + ] + }, "InstanceMigrationSourceParams": { "description": "Instance runtime state to update for a migration.", "type": "object", diff --git a/oximeter/instruments/Cargo.toml b/oximeter/instruments/Cargo.toml index 8372b7c5602..5903f1da511 100644 --- a/oximeter/instruments/Cargo.toml +++ b/oximeter/instruments/Cargo.toml @@ -5,27 +5,49 @@ edition = "2021" license = "MPL-2.0" [dependencies] -cfg-if.workspace = true -chrono.workspace = true -dropshot.workspace = true -futures.workspace = true +cfg-if = { workspace = true, optional = true } +chrono = { workspace = true, optional = true } +dropshot = { workspace = true, optional = true } +futures = { workspace = true, optional = true } http = { workspace = true, optional = true } -oximeter.workspace = true -slog.workspace = true -tokio.workspace = true -thiserror.workspace = true -uuid.workspace = true -omicron-workspace-hack.workspace = true +oximeter = { workspace = true, optional = true } +slog = { workspace = true, optional = true } +tokio = { workspace = true, optional = true } +thiserror = { workspace = true, optional = true } +uuid = { workspace = true, optional = true } +omicron-workspace-hack = { workspace = true, optional = true } [features] -default = ["http-instruments", "kstat"] -http-instruments = ["http"] -kstat = ["kstat-rs"] +default = ["http-instruments", "datalink", "virtual-machine"] +http-instruments = [ + "dep:chrono", + "dep:dropshot", + "dep:futures", + "dep:http", + "dep:omicron-workspace-hack", + "dep:oximeter", + "dep:uuid" +] +kstat = [ + "dep:cfg-if", + "dep:chrono", + "dep:futures", + "dep:kstat-rs", + "dep:omicron-workspace-hack", + "dep:oximeter", + "dep:slog", + "dep:tokio", + "dep:thiserror", + "dep:uuid" +] +datalink = ["kstat"] +virtual-machine = ["kstat"] [dev-dependencies] rand.workspace = true slog-async.workspace = true slog-term.workspace = true +oximeter.workspace = true [target.'cfg(target_os = "illumos")'.dependencies] kstat-rs = { workspace = true, optional = true } diff --git a/oximeter/instruments/src/kstat/mod.rs b/oximeter/instruments/src/kstat/mod.rs index 90f34acae85..0b102ebd6f0 100644 --- a/oximeter/instruments/src/kstat/mod.rs +++ b/oximeter/instruments/src/kstat/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company //! 
Types for publishing kernel statistics via oximeter.
 //!
@@ -87,8 +87,11 @@ use std::cmp::Ordering;
 use std::collections::BTreeMap;
 use std::time::Duration;
 
+#[cfg(any(feature = "datalink", test))]
 pub mod link;
 mod sampler;
+#[cfg(any(feature = "virtual-machine", test))]
+pub mod virtual_machine;
 
 pub use sampler::CollectionDetails;
 pub use sampler::ExpirationBehavior;
diff --git a/oximeter/instruments/src/kstat/virtual_machine.rs b/oximeter/instruments/src/kstat/virtual_machine.rs
new file mode 100644
index 00000000000..72d4ba554b5
--- /dev/null
+++ b/oximeter/instruments/src/kstat/virtual_machine.rs
@@ -0,0 +1,424 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// Copyright 2024 Oxide Computer Company
+
+//! Types for tracking statistics about virtual machine instances.
+
+use crate::kstat::hrtime_to_utc;
+use crate::kstat::ConvertNamedData;
+use crate::kstat::Error;
+use chrono::DateTime;
+use chrono::Utc;
+use oximeter::types::Cumulative;
+use oximeter::FieldType;
+use oximeter::FieldValue;
+use oximeter::Metric;
+use oximeter::Sample;
+use oximeter::Target;
+use uuid::Uuid;
+
+#[cfg(not(test))]
+mod kstat_types {
+    pub use crate::kstat::KstatList;
+    pub use crate::kstat::KstatTarget;
+    pub use kstat_rs::Data;
+    pub use kstat_rs::Kstat;
+    pub use kstat_rs::Named;
+    pub use kstat_rs::NamedData;
+}
+
+// Mock the relevant subset of `kstat-rs` types needed for tests.
+#[cfg(test)]
+mod kstat_types {
+    #[derive(Debug)]
+    pub enum Data<'a> {
+        Named(Vec<Named<'a>>),
+        Null,
+    }
+
+    #[derive(Debug)]
+    pub enum NamedData<'a> {
+        UInt32(u32),
+        UInt64(u64),
+        String(&'a str),
+    }
+
+    #[derive(Debug)]
+    pub struct Kstat<'a> {
+        pub ks_module: &'a str,
+        pub ks_instance: i32,
+        pub ks_name: &'a str,
+        pub ks_snaptime: i64,
+    }
+
+    #[derive(Debug)]
+    pub struct Named<'a> {
+        pub name: &'a str,
+        pub value: NamedData<'a>,
+    }
+
+    impl<'a> super::ConvertNamedData for NamedData<'a> {
+        fn as_i32(&self) -> Result<i32, super::Error> {
+            unimplemented!()
+        }
+
+        fn as_u32(&self) -> Result<u32, super::Error> {
+            if let NamedData::UInt32(x) = self {
+                Ok(*x)
+            } else {
+                panic!()
+            }
+        }
+
+        fn as_i64(&self) -> Result<i64, super::Error> {
+            unimplemented!()
+        }
+
+        fn as_u64(&self) -> Result<u64, super::Error> {
+            if let NamedData::UInt64(x) = self {
+                Ok(*x)
+            } else {
+                panic!()
+            }
+        }
+    }
+}
+
+use kstat_types::*;
+
+/// A single virtual machine instance.
+#[derive(Clone, Debug)]
+pub struct VirtualMachine {
+    /// The silo to which the instance belongs.
+    pub silo_id: Uuid,
+    /// The project to which the instance belongs.
+    pub project_id: Uuid,
+    /// The ID of the instance.
+    pub instance_id: Uuid,
+
+    // This field is not published as part of the target field definitions. It
+    // is needed because the hypervisor currently creates kstats for each vCPU,
+    // regardless of whether they're activated. There is no way to tell from
+    // userland today which vCPU kstats are "real". We include this value here,
+    // and implement `oximeter::Target` manually.
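+    //
+    // Because the manual impl below reports only the three UUID fields,
+    // `n_vcpus` is used solely for filtering and never appears in the
+    // timeseries schema.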
+    pub n_vcpus: u32,
+}
+
+impl Target for VirtualMachine {
+    fn name(&self) -> &'static str {
+        "virtual_machine"
+    }
+
+    fn field_names(&self) -> &'static [&'static str] {
+        &["silo_id", "project_id", "instance_id"]
+    }
+
+    fn field_types(&self) -> Vec<FieldType> {
+        vec![FieldType::Uuid, FieldType::Uuid, FieldType::Uuid]
+    }
+
+    fn field_values(&self) -> Vec<FieldValue> {
+        vec![
+            self.silo_id.into(),
+            self.project_id.into(),
+            self.instance_id.into(),
+        ]
+    }
+}
+
+/// Metric tracking vCPU usage by state.
+#[derive(Clone, Debug, Metric)]
+pub struct VcpuUsage {
+    /// The vCPU ID.
+    pub vcpu_id: u32,
+    /// The state of the vCPU.
+    pub state: String,
+    /// The cumulative time spent in this state, in nanoseconds.
+    pub datum: Cumulative<u64>,
+}
+
+// The name of the kstat module containing virtual machine kstats.
+const VMM_KSTAT_MODULE_NAME: &str = "vmm";
+
+// The name of the kstat with virtual machine metadata (VM name currently).
+const VM_KSTAT_NAME: &str = "vm";
+
+// The named kstat holding the virtual machine's name. This is currently the
+// UUID assigned by the control plane to the virtual machine instance.
+const VM_NAME_KSTAT: &str = "vm_name";
+
+// The name prefix of the kstats containing vCPU usage data.
+const VCPU_KSTAT_PREFIX: &str = "vcpu";
+
+// Prefix for all named data with a valid vCPU microstate that we track.
+const VCPU_MICROSTATE_PREFIX: &str = "time_";
+
+// The number of expected vCPU microstates we track. This isn't load-bearing;
+// it is only used to help preallocate the array holding the `VcpuUsage`
+// samples.
+const N_VCPU_MICROSTATES: usize = 6;
+
+#[cfg(not(test))]
+impl KstatTarget for VirtualMachine {
+    // The VMM kstats are organized like so:
+    //
+    // - module: vmm
+    // - instance: a kernel-assigned integer
+    // - name: vm -> generic VM info, vcpuX -> info for each vCPU
+    //
+    // At this point in the code, we don't have that kstat instance, only the
+    // virtual machine instance's control plane UUID. However, the VM's "name"
+    // is assigned to be that control plane UUID in the hypervisor. See
+    // https://github.com/oxidecomputer/propolis/blob/759bf4a19990404c135e608afbe0d38b70bfa370/bin/propolis-server/src/lib/vm/mod.rs#L420
+    // for the current code which does that.
+    //
+    // So we need to indicate interest in any VMM-related kstat here, and we
+    // are forced to filter to the right instance by looking up the VM name
+    // inside the `to_samples()` method below.
+    fn interested(&self, kstat: &Kstat<'_>) -> bool {
+        kstat.ks_module == VMM_KSTAT_MODULE_NAME
+    }
+
+    fn to_samples(
+        &self,
+        kstats: KstatList<'_, '_>,
+    ) -> Result<Vec<Sample>, Error> {
+        // First, we need to map the instance's control plane UUID to the
+        // kernel's kstat instance number. We'll find this through the
+        // `vmm::vm:vm_name` kstat, which lists the instance's UUID as its
+        // name.
+        //
+        // Note that if this code is run from within a Propolis zone, there is
+        // exactly one `vmm` kstat instance in any case.
+        let instance_id = self.instance_id.to_string();
+        let instance = kstats
+            .iter()
+            .find_map(|(_, kstat, data)| {
+                kstat_instance_from_instance_id(kstat, data, &instance_id)
+            })
+            .ok_or_else(|| Error::NoSuchKstat)?;
+
+        // Armed with the kstat instance, find all relevant metrics related to
+        // this particular VM. For now, we produce only vCPU usage metrics, but
+        // others may be chained in the future.
+        let vcpu_stats = kstats.iter().filter(|(_, kstat, _)| {
+            // Filter out those that don't match our kstat instance.
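+            // (From the global zone, kstats for every VMM on the sled are
+            // visible, so this check is load-bearing there; in a Propolis
+            // zone there is exactly one instance anyway.)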
+            if kstat.ks_instance != instance {
+                return false;
+            }
+
+            // Filter out those which are not vCPU kstats at all, or which are
+            // for vCPU IDs beyond those we know to be active.
+            let Some(suffix) = kstat.ks_name.strip_prefix(VCPU_KSTAT_PREFIX)
+            else {
+                return false;
+            };
+            let Ok(vcpu_id) = suffix.parse::<u32>() else {
+                return false;
+            };
+            vcpu_id < self.n_vcpus
+        });
+        produce_vcpu_usage(self, vcpu_stats)
+    }
+}
+
+// Given a kstat and an instance's ID, return the kstat instance if it matches.
+pub fn kstat_instance_from_instance_id(
+    kstat: &Kstat<'_>,
+    data: &Data<'_>,
+    instance_id: &str,
+) -> Option<i32> {
+    if kstat.ks_module != VMM_KSTAT_MODULE_NAME {
+        return None;
+    }
+    if kstat.ks_name != VM_KSTAT_NAME {
+        return None;
+    }
+    let Data::Named(named) = data else {
+        return None;
+    };
+    if named.iter().any(|nd| {
+        if nd.name != VM_NAME_KSTAT {
+            return false;
+        }
+        let NamedData::String(name) = &nd.value else {
+            return false;
+        };
+        instance_id == *name
+    }) {
+        return Some(kstat.ks_instance);
+    }
+    None
+}
+
+// Produce `Sample`s for the `VcpuUsage` metric from the relevant kstats.
+pub fn produce_vcpu_usage<'a>(
+    vm: &'a VirtualMachine,
+    vcpu_stats: impl Iterator<Item = &'a (DateTime<Utc>, Kstat<'a>, Data<'a>)> + 'a,
+) -> Result<Vec<Sample>, Error> {
+    let mut out = Vec::with_capacity(vm.n_vcpus as usize * N_VCPU_MICROSTATES);
+    for (creation_time, kstat, data) in vcpu_stats {
+        let Data::Named(named) = data else {
+            return Err(Error::ExpectedNamedKstat);
+        };
+        let snapshot_time = hrtime_to_utc(kstat.ks_snaptime)?;
+
+        // Find the vCPU ID, from the relevant named data item.
+        let vcpu_id = named
+            .iter()
+            .find_map(|named| {
+                if named.name == VCPU_KSTAT_PREFIX {
+                    named.value.as_u32().ok()
+                } else {
+                    None
+                }
+            })
+            .ok_or_else(|| Error::NoSuchKstat)?;
+
+        // We'll track all statistics starting with `time_` as the microstate.
+        for Named { name, value } in named
+            .iter()
+            .filter(|nv| nv.name.starts_with(VCPU_MICROSTATE_PREFIX))
+        {
+            // We just filtered on this prefix, so it must exist and the
+            // `unwrap()` below cannot fail.
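+            // E.g., the named value `time_run` produces a sample for the
+            // state `"run"`.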
+ let state = + name.strip_prefix(VCPU_MICROSTATE_PREFIX).unwrap().to_string(); + let datum = + Cumulative::with_start_time(*creation_time, value.as_u64()?); + let metric = VcpuUsage { vcpu_id, state, datum }; + let sample = + Sample::new_with_timestamp(snapshot_time, vm, &metric)?; + out.push(sample); + } + } + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::kstat_instance_from_instance_id; + use super::produce_vcpu_usage; + use super::Data; + use super::Kstat; + use super::Named; + use super::NamedData; + use super::Utc; + use super::VcpuUsage; + use super::VirtualMachine; + use super::VCPU_KSTAT_PREFIX; + use super::VMM_KSTAT_MODULE_NAME; + use super::VM_KSTAT_NAME; + use super::VM_NAME_KSTAT; + use oximeter::schema::SchemaSet; + use oximeter::types::Cumulative; + use oximeter::Datum; + use oximeter::FieldValue; + + const TEST_VM: VirtualMachine = VirtualMachine { + silo_id: uuid::uuid!("6a4bd4b6-e9aa-44d1-b616-399d48baa173"), + project_id: uuid::uuid!("7b61df02-0794-4b37-93bc-89f03c7289ca"), + instance_id: uuid::uuid!("96d6ec78-543a-4188-830e-37e2a0eeff16"), + n_vcpus: 4, + }; + + fn test_usage() -> VcpuUsage { + VcpuUsage { + state: "run".to_string(), + vcpu_id: 0, + datum: Cumulative::new(100), + } + } + + #[test] + fn test_no_schema_changes() { + let mut set = SchemaSet::default(); + assert!(set.insert_checked(&TEST_VM, &test_usage()).is_none()); + const PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/output/virtual-machine-schema.json", + ); + set.assert_contents(PATH); + } + + #[test] + fn test_kstat_instance_from_instance_id() { + let ks = Kstat { + ks_module: VMM_KSTAT_MODULE_NAME, + ks_instance: 0, + ks_name: VM_KSTAT_NAME, + ks_snaptime: 1, + }; + const INSTANCE_ID: &str = "db198b43-2dee-4b4b-8a68-24cb4c0d6ec8"; + let data = Data::Named(vec![Named { + name: VM_NAME_KSTAT, + value: NamedData::String(INSTANCE_ID), + }]); + + assert_eq!( + kstat_instance_from_instance_id(&ks, &data, INSTANCE_ID) + .expect("Should have matched the instance ID"), + ks.ks_instance, + ); + + let data = Data::Named(vec![Named { + name: VM_NAME_KSTAT, + value: NamedData::String("something-else"), + }]); + assert!( + kstat_instance_from_instance_id(&ks, &data, INSTANCE_ID).is_none(), + "Should not have matched an instance ID" + ); + } + + fn vcpu_state_kstats<'a>() -> (Kstat<'a>, Data<'a>) { + let ks = Kstat { + ks_module: VMM_KSTAT_MODULE_NAME, + ks_instance: 0, + ks_name: "vcpu0", + ks_snaptime: 1, + }; + let data = Data::Named(vec![ + Named { name: VCPU_KSTAT_PREFIX, value: NamedData::UInt32(0) }, + Named { name: "time_idle", value: NamedData::UInt64(1) }, + Named { name: "time_run", value: NamedData::UInt64(2) }, + ]); + (ks, data) + } + + #[test] + fn test_produce_vcpu_usage() { + let (ks, data) = vcpu_state_kstats(); + let kstats = [(Utc::now(), ks, data)]; + let samples = produce_vcpu_usage(&TEST_VM, kstats.iter()) + .expect("Should have produced samples"); + assert_eq!( + samples.len(), + 2, + "Should have samples for 'run' and 'idle' states" + ); + for ((sample, state), x) in + samples.iter().zip(["idle", "run"]).zip([1, 2]) + { + let st = sample + .fields() + .iter() + .find_map(|f| { + if f.name == "state" { + let FieldValue::String(state) = &f.value else { + panic!("Expected a string field"); + }; + Some(state.clone()) + } else { + None + } + }) + .expect("expected a field with name \"state\""); + assert_eq!(st, state, "Found an incorrect vCPU state"); + let Datum::CumulativeU64(inner) = sample.measurement.datum() else { + panic!("Expected a cumulativeu64 datum"); + }; + 
assert_eq!(inner.value(), x); + } + } +} diff --git a/oximeter/instruments/src/lib.rs b/oximeter/instruments/src/lib.rs index d003e717395..c1f839c85d2 100644 --- a/oximeter/instruments/src/lib.rs +++ b/oximeter/instruments/src/lib.rs @@ -4,7 +4,7 @@ //! General-purpose types for instrumenting code to producer oximeter metrics. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company #[cfg(feature = "http-instruments")] pub mod http; diff --git a/oximeter/instruments/tests/output/virtual-machine-schema.json b/oximeter/instruments/tests/output/virtual-machine-schema.json new file mode 100644 index 00000000000..fb23d031222 --- /dev/null +++ b/oximeter/instruments/tests/output/virtual-machine-schema.json @@ -0,0 +1,34 @@ +{ + "virtual_machine:vcpu_usage": { + "timeseries_name": "virtual_machine:vcpu_usage", + "field_schema": [ + { + "name": "instance_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "project_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "silo_id", + "field_type": "uuid", + "source": "target" + }, + { + "name": "state", + "field_type": "string", + "source": "metric" + }, + { + "name": "vcpu_id", + "field_type": "u32", + "source": "metric" + } + ], + "datum_type": "cumulative_u64", + "created": "2024-01-25T20:08:38.763553812Z" + } +} \ No newline at end of file diff --git a/oximeter/producer/src/lib.rs b/oximeter/producer/src/lib.rs index 2354f9c2176..3fecaadf4fe 100644 --- a/oximeter/producer/src/lib.rs +++ b/oximeter/producer/src/lib.rs @@ -38,8 +38,8 @@ pub enum Error { #[error("Error running producer HTTP server: {0}")] Server(String), - #[error("Error registering as metric producer: {0}")] - RegistrationError(String), + #[error("Error registering as metric producer: {msg}")] + RegistrationError { retryable: bool, msg: String }, #[error("Producer registry and config UUIDs do not match")] UuidMismatch, @@ -251,11 +251,19 @@ pub async fn register( ) -> Result<(), Error> { let client = nexus_client::Client::new(&format!("http://{}", address), log.clone()); - client - .cpapi_producers_post(&server_info.into()) - .await - .map(|_| ()) - .map_err(|msg| Error::RegistrationError(msg.to_string())) + client.cpapi_producers_post(&server_info.into()).await.map(|_| ()).map_err( + |err| { + let retryable = match &err { + nexus_client::Error::CommunicationError(..) => true, + nexus_client::Error::ErrorResponse(resp) => { + resp.status().is_server_error() + } + _ => false, + }; + let msg = err.to_string(); + Error::RegistrationError { retryable, msg } + }, + ) } /// Handle a request to pull available metric data from a [`ProducerRegistry`]. 
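
Note (not part of the patch): the new `retryable` flag on
`Error::RegistrationError` lets callers distinguish transient failures from
permanent ones. A minimal sketch of how a caller might use it follows; it
assumes `register()` keeps its current arguments (the Nexus address, a
logger, and the producer's endpoint information), and the backoff policy is
a placeholder.

    async fn register_with_retry(
        address: std::net::SocketAddr,
        log: &slog::Logger,
        server_info: &omicron_common::api::internal::nexus::ProducerEndpoint,
    ) -> Result<(), Error> {
        let mut delay = std::time::Duration::from_millis(250);
        loop {
            match register(address, log, server_info).await {
                Ok(()) => return Ok(()),
                // Retry only errors explicitly marked retryable, with capped
                // exponential backoff.
                Err(Error::RegistrationError { retryable: true, msg }) => {
                    slog::warn!(log, "producer registration failed"; "error" => msg);
                    tokio::time::sleep(delay).await;
                    delay = (delay * 2).min(std::time::Duration::from_secs(30));
                }
                Err(e) => return Err(e),
            }
        }
    }
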
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index 0798aed664c..3ea2df20c37 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -412,6 +412,7 @@ async fn instance_register(
             body_args.instance_runtime,
             body_args.vmm_runtime,
             body_args.propolis_addr,
+            body_args.metadata,
         )
         .await?,
     ))
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs
index 47e61cfe719..6f884b0cad9 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -13,7 +13,7 @@ use crate::nexus::NexusClientWithResolver;
 use crate::params::ZoneBundleMetadata;
 use crate::params::{InstanceExternalIpBody, ZoneBundleCause};
 use crate::params::{
-    InstanceHardware, InstanceMigrationSourceParams,
+    InstanceHardware, InstanceMetadata, InstanceMigrationSourceParams,
     InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule,
 };
 use crate::profile::*;
@@ -192,6 +192,11 @@ struct InstanceInner {
     // Properties visible to Propolis
     properties: propolis_client::types::InstanceProperties,
 
+    // This is currently unused, but will be sent to Propolis as part of the
+    // work tracked in https://github.com/oxidecomputer/omicron/issues/4851. It
+    // will be included in the InstanceProperties above, most likely.
+    _metadata: InstanceMetadata,
+
     // The ID of the Propolis server (and zone) running this instance
     propolis_id: Uuid,
 
@@ -369,6 +374,10 @@ impl InstanceInner {
         // state to Nexus. This ensures that the instance is actually gone from
         // the sled when Nexus receives the state update saying it's actually
         // destroyed.
+        //
+        // In addition, we'll start or stop collecting metrics solely on the
+        // basis of whether the instance is terminated. All other states imply
+        // we start (or continue) to collect instance metrics.
         match action {
             Some(InstanceAction::Destroy) => {
                 info!(self.log, "terminating VMM that has exited";
@@ -702,6 +711,7 @@ impl Instance {
         ticket: InstanceTicket,
         state: InstanceInitialState,
         services: InstanceManagerServices,
+        metadata: InstanceMetadata,
     ) -> Result<Self, Error> {
         info!(log, "initializing new Instance";
             "instance_id" => %id,
@@ -770,6 +780,9 @@ impl Instance {
                     // InstanceCpuCount here, to avoid any casting...
                     vcpus: hardware.properties.ncpus.0 as u8,
                 },
+                // This will be used in a follow up, tracked under
+                // https://github.com/oxidecomputer/omicron/issues/4851.
+                _metadata: metadata,
                 propolis_id,
                 propolis_addr,
                 vnic_allocator,
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index b66b0400e1c..4a7d76904e9 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -8,6 +8,7 @@ use crate::instance::propolis_zone_name;
 use crate::instance::Instance;
 use crate::nexus::NexusClientWithResolver;
 use crate::params::InstanceExternalIpBody;
+use crate::params::InstanceMetadata;
 use crate::params::ZoneBundleMetadata;
 use crate::params::{
     InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse,
@@ -216,6 +217,7 @@ impl InstanceManager {
     /// (instance ID, Propolis ID) pair multiple times, but will fail if the
     /// instance is registered with a Propolis ID different from the one the
     /// caller supplied.
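+    ///
+    /// The `metadata` supplied here is stored with the instance and will be
+    /// used to tag the statistics produced for it.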
+    #[allow(clippy::too_many_arguments)]
     pub async fn ensure_registered(
         &self,
         instance_id: Uuid,
@@ -224,6 +226,7 @@
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
         propolis_addr: SocketAddr,
+        metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         info!(
             &self.inner.log,
             "instance_runtime" => ?instance_runtime,
             "vmm_runtime" => ?vmm_runtime,
             "propolis_addr" => ?propolis_addr,
+            "metadata" => ?metadata,
         );
 
         let instance = {
@@ -292,6 +296,7 @@ impl InstanceManager {
                     ticket,
                     state,
                     services,
+                    metadata,
                 )?;
                 let instance_clone = instance.clone();
                 let _old =
diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs
index 6c3383c88f0..33941ab6fe7 100644
--- a/sled-agent/src/metrics.rs
+++ b/sled-agent/src/metrics.rs
@@ -8,6 +8,7 @@ use oximeter::types::MetricsError;
 use oximeter::types::ProducerRegistry;
 use sled_hardware::Baseboard;
 use slog::Logger;
+use std::sync::Arc;
 use std::time::Duration;
 use uuid::Uuid;
 
@@ -19,6 +20,7 @@ cfg_if::cfg_if! {
         use oximeter_instruments::kstat::KstatSampler;
         use oximeter_instruments::kstat::TargetId;
         use std::collections::BTreeMap;
+        use std::sync::Mutex;
     } else {
         use anyhow::anyhow;
     }
@@ -46,6 +48,18 @@ pub enum Error {
 
     #[error("Failed to fetch hostname")]
     Hostname(#[source] std::io::Error),
+
+    #[error("Non-UTF8 hostname")]
+    NonUtf8Hostname,
+}
+
+// Basic metadata about the sled agent used when publishing metrics.
+#[derive(Clone, Debug)]
+#[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
+struct Metadata {
+    sled_id: Uuid,
+    rack_id: Uuid,
+    baseboard: Baseboard,
 }
 
 /// Type managing all oximeter metrics produced by the sled-agent.
@@ -61,10 +75,7 @@
 // the name of fields that are not yet used.
 #[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
 pub struct MetricsManager {
-    sled_id: Uuid,
-    rack_id: Uuid,
-    baseboard: Baseboard,
-    hostname: Option<String>,
+    metadata: Arc<Metadata>,
     _log: Logger,
     #[cfg(target_os = "illumos")]
     kstat_sampler: KstatSampler,
@@ -78,7 +89,7 @@
     // namespace them internally, e.g., `"datalink:{link_name}"` would be the
     // real key.
     #[cfg(target_os = "illumos")]
-    tracked_links: BTreeMap<String, TargetId>,
+    tracked_links: Arc<Mutex<BTreeMap<String, TargetId>>>,
 
     registry: ProducerRegistry,
 }
@@ -101,14 +112,11 @@ impl MetricsManager {
             registry
                 .register_producer(kstat_sampler.clone())
                 .map_err(Error::Registry)?;
-            let tracked_links = BTreeMap::new();
+            let tracked_links = Arc::new(Mutex::new(BTreeMap::new()));
         }
     }
 
     Ok(Self {
-        sled_id,
-        rack_id,
-        baseboard,
-        hostname: None,
+        metadata: Arc::new(Metadata { sled_id, rack_id, baseboard }),
         _log: log,
         #[cfg(target_os = "illumos")]
         kstat_sampler,
@@ -128,14 +136,14 @@ impl MetricsManager {
     /// Track metrics for a physical datalink.
pub async fn track_physical_link(
-        &mut self,
+        &self,
         link_name: impl AsRef<str>,
         interval: Duration,
     ) -> Result<(), Error> {
-        let hostname = self.hostname().await?;
+        let hostname = hostname()?;
         let link = link::PhysicalDataLink {
-            rack_id: self.rack_id,
-            sled_id: self.sled_id,
+            rack_id: self.metadata.rack_id,
+            sled_id: self.metadata.sled_id,
             serial: self.serial_number(),
             hostname,
             link_name: link_name.as_ref().to_string(),
@@ -146,7 +154,10 @@
             .add_target(link, details)
             .await
             .map_err(Error::Kstat)?;
-        self.tracked_links.insert(link_name.as_ref().to_string(), id);
+        self.tracked_links
+            .lock()
+            .unwrap()
+            .insert(link_name.as_ref().to_string(), id);
         Ok(())
     }
 
@@ -155,10 +166,12 @@
     /// This works for both physical and virtual links.
     #[allow(dead_code)]
    pub async fn stop_tracking_link(
-        &mut self,
+        &self,
         link_name: impl AsRef<str>,
     ) -> Result<(), Error> {
-        if let Some(id) = self.tracked_links.remove(link_name.as_ref()) {
+        let maybe_id =
+            self.tracked_links.lock().unwrap().remove(link_name.as_ref());
+        if let Some(id) = maybe_id {
             self.kstat_sampler.remove_target(id).await.map_err(Error::Kstat)
         } else {
             Ok(())
@@ -174,8 +187,8 @@
         interval: Duration,
     ) -> Result<(), Error> {
         let link = link::VirtualDataLink {
-            rack_id: self.rack_id,
-            sled_id: self.sled_id,
+            rack_id: self.metadata.rack_id,
+            sled_id: self.metadata.sled_id,
             serial: self.serial_number(),
             hostname: hostname.as_ref().to_string(),
             link_name: link_name.as_ref().to_string(),
@@ -190,40 +203,19 @@
 
     // Return the serial number out of the baseboard, if one exists.
     fn serial_number(&self) -> String {
-        match &self.baseboard {
+        match &self.metadata.baseboard {
             Baseboard::Gimlet { identifier, .. } => identifier.clone(),
             Baseboard::Unknown => String::from("unknown"),
             Baseboard::Pc { identifier, .. } => identifier.clone(),
         }
     }
-
-    // Return the system's hostname.
-    //
-    // If we've failed to get it previously, we try again. If _that_ fails,
-    // return an error.
-    //
-    // TODO-cleanup: This will become much simpler once
-    // `OnceCell::get_or_try_init` is stabilized.
-    async fn hostname(&mut self) -> Result<String, Error> {
-        if let Some(hn) = &self.hostname {
-            return Ok(hn.clone());
-        }
-        let hn = tokio::process::Command::new("hostname")
-            .env_clear()
-            .output()
-            .await
-            .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
-            .map_err(Error::Hostname)?;
-        self.hostname.replace(hn.clone());
-        Ok(hn)
-    }
 }
 
 #[cfg(not(target_os = "illumos"))]
 impl MetricsManager {
     /// Track metrics for a physical datalink.
     pub async fn track_physical_link(
-        &mut self,
+        &self,
         _link_name: impl AsRef<str>,
         _interval: Duration,
     ) -> Result<(), Error> {
@@ -237,7 +229,7 @@
     /// This works for both physical and virtual links.
     #[allow(dead_code)]
     pub async fn stop_tracking_link(
-        &mut self,
+        &self,
         _link_name: impl AsRef<str>,
     ) -> Result<(), Error> {
         Err(Error::Kstat(anyhow!(
@@ -258,3 +250,26 @@
         )))
     }
 }
+
+// Return the current hostname if possible.
+#[cfg(target_os = "illumos")]
+fn hostname() -> Result<String, Error> {
+    // See netdb.h
+    const MAX_LEN: usize = 256;
+    let mut out = vec![0u8; MAX_LEN + 1];
+    if unsafe {
+        libc::gethostname(out.as_mut_ptr() as *mut libc::c_char, MAX_LEN)
+    } == 0
+    {
+        // Split into subslices by NULL bytes.
+        //
+        // Safety: We've passed an extra NULL to the gethostname(2) call, so
+        // there must be at least one of these.
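+        //
+        // (We allocated MAX_LEN + 1 zeroed bytes but told gethostname(2)
+        // about only MAX_LEN of them, so the final byte is NUL even if the
+        // returned name was truncated.)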
+        let chunk = out.split(|x| *x == 0).next().unwrap();
+        let s = std::ffi::CString::new(chunk)
+            .map_err(|_| Error::NonUtf8Hostname)?;
+        s.into_string().map_err(|_| Error::NonUtf8Hostname)
+    } else {
+        Err(Error::Hostname(std::io::Error::last_os_error()))
+    }
+}
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index f14a13aa411..c53a61a0a84 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -81,6 +81,13 @@ pub struct InstanceHardware {
     pub cloud_init_bytes: Option<String>,
 }
 
+/// Metadata used to track statistics about an instance.
+#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)]
+pub struct InstanceMetadata {
+    pub silo_id: Uuid,
+    pub project_id: Uuid,
+}
+
 /// The body of a request to ensure that an instance and VMM are known to a
 /// sled agent.
 #[derive(Serialize, Deserialize, JsonSchema)]
@@ -102,6 +109,9 @@ pub struct InstanceEnsureBody {
 
     /// The address at which this VMM should serve a Propolis server API.
     pub propolis_addr: SocketAddr,
+
+    /// Metadata used to track instance statistics.
+    pub metadata: InstanceMetadata,
 }
 
 /// The body of a request to move a previously-ensured instance into a specific
diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs
index 09ffdf5dc40..494db588288 100644
--- a/sled-agent/src/sim/http_entrypoints.rs
+++ b/sled-agent/src/sim/http_entrypoints.rs
@@ -98,6 +98,7 @@ async fn instance_register(
             body_args.hardware,
             body_args.instance_runtime,
             body_args.vmm_runtime,
+            body_args.metadata,
         )
         .await?,
     ))
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 56cfaf57c86..26643e0426e 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -13,7 +13,7 @@ use super::storage::Storage;
 use crate::nexus::NexusClient;
 use crate::params::{
     DiskStateRequested, InstanceExternalIpBody, InstanceHardware,
-    InstanceMigrationSourceParams, InstancePutStateResponse,
+    InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse,
     InstanceStateRequested, InstanceUnregisterResponse, Inventory,
     OmicronZonesConfig, SledRole,
 };
@@ -240,6 +240,9 @@ impl SledAgent {
         hardware: InstanceHardware,
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
+        // This is currently unused, but will be included as part of work
+        // tracked in https://github.com/oxidecomputer/omicron/issues/4851.
+        _metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         // respond with a fake 500 level failure if asked to ensure an instance
         // with more than 16 CPUs.
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index eaf354db26c..bffe3799be1 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -17,7 +17,7 @@ use crate::metrics::MetricsManager;
 use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue};
 use crate::params::{
     DiskStateRequested, InstanceExternalIpBody, InstanceHardware,
-    InstanceMigrationSourceParams, InstancePutStateResponse,
+    InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse,
     InstanceStateRequested, InstanceUnregisterResponse, Inventory,
     OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule,
     ZoneBundleMetadata, Zpool,
@@ -394,6 +394,55 @@ impl SledAgent {
         let underlay_nics = underlay::find_nics(&config.data_links)?;
         illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?;
 
+        // Start collecting metric data.
+        //
+        // We create the shareable type for managing the metrics themselves
+        // early on, so that we can pass it to other components of the sled
+        // agent that need it.
+        //
+        // Then we start tracking physical links and register as a metric
+        // producer with Nexus in the background.
+        let metrics_manager = MetricsManager::new(
+            request.body.id,
+            request.body.rack_id,
+            long_running_task_handles.hardware_manager.baseboard(),
+            log.new(o!("component" => "MetricsManager")),
+        )?;
+
+        // Start tracking the underlay physical links.
+        for nic in underlay::find_nics(&config.data_links)? {
+            let link_name = nic.interface();
+            if let Err(e) = metrics_manager
+                .track_physical_link(
+                    link_name,
+                    crate::metrics::LINK_SAMPLE_INTERVAL,
+                )
+                .await
+            {
+                error!(
+                    log,
+                    "failed to start tracking physical link metrics";
+                    "link_name" => link_name,
+                    "error" => ?e,
+                );
+            }
+        }
+
+        // Spawn a task in the background to register our metric producer with
+        // Nexus. This should not block progress here.
+        let endpoint = ProducerEndpoint {
+            id: request.body.id,
+            kind: ProducerKind::SledAgent,
+            address: sled_address.into(),
+            base_route: String::from("/metrics/collect"),
+            interval: crate::metrics::METRIC_COLLECTION_INTERVAL,
+        };
+        tokio::task::spawn(register_metric_producer_with_nexus(
+            log.clone(),
+            nexus_client.clone(),
+            endpoint,
+        ));
+
         // Create the PortManager to manage all the OPTE ports on the sled.
         let port_manager = PortManager::new(
             parent_log.new(o!("component" => "PortManager")),
@@ -514,47 +563,6 @@
             rack_network_config.clone(),
         )?;
 
-        let mut metrics_manager = MetricsManager::new(
-            request.body.id,
-            request.body.rack_id,
-            long_running_task_handles.hardware_manager.baseboard(),
-            log.new(o!("component" => "MetricsManager")),
-        )?;
-
-        // Start tracking the underlay physical links.
-        for nic in underlay::find_nics(&config.data_links)? {
-            let link_name = nic.interface();
-            if let Err(e) = metrics_manager
-                .track_physical_link(
-                    link_name,
-                    crate::metrics::LINK_SAMPLE_INTERVAL,
-                )
-                .await
-            {
-                error!(
-                    log,
-                    "failed to start tracking physical link metrics";
-                    "link_name" => link_name,
-                    "error" => ?e,
-                );
-            }
-        }
-
-        // Spawn a task in the background to register our metric producer with
-        // Nexus. This should not block progress here.
-        let endpoint = ProducerEndpoint {
-            id: request.body.id,
-            kind: ProducerKind::SledAgent,
-            address: sled_address.into(),
-            base_route: String::from("/metrics/collect"),
-            interval: crate::metrics::METRIC_COLLECTION_INTERVAL,
-        };
-        tokio::task::spawn(register_metric_producer_with_nexus(
-            log.clone(),
-            nexus_client.clone(),
-            endpoint,
-        ));
-
         let sled_agent = SledAgent {
             inner: Arc::new(SledAgentInner {
                 id: request.body.id,
@@ -910,6 +918,7 @@ impl SledAgent {
     /// Idempotently ensures that a given instance is registered with this sled,
     /// i.e., that it can be addressed by future calls to
     /// [`Self::instance_ensure_state`].
+    #[allow(clippy::too_many_arguments)]
     pub async fn instance_ensure_registered(
         &self,
         instance_id: Uuid,
@@ -918,6 +927,7 @@
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
         propolis_addr: SocketAddr,
+        metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         self.inner
             .instances
             .ensure_registered(
                 instance_id,
                 propolis_id,
                 hardware,
                 instance_runtime,
                 vmm_runtime,
                 propolis_addr,
+                metadata,
             )
             .await
             .map_err(|e| Error::Instance(e))
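
Note (not part of the patch): once Propolis accepts and republishes this
metadata, the pieces above are intended to compose roughly as follows. The
helper below is hypothetical, and the `CollectionDetails::never` constructor
and `add_target` signature are assumed to mirror the existing datalink path
in `oximeter-instruments`.

    use std::time::Duration;
    use oximeter_instruments::kstat::{CollectionDetails, KstatSampler};
    use oximeter_instruments::kstat::virtual_machine::VirtualMachine;

    // Hypothetical: begin sampling vCPU kstats for a registered instance.
    async fn track_instance(
        sampler: &KstatSampler,
        metadata: &InstanceMetadata, // the type added in params.rs above
        instance_id: uuid::Uuid,
        n_vcpus: u32,
    ) -> anyhow::Result<()> {
        let target = VirtualMachine {
            silo_id: metadata.silo_id,
            project_id: metadata.project_id,
            instance_id,
            n_vcpus,
        };
        // Samples reach oximeter through the sampler's registration with the
        // sled agent's `ProducerRegistry`; the interval here is arbitrary.
        let details = CollectionDetails::never(Duration::from_secs(5));
        sampler.add_target(target, details).await?;
        Ok(())
    }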