diff --git a/Cargo.lock b/Cargo.lock
index 45d1d47199a..c31348a7810 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1924,7 +1924,7 @@ dependencies = [
 [[package]]
 name = "dropshot"
 version = "0.9.1-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#29ae98d1f909c6832661408a4c03f929e8afa6e9"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -1970,7 +1970,7 @@ dependencies = [
 [[package]]
 name = "dropshot_endpoint"
 version = "0.9.1-dev"
-source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee"
+source = "git+https://github.com/oxidecomputer/dropshot?branch=main#29ae98d1f909c6832661408a4c03f929e8afa6e9"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index f9246535259..57592664306 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -1146,6 +1146,21 @@ impl super::Nexus {
             .map(|ssh_key| ssh_key.public_key)
             .collect::<Vec<String>>();

+        // Construct instance metadata used to track its statistics.
+        //
+        // This currently means fetching the current silo ID, since we have
+        // all the other metadata already.
+        let silo_id = self
+            .current_silo_lookup(opctx)?
+            .lookup_for(authz::Action::Read)
+            .await?
+            .0
+            .id();
+        let metadata = sled_agent_client::types::InstanceMetadata {
+            silo_id,
+            project_id: db_instance.project_id,
+        };
+
         // Ask the sled agent to begin the state change. Then update the
         // database to reflect the new intermediate state. If this update is
         // not the newest one, that's fine. That might just mean the sled agent
@@ -1189,6 +1204,7 @@ impl super::Nexus {
                         PROPOLIS_PORT,
                     )
                     .to_string(),
+                    metadata,
                 },
             )
             .await
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index 7b9a3efcdad..ccebbd273b1 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -4589,6 +4589,14 @@
           }
         ]
       },
+      "metadata": {
+        "description": "Metadata used to track instance statistics.",
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/InstanceMetadata"
+          }
+        ]
+      },
       "propolis_addr": {
         "description": "The address at which this VMM should serve a Propolis server API.",
         "type": "string"
@@ -4610,6 +4618,7 @@
       "required": [
         "hardware",
         "instance_runtime",
+        "metadata",
         "propolis_addr",
         "propolis_id",
         "vmm_runtime"
@@ -4741,6 +4750,24 @@
           "snapshot_id"
         ]
       },
+      "InstanceMetadata": {
+        "description": "Metadata used to track statistics about an instance.",
+        "type": "object",
+        "properties": {
+          "project_id": {
+            "type": "string",
+            "format": "uuid"
+          },
+          "silo_id": {
+            "type": "string",
+            "format": "uuid"
+          }
+        },
+        "required": [
+          "project_id",
+          "silo_id"
+        ]
+      },
       "InstanceMigrationSourceParams": {
         "description": "Instance runtime state to update for a migration.",
         "type": "object",
diff --git a/oximeter/instruments/Cargo.toml b/oximeter/instruments/Cargo.toml
index 8372b7c5602..5903f1da511 100644
--- a/oximeter/instruments/Cargo.toml
+++ b/oximeter/instruments/Cargo.toml
@@ -5,27 +5,49 @@ edition = "2021"
 license = "MPL-2.0"

 [dependencies]
-cfg-if.workspace = true
-chrono.workspace = true
-dropshot.workspace = true
-futures.workspace = true
+cfg-if = { workspace = true, optional = true }
+chrono = { workspace = true, optional = true }
+dropshot = { workspace = true, optional = true }
+futures = { workspace = true, optional = true }
 http = { workspace = true, optional = true }
-oximeter.workspace = true
-slog.workspace = true
-tokio.workspace = true
-thiserror.workspace = true
-uuid.workspace = true
-omicron-workspace-hack.workspace = true
+oximeter = { workspace = true, optional = true }
+slog = { workspace = true, optional = true }
+tokio = { workspace = true, optional = true }
+thiserror = { workspace = true, optional = true }
+uuid = { workspace = true, optional = true }
+omicron-workspace-hack = { workspace = true, optional = true }

 [features]
-default = ["http-instruments", "kstat"]
-http-instruments = ["http"]
-kstat = ["kstat-rs"]
+default = ["http-instruments", "datalink", "virtual-machine"]
+http-instruments = [
+    "dep:chrono",
+    "dep:dropshot",
+    "dep:futures",
+    "dep:http",
+    "dep:omicron-workspace-hack",
+    "dep:oximeter",
+    "dep:uuid"
+]
+kstat = [
+    "dep:cfg-if",
+    "dep:chrono",
+    "dep:futures",
+    "dep:kstat-rs",
+    "dep:omicron-workspace-hack",
+    "dep:oximeter",
+    "dep:slog",
+    "dep:tokio",
+    "dep:thiserror",
+    "dep:uuid"
+]
+datalink = ["kstat"]
+virtual-machine = ["kstat"]

 [dev-dependencies]
 rand.workspace = true
 slog-async.workspace = true
 slog-term.workspace = true
+oximeter.workspace = true

 [target.'cfg(target_os = "illumos")'.dependencies]
 kstat-rs = { workspace = true, optional = true }
diff --git a/oximeter/instruments/src/kstat/mod.rs b/oximeter/instruments/src/kstat/mod.rs
index 90f34acae85..0b102ebd6f0 100644
--- a/oximeter/instruments/src/kstat/mod.rs
+++ b/oximeter/instruments/src/kstat/mod.rs
@@ -2,7 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.

-// Copyright 2023 Oxide Computer Company
+// Copyright 2024 Oxide Computer Company

 //! Types for publishing kernel statistics via oximeter.
 //!
@@ -87,8 +87,11 @@ use std::cmp::Ordering;
 use std::collections::BTreeMap;
 use std::time::Duration;

+#[cfg(any(feature = "datalink", test))]
 pub mod link;
 mod sampler;
+#[cfg(any(feature = "virtual-machine", test))]
+pub mod virtual_machine;

 pub use sampler::CollectionDetails;
 pub use sampler::ExpirationBehavior;
diff --git a/oximeter/instruments/src/kstat/virtual_machine.rs b/oximeter/instruments/src/kstat/virtual_machine.rs
new file mode 100644
index 00000000000..72d4ba554b5
--- /dev/null
+++ b/oximeter/instruments/src/kstat/virtual_machine.rs
@@ -0,0 +1,424 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// Copyright 2024 Oxide Computer Company
+
+//! Types for tracking statistics about virtual machine instances.
+
+use crate::kstat::hrtime_to_utc;
+use crate::kstat::ConvertNamedData;
+use crate::kstat::Error;
+use chrono::DateTime;
+use chrono::Utc;
+use oximeter::types::Cumulative;
+use oximeter::FieldType;
+use oximeter::FieldValue;
+use oximeter::Metric;
+use oximeter::Sample;
+use oximeter::Target;
+use uuid::Uuid;
+
+#[cfg(not(test))]
+mod kstat_types {
+    pub use crate::kstat::KstatList;
+    pub use crate::kstat::KstatTarget;
+    pub use kstat_rs::Data;
+    pub use kstat_rs::Kstat;
+    pub use kstat_rs::Named;
+    pub use kstat_rs::NamedData;
+}
+
+// Mock the relevant subset of `kstat-rs` types needed for tests.
+#[cfg(test)]
+mod kstat_types {
+    #[derive(Debug)]
+    pub enum Data<'a> {
+        Named(Vec<Named<'a>>),
+        Null,
+    }
+
+    #[derive(Debug)]
+    pub enum NamedData<'a> {
+        UInt32(u32),
+        UInt64(u64),
+        String(&'a str),
+    }
+
+    #[derive(Debug)]
+    pub struct Kstat<'a> {
+        pub ks_module: &'a str,
+        pub ks_instance: i32,
+        pub ks_name: &'a str,
+        pub ks_snaptime: i64,
+    }
+
+    #[derive(Debug)]
+    pub struct Named<'a> {
+        pub name: &'a str,
+        pub value: NamedData<'a>,
+    }
+
+    impl<'a> super::ConvertNamedData for NamedData<'a> {
+        fn as_i32(&self) -> Result<i32, super::Error> {
+            unimplemented!()
+        }
+
+        fn as_u32(&self) -> Result<u32, super::Error> {
+            if let NamedData::UInt32(x) = self {
+                Ok(*x)
+            } else {
+                panic!()
+            }
+        }
+
+        fn as_i64(&self) -> Result<i64, super::Error> {
+            unimplemented!()
+        }
+
+        fn as_u64(&self) -> Result<u64, super::Error> {
+            if let NamedData::UInt64(x) = self {
+                Ok(*x)
+            } else {
+                panic!()
+            }
+        }
+    }
+}
+
+use kstat_types::*;
+
+/// A single virtual machine instance.
+#[derive(Clone, Debug)]
+pub struct VirtualMachine {
+    /// The silo to which the instance belongs.
+    pub silo_id: Uuid,
+    /// The project to which the instance belongs.
+    pub project_id: Uuid,
+    /// The ID of the instance.
+    pub instance_id: Uuid,
+
+    // This field is not published as part of the target field definitions. It
+    // is needed because the hypervisor currently creates kstats for each vCPU,
+    // regardless of whether they're activated. There is no way to tell from
+    // userland today which vCPU kstats are "real". We include this value here,
+    // and implement `oximeter::Target` manually.
+    pub n_vcpus: u32,
+}
+
+impl Target for VirtualMachine {
+    fn name(&self) -> &'static str {
+        "virtual_machine"
+    }
+
+    fn field_names(&self) -> &'static [&'static str] {
+        &["silo_id", "project_id", "instance_id"]
+    }
+
+    fn field_types(&self) -> Vec<FieldType> {
+        vec![FieldType::Uuid, FieldType::Uuid, FieldType::Uuid]
+    }
+
+    fn field_values(&self) -> Vec<FieldValue> {
+        vec![
+            self.silo_id.into(),
+            self.project_id.into(),
+            self.instance_id.into(),
+        ]
+    }
+}
+
+/// Metric tracking vCPU usage by state.
+#[derive(Clone, Debug, Metric)]
+pub struct VcpuUsage {
+    /// The vCPU ID.
+    pub vcpu_id: u32,
+    /// The state of the vCPU.
+    pub state: String,
+    /// The cumulative time spent in this state, in nanoseconds.
+    pub datum: Cumulative<u64>,
+}
+
+// The name of the kstat module containing virtual machine kstats.
+const VMM_KSTAT_MODULE_NAME: &str = "vmm";
+
+// The name of the kstat with virtual machine metadata (VM name currently).
+const VM_KSTAT_NAME: &str = "vm";
+
+// The named kstat holding the virtual machine's name. This is currently the
+// UUID assigned by the control plane to the virtual machine instance.
+const VM_NAME_KSTAT: &str = "vm_name";
+
+// The name of the kstat containing vCPU usage data.
+const VCPU_KSTAT_PREFIX: &str = "vcpu";
+
+// Prefix for all named data with a valid vCPU microstate that we track.
+const VCPU_MICROSTATE_PREFIX: &str = "time_";
+
+// The number of expected vCPU microstates we track. This isn't load-bearing,
+// and is only used to help preallocate the array holding the `VcpuUsage`
+// samples.
+const N_VCPU_MICROSTATES: usize = 6;
+
+#[cfg(not(test))]
+impl KstatTarget for VirtualMachine {
+    // The VMM kstats are organized like so:
+    //
+    // - module: vmm
+    // - instance: a kernel-assigned integer
+    // - name: vm -> generic VM info, vcpuX -> info for each vCPU
+    //
+    // At this point in the code, we don't have that kstat instance, only the
+    // virtual machine instance's control plane UUID. However, the VM's "name"
+    // is assigned to be that control plane UUID in the hypervisor. See
+    // https://github.com/oxidecomputer/propolis/blob/759bf4a19990404c135e608afbe0d38b70bfa370/bin/propolis-server/src/lib/vm/mod.rs#L420
+    // for the current code which does that.
+    //
+    // So we need to indicate interest in any VMM-related kstat here, and we
+    // are forced to filter to the right instance by looking up the VM name
+    // inside the `to_samples()` method below.
+    fn interested(&self, kstat: &Kstat<'_>) -> bool {
+        kstat.ks_module == VMM_KSTAT_MODULE_NAME
+    }
+
+    fn to_samples(
+        &self,
+        kstats: KstatList<'_, '_>,
+    ) -> Result<Vec<Sample>, Error> {
+        // First, we need to map the instance's control plane UUID to the
+        // kstat instance ID. We'll find this through the `vmm::vm:vm_name`
+        // kstat, which lists the instance's UUID as its name.
+        //
+        // Note that if this code is run from within a Propolis zone, there is
+        // exactly one `vmm` kstat instance in any case.
+        let instance_id = self.instance_id.to_string();
+        let instance = kstats
+            .iter()
+            .find_map(|(_, kstat, data)| {
+                kstat_instance_from_instance_id(kstat, data, &instance_id)
+            })
+            .ok_or_else(|| Error::NoSuchKstat)?;
+
+        // Armed with the kstat instance, find all relevant metrics related to
+        // this particular VM. For now, we produce only vCPU usage metrics, but
+        // others may be chained in the future.
+        let vcpu_stats = kstats.iter().filter(|(_, kstat, _)| {
+            // Filter out those that don't match our kstat instance.
+            if kstat.ks_instance != instance {
+                return false;
+            }
+
+            // Filter out those which are neither a vCPU stat of any kind, nor
+            // for one of the vCPU IDs we know to be active.
+            let Some(suffix) = kstat.ks_name.strip_prefix(VCPU_KSTAT_PREFIX)
+            else {
+                return false;
+            };
+            let Ok(vcpu_id) = suffix.parse::<u32>() else {
+                return false;
+            };
+            vcpu_id < self.n_vcpus
+        });
+        produce_vcpu_usage(self, vcpu_stats)
+    }
+}
+
+// Given a kstat and an instance's ID, return the kstat instance if it matches.
+pub fn kstat_instance_from_instance_id(
+    kstat: &Kstat<'_>,
+    data: &Data<'_>,
+    instance_id: &str,
+) -> Option<i32> {
+    if kstat.ks_module != VMM_KSTAT_MODULE_NAME {
+        return None;
+    }
+    if kstat.ks_name != VM_KSTAT_NAME {
+        return None;
+    }
+    let Data::Named(named) = data else {
+        return None;
+    };
+    if named.iter().any(|nd| {
+        if nd.name != VM_NAME_KSTAT {
+            return false;
+        }
+        let NamedData::String(name) = &nd.value else {
+            return false;
+        };
+        instance_id == *name
+    }) {
+        return Some(kstat.ks_instance);
+    }
+    None
+}
+
+// Produce `Sample`s for the `VcpuUsage` metric from the relevant kstats.
+pub fn produce_vcpu_usage<'a>(
+    vm: &'a VirtualMachine,
+    vcpu_stats: impl Iterator<Item = &'a (DateTime<Utc>, Kstat<'a>, Data<'a>)> + 'a,
+) -> Result<Vec<Sample>, Error> {
+    let mut out = Vec::with_capacity(vm.n_vcpus as usize * N_VCPU_MICROSTATES);
+    for (creation_time, kstat, data) in vcpu_stats {
+        let Data::Named(named) = data else {
+            return Err(Error::ExpectedNamedKstat);
+        };
+        let snapshot_time = hrtime_to_utc(kstat.ks_snaptime)?;
+
+        // Find the vCPU ID, from the relevant named data item.
+        let vcpu_id = named
+            .iter()
+            .find_map(|named| {
+                if named.name == VCPU_KSTAT_PREFIX {
+                    named.value.as_u32().ok()
+                } else {
+                    None
+                }
+            })
+            .ok_or_else(|| Error::NoSuchKstat)?;
+
+        // We'll track all statistics starting with `time_` as the microstate.
+        for Named { name, value } in named
+            .iter()
+            .filter(|nv| nv.name.starts_with(VCPU_MICROSTATE_PREFIX))
+        {
+            // Safety: We're filtering in the loop on this prefix, so it must
+            // exist.
+ let state = + name.strip_prefix(VCPU_MICROSTATE_PREFIX).unwrap().to_string(); + let datum = + Cumulative::with_start_time(*creation_time, value.as_u64()?); + let metric = VcpuUsage { vcpu_id, state, datum }; + let sample = + Sample::new_with_timestamp(snapshot_time, vm, &metric)?; + out.push(sample); + } + } + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::kstat_instance_from_instance_id; + use super::produce_vcpu_usage; + use super::Data; + use super::Kstat; + use super::Named; + use super::NamedData; + use super::Utc; + use super::VcpuUsage; + use super::VirtualMachine; + use super::VCPU_KSTAT_PREFIX; + use super::VMM_KSTAT_MODULE_NAME; + use super::VM_KSTAT_NAME; + use super::VM_NAME_KSTAT; + use oximeter::schema::SchemaSet; + use oximeter::types::Cumulative; + use oximeter::Datum; + use oximeter::FieldValue; + + const TEST_VM: VirtualMachine = VirtualMachine { + silo_id: uuid::uuid!("6a4bd4b6-e9aa-44d1-b616-399d48baa173"), + project_id: uuid::uuid!("7b61df02-0794-4b37-93bc-89f03c7289ca"), + instance_id: uuid::uuid!("96d6ec78-543a-4188-830e-37e2a0eeff16"), + n_vcpus: 4, + }; + + fn test_usage() -> VcpuUsage { + VcpuUsage { + state: "run".to_string(), + vcpu_id: 0, + datum: Cumulative::new(100), + } + } + + #[test] + fn test_no_schema_changes() { + let mut set = SchemaSet::default(); + assert!(set.insert_checked(&TEST_VM, &test_usage()).is_none()); + const PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/output/virtual-machine-schema.json", + ); + set.assert_contents(PATH); + } + + #[test] + fn test_kstat_instance_from_instance_id() { + let ks = Kstat { + ks_module: VMM_KSTAT_MODULE_NAME, + ks_instance: 0, + ks_name: VM_KSTAT_NAME, + ks_snaptime: 1, + }; + const INSTANCE_ID: &str = "db198b43-2dee-4b4b-8a68-24cb4c0d6ec8"; + let data = Data::Named(vec![Named { + name: VM_NAME_KSTAT, + value: NamedData::String(INSTANCE_ID), + }]); + + assert_eq!( + kstat_instance_from_instance_id(&ks, &data, INSTANCE_ID) + .expect("Should have matched the instance ID"), + ks.ks_instance, + ); + + let data = Data::Named(vec![Named { + name: VM_NAME_KSTAT, + value: NamedData::String("something-else"), + }]); + assert!( + kstat_instance_from_instance_id(&ks, &data, INSTANCE_ID).is_none(), + "Should not have matched an instance ID" + ); + } + + fn vcpu_state_kstats<'a>() -> (Kstat<'a>, Data<'a>) { + let ks = Kstat { + ks_module: VMM_KSTAT_MODULE_NAME, + ks_instance: 0, + ks_name: "vcpu0", + ks_snaptime: 1, + }; + let data = Data::Named(vec![ + Named { name: VCPU_KSTAT_PREFIX, value: NamedData::UInt32(0) }, + Named { name: "time_idle", value: NamedData::UInt64(1) }, + Named { name: "time_run", value: NamedData::UInt64(2) }, + ]); + (ks, data) + } + + #[test] + fn test_produce_vcpu_usage() { + let (ks, data) = vcpu_state_kstats(); + let kstats = [(Utc::now(), ks, data)]; + let samples = produce_vcpu_usage(&TEST_VM, kstats.iter()) + .expect("Should have produced samples"); + assert_eq!( + samples.len(), + 2, + "Should have samples for 'run' and 'idle' states" + ); + for ((sample, state), x) in + samples.iter().zip(["idle", "run"]).zip([1, 2]) + { + let st = sample + .fields() + .iter() + .find_map(|f| { + if f.name == "state" { + let FieldValue::String(state) = &f.value else { + panic!("Expected a string field"); + }; + Some(state.clone()) + } else { + None + } + }) + .expect("expected a field with name \"state\""); + assert_eq!(st, state, "Found an incorrect vCPU state"); + let Datum::CumulativeU64(inner) = sample.measurement.datum() else { + panic!("Expected a cumulativeu64 datum"); + }; + 
+            assert_eq!(inner.value(), x);
+        }
+    }
+}
diff --git a/oximeter/instruments/src/lib.rs b/oximeter/instruments/src/lib.rs
index d003e717395..c1f839c85d2 100644
--- a/oximeter/instruments/src/lib.rs
+++ b/oximeter/instruments/src/lib.rs
@@ -4,7 +4,7 @@

 //! General-purpose types for instrumenting code to producer oximeter metrics.

-// Copyright 2023 Oxide Computer Company
+// Copyright 2024 Oxide Computer Company

 #[cfg(feature = "http-instruments")]
 pub mod http;
diff --git a/oximeter/instruments/tests/output/virtual-machine-schema.json b/oximeter/instruments/tests/output/virtual-machine-schema.json
new file mode 100644
index 00000000000..fb23d031222
--- /dev/null
+++ b/oximeter/instruments/tests/output/virtual-machine-schema.json
@@ -0,0 +1,34 @@
+{
+  "virtual_machine:vcpu_usage": {
+    "timeseries_name": "virtual_machine:vcpu_usage",
+    "field_schema": [
+      {
+        "name": "instance_id",
+        "field_type": "uuid",
+        "source": "target"
+      },
+      {
+        "name": "project_id",
+        "field_type": "uuid",
+        "source": "target"
+      },
+      {
+        "name": "silo_id",
+        "field_type": "uuid",
+        "source": "target"
+      },
+      {
+        "name": "state",
+        "field_type": "string",
+        "source": "metric"
+      },
+      {
+        "name": "vcpu_id",
+        "field_type": "u32",
+        "source": "metric"
+      }
+    ],
+    "datum_type": "cumulative_u64",
+    "created": "2024-01-25T20:08:38.763553812Z"
+  }
+}
\ No newline at end of file
diff --git a/oximeter/producer/src/lib.rs b/oximeter/producer/src/lib.rs
index 2354f9c2176..3fecaadf4fe 100644
--- a/oximeter/producer/src/lib.rs
+++ b/oximeter/producer/src/lib.rs
@@ -38,8 +38,8 @@ pub enum Error {
     #[error("Error running producer HTTP server: {0}")]
     Server(String),

-    #[error("Error registering as metric producer: {0}")]
-    RegistrationError(String),
+    #[error("Error registering as metric producer: {msg}")]
+    RegistrationError { retryable: bool, msg: String },

     #[error("Producer registry and config UUIDs do not match")]
     UuidMismatch,
@@ -251,11 +251,19 @@ pub async fn register(
 ) -> Result<(), Error> {
     let client =
         nexus_client::Client::new(&format!("http://{}", address), log.clone());
-    client
-        .cpapi_producers_post(&server_info.into())
-        .await
-        .map(|_| ())
-        .map_err(|msg| Error::RegistrationError(msg.to_string()))
+    client.cpapi_producers_post(&server_info.into()).await.map(|_| ()).map_err(
+        |err| {
+            let retryable = match &err {
+                nexus_client::Error::CommunicationError(..) => true,
+                nexus_client::Error::ErrorResponse(resp) => {
+                    resp.status().is_server_error()
+                }
+                _ => false,
+            };
+            let msg = err.to_string();
+            Error::RegistrationError { retryable, msg }
+        },
+    )
 }

 /// Handle a request to pull available metric data from a [`ProducerRegistry`].
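The error change above gives callers enough to decide whether to retry registration: communication errors and 5xx responses are transient, anything else is permanent. Below is a minimal caller-side sketch of how the new `retryable` flag might drive a retry loop. The wrapper name `register_with_retry`, the exact parameter types, and the backoff values are illustrative assumptions, not part of this change.

```rust
use std::time::Duration;

// Hypothetical wrapper around `oximeter_producer::register` (sketch only):
// retry failures the error marks as retryable, with capped exponential
// backoff, and surface everything else immediately.
async fn register_with_retry(
    address: std::net::SocketAddr,
    log: &slog::Logger,
    server_info: &omicron_common::api::internal::nexus::ProducerEndpoint,
) -> Result<(), oximeter_producer::Error> {
    let mut delay = Duration::from_millis(250); // assumed initial backoff
    loop {
        match oximeter_producer::register(address, log, server_info).await {
            Ok(()) => return Ok(()),
            Err(oximeter_producer::Error::RegistrationError {
                retryable: true,
                msg,
            }) => {
                slog::warn!(log, "retryable registration failure"; "msg" => msg);
                tokio::time::sleep(delay).await;
                delay = (delay * 2).min(Duration::from_secs(30));
            }
            Err(e) => return Err(e),
        }
    }
}
```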
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index 0798aed664c..3ea2df20c37 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -412,6 +412,7 @@ async fn instance_register(
             body_args.instance_runtime,
             body_args.vmm_runtime,
             body_args.propolis_addr,
+            body_args.metadata,
         )
         .await?,
     ))
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs
index 47e61cfe719..6f884b0cad9 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -13,7 +13,7 @@ use crate::nexus::NexusClientWithResolver;
 use crate::params::ZoneBundleMetadata;
 use crate::params::{InstanceExternalIpBody, ZoneBundleCause};
 use crate::params::{
-    InstanceHardware, InstanceMigrationSourceParams,
+    InstanceHardware, InstanceMetadata, InstanceMigrationSourceParams,
     InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule,
 };
 use crate::profile::*;
@@ -192,6 +192,11 @@ struct InstanceInner {
     // Properties visible to Propolis
     properties: propolis_client::types::InstanceProperties,

+    // This is currently unused, but will be sent to Propolis as part of the
+    // work tracked in https://github.com/oxidecomputer/omicron/issues/4851.
+    // It will most likely be included in the InstanceProperties above.
+    _metadata: InstanceMetadata,
+
     // The ID of the Propolis server (and zone) running this instance
     propolis_id: Uuid,

@@ -369,6 +374,10 @@ impl InstanceInner {
         // state to Nexus. This ensures that the instance is actually gone from
         // the sled when Nexus receives the state update saying it's actually
         // destroyed.
+        //
+        // In addition, we'll start or stop collecting metrics solely on the
+        // basis of whether the instance is terminated. All other states imply
+        // we start (or continue) to collect instance metrics.
         match action {
             Some(InstanceAction::Destroy) => {
                 info!(self.log, "terminating VMM that has exited";
@@ -702,6 +711,7 @@ impl Instance {
         ticket: InstanceTicket,
         state: InstanceInitialState,
         services: InstanceManagerServices,
+        metadata: InstanceMetadata,
     ) -> Result<Self, Error> {
         info!(log, "initializing new Instance";
             "instance_id" => %id,
@@ -770,6 +780,9 @@ impl Instance {
                 // InstanceCpuCount here, to avoid any casting...
                 vcpus: hardware.properties.ncpus.0 as u8,
             },
+            // This will be used in a follow-up, tracked under
+            // https://github.com/oxidecomputer/omicron/issues/4851.
+            _metadata: metadata,
             propolis_id,
             propolis_addr,
             vnic_allocator,
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index b66b0400e1c..4a7d76904e9 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -8,6 +8,7 @@ use crate::instance::propolis_zone_name;
 use crate::instance::Instance;
 use crate::nexus::NexusClientWithResolver;
 use crate::params::InstanceExternalIpBody;
+use crate::params::InstanceMetadata;
 use crate::params::ZoneBundleMetadata;
 use crate::params::{
     InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse,
@@ -216,6 +217,7 @@ impl InstanceManager {
     /// (instance ID, Propolis ID) pair multiple times, but will fail if the
     /// instance is registered with a Propolis ID different from the one the
     /// caller supplied.
+    #[allow(clippy::too_many_arguments)]
     pub async fn ensure_registered(
         &self,
         instance_id: Uuid,
@@ -224,6 +226,7 @@ impl InstanceManager {
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
         propolis_addr: SocketAddr,
+        metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         info!(
             &self.inner.log,
             "registering new instance";
             "instance_id" => ?instance_id,
@@ -234,6 +237,7 @@ impl InstanceManager {
             "instance_runtime" => ?instance_runtime,
             "vmm_runtime" => ?vmm_runtime,
             "propolis_addr" => ?propolis_addr,
+            "metadata" => ?metadata,
         );

         let instance = {
@@ -292,6 +296,7 @@ impl InstanceManager {
                 ticket,
                 state,
                 services,
+                metadata,
             )?;
             let instance_clone = instance.clone();
             let _old =
diff --git a/sled-agent/src/metrics.rs b/sled-agent/src/metrics.rs
index 6c3383c88f0..33941ab6fe7 100644
--- a/sled-agent/src/metrics.rs
+++ b/sled-agent/src/metrics.rs
@@ -8,6 +8,7 @@ use oximeter::types::MetricsError;
 use oximeter::types::ProducerRegistry;
 use sled_hardware::Baseboard;
 use slog::Logger;
+use std::sync::Arc;
 use std::time::Duration;
 use uuid::Uuid;

@@ -19,6 +20,7 @@ cfg_if::cfg_if! {
         use oximeter_instruments::kstat::KstatSampler;
         use oximeter_instruments::kstat::TargetId;
         use std::collections::BTreeMap;
+        use std::sync::Mutex;
     } else {
         use anyhow::anyhow;
     }
@@ -46,6 +48,18 @@ pub enum Error {

     #[error("Failed to fetch hostname")]
     Hostname(#[source] std::io::Error),
+
+    #[error("Non-UTF8 hostname")]
+    NonUtf8Hostname,
+}
+
+// Basic metadata about the sled agent used when publishing metrics.
+#[derive(Clone, Debug)]
+#[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
+struct Metadata {
+    sled_id: Uuid,
+    rack_id: Uuid,
+    baseboard: Baseboard,
 }

 /// Type managing all oximeter metrics produced by the sled-agent.
@@ -61,10 +75,7 @@ pub enum Error {
 // the name of fields that are not yet used.
 #[cfg_attr(not(target_os = "illumos"), allow(dead_code))]
 pub struct MetricsManager {
-    sled_id: Uuid,
-    rack_id: Uuid,
-    baseboard: Baseboard,
-    hostname: Option<String>,
+    metadata: Arc<Metadata>,
     _log: Logger,
     #[cfg(target_os = "illumos")]
     kstat_sampler: KstatSampler,
@@ -78,7 +89,7 @@ pub struct MetricsManager {
     // namespace them internally, e.g., `"datalink:{link_name}"` would be the
     // real key.
     #[cfg(target_os = "illumos")]
-    tracked_links: BTreeMap<String, TargetId>,
+    tracked_links: Arc<Mutex<BTreeMap<String, TargetId>>>,
     registry: ProducerRegistry,
 }

@@ -101,14 +112,11 @@ impl MetricsManager {
                 registry
                     .register_producer(kstat_sampler.clone())
                     .map_err(Error::Registry)?;
-                let tracked_links = BTreeMap::new();
+                let tracked_links = Arc::new(Mutex::new(BTreeMap::new()));
             }
         }

         Ok(Self {
-            sled_id,
-            rack_id,
-            baseboard,
-            hostname: None,
+            metadata: Arc::new(Metadata { sled_id, rack_id, baseboard }),
             _log: log,
             #[cfg(target_os = "illumos")]
             kstat_sampler,
@@ -128,14 +136,14 @@ impl MetricsManager {
     /// Track metrics for a physical datalink.
     pub async fn track_physical_link(
-        &mut self,
+        &self,
         link_name: impl AsRef<str>,
         interval: Duration,
     ) -> Result<(), Error> {
-        let hostname = self.hostname().await?;
+        let hostname = hostname()?;
         let link = link::PhysicalDataLink {
-            rack_id: self.rack_id,
-            sled_id: self.sled_id,
+            rack_id: self.metadata.rack_id,
+            sled_id: self.metadata.sled_id,
             serial: self.serial_number(),
             hostname,
             link_name: link_name.as_ref().to_string(),
@@ -146,7 +154,10 @@ impl MetricsManager {
             .add_target(link, details)
             .await
             .map_err(Error::Kstat)?;
-        self.tracked_links.insert(link_name.as_ref().to_string(), id);
+        self.tracked_links
+            .lock()
+            .unwrap()
+            .insert(link_name.as_ref().to_string(), id);
         Ok(())
     }

@@ -155,10 +166,12 @@ impl MetricsManager {
     /// This works for both physical and virtual links.
     #[allow(dead_code)]
     pub async fn stop_tracking_link(
-        &mut self,
+        &self,
         link_name: impl AsRef<str>,
     ) -> Result<(), Error> {
-        if let Some(id) = self.tracked_links.remove(link_name.as_ref()) {
+        let maybe_id =
+            self.tracked_links.lock().unwrap().remove(link_name.as_ref());
+        if let Some(id) = maybe_id {
             self.kstat_sampler.remove_target(id).await.map_err(Error::Kstat)
         } else {
             Ok(())
@@ -174,8 +187,8 @@ impl MetricsManager {
         interval: Duration,
     ) -> Result<(), Error> {
         let link = link::VirtualDataLink {
-            rack_id: self.rack_id,
-            sled_id: self.sled_id,
+            rack_id: self.metadata.rack_id,
+            sled_id: self.metadata.sled_id,
             serial: self.serial_number(),
             hostname: hostname.as_ref().to_string(),
             link_name: link_name.as_ref().to_string(),
@@ -190,40 +203,19 @@ impl MetricsManager {

     // Return the serial number out of the baseboard, if one exists.
     fn serial_number(&self) -> String {
-        match &self.baseboard {
+        match &self.metadata.baseboard {
             Baseboard::Gimlet { identifier, .. } => identifier.clone(),
             Baseboard::Unknown => String::from("unknown"),
             Baseboard::Pc { identifier, .. } => identifier.clone(),
         }
     }
-
-    // Return the system's hostname.
-    //
-    // If we've failed to get it previously, we try again. If _that_ fails,
-    // return an error.
-    //
-    // TODO-cleanup: This will become much simpler once
-    // `OnceCell::get_or_try_init` is stabilized.
-    async fn hostname(&mut self) -> Result<String, Error> {
-        if let Some(hn) = &self.hostname {
-            return Ok(hn.clone());
-        }
-        let hn = tokio::process::Command::new("hostname")
-            .env_clear()
-            .output()
-            .await
-            .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_string())
-            .map_err(Error::Hostname)?;
-        self.hostname.replace(hn.clone());
-        Ok(hn)
-    }
 }

 #[cfg(not(target_os = "illumos"))]
 impl MetricsManager {
     /// Track metrics for a physical datalink.
     pub async fn track_physical_link(
-        &mut self,
+        &self,
         _link_name: impl AsRef<str>,
         _interval: Duration,
     ) -> Result<(), Error> {
@@ -237,7 +229,7 @@ impl MetricsManager {
     /// This works for both physical and virtual links.
     #[allow(dead_code)]
     pub async fn stop_tracking_link(
-        &mut self,
+        &self,
         _link_name: impl AsRef<str>,
     ) -> Result<(), Error> {
         Err(Error::Kstat(anyhow!(
@@ -258,3 +250,26 @@ impl MetricsManager {
     )))
 }
 }
+
+// Return the current hostname if possible.
+#[cfg(target_os = "illumos")]
+fn hostname() -> Result<String, Error> {
+    // See netdb.h
+    const MAX_LEN: usize = 256;
+    let mut out = vec![0u8; MAX_LEN + 1];
+    if unsafe {
+        libc::gethostname(out.as_mut_ptr() as *mut libc::c_char, MAX_LEN)
+    } == 0
+    {
+        // Split into subslices by NUL bytes.
+        //
+        // Safety: The buffer is one byte longer than the limit we passed to
+        // gethostname(2), so there is always at least one trailing NUL and
+        // at least one chunk.
+        let chunk = out.split(|x| *x == 0).next().unwrap();
+        let s = std::ffi::CString::new(chunk)
+            .map_err(|_| Error::NonUtf8Hostname)?;
+        s.into_string().map_err(|_| Error::NonUtf8Hostname)
+    } else {
+        Err(Error::Hostname(std::io::Error::last_os_error()))
+    }
+}
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index f14a13aa411..c53a61a0a84 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -81,6 +81,13 @@ pub struct InstanceHardware {
     pub cloud_init_bytes: Option<String>,
 }

+/// Metadata used to track statistics about an instance.
+#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)]
+pub struct InstanceMetadata {
+    pub silo_id: Uuid,
+    pub project_id: Uuid,
+}
+
 /// The body of a request to ensure that a instance and VMM are known to a sled
 /// agent.
 #[derive(Serialize, Deserialize, JsonSchema)]
@@ -102,6 +109,9 @@ pub struct InstanceEnsureBody {

     /// The address at which this VMM should serve a Propolis server API.
     pub propolis_addr: SocketAddr,
+
+    /// Metadata used to track instance statistics.
+    pub metadata: InstanceMetadata,
 }

 /// The body of a request to move a previously-ensured instance into a specific
diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs
index 09ffdf5dc40..494db588288 100644
--- a/sled-agent/src/sim/http_entrypoints.rs
+++ b/sled-agent/src/sim/http_entrypoints.rs
@@ -98,6 +98,7 @@ async fn instance_register(
             body_args.hardware,
             body_args.instance_runtime,
             body_args.vmm_runtime,
+            body_args.metadata,
         )
         .await?,
     ))
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 56cfaf57c86..26643e0426e 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -13,7 +13,7 @@ use super::storage::Storage;
 use crate::nexus::NexusClient;
 use crate::params::{
     DiskStateRequested, InstanceExternalIpBody, InstanceHardware,
-    InstanceMigrationSourceParams, InstancePutStateResponse,
+    InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse,
     InstanceStateRequested, InstanceUnregisterResponse, Inventory,
     OmicronZonesConfig, SledRole,
 };
@@ -240,6 +240,9 @@ impl SledAgent {
         hardware: InstanceHardware,
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
+        // This is currently unused, but will be included as part of work
+        // tracked in https://github.com/oxidecomputer/omicron/issues/4851.
+        _metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         // respond with a fake 500 level failure if asked to ensure an instance
         // with more than 16 CPUs.
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index eaf354db26c..bffe3799be1 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -17,7 +17,7 @@ use crate::metrics::MetricsManager;
 use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue};
 use crate::params::{
     DiskStateRequested, InstanceExternalIpBody, InstanceHardware,
-    InstanceMigrationSourceParams, InstancePutStateResponse,
+    InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse,
     InstanceStateRequested, InstanceUnregisterResponse, Inventory,
     OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule,
     ZoneBundleMetadata, Zpool,
 };
@@ -394,6 +394,55 @@ impl SledAgent {
         let underlay_nics = underlay::find_nics(&config.data_links)?;
         illumos_utils::opte::initialize_xde_driver(&log, &underlay_nics)?;

+        // Start collecting metric data.
+        //
+        // First, we create the shareable type that manages the metrics early
+        // on, so that we can pass it to other components of the sled agent
+        // that need it.
+        //
+        // Then we start tracking physical links and register as a producer
+        // with Nexus in the background.
+        let metrics_manager = MetricsManager::new(
+            request.body.id,
+            request.body.rack_id,
+            long_running_task_handles.hardware_manager.baseboard(),
+            log.new(o!("component" => "MetricsManager")),
+        )?;
+
+        // Start tracking the underlay physical links.
+        for nic in underlay::find_nics(&config.data_links)? {
+            let link_name = nic.interface();
+            if let Err(e) = metrics_manager
+                .track_physical_link(
+                    link_name,
+                    crate::metrics::LINK_SAMPLE_INTERVAL,
+                )
+                .await
+            {
+                error!(
+                    log,
+                    "failed to start tracking physical link metrics";
+                    "link_name" => link_name,
+                    "error" => ?e,
+                );
+            }
+        }
+
+        // Spawn a task in the background to register our metric producer with
+        // Nexus. This should not block progress here.
+        let endpoint = ProducerEndpoint {
+            id: request.body.id,
+            kind: ProducerKind::SledAgent,
+            address: sled_address.into(),
+            base_route: String::from("/metrics/collect"),
+            interval: crate::metrics::METRIC_COLLECTION_INTERVAL,
+        };
+        tokio::task::spawn(register_metric_producer_with_nexus(
+            log.clone(),
+            nexus_client.clone(),
+            endpoint,
+        ));
+
         // Create the PortManager to manage all the OPTE ports on the sled.
         let port_manager = PortManager::new(
             parent_log.new(o!("component" => "PortManager")),
@@ -514,47 +563,6 @@ impl SledAgent {
             rack_network_config.clone(),
         )?;

-        let mut metrics_manager = MetricsManager::new(
-            request.body.id,
-            request.body.rack_id,
-            long_running_task_handles.hardware_manager.baseboard(),
-            log.new(o!("component" => "MetricsManager")),
-        )?;
-
-        // Start tracking the underlay physical links.
-        for nic in underlay::find_nics(&config.data_links)? {
-            let link_name = nic.interface();
-            if let Err(e) = metrics_manager
-                .track_physical_link(
-                    link_name,
-                    crate::metrics::LINK_SAMPLE_INTERVAL,
-                )
-                .await
-            {
-                error!(
-                    log,
-                    "failed to start tracking physical link metrics";
-                    "link_name" => link_name,
-                    "error" => ?e,
-                );
-            }
-        }
-
-        // Spawn a task in the background to register our metric producer with
-        // Nexus. This should not block progress here.
-        let endpoint = ProducerEndpoint {
-            id: request.body.id,
-            kind: ProducerKind::SledAgent,
-            address: sled_address.into(),
-            base_route: String::from("/metrics/collect"),
-            interval: crate::metrics::METRIC_COLLECTION_INTERVAL,
-        };
-        tokio::task::spawn(register_metric_producer_with_nexus(
-            log.clone(),
-            nexus_client.clone(),
-            endpoint,
-        ));
-
         let sled_agent = SledAgent {
             inner: Arc::new(SledAgentInner {
                 id: request.body.id,
@@ -910,6 +918,7 @@ impl SledAgent {
     /// Idempotently ensures that a given instance is registered with this sled,
     /// i.e., that it can be addressed by future calls to
     /// [`Self::instance_ensure_state`].
+    #[allow(clippy::too_many_arguments)]
     pub async fn instance_ensure_registered(
         &self,
         instance_id: Uuid,
@@ -918,6 +927,7 @@ impl SledAgent {
         instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
         propolis_addr: SocketAddr,
+        metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
         self.inner
             .instances
@@ -928,6 +938,7 @@ impl SledAgent {
             instance_runtime,
             vmm_runtime,
             propolis_addr,
+            metadata,
         )
         .await
         .map_err(|e| Error::Instance(e))
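With this, Nexus sends silo and project IDs on every instance registration and the sled agent stores them. The follow-up tracked in https://github.com/oxidecomputer/omicron/issues/4851 can then attach the new `VirtualMachine` kstat target to a `KstatSampler`. A hedged sketch of that future wiring follows; the helper function, its call site, and the sampling/expiration choices are assumptions, not taken from this diff.

```rust
use std::time::Duration;
use uuid::Uuid;

use oximeter_instruments::kstat::virtual_machine::VirtualMachine;
use oximeter_instruments::kstat::{CollectionDetails, KstatSampler};

// Hypothetical helper (sketch only): start sampling vCPU microstate kstats
// for one instance, using the IDs carried by InstanceMetadata.
async fn track_instance_vcpus(
    sampler: &KstatSampler,
    silo_id: Uuid,
    project_id: Uuid,
    instance_id: Uuid,
    n_vcpus: u32,
) -> anyhow::Result<()> {
    let target = VirtualMachine { silo_id, project_id, instance_id, n_vcpus };
    // Sample every 5 seconds and never expire the target; both choices are
    // illustrative, not taken from this change.
    let details = CollectionDetails::never(Duration::from_secs(5));
    let _id = sampler.add_target(target, details).await?;
    Ok(())
}
```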