Skip to content

Commit

Permalink
Publish instance vCPU usage statistics to oximeter
Browse files Browse the repository at this point in the history
- Adds the silo and project IDs to the instance-ensure request from
  Nexus to the sled-agent. These are used as fields on the
  instance-related statistics.
- Defines a `VirtualMachine` oximeter target and `VcpuUsage` metric. The
  latter has a `state` field which corresponds to the named kstats
  published by the hypervisor that accumulate the time spent in a number
  of vCPU microstates. The combination of these should allow us to
  aggregate or break down vCPU usage by silo, project, instance, vCPU
  ID, and CPU state.
- Adds APIs to the `MetricsManager` for starting / stopping tracking
  instance-related metrics, and plumbs the type through the
  `InstanceManager` and `Instance` (and their internal friends), so that
  new instances can control when data is produced from them. Currently,
  we'll start producing as soon as we get a non-terminate response from
  Propolis in the `instance_state_monitor()` task, and stop when the
  instance is terminated.
  • Loading branch information
bnaecker committed Jan 21, 2024
1 parent 1ae97e4 commit a900cdd
Show file tree
Hide file tree
Showing 12 changed files with 482 additions and 93 deletions.
16 changes: 16 additions & 0 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,21 @@ impl super::Nexus {
.map(|ssh_key| ssh_key.public_key)
.collect::<Vec<String>>();

// Construct instance metadata used to track its statistics.
//
// This currently means fetching the current silo ID, since we have all
// the other metadata already.
let silo_id = self
.current_silo_lookup(opctx)?
.lookup_for(authz::Action::Read)
.await?
.0
.id();
let metadata = sled_agent_client::types::InstanceMetadata {
silo_id,
project_id: db_instance.project_id,
};

// Ask the sled agent to begin the state change. Then update the
// database to reflect the new intermediate state. If this update is
// not the newest one, that's fine. That might just mean the sled agent
Expand Down Expand Up @@ -1178,6 +1193,7 @@ impl super::Nexus {
PROPOLIS_PORT,
)
.to_string(),
metadata,
},
)
.await
Expand Down
27 changes: 27 additions & 0 deletions openapi/sled-agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -4515,6 +4515,14 @@
}
]
},
"metadata": {
"description": "Metadata used to track instance statistics.",
"allOf": [
{
"$ref": "#/components/schemas/InstanceMetadata"
}
]
},
"propolis_addr": {
"description": "The address at which this VMM should serve a Propolis server API.",
"type": "string"
Expand All @@ -4536,6 +4544,7 @@
"required": [
"hardware",
"instance_runtime",
"metadata",
"propolis_addr",
"propolis_id",
"vmm_runtime"
Expand Down Expand Up @@ -4624,6 +4633,24 @@
"snapshot_id"
]
},
"InstanceMetadata": {
"description": "Metadata used to track statistics about an instance.",
"type": "object",
"properties": {
"project_id": {
"type": "string",
"format": "uuid"
},
"silo_id": {
"type": "string",
"format": "uuid"
}
},
"required": [
"project_id",
"silo_id"
]
},
"InstanceMigrationSourceParams": {
"description": "Instance runtime state to update for a migration.",
"type": "object",
Expand Down
1 change: 1 addition & 0 deletions oximeter/instruments/src/kstat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ use std::time::Duration;

pub mod link;
mod sampler;
pub mod virtual_machine;

pub use sampler::CollectionDetails;
pub use sampler::ExpirationBehavior;
Expand Down
185 changes: 185 additions & 0 deletions oximeter/instruments/src/kstat/virtual_machine.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

// Copyright 2023 Oxide Computer Company

//! Types for tracking statistics about virtual machine instances.
use crate::kstat::hrtime_to_utc;
use crate::kstat::ConvertNamedData;
use crate::kstat::Error;
use crate::kstat::KstatList;
use crate::kstat::KstatTarget;
use chrono::DateTime;
use chrono::Utc;
use kstat_rs::Data;
use kstat_rs::Kstat;
use kstat_rs::Named;
use kstat_rs::NamedData;
use oximeter::types::Cumulative;
use oximeter::Metric;
use oximeter::Sample;
use oximeter::Target;
use uuid::Uuid;

/// A single virtual machine.
///
/// This is the `oximeter` [`Target`] for instance-related statistics: its
/// fields identify which instance a sample belongs to, allowing usage to be
/// aggregated or broken down by silo, project, or individual instance.
#[derive(Clone, Debug, Target)]
pub struct VirtualMachine {
    /// The silo to which the instance belongs.
    pub silo_id: Uuid,
    /// The project to which the instance belongs.
    pub project_id: Uuid,
    /// The ID of the instance.
    pub instance_id: Uuid,
}

/// Metric tracking vCPU usage by state.
///
/// Each sample accumulates the time one vCPU has spent in a single named
/// microstate, as reported by the hypervisor's kstats.
#[derive(Clone, Debug, Metric)]
pub struct VcpuUsage {
    /// The vCPU ID.
    pub vcpu_id: u32,
    /// The state of the vCPU. This is the hypervisor's microstate name with
    /// the `time_` prefix stripped (see `produce_vcpu_usage()`).
    pub state: String,
    /// The cumulative time spent in this state, in nanoseconds.
    pub datum: Cumulative<u64>,
}

// The name of the kstat module containing virtual machine kstats.
const VMM_KSTAT_MODULE_NAME: &str = "vmm";

// The name of the kstat with virtual machine metadata (currently, the VM
// name).
const VM_KSTAT_NAME: &str = "vm";

// The named kstat holding the virtual machine's name. This is currently the
// UUID assigned by the control plane to the virtual machine instance.
const VM_NAME_KSTAT: &str = "vm_name";

// The prefix of kstat names containing vCPU usage data (`vcpu0`, `vcpu1`,
// ...). This is also the name of the named-data item that holds the vCPU ID
// within each such kstat.
const VCPU_KSTAT_PREFIX: &str = "vcpu";

// Prefix for all named data with a valid vCPU microstate that we track.
const VCPU_MICROSTATE_PREFIX: &str = "time_";

// The number of expected vCPU microstates we track. This isn't load-bearing,
// and is only used to help preallocate the vector holding the `VcpuUsage`
// samples.
const N_VCPU_MICROSTATES: usize = 6;

impl KstatTarget for VirtualMachine {
// The VMM kstats are organized like so:
//
// - module: vmm
// - instance: a kernel-assigned integer
// - name: vm -> generic VM info, vcpuX -> info for each vCPU
//
// At this part of the code, we don't have that kstat instance, only the
// virtual machine instance's control plane UUID. However, the VM's "name"
// is assigned to be that control plane UUID in the hypervisor. See
// https://github.com/oxidecomputer/propolis/blob/759bf4a19990404c135e608afbe0d38b70bfa370/bin/propolis-server/src/lib/vm/mod.rs#L420
// for the current code which does that.
//
// So we need to indicate interest in any VMM-related kstat here, and we are
// forced to filter to the right instance by looking up the VM name inside
// the `to_samples()` method below.
fn interested(&self, kstat: &Kstat<'_>) -> bool {
kstat.ks_module == VMM_KSTAT_MODULE_NAME
}

fn to_samples(
&self,
kstats: KstatList<'_, '_>,
) -> Result<Vec<Sample>, Error> {
// First, we need to map the instance's control plane UUID to the
// instance ID. We'll find this through the `vmm:<instance>:vm:vm_name`
// kstat, which lists the instance's UUID as its name.
let instance_id = self.instance_id.to_string();
let instance = kstats
.iter()
.find_map(|(_, kstat, data)| {
kstat_instance_from_instance_id(kstat, data, &instance_id)
})
.ok_or_else(|| Error::NoSuchKstat)?;

// Armed with the kstat instance, find all relevant metrics related to
// this particular VM. For now, we produce only vCPU usage metrics, but
// others may be chained in the future.
let vcpu_stats = kstats.iter().filter(|(_, kstat, _)| {
kstat.ks_instance == instance
&& kstat.ks_name.starts_with(VCPU_KSTAT_PREFIX)
});
produce_vcpu_usage(self, vcpu_stats)
}
}

// Given a kstat and an instance's ID, return the kstat instance if it matches.
pub fn kstat_instance_from_instance_id(
    kstat: &Kstat<'_>,
    data: &Data<'_>,
    instance_id: &str,
) -> Option<i32> {
    // Only the `vmm:<instance>:vm` kstat carries the VM-name metadata, so
    // anything else can never match.
    if kstat.ks_module != VMM_KSTAT_MODULE_NAME
        || kstat.ks_name != VM_KSTAT_NAME
    {
        return None;
    }
    let Data::Named(named) = data else {
        return None;
    };
    // The kstat matches when its `vm_name` named datum is a string equal to
    // the control plane UUID we're looking for.
    let name_matches = named.iter().any(|item| {
        item.name == VM_NAME_KSTAT
            && matches!(&item.value, NamedData::String(name) if instance_id == *name)
    });
    name_matches.then_some(kstat.ks_instance)
}

// Produce `Sample`s for the `VcpuUsage` metric from the relevant kstats.
pub fn produce_vcpu_usage<'a>(
vm: &'a VirtualMachine,
vcpu_stats: impl Iterator<Item = &'a (DateTime<Utc>, Kstat<'a>, Data<'a>)> + 'a,
) -> Result<Vec<Sample>, Error> {
let mut out = Vec::with_capacity(N_VCPU_MICROSTATES);
for (creation_time, kstat, data) in vcpu_stats {
let Data::Named(named) = data else {
return Err(Error::ExpectedNamedKstat);
};
let snapshot_time = hrtime_to_utc(kstat.ks_snaptime)?;

// Find the vCPU ID, from the relevant named data item.
let vcpu_id = named
.iter()
.find_map(|named| {
if named.name == VCPU_KSTAT_PREFIX {
named.value.as_u32().ok()
} else {
None
}
})
.ok_or_else(|| Error::NoSuchKstat)?;

// We'll track all statistics starting with `time_` as the microstate.
for Named { name, value } in named
.iter()
.filter(|nv| nv.name.starts_with(VCPU_MICROSTATE_PREFIX))
{
// Safety: We're filtering in the loop on this prefix, so it must
// exist.
let state =
name.strip_prefix(VCPU_MICROSTATE_PREFIX).unwrap().to_string();
let datum =
Cumulative::with_start_time(*creation_time, value.as_u64()?);
let metric = VcpuUsage { vcpu_id, state, datum };
let sample =
Sample::new_with_timestamp(snapshot_time, vm, &metric)?;
out.push(sample);
}
}
Ok(out)
}
1 change: 1 addition & 0 deletions sled-agent/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ async fn instance_register(
body_args.instance_runtime,
body_args.vmm_runtime,
body_args.propolis_addr,
body_args.metadata,
)
.await?,
))
Expand Down
46 changes: 44 additions & 2 deletions sled-agent/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@ use crate::common::instance::{
PublishedVmmState,
};
use crate::instance_manager::{InstanceManagerServices, InstanceTicket};
use crate::metrics::Error as MetricsError;
use crate::metrics::MetricsManager;
use crate::metrics::INSTANCE_SAMPLE_INTERVAL;
use crate::nexus::NexusClientWithResolver;
use crate::params::ZoneBundleCause;
use crate::params::ZoneBundleMetadata;
use crate::params::{
InstanceHardware, InstanceMigrationSourceParams,
InstanceHardware, InstanceMetadata, InstanceMigrationSourceParams,
InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule,
};
use crate::profile::*;
Expand Down Expand Up @@ -108,6 +111,9 @@ pub enum Error {

#[error("I/O error")]
Io(#[from] std::io::Error),

#[error("Failed to track instance metrics")]
Metrics(#[source] MetricsError),
}

// Issues read-only, idempotent HTTP requests at propolis until it responds with
Expand Down Expand Up @@ -233,8 +239,14 @@ struct InstanceInner {
// Object used to collect zone bundles from this instance when terminated.
zone_bundler: ZoneBundler,

// Object used to start / stop collection of instance-related metrics.
metrics_manager: MetricsManager,

// Object representing membership in the "instance manager".
instance_ticket: InstanceTicket,

// Metadata used to track statistics for this instance.
metadata: InstanceMetadata,
}

impl InstanceInner {
Expand Down Expand Up @@ -367,6 +379,10 @@ impl InstanceInner {
// state to Nexus. This ensures that the instance is actually gone from
// the sled when Nexus receives the state update saying it's actually
// destroyed.
//
// In addition, we'll start or stop collecting metrics solely on the
// basis of whether the instance is terminated. All other states imply
// we start (or continue) to collect instance metrics.
match action {
Some(InstanceAction::Destroy) => {
info!(self.log, "terminating VMM that has exited";
Expand All @@ -375,7 +391,17 @@ impl InstanceInner {
self.terminate().await?;
Ok(Reaction::Terminate)
}
None => Ok(Reaction::Continue),
None => {
self.metrics_manager
.track_instance(
&self.id(),
&self.metadata,
INSTANCE_SAMPLE_INTERVAL,
)
.await
.map_err(Error::Metrics)?;
Ok(Reaction::Continue)
}
}
}

Expand Down Expand Up @@ -537,6 +563,18 @@ impl InstanceInner {
);
}

// Stop tracking instance-related metrics.
if let Err(e) =
self.metrics_manager.stop_tracking_instance(self.id()).await
{
error!(
self.log,
"Failed to stop tracking instance metrics";
"instance_id" => %self.id(),
"error" => ?e,
);
}

// Ensure that no zone exists. This succeeds even if no zone was ever
// created.
// NOTE: we call`Zones::halt_and_remove_logged` directly instead of
Expand Down Expand Up @@ -596,6 +634,7 @@ impl Instance {
ticket: InstanceTicket,
state: InstanceInitialState,
services: InstanceManagerServices,
metadata: InstanceMetadata,
) -> Result<Self, Error> {
info!(log, "initializing new Instance";
"instance_id" => %id,
Expand All @@ -615,6 +654,7 @@ impl Instance {
port_manager,
storage,
zone_bundler,
metrics_manager,
zone_builder_factory,
} = services;

Expand Down Expand Up @@ -686,7 +726,9 @@ impl Instance {
storage,
zone_builder_factory,
zone_bundler,
metrics_manager,
instance_ticket: ticket,
metadata,
};

let inner = Arc::new(Mutex::new(instance));
Expand Down
Loading

0 comments on commit a900cdd

Please sign in to comment.