Skip to content

Commit

Permalink
Groundwork for oximeter instance vCPU metrics
Browse files Browse the repository at this point in the history
- Adds the silo and project IDs to the instance-ensure request from
  Nexus to the sled-agent. These are used as fields on the
  instance-related statistics. This metadata is currently unused, but
  will be forwarded to Propolis in the instance-ensure request once the
  server is updated to accept it.
- Defines a `VirtualMachine` oximeter target and `VcpuUsage` metric. The
  latter has a `state` field which corresponds to the named kstats
  published by the hypervisor that accumulate the time spent in a number
  of vCPU microstates. The combination of these should allow us to
  aggregate or break down vCPU usage by silo, project, instance, vCPU
  ID, and CPU state. Adds some simple mocks and tests for these.
- Adds more fine-grained feature flags to the `oximeter-instruments`
  crate.
  • Loading branch information
bnaecker committed Jan 25, 2024
1 parent 4fef599 commit d24514b
Show file tree
Hide file tree
Showing 17 changed files with 704 additions and 111 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1146,6 +1146,21 @@ impl super::Nexus {
.map(|ssh_key| ssh_key.public_key)
.collect::<Vec<String>>();

// Construct instance metadata used to track its statistics.
//
// This currently means fetching the current silo ID, since we have all
// the other metadata already.
let silo_id = self
.current_silo_lookup(opctx)?
.lookup_for(authz::Action::Read)
.await?
.0
.id();
let metadata = sled_agent_client::types::InstanceMetadata {
silo_id,
project_id: db_instance.project_id,
};

// Ask the sled agent to begin the state change. Then update the
// database to reflect the new intermediate state. If this update is
// not the newest one, that's fine. That might just mean the sled agent
Expand Down Expand Up @@ -1189,6 +1204,7 @@ impl super::Nexus {
PROPOLIS_PORT,
)
.to_string(),
metadata,
},
)
.await
Expand Down
27 changes: 27 additions & 0 deletions openapi/sled-agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -4589,6 +4589,14 @@
}
]
},
"metadata": {
"description": "Metadata used to track instance statistics.",
"allOf": [
{
"$ref": "#/components/schemas/InstanceMetadata"
}
]
},
"propolis_addr": {
"description": "The address at which this VMM should serve a Propolis server API.",
"type": "string"
Expand All @@ -4610,6 +4618,7 @@
"required": [
"hardware",
"instance_runtime",
"metadata",
"propolis_addr",
"propolis_id",
"vmm_runtime"
Expand Down Expand Up @@ -4741,6 +4750,24 @@
"snapshot_id"
]
},
"InstanceMetadata": {
"description": "Metadata used to track statistics about an instance.",
"type": "object",
"properties": {
"project_id": {
"type": "string",
"format": "uuid"
},
"silo_id": {
"type": "string",
"format": "uuid"
}
},
"required": [
"project_id",
"silo_id"
]
},
"InstanceMigrationSourceParams": {
"description": "Instance runtime state to update for a migration.",
"type": "object",
Expand Down
48 changes: 35 additions & 13 deletions oximeter/instruments/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,49 @@ edition = "2021"
license = "MPL-2.0"

[dependencies]
cfg-if.workspace = true
chrono.workspace = true
dropshot.workspace = true
futures.workspace = true
cfg-if = { workspace = true, optional = true }
chrono = { workspace = true, optional = true }
dropshot = { workspace = true, optional = true }
futures = { workspace = true, optional = true }
http = { workspace = true, optional = true }
oximeter.workspace = true
slog.workspace = true
tokio.workspace = true
thiserror.workspace = true
uuid.workspace = true
omicron-workspace-hack.workspace = true
oximeter = { workspace = true, optional = true }
slog = { workspace = true, optional = true }
tokio = { workspace = true, optional = true }
thiserror = { workspace = true, optional = true }
uuid = { workspace = true, optional = true }
omicron-workspace-hack = { workspace = true, optional = true }

[features]
default = ["http-instruments", "kstat"]
http-instruments = ["http"]
kstat = ["kstat-rs"]
default = ["http-instruments", "datalink", "virtual-machine"]
http-instruments = [
"dep:chrono",
"dep:dropshot",
"dep:futures",
"dep:http",
"dep:omicron-workspace-hack",
"dep:oximeter",
"dep:uuid"
]
kstat = [
"dep:cfg-if",
"dep:chrono",
"dep:futures",
"dep:kstat-rs",
"dep:omicron-workspace-hack",
"dep:oximeter",
"dep:slog",
"dep:tokio",
"dep:thiserror",
"dep:uuid"
]
datalink = ["kstat"]
virtual-machine = ["kstat"]

[dev-dependencies]
rand.workspace = true
slog-async.workspace = true
slog-term.workspace = true
oximeter.workspace = true

[target.'cfg(target_os = "illumos")'.dependencies]
kstat-rs = { workspace = true, optional = true }
3 changes: 3 additions & 0 deletions oximeter/instruments/src/kstat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,11 @@ use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::time::Duration;

#[cfg(any(feature = "datalink", test))]
pub mod link;
mod sampler;
#[cfg(any(feature = "virtual-machine", test))]
pub mod virtual_machine;

pub use sampler::CollectionDetails;
pub use sampler::ExpirationBehavior;
Expand Down
Loading

0 comments on commit d24514b

Please sign in to comment.