Skip to content

Commit

Permalink
Record Crucible region ports in CRDB (#5955)
Browse files Browse the repository at this point in the history
It was a mistake not to record the port that an Agent assigns a Region
when it's created, and this PR rectifies that: ports are recorded when
regions are ensured, and a background task will scan for regions that
are missing a port, and attempt to contact the appropriate Agent in
order to fill that in.

Fixes #5846

---------

Co-authored-by: Sean Klein <[email protected]>
  • Loading branch information
jmpesp and smklein authored Jul 4, 2024
1 parent 30b6713 commit bb083ae
Show file tree
Hide file tree
Showing 33 changed files with 574 additions and 148 deletions.
31 changes: 31 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ enum DbCommands {
Inventory(InventoryArgs),
/// Save the current Reconfigurator inputs to a file
ReconfiguratorSave(ReconfiguratorSaveArgs),
/// Print information about regions
Region(RegionArgs),
/// Query for information about region replacements, optionally manually
/// triggering one.
RegionReplacement(RegionReplacementArgs),
Expand Down Expand Up @@ -456,6 +458,18 @@ struct SledsArgs {
filter: Option<SledFilter>,
}

#[derive(Debug, Args)]
struct RegionArgs {
#[command(subcommand)]
command: RegionCommands,
}

#[derive(Debug, Subcommand)]
enum RegionCommands {
/// List regions that are still missing ports
ListRegionsMissingPorts,
}

#[derive(Debug, Args)]
struct RegionReplacementArgs {
#[command(subcommand)]
Expand Down Expand Up @@ -605,6 +619,9 @@ impl DbArgs {
)
.await
}
DbCommands::Region(RegionArgs {
command: RegionCommands::ListRegionsMissingPorts,
}) => cmd_db_region_missing_porst(&opctx, &datastore).await,
DbCommands::RegionReplacement(RegionReplacementArgs {
command: RegionReplacementCommands::List(args),
}) => {
Expand Down Expand Up @@ -1523,6 +1540,20 @@ async fn cmd_db_snapshot_info(
Ok(())
}

/// List all regions still missing ports
async fn cmd_db_region_missing_porst(
opctx: &OpContext,
datastore: &DataStore,
) -> Result<(), anyhow::Error> {
let regions: Vec<Region> = datastore.regions_missing_ports(opctx).await?;

for region in regions {
println!("{:?}", region.id());
}

Ok(())
}

/// List all region replacement requests
async fn cmd_db_region_replacement_list(
datastore: &DataStore,
Expand Down
21 changes: 21 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use nexus_client::types::SledSelector;
use nexus_client::types::UninitializedSledId;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_types::deployment::Blueprint;
use nexus_types::internal_api::background::LookupRegionPortStatus;
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
use nexus_types::inventory::BaseboardId;
use omicron_uuid_kinds::CollectionUuid;
Expand Down Expand Up @@ -1082,6 +1083,26 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
}
}
};
} else if name == "lookup_region_port" {
match serde_json::from_value::<LookupRegionPortStatus>(details.clone())
{
Err(error) => eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
),

Ok(LookupRegionPortStatus { found_port_ok, errors }) => {
println!(" total filled in ports: {}", found_port_ok.len());
for line in &found_port_ok {
println!(" > {line}");
}

println!(" errors: {}", errors.len());
for line in &errors {
println!(" > {line}");
}
}
};
} else {
println!(
"warning: unknown background task: {:?} \
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "lookup_region_port"
fill in missing ports for region records


task: "metrics_producer_gc"
unregisters Oximeter metrics producers that have not renewed their lease

Expand Down Expand Up @@ -221,6 +225,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "lookup_region_port"
fill in missing ports for region records


task: "metrics_producer_gc"
unregisters Oximeter metrics producers that have not renewed their lease

Expand Down Expand Up @@ -340,6 +348,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "lookup_region_port"
fill in missing ports for region records


task: "metrics_producer_gc"
unregisters Oximeter metrics producers that have not renewed their lease

Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ task: "inventory_collection"
collects hardware and software inventory data from the whole system


task: "lookup_region_port"
fill in missing ports for region records


task: "metrics_producer_gc"
unregisters Oximeter metrics producers that have not renewed their lease

Expand Down Expand Up @@ -483,6 +487,14 @@ task: "inventory_collection"
last collection started: <REDACTED_TIMESTAMP>
last collection done: <REDACTED_TIMESTAMP>

task: "lookup_region_port"
configured period: every 1m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total filled in ports: 0
errors: 0

task: "metrics_producer_gc"
configured period: every 1m
currently executing: no
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Commands:
dns Print information about internal and external DNS
inventory Print information about collected hardware/software inventory
reconfigurator-save Save the current Reconfigurator inputs to a file
region Print information about regions
region-replacement Query for information about region replacements, optionally manually
triggering one
sleds Print information about sleds
Expand Down Expand Up @@ -147,6 +148,7 @@ Commands:
dns Print information about internal and external DNS
inventory Print information about collected hardware/software inventory
reconfigurator-save Save the current Reconfigurator inputs to a file
region Print information about regions
region-replacement Query for information about region replacements, optionally manually
triggering one
sleds Print information about sleds
Expand Down
17 changes: 16 additions & 1 deletion nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,8 @@ pub struct BackgroundTaskConfig {
pub v2p_mapping_propagation: V2PMappingPropagationConfig,
/// configuration for abandoned VMM reaper task
pub abandoned_vmm_reaper: AbandonedVmmReaperConfig,
/// configuration for lookup region port task
pub lookup_region_port: LookupRegionPortConfig,
}

#[serde_as]
Expand Down Expand Up @@ -574,6 +576,14 @@ pub struct RegionReplacementDriverConfig {
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct LookupRegionPortConfig {
/// period (in seconds) for periodic activations of this background task
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
Expand Down Expand Up @@ -816,6 +826,7 @@ mod test {
service_firewall_propagation.period_secs = 300
v2p_mapping_propagation.period_secs = 30
abandoned_vmm_reaper.period_secs = 60
lookup_region_port.period_secs = 60
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -962,7 +973,10 @@ mod test {
},
abandoned_vmm_reaper: AbandonedVmmReaperConfig {
period_secs: Duration::from_secs(60),
}
},
lookup_region_port: LookupRegionPortConfig {
period_secs: Duration::from_secs(60),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -1035,6 +1049,7 @@ mod test {
service_firewall_propagation.period_secs = 300
v2p_mapping_propagation.period_secs = 30
abandoned_vmm_reaper.period_secs = 60
lookup_region_port.period_secs = 60
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
10 changes: 10 additions & 0 deletions nexus/db-model/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use super::ByteCount;
use crate::schema::region;
use crate::SqlU16;
use db_macros::Asset;
use omicron_common::api::external;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -38,6 +39,10 @@ pub struct Region {
// never expect them to be negative.
blocks_per_extent: i64,
extent_count: i64,

// The port that was returned when the region was created. This field didn't
// originally exist, so records may not have it filled in.
port: Option<SqlU16>,
}

impl Region {
Expand All @@ -47,6 +52,7 @@ impl Region {
block_size: ByteCount,
blocks_per_extent: u64,
extent_count: u64,
port: u16,
) -> Self {
Self {
identity: RegionIdentity::new(Uuid::new_v4()),
Expand All @@ -55,6 +61,7 @@ impl Region {
block_size,
blocks_per_extent: blocks_per_extent as i64,
extent_count: extent_count as i64,
port: Some(port.into()),
}
}

Expand All @@ -81,4 +88,7 @@ impl Region {
// external, customer-supplied keys is a non-requirement.
true
}
pub fn port(&self) -> Option<u16> {
self.port.map(|port| port.into())
}
}
2 changes: 2 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,8 @@ table! {
block_size -> Int8,
blocks_per_extent -> Int8,
extent_count -> Int8,

port -> Nullable<Int4>,
}
}

Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(81, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(82, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(82, "region-port"),
KnownVersion::new(81, "add-nullable-filesystem-pool"),
KnownVersion::new(80, "add-instance-id-to-migrations"),
KnownVersion::new(79, "nic-spoof-allow"),
Expand Down
Loading

0 comments on commit bb083ae

Please sign in to comment.