Skip to content

Commit

Permalink
merge main into branch
Browse files Browse the repository at this point in the history
  • Loading branch information
sunshowers committed Feb 28, 2024
2 parents 7f6e6c1 + 0761ada commit 1795a0e
Show file tree
Hide file tree
Showing 9 changed files with 618 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/hakari.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
with:
toolchain: stable
- name: Install cargo-hakari
uses: taiki-e/install-action@4ce8785db2a8a56c9ede16f705c2c49c5c61669c # v2
uses: taiki-e/install-action@1d776b18af134fca43744080130085d256938196 # v2
with:
tool: cargo-hakari
- name: Check workspace-hack Cargo.toml is up-to-date
Expand Down
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ dns-server = { path = "dns-server" }
dns-service-client = { path = "clients/dns-service-client" }
dpd-client = { path = "clients/dpd-client" }
dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] }
dyn-clone = "1.0.16"
dyn-clone = "1.0.17"
either = "1.10.0"
expectorate = "1.1.0"
fatfs = "0.3.6"
Expand Down
1 change: 1 addition & 0 deletions nexus/blueprint-execution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ omicron-rpaths.workspace = true
anyhow.workspace = true
dns-service-client.workspace = true
futures.workspace = true
illumos-utils.workspace = true
internal-dns.workspace = true
nexus-db-model.workspace = true
nexus-db-queries.workspace = true
Expand Down
315 changes: 315 additions & 0 deletions nexus/blueprint-execution/src/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Ensures dataset records required by a given blueprint
use anyhow::Context;
use illumos_utils::zpool::ZpoolName;
use nexus_db_model::Dataset;
use nexus_db_model::DatasetKind;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
use nexus_types::deployment::OmicronZoneConfig;
use nexus_types::deployment::OmicronZoneType;
use nexus_types::identity::Asset;
use slog::info;
use slog::warn;
use slog_error_chain::InlineErrorChain;
use std::collections::BTreeSet;
use std::net::SocketAddrV6;

/// For each crucible zone in `blueprint`, ensure that a corresponding dataset
/// record exists in `datastore`
///
/// Does not modify any existing dataset records. Returns the number of datasets
/// inserted.
pub(crate) async fn ensure_crucible_dataset_records_exist(
    opctx: &OpContext,
    datastore: &DataStore,
    all_omicron_zones: impl Iterator<Item = &OmicronZoneConfig>,
) -> anyhow::Result<usize> {
    // Before attempting to insert any datasets, first query for any existing
    // dataset records so we can filter them out. This looks like a typical
    // TOCTOU issue, but it is purely a performance optimization. We expect
    // almost all executions of this function to do nothing: new crucible
    // datasets are created very rarely relative to how frequently blueprint
    // realization happens. We could remove this check and filter and instead
    // run the below "insert if not exists" query on every crucible zone, and
    // the behavior would still be correct. However, that would issue far more
    // queries than necessary in the very common case of "we don't need to do
    // anything at all".
    let mut crucible_datasets = datastore
        .dataset_list_all_batched(opctx, Some(DatasetKind::Crucible))
        .await
        .context("failed to list all datasets")?
        .into_iter()
        .map(|dataset| dataset.id())
        .collect::<BTreeSet<_>>();

    let mut num_inserted = 0;
    let mut num_already_exist = 0;

    for zone in all_omicron_zones {
        // Only crucible zones have an associated dataset; skip all other
        // zone types.
        let OmicronZoneType::Crucible { address, dataset } = &zone.zone_type
        else {
            continue;
        };

        // The dataset record shares its ID with the crucible zone it backs.
        let id = zone.id;

        // If already present in the datastore, move on. Removing the ID here
        // also means that after the loop, `crucible_datasets` holds only the
        // record IDs with no matching zone, which we warn about below.
        if crucible_datasets.remove(&id) {
            num_already_exist += 1;
            continue;
        }

        // Map progenitor client strings into the types we need. We never
        // expect these to fail, so on a parse error we log a warning and skip
        // this zone rather than failing the entire execution.
        let addr: SocketAddrV6 = match address.parse() {
            Ok(addr) => addr,
            Err(err) => {
                warn!(
                    opctx.log, "failed to parse crucible zone address";
                    "address" => address,
                    "err" => InlineErrorChain::new(&err),
                );
                continue;
            }
        };
        let zpool_name: ZpoolName = match dataset.pool_name.parse() {
            Ok(name) => name,
            Err(err) => {
                warn!(
                    opctx.log, "failed to parse crucible zone pool name";
                    "pool_name" => &*dataset.pool_name,
                    "err" => err,
                );
                continue;
            }
        };

        let pool_id = zpool_name.id();
        let dataset = Dataset::new(id, pool_id, addr, DatasetKind::Crucible);
        // "Insert if not exists" so that racing Nexus instances performing the
        // same blueprint realization do not trip over each other.
        let maybe_inserted = datastore
            .dataset_insert_if_not_exists(dataset)
            .await
            .with_context(|| {
                format!("failed to insert dataset record for dataset {id}")
            })?;

        // If we succeeded in inserting, log it; if `maybe_inserted` is `None`,
        // we must have lost the TOCTOU race described above, and another Nexus
        // must have inserted this dataset before we could.
        if maybe_inserted.is_some() {
            info!(
                opctx.log,
                "inserted new dataset for crucible zone";
                "id" => %id,
            );
            num_inserted += 1;
        } else {
            num_already_exist += 1;
        }
    }

    // We don't currently support removing datasets, so this would be
    // surprising: the database contains dataset records that are no longer in
    // our blueprint. We can't do anything about this, so just warn.
    if !crucible_datasets.is_empty() {
        warn!(
            opctx.log,
            "database contains {} unexpected crucible datasets",
            crucible_datasets.len();
            "dataset_ids" => ?crucible_datasets,
        );
    }

    info!(
        opctx.log,
        "ensured all crucible zones have dataset records";
        "num_inserted" => num_inserted,
        "num_already_existed" => num_already_exist,
    );

    Ok(num_inserted)
}

#[cfg(test)]
mod tests {
    use super::*;
    use nexus_db_model::SledBaseboard;
    use nexus_db_model::SledSystemHardware;
    use nexus_db_model::SledUpdate;
    use nexus_db_model::Zpool;
    use nexus_test_utils_macros::nexus_test;
    use sled_agent_client::types::OmicronZoneDataset;
    use uuid::Uuid;

    type ControlPlaneTestContext =
        nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;

    // End-to-end check of `ensure_crucible_dataset_records_exist` against a
    // real datastore: first-run inserts, idempotent re-run, and incremental
    // insert of one newly added crucible zone.
    #[nexus_test]
    async fn test_ensure_crucible_dataset_records_exist(
        cptestctx: &ControlPlaneTestContext,
    ) {
        // Set up.
        let nexus = &cptestctx.server.apictx().nexus;
        let datastore = nexus.datastore();
        let opctx = OpContext::for_tests(
            cptestctx.logctx.log.clone(),
            datastore.clone(),
        );
        let opctx = &opctx;

        // Use the standard representative inventory collection.
        let representative = nexus_inventory::examples::representative();
        let collection = representative.builder.build();

        // Record the sleds and zpools contained in this collection.
        // Dataset inserts reference zpools, which reference sleds, so both
        // must exist in the datastore before we can ensure datasets.
        let rack_id = Uuid::new_v4();
        for (&sled_id, config) in &collection.omicron_zones {
            let sled = SledUpdate::new(
                sled_id,
                "[::1]:0".parse().unwrap(),
                SledBaseboard {
                    serial_number: format!("test-{sled_id}"),
                    part_number: "test-sled".to_string(),
                    revision: 0,
                },
                SledSystemHardware {
                    is_scrimlet: false,
                    usable_hardware_threads: 128,
                    usable_physical_ram: (64 << 30).try_into().unwrap(),
                    reservoir_size: (16 << 30).try_into().unwrap(),
                },
                rack_id,
            );
            datastore
                .sled_upsert(sled)
                .await
                .expect("failed to upsert sled")
                .unwrap();

            // Upsert a zpool record for each zpool backing a crucible zone;
            // other zone types carry no dataset and need no zpool here.
            for zone in &config.zones.zones {
                let OmicronZoneType::Crucible { dataset, .. } = &zone.zone_type
                else {
                    continue;
                };
                let zpool_name: ZpoolName =
                    dataset.pool_name.parse().expect("invalid zpool name");
                let zpool = Zpool::new(
                    zpool_name.id(),
                    sled_id,
                    Uuid::new_v4(), // physical_disk_id
                    (1 << 30).try_into().unwrap(), // total_size
                );
                datastore
                    .zpool_upsert(zpool)
                    .await
                    .expect("failed to upsert zpool");
            }
        }

        // How many crucible zones are there?
        let ncrucible_zones = collection
            .all_omicron_zones()
            .filter(|z| matches!(z.zone_type, OmicronZoneType::Crucible { .. }))
            .count();

        // Prior to ensuring datasets exist, there should be none.
        assert_eq!(
            datastore
                .dataset_list_all_batched(opctx, Some(DatasetKind::Crucible))
                .await
                .unwrap()
                .len(),
            0
        );
        let ndatasets_inserted = ensure_crucible_dataset_records_exist(
            opctx,
            datastore,
            collection.all_omicron_zones(),
        )
        .await
        .expect("failed to ensure crucible datasets");

        // We should have inserted a dataset for each crucible zone.
        assert_eq!(ncrucible_zones, ndatasets_inserted);
        assert_eq!(
            datastore
                .dataset_list_all_batched(opctx, Some(DatasetKind::Crucible))
                .await
                .unwrap()
                .len(),
            ncrucible_zones,
        );

        // Ensuring the same crucible datasets again should insert no new
        // records.
        let ndatasets_inserted = ensure_crucible_dataset_records_exist(
            opctx,
            datastore,
            collection.all_omicron_zones(),
        )
        .await
        .expect("failed to ensure crucible datasets");
        assert_eq!(0, ndatasets_inserted);
        assert_eq!(
            datastore
                .dataset_list_all_batched(opctx, Some(DatasetKind::Crucible))
                .await
                .unwrap()
                .len(),
            ncrucible_zones,
        );

        // Create another zpool on one of the sleds, so we can add a new
        // crucible zone that uses it.
        let new_zpool_id = Uuid::new_v4();
        for &sled_id in collection.omicron_zones.keys().take(1) {
            let zpool = Zpool::new(
                new_zpool_id,
                sled_id,
                Uuid::new_v4(), // physical_disk_id
                (1 << 30).try_into().unwrap(), // total_size
            );
            datastore
                .zpool_upsert(zpool)
                .await
                .expect("failed to upsert zpool");
        }

        // Call `ensure_crucible_dataset_records_exist` again, adding a new
        // crucible zone. It should insert only this new zone.
        let new_zone = OmicronZoneConfig {
            id: Uuid::new_v4(),
            underlay_address: "::1".parse().unwrap(),
            zone_type: OmicronZoneType::Crucible {
                address: "[::1]:0".to_string(),
                dataset: OmicronZoneDataset {
                    pool_name: ZpoolName::new_external(new_zpool_id)
                        .to_string()
                        .parse()
                        .unwrap(),
                },
            },
        };
        let ndatasets_inserted = ensure_crucible_dataset_records_exist(
            opctx,
            datastore,
            collection.all_omicron_zones().chain(std::iter::once(&new_zone)),
        )
        .await
        .expect("failed to ensure crucible datasets");
        assert_eq!(ndatasets_inserted, 1);
        assert_eq!(
            datastore
                .dataset_list_all_batched(opctx, Some(DatasetKind::Crucible))
                .await
                .unwrap()
                .len(),
            ncrucible_zones + 1,
        );
    }
}
13 changes: 12 additions & 1 deletion nexus/blueprint-execution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::collections::BTreeMap;
use std::net::SocketAddrV6;
use uuid::Uuid;

mod datasets;
mod dns;
mod omicron_zones;
mod resource_allocation;
Expand Down Expand Up @@ -89,6 +90,14 @@ where
omicron_zones::deploy_zones(&opctx, &sleds_by_id, &blueprint.omicron_zones)
.await?;

datasets::ensure_crucible_dataset_records_exist(
&opctx,
datastore,
blueprint.all_omicron_zones().map(|(_sled_id, zone)| zone),
)
.await
.map_err(|err| vec![err])?;

dns::deploy_dns(
&opctx,
datastore,
Expand All @@ -97,5 +106,7 @@ where
&sleds_by_id,
)
.await
.map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])
.map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])?;

Ok(())
}
Loading

0 comments on commit 1795a0e

Please sign in to comment.