Skip to content

Commit

Permalink
RandomWithDistinctSleds region allocation strategy (#3858)
Browse files Browse the repository at this point in the history
PR #3650 introduced the Random region allocation strategy to allocate
regions randomly across the rack. This expands on that with the addition
of the RandomWithDistinctSleds region allocation strategy. This strategy
is the same, but requires the 3 crucible regions be allocated on 3
different sleds to improve resiliency against a whole-sled failure.

The Random strategy still exists, and does not require 3 distinct sleds.
This is useful in one-sled environments such as the integration tests,
and lab setups.

This also fixes a shortcoming of #3650 whereby multiple datasets on a
single zpool could be selected. That fix applies to both the old Random
strategy and the new RandomWithDistinctSleds strategy.

At present, unit tests verify that the allocation behavior works
correctly with CockroachDB, and we can try it out on dogfood.

Adds the `-r` / `--rack-topology` command line argument to omicron-package target create. Use this to specify whether you are packaging for a single-sled or multi-sled environment. Under single-sled environments, the requirement for 3 distinct sleds is removed.

Fixes #3702

---------

Co-authored-by: iliana etaoin <[email protected]>
  • Loading branch information
faithanalog and iliana authored Oct 3, 2023
1 parent e86579c commit 6bc5e60
Show file tree
Hide file tree
Showing 24 changed files with 617 additions and 204 deletions.
1 change: 1 addition & 0 deletions .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ cd /opt/oxide/work

ptime -m tar xvzf /input/package/work/package.tar.gz
cp /input/package/work/zones/* out/
mv out/omicron-nexus-single-sled.tar.gz out/omicron-nexus.tar.gz
mkdir tests
for p in /input/ci-tools/work/end-to-end-tests/*.gz; do
ptime -m gunzip < "$p" > "tests/$(basename "${p%.gz}")"
Expand Down
9 changes: 7 additions & 2 deletions .github/buildomat/jobs/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ ptime -m ./tools/ci_download_softnpu_machinery

# Build the test target
ptime -m cargo run --locked --release --bin omicron-package -- \
-t test target create -i standard -m non-gimlet -s softnpu
-t test target create -i standard -m non-gimlet -s softnpu -r single-sled
ptime -m cargo run --locked --release --bin omicron-package -- \
-t test package

Expand Down Expand Up @@ -81,9 +81,13 @@ stamp_packages() {
done
}

# Keep the single-sled Nexus zone around for the deploy job. (The global zone
# build below overwrites the file.)
mv out/omicron-nexus.tar.gz out/omicron-nexus-single-sled.tar.gz

# Build necessary for the global zone
ptime -m cargo run --locked --release --bin omicron-package -- \
-t host target create -i standard -m gimlet -s asic
-t host target create -i standard -m gimlet -s asic -r multi-sled
ptime -m cargo run --locked --release --bin omicron-package -- \
-t host package
stamp_packages omicron-sled-agent maghemite propolis-server overlay
Expand Down Expand Up @@ -111,6 +115,7 @@ zones=(
out/external-dns.tar.gz
out/internal-dns.tar.gz
out/omicron-nexus.tar.gz
out/omicron-nexus-single-sled.tar.gz
out/oximeter-collector.tar.gz
out/propolis-server.tar.gz
out/switch-*.tar.gz
Expand Down
3 changes: 2 additions & 1 deletion .github/buildomat/jobs/tuf-repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,11 @@ done
mkdir /work/package
pushd /work/package
tar xf /input/package/work/package.tar.gz out package-manifest.toml target/release/omicron-package
target/release/omicron-package -t default target create -i standard -m gimlet -s asic
target/release/omicron-package -t default target create -i standard -m gimlet -s asic -r multi-sled
ln -s /input/package/work/zones/* out/
rm out/switch-softnpu.tar.gz # not used when target switch=asic
rm out/omicron-gateway-softnpu.tar.gz # not used when target switch=asic
rm out/omicron-nexus-single-sled.tar.gz # only used for deploy tests
for zone in out/*.tar.gz; do
target/release/omicron-package stamp "$(basename "${zone%.tar.gz}")" "$VERSION"
done
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
- name: Install Pre-Requisites
run: ./tools/install_builder_prerequisites.sh -y
- name: Set default target
run: cargo run --bin omicron-package -- -t default target create
run: cargo run --bin omicron-package -- -t default target create -r single-sled
- name: Check build of deployed Omicron packages
run: cargo run --bin omicron-package -- -t default check

Expand Down
74 changes: 59 additions & 15 deletions common/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ pub struct PackageConfig {
pub dendrite: HashMap<SwitchLocation, DpdConfig>,
/// Background task configuration
pub background_tasks: BackgroundTaskConfig,
/// Default Crucible region allocation strategy
pub default_region_allocation_strategy: RegionAllocationStrategy,
}

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
Expand Down Expand Up @@ -594,6 +596,9 @@ mod test {
dns_external.period_secs_propagation = 7
dns_external.max_concurrent_server_updates = 8
external_endpoints.period_secs = 9
[default_region_allocation_strategy]
type = "random"
seed = 0
"##,
)
.unwrap();
Expand Down Expand Up @@ -677,6 +682,10 @@ mod test {
period_secs: Duration::from_secs(9),
}
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
seed: Some(0)
}
},
}
);
Expand Down Expand Up @@ -724,6 +733,8 @@ mod test {
dns_external.period_secs_propagation = 7
dns_external.max_concurrent_server_updates = 8
external_endpoints.period_secs = 9
[default_region_allocation_strategy]
type = "random"
"##,
)
.unwrap();
Expand Down Expand Up @@ -864,25 +875,31 @@ mod test {
struct DummyConfig {
deployment: DeploymentConfig,
}
let config_path = "../smf/nexus/config-partial.toml";
println!(
"checking {:?} with example deployment section added",
config_path
);
let mut contents = std::fs::read_to_string(config_path)
.expect("failed to read Nexus SMF config file");
contents.push_str(
"\n\n\n \
# !! content below added by test_repo_configs_are_valid()\n\
\n\n\n",
);
let example_deployment = toml::to_string_pretty(&DummyConfig {
deployment: example_config.deployment,
})
.unwrap();
contents.push_str(&example_deployment);
let _: Config = toml::from_str(&contents)
.expect("Nexus SMF config file is not valid");

let nexus_config_paths = [
"../smf/nexus/single-sled/config-partial.toml",
"../smf/nexus/multi-sled/config-partial.toml",
];
for config_path in nexus_config_paths {
println!(
"checking {:?} with example deployment section added",
config_path
);
let mut contents = std::fs::read_to_string(config_path)
.expect("failed to read Nexus SMF config file");
contents.push_str(
"\n\n\n \
# !! content below added by test_repo_configs_are_valid()\n\
\n\n\n",
);
contents.push_str(&example_deployment);
let _: Config = toml::from_str(&contents)
.expect("Nexus SMF config file is not valid");
}
}

#[test]
Expand All @@ -894,3 +911,30 @@ mod test {
);
}
}

/// Defines a strategy for choosing which physical disks to use when
/// allocating new crucible regions.
///
/// The strategy is (de)serialized as an internally tagged value: the `type`
/// field carries the variant name in snake_case and the variant's fields sit
/// next to it, e.g. in a Nexus config file:
///
/// ```toml
/// [default_region_allocation_strategy]
/// type = "random"
/// seed = 0
/// ```
///
/// NOTE: More strategies can - and should! - be added.
///
/// See <https://rfd.shared.oxide.computer/rfd/0205> for a more
/// complete discussion.
///
/// Longer-term, we should consider:
/// - Storage size + remaining free space
/// - Sled placement of datasets
/// - What sort of loads we'd like to create (even split across all disks
/// may not be preferable, especially if maintenance is expected)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RegionAllocationStrategy {
/// Choose disks pseudo-randomly. An optional seed may be provided to make
/// the ordering deterministic, otherwise the current time in nanoseconds
/// will be used. Ordering is based on sorting the output of `md5(UUID of
/// candidate dataset + seed)`. The seed does not need to come from a
/// cryptographically secure source.
///
/// Serialized as `type = "random"`; the `seed` key may be omitted.
Random { seed: Option<u64> },

/// Like Random, but ensures that each region is allocated on its own sled.
///
/// Serialized as `type = "random_with_distinct_sleds"`.
RandomWithDistinctSleds { seed: Option<u64> },
}
34 changes: 28 additions & 6 deletions docs/how-to-run.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -321,20 +321,42 @@ Error: Creates a new build target, and sets it as "active"
Usage: omicron-package target create [OPTIONS]
Options:
-i, --image <IMAGE> [default: standard] [possible values: standard, trampoline]
-m, --machine <MACHINE> [possible values: gimlet, gimlet-standalone, non-gimlet]
-s, --switch <SWITCH> [possible values: asic, stub, softnpu]
-h, --help Print help (see more with '--help')
-i, --image <IMAGE>
[default: standard]
Possible values:
- standard: A typical host OS image
- trampoline: A recovery host OS image, intended to bootstrap a Standard image
-m, --machine <MACHINE>
Possible values:
- gimlet: Use sled agent configuration for a Gimlet
- gimlet-standalone: Use sled agent configuration for a Gimlet running in isolation
- non-gimlet: Use sled agent configuration for a device emulating a Gimlet
-s, --switch <SWITCH>
Possible values:
- asic: Use the "real" Dendrite, that attempts to interact with the Tofino
- stub: Use a "stub" Dendrite that does not require any real hardware
- softnpu: Use a "softnpu" Dendrite that uses the SoftNPU asic emulator
-r, --rack-topology <RACK_TOPOLOGY>
Possible values:
- multi-sled: Use configurations suitable for a multi-sled deployment, such as dogfood and production racks
- single-sled: Use configurations suitable for a single-sled deployment, such as CI and dev machines
-h, --help
Print help (see a summary with '-h')
----

To set up a build target for a non-Gimlet machine with simulated (but fully functional) external networking, you would run:

[source,console]
----
$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu
$ cargo run --release --bin omicron-package -- -t default target create -i standard -m non-gimlet -s softnpu -r single-sled
Finished release [optimized] target(s) in 0.66s
Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu`
Running `target/release/omicron-package -t default target create -i standard -m non-gimlet -s softnpu -r single-sled`
Created new build target 'default' and set it as active
----

Expand Down
1 change: 1 addition & 0 deletions installinator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@ tokio-stream.workspace = true
[features]
image-standard = []
image-trampoline = []
rack-topology-single-sled = []
22 changes: 22 additions & 0 deletions nexus/db-model/src/queries/region_allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ table! {
}
}

// Diesel schema declaration for the `shuffled_candidate_datasets` relation,
// exposing its `id` and `pool_id` UUID columns so they can be referenced in
// typed queries.
// NOTE(review): presumably this names an intermediate result of the region
// allocation query (candidate datasets in randomized order) rather than a
// real database table — confirm against the query that uses it.
table! {
shuffled_candidate_datasets {
id -> Uuid,
pool_id -> Uuid,
}
}

table! {
candidate_regions {
id -> Uuid,
Expand Down Expand Up @@ -89,6 +96,19 @@ table! {
}
}

// Diesel schema declaration for the `one_zpool_per_sled` relation. It has a
// single UUID column, `pool_id`, which is also declared as the primary key
// (the `(pool_id)` after the name overrides Diesel's default of `id`).
// NOTE(review): presumably an intermediate result of the region allocation
// query that keeps at most one zpool for each sled — confirm against the
// query that uses it.
table! {
one_zpool_per_sled (pool_id) {
pool_id -> Uuid
}
}

// Diesel schema declaration for the `one_dataset_per_zpool` relation,
// exposing its `id` and `pool_id` UUID columns.
// NOTE(review): presumably an intermediate result of the region allocation
// query that keeps at most one dataset for each zpool — confirm against the
// query that uses it.
table! {
one_dataset_per_zpool {
id -> Uuid,
pool_id -> Uuid
}
}

table! {
inserted_regions {
id -> Uuid,
Expand Down Expand Up @@ -141,6 +161,7 @@ diesel::allow_tables_to_appear_in_same_query!(
);

diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,);
diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,);

diesel::allow_tables_to_appear_in_same_query!(
inserted_regions,
Expand All @@ -149,6 +170,7 @@ diesel::allow_tables_to_appear_in_same_query!(

diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,);
diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,);
diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset);

// == Needed for random region allocation ==

Expand Down
Loading

0 comments on commit 6bc5e60

Please sign in to comment.