Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rpws for all networking #4822

Merged
merged 48 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
a3d0f56
WIP: placeholder to keep PR open
Feb 1, 2024
f289028
Revert "WIP: placeholder to keep PR open"
Feb 5, 2024
ac7a086
Implement BackgroundTask for switch port settings
Feb 5, 2024
5152b0b
bump fixtures
internet-diglett Feb 5, 2024
f69c3ec
Merge branch 'main' into rpws-for-all-networking
internet-diglett Feb 5, 2024
97db963
fix log attribute
internet-diglett Feb 6, 2024
185ae2f
comments for aiding refactor
internet-diglett Feb 8, 2024
2939cd9
Merge branch 'main' into rpws-for-all-networking
internet-diglett Feb 14, 2024
a00f21b
fixup! Merge branch 'main' into rpws-for-all-networking
internet-diglett Feb 14, 2024
09b241c
scaffold new background tasks
internet-diglett Feb 14, 2024
31db009
WIP: port switch port settings saga to RPW
internet-diglett Feb 20, 2024
ab0ed14
more WIP: continue port of saga to RPW
internet-diglett Feb 20, 2024
3d9fc13
clean up debris from refactor
internet-diglett Feb 21, 2024
6e3e390
More WIP: port bootstore updates
internet-diglett Feb 23, 2024
46264e8
more refactor debris cleanup
internet-diglett Feb 24, 2024
cb7ae9c
add more logging to workflow for additional tshooting
internet-diglett Feb 28, 2024
bec23c1
Enable runtime toggling of RPWs
internet-diglett Feb 28, 2024
4b970f1
WIP: remove routing from dendrite API calls
internet-diglett Mar 1, 2024
24aa837
cleanup todo!()
internet-diglett Mar 4, 2024
2820627
Merge branch 'main' into rpws-for-all-networking
internet-diglett Mar 4, 2024
05b37de
first round of pr fixes
Mar 4, 2024
7c7e22a
Second pass of PR fixes
Mar 5, 2024
78818d1
convert route diff methods to use HashSet
Mar 5, 2024
3b0ae08
try reordering rack init logic to resolve race
Mar 6, 2024
1abe3ed
try iterating over *initialized* racks
Mar 6, 2024
25a6ffe
remove toggle table, add index on rack(initialized)
Mar 6, 2024
90e3ed6
add the fully qualified name to the schema
Mar 7, 2024
a32d6fa
cache bootstore configs for subsequent comparisons and auditing
Mar 7, 2024
e01fb5b
track bootstore history
Mar 8, 2024
121c8b5
EXPECTORATE
internet-diglett Mar 8, 2024
61fc7ae
timeout on nexus zone startup
Mar 8, 2024
29dbb07
set the timeout to a more reasonable value
Mar 8, 2024
0126ae6
fix broken sed command
Mar 8, 2024
90cd854
Fix Correctness Issues
Mar 9, 2024
9aa14e8
move loopback address management to rpw
internet-diglett Mar 11, 2024
dbcdc5b
EXPECTORATE
internet-diglett Mar 11, 2024
9c58bf1
Merge branch 'main' into rpws-for-all-networking
internet-diglett Mar 12, 2024
cc2255e
remove duplicates
internet-diglett Mar 12, 2024
2f0fc52
Do not fail if address-lot already exists
Mar 14, 2024
8aadca9
WIP: make bgp creation idempotent
Mar 15, 2024
8db6137
WIP: make bgp call idempotent
Mar 15, 2024
af4fc83
add description to bgp config
Mar 15, 2024
1376116
add timestamp fields to bgp config insertion
Mar 16, 2024
4f6067c
adjust vdev creation parameters for virt disks
Mar 16, 2024
95024d9
Make Address Lot Creation Idempotent
Mar 16, 2024
3efe2de
a bit more cleanup
Mar 16, 2024
7d41d73
bump dendrite
Mar 16, 2024
e28135f
Merge branch 'main' into rpws-for-all-networking
Mar 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion common/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ pub struct BackgroundTaskConfig {
pub sync_service_zone_nat: SyncServiceZoneNatConfig,
/// configuration for the bfd manager task
pub bfd_manager: BfdManagerConfig,
/// configuration for the switch port settings manager task
pub switch_port_settings_manager: SwitchPortSettingsManagerConfig,
/// configuration for region replacement task
pub region_replacement: RegionReplacementConfig,
}
Expand Down Expand Up @@ -400,6 +402,13 @@ pub struct SyncServiceZoneNatConfig {
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct SwitchPortSettingsManagerConfig {
/// period (in seconds) for periodic activations of this background task
internet-diglett marked this conversation as resolved.
Show resolved Hide resolved
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct InventoryConfig {
Expand Down Expand Up @@ -563,7 +572,10 @@ mod test {
};
use crate::address::{Ipv6Subnet, RACK_PREFIX};
use crate::api::internal::shared::SwitchLocation;
use crate::nexus_config::{BfdManagerConfig, SyncServiceZoneNatConfig};
use crate::nexus_config::{
BfdManagerConfig, SwitchPortSettingsManagerConfig,
SyncServiceZoneNatConfig,
};
use camino::{Utf8Path, Utf8PathBuf};
use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
Expand Down Expand Up @@ -716,6 +728,7 @@ mod test {
blueprints.period_secs_load = 10
blueprints.period_secs_execute = 60
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
[default_region_allocation_strategy]
type = "random"
Expand Down Expand Up @@ -831,6 +844,10 @@ mod test {
sync_service_zone_nat: SyncServiceZoneNatConfig {
period_secs: Duration::from_secs(30)
},
switch_port_settings_manager:
SwitchPortSettingsManagerConfig {
period_secs: Duration::from_secs(30),
},
region_replacement: RegionReplacementConfig {
period_secs: Duration::from_secs(30),
},
Expand Down Expand Up @@ -896,6 +913,7 @@ mod test {
blueprints.period_secs_load = 10
blueprints.period_secs_execute = 60
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30
[default_region_allocation_strategy]
type = "random"
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


task: "switch_port_config_manager"
manages switch port settings for rack switches


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT
Expand Down Expand Up @@ -181,6 +185,10 @@ task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


task: "switch_port_config_manager"
manages switch port settings for rack switches


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down Expand Up @@ -258,6 +266,10 @@ task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


task: "switch_port_config_manager"
manages switch port settings for rack switches


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down
11 changes: 11 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,10 @@ task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


task: "switch_port_config_manager"
manages switch port settings for rack switches


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down Expand Up @@ -426,6 +430,13 @@ task: "service_zone_nat_tracker"
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: inventory collection is None

task: "switch_port_config_manager"
configured period: every 30s
currently executing: no
last completed activation: iter 2, triggered by an explicit signal
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: Could not determine if task is enabled via db: Invalid Request: no matching records

---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down
29 changes: 29 additions & 0 deletions nexus/db-model/src/background_task_toggles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use chrono::{DateTime, Utc};
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;

use crate::schema::background_task_toggles;

/// Values used to create a background task toggle
///
/// Only the caller-supplied columns are present here; the remaining columns
/// of `background_task_toggles` (`id`, timestamps) are not part of the insert.
// NOTE(review): presumably the omitted columns are filled in by database
// defaults -- confirm against the SQL schema.
#[derive(Insertable, Debug, Clone, Eq, PartialEq)]
#[diesel(table_name = background_task_toggles)]
pub struct BackgroundTaskToggleValues {
/// Name identifying the background task this toggle applies to
pub name: String,
/// Whether the named background task should be enabled
pub enabled: bool,
}

/// Database representation of a background task toggle
#[derive(Queryable, Debug, Clone, Selectable, Serialize, Deserialize)]
#[diesel(table_name = background_task_toggles)]
pub struct BackgroundTaskToggle {
internet-diglett marked this conversation as resolved.
Show resolved Hide resolved
pub id: Uuid,
pub name: String,
pub enabled: bool,
pub time_created: DateTime<Utc>,
pub time_modified: DateTime<Utc>,
pub time_deleted: Option<DateTime<Utc>>,
}

/// Toggle name for the switch port settings sync background task.
// NOTE(review): presumably this is the value stored in the `name` column of
// `background_task_toggles` for that task -- confirm against the task code.
// TODO convert to enum?
pub const SYNC_SWITCH_PORT_SETTINGS: &str = "sync_switch_port_settings";
1 change: 1 addition & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern crate diesel;
extern crate newtype_derive;

mod address_lot;
pub mod background_task_toggles;
mod bfd;
mod bgp;
mod block_size;
Expand Down
13 changes: 12 additions & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion;
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(33, 0, 1);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(34, 0, 0);

table! {
disk (id) {
Expand Down Expand Up @@ -1515,6 +1515,17 @@ table! {
}
}

// Diesel schema for the `background_task_toggles` table, keyed by `id`.
// The columns correspond field-for-field to
// `background_task_toggles::BackgroundTaskToggle` in this crate.
table! {
background_task_toggles (id) {
id -> Uuid,
name -> Text,
enabled -> Bool,
time_created -> Timestamptz,
time_modified -> Timestamptz,
time_deleted -> Nullable<Timestamptz>,
}
}

table! {
db_metadata (singleton) {
singleton -> Bool,
Expand Down
94 changes: 94 additions & 0 deletions nexus/db-queries/src/db/datastore/background_task_toggle.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
use crate::db::error::{public_error_from_diesel, ErrorHandler};
use crate::{
context::OpContext,
db::model::background_task_toggles::{
BackgroundTaskToggle, BackgroundTaskToggleValues,
},
};
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::{prelude::*, ExpressionMethods, SelectableHelper};
use omicron_common::api::external::{CreateResult, Error, LookupResult};

use super::DataStore;

impl DataStore {
pub async fn set_background_task_toggle(
internet-diglett marked this conversation as resolved.
Show resolved Hide resolved
&self,
opctx: &OpContext,
values: BackgroundTaskToggleValues,
) -> CreateResult<()> {
use crate::db::schema::background_task_toggles::dsl;

diesel::insert_into(dsl::background_task_toggles)
.values(values.clone())
.on_conflict(dsl::name)
.do_update()
.set(dsl::enabled.eq(values.enabled))
.execute_async(&*self.pool_connection_authorized(opctx).await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
Ok(())
}

pub async fn get_background_task_toggle(
&self,
opctx: &OpContext,
name: String,
) -> LookupResult<BackgroundTaskToggle> {
use crate::db::schema::background_task_toggles::dsl;

let result = dsl::background_task_toggles
.filter(dsl::name.eq(name))
.select(BackgroundTaskToggle::as_select())
.limit(1)
.load_async(&*self.pool_connection_authorized(opctx).await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

if let Some(nat_entry) = result.first() {
Ok(nat_entry.clone())
} else {
Err(Error::invalid_request("no matching records"))
}
}
}

#[cfg(test)]
mod test {
    use super::BackgroundTaskToggleValues;
    use crate::db::datastore::datastore_test;
    use nexus_test_utils::db::test_setup_database;
    use omicron_test_utils::dev;

    /// Exercises the toggle round trip: lookup of a missing toggle fails, a
    /// toggle that was set can be read back with the stored values, and
    /// setting the same name again updates the existing toggle.
    #[tokio::test]
    async fn set_and_get_toggles() {
        let logctx = dev::test_setup_log("test_set_and_get_toggles");
        let mut db = test_setup_database(&logctx.log).await;
        let (opctx, datastore) = datastore_test(&logctx, &db).await;

        // no toggle should be set for "my_task"
        assert!(datastore
            .get_background_task_toggle(&opctx, "my_task".into())
            .await
            .is_err());

        // create the toggle
        datastore
            .set_background_task_toggle(
                &opctx,
                BackgroundTaskToggleValues {
                    name: "my_task".into(),
                    enabled: true,
                },
            )
            .await
            .expect("creating a toggle should succeed");

        // the stored values should come back, not merely "some row"
        let toggle = datastore
            .get_background_task_toggle(&opctx, "my_task".into())
            .await
            .expect("toggle should exist after being set");
        assert_eq!(toggle.name, "my_task");
        assert!(toggle.enabled);

        // setting the same name again takes the on-conflict update path
        datastore
            .set_background_task_toggle(
                &opctx,
                BackgroundTaskToggleValues {
                    name: "my_task".into(),
                    enabled: false,
                },
            )
            .await
            .expect("updating an existing toggle should succeed");

        let toggle = datastore
            .get_background_task_toggle(&opctx, "my_task".into())
            .await
            .expect("toggle should still exist after being updated");
        assert_eq!(toggle.name, "my_task");
        assert!(!toggle.enabled);

        db.cleanup().await.unwrap();
        logctx.cleanup_successful();
    }
}
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use std::sync::Arc;
use uuid::Uuid;

mod address_lot;
mod background_task_toggle;
mod bfd;
mod bgp;
mod bootstore;
Expand Down
5 changes: 3 additions & 2 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[console]
# Directory for static assets. Absolute path or relative to CWD.
static_dir = "out/console-assets"
session_idle_timeout_minutes = 480 # 8 hours
session_idle_timeout_minutes = 480 # 8 hours
session_absolute_timeout_minutes = 1440 # 24 hours

# List of authentication schemes to support.
Expand Down Expand Up @@ -35,7 +35,7 @@ rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc"

# Nexus may need to resolve external hosts (e.g. to grab IdP metadata).
# These are the DNS servers it should use.
external_dns_servers = [ "1.1.1.1", "9.9.9.9" ]
external_dns_servers = ["1.1.1.1", "9.9.9.9"]

[deployment.dropshot_external]
# IP Address and TCP port on which to listen for the external API
Expand Down Expand Up @@ -109,6 +109,7 @@ phantom_disks.period_secs = 30
blueprints.period_secs_load = 10
blueprints.period_secs_execute = 60
sync_service_zone_nat.period_secs = 30
switch_port_settings_manager.period_secs = 30
region_replacement.period_secs = 30

[default_region_allocation_strategy]
Expand Down
27 changes: 23 additions & 4 deletions nexus/src/app/background/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use super::nat_cleanup;
use super::phantom_disks;
use super::region_replacement;
use super::sync_service_zone_nat::ServiceZoneNatTracker;
use super::sync_switch_port_settings::SwitchPortSettingsManager;
use crate::app::sagas::SagaRequest;
use nexus_db_model::DnsGroup;
use nexus_db_queries::context::OpContext;
Expand Down Expand Up @@ -76,6 +77,9 @@ pub struct BackgroundTasks {
/// task handle for the service zone nat tracker
pub task_service_zone_nat_tracker: common::TaskHandle,

/// task handle for the switch port settings manager
pub task_switch_port_settings_manager: common::TaskHandle,

/// task handle for the task that detects if regions need replacement and
/// begins the process
pub task_region_replacement: common::TaskHandle,
Expand Down Expand Up @@ -134,7 +138,7 @@ impl BackgroundTasks {
(task, watcher_channel)
};

let dpd_clients: Vec<_> = dpd_clients.values().cloned().collect();
let dpd_client_list: Vec<_> = dpd_clients.values().cloned().collect();

let nat_cleanup = {
driver.register(
Expand All @@ -146,7 +150,7 @@ impl BackgroundTasks {
config.nat_cleanup.period_secs,
Box::new(nat_cleanup::Ipv4NatGarbageCollector::new(
datastore.clone(),
dpd_clients.clone(),
dpd_client_list.clone(),
)),
opctx.child(BTreeMap::new()),
vec![],
Expand Down Expand Up @@ -174,7 +178,7 @@ impl BackgroundTasks {
let task_inventory_collection = {
let collector = inventory_collection::InventoryCollector::new(
datastore.clone(),
resolver,
resolver.clone(),
&nexus_id.to_string(),
config.inventory.nkeep,
config.inventory.disable,
Expand Down Expand Up @@ -247,7 +251,21 @@ impl BackgroundTasks {
config.sync_service_zone_nat.period_secs,
Box::new(ServiceZoneNatTracker::new(
datastore.clone(),
dpd_clients.clone(),
dpd_client_list.clone(),
)),
opctx.child(BTreeMap::new()),
vec![],
)
};

let task_switch_port_settings_manager = {
driver.register(
"switch_port_config_manager".to_string(),
String::from("manages switch port settings for rack switches"),
config.switch_port_settings_manager.period_secs,
Box::new(SwitchPortSettingsManager::new(
datastore.clone(),
resolver.clone(),
)),
opctx.child(BTreeMap::new()),
vec![],
Expand Down Expand Up @@ -289,6 +307,7 @@ impl BackgroundTasks {
task_blueprint_loader,
task_blueprint_executor,
task_service_zone_nat_tracker,
task_switch_port_settings_manager,
task_region_replacement,
}
}
Expand Down
Loading