Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

background task for service zone nat #4857

Merged
merged 21 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d13a4ea
update instructions for deploying dev env
internet-diglett Jan 16, 2024
112765e
WIP: background task for syncronizing NAT information for service zones
internet-diglett Jan 16, 2024
085b0db
WIP: ensure service zone nat entries are tracked by RPW
internet-diglett Jan 19, 2024
402f513
wrap up not TODOs
internet-diglett Jan 19, 2024
56ba5be
revert changes to inventory_collection.rs
internet-diglett Jan 19, 2024
c794b1a
add config for nat sync task
internet-diglett Jan 20, 2024
fe1e3e8
PR fixes
internet-diglett Jan 20, 2024
337bb23
Merge branch 'main' into rpw-for-service-zone-nat
internet-diglett Jan 22, 2024
929ba7c
update schema
internet-diglett Jan 22, 2024
d62b814
add additional documentation
internet-diglett Jan 22, 2024
9eb1111
create tracking issue for TODO
internet-diglett Jan 22, 2024
b92b0bd
set mandatory minimums for service zone nat entries
internet-diglett Jan 23, 2024
1c6e60e
adjust minimum ntp count so job will actually run
internet-diglett Jan 23, 2024
22f36d6
remove comment, issue is WIP
internet-diglett Jan 23, 2024
3ebf9df
adjust minimum count for all services to 1
internet-diglett Jan 23, 2024
5811317
bump schema version
internet-diglett Jan 24, 2024
28f61b0
Merge branch 'main' into rpw-for-service-zone-nat
internet-diglett Jan 24, 2024
d3501dc
BUGFIX: nat entries missing after sled restart
internet-diglett Jan 26, 2024
a193845
Merge branch 'main' into rpw-for-service-zone-nat
internet-diglett Jan 26, 2024
8c6a23e
Merge branch 'main' into rpw-for-service-zone-nat
internet-diglett Jan 26, 2024
6cd49f7
bump schema
internet-diglett Jan 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/src/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ pub const AZ_PREFIX: u8 = 48;
pub const RACK_PREFIX: u8 = 56;
pub const SLED_PREFIX: u8 = 64;

/// maximum possible value for a tcp or udp port
pub const MAX_PORT: u16 = u16::MAX;

/// minimum possible value for a tcp or udp port
pub const MIN_PORT: u16 = u16::MIN;

/// The amount of redundancy for internal DNS servers.
///
/// Must be less than or equal to MAX_DNS_REDUNDANCY.
Expand Down
16 changes: 16 additions & 0 deletions common/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ pub struct BackgroundTaskConfig {
pub inventory: InventoryConfig,
/// configuration for phantom disks task
pub phantom_disks: PhantomDiskConfig,
/// configuration for service zone nat sync task
pub sync_service_zone_nat: SyncServiceZoneNatConfig,
}

#[serde_as]
Expand Down Expand Up @@ -383,6 +385,14 @@ pub struct NatCleanupConfig {
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct SyncServiceZoneNatConfig {
/// period (in seconds) for periodic activations of this background task
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct InventoryConfig {
Expand Down Expand Up @@ -524,6 +534,7 @@ mod test {
};
use crate::address::{Ipv6Subnet, RACK_PREFIX};
use crate::api::internal::shared::SwitchLocation;
use crate::nexus_config::SyncServiceZoneNatConfig;
use dropshot::ConfigDropshot;
use dropshot::ConfigLogging;
use dropshot::ConfigLoggingIfExists;
Expand Down Expand Up @@ -675,6 +686,7 @@ mod test {
inventory.nkeep = 11
inventory.disable = false
phantom_disks.period_secs = 30
sync_service_zone_nat.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -780,6 +792,9 @@ mod test {
phantom_disks: PhantomDiskConfig {
period_secs: Duration::from_secs(30),
},
sync_service_zone_nat: SyncServiceZoneNatConfig {
period_secs: Duration::from_secs(30)
}
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -838,6 +853,7 @@ mod test {
inventory.nkeep = 3
inventory.disable = false
phantom_disks.period_secs = 30
sync_service_zone_nat.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ task: "phantom_disks"
detects and un-deletes phantom disks


task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT
Expand Down Expand Up @@ -139,6 +143,10 @@ task: "phantom_disks"
detects and un-deletes phantom disks


task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down Expand Up @@ -195,6 +203,10 @@ task: "phantom_disks"
detects and un-deletes phantom disks


task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


---------------------------------------------
stderr:
note: Nexus URL not specified. Will pick one from DNS.
Expand Down
11 changes: 11 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,10 @@ task: "phantom_disks"
detects and un-deletes phantom disks


task: "service_zone_nat_tracker"
ensures service zone nat records are recorded in NAT RPW table


---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down Expand Up @@ -369,6 +373,13 @@ task: "phantom_disks"
number of phantom disks deleted: 0
number of phantom disk delete errors: 0

task: "service_zone_nat_tracker"
configured period: every 30s
currently executing: no
last completed activation: iter 2, triggered by an explicit signal
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: inventory collection is None

---------------------------------------------
stderr:
note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
Expand Down
102 changes: 77 additions & 25 deletions docs/how-to-run.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -498,41 +498,93 @@ Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[
oxide auth login --host http://192.168.1.21
----

=== Configure quotas for your silo

Setting resource quotas is required before you can begin uploading images, provisioning instances, etc.
In this example we'll update the recovery silo so we can provision instances directly from it:

[source, console]
----
$ oxide api /v1/system/silos/recovery/quotas --method PUT --input - <<EOF
{
"cpus": 9999999999,
"memory": 999999999999999999,
"storage": 999999999999999999
}
EOF

# example response
{
"cpus": 9999999999,
"memory": 999999999999999999,
"silo_id": "fa12b74d-30f8-4d5a-bc0e-4d229f13c6e5",
"storage": 999999999999999999
}
----

=== Create an IP pool

An IP pool is needed to provide external connectivity to Instances. The addresses you use here should be addresses you've reserved from the external network (see <<_external_networking>>).

Here we will first create an IP pool for the recovery silo:
[source,console]
----
$ oxide ip-pool range add --pool default --first 192.168.1.31 --last 192.168.1.40
success
IpPoolRange {
id: 4a61e65a-d96d-4c56-9cfd-dc1e44d9e99b,
ip_pool_id: 1b1289a7-cefe-4a7e-a8c9-d93330846301,
range: V4(
Ipv4Range {
first: 192.168.1.31,
last: 192.168.1.40,
},
),
time_created: 2023-08-02T16:31:43.679785Z,
---
$ oxide api /v1/system/ip-pools --method POST --input - <<EOF
{
"name": "default",
"description": "default ip-pool"
}
----
EOF

# example response
{
"description": "default ip-pool",
"id": "1c3dfa5c-7b00-46ff-987a-4e59e512b250",
"name": "default",
"time_created": "2024-01-16T22:51:54.679751Z",
"time_modified": "2024-01-16T22:51:54.679751Z"
}
---

Now we will associate the pool with the recovery silo.

[source,console]
----
$ oxide api /v1/system/ip-pools/default/silos --method POST --input - <<EOF
{
"silo": "recovery",
"is_default": true
}
EOF

# example response
{
"ip_pool_id": "1c3dfa5c-7b00-46ff-987a-4e59e512b250",
"is_default": true,
"silo_id": "5c0aca09-d7ee-4be6-b7b1-060655659f74"
}
----

With SoftNPU you will generally also need to configure Proxy ARP. Below, `IP_POOL_START` and `IP_POOL_END` are the first and last addresses you used in the previous command:
Now we will add an address range to the IP pool we associated with the recovery silo:

[source,console]
----
# dladm won't return leading zeroes but `scadm` expects them
$ SOFTNPU_MAC=$(dladm show-vnic sc0_1 -p -o macaddress | gsed 's/\b\(\w\)\b/0\1/g')
$ pfexec zlogin sidecar_softnpu /softnpu/scadm \
--server /softnpu/server \
--client /softnpu/client \
standalone \
add-proxy-arp \
$IP_POOL_START \
$IP_POOL_END \
$SOFTNPU_MAC
oxide api /v1/system/ip-pools/default/ranges/add --method POST --input - <<EOF
{
"first": "$IP_POOL_START",
"last": "$IP_POOL_END"
}
EOF

# example response
{
"id": "6209516e-2b38-4cbd-bff4-688ffa39d50b",
"ip_pool_id": "1c3dfa5c-7b00-46ff-987a-4e59e512b250",
"range": {
"first": "192.168.1.35",
"last": "192.168.1.40"
},
"time_created": "2024-01-16T22:53:43.179726Z"
}
----

=== Create a Project and Image
Expand Down
2 changes: 1 addition & 1 deletion nexus/db-model/src/ipv4_nat_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use serde::Serialize;
use uuid::Uuid;

/// Values used to create an Ipv4NatEntry
#[derive(Insertable, Debug, Clone)]
#[derive(Insertable, Debug, Clone, Eq, PartialEq)]
#[diesel(table_name = ipv4_nat_entry)]
pub struct Ipv4NatValues {
pub external_address: Ipv4Net,
Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/ipv4net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use std::net::Ipv4Addr;
Clone,
Copy,
Debug,
Eq,
PartialEq,
AsExpression,
FromSqlRow,
Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/ipv6net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::net::Ipv6Addr;
Clone,
Copy,
Debug,
Eq,
PartialEq,
AsExpression,
FromSqlRow,
Expand Down
2 changes: 1 addition & 1 deletion nexus/db-model/src/macaddr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use diesel::serialize::{self, ToSql};
use diesel::sql_types;
use omicron_common::api::external;

#[derive(Clone, Copy, Debug, PartialEq, AsExpression, FromSqlRow)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, AsExpression, FromSqlRow)]
#[diesel(sql_type = sql_types::BigInt)]
pub struct MacAddr(pub external::MacAddr);

Expand Down
2 changes: 1 addition & 1 deletion nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion;
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(23, 0, 1);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(24, 0, 0);

table! {
disk (id) {
Expand Down
10 changes: 9 additions & 1 deletion nexus/db-model/src/vni.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ use serde::Deserialize;
use serde::Serialize;

#[derive(
Clone, Debug, Copy, AsExpression, FromSqlRow, Serialize, Deserialize,
Clone,
Debug,
Copy,
AsExpression,
FromSqlRow,
Serialize,
Deserialize,
Eq,
PartialEq,
)]
#[diesel(sql_type = sql_types::Int4)]
pub struct Vni(pub external::Vni);
Expand Down
3 changes: 3 additions & 0 deletions nexus/db-queries/src/db/datastore/ip_pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ impl DataStore {
// join ip_pool to ip_pool_resource and filter

// used in both success and error outcomes
// TODO: Correctness
internet-diglett marked this conversation as resolved.
Show resolved Hide resolved
// we're not propagating the data used to perform the query, which
// makes troubleshooting a lookup failure a lot more time consuming
let lookup_type = LookupType::ByCompositeId(
"Default pool for current silo".to_string(),
);
Expand Down
Loading