Skip to content

Commit

Permalink
updates for tunnel routing (#3859)
Browse files Browse the repository at this point in the history
  • Loading branch information
rcgoodfellow authored Jan 24, 2024
1 parent c392c76 commit 9ac047e
Show file tree
Hide file tree
Showing 23 changed files with 342 additions and 327 deletions.
21 changes: 19 additions & 2 deletions .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#:
#: name = "helios / deploy"
#: variety = "basic"
#: target = "lab-2.0-opte-0.27"
#: target = "lab-2.0-opte-0.28"
#: output_rules = [
#: "%/var/svc/log/oxide-sled-agent:default.log*",
#: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*",
Expand Down Expand Up @@ -33,6 +33,9 @@ _exit_trap() {
local status=$?
[[ $status -eq 0 ]] && exit 0

# XXX paranoia
pfexec cp /tmp/opteadm /opt/oxide/opte/bin/opteadm

set +o errexit
set -o xtrace
banner evidence
Expand All @@ -50,6 +53,7 @@ _exit_trap() {
standalone \
dump-state
pfexec /opt/oxide/opte/bin/opteadm list-ports
pfexec /opt/oxide/opte/bin/opteadm dump-v2b
z_swadm link ls
z_swadm addr list
z_swadm route list
Expand Down Expand Up @@ -97,6 +101,19 @@ z_swadm () {
pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@
}

# XXX remove. This is just to test against a development branch of OPTE in CI.
set +x
OPTE_COMMIT="73d4669ea213d0b7aca35c4babb6fd09ed51d29e"
curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde
pfexec rem_drv xde || true
pfexec mv xde /kernel/drv/amd64/xde
pfexec add_drv xde || true
curl -sSfOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HM09S4M15WNXB2B2MX8R1GBT/yLalJU5vT4S4IEpwSeY4hPuspxw3JcINokZmlfNU14npHkzG/01HM09SJ2RQSFGW7MVKC9JKZ8D/01HM0A58D888AJ7YP6N1Q6T6ZD/opteadm
chmod +x opteadm
cp opteadm /tmp/opteadm
pfexec mv opteadm /opt/oxide/opte/bin/opteadm
set -x

#
# XXX work around 14537 (UFS should not allow directories to be unlinked) which
# is probably not yet fixed in xde branch? Once the xde branch merges from
Expand Down Expand Up @@ -236,7 +253,7 @@ infra_ip_last = \"$UPLINK_IP\"
/^routes/c\\
routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\]
/^addresses/c\\
addresses = \\[\"$UPLINK_IP/32\"\\]
addresses = \\[\"$UPLINK_IP/24\"\\]
}
" pkg/config-rss.toml
diff -u pkg/config-rss.toml{~,} || true
Expand Down
19 changes: 13 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -270,15 +270,15 @@ omicron-sled-agent = { path = "sled-agent" }
omicron-test-utils = { path = "test-utils" }
omicron-zone-package = "0.10.1"
oxide-client = { path = "clients/oxide-client" }
oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] }
oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1", features = [ "api", "std" ] }
once_cell = "1.19.0"
openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" }
openapiv3 = "2.0.0"
# must match samael's crate!
openssl = "0.10"
openssl-sys = "0.9"
openssl-probe = "0.1.5"
opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" }
opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" }
oso = "0.27"
owo-colors = "3.5.0"
oximeter = { path = "oximeter/oximeter" }
Expand Down
18 changes: 17 additions & 1 deletion clients/ddm-admin-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub use inner::types;
pub use inner::Error;

use either::Either;
use inner::types::Ipv6Prefix;
use inner::types::{Ipv6Prefix, TunnelOrigin};
use inner::Client as InnerClient;
use omicron_common::address::Ipv6Subnet;
use omicron_common::address::SLED_PREFIX;
Expand Down Expand Up @@ -108,6 +108,22 @@ impl Client {
});
}

/// Advertises `endpoint` to ddmd as a tunnel endpoint, in the background.
///
/// Spawns a tokio task that calls `advertise_tunnel_endpoints` on the inner
/// client through `retry_notify`, using
/// `retry_policy_internal_service_aggressive()`. Each failed attempt is
/// logged at info level together with the delay before the next retry.
///
/// This method returns immediately. The task's `JoinHandle` is dropped, so
/// callers cannot observe when (or whether) the advertisement succeeds.
///
/// # Panics
///
/// The spawned task (not the caller) panics if `retry_notify` returns an
/// error.
pub fn advertise_tunnel_endpoint(&self, endpoint: TunnelOrigin) {
    // Clone the client so the spawned task owns everything it touches.
    let me = self.clone();
    tokio::spawn(async move {
        retry_notify(
            retry_policy_internal_service_aggressive(),
            || async {
                // The operation may run several times, so hand it a fresh
                // clone of the endpoint on every attempt.
                me.inner
                    .advertise_tunnel_endpoints(&vec![endpoint.clone()])
                    .await?;
                Ok(())
            },
            |err, duration| {
                info!(
                    me.log,
                    "Failed to notify ddmd of tunnel endpoint (retry in {duration:?})";
                    "err" => %err,
                );
            },
        )
        .await
        .unwrap();
    });
}

/// Returns the addresses of connected sleds.
///
/// Note: These sleds have not yet been verified.
Expand Down
2 changes: 1 addition & 1 deletion common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3253,7 +3253,7 @@ mod test {
let net_des = serde_json::from_str::<IpNet>(&ser).unwrap();
assert_eq!(net, net_des);

let net_str = "fd00:99::1/64";
let net_str = "fd00:47::1/64";
let net = IpNet::from_str(net_str).unwrap();
let ser = serde_json::to_string(&net).unwrap();

Expand Down
117 changes: 10 additions & 107 deletions docs/boundary-services-a-to-z.adoc
Original file line number Diff line number Diff line change
@@ -1,115 +1,18 @@
= Boundary Services A-Z

NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded into xref:how-to-run.adoc[the main how-to-run docs].
NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded
into xref:how-to-run.adoc[the main how-to-run docs].

The virtual hardware making up SoftNPU is a bit different than what was previously used. What we now have looks like this.
The virtual hardware making up SoftNPU is depicted in the diagram below.

image::plumbing.png[]

The `softnpu` zone will be configured and launched during the `create_virtual_hardware.sh` script.
The `softnpu` zone will be configured and launched during the
`create_virtual_hardware.sh` script.

Once the control plane is running, `softnpu` can be configured via `dendrite`
using the `swadm` binary located in the `oxz_switch` zone.
An example script is provided in `tools/scrimlet/softnpu-init.sh`.
This script should work without modification for basic development setups,
but feel free to tweak it as needed.

----
$ ./tools/scrimlet/softnpu-init.sh
++ netstat -rn -f inet
++ grep default
++ awk -F ' ' '{print $2}'
+ GATEWAY_IP=10.85.0.1
+ echo 'Using 10.85.0.1 as gateway ip'
Using 10.85.0.1 as gateway ip
++ arp 10.85.0.1
++ awk -F ' ' '{print $4}'
+ GATEWAY_MAC=68:d7:9a:1f:77:a1
+ echo 'Using 68:d7:9a:1f:77:a1 as gateway mac'
Using 68:d7:9a:1f:77:a1 as gateway mac
+ z_swadm link create rear0 --speed 100G --fec RS
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create rear0 --speed 100G --fec RS
+ z_swadm link create qsfp0 --speed 100G --fec RS
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create qsfp0 --speed 100G --fec RS
+ z_swadm addr add rear0/0 fe80::aae1:deff:fe01:701c
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fe80::aae1:deff:fe01:701c
+ z_swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d
+ z_swadm addr add rear0/0 fd00:99::1
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fd00:99::1
+ z_swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1
+ z_swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01
+ z_swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1
+ z_swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1
+ z_swadm link ls
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link ls
Port/Link Media Speed FEC Enabled Link MAC
rear0/0 Copper 100G RS true Up a8:40:25:46:55:e3
qsfp0/0 Copper 100G RS true Up a8:40:25:46:55:e4
+ z_swadm addr list
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr list
Link IPv4 IPv6
rear0/0 fe80::aae1:deff:fe01:701c
fd00:99::1
qsfp0/0 fe80::aae1:deff:fe01:701d
+ z_swadm route list
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route list
Subnet Port Link Gateway
0.0.0.0/0 qsfp0 0 10.85.0.1
fd00:1122:3344:101::/64 rear0 0 fe80::aae1:deff:fe00:1
+ z_swadm arp list
+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp list
host mac age
10.85.0.1 68:d7:9a:1f:77:a1 0s
fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 0s
----

While following
https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run.adoc[how-to-run.adoc]
to set up IPs, images, disks, instances etc, pay particular attention to the
following.

- The address range in the IP pool should be on a subnet in your local network that
can NAT out to the Internet.
- Be sure to set up an external IP for the instance you create.
You will need to set up `proxy-arp` if your VM external IP addresses are on the
same L2 network as the router or other non-oxide hosts:
----
pfexec /opt/oxide/softnpu/stuff/scadm \
--server /opt/oxide/softnpu/stuff/server \
--client /opt/oxide/softnpu/stuff/client \
standalone \
add-proxy-arp \
$ip_pool_start \
$ip_pool_end \
$softnpu_mac
----

By the end, we have an instance up and running with external connectivity
configured via boundary services:
----
ry@korgano:~/omicron$ ~/propolis/target/release/propolis-cli --server fd00:1122:3344:101::c serial

debian login: root
Linux debian 5.10.0-9-amd64 #1 SMP Debian 5.10.70-1 (2021-09-30) x86_64

The programs included with the Debian GNU/Linux system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
permitted by applicable law.
root@debian:~# host oxide.computer
oxide.computer has address 76.76.21.61
oxide.computer has address 76.76.21.22
oxide.computer mail is handled by 5 alt2.aspmx.l.google.com.
oxide.computer mail is handled by 1 aspmx.l.google.com.
oxide.computer mail is handled by 10 aspmx3.googlemail.com.
oxide.computer mail is handled by 5 alt1.aspmx.l.google.com.
oxide.computer mail is handled by 10 aspmx2.googlemail.com.
----
using the `swadm` binary located in the `oxz_switch` zone. This is not necessary
under normal operation, as the switch state will be managed automatically by the
control plane and networking daemons. An example script is provided in
`tools/scrimlet/softnpu-init.sh`. This script should work without modification
for basic development setups, but feel free to tweak it as needed.
10 changes: 7 additions & 3 deletions docs/networking.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -669,13 +669,13 @@ fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:c UG 2 640 cxgbe0
fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:c UG 2 2401 cxgbe0
fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:40c UG 2 51 cxgbe1
fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:40c UG 2 11090 cxgbe1
fd00:99::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fdb2:ceeb:3ab7:8c9d::1/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fdb0:a840:2504:393::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fdb0:a840:2504:191::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0
fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:c UG 2 634578 cxgbe0
fd00:99::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1
fd96:354:c1dc:606d::1/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1
fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:40c UG 2 14094545 cxgbe1
fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1
fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1
Expand Down Expand Up @@ -733,7 +733,11 @@ fd00:1122:3344:3::/64 fe80::aa40:25ff:fe05:c UG 2 2437 cxgbe0

Recall that cxgbe0 and cxgbe1 are connected to separate switches in the rack. So we're seeing the prefixes for the other sleds in this deployment. We have two routes to reach each sled: one through each switch. The gateway is the link-local address _of each switch_ on the corresponding link. One notable exception: the route for this same sled (`fd00:1122:3344:104::/64`) points to `underlay0`, the GZ's VNIC on the sled's underlay network. In this way, traffic leaving the GZ (whether it originated in this GZ or arrived from one of the switches) is directed to the sled's underlay network etherstub and from there to the right zone VNIC.

(Questions: Why does 107 only have one route? What are the `fd00:99::` routes?)
(Question: Why does 107 only have one route?)

The `fdb2:ceeb:3ab7:8c9d::1/64` and `fd96:354:c1dc:606d::1/64` routes lead to
randomly generated boundary services tunnel endpoint addresses. See RFD 404 for
more details.

There are similar routes for other sleds' prefixes on the bootstrap network.

Expand Down
20 changes: 0 additions & 20 deletions illumos-utils/src/opte/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,6 @@ pub use oxide_vpc::api::DhcpCfg;
pub use oxide_vpc::api::Vni;
use std::net::IpAddr;

/// Builds a `BoundaryServices` value from hard-coded constants.
///
/// The returned value carries the IPv6 address `fd00:99::1`, the MAC address
/// `a8:40:25:f9:99:99`, and VNI 99. None of these are configurable here.
fn default_boundary_services() -> BoundaryServices {
use oxide_vpc::api::Ipv6Addr;
use oxide_vpc::api::MacAddr;
// TODO-completeness: Don't hardcode any of these values.
//
// Boundary Services will be started on several Sidecars during rack
// setup, and those addresses and VNIs will need to be propagated here.
// See https://github.com/oxidecomputer/omicron/issues/1382
let ip = Ipv6Addr::from([0xfd00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]);

// This MAC address is entirely irrelevant to the functionality of OPTE and
// the Oxide VPC. It's never used to actually forward packets. It only
// represents the "logical" destination of Boundary Services as a
// destination that OPTE as a virtual gateway forwards packets to as its
// next hop.
let mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xf9, 0x99, 0x99]);
// NOTE(review): the unwrap assumes `Vni::new` accepts 99; confirm against
// the oxide-vpc API.
let vni = Vni::new(99_u32).unwrap();
BoundaryServices { ip, mac, vni }
}

/// Information about the gateway for an OPTE port
#[derive(Debug, Clone, Copy)]
#[allow(dead_code)]
Expand Down
3 changes: 0 additions & 3 deletions illumos-utils/src/opte/port_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

//! Manager for all OPTE ports on a Helios system
use crate::opte::default_boundary_services;
use crate::opte::opte_firewall_rules;
use crate::opte::params::DeleteVirtualNetworkInterfaceHost;
use crate::opte::params::SetVirtualNetworkInterfaceHost;
Expand Down Expand Up @@ -110,7 +109,6 @@ impl PortManager {
let subnet = IpNetwork::from(nic.subnet);
let vpc_subnet = IpCidr::from(subnet);
let gateway = Gateway::from_subnet(&subnet);
let boundary_services = default_boundary_services();

// Describe the external IP addresses for this port.
macro_rules! ip_cfg {
Expand Down Expand Up @@ -219,7 +217,6 @@ impl PortManager {
gateway_mac: MacAddr::from(gateway.mac.into_array()),
vni,
phys_ip: self.inner.underlay_ip.into(),
boundary_services,
};

// Create the xde device.
Expand Down
Loading

0 comments on commit 9ac047e

Please sign in to comment.