diff --git a/docs/how-to-run-simulated.adoc b/docs/how-to-run-simulated.adoc index de19b70f04..86f7a0915b 100644 --- a/docs/how-to-run-simulated.adoc +++ b/docs/how-to-run-simulated.adoc @@ -94,6 +94,10 @@ omicron-dev: external DNS: [::1]:54342 === Running the pieces by hand +There are many reasons it's useful to run the pieces of the stack by hand, especially during development and debugging: to test stopping and starting a component while the rest of the stack remains online; to run one component in a custom environment; to use a custom binary; to use a custom config file; to run under the debugger or with extra tracing enabled; etc. + +CAUTION: This process does not currently work. See https://github.com/oxidecomputer/omicron/issues/4421[omicron#4421] for details. The pieces here may still be useful for reference. + . Start CockroachDB using `omicron-dev db-run`: + [source,text] @@ -181,6 +185,8 @@ omicron-dev: using /tmp/.tmpFH6v8h and /tmp/.tmpkUjDji for ClickHouse data stora $ cargo run --bin=nexus -- nexus/examples/config.toml ---- Nexus can also serve the web console. Instructions for downloading (or building) the console's static assets and pointing Nexus to them are https://github.com/oxidecomputer/console/blob/main/docs/serve-from-nexus.md[here]. Without console assets, Nexus will still start and run normally as an API. A few link:./nexus/src/external_api/console_api.rs[console-specific routes] will 404. ++ +CAUTION: This step does not currently work. See https://github.com/oxidecomputer/omicron/issues/4421[omicron#4421] for details. . `dns-server` is run similar to Nexus, except that the bind addresses are specified on the command line: + @@ -207,9 +213,98 @@ Dec 02 18:00:01.093 DEBG registered endpoint, path: /producers, method: POST, lo ... 
---- +=== Using both `omicron-dev run-all` and running Nexus manually + +While it's often useful to run _some_ part of the stack by hand (see above), if you only want to run your own Nexus, one option is to run `omicron-dev run-all` first to get a whole simulated stack up, then run a second Nexus by hand with a custom config file. + +To do this, first run `omicron-dev run-all`: + +[source,text] +---- +$ cargo run --bin=omicron-dev -- run-all + Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.95s + Running `target/debug/omicron-dev run-all` +omicron-dev: setting up all services ... +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.0.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.0.log" +DB URL: postgresql://root@[::1]:43256/omicron?sslmode=disable +DB address: [::1]:43256 +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.2.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.2.log" +log file: /dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.3.log +note: configured to log to "/dangerzone/omicron_tmp/omicron-dev-omicron-dev.29765.3.log" +omicron-dev: services are running. +omicron-dev: nexus external API: 127.0.0.1:12220 +omicron-dev: nexus internal API: [::1]:12221 +omicron-dev: cockroachdb pid: 29769 +omicron-dev: cockroachdb URL: postgresql://root@[::1]:43256/omicron?sslmode=disable +omicron-dev: cockroachdb directory: /dangerzone/omicron_tmp/.tmpikyLO8 +omicron-dev: internal DNS HTTP: http://[::1]:39841 +omicron-dev: internal DNS: [::1]:54025 +omicron-dev: external DNS name: oxide-dev.test +omicron-dev: external DNS HTTP: http://[::1]:63482 +omicron-dev: external DNS: [::1]:45276 +omicron-dev: e.g. 
`dig @::1 -p 45276 test-suite-silo.sys.oxide-dev.test` +omicron-dev: management gateway: http://[::1]:49188 (switch0) +omicron-dev: management gateway: http://[::1]:39352 (switch1) +omicron-dev: silo name: test-suite-silo +omicron-dev: privileged user name: test-privileged +---- + +You'll need to note: + +* the TCP ports for the two management gateways (`49188` and `39352` here for switch0 and switch1, respectively) +* the TCP port for internal DNS (`54025` here) +* the TCP port in the CockroachDB URL (`43256` here) + +Next, you'll need to customize the Nexus configuration file. Start with nexus/examples/config-second.toml (_not_ nexus/examples/config.toml, which uses various values that conflict with what `omicron-dev run-all` uses). You should only need to modify the block at the **bottom** of the file: + +[source,toml] +---- +################################################################################ +# INSTRUCTIONS: To run Nexus against an existing stack started with # +# `omicron-dev run-all`, you should only have to modify values in this # +# section. # +# # +# Modify the port numbers below based on the output of `omicron-dev run-all` # +################################################################################ + +[mgd] +# Look for "management gateway: http://[::1]:49188 (switch0)" +# The "http://" does not go in this string -- just the socket address. +switch0.address = "[::1]:49188" + +# Look for "management gateway: http://[::1]:39352 (switch1)" +# The "http://" does not go in this string -- just the socket address. +switch1.address = "[::1]:39352" + +[deployment.internal_dns] +# Look for "internal DNS: [::1]:54025" +# and adjust the port number below. +address = "[::1]:54025" +# You should not need to change this. +type = "from_address" + +[deployment.database] +# Look for "cockroachdb URL: postgresql://root@[::1]:43256/omicron?sslmode=disable" +# and adjust the port number below. 
+url = "postgresql://root@[::1]:43256/omicron?sslmode=disable" +# You should not need to change this. +type = "from_url" +################################################################################ +---- + +To summarize: + +* Copy the example config file: `cp nexus/examples/config-second.toml config-second.toml` +* Edit as described above: `vim config-second.toml` +* Start Nexus like above, but with this config file: `cargo run --bin=nexus -- config-second.toml` + +=== Using the stack + Once everything is up and running, you can use the system in a few ways: -* Use the browser-based console. The Nexus log output will show what IP address and port it's listening on. This is also configured in the config file. If you're using the defaults, you can reach the console at `http://127.0.0.1:12220/projects`. Depending on the environment where you're running this, you may need an ssh tunnel or the like to reach this from your browser. +* Use the browser-based console. The Nexus log output will show what IP address and port it's listening on. This is also configured in the config file. If you're using the defaults with `omicron-dev run-all`, you can reach the console at `http://127.0.0.1:12220/projects`. If you ran a second Nexus using the `config-second.toml` config file, it will be on port `12222` instead (because that config file specifies port 12222). Depending on the environment where you're running this, you may need an ssh tunnel or the like to reach this from your browser. * Use the xref:cli.adoc[`oxide` CLI]. == Running with TLS diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 5ca1d2d6ed..4bdee4ab4e 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -1174,6 +1174,12 @@ mod test { let example_config = NexusConfig::from_file(config_path) .expect("example config file is not valid"); + // The second example config file should be valid. 
+ let config_path = "../nexus/examples/config-second.toml"; + println!("checking {:?}", config_path); + let _ = NexusConfig::from_file(config_path) + .expect("second example config file is not valid"); + // The config file used for the tests should also be valid. The tests // won't clear the runway anyway if this file isn't valid. But it's // helpful to verify this here explicitly as well. diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml new file mode 100644 index 0000000000..5dadb329cd --- /dev/null +++ b/nexus/examples/config-second.toml @@ -0,0 +1,180 @@ +# +# Example configuration file for running a second Nexus instance locally +# alongside the stack started by `omicron-dev run-all`. See the +# how-to-run-simulated instructions for details. +# + +################################################################################ +# INSTRUCTIONS: To run Nexus against an existing stack started with # +# `omicron-dev run-all`, see the very bottom of this file. # +################################################################################ + +[console] +# Directory for static assets. Absolute path or relative to CWD. +static_dir = "out/console-assets" +session_idle_timeout_minutes = 480 # 8 hours +session_absolute_timeout_minutes = 1440 # 24 hours + +# List of authentication schemes to support. +[authn] +schemes_external = ["session_cookie", "access_token"] + +[log] +# Show log messages of this level and more severe +level = "info" + +# Example output to a terminal (with colors) +mode = "stderr-terminal" + +# Example output to a file, appending if it already exists. 
+#mode = "file" +#path = "logs/server.log" +#if_exists = "append" + +# Configuration for interacting with the timeseries database +[timeseries_db] +address = "[::1]:8123" + + + +[deployment] +# Identifier for this instance of Nexus +id = "a4ef738a-1fb0-47b1-9da2-4919c7ec7c7f" +rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" +# Since we expect to be the second instance of Nexus running on this system, +# pick any available port. +techport_external_server_port = 0 + +# Nexus may need to resolve external hosts (e.g. to grab IdP metadata). +# These are the DNS servers it should use. +external_dns_servers = ["1.1.1.1", "9.9.9.9"] + +[deployment.dropshot_external] +# IP Address and TCP port on which to listen for the external API +# This config file uses 12222 to avoid colliding with the usual 12220 that's +# used by `omicron-dev run-all` +bind_address = "127.0.0.1:12222" +# Allow large request bodies to support uploading TUF archives. The number here +# is picked based on the typical size for tuf-mupdate.zip as of 2024-01 +# (~1.5GiB) and multiplying it by 2. +# +# This should be brought back down to a more reasonable value once per-endpoint +# request body limits are implemented. +request_body_max_bytes = 3221225472 +# To have Nexus's external HTTP endpoint use TLS, uncomment the line below. You +# will also need to provide an initial TLS certificate during rack +# initialization. If you're using this config file, you're probably running a +# simulated system. In that case, the initial certificate is provided to the +# simulated sled agent (acting as RSS) via command-line arguments. +#tls = true + +[deployment.dropshot_internal] +# IP Address and TCP port on which to listen for the internal API +# This config file uses 12223 to avoid colliding with the usual 12221 that's +# used by `omicron-dev run-all` +bind_address = "[::1]:12223" +request_body_max_bytes = 1048576 + +#[deployment.internal_dns] +## These values are overridden at the bottom of this file. 
+#type = "from_address" +#address = "[::1]:3535" + +#[deployment.database] +## These values are overridden at the bottom of this file. +#type = "from_url" +#url = "postgresql://root@[::1]:32221/omicron?sslmode=disable" + +# Tunable configuration parameters, for testing or experimentation +[tunables] + +# The maximum allowed prefix (thus smallest size) for a VPC Subnet's +# IPv4 subnetwork. This size allows for ~60 hosts. +max_vpc_ipv4_subnet_prefix = 26 + +# Configuration for interacting with the dataplane daemon +[dendrite.switch0] +address = "[::1]:12224" + +[background_tasks] +dns_internal.period_secs_config = 60 +dns_internal.period_secs_servers = 60 +dns_internal.period_secs_propagation = 60 +dns_internal.max_concurrent_server_updates = 5 +dns_external.period_secs_config = 60 +dns_external.period_secs_servers = 60 +dns_external.period_secs_propagation = 60 +dns_external.max_concurrent_server_updates = 5 +metrics_producer_gc.period_secs = 60 +# How frequently we check the list of stored TLS certificates. This is +# approximately an upper bound on how long after updating the list of +# certificates it will take _other_ Nexus instances to notice and stop serving +# them (on a sunny day). +external_endpoints.period_secs = 60 +nat_cleanup.period_secs = 30 +bfd_manager.period_secs = 30 +# How frequently to collect hardware/software inventory from the whole system +# (even if we don't have reason to believe anything has changed). 
+inventory.period_secs = 600 +# Maximum number of past collections to keep in the database +inventory.nkeep = 5 +# Disable inventory collection altogether (for emergencies) +inventory.disable = false +phantom_disks.period_secs = 30 +physical_disk_adoption.period_secs = 30 +blueprints.period_secs_load = 10 +blueprints.period_secs_execute = 60 +blueprints.period_secs_collect_crdb_node_ids = 180 +sync_service_zone_nat.period_secs = 30 +switch_port_settings_manager.period_secs = 30 +region_replacement.period_secs = 30 +region_replacement_driver.period_secs = 10 +# How frequently to query the status of active instances. +instance_watcher.period_secs = 30 +service_firewall_propagation.period_secs = 300 +v2p_mapping_propagation.period_secs = 30 +abandoned_vmm_reaper.period_secs = 60 +lookup_region_port.period_secs = 60 + +[default_region_allocation_strategy] +# allocate region on 3 random distinct zpools, on 3 random distinct sleds. +type = "random_with_distinct_sleds" + +# the same as random_with_distinct_sleds, but without requiring distinct sleds +# type = "random" + +# setting `seed` to a fixed value will make dataset selection ordering use the +# same shuffling order for every region allocation. +# seed = 0 + +################################################################################ +# INSTRUCTIONS: To run Nexus against an existing stack started with # +# `omicron-dev run-all`, you should only have to modify values in this # +# section. # +# # +# Modify the port numbers below based on the output of `omicron-dev run-all` # +################################################################################ + +[mgd] +# Look for "management gateway: http://[::1]:49188 (switch0)" +# The "http://" does not go in this string -- just the socket address. +switch0.address = "[::1]:49188" + +# Look for "management gateway: http://[::1]:39352 (switch1)" +# The "http://" does not go in this string -- just the socket address. 
+switch1.address = "[::1]:39352" + +[deployment.internal_dns] +# Look for "internal DNS: [::1]:54025" +# and adjust the port number below. +address = "[::1]:54025" +# You should not need to change this. +type = "from_address" + +[deployment.database] +# Look for "cockroachdb URL: postgresql://root@[::1]:43256/omicron?sslmode=disable" +# and adjust the port number below. +url = "postgresql://root@[::1]:43256/omicron?sslmode=disable" +# You should not need to change this. +type = "from_url" +################################################################################ diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 38cdac5fcb..18efe40e27 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -118,7 +118,7 @@ pub struct ControlPlaneTestContext { pub sled_agent2: sim::Server, pub oximeter: Oximeter, pub producer: ProducerServer, - pub gateway: HashMap, + pub gateway: BTreeMap, pub dendrite: HashMap, pub mgd: HashMap, pub external_dns_zone_name: String, @@ -280,7 +280,7 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub sled_agent2: Option, pub oximeter: Option, pub producer: Option, - pub gateway: HashMap, + pub gateway: BTreeMap, pub dendrite: HashMap, pub mgd: HashMap, @@ -330,7 +330,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { sled_agent2: None, oximeter: None, producer: None, - gateway: HashMap::new(), + gateway: BTreeMap::new(), dendrite: HashMap::new(), mgd: HashMap::new(), nexus_internal: None, diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 8415a192b1..dfcaec2157 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -37,7 +37,7 @@ max_vpc_ipv4_subnet_prefix = 29 [deployment] # Identifier for this instance of Nexus. # NOTE: The test suite always overrides this. 
-id = "e6bff1ff-24fb-49dc-a54e-c6a350cd4d6c" +id = "913233fe-92a8-4635-9572-183f495429c4" rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" techport_external_server_port = 0