diff --git a/Cargo.lock b/Cargo.lock index 84669a13e7..4bbdd23053 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1929,6 +1929,7 @@ dependencies = [ "camino-tempfile", "chrono", "clap", + "dns-server-api", "dns-service-client", "dropshot", "expectorate", @@ -1958,6 +1959,17 @@ dependencies = [ "uuid", ] +[[package]] +name = "dns-server-api" +version = "0.1.0" +dependencies = [ + "chrono", + "dropshot", + "omicron-workspace-hack", + "schemars", + "serde", +] + [[package]] name = "dns-service-client" version = "0.1.0" @@ -6106,6 +6118,7 @@ dependencies = [ "atomicwrites", "camino", "clap", + "dns-server-api", "dropshot", "fs-err", "indent_write", diff --git a/Cargo.toml b/Cargo.toml index e5783b39eb..c417b85ad0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "dev-tools/releng", "dev-tools/xtask", "dns-server", + "dns-server-api", "end-to-end-tests", "gateway-cli", "gateway-test-utils", @@ -119,6 +120,7 @@ default-members = [ # hakari to not work as well and build times to be longer. # See omicron#4392. "dns-server", + "dns-server-api", # Do not include end-to-end-tests in the list of default members, as its # tests only work on a deployed control plane. "gateway-cli", @@ -279,6 +281,7 @@ derive-where = "1.2.7" diesel = { version = "2.1.6", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } +dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } dpd-client = { path = "clients/dpd-client" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index b50aeec69f..1534181e9c 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true atomicwrites.workspace = true camino.workspace = true clap.workspace = true +dns-server-api.workspace = true dropshot.workspace = true fs-err.workspace = true indent_write.workspace = true diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 37330d6922..53f3260ca9 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -14,6 +14,16 @@ use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. pub fn all_apis() -> Vec { vec![ + ApiSpec { + title: "Internal DNS".to_string(), + version: "0.0.1".to_string(), + description: "API for the internal DNS server".to_string(), + boundary: ApiBoundary::Internal, + api_description: + dns_server_api::dns_server_api::stub_api_description, + filename: "dns-server.json".to_string(), + extra_validation: None, + }, ApiSpec { title: "Nexus internal API".to_string(), version: "0.0.1".to_string(), diff --git a/dns-server-api/Cargo.toml b/dns-server-api/Cargo.toml new file mode 100644 index 0000000000..c87af14e0d --- /dev/null +++ b/dns-server-api/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "dns-server-api" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +dropshot.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true diff --git a/dns-server-api/src/lib.rs b/dns-server-api/src/lib.rs new file mode 100644 index 0000000000..2c59caf0c5 --- /dev/null +++ b/dns-server-api/src/lib.rs @@ -0,0 +1,160 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Dropshot API for configuring DNS namespace. +//! +//! ## Shape of the API +//! +//! The DNS configuration API has just two endpoints: PUT and GET of the entire +//! DNS configuration. This is pretty anti-REST. But it's important to think +//! about how this server fits into the rest of the system. When changes are +//! made to DNS data, they're grouped together and assigned a monotonically +//! increasing generation number. The DNS data is first stored into CockroachDB +//! and then propagated from a distributed fleet of Nexus instances to a +//! distributed fleet of these DNS servers. If we accepted individual updates to +//! DNS names, then propagating a particular change would be non-atomic, and +//! Nexus would have to do a lot more work to ensure (1) that all changes were +//! propagated (even if it crashes) and (2) that they were propagated in the +//! correct order (even if two Nexus instances concurrently propagate separate +//! changes). +//! +//! This DNS server supports hosting multiple zones. We could imagine supporting +//! separate endpoints to update the DNS data for a particular zone. That feels +//! nicer (although it's not clear what it would buy us). But as with updates to +//! multiple names, Nexus's job is potentially much easier if the entire state +//! for all zones is updated at once. (Otherwise, imagine how Nexus would +//! implement _renaming_ one zone to another without loss of service. With +//! a combined endpoint and generation number for all zones, all that's necessary +//! is to configure a new zone with all the same names, and then remove the old +//! zone later in another update. That can be managed by the same mechanism in +//! Nexus that manages regular name updates. On the other hand, if there were +//! separate endpoints with separate generation numbers, then Nexus has more to +//! keep track of in order to do the rename safely.) +//! +//! See RFD 367 for more on DNS propagation. +//! +//! ## ETags and Conditional Requests +//! +//! It's idiomatic in HTTP use ETags and conditional requests to provide +//! synchronization. We could define an ETag to be just the current generation +//! number of the server and honor standard `if-match` headers to fail requests +//! where the generation number doesn't match what the client expects. This +//! would be fine, but it's rather annoying: +//! +//! 1. When the client wants to propagate generation X, the client would have +//! make an extra request just to fetch the current ETag, just so it can put +//! it into the conditional request. +//! +//! 2. If some other client changes the configuration in the meantime, the +//! conditional request would fail and the client would have to take another +//! lap (fetching the current config and potentially making another +//! conditional PUT). +//! +//! 3. This approach would make synchronization opt-in. If a client (or just +//! one errant code path) neglected to set the if-match header, we could do +//! the wrong thing and cause the system to come to rest with the wrong DNS +//! data. +//! +//! Since the semantics here are so simple (we only ever want to move the +//! generation number forward), we don't bother with ETags or conditional +//! requests. Instead we have the server implement the behavior we want, which +//! is that when a request comes in to update DNS data to generation X, the +//! server replies with one of: +//! +//! (1) the update has been applied and the server is now running generation X +//! (client treats this as success) +//! +//! (2) the update was not applied because the server is already at generation X +//! (client treats this as success) +//! +//! (3) the update was not applied because the server is already at a newer +//! generation +//! (client probably starts the whole propagation process over because its +//! current view of the world is out of date) +//! +//! This way, the DNS data can never move backwards and the client only ever has +//! to make one request. +//! +//! ## Concurrent updates +//! +//! Given that we've got just one API to update the all DNS zones, and given +//! that might therefore take a minute for a large zone, and also that there may +//! be multiple Nexus instances trying to do it at the same time, we need to +//! think a bit about what should happen if two Nexus do try to do it at the same +//! time. Spoiler: we immediately fail any request to update the DNS data if +//! there's already an update in progress. +//! +//! What else could we do? We could queue the incoming request behind the +//! in-progress one. How large do we allow that queue to grow? At some point +//! we'll need to stop queueing them. So why bother at all? + +use std::{ + collections::HashMap, + net::{Ipv4Addr, Ipv6Addr}, +}; + +use dropshot::{HttpError, HttpResponseOk, RequestContext}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[dropshot::api_description] +pub trait DnsServerApi { + type Context; + + #[endpoint( + method = GET, + path = "/config", + )] + async fn dns_config_get( + rqctx: RequestContext, + ) -> Result, HttpError>; + + #[endpoint( + method = PUT, + path = "/config", + )] + async fn dns_config_put( + rqctx: RequestContext, + rq: dropshot::TypedBody, + ) -> Result; +} + +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DnsConfigParams { + pub generation: u64, + pub time_created: chrono::DateTime, + pub zones: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DnsConfig { + pub generation: u64, + pub time_created: chrono::DateTime, + pub time_applied: chrono::DateTime, + pub zones: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] +pub struct DnsConfigZone { + pub zone_name: String, + pub records: HashMap>, +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +#[serde(tag = "type", content = "data")] +pub enum DnsRecord { + A(Ipv4Addr), + AAAA(Ipv6Addr), + SRV(SRV), +} + +#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] +#[serde(rename = "Srv")] +pub struct SRV { + pub prio: u16, + pub weight: u16, + pub port: u16, + pub target: String, +} diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index 237d2a2fbb..d11dabaf85 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -12,6 +12,7 @@ anyhow.workspace = true camino.workspace = true chrono.workspace = true clap.workspace = true +dns-server-api.workspace = true dns-service-client.workspace = true dropshot.workspace = true http.workspace = true diff --git a/dns-server/src/bin/apigen.rs b/dns-server/src/bin/apigen.rs deleted file mode 100644 index e130ee0211..0000000000 --- a/dns-server/src/bin/apigen.rs +++ /dev/null @@ -1,29 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Generate the OpenAPI spec for the DNS server - -use anyhow::{bail, Result}; -use dns_server::http_server::api; -use std::fs::File; -use std::io; - -fn usage(args: &[String]) -> String { - format!("{} [output path]", args[0]) -} - -fn main() -> Result<()> { - let args: Vec = std::env::args().collect(); - - let mut out = match args.len() { - 1 => Box::new(io::stdout()) as Box, - 2 => Box::new(File::create(args[1].clone())?) as Box, - _ => bail!(usage(&args)), - }; - - let api = api(); - let openapi = api.openapi("Internal DNS", "v0.1.0"); - openapi.write(&mut out)?; - Ok(()) -} diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 01a8430b62..5c761f2aa3 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -7,12 +7,12 @@ //! The facilities here handle binding a UDP socket, receiving DNS messages on //! that socket, and replying to them. -use crate::dns_types::DnsRecord; use crate::storage; use crate::storage::QueryError; use crate::storage::Store; use anyhow::anyhow; use anyhow::Context; +use dns_server_api::DnsRecord; use pretty_hex::*; use serde::Deserialize; use slog::{debug, error, info, o, trace, Logger}; @@ -234,12 +234,7 @@ fn dns_record_to_record( Ok(aaaa) } - DnsRecord::SRV(crate::dns_types::SRV { - prio, - weight, - port, - target, - }) => { + DnsRecord::SRV(dns_server_api::SRV { prio, weight, port, target }) => { let tgt = Name::from_str(&target).map_err(|error| { RequestError::ServFail(anyhow!( "serialization failed due to bad SRV target {:?}: {:#}", diff --git a/dns-server/src/dns_types.rs b/dns-server/src/dns_types.rs deleted file mode 100644 index 941124feb6..0000000000 --- a/dns-server/src/dns_types.rs +++ /dev/null @@ -1,50 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! types describing DNS records and configuration - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::net::Ipv4Addr; -use std::net::Ipv6Addr; - -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct DnsConfigParams { - pub generation: u64, - pub time_created: chrono::DateTime, - pub zones: Vec, -} - -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct DnsConfig { - pub generation: u64, - pub time_created: chrono::DateTime, - pub time_applied: chrono::DateTime, - pub zones: Vec, -} - -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct DnsConfigZone { - pub zone_name: String, - pub records: HashMap>, -} - -#[allow(clippy::upper_case_acronyms)] -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -#[serde(tag = "type", content = "data")] -pub enum DnsRecord { - A(Ipv4Addr), - AAAA(Ipv6Addr), - SRV(SRV), -} - -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)] -#[serde(rename = "Srv")] -pub struct SRV { - pub prio: u16, - pub weight: u16, - pub port: u16, - pub target: String, -} diff --git a/dns-server/src/http_server.rs b/dns-server/src/http_server.rs index e50346d828..84ffbc90e9 100644 --- a/dns-server/src/http_server.rs +++ b/dns-server/src/http_server.rs @@ -4,102 +4,12 @@ //! Dropshot server for configuring DNS namespace -// Shape of the API -// ------------------------------ -// -// The DNS configuration API has just two endpoints: PUT and GET of the entire -// DNS configuration. This is pretty anti-REST. But it's important to think -// about how this server fits into the rest of the system. When changes are -// made to DNS data, they're grouped together and assigned a monotonically -// increasing generation number. The DNS data is first stored into CockroachDB -// and then propagated from a distributed fleet of Nexus instances to a -// distributed fleet of these DNS servers. If we accepted individual updates to -// DNS names, then propagating a particular change would be non-atomic, and -// Nexus would have to do a lot more work to ensure (1) that all changes were -// propagated (even if it crashes) and (2) that they were propagated in the -// correct order (even if two Nexus instances concurrently propagate separate -// changes). -// -// This DNS server supports hosting multiple zones. We could imagine supporting -// separate endpoints to update the DNS data for a particular zone. That feels -// nicer (although it's not clear what it would buy us). But as with updates to -// multiple names, Nexus's job is potentially much easier if the entire state -// for all zones is updated at once. (Otherwise, imagine how Nexus would -// implement _renaming_ one zone to another without loss of service. With -// a combined endpoint and generation number for all zones, all that's necessary -// is to configure a new zone with all the same names, and then remove the old -// zone later in another update. That can be managed by the same mechanism in -// Nexus that manages regular name updates. On the other hand, if there were -// separate endpoints with separate generation numbers, then Nexus has more to -// keep track of in order to do the rename safely.) -// -// See RFD 367 for more on DNS propagation. -// -// -// ETags and Conditional Requests -// ------------------------------ -// -// It's idiomatic in HTTP use ETags and conditional requests to provide -// synchronization. We could define an ETag to be just the current generation -// number of the server and honor standard `if-match` headers to fail requests -// where the generation number doesn't match what the client expects. This -// would be fine, but it's rather annoying: -// -// (1) When the client wants to propagate generation X, the client would have -// make an extra request just to fetch the current ETag, just so it can put -// it into the conditional request. -// -// (2) If some other client changes the configuration in the meantime, the -// conditional request would fail and the client would have to take another -// lap (fetching the current config and potentially making another -// conditional PUT). -// -// (3) This approach would make synchronization opt-in. If a client (or just -// one errant code path) neglected to set the if-match header, we could do -// the wrong thing and cause the system to come to rest with the wrong DNS -// data. -// -// Since the semantics here are so simple (we only ever want to move the -// generation number forward), we don't bother with ETags or conditional -// requests. Instead we have the server implement the behavior we want, which -// is that when a request comes in to update DNS data to generation X, the -// server replies with one of: -// -// (1) the update has been applied and the server is now running generation X -// (client treats this as success) -// -// (2) the update was not applied because the server is already at generation X -// (client treats this as success) -// -// (3) the update was not applied because the server is already at a newer -// generation -// (client probably starts the whole propagation process over because its -// current view of the world is out of date) -// -// This way, the DNS data can never move backwards and the client only ever has -// to make one request. -// -// -// Concurrent updates -// ------------------ -// -// Given that we've got just one API to update the all DNS zones, and given -// that might therefore take a minute for a large zone, and also that there may -// be multiple Nexus instances trying to do it at the same time, we need to -// think a bit about what should happen if two Nexus do try to do it at the same -// time. Spoiler: we immediately fail any request to update the DNS data if -// there's already an update in progress. -// -// What else could we do? We could queue the incoming request behind the -// in-progress one. How large do we allow that queue to grow? At some point -// we'll need to stop queueing them. So why bother at all? - -use crate::dns_types::{DnsConfig, DnsConfigParams}; use crate::storage::{self, UpdateError}; +use dns_server_api::{DnsConfig, DnsConfigParams, DnsServerApi}; use dns_service_client::{ ERROR_CODE_BAD_UPDATE_GENERATION, ERROR_CODE_UPDATE_IN_PROGRESS, }; -use dropshot::{endpoint, RequestContext}; +use dropshot::RequestContext; pub struct Context { store: storage::Store, @@ -112,41 +22,40 @@ impl Context { } pub fn api() -> dropshot::ApiDescription { - let mut api = dropshot::ApiDescription::new(); - - api.register(dns_config_get).expect("register dns_config_get"); - api.register(dns_config_put).expect("register dns_config_update"); - api + dns_server_api::dns_server_api::api_description::() + .expect("registered DNS server entrypoints") } -#[endpoint( - method = GET, - path = "/config", -)] -async fn dns_config_get( - rqctx: RequestContext, -) -> Result, dropshot::HttpError> { - let apictx = rqctx.context(); - let config = apictx.store.dns_config().await.map_err(|e| { - dropshot::HttpError::for_internal_error(format!( - "internal error: {:?}", - e - )) - })?; - Ok(dropshot::HttpResponseOk(config)) -} +enum DnsServerApiImpl {} + +impl DnsServerApi for DnsServerApiImpl { + type Context = Context; -#[endpoint( - method = PUT, - path = "/config", -)] -async fn dns_config_put( - rqctx: RequestContext, - rq: dropshot::TypedBody, -) -> Result { - let apictx = rqctx.context(); - apictx.store.dns_config_update(&rq.into_inner(), &rqctx.request_id).await?; - Ok(dropshot::HttpResponseUpdatedNoContent()) + async fn dns_config_get( + rqctx: RequestContext, + ) -> Result, dropshot::HttpError> { + let apictx = rqctx.context(); + let config = apictx.store.dns_config().await.map_err(|e| { + dropshot::HttpError::for_internal_error(format!( + "internal error: {:?}", + e + )) + })?; + Ok(dropshot::HttpResponseOk(config)) + } + + async fn dns_config_put( + rqctx: RequestContext, + rq: dropshot::TypedBody, + ) -> Result + { + let apictx = rqctx.context(); + apictx + .store + .dns_config_update(&rq.into_inner(), &rqctx.request_id) + .await?; + Ok(dropshot::HttpResponseUpdatedNoContent()) + } } impl From for dropshot::HttpError { diff --git a/dns-server/src/lib.rs b/dns-server/src/lib.rs index ea8625a667..a2b1fda0d7 100644 --- a/dns-server/src/lib.rs +++ b/dns-server/src/lib.rs @@ -43,7 +43,6 @@ //! the persistent DNS data pub mod dns_server; -pub mod dns_types; pub mod http_server; pub mod storage; diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs index 21fb9ebdc6..85b2e79b8b 100644 --- a/dns-server/src/storage.rs +++ b/dns-server/src/storage.rs @@ -92,9 +92,9 @@ // backwards-compatible way (but obviously one wouldn't get the scaling benefits // while continuing to use the old API). -use crate::dns_types::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord}; use anyhow::{anyhow, Context}; use camino::Utf8PathBuf; +use dns_server_api::{DnsConfig, DnsConfigParams, DnsConfigZone, DnsRecord}; use serde::{Deserialize, Serialize}; use sled::transaction::ConflictableTransactionError; use slog::{debug, error, info, o, warn}; @@ -777,13 +777,13 @@ impl<'a, 'b> Drop for UpdateGuard<'a, 'b> { #[cfg(test)] mod test { use super::{Config, Store, UpdateError}; - use crate::dns_types::DnsConfigParams; - use crate::dns_types::DnsConfigZone; - use crate::dns_types::DnsRecord; use crate::storage::QueryError; use anyhow::Context; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; + use dns_server_api::DnsConfigParams; + use dns_server_api::DnsConfigZone; + use dns_server_api::DnsRecord; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; use std::collections::HashMap; diff --git a/dns-server/tests/openapi_test.rs b/dns-server/tests/openapi_test.rs deleted file mode 100644 index 490680eda4..0000000000 --- a/dns-server/tests/openapi_test.rs +++ /dev/null @@ -1,27 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use expectorate::assert_contents; -use omicron_test_utils::dev::test_cmds::assert_exit_code; -use omicron_test_utils::dev::test_cmds::path_to_executable; -use omicron_test_utils::dev::test_cmds::run_command; -use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; -use openapiv3::OpenAPI; -use subprocess::Exec; - -const CMD_API_GEN: &str = env!("CARGO_BIN_EXE_apigen"); - -#[test] -fn test_dns_server_openapi() { - let exec = Exec::cmd(path_to_executable(CMD_API_GEN)); - let (exit_status, stdout, stderr) = run_command(exec); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr); - - let spec: OpenAPI = - serde_json::from_str(&stdout).expect("stdout was not valid OpenAPI"); - let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - assert_contents("../openapi/dns-server.json", &stdout); -} diff --git a/openapi/dns-server.json b/openapi/dns-server.json index 1b02199b76..0252c1538a 100644 --- a/openapi/dns-server.json +++ b/openapi/dns-server.json @@ -2,7 +2,12 @@ "openapi": "3.0.3", "info": { "title": "Internal DNS", - "version": "v0.1.0" + "description": "API for the internal DNS server", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "0.0.1" }, "paths": { "/config": {