Skip to content

Commit

Permalink
Merge branch 'main' into qorb-terminate
Browse files Browse the repository at this point in the history
  • Loading branch information
smklein committed Oct 18, 2024
2 parents 0d59b62 + 778a7e9 commit 2c082c1
Show file tree
Hide file tree
Showing 54 changed files with 1,482 additions and 379 deletions.
28 changes: 15 additions & 13 deletions .github/buildomat/build-and-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ mkdir -p "$OUTPUT_DIR"
banner prerequisites
ptime -m bash ./tools/install_builder_prerequisites.sh -y

# Do some test runs of the `ls-apis` command.
#
# This may require cloning some dependent private repos. We do this before the
# main battery of tests because the GitHub tokens required for this only last
# for an hour so we want to get this done early.
#
# (TODO: This makes the build timings we record inaccurate.)
banner ls-apis
(
source ./tools/include/force-git-over-https.sh;
ptime -m cargo xtask ls-apis apis &&
ptime -m cargo xtask ls-apis deployment-units &&
ptime -m cargo xtask ls-apis servers
)

#
# We build with:
#
Expand Down Expand Up @@ -82,19 +97,6 @@ export RUSTC_BOOTSTRAP=1
# We report build progress to stderr, and the "--timings=json" output goes to stdout.
ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --locked --verbose 1> "$OUTPUT_DIR/crate-build-timings.json"

# Do some test runs of the `ls-apis` command.
#
# This may require cloning some dependent private repos. We do this before the
# main battery of tests because the GitHub tokens required for this only last
# for an hour so we want to get this done early.
banner ls-apis
(
source ./tools/include/force-git-over-https.sh;
ptime -m cargo xtask ls-apis apis &&
ptime -m cargo xtask ls-apis deployment-units &&
ptime -m cargo xtask ls-apis servers
)

#
# We apply our own timeout to ensure that we get a normal failure on timeout
# rather than a buildomat timeout. See oxidecomputer/buildomat#8.
Expand Down
6 changes: 5 additions & 1 deletion .github/buildomat/jobs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,11 @@ done

/usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \
project create --name images --description "some images"
/usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \
# NOTE: Use a relatively large timeout on this call, to avoid #6771
/usr/oxide/oxide \
--resolve "$OXIDE_RESOLVE" \
--cacert "$E2E_TLS_CERT" \
--timeout 60 \
disk import \
--path debian-11-genericcloud-amd64.raw \
--disk debian11-boot \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/hakari.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
with:
toolchain: stable
- name: Install cargo-hakari
uses: taiki-e/install-action@42f4ec8e42bf7fe4dadd39bfc534566095a8edff # v2
uses: taiki-e/install-action@3e1dd227d968fb9fa43ff604bd9b0ccd1b714919 # v2
with:
tool: cargo-hakari
- name: Check workspace-hack Cargo.toml is up-to-date
Expand Down
11 changes: 6 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ clickhouse-admin-api = { path = "clickhouse-admin/api" }
clickhouse-admin-keeper-client = { path = "clients/clickhouse-admin-keeper-client" }
clickhouse-admin-server-client = { path = "clients/clickhouse-admin-server-client" }
clickhouse-admin-types = { path = "clickhouse-admin/types" }
clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "ceec762e6a87d2a22bf56792a3025e145caa095e" }
clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "a1b342c2558e835d09e6e39a40d3de798a29c2f" }
cockroach-admin-api = { path = "cockroach-admin/api" }
cockroach-admin-client = { path = "clients/cockroach-admin-client" }
cockroach-admin-types = { path = "cockroach-admin/types" }
Expand Down Expand Up @@ -418,7 +418,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] }
ispf = { git = "https://github.com/oxidecomputer/ispf" }
key-manager = { path = "key-manager" }
kstat-rs = "0.2.4"
libc = "0.2.159"
libc = "0.2.161"
libipcc = { git = "https://github.com/oxidecomputer/libipcc", rev = "fdffa212373a8f92473ea5f411088912bf458d5f" }
libfalcon = { git = "https://github.com/oxidecomputer/falcon", branch = "main" }
libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "dd5bb221d327a1bc9287961718c3c10d6bd37da0" }
Expand Down Expand Up @@ -624,7 +624,7 @@ update-engine = { path = "update-engine" }
url = "2.5.2"
usdt = "0.5.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
uzers = "0.11"
uzers = "0.12"
walkdir = "2.5"
whoami = "1.5"
wicket = { path = "wicket" }
Expand Down
89 changes: 88 additions & 1 deletion dev-tools/ls-apis/src/cargo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,14 @@ impl Workspace {
if let Some(manifest_path) = manifest_path {
cmd.manifest_path(manifest_path);
}
let metadata = cmd.exec().context("loading metadata")?;
let metadata = match cmd.exec() {
Err(original_err) if name == "maghemite" => {
dendrite_workaround(cmd, original_err)?
}
otherwise => otherwise.with_context(|| {
format!("failed to load metadata for {name}")
})?,
};
let workspace_root = metadata.workspace_root;

// Build an index of all packages by id. Identify duplicates because we
Expand Down Expand Up @@ -375,3 +382,83 @@ impl DepPath {
self.0.iter().any(|p| pkgids.contains(p))
}
}

// Dendrite is not (yet) a public repository, but it's a dependency of
// Maghemite. There are two expected cases for running Omicron tests locally
// that we know of:
// - The developer has a Git credential helper of some kind set up to
// successfully clone private repositories over HTTPS.
// - The developer has an SSH agent or other local SSH key that they use to
// clone repositories over SSH.
// We call this function when we fail to fetch the Dendrite repository over
// HTTPS. Under the assumption that the user falls in the second group.
// we attempt to use SSH to fetch the repository by setting `GIT_CONFIG_*`
// environment variables to rewrite the repository URL to an SSH URL. If that
// fails, we'll verbosely inform the user as to how both methods failed and
// provide some context.
//
// This entire workaround can and very much should go away once Dendrite is
// public.
fn dendrite_workaround(
mut cmd: cargo_metadata::MetadataCommand,
original_err: cargo_metadata::Error,
) -> Result<cargo_metadata::Metadata> {
eprintln!(
"warning: failed to load metadata for maghemite; \
trying dendrite workaround"
);

let count = std::env::var_os("GIT_CONFIG_COUNT")
.map(|s| -> Result<u64> {
s.into_string()
.map_err(|_| anyhow!("$GIT_CONFIG_COUNT is not an integer"))?
.parse()
.context("$GIT_CONFIG_COUNT is not an integer")
})
.transpose()?
.unwrap_or_default();
cmd.env("CARGO_NET_GIT_FETCH_WITH_CLI", "true");
cmd.env(
format!("GIT_CONFIG_KEY_{count}"),
"[email protected]:oxidecomputer/dendrite.insteadOf",
);
cmd.env(
format!("GIT_CONFIG_VALUE_{count}"),
"https://github.com/oxidecomputer/dendrite",
);
cmd.env("GIT_CONFIG_COUNT", (count + 1).to_string());
cmd.exec().map_err(|err| {
let cmd = cmd.cargo_command();
let original_err = anyhow::Error::from(original_err);
let err = anyhow::Error::from(err);
anyhow::anyhow!("failed to load metadata for maghemite
`cargo xtask ls-apis` expects to be able to run `cargo metadata` on the
Maghemite workspace that Omicron depends on. Maghemite has a dependency on a
private repository (Dendrite), so `cargo metadata` can fail if you are unable
to clone Dendrite via an HTTPS URL. As a fallback, we also tried to run `cargo
metadata` with environment variables that force `cargo metadata` to use an SSH
URL; unfortunately that also failed.
To successfully run this command (or expectorate test), your environment needs
to be set up to clone a private Oxide repository from GitHub. This can be done
with either a Git credential helper or an SSH key:
https://doc.rust-lang.org/cargo/appendix/git-authentication.html
https://docs.github.com/en/get-started/getting-started-with-git/caching-your-github-credentials-in-git
(If you don't have access to private Oxide repos, you won't be able to
successfully run this command or test.)
More context: https://github.com/oxidecomputer/omicron/issues/6839
===== The fallback command that failed: =====
{cmd:?}
===== The error that occurred while fetching using HTTPS: =====
{original_err:?}
===== The error that occurred while fetching using SSH (fallback): =====
{err:?}")
})
}
56 changes: 49 additions & 7 deletions dev-tools/omdb/src/bin/omdb/oxql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ pub struct OxqlArgs {
)]
clickhouse_url: Option<String>,

/// URL of the ClickHouse server to connect to for the native protcol.
#[arg(
long,
env = "OMDB_CLICKHOUSE_NATIVE_URL",
global = true,
help_heading = CONNECTION_OPTIONS_HEADING,
)]
clickhouse_native_url: Option<String>,

/// Print summaries of each SQL query run against the database.
#[clap(long = "summaries")]
print_summaries: bool,
Expand All @@ -47,29 +56,62 @@ impl OxqlArgs {
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<()> {
let addr = self.addr(omdb, log).await?;
let http_addr = self.resolve_http_addr(omdb, log).await?;
let native_addr = self.resolve_native_addr(omdb, log).await?;

let opts = ShellOptions {
print_summaries: self.print_summaries,
print_elapsed: self.print_elapsed,
};

oxql::shell(
addr.ip(),
addr.port(),
http_addr.ip(),
http_addr.port(),
native_addr.port(),
log.new(slog::o!("component" => "clickhouse-client")),
opts,
)
.await
}

/// Resolve the ClickHouse URL to a socket address.
async fn addr(
/// Resolve the ClickHouse native TCP socket address.
async fn resolve_native_addr(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<SocketAddr> {
self.resolve_addr(
omdb,
log,
self.clickhouse_native_url.as_deref(),
ServiceName::ClickhouseNative,
)
.await
}

/// Resolve the ClickHouse HTTP URL to a socket address.
async fn resolve_http_addr(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<SocketAddr> {
self.resolve_addr(
omdb,
log,
self.clickhouse_url.as_deref(),
ServiceName::Clickhouse,
)
.await
}

async fn resolve_addr(
&self,
omdb: &Omdb,
log: &Logger,
maybe_url: Option<&str>,
srv: ServiceName,
) -> anyhow::Result<SocketAddr> {
match &self.clickhouse_url {
match maybe_url {
Some(cli_or_env_url) => Url::parse(&cli_or_env_url)
.context(
"failed parsing URL from command-line or environment variable",
Expand All @@ -87,7 +129,7 @@ impl OxqlArgs {
Ok(SocketAddr::V6(
omdb.dns_lookup_one(
log.clone(),
ServiceName::Clickhouse,
srv,
)
.await
.context("failed looking up ClickHouse internal DNS entry")?,
Expand Down
7 changes: 5 additions & 2 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -783,13 +783,16 @@ Usage: omdb oxql [OPTIONS]
Options:
--log-level <LOG_LEVEL> log level filter [env: LOG_LEVEL=] [default: warn]
--summaries Print summaries of each SQL query run against the database
--elapsed Print the total elapsed query duration
--color <COLOR> Color output [default: auto] [possible values: auto, always, never]
--elapsed Print the total elapsed query duration
-h, --help Print help

Connection Options:
--clickhouse-url <CLICKHOUSE_URL>
URL of the ClickHouse server to connect to [env: OMDB_CLICKHOUSE_URL=]
--clickhouse-native-url <CLICKHOUSE_NATIVE_URL>
URL of the ClickHouse server to connect to for the native protcol [env:
OMDB_CLICKHOUSE_NATIVE_URL=]
--dns-server <DNS_SERVER>
[env: OMDB_DNS_SERVER=]

Expand All @@ -808,7 +811,7 @@ error: unexpected argument '--summarizes' found

tip: a similar argument exists: '--summaries'

Usage: omdb oxql <--clickhouse-url <CLICKHOUSE_URL>|--summaries|--elapsed>
Usage: omdb oxql <--clickhouse-url <CLICKHOUSE_URL>|--clickhouse-native-url <CLICKHOUSE_NATIVE_URL>|--summaries|--elapsed>

For more information, try '--help'.
=============================================
6 changes: 5 additions & 1 deletion gateway-test-utils/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,11 @@ pub async fn test_setup_with_config(
future::ready(result)
},
&Duration::from_millis(100),
&Duration::from_secs(1),
// This seems like a pretty long time to wait for MGS to discover the
// simulated SPs, but we've seen tests fail due to timeouts here in the
// past, so we may as well be generous:
// https://github.com/oxidecomputer/omicron/issues/6877
&Duration::from_secs(30),
)
.await
.unwrap();
Expand Down
Loading

0 comments on commit 2c082c1

Please sign in to comment.