Skip to content

Commit

Permalink
Move oxql from oxdb to mainline omdb
Browse files Browse the repository at this point in the history
Why?:

Concerning [network observability work](https://github.com/orgs/oxidecomputer/projects/55/views/1?filterQuery=&pane=issue&itemId=68336554),
this makes the [`oxql`](https://rfd.shared.oxide.computer/rfd/0463) interactive query repl accessible via omdb, as we start to give users and
ourselves the ability to query timeseries and metrics more easily. Additionally, in the "now", this aids in debugging through our metrics
set and makes it available, via omdb, throughout our ecosystem/a4x2.

Includes:
  * Moves `oxql_shell` into the oximeter_db lib for use by both omdb and oxdb.
  * If no URL is given to `omdb oxql`, it will leverage internal DNS.
  * Update the oximeter omdb call (for listing producers) to leverage internal
    DNS if no URL is given.
  * Update the generated command/output test fixtures and the collector-specific tests for list producers.

Notes:
  * The oxql client still expects a socket address, since its constructor is
    typed specifically on `SocketAddr` rather than a String. So, upon running the
    `omdb oxql` command, we take in a URL String and parse it into the socket
    address directly.
  • Loading branch information
zeeshanlakhani committed Jul 4, 2024
1 parent 30b6713 commit e6e052d
Show file tree
Hide file tree
Showing 23 changed files with 793 additions and 498 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ tags
.falcon/*
.img/*
connectivity-report.json
*.local
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ nexus-types.workspace = true
omicron-common.workspace = true
omicron-uuid-kinds.workspace = true
oximeter-client.workspace = true
oximeter-db.workspace = true
# See omicron-rpaths for more about the "pq-sys" dependency.
pq-sys = "*"
ratatui.workspace = true
Expand All @@ -51,6 +52,7 @@ tabled.workspace = true
textwrap.workspace = true
tokio = { workspace = true, features = [ "full" ] }
unicode-width.workspace = true
url.workspace = true
uuid.workspace = true
ipnetwork.workspace = true
omicron-workspace-hack.workspace = true
Expand Down
6 changes: 5 additions & 1 deletion dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ mod helpers;
mod mgs;
mod nexus;
mod oximeter;
mod oxql;
mod sled_agent;

#[tokio::main]
Expand All @@ -66,7 +67,8 @@ async fn main() -> Result<(), anyhow::Error> {
OmdbCommands::Db(db) => db.run_cmd(&args, &log).await,
OmdbCommands::Mgs(mgs) => mgs.run_cmd(&args, &log).await,
OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await,
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await,
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&args, &log).await,
OmdbCommands::Oxql(oxql) => oxql.run_cmd(&args, &log).await,
OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await,
OmdbCommands::CrucibleAgent(crucible) => crucible.run_cmd(&args).await,
}
Expand Down Expand Up @@ -269,6 +271,8 @@ enum OmdbCommands {
Nexus(nexus::NexusArgs),
/// Query oximeter collector state
Oximeter(oximeter::OximeterArgs),
/// Enter the Oximeter Query Language shell for interactive querying.
Oxql(oxql::OxqlArgs),
/// Debug a specific Sled
SledAgent(sled_agent::SledAgentArgs),
}
Expand Down
47 changes: 36 additions & 11 deletions dev-tools/omdb/src/bin/omdb/oximeter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
//! omdb commands that query oximeter
use crate::helpers::CONNECTION_OPTIONS_HEADING;
use crate::Omdb;
use anyhow::Context;
use clap::Args;
use clap::Subcommand;
Expand All @@ -24,12 +25,10 @@ pub struct OximeterArgs {
#[arg(
long,
env = "OMDB_OXIMETER_URL",
// This can't be global = true (i.e. passed in later in the
// command-line) because global options can't be required. If this
// changes to being optional, we should set global = true.
global = true,
help_heading = CONNECTION_OPTIONS_HEADING,
)]
oximeter_url: String,
oximeter_url: Option<String>,

#[command(subcommand)]
command: OximeterCommands,
Expand All @@ -38,20 +37,46 @@ pub struct OximeterArgs {
/// Subcommands that query oximeter collector state
#[derive(Debug, Subcommand)]
enum OximeterCommands {
/// List the producers the collector is assigned to poll
/// List the producers the collector is assigned to poll.
ListProducers,
}

impl OximeterArgs {
fn client(&self, log: &Logger) -> Client {
Client::new(
&self.oximeter_url,
async fn client(
&self,
omdb: &Omdb,
log: &Logger,
) -> Result<Client, anyhow::Error> {
let oximeter_url = match &self.oximeter_url {
Some(cli_or_env_url) => cli_or_env_url.clone(),
None => {
eprintln!(
"note: Oximeter URL not specified. Will pick one from DNS."
);
let addr = omdb
.dns_lookup_one(
log.clone(),
internal_dns::ServiceName::Oximeter,
)
.await?;
format!("http://{}", addr)
}
};
eprintln!("note: using Oximeter URL {}", &oximeter_url);

let client = Client::new(
&oximeter_url,
log.new(slog::o!("component" => "oximeter-client")),
)
);
Ok(client)
}

pub async fn run_cmd(&self, log: &Logger) -> anyhow::Result<()> {
let client = self.client(log);
pub async fn run_cmd(
&self,
omdb: &Omdb,
log: &Logger,
) -> anyhow::Result<()> {
let client = self.client(omdb, log).await?;
match self.command {
OximeterCommands::ListProducers => {
self.list_producers(client).await
Expand Down
92 changes: 92 additions & 0 deletions dev-tools/omdb/src/bin/omdb/oxql.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! omdb OxQL shell for interactive queries on metrics/timeseries.
// Copyright 2024 Oxide Computer

use crate::helpers::CONNECTION_OPTIONS_HEADING;
use crate::Omdb;
use anyhow::Context;
use clap::Args;
use oximeter_db::{self, Client, DbWrite};
use slog::Logger;
use std::net::SocketAddr;
use url::Url;

/// Command-line arguments for the OxQL shell.
// NOTE: the `///` comments on this struct and its fields are picked up by
// clap's derive macros as help text, so edit them with the rendered
// `--help` output in mind.
#[derive(Debug, Args)]
pub struct OxqlArgs {
/// URL of the metrics database.
// Optional: when neither the flag nor the `OMDB_METRICS_DB_URL` environment
// variable is set, `client()` falls back to looking up the ClickHouse
// service in internal DNS. When set, the value is parsed as a URL and
// resolved to a socket address, because the database client is constructed
// from a `SocketAddr` rather than a string.
#[arg(
long,
env = "OMDB_METRICS_DB_URL",
global = true,
help_heading = CONNECTION_OPTIONS_HEADING,
)]
metrics_db_url: Option<String>,

/// Print summaries of each SQL query run against the database.
// Exposed on the command line as `--summaries`; forwarded unchanged to
// `oximeter_db::oxql::oxql_shell` by `run_cmd()`.
#[clap(long = "summaries")]
print_summaries: bool,

/// Print the total elapsed query duration.
// Exposed on the command line as `--elapsed`; forwarded unchanged to
// `oximeter_db::oxql::oxql_shell` by `run_cmd()`.
#[clap(long = "elapsed")]
print_elapsed: bool,
}

impl OxqlArgs {
    /// Build the metrics database client used by the OxQL shell.
    ///
    /// The database address is taken from `metrics_db_url` (set via the
    /// command line or the `OMDB_METRICS_DB_URL` environment variable) when
    /// present. Otherwise a ClickHouse address is looked up through internal
    /// DNS using `omdb`. Progress notes are written to stderr so they do not
    /// mix with query output on stdout.
    ///
    /// After constructing the client, `init_single_node_db()` is awaited
    /// before the client is returned.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// * the supplied URL cannot be parsed,
    /// * the URL's host/port cannot be resolved, or resolves to no addresses,
    /// * the internal DNS lookup fails, or
    /// * initializing the timeseries database fails.
    async fn client(
        &self,
        omdb: &Omdb,
        log: &Logger,
    ) -> Result<Client, anyhow::Error> {
        let socket_addr = match &self.metrics_db_url {
            Some(cli_or_env_url) => Url::parse(cli_or_env_url)
                .context("Failed to parse metrics DB URL")?
                // No fallback port is supplied: the URL must either carry an
                // explicit port or use a scheme with a well-known default.
                .socket_addrs(|| None)
                .context("Failed to resolve metrics DB URL")?
                // Resolution may yield several addresses; use the first.
                .into_iter()
                .next()
                .context(
                    "Metrics DB URL did not resolve to any socket address",
                )?,
            None => {
                eprintln!(
                    "note: Metrics DB address/port not specified. Will pick one from DNS."
                );
                // The DNS lookup result is wrapped as an IPv6 socket address.
                SocketAddr::V6(
                    omdb.dns_lookup_one(
                        log.clone(),
                        internal_dns::ServiceName::Clickhouse,
                    )
                    .await?,
                )
            }
        };
        eprintln!("note: using Metrics DB socket address: {}", &socket_addr);

        let client = Client::new(socket_addr, log);

        client
            .init_single_node_db()
            .await
            .context("Failed to initialize timeseries database")?;
        Ok(client)
    }

    /// Run the OxQL shell via the `omdb oxql` subcommand.
    ///
    /// Connects to the metrics database (see `client`) and then hands control
    /// to `oximeter_db::oxql::oxql_shell`, passing through the
    /// `--summaries` and `--elapsed` flags.
    ///
    /// # Errors
    ///
    /// Propagates any error from building the client or from the shell.
    pub async fn run_cmd(
        &self,
        omdb: &Omdb,
        log: &Logger,
    ) -> anyhow::Result<()> {
        let client = self.client(omdb, log).await?;
        oximeter_db::oxql::oxql_shell(
            client,
            self.print_summaries,
            self.print_elapsed,
        )
        .await?;
        Ok(())
    }
}
25 changes: 25 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -421,3 +421,28 @@ note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=d
note: database schema version matches expected (<redacted database version>)
note: listing all commissioned sleds (use -F to filter, e.g. -F in-service)
=============================================
EXECUTING COMMAND: omdb ["oximeter", "--oximeter-url", "junk", "list-producers"]
termination: Exited(1)
---------------------------------------------
stdout:
---------------------------------------------
stderr:
note: using Oximeter URL junk
Error: failed to fetch collector info

Caused by:
0: Communication Error: builder error: relative URL without a base
1: builder error: relative URL without a base
2: relative URL without a base
=============================================
EXECUTING COMMAND: omdb ["oxql", "--metrics-db-url", "junk"]
termination: Exited(1)
---------------------------------------------
stdout:
---------------------------------------------
stderr:
Error: Failed to parse metrics DB URL

Caused by:
relative URL without a base
=============================================
24 changes: 12 additions & 12 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,14 @@ task: "dns_propagation_external"


task: "nat_v4_garbage_collector"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "blueprint_loader"
configured period: every 1m 40s
configured period: every 1m <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand All @@ -432,7 +432,7 @@ task: "abandoned_vmm_reaper"
sled resource reservations deleted: 0

task: "bfd_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down Expand Up @@ -463,7 +463,7 @@ task: "external_endpoints"
TLS certificates: 0

task: "instance_watcher"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down Expand Up @@ -491,30 +491,30 @@ task: "metrics_producer_gc"
warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String("<REDACTED TIMESTAMP>"), "pruned": Array []})

task: "phantom_disks"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
number of phantom disks deleted: 0
number of phantom disk delete errors: 0

task: "physical_disk_adoption"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "region_replacement"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
number of region replacements started ok: 0
number of region replacement start errors: 0

task: "region_replacement_driver"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand All @@ -529,28 +529,28 @@ task: "service_firewall_rule_propagation"
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms

task: "service_zone_nat_tracker"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: inventory collection is None

task: "switch_port_config_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})

task: "v2p_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {})

task: "vpc_route_manager"
configured period: every 30s
configured period: every <REDACTED_DURATION>s
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
Expand Down
Loading

0 comments on commit e6e052d

Please sign in to comment.