From 4d644f720e470f5dc8c38e8d54cd8688297c1285 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Mon, 14 Aug 2023 16:44:27 +0000 Subject: [PATCH] Support a restricted SQL subset for querying timeseries - Add methods for querying oximeter timeseries using a limited subset of SQL. The raw string is heavily validated, and only simple SELECT queries are currently supported, with a limited subset of ClickHouse functions. Still, this allows running many different kinds of queries, including aggregations, window functions, and joins. - Adds a few types for reading out tabular data from the query result, and parsing basic metadata for understanding resource usage of the queries. - Add a barebones SQL shell for running oximeter SQL queries, using the new `Client::query` method. - Include a bunch of tests for the restricted SQL subset as well as correctness of the actual returned queries against the DB. - Add a small README for the SQL shell --- Cargo.lock | 157 ++- Cargo.toml | 3 + oximeter/db/Cargo.toml | 29 +- oximeter/db/README-oxdb-sql.md | 219 ++++ oximeter/db/src/bin/oxdb.rs | 311 ++++- oximeter/db/src/client.rs | 294 ++++- oximeter/db/src/lib.rs | 18 +- oximeter/db/src/query.rs | 2 + oximeter/db/src/sql/mod.rs | 1350 +++++++++++++++++++++ oximeter/db/test-output/sql/00/query.sql | 1 + oximeter/db/test-output/sql/00/result.txt | 10 + oximeter/db/test-output/sql/01/query.sql | 1 + oximeter/db/test-output/sql/01/result.txt | 10 + oximeter/db/test-output/sql/02/query.sql | 1 + oximeter/db/test-output/sql/02/result.txt | 10 + oximeter/db/test-output/sql/03/query.sql | 4 + oximeter/db/test-output/sql/03/result.txt | 43 + oximeter/db/test-output/sql/04/query.sql | 5 + oximeter/db/test-output/sql/04/result.txt | 56 + oximeter/oximeter/src/types.rs | 5 + wicket-dbg/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +- 22 files changed, 2501 insertions(+), 34 deletions(-) create mode 100644 oximeter/db/README-oxdb-sql.md create mode 100644 oximeter/db/src/sql/mod.rs create mode 100644 oximeter/db/test-output/sql/00/query.sql create mode 100644 oximeter/db/test-output/sql/00/result.txt create mode 100644 oximeter/db/test-output/sql/01/query.sql create mode 100644 oximeter/db/test-output/sql/01/result.txt create mode 100644 oximeter/db/test-output/sql/02/query.sql create mode 100644 oximeter/db/test-output/sql/02/result.txt create mode 100644 oximeter/db/test-output/sql/03/query.sql create mode 100644 oximeter/db/test-output/sql/03/result.txt create mode 100644 oximeter/db/test-output/sql/04/query.sql create mode 100644 oximeter/db/test-output/sql/04/result.txt diff --git a/Cargo.lock b/Cargo.lock index c379dcfbff8..e15d02defd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,6 +211,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.4" @@ -555,7 +561,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c2f0dc9a68c6317d884f97cc36cf5a3d20ba14ce404227df55e1af708ab04bc" dependencies = [ "arrayref", - "arrayvec", + "arrayvec 0.7.4", "constant_time_eq 0.2.6", ] @@ -991,6 +997,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" +[[package]] +name = "clipboard-win" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +dependencies = [ + "error-code", + "str-buf", + "winapi", +] + [[package]] name = "cobs" version = "0.2.3" @@ -1245,6 +1262,23 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio", + "parking_lot 0.12.1", + "serde", + "signal-hook", + "signal-hook-mio", + "winapi", +] + [[package]] name = "crossterm" version = "0.27.0" @@ -1257,7 +1291,6 @@ dependencies = [ "libc", "mio", "parking_lot 0.12.1", - "serde", "signal-hook", "signal-hook-mio", "winapi", @@ -2149,6 +2182,16 @@ dependencies = [ "libc", ] +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" +dependencies = [ + "libc", + "str-buf", +] + [[package]] name = "expectorate" version = "1.1.0" @@ -2535,7 +2578,7 @@ dependencies = [ "hubpack 0.1.2", "hubtools", "lru-cache", - "nix", + "nix 0.26.2", "once_cell", "paste", "serde", @@ -4251,6 +4294,17 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "nodrop" version = "0.1.14" @@ -4983,7 +5037,6 @@ dependencies = [ "const-oid", "crossbeam-epoch", "crossbeam-utils", - "crossterm", "crypto-common", "der", "diesel", @@ -5020,6 +5073,7 @@ dependencies = [ "managed", "memchr", "mio", + "nom", "num-bigint", "num-integer", "num-iter", @@ -5389,14 +5443,18 @@ dependencies = [ "clap 4.4.3", "dropshot", "expectorate", + "futures", "highway", + "indexmap 2.1.0", "itertools 0.12.0", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "reedline", "regex", "reqwest", + "rustyline", "schemars", "serde", "serde_json", @@ -5404,7 +5462,10 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-term", + "sqlformat", + "sqlparser", "strum", + "tabled", "tempfile", "thiserror", "tokio", @@ -6407,7 +6468,7 @@ checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" dependencies = [ "bitflags 2.4.0", "cassowary", - "crossterm", + "crossterm 0.27.0", "indoc 2.0.3", "itertools 0.11.0", "paste", @@ -6497,12 +6558,12 @@ dependencies = [ [[package]] name = "reedline" -version = "0.26.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a093a20a6c473247c2e9971aaf4cedf9041bcd3f444dc7fad667d3b6b7a5fd" +checksum = "c2fde955d11817fdcb79d703932fb6b473192cb36b6a92ba21f7f4ac0513374e" dependencies = [ "chrono", - "crossterm", + "crossterm 0.26.1", "fd-lock", "itertools 0.10.5", "nu-ansi-term", @@ -7037,6 +7098,29 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rustyline" +version = "12.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +dependencies = [ + "bitflags 2.4.0", + "cfg-if", + "clipboard-win", 
+ "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix 0.26.4", + "radix_trie", + "scopeguard", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.15" @@ -7931,6 +8015,38 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools 0.12.0", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -7965,6 +8081,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "str-buf" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" + [[package]] name = "string_cache" version = "0.8.7" @@ -7991,9 +8113,9 @@ dependencies = [ [[package]] name = "strip-ansi-escapes" -version = "0.2.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ff8ef943b384c414f54aefa961dd2bd853add74ec75e7ac74cf91dba62bcfa" +checksum = "011cbb39cf7c1f62871aea3cc46e5817b0937b49e9447370c93cacbe93a766d8" dependencies = [ "vte", ] @@ -9095,6 +9217,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "universal-hash" version = "0.5.1" @@ -9291,10 +9419,11 @@ dependencies = [ [[package]] name = "vte" -version = "0.11.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197" +checksum = "6cbce692ab4ca2f1f3047fcf732430249c0e971bfdd2b234cf2c47ad93af5983" dependencies = [ + "arrayvec 0.5.2", "utf8parse", "vte_generate_state_changes", ] @@ -9484,7 +9613,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm", + "crossterm 0.27.0", "futures", "humantime", "indexmap 2.1.0", @@ -9545,7 +9674,7 @@ dependencies = [ "camino", "ciborium", "clap 4.4.3", - "crossterm", + "crossterm 0.27.0", "omicron-workspace-hack", "reedline", "serde", diff --git a/Cargo.toml b/Cargo.toml index 5591dcebc93..aa27f61a71e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -301,6 +301,7 @@ rand = "0.8.5" ratatui = "0.23.0" rayon = "1.8" rcgen = "0.11.3" +reedline = "0.22.0" ref-cast = "1.0" regex = "1.10.2" regress = "0.7.1" @@ -310,6 +311,7 @@ rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.21.9" +rustyline = "12.0.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.12" secrecy = "0.8.0" @@ -343,6 +345,7 @@ sp-sim = { path = "sp-sim" } 
sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } +sqlparser = { version = "0.36.1", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 4d53869d0dc..99985a3b809 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -9,27 +9,46 @@ license = "MPL-2.0" anyhow.workspace = true async-trait.workspace = true bcs.workspace = true -bytes = { workspace = true, features = [ "serde" ] } camino.workspace = true chrono.workspace = true clap.workspace = true dropshot.workspace = true +futures.workspace = true highway.workspace = true +indexmap.workspace = true omicron-common.workspace = true +omicron-workspace-hack.workspace = true oximeter.workspace = true +reedline.workspace = true regex.workspace = true -reqwest = { workspace = true, features = [ "json" ] } -schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } +rustyline.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true slog-async.workspace = true slog-term.workspace = true +sqlparser.workspace = true +sqlformat = "0.2.2" +tabled.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = [ "rt-multi-thread", "macros" ] } usdt.workspace = true uuid.workspace = true -omicron-workspace-hack.workspace = true + +[dependencies.bytes] +workspace = true +features = [ "serde" ] + +[dependencies.reqwest] +workspace = true +features = [ "json" ] + +[dependencies.schemars] +workspace = true +features = [ "uuid1", "bytes", "chrono" ] + +[dependencies.tokio] +workspace = true +features = [ "rt-multi-thread", "macros" ] [dev-dependencies] expectorate.workspace = true diff --git a/oximeter/db/README-oxdb-sql.md b/oximeter/db/README-oxdb-sql.md new file mode 100644 index 00000000000..8ebecdd409b --- /dev/null +++ b/oximeter/db/README-oxdb-sql.md @@ -0,0 +1,219 @@ +# `oxdb sql` + +This is a short how-to for using SQL to query timeseries. If you're eager to get +started, find a ClickHouse server with `oximeter` data, and run: + +```console +oxdb --address $CLICKHOUSE_ADDR sql +``` + +You can use `help` to get a help menu on the CLI, or run `\l` to list available +timeseries to start querying. + +## `oximeter` overview + +In general, `oximeter`'s architecture and data model are laid out in RFDs 161 +and 162. These provide a good detailed look at the system. + +### Terminology + +`oximeter` is the subsystem for describing, collecting, and storing telemetry +data from the Oxide rack. Software components make data available to an +`oximeter` collector in the form of _samples_, which are timestamped datapoints +from a single timeseries. + +Timeseries are named for their _target_, the component being measured or +monitored, and the _metric_, the measured feature or aspect of the target. The +timeseries name is derived as `target_name:metric_name`. The target and metric +can both have name-value pairs called _fields_, and the metric additionally has +a _measurement_, the actual measured value. Both are strongly typed. 
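+
+For a concrete sense of the shape of a timeseries, here is a sketch of how a
+target/metric pair might be defined in Rust, mirroring the
+`physical_data_link:bytes_received` timeseries used in the examples below. The
+real definitions live in the services that produce the samples, so treat this
+as illustrative only:
+
+```rust
+use oximeter::{types::Cumulative, Metric, Target};
+use uuid::Uuid;
+
+/// The target: an Ethernet data link on a sled.
+#[derive(Debug, Target)]
+pub struct PhysicalDataLink {
+    pub rack_id: Uuid,
+    pub sled_id: Uuid,
+    pub serial: String,
+    pub hostname: String,
+    pub link_name: String,
+}
+
+/// The metric: the cumulative number of bytes received on the link.
+#[derive(Debug, Metric)]
+pub struct BytesReceived {
+    pub datum: Cumulative<u64>,
+}
+```
+
+The struct names produce the timeseries name `physical_data_link:bytes_received`,
+the target and metric fields become the timeseries _fields_, and the `datum`
+field is the _measurement_.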
+
+### Data normalization
+
+As samples are collected, `oximeter` normalizes them before storing in
+ClickHouse. The database consists of a set of tables for fields and
+measurements, with each _type_ stored in a different table. For fields, the name
+of the field is also stored; for measurements, the timestamp and actual datum
+are stored. Additionally, one table stores all the received _timeseries schema_,
+which describes the name, fields, and measurement types for each timeseries.
+
+Normalizing the tables has many benefits: less duplicated data, simpler and
+more static table arrangements, better compression, and more. It does have
+drawbacks. Querying becomes especially tricky, because one needs to join many
+tables together to reconstitute the original samples. This is exacerbated by
+ClickHouse's lack of unique primary keys, which means we need to generate a tag
+used to associate records from a single timeseries. These are called
+_timeseries keys_, and are just hashes computed when a sample is received.
+
+## Oximeter SQL
+
+While writing the full set of join expressions needed to denormalize samples is
+not very human-friendly, it _is_ relatively easy to generate these in code.
+Using the stored timeseries schema and timeseries keys, one can write a (huge)
+join expression that results in the full timeseries _as if_ it were a real table
+in the database. `oxdb sql` generates this expression, and then runs whatever
+query the user supplied on _the resulting in-memory table_.
+
+### Basic commands
+
+After starting the SQL shell with `oxdb sql`, one can run the following basic
+operations:
+
+- `\h` or `help` will print a _help_ menu
+- `\l` will _list_ all available timeseries by name
+- `\d <timeseries>` will _describe_ the schema of a single named timeseries
+- `\f` will list supported ClickHouse functions and `\f <function>` will print
+  more details about the function and its usage
+
+### SQL
+
+In general, normal ANSI SQL is supported. Instead of a _table_, however, one
+queries against a _timeseries_. For example:
+
+```sql
+SELECT count() FROM physical_data_link:bytes_received;
+```
+
+This will return the total number of samples in the timeseries representing the
+number of bytes received on an Ethernet data link on a Gimlet. Here are the
+available fields:
+
+```console
+0x〉\d physical_data_link:bytes_received
+ hostname | link_name | rack_id | serial | sled_id | timestamp  | start_time | datum
+----------+-----------+---------+--------+---------+------------+------------+---------------
+  String  |  String   |  Uuid   | String |  Uuid   | DateTime64 | DateTime64 | CumulativeU64
+```
+
+Any of these fields can be queried, including aggregations, groupings, etc.
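+
+For example, one could take the largest cumulative byte count seen on each data
+link (a sketch; a close variant of this query appears in the examples below):
+
+```sql
+SELECT link_name, max(datum)
+FROM physical_data_link:bytes_received
+GROUP BY link_name;
+```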
+
+```console
+0x〉select min(timestamp), max(timestamp) from physical_data_link:bytes_received;
+
+ min(timestamp)                  | max(timestamp)
+---------------------------------+---------------------------------
+ "2023-11-09 04:24:53.284336528" | "2023-11-09 22:12:58.986751414"
+
+Metadata
+ Query ID: 66e68db5-8792-4e48-af2d-e5a2a117ab0d
+ Result rows: 1
+ Time: 72.371047ms
+ Read: 19736 rows (1292186 bytes)
+
+```
+
+or
+
+```console
+0x〉select distinct route from http_service:request_latency_histogram where name = 'nexus-internal';
+
+ route
+-------------------------------------------------------------------------------------------------
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/d462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/metrics/producers"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/616b26df-e62a-4c68-b506-f4a923d8aaf7"
+ "/metrics/collect/1e9a8843-2327-4d59-94b2-14f909b6f207"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/f4b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/a462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/metrics/collect/4b795850-8320-4b7d-9048-aa277653ab8e"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/14b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/e4b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/cd70d7f6-2354-4bf2-8012-55bf9eaf7930"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/31bd71cd-4736-4a12-a387-9b74b050396f"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/b462a7f7-b628-40fe-80ff-4e4189e2d62b"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/24b4dc87-ab46-49fb-a4b4-d361ae214c03"
+ "/physical-disk"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803"
+ "/sled-agents/a8c6432e-338f-4839-bfb5-297112b39803/zpools/ceb4461c-cf56-4719-ad3c-14430bfdfb60"
+ "/metrics/collect/5c4f4629-1325-4123-bdcd-01bc9a18d740"
+ "/metrics/collectors"
+
+Metadata
+ Query ID: 3107c7ca-6906-4ce9-9d57-19cf0c1d6c71
+ Result rows: 18
+ Time: 119.387667ms
+ Read: 206840 rows (14749196 bytes)
+
+```
+
+or
+
+```console
+0x〉select link_name, formatReadableSize(max(datum)) from physical_data_link:bytes_sent group by link_name;
+
+ link_name | formatReadableSize(max(datum))
+-----------+--------------------------------
+ "net0"    | "5.96 MiB"
+ "net1"    | "0.00 B"
+
+Metadata
+ Query ID: cd101b14-a91e-419b-b2d0-633047db219e
+ Result rows: 2
+ Time: 56.036663ms
+ Read: 27025 rows (1558430 bytes)
+
+```
+
+> Note the metadata at the bottom. The query ID is assigned by the server, which
+  also returns the number of rows / bytes read. The _time_ includes the server
+  processing time and the network time, and is usually dominated by the latter.
+
+### JOINs
+
+SQL joins are also supported, as long as they are either _inner joins_ or the
+ClickHouse-specific _asof join_. Inner joins are pretty standard, but `ASOF
+JOIN` is unique and very useful. It provides a way to match up rows that do not
+have an _exact_ match in each table.
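+
+In its simplest form, `ASOF JOIN` pairs each row of the left table with the
+right-table row that has the same join keys and the _closest_ timestamp, rather
+than an exactly equal one. A minimal sketch, using two hypothetical timeseries
+`demo:requests` and `demo:errors`:
+
+```sql
+SELECT timestamp, datum, e.timestamp, e.datum
+FROM demo:requests
+ASOF JOIN demo:errors AS e USING (name, timestamp);
+```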
+
+As an example, we can use this to match up metrics from different timeseries:
+
+```console
+0x〉select timestamp, datum as bytes_received, s.timestamp, s.datum as bytes_sent from physical_data_link:bytes_received asof join physical_data_link:bytes_sent as s using (link_name, timestamp) where link_name = 'net0' limit 10;
+
+ timestamp                       | bytes_received | s.timestamp                     | bytes_sent
+---------------------------------+----------------+---------------------------------+------------
+ "2023-11-09 04:24:53.284336528" | 0              | "2023-11-09 04:24:53.284336528" | 10661
+ "2023-11-09 04:25:06.960255374" | 1064           | "2023-11-09 04:25:06.960255374" | 11937
+ "2023-11-09 04:25:16.962286001" | 3748           | "2023-11-09 04:25:16.962286001" | 15910
+ "2023-11-09 04:25:26.964768912" | 5278           | "2023-11-09 04:25:26.964768912" | 18465
+ "2023-11-09 04:25:36.966422345" | 7146           | "2023-11-09 04:25:36.966422345" | 24423
+ "2023-11-09 04:25:46.969640057" | 8032           | "2023-11-09 04:25:46.969640057" | 25370
+ "2023-11-09 04:25:57.589902294" | 8868           | "2023-11-09 04:25:57.589902294" | 26277
+ "2023-11-09 04:26:07.590262491" | 13120          | "2023-11-09 04:26:07.590262491" | 30225
+ "2023-11-09 04:26:17.592895364" | 14584          | "2023-11-09 04:26:17.592895364" | 31501
+ "2023-11-09 04:26:27.594820340" | 15344          | "2023-11-09 04:26:27.594820340" | 32211
+
+Metadata
+ Query ID: a13f3abf-9c57-4caa-bfc6-c1b26732d2ad
+ Result rows: 10
+ Time: 124.670777ms
+ Read: 45858 rows (3331596 bytes)
+
+```
+
+Note that these happen to have exactly the same timestamp, based on how they
+were generated, but that need not be the case.
+
+## Warnings and caveats
+
+First, this is a **prototype**. It is designed for testing and experimentation,
+and no other part of the product should expect it to work on customer sites any
+time soon.
+
+Second, the abstraction here is pretty leaky. SQL expressions that might
+normally work against a real table can easily fail here. Please file a bug if
+you think something _should_ work.
+
+Last, and maybe most important, be aware of the resource limitations here. This
+all works by constructing an _enormous_ joined table in memory. ClickHouse is
+extremely fast, but it is relatively simplistic when it comes to query planning
+and optimization. That means it will do exactly what the query says, including
+trying to create tables much larger than the available memory.
+
+For the most part, the blast radius of those problems should be limited to the
+ClickHouse zone itself. We also limit the total memory consumption of the
+server, currently to 90% of the zone's memory. But since we don't limit the
+_zone's_ memory, that's 90% of the physical memory, which is very large indeed.
+If you're curious how a query will perform, it's probably a good idea to try it
+out on a small subset of data, by adding a `LIMIT` clause or similar. You can
+also run `oxdb sql --transform $QUERY_STRING` to print the full query that will
+actually be executed on the server.
diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb.rs
index e14fdeb6a8e..43352d551ed 100644
--- a/oximeter/db/src/bin/oxdb.rs
+++ b/oximeter/db/src/bin/oxdb.rs
@@ -3,16 +3,27 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 //! Tool for developing against the Oximeter timeseries database, populating data and querying.
-// Copyright 2021 Oxide Computer Company
+
+// Copyright 2023 Oxide Computer Company
 
 use anyhow::{bail, Context};
 use chrono::{DateTime, Utc};
 use clap::{Args, Parser};
+use dropshot::EmptyScanParams;
+use dropshot::WhichPage;
 use oximeter::{
     types::{Cumulative, Sample},
     Metric, Target,
 };
+use oximeter_db::sql::function_allow_list;
+use oximeter_db::QueryMetadata;
+use oximeter_db::QueryResult;
+use oximeter_db::Table;
 use oximeter_db::{query, Client, DbWrite};
+use reedline::DefaultPrompt;
+use reedline::DefaultPromptSegment;
+use reedline::Reedline;
+use reedline::Signal;
 use slog::{debug, info, o, Drain, Level, Logger};
 use std::net::IpAddr;
 use std::net::SocketAddr;
@@ -138,6 +149,12 @@ enum Subcommand {
         #[clap(long, conflicts_with("end"), action)]
         end_exclusive: Option<DateTime<Utc>>,
     },
+
+    /// Enter a SQL shell for interactive querying.
+    Sql {
+        #[clap(flatten)]
+        opts: ShellOptions,
+    },
 }
 
 async fn make_client(
@@ -295,8 +312,285 @@ async fn query(
     Ok(())
 }
 
+fn print_basic_commands() {
+    println!("Basic commands:");
+    println!("  \\?, \\h, help       - Print this help");
+    println!("  \\q, quit, exit, ^D - Exit the shell");
+    println!("  \\l                 - List tables");
+    println!("  \\d <table>         - Describe a table");
+    println!(
+        "  \\f <function>      - List or describe ClickHouse SQL functions"
+    );
+    println!();
+    println!("Or try entering a SQL `SELECT` statement");
+}
+
+async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> {
+    let mut page = WhichPage::First(EmptyScanParams {});
+    let limit = 100.try_into().unwrap();
+    loop {
+        let results = client.timeseries_schema_list(&page, limit).await?;
+        for schema in results.items.iter() {
+            println!("{}", schema.timeseries_name);
+        }
+        if results.next_page.is_some() {
+            if let Some(last) = results.items.last() {
+                page = WhichPage::Next(last.timeseries_name.clone());
+            } else {
+                return Ok(());
+            }
+        } else {
+            return Ok(());
+        }
+    }
+}
+
+async fn describe_virtual_table(
+    client: &Client,
+    table: &str,
+) -> anyhow::Result<()> {
+    match table.parse() {
+        Err(_) => println!("Invalid timeseries name: {table}"),
+        Ok(name) => {
+            if let Some(schema) = client.schema_for_timeseries(&name).await? {
+                let mut cols =
+                    Vec::with_capacity(schema.field_schema.len() + 2);
+                let mut types = cols.clone();
+                for field in schema.field_schema.iter() {
+                    cols.push(field.name.clone());
+                    types.push(field.ty.to_string());
+                }
+                cols.push("timestamp".into());
+                types.push("DateTime64".into());
+
+                if schema.datum_type.is_histogram() {
+                    cols.push("start_time".into());
+                    types.push("DateTime64".into());
+
+                    cols.push("bins".into());
+                    types.push(format!(
+                        "Array[{}]",
+                        schema
+                            .datum_type
+                            .to_string()
+                            .strip_prefix("Histogram")
+                            .unwrap()
+                            .to_lowercase(),
+                    ));
+
+                    cols.push("counts".into());
+                    types.push("Array[u64]".into());
+                } else if schema.datum_type.is_cumulative() {
+                    cols.push("start_time".into());
+                    types.push("DateTime64".into());
+                    cols.push("datum".into());
+                    types.push(schema.datum_type.to_string());
+                } else {
+                    cols.push("datum".into());
+                    types.push(schema.datum_type.to_string());
+                }
+
+                let mut builder = tabled::builder::Builder::default();
+                builder.set_header(cols);
+                builder.push_record(types);
+                println!(
+                    "{}",
+                    builder.build().with(tabled::settings::Style::psql())
+                );
+            } else {
+                println!("No such timeseries: {table}");
+            }
+        }
+    }
+    Ok(())
+}
+
+#[derive(Clone, Debug, Args)]
+struct ShellOptions {
+    /// Print query metadata.
+    #[clap(long = "metadata")]
+    print_metadata: bool,
+    /// Print the original SQL query.
+    #[clap(long = "original")]
+    print_original_query: bool,
+    /// Print the rewritten SQL query that is actually run on the DB.
+    #[clap(long = "rewritten")]
+    print_rewritten_query: bool,
+    /// Print the transformed query, but do not run it.
+    #[clap(long)]
+    transform: Option<String>,
+}
+
+impl Default for ShellOptions {
+    fn default() -> Self {
+        Self {
+            print_metadata: true,
+            print_original_query: false,
+            print_rewritten_query: false,
+            transform: None,
+        }
+    }
+}
+
+fn list_supported_functions() {
+    println!("Subset of ClickHouse SQL functions currently supported");
+    println!(
+        "See https://clickhouse.com/docs/en/sql-reference/functions for more"
+    );
+    println!();
+    for func in function_allow_list().iter() {
+        println!("  {func}");
+    }
+}
+
+fn show_supported_function(name: &str) {
+    if let Some(func) = function_allow_list().iter().find(|f| f.name == name) {
+        println!("{}", func.name);
+        println!("  {}", func.usage);
+        println!("  {}", func.description);
+    } else {
+        println!("No supported function '{name}'");
+    }
+}
+
+fn print_sql_query(query: &str) {
+    println!(
+        "{}",
+        sqlformat::format(
+            &query,
+            &sqlformat::QueryParams::None,
+            sqlformat::FormatOptions { uppercase: true, ..Default::default() }
+        )
+    );
+    println!();
+}
+
+fn print_query_metadata(table: &Table, metadata: &QueryMetadata) {
+    println!("Metadata");
+    println!(" Query ID: {}", metadata.id);
+    println!(" Result rows: {}", table.rows.len());
+    println!(" Time: {:?}", metadata.elapsed);
+    println!(" Read: {}\n", metadata.summary.read);
+}
+
+async fn sql_shell(
+    address: IpAddr,
+    port: u16,
+    log: Logger,
+    opts: ShellOptions,
+) -> anyhow::Result<()> {
+    let client = make_client(address, port, &log).await?;
+
+    // A workaround to ensure the client has all available timeseries when the
+    // shell starts.
+    let dummy = "foo:bar".parse().unwrap();
+    let _ = client.schema_for_timeseries(&dummy).await;
+
+    // Possibly just transform the query, but do not execute it.
+    if let Some(query) = &opts.transform {
+        let transformed = client.transform_query(query).await?;
+        println!(
+            "{}",
+            sqlformat::format(
+                &transformed,
+                &sqlformat::QueryParams::None,
+                sqlformat::FormatOptions {
+                    uppercase: true,
+                    ..Default::default()
+                }
+            )
+        );
+        return Ok(());
+    }
+
+    let mut ed = Reedline::create();
+    let prompt = DefaultPrompt::new(
+        DefaultPromptSegment::Basic("0x".to_string()),
+        DefaultPromptSegment::Empty,
+    );
+    println!("Oximeter SQL shell");
+    println!();
+    print_basic_commands();
+    loop {
+        let sig = ed.read_line(&prompt);
+        match sig {
+            Ok(Signal::Success(buf)) => {
+                let cmd = buf.as_str().trim();
+                match cmd {
+                    "" => continue,
+                    "\\?"
| "\\h" | "help" => print_basic_commands(), + "\\q" | "quit" | "exit" => return Ok(()), + "\\l" | "\\d" => list_virtual_tables(&client).await?, + _ => { + if let Some(table_name) = cmd.strip_prefix("\\d") { + if table_name.is_empty() { + list_virtual_tables(&client).await?; + } else { + describe_virtual_table( + &client, + table_name.trim().trim_end_matches(';'), + ) + .await?; + } + } else if let Some(func_name) = cmd.strip_prefix("\\f") + { + if func_name.is_empty() { + list_supported_functions(); + } else { + show_supported_function( + func_name.trim().trim_end_matches(';'), + ); + } + } else { + match client.query(&buf).await { + Err(e) => println!("Query failed: {e:#?}"), + Ok(QueryResult { + original_query, + rewritten_query, + metadata, + table, + }) => { + println!(); + let mut builder = + tabled::builder::Builder::default(); + builder.set_header(&table.column_names); + for row in table.rows.iter() { + builder.push_record( + row.iter().map(ToString::to_string), + ); + } + if opts.print_original_query { + print_sql_query(&original_query); + } + if opts.print_rewritten_query { + print_sql_query(&rewritten_query); + } + println!( + "{}\n", + builder.build().with( + tabled::settings::Style::psql() + ) + ); + if opts.print_metadata { + print_query_metadata(&table, &metadata); + } + } + } + } + } + } + } + Ok(Signal::CtrlD) => return Ok(()), + Ok(Signal::CtrlC) => continue, + err => println!("err: {err:?}"), + } + } +} + #[tokio::main] -async fn main() { +async fn main() -> anyhow::Result<()> { + usdt::register_probes().context("Failed to register USDT probes")?; + let args = OxDb::parse(); let decorator = slog_term::TermDecorator::new().build(); let drain = slog_term::FullFormat::new(decorator) @@ -308,12 +602,10 @@ async fn main() { match args.cmd { Subcommand::Describe => describe_data(), Subcommand::Populate { populate_args } => { - populate(args.address, args.port, log, populate_args) - .await - .unwrap(); + populate(args.address, args.port, log, populate_args).await? } Subcommand::Wipe => { - wipe_single_node_db(args.address, args.port, log).await.unwrap() + wipe_single_node_db(args.address, args.port, log).await? } Subcommand::Query { timeseries_name, @@ -342,8 +634,11 @@ async fn main() { start, end, ) - .await - .unwrap(); + .await?; + } + Subcommand::Sql { opts } => { + sql_shell(args.address, args.port, log, opts).await? } } + Ok(()) } diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index d295d0dcdfd..e7f7b6e1929 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -8,6 +8,7 @@ use crate::model; use crate::query; +use crate::sql::RestrictedQuery; use crate::Error; use crate::Metric; use crate::Target; @@ -22,9 +23,11 @@ use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; +use indexmap::IndexMap; use oximeter::types::Sample; use regex::Regex; use regex::RegexBuilder; +use reqwest::header::HeaderMap; use slog::debug; use slog::error; use slog::info; @@ -41,6 +44,8 @@ use std::ops::Bound; use std::path::Path; use std::path::PathBuf; use std::sync::OnceLock; +use std::time::Duration; +use std::time::Instant; use tokio::fs; use tokio::sync::Mutex; use uuid::Uuid; @@ -51,6 +56,137 @@ mod probes { fn query__done(_: &usdt::UniqueId) {} } +/// A count of bytes / rows accessed during a query. 
+#[derive(Clone, Copy, Debug)]
+pub struct IoCount {
+    pub bytes: u64,
+    pub rows: u64,
+}
+
+impl std::fmt::Display for IoCount {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{} rows ({} bytes)", self.rows, self.bytes)
+    }
+}
+
+/// Summary of the I/O and duration of a query.
+#[derive(Clone, Copy, Debug, serde::Deserialize)]
+#[serde(try_from = "serde_json::Value")]
+pub struct QuerySummary {
+    /// The bytes and rows read by the query.
+    pub read: IoCount,
+    /// The bytes and rows written by the query.
+    pub written: IoCount,
+}
+
+impl TryFrom<serde_json::Value> for QuerySummary {
+    type Error = Error;
+
+    fn try_from(j: serde_json::Value) -> Result<Self, Self::Error> {
+        use serde_json::Map;
+        use serde_json::Value;
+        use std::str::FromStr;
+
+        let Value::Object(map) = j else {
+            return Err(Error::Database(String::from(
+                "Expected a JSON object for a metadata summary",
+            )));
+        };
+
+        fn unpack_summary_value<T>(
+            map: &Map<String, Value>,
+            key: &str,
+        ) -> Result<T, Error>
+        where
+            T: FromStr,
+            <T as FromStr>::Err: std::error::Error,
+        {
+            let value = map.get(key).ok_or_else(|| {
+                Error::MissingHeaderKey { key: key.to_string() }
+            })?;
+            let Value::String(v) = value else {
+                return Err(Error::BadMetadata {
+                    key: key.to_string(),
+                    msg: String::from("Expected a string value"),
+                });
+            };
+            v.parse::<T>().map_err(|e| Error::BadMetadata {
+                key: key.to_string(),
+                msg: e.to_string(),
+            })
+        }
+        let rows_read: u64 = unpack_summary_value(&map, "read_rows")?;
+        let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?;
+        let rows_written: u64 = unpack_summary_value(&map, "written_rows")?;
+        let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?;
+        Ok(Self {
+            read: IoCount { bytes: bytes_read, rows: rows_read },
+            written: IoCount { bytes: bytes_written, rows: rows_written },
+        })
+    }
+}
+
+/// Basic metadata about the resource usage of a single SQL query.
+#[derive(Clone, Copy, Debug)]
+pub struct QueryMetadata {
+    /// The database-assigned query ID.
+    pub id: Uuid,
+    /// The total duration of the query (network plus execution).
+    pub elapsed: Duration,
+    /// Summary of the data read and written.
+    pub summary: QuerySummary,
+}
+
+impl QueryMetadata {
+    fn from_headers(
+        elapsed: Duration,
+        headers: &HeaderMap,
+    ) -> Result<Self, Error> {
+        fn get_header<'a>(
+            map: &'a HeaderMap,
+            key: &'a str,
+        ) -> Result<&'a str, Error> {
+            let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey {
+                key: key.to_string(),
+            })?;
+            std::str::from_utf8(hdr.as_bytes())
+                .map_err(|err| Error::Database(err.to_string()))
+        }
+        let summary =
+            serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?)
+                .map_err(|err| Error::Database(err.to_string()))?;
+        let id = get_header(headers, "X-ClickHouse-Query-Id")?
+            .parse()
+            .map_err(|err: uuid::Error| Error::Database(err.to_string()))?;
+        Ok(Self { id, elapsed, summary })
+    }
+}
+
+/// A tabular result from a SQL query against a timeseries.
+#[derive(Clone, Debug, Default, serde::Serialize)]
+pub struct Table {
+    /// The name of each column in the result set.
+    pub column_names: Vec<String>,
+    /// The rows of the result set; each row has one value per column.
+    pub rows: Vec<Vec<serde_json::Value>>,
+}
+
+/// The full result of running a SQL query against a timeseries.
+#[derive(Clone, Debug)]
+pub struct QueryResult {
+    /// The query as written by the client.
+    pub original_query: String,
+    /// The rewritten query, run against the JOINed representation of the
+    /// timeseries.
+    ///
+    /// This is the query that is actually run in the database itself.
+    pub rewritten_query: String,
+    /// Metadata about the resource usage of the query.
+    pub metadata: QueryMetadata,
+    /// The result of the query, with column names and rows.
+    pub table: Table,
+}
+
 /// A `Client` to the ClickHouse metrics database.
 #[derive(Debug)]
 pub struct Client {
@@ -89,6 +225,76 @@ impl Client {
         Ok(())
     }
 
+    /// Transform a SQL query against a timeseries, but do not execute it.
+    pub async fn transform_query(
+        &self,
+        query: impl AsRef<str>,
+    ) -> Result<String, Error> {
+        let restricted = RestrictedQuery::new(query.as_ref())?;
+        restricted.to_oximeter_sql(&*self.schema.lock().await)
+    }
+
+    /// Run a SQL query against a timeseries.
+    pub async fn query(
+        &self,
+        query: impl AsRef<str>,
+    ) -> Result<QueryResult, Error> {
+        let original_query = query.as_ref().trim_end_matches(';');
+        let ox_sql = self.transform_query(original_query).await?;
+        let rewritten = format!("{ox_sql} FORMAT JSONEachRow");
+        debug!(
+            self.log,
+            "rewrote restricted query";
+            "original_sql" => &original_query,
+            "rewritten_sql" => &rewritten,
+        );
+        let request = self
+            .client
+            .post(&self.url)
+            .query(&[
+                ("output_format_json_quote_64bit_integers", "0"),
+                ("database", crate::DATABASE_NAME),
+            ])
+            .body(rewritten.clone());
+        let query_start = Instant::now();
+        let response = handle_db_response(
+            request
+                .send()
+                .await
+                .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?,
+        )
+        .await?;
+        let metadata = QueryMetadata::from_headers(
+            query_start.elapsed(),
+            response.headers(),
+        )?;
+        let text = response.text().await.unwrap();
+        let mut table = Table::default();
+        for line in text.lines() {
+            let row =
+                serde_json::from_str::<IndexMap<String, serde_json::Value>>(
+                    line.trim(),
+                )
+                .unwrap();
+            if table.column_names.is_empty() {
+                table.column_names.extend(row.keys().cloned())
+            } else {
+                assert!(table
+                    .column_names
+                    .iter()
+                    .zip(row.keys())
+                    .all(|(k1, k2)| k1 == k2));
+            }
+            table.rows.push(row.into_values().collect());
+        }
+        Ok(QueryResult {
+            original_query: original_query.to_string(),
+            rewritten_query: rewritten,
+            metadata,
+            table,
+        })
+    }
+
     /// Select timeseries from criteria on the fields and start/end timestamps.
     pub async fn select_timeseries_with(
         &self,
@@ -271,7 +477,7 @@ impl Client {
         ResultsPage::new(schema, &dropshot::EmptyScanParams {}, |schema, _| {
             schema.timeseries_name.clone()
         })
-        .map_err(|e| Error::Database(e.to_string()))
+        .map_err(|err| Error::Database(err.to_string()))
     }
 
     /// Read the available schema versions in the provided directory.
@@ -1181,13 +1387,14 @@ async fn handle_db_response(
     // NOTE: ClickHouse returns 404 for all errors (so far encountered). We pull the text from
     // the body if possible, which contains the actual error from the database.
let body = response.text().await.unwrap_or_else(|e| e.to_string()); - Err(Error::Database(body)) + Err(Error::Database(format!("Query failed: {body}"))) } } #[cfg(test)] mod tests { use super::*; + use crate::model::OXIMETER_VERSION; use crate::query; use crate::query::field_table_name; use bytes::Bytes; @@ -4261,4 +4468,87 @@ mod tests { db.cleanup().await.expect("Failed to cleanup ClickHouse server"); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_sql_query_output() { + let logctx = test_setup_log("test_sql_query_output"); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + client + .initialize_db_with_version(false, OXIMETER_VERSION) + .await + .expect("Failed to initialize timeseries database"); + let (_target, metrics, samples) = setup_select_test(); + client.insert_samples(&samples).await.unwrap(); + + // Sanity check that we get exactly the number of samples we expected. + let res = client + .query("SELECT count() AS total FROM service:request_latency") + .await + .unwrap(); + assert_eq!(res.table.rows.len(), 1); + let serde_json::Value::Number(n) = &res.table.rows[0][0] else { + panic!("Expected exactly 1 row with 1 item"); + }; + assert_eq!(n.as_u64().unwrap(), samples.len() as u64); + + // Assert grouping by the keys results in exactly the number of samples + // expected for each timeseries. + let res = client + .query( + "SELECT count() AS total \ + FROM service:request_latency \ + GROUP BY timeseries_key; \ + ", + ) + .await + .unwrap(); + assert_eq!(res.table.rows.len(), metrics.len()); + for row in res.table.rows.iter() { + assert_eq!(row.len(), 1); + let serde_json::Value::Number(n) = &row[0] else { + panic!("Expected a number in each row"); + }; + assert_eq!( + n.as_u64().unwrap(), + (samples.len() / metrics.len()) as u64 + ); + } + + // Read test SQL and make sure we're getting expected results. 
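+        // Each numbered directory under `test-output/sql/` (e.g. `00/`, `01/`)
+        // holds a `query.sql` to execute and a `result.txt` with the expected
+        // table, checked via `expectorate`.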
+        let sql_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("test-output")
+            .join("sql");
+        let mut rd = tokio::fs::read_dir(&sql_dir)
+            .await
+            .expect("failed to read SQL test directory");
+        while let Some(next_entry) =
+            rd.next_entry().await.expect("failed to read directory entry")
+        {
+            let sql_file = next_entry.path().join("query.sql");
+            let result_file = next_entry.path().join("result.txt");
+            let query = tokio::fs::read_to_string(&sql_file)
+                .await
+                .unwrap_or_else(|_| {
+                    panic!(
+                        "failed to read test SQL query in '{}'",
+                        sql_file.display()
+                    )
+                });
+            let res = client
+                .query(&query)
+                .await
+                .expect("failed to execute test query");
+            expectorate::assert_contents(
+                result_file,
+                &serde_json::to_string_pretty(&res.table).unwrap(),
+            );
+        }
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
 }
diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs
index 9029319048c..f632a531aff 100644
--- a/oximeter/db/src/lib.rs
+++ b/oximeter/db/src/lib.rs
@@ -33,9 +33,13 @@ use thiserror::Error;
 mod client;
 pub mod model;
 pub mod query;
+pub mod sql;
+
 pub use client::Client;
 pub use client::DbWrite;
-
+pub use client::QueryMetadata;
+pub use client::QueryResult;
+pub use client::Table;
 pub use model::OXIMETER_VERSION;
 
 #[derive(Debug, Error)]
@@ -47,8 +51,14 @@ pub enum Error {
     #[error("Telemetry database unavailable: {0}")]
     DatabaseUnavailable(String),
 
+    #[error("Missing expected metadata header key '{key}'")]
+    MissingHeaderKey { key: String },
+
+    #[error("Invalid or malformed query metadata for key '{key}': {msg}")]
+    BadMetadata { key: String, msg: String },
+
     /// An error interacting with the telemetry database
     #[error("Error interacting with telemetry database: {0}")]
     Database(String),
 
     /// A schema provided when collecting samples did not match the expected schema
@@ -123,6 +133,9 @@ pub enum Error {
 
     #[error("Schema update versions must be sequential without gaps")]
     NonSequentialSchemaVersions,
+
+    #[error("SQL error: {0}")]
+    Sql(#[from] sql::Error),
 }
 
 impl From<model::DbTimeseriesSchema> for TimeseriesSchema {
@@ -290,6 +303,7 @@ mod tests {
         assert!(TimeseriesName::try_from(":b").is_err());
         assert!(TimeseriesName::try_from("a:").is_err());
         assert!(TimeseriesName::try_from("123").is_err());
+        assert!(TimeseriesName::try_from("no:no:no").is_err());
     }
 
     // Validates that the timeseries_key stability for a sample is stable.
diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs
index 2caefb24c37..92127695739 100644
--- a/oximeter/db/src/query.rs
+++ b/oximeter/db/src/query.rs
@@ -296,6 +296,7 @@ impl SelectQueryBuilder {
     }
 }
 
+/// Return the name of the measurements table for a datum type.
 pub(crate) fn measurement_table_name(ty: DatumType) -> String {
     format!("measurements_{}", ty.to_string().to_lowercase())
 }
@@ -335,6 +336,7 @@ pub struct FieldSelector {
     comparison: Option<FieldComparison>,
 }
 
+/// Return the name of the field table for the provided field type.
 pub(crate) fn field_table_name(ty: FieldType) -> String {
     format!("fields_{}", ty.to_string().to_lowercase())
 }
diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs
new file mode 100644
index 00000000000..b7a2232cf4a
--- /dev/null
+++ b/oximeter/db/src/sql/mod.rs
@@ -0,0 +1,1350 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Run SQL queries against the timeseries database.
+//!
+//! # Overview
+//!
+//! `oximeter` collects and stores samples from timeseries. The schema for those +//! samples is defined by applications, using the [`Target`](oximeter::Target) +//! and [`Metric`](oximeter::Metric) traits. Samples from these timeseries are +//! not stored in explicit tables, however. They are "unrolled" into the fields +//! and measurements, which are stored in a table based on their _data type_. +//! For example, `String` fields are stored in the `oximeter.fields_string` +//! table. (See RFD 161 for more details.) +//! +//! This arrangement is flexible and simple, since we can statically define the +//! tables we need, rather than say create a new table for each timeseries +//! schema. However, the drawback of this is that the timeseries data is not +//! easily queried directly. The data is split across many tables, and +//! interleaved with other timeseries, which may not even share a schema. +//! +//! The tools in this module are for making "normal" SQL queries transparently +//! act on the "virtual tables" that are implied by each timeseries. It's +//! effectively a SQL-to-SQL transpiler, converting queries against the +//! timeseries into one or more queries against the actual tables in ClickHouse. + +// Copyright 2023 Oxide Computer Company + +use crate::query::field_table_name; +use crate::query::measurement_table_name; +use crate::DatumType; +use crate::Error as OxdbError; +use crate::FieldType; +use crate::TimeseriesName; +use crate::TimeseriesSchema; +use indexmap::IndexSet; +use sqlparser::ast::BinaryOperator; +use sqlparser::ast::Cte; +use sqlparser::ast::Distinct; +use sqlparser::ast::Expr; +use sqlparser::ast::Ident; +use sqlparser::ast::Join; +use sqlparser::ast::JoinConstraint; +use sqlparser::ast::JoinOperator; +use sqlparser::ast::ObjectName; +use sqlparser::ast::OrderByExpr; +use sqlparser::ast::Query; +use sqlparser::ast::Select; +use sqlparser::ast::SelectItem; +use sqlparser::ast::SetExpr; +use sqlparser::ast::Statement; +use sqlparser::ast::TableAlias; +use sqlparser::ast::TableFactor; +use sqlparser::ast::TableWithJoins; +use sqlparser::ast::Value; +use sqlparser::ast::With; +use sqlparser::dialect::AnsiDialect; +use sqlparser::dialect::Dialect; +use sqlparser::parser::Parser; +use sqlparser::parser::ParserError; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::ops::ControlFlow; +use std::sync::OnceLock; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("SQL parsing error")] + Parser(#[from] ParserError), + + #[error("Unsupported SQL: {0}")] + UnsupportedSql(&'static str), + + #[error("Unsupported function: '{func}'")] + UnsupportedFunction { func: String }, + + #[error("Invalid column '{name}' for timeseries '{timeseries_name}'")] + InvalidColumn { name: String, timeseries_name: String }, + + #[error( + "Table name '{table_name}' in select query does not match \ + timeseries name '{timeseries_name}'" + )] + TableInSelectIsNotTimeseries { table_name: String, timeseries_name: String }, + + #[error("Invalid timeseries name: '{name}'")] + InvalidTimeseriesName { name: String }, +} + +/// The oximeter timeseries SQL dialect. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct OxdbDialect; + +impl Dialect for OxdbDialect { + fn is_identifier_start(&self, ch: char) -> bool { + AnsiDialect {}.is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + AnsiDialect {}.is_identifier_part(ch) || ch == ':' + } +} + +/// A SQL statement that is probably supported. 
+///
+/// There's a big range of statements that are not supported. This is guaranteed
+/// to be a single select statement, where all the items being selected FROM
+/// are:
+///
+/// - concrete tables that could be timeseries (valid names)
+/// - a subquery against a restricted query
+#[derive(Clone, Debug)]
+pub struct RestrictedQuery {
+    safe_sql: SafeSql,
+    query: Query,
+    timeseries: IndexSet<TimeseriesName>,
+}
+
+impl std::fmt::Display for RestrictedQuery {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.query)
+    }
+}
+
+macro_rules! unsupported {
+    ($msg:literal) => {
+        Err(OxdbError::from(Error::UnsupportedSql($msg)))
+    };
+}
+
+/// A helper type to preprocess any ClickHouse-specific SQL, and present a
+/// known-safe version of it to the main `sqlparser` code.
+///
+/// This is currently used to handle ASOF JOINs, which are a ClickHouse-specific
+/// JOIN that joins rows based on a "closest match" condition. However, a
+/// standard SQL parser will take an expression like:
+///
+/// ```sql
+/// SELECT foo ASOF JOIN bar
+/// ```
+///
+/// And interpret the `ASOF` as an alias for `foo`, as if one had written
+///
+/// ```sql
+/// SELECT foo AS asof JOIN bar
+/// ```
+///
+/// This basically detects and removes a bare `ASOF` in that case, so the parser
+/// can run normally.
+#[derive(Clone, Debug)]
+struct SafeSql {
+    original: String,
+    safe: String,
+}
+
+impl SafeSql {
+    fn new(sql: impl AsRef<str>) -> Self {
+        // The regex crate doesn't support look-arounds, so we'll have to
+        // manually find sequences like `ASOF JOIN`, that are not preceded by
+        // `AS`.
+        let sql = sql.as_ref().trim().trim_end_matches(';');
+        let mut original = Vec::new();
+        let mut safe = Vec::new();
+        let mut tokens = sql.split_ascii_whitespace().peekable();
+        while let Some(token) = tokens.next() {
+            // Always push the current token.
+            if token.parse::<TimeseriesName>().is_ok() {
+                let tok = format!("\"{token}\"");
+                safe.push(tok.clone());
+                original.push(tok);
+            } else {
+                safe.push(token.to_string());
+                original.push(token.to_string());
+            }
+
+            // If the next token is ASOF, and the current is _not_ AS, then this
+            // is something like `select foo asof join bar`, and we want to chop
+            // out the `asof`. Consume the next token, and break the SQL string,
+            // by pushing a new chunk at the end.
+            if let Some(next_token) = tokens.peek() {
+                if !token.eq_ignore_ascii_case("as")
+                    && next_token.eq_ignore_ascii_case("asof")
+                {
+                    original.push(tokens.next().unwrap().to_string());
+                }
+            }
+        }
+        Self { original: original.join(" "), safe: safe.join(" ") }
+    }
+
+    fn safe_sql(&self) -> &str {
+        &self.safe
+    }
+}
+
+impl RestrictedQuery {
+    /// Construct a new restricted query.
+    pub fn new(sql: impl AsRef<str>) -> Result<Self, OxdbError> {
+        let safe_sql = SafeSql::new(sql);
+        let statements = Parser::parse_sql(&OxdbDialect, &safe_sql.safe_sql())
+            .map_err(Error::from)?;
+        if statements.len() != 1 {
+            return unsupported!("Only a single SQL statement is supported");
+        }
+
+        let statement = statements.into_iter().next().unwrap();
+        let Statement::Query(mut query) = statement else {
+            return unsupported!("Statement must be a SELECT query");
+        };
+
+        // Walk the AST before doing any real processing or transformation, and
+        // validate any function calls are on the allow-list.
+        let maybe_denied_function =
+            sqlparser::ast::visit_expressions(&query, |expr| {
+                if let Expr::Function(func) = expr {
+                    if let Some(name) = func.name.0.first() {
+                        if !function_allow_list()
+                            .iter()
+                            .any(|f| f.name == name.value.as_str())
+                        {
+                            return ControlFlow::Break(name.value.clone());
+                        }
+                    }
+                }
+                ControlFlow::Continue(())
+            });
+        if let ControlFlow::Break(func) = maybe_denied_function {
+            return Err(OxdbError::from(Error::UnsupportedFunction { func }));
+        };
+
+        let timeseries = Self::process_query(&mut query)?;
+        Ok(Self { safe_sql, query: *query, timeseries })
+    }
+
+    /// Convert the original SQL into a query specifically for the `oximeter`
+    /// timeseries table organization.
+    pub fn to_oximeter_sql(
+        &self,
+        timeseries_schema: &BTreeMap<TimeseriesName, TimeseriesSchema>,
+    ) -> Result<String, OxdbError> {
+        self.generate_timeseries_ctes(&timeseries_schema).map(|cte_tables| {
+            if cte_tables.is_empty() {
+                // The query didn't reference any timeseries at all, let's just
+                // return it
+                self.safe_sql.original.clone()
+            } else {
+                // There are some timeseries referenced. Let's return a query
+                // constructed by building the CTEs, and then the _original_
+                // SQL, which may have `ASOF JOIN`s in it.
+                format!(
+                    "{} {}",
+                    With { recursive: false, cte_tables },
+                    self.safe_sql.original,
+                )
+            }
+        })
+    }
+
+    // For each timeseries named in `self`, generate a CTE that creates the
+    // virtual table for that timeseries by joining all its component parts.
+    fn generate_timeseries_ctes(
+        &self,
+        timeseries_schema: &BTreeMap<TimeseriesName, TimeseriesSchema>,
+    ) -> Result<Vec<Cte>, OxdbError> {
+        let mut ctes = Vec::with_capacity(self.timeseries.len());
+        for timeseries in self.timeseries.iter() {
+            let schema =
+                timeseries_schema.get(timeseries).ok_or_else(|| {
+                    OxdbError::TimeseriesNotFound(
+                        timeseries.as_str().to_owned(),
+                    )
+                })?;
+            ctes.push(Self::build_timeseries_cte(schema));
+        }
+        Ok(ctes)
+    }
+
+    // Given a timeseries schema, return a CTE which generates the equivalent
+    // virtual table.
+    //
+    // As timeseries samples are ingested, we "unroll" them in various ways, and
+    // store them in a set of normalized tables. These contain the _fields_ (one
+    // table per field data type) and the measurements (one table per
+    // measurement data type). This method reverses that process, creating a
+    // single, virtual table that represents all samples from the timeseries
+    // (plural) of the same schema.
+    //
+    // It generates a CTE like so:
+    //
+    // ```sql
+    // WITH {timeseries_name} AS (
+    //     SELECT
+    //         timeseries_key,
+    //         filter_on_{field_name0}.field_value as {field_name0},
+    //         filter_on_{field_name1}.field_value as {field_name1},
+    //         ...
+    //         measurements.timestamp AS timestamp,
+    //         measurements.datum as datum,
+    //     FROM
+    //         (
+    //             SELECT DISTINCT
+    //                 timeseries_key,
+    //                 field_value
+    //             FROM
+    //                 fields_{field_type}
+    //             WHERE
+    //                 timeseries_name = '{timeseries_name}'
+    //                 AND field_name = '{field_name0}'
+    //         ) AS filter_on_{field_name0}
+    //         JOIN (
+    //             ... select next field table
+    //         ) AS filter_on_{field_name1} ON filter_on_{field_name0}.timeseries_key = filter_on_{field_name1}.timeseries_key
+    //         ...
+    //         JOIN (
+    //             SELECT
+    //                 timeseries_key,
+    //                 timestamp,
+    //                 datum,
+    //             FROM
+    //                 measurements_{datum_type}
+    //             WHERE
+    //                 timeseries_name = '{timeseries_name}'
+    //         ) AS measurements ON filter_on_fieldN.timeseries_key = measurements.timeseries_key
+    //     ORDER BY
+    //         timeseries_key,
+    //         timestamp
+    // )
+    // ```
+    //
+    // In other words, it should generate a CTE that one can query as if the
+    // timeseries itself were an actual table in the database, like:
+    //
+    // ```
+    // timeseries_key | field_name0 | field_name1 | ... | timestamp | datum
+    // ---------------+-------------+-------------+ ... +-----------+------
+    // key0           | field0_0    | field0_1    | ... | t0        | d0
+    // key0           | field0_0    | field0_1    | ... | t1        | d1
+    // key0           | field0_0    | field0_1    | ... | t2        | d2
+    // key0           | field0_0    | field0_1    | ... | t3        | d3
+    // ...
+    // key1           | field1_0    | field1_1    | ... | t0        | d0
+    // key1           | field1_0    | field1_1    | ... | t1        | d1
+    // key1           | field1_0    | field1_1    | ... | t2        | d2
+    // key1           | field1_0    | field1_1    | ... | t3        | d3
+    // ...
+    // ```
+    //
+    // In this case, all rows with `key0` are from the "first" timeseries with
+    // this schema. `fieldX_Y` indicates the value of the Yth field for the
+    // timeseries with key `keyX`.
+    fn build_timeseries_cte(schema: &TimeseriesSchema) -> Cte {
+        // First build each query against the relevant field tables.
+        //
+        // These are the `SELECT DISTINCT ... FROM fields_{field_type}`
+        // subqueries above.
+        let mut field_queries = Vec::with_capacity(schema.field_schema.len());
+        for field_schema in schema.field_schema.iter() {
+            let field_query = Self::build_field_query(
+                &schema.timeseries_name,
+                &field_schema.name,
+                &field_schema.ty,
+            );
+            field_queries.push((field_schema.name.as_str(), field_query));
+        }
+
+        // Generate the measurement query, the last subquery in the main CTE.
+        let measurement_query = Self::build_measurement_query(
+            &schema.timeseries_name,
+            &schema.datum_type,
+        );
+
+        // The "top-level" columns are the columns outputted by the CTE itself.
+        //
+        // These are the aliased columns of the full, reconstructed table
+        // representing the timeseries. This makes the timeseries_key available,
+        // as well as each field aliased to the actual field name, and the
+        // measurements.
+        let mut top_level_projections =
+            Vec::with_capacity(field_queries.len() + 2);
+
+        // Create the projection of the top-level timeseries_key.
+        //
+        // This is taken from the first field, which always exists, since
+        // timeseries have at least one field. This creates the expression:
+        // `filter_on_{field_name}.timeseries_key AS timeseries_key`
+        let timeseries_key_projection = SelectItem::ExprWithAlias {
+            expr: Expr::CompoundIdentifier(vec![
+                Self::field_subquery_alias(field_queries[0].0),
+                Self::str_to_ident("timeseries_key"),
+            ]),
+            alias: Self::str_to_ident("timeseries_key"),
+        };
+        top_level_projections.push(timeseries_key_projection);
+
+        // We'll build a big `TableWithJoins` to express the entire JOIN
+        // operation between all fields and the measurements. This is the "meat"
+        // of the CTE for this timeseries, joining the constituent records into
+        // the virtual table for this schema.
+        //
+        // We select first from the subquery specifying the first field query.
+        let mut cte_from = TableWithJoins {
+            relation: TableFactor::Derived {
+                lateral: false,
+                subquery: Self::select_to_query(field_queries[0].1.clone()),
+                alias: Some(TableAlias {
+                    name: Self::field_subquery_alias(field_queries[0].0),
+                    columns: vec![],
+                }),
+            },
+            joins: Vec::with_capacity(field_queries.len()),
+        };
+
+        // For all field queries, create a projection for the field_value,
+        // aliased as the field name.
+        for (i, (field_name, query)) in field_queries.iter().enumerate() {
+            // Select the field_value from this field query, renaming it to
+            // the actual field name.
+            let projection = SelectItem::ExprWithAlias {
+                expr: Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(field_name),
+                    Self::str_to_ident("field_value"),
+                ]),
+                alias: Self::str_to_ident(field_name),
+            };
+            top_level_projections.push(projection);
+
+            // We've inserted the first subquery as the `from.relation` field
+            // in the main CTE we're building. We need to skip that one, even
+            // though we added its aliased `field_value` column to the top
+            // level projections.
+            //
+            // Any additional field subqueries are inserted in the JOIN portion
+            // of the CTE.
+            if i == 0 {
+                continue;
+            }
+            let relation = TableFactor::Derived {
+                lateral: false,
+                subquery: Self::select_to_query(query.clone()),
+                alias: Some(TableAlias {
+                    name: Self::field_subquery_alias(field_name),
+                    columns: vec![],
+                }),
+            };
+
+            // The join is always INNER, and is on the timeseries_key only.
+            // ClickHouse does not support `USING <column>` when using multiple
+            // JOINs simultaneously, so we always write this as an `ON`
+            // constraint, between the previous field subquery and this one.
+            //
+            // I.e., `ON filter_on_foo.timeseries_key = filter_on_bar.timeseries_key`
+            let last_field_name = &field_queries[i - 1].0;
+            let constraints = Expr::BinaryOp {
+                left: Box::new(Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(last_field_name),
+                    Self::str_to_ident("timeseries_key"),
+                ])),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::CompoundIdentifier(vec![
+                    Self::field_subquery_alias(field_name),
+                    Self::str_to_ident("timeseries_key"),
+                ])),
+            };
+            let join_operator =
+                JoinOperator::Inner(JoinConstraint::On(constraints));
+            cte_from.joins.push(Join { relation, join_operator });
+        }
+
+        // Finally, we need to project and join in the measurements table.
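+        //
+        // For a scalar f64 timeseries (continuing the hypothetical example
+        // above), this step adds roughly the following to the JOIN list:
+        //
+        // ```sql
+        // JOIN (
+        //     SELECT timeseries_key, timestamp, datum
+        //     FROM measurements_f64
+        //     WHERE timeseries_name = 'service:request_latency'
+        // ) AS measurements
+        //     ON filter_on_name.timeseries_key = measurements.timeseries_key
+        // ```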
+        let datum_columns = Self::datum_type_to_columns(&schema.datum_type);
+        for col in datum_columns.iter() {
+            let projection = SelectItem::ExprWithAlias {
+                expr: Expr::CompoundIdentifier(vec![
+                    Self::str_to_ident("measurements"),
+                    Self::str_to_ident(col),
+                ]),
+                alias: Self::str_to_ident(col),
+            };
+            top_level_projections.push(projection);
+        }
+        let relation = TableFactor::Derived {
+            lateral: false,
+            subquery: Self::select_to_query(measurement_query),
+            alias: Some(TableAlias {
+                name: Self::str_to_ident("measurements"),
+                columns: vec![],
+            }),
+        };
+        let constraints = Expr::BinaryOp {
+            left: Box::new(Expr::CompoundIdentifier(vec![
+                Self::field_subquery_alias(
+                    &schema.field_schema.last().unwrap().name,
+                ),
+                Self::str_to_ident("timeseries_key"),
+            ])),
+            op: BinaryOperator::Eq,
+            right: Box::new(Expr::CompoundIdentifier(vec![
+                Self::str_to_ident("measurements"),
+                Self::str_to_ident("timeseries_key"),
+            ])),
+        };
+        let join_operator =
+            JoinOperator::Inner(JoinConstraint::On(constraints));
+        cte_from.joins.push(Join { relation, join_operator });
+
+        // To build the virtual table for all the timeseries, we need to sort
+        // the samples as if they were inserted into the table itself.
+        // ClickHouse partitions the tables dynamically since we're using a
+        // MergeTree engine, which groups and repacks rows in the background.
+        //
+        // We'll impose a consistent sorting order here. Without it, results
+        // are inconsistent, since the different data parts of the
+        // measurements tables are not read in order every time.
+        let order_by = top_level_projections
+            .iter()
+            .filter_map(|proj| {
+                let SelectItem::ExprWithAlias { alias, .. } = &proj else {
+                    unreachable!();
+                };
+                if alias.value == "timeseries_key"
+                    || alias.value == "start_time"
+                    || alias.value == "timestamp"
+                {
+                    Some(OrderByExpr {
+                        expr: Expr::Identifier(alias.clone()),
+                        asc: None,
+                        nulls_first: None,
+                    })
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        // We now have all the subqueries joined together, plus the columns
+        // we're projecting from that join result. We need to build the final
+        // CTE that represents the full virtual timeseries table.
+        let alias = TableAlias {
+            name: Ident {
+                value: schema.timeseries_name.to_string(),
+                quote_style: Some('"'),
+            },
+            columns: vec![],
+        };
+        let top_level_select = Select {
+            distinct: None,
+            top: None,
+            projection: top_level_projections,
+            into: None,
+            from: vec![cte_from],
+            lateral_views: vec![],
+            selection: None,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        };
+        let mut query = Self::select_to_query(top_level_select);
+        query.order_by = order_by;
+        Cte { alias, query, from: None }
+    }
+
+    // Create a SQL parser `Ident` with the given name.
+    fn str_to_ident(s: &str) -> Ident {
+        Ident { value: s.to_string(), quote_style: None }
+    }
+
+    // Return an `Ident` alias for a subquery of a specific field table.
+    //
+    // E.g., the `filter_on_foo` in `(SELECT DISTINCT ... ) AS filter_on_foo`.
+    fn field_subquery_alias(field_name: &str) -> Ident {
+        Self::str_to_ident(format!("filter_on_{field_name}").as_str())
+    }
+
+    // Return the required measurement columns for a specific datum type.
+    //
+    // Scalar measurements have only a timestamp and datum. Cumulative counters
+    // have those plus a start_time. Histograms have those plus the bins and
+    // counts.
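+    //
+    // Illustratively (with example `DatumType` variants):
+    //
+    //   I64 / F64 / Bool -> ["timestamp", "datum"]
+    //   CumulativeI64    -> ["start_time", "timestamp", "datum"]
+    //   HistogramF64     -> ["start_time", "timestamp", "bins", "counts"]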
+    fn datum_type_to_columns(
+        datum_type: &DatumType,
+    ) -> &'static [&'static str] {
+        if datum_type.is_histogram() {
+            &["start_time", "timestamp", "bins", "counts"]
+        } else if datum_type.is_cumulative() {
+            &["start_time", "timestamp", "datum"]
+        } else {
+            &["timestamp", "datum"]
+        }
+    }
+
+    fn select_to_query(select: Select) -> Box<Query> {
+        Box::new(Query {
+            with: None,
+            body: Box::new(SetExpr::Select(Box::new(select))),
+            order_by: vec![],
+            limit: None,
+            offset: None,
+            fetch: None,
+            locks: vec![],
+        })
+    }
+
+    // Build a single subquery which selects the unique fields with the
+    // provided name. E.g., this creates:
+    //
+    // ```sql
+    // SELECT DISTINCT timeseries_key,
+    //     field_value
+    // FROM
+    //     fields_{field_type}
+    // WHERE
+    //     timeseries_name = '{timeseries_name}'
+    //     AND field_name = '{field_name}'
+    // ```
+    fn build_field_query(
+        timeseries_name: &TimeseriesName,
+        field_name: &str,
+        field_type: &FieldType,
+    ) -> Select {
+        // FROM fields_{field_type}
+        let from = TableWithJoins {
+            relation: TableFactor::Table {
+                name: ObjectName(vec![Self::str_to_ident(&field_table_name(
+                    *field_type,
+                ))]),
+                alias: None,
+                args: None,
+                with_hints: vec![],
+            },
+            joins: vec![],
+        };
+
+        // SELECT timeseries_key, field_value
+        let projection = vec![
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(
+                "timeseries_key",
+            ))),
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(
+                "field_value",
+            ))),
+        ];
+
+        // WHERE timeseries_name = '{timeseries_name}' AND field_name = '{field_name}'
+        let selection = Some(Expr::BinaryOp {
+            left: Box::new(Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Self::str_to_ident(
+                    "timeseries_name",
+                ))),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::Value(Value::SingleQuotedString(
+                    timeseries_name.to_string(),
+                ))),
+            }),
+            op: BinaryOperator::And,
+            right: Box::new(Expr::BinaryOp {
+                left: Box::new(Expr::Identifier(Self::str_to_ident(
+                    "field_name",
+                ))),
+                op: BinaryOperator::Eq,
+                right: Box::new(Expr::Value(Value::SingleQuotedString(
+                    field_name.to_string(),
+                ))),
+            }),
+        });
+
+        Select {
+            distinct: Some(Distinct::Distinct),
+            top: None,
+            projection,
+            into: None,
+            from: vec![from],
+            lateral_views: vec![],
+            selection,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        }
+    }
+
+    // Build a single subquery which selects the measurements with the
+    // provided name. E.g., this creates:
+    //
+    // ```sql
+    // SELECT
+    //     timeseries_key,
+    //     timestamp,
+    //     datum
+    // FROM
+    //     measurements_{datum_type}
+    // WHERE
+    //     timeseries_name = '{timeseries_name}'
+    // ```
+    fn build_measurement_query(
+        timeseries_name: &TimeseriesName,
+        datum_type: &DatumType,
+    ) -> Select {
+        // FROM measurements_{datum_type}
+        let from = TableWithJoins {
+            relation: TableFactor::Table {
+                name: ObjectName(vec![Self::str_to_ident(
+                    &measurement_table_name(*datum_type),
+                )]),
+                alias: None,
+                args: None,
+                with_hints: vec![],
+            },
+            joins: vec![],
+        };
+
+        // SELECT timeseries_key, timestamp, [datum type columns]
+        let mut projection = vec![SelectItem::UnnamedExpr(Expr::Identifier(
+            Self::str_to_ident("timeseries_key"),
+        ))];
+        let datum_projection = Self::datum_type_to_columns(datum_type);
+        projection.extend(datum_projection.iter().map(|name| {
+            SelectItem::UnnamedExpr(Expr::Identifier(Self::str_to_ident(name)))
+        }));
+
+        // WHERE timeseries_name = '{timeseries_name}'
+        let selection = Some(Expr::BinaryOp {
+            left: Box::new(Expr::Identifier(Self::str_to_ident(
+                "timeseries_name",
+            ))),
+            op: BinaryOperator::Eq,
+            right: Box::new(Expr::Value(Value::SingleQuotedString(
+                timeseries_name.to_string(),
+            ))),
+        });
+
+        Select {
+            distinct: None,
+            top: None,
+            projection,
+            into: None,
+            from: vec![from],
+            lateral_views: vec![],
+            selection,
+            group_by: vec![],
+            cluster_by: vec![],
+            distribute_by: vec![],
+            sort_by: vec![],
+            having: None,
+            named_window: vec![],
+            qualify: None,
+        }
+    }
+
+    // Process a single "table factor", the `<relation>` in `FROM <relation>`,
+    // to extract the names of the timeseries it refers to.
+    //
+    // Note this is recursive, since we support subqueries (derived tables).
+    fn process_table_factor(
+        relation: &mut TableFactor,
+    ) -> Result<IndexSet<TimeseriesName>, OxdbError> {
+        match relation {
+            TableFactor::Table { ref mut name, args, with_hints, .. } => {
+                if args.is_some() || !with_hints.is_empty() {
+                    return unsupported!(
+                        "Table functions and hints are not supported"
+                    );
+                }
+                if name.0.len() != 1 {
+                    return unsupported!(
+                        "Query must select from a single named \
+                        table, with no database"
+                    );
+                }
+                let timeseries_name = name.0[0]
+                    .value
+                    .parse()
+                    .map(|n| indexmap::indexset! { n })
+                    .map_err(|_| OxdbError::InvalidTimeseriesName)?;
+                // Rewrite the quote style to double quotes, so that the
+                // resulting query uses a valid ClickHouse identifier, naming
+                // the CTEs we'll generate later.
+                name.0[0].quote_style = Some('"');
+                Ok(timeseries_name)
+            }
+            TableFactor::Derived { lateral: false, subquery, .. } => {
+                RestrictedQuery::process_query(subquery)
+            }
+            _ => {
+                return unsupported!(
+                    "Query must select from concrete tables or subqueries on them"
+                )
+            }
+        }
+    }
+
+    // Process a parsed query, returning the named timeseries that it refers
+    // to.
+    //
+    // This is the entry-point for our query processing implementation. We take
+    // a parsed query from `sqlparser`, and extract the virtual tables
+    // (timeseries names) that we'll need to construct in order to actually run
+    // it against our database.
+    //
+    // Note that we return an _ordered set_ of the timeseries names, so that we
+    // emit one CTE per referenced timeseries, without duplicates.
+    fn process_query(
+        query: &mut Query,
+    ) -> Result<IndexSet<TimeseriesName>, OxdbError> {
+        // Some basic checks limiting the scope of the query.
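+        //
+        // For example (illustrative queries, not from the test suite), each
+        // of the following is rejected here:
+        //
+        // ```sql
+        // WITH t AS (SELECT 1) SELECT * FROM t;  -- CTE
+        // SELECT 1 FETCH FIRST 1 ROW ONLY;       -- FETCH
+        // SELECT 1 FOR UPDATE;                   -- locking clause
+        // ```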
+        if query.with.is_some()
+            || query.fetch.is_some()
+            || !query.locks.is_empty()
+        {
+            return unsupported!(
+                "CTEs, FETCH, and LOCKS are not currently supported"
+            );
+        }
+        let SetExpr::Select(select) = &mut *query.body else {
+            return unsupported!("Only SELECT queries are currently supported");
+        };
+
+        // For each object we're selecting from (a table factor), process that
+        // directly, and process any JOINs it also contains.
+        let mut timeseries = IndexSet::with_capacity(select.from.len());
+        if select.from.len() > 1 {
+            return unsupported!(
+                "Query must select from a single named table, with no database"
+            );
+        }
+        if let Some(from) = select.from.iter_mut().next() {
+            timeseries.extend(Self::process_table_factor(&mut from.relation)?);
+            for join in from.joins.iter_mut() {
+                let JoinOperator::Inner(op) = &join.join_operator else {
+                    return unsupported!(
+                        "Only INNER JOINs are supported, using \
+                        explicit constraints"
+                    );
+                };
+                if matches!(op, JoinConstraint::Natural) {
+                    return unsupported!(
+                        "Only INNER JOINs are supported, using \
+                        explicit constraints"
+                    );
+                }
+                timeseries
+                    .extend(Self::process_table_factor(&mut join.relation)?);
+            }
+        }
+        Ok(timeseries)
+    }
+}
+
+static CLICKHOUSE_FUNCTION_ALLOW_LIST: OnceLock<BTreeSet<ClickHouseFunction>> =
+    OnceLock::new();
+
+#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
+pub struct ClickHouseFunction {
+    pub name: &'static str,
+    pub usage: &'static str,
+    pub description: &'static str,
+}
+
+impl std::fmt::Display for ClickHouseFunction {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.name)
+    }
+}
+
+impl ClickHouseFunction {
+    fn new(usage: &'static str, description: &'static str) -> Self {
+        let name = usage.split_once('(').expect("need parentheses").0;
+        Self { name, usage, description }
+    }
+}
+
+/// Return the set of supported ClickHouse SQL functions, with a short help
+/// string.
+pub fn function_allow_list() -> &'static BTreeSet<ClickHouseFunction> {
+    CLICKHOUSE_FUNCTION_ALLOW_LIST.get_or_init(|| {
+        let mut out = BTreeSet::new();
+
+        // Core functions
+        out.insert(ClickHouseFunction::new("avg(expr)", "Arithmetic mean"));
+        out.insert(ClickHouseFunction::new("min(expr)", "Minimum value"));
+        out.insert(ClickHouseFunction::new("max(expr)", "Maximum value"));
+        out.insert(ClickHouseFunction::new("sum(expr)", "Sum values"));
+        out.insert(ClickHouseFunction::new(
+            "count(expr)",
+            "Count number of rows",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "now()",
+            "Return current timestamp",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "first_value(expr)",
+            "First value in a partition",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "last_value(expr)",
+            "Last value in a partition",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "any(expr)",
+            "First non-NULL value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "topK(k)(expr)",
+            "Estimate K most frequent values",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "groupArray(expr)",
+            "Create an array from rows",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "argMin(arg, val)",
+            "Argument of minimum value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "argMax(arg, val)",
+            "Argument of maximum value",
+        ));
+        out.insert(ClickHouseFunction::new(
+            "quantileExact(quantile)(expr)",
+            "Exact quantile of inputs",
+        ));
+
+        // To support histograms, we allow the `-ForEach` combinator functions.
+ // + // See + // https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators#-foreach, + // but briefly, this allows computing the aggregate function across + // corresponding array elements. + out.insert(ClickHouseFunction::new( + "maxForEach(array expr)", + "Max of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "minForEach(array expr)", + "Min of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "sumForEach(array expr)", + "Sum of corresponding array elements", + )); + out.insert(ClickHouseFunction::new( + "avgForEach(array expr)", + "Mean of corresponding array elements", + )); + + // Type conversions + // + // Note that `cast` itself will be difficult to use, because ClickHouse is + // particular about the capitalization of type names, e.g., it must be + // `cast(x as String)` not `cast(x as STRING)`. + out.insert(ClickHouseFunction::new( + "toString(x)", + "Convert to a string", + )); + out.insert(ClickHouseFunction::new("toInt8(x)", "Convert to an i8")); + out.insert(ClickHouseFunction::new("toUInt8(x)", "Convert to a u8")); + out.insert(ClickHouseFunction::new("toInt16(x)", "Convert to an i16")); + out.insert(ClickHouseFunction::new("toUInt16(x)", "Convert to a u16")); + out.insert(ClickHouseFunction::new("toInt32(x)", "Convert to an i32")); + out.insert(ClickHouseFunction::new("toUInt32(x)", "Convert to a u32")); + out.insert(ClickHouseFunction::new("toInt64(x)", "Convert to an i64")); + out.insert(ClickHouseFunction::new("toUInt64(x)", "Convert to a u64")); + out.insert(ClickHouseFunction::new( + "toFloat32(x)", + "Convert to an f32", + )); + out.insert(ClickHouseFunction::new( + "toFloat64(x)", + "Convert to an f64", + )); + out.insert(ClickHouseFunction::new( + "toDate(x)", + "Convert to a 32-bit date", + )); + out.insert(ClickHouseFunction::new( + "toDateTime(x)", + "Convert to a 32-bit date and time", + )); + out.insert(ClickHouseFunction::new( + "toDateTime64(x)", + "Convert to a 64-bit date and time", + )); + out.insert(ClickHouseFunction::new( + "toIntervalYear(x)", + "Convert to an interval in years", + )); + out.insert(ClickHouseFunction::new( + "toIntervalQuarter(x)", + "Convert to an interval in quarters", + )); + out.insert(ClickHouseFunction::new( + "toIntervalMonth(x)", + "Convert to an interval in months", + )); + out.insert(ClickHouseFunction::new( + "toIntervalWeek(x)", + "Convert to an interval in weeks", + )); + out.insert(ClickHouseFunction::new( + "toIntervalDay(x)", + "Convert to an interval in days", + )); + out.insert(ClickHouseFunction::new( + "toIntervalHour(x)", + "Convert to an interval in hours", + )); + out.insert(ClickHouseFunction::new( + "toIntervalMinute(x)", + "Convert to an interval in minutes", + )); + out.insert(ClickHouseFunction::new( + "toIntervalSecond(x)", + "Convert to an interval in seconds", + )); + + // Array functions + out.insert(ClickHouseFunction::new( + "arrayMax([func,] arr)", + "Maximum in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayMin([func,] arr)", + "Minimum in source array", + )); + out.insert(ClickHouseFunction::new( + "arraySum([func,] arr)", + "Sum of elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayAvg([func,] arr)", + "Mean of elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayMap(func, arr, ...)", + "Apply function to elements in source array", + )); + out.insert(ClickHouseFunction::new( + "arrayReduce(func, arr, ...)", + "Aggregate elements in source array with 
a function", + )); + out.insert(ClickHouseFunction::new( + "arrayFilter(func, arr, ...)", + "Apply a lambda to source array", + )); + out.insert(ClickHouseFunction::new( + "arrayDifference(arr)", + "Difference between adjacent elements in source array", + )); + out.insert(ClickHouseFunction::new( + "indexOf(arr, x)", + "Index of `x` in source array, or 0", + )); + out.insert(ClickHouseFunction::new( + "length(arr)", + "Length of source array", + )); + + // Strings + out.insert(ClickHouseFunction::new( + "empty(x)", + "True if array or string is empty", + )); + out.insert(ClickHouseFunction::new( + "lower(x)", + "Convert a string to lowercase", + )); + out.insert(ClickHouseFunction::new( + "upper(x)", + "Convert a string to uppercase", + )); + out.insert(ClickHouseFunction::new( + "reverse(x)", + "Reverse the bytes (not chars) in a string", + )); + out.insert(ClickHouseFunction::new( + "reverseUTF8(x)", + "Reverse the characters in a string", + )); + out.insert(ClickHouseFunction::new( + "concat(s1, s2, ...)", + "Concatenate two or more strings", + )); + out.insert(ClickHouseFunction::new( + "concatWithSeparator(sep, s1, s2, ..)", + "Concatenate two or more strings with a separator", + )); + out.insert(ClickHouseFunction::new( + "substring(s, offset, len)", + "Return a substring", + )); + out.insert(ClickHouseFunction::new( + "endsWith(s, suffix)", + "True if `s` ends with `suffix`", + )); + out.insert(ClickHouseFunction::new( + "startsWith(s, prefix)", + "True if `s` starts with `prefix`", + )); + out.insert(ClickHouseFunction::new( + "splitByChar(sep, s[, limit])", + "Split on a separator, up to `limit` times", + )); + out.insert(ClickHouseFunction::new( + "splitByString(sep, s[, limit])", + "Split by a separating string, up to `limit` times", + )); + + // Time. 
+ out.insert(ClickHouseFunction::new( + "tumble(datetime, interval[, tz])", + "Nonoverlapping time windows of a specified interval", + )); + out.insert(ClickHouseFunction::new( + "toYear(date)", + "Extract year from date", + )); + out.insert(ClickHouseFunction::new( + "toQuarter(date)", + "Extract quarter from date", + )); + out.insert(ClickHouseFunction::new( + "toMonth(date)", + "Extract month from date", + )); + out.insert(ClickHouseFunction::new( + "toDayOfYear(date)", + "Index of day in its year", + )); + out.insert(ClickHouseFunction::new( + "toDayOfMonth(date)", + "Index of day in its month", + )); + out.insert(ClickHouseFunction::new( + "toDayOfWeek(date)", + "Index of day in its week", + )); + out.insert(ClickHouseFunction::new( + "toHour(date)", + "Extract hour from date", + )); + out.insert(ClickHouseFunction::new( + "toMinute(date)", + "Extract minute from date", + )); + out.insert(ClickHouseFunction::new( + "toSecond(date)", + "Extract second from date", + )); + out.insert(ClickHouseFunction::new( + "toUnixTimestamp(date)", + "Convert to UNIX timestamp", + )); + out.insert(ClickHouseFunction::new( + "toStartOfInterval(date, INTERVAL x UNIT[, tz])", + "Convert date to the start of the specified interval", + )); + out.insert(ClickHouseFunction::new( + "date_diff('unit', start, end[, tz])", + "Difference between two dates in the provided unit", + )); + out.insert(ClickHouseFunction::new( + "date_trunc('unit', date[, tz])", + "Truncate a datetime to the provided unit", + )); + out.insert(ClickHouseFunction::new( + "date_add('unit', count, date)", + "Add `count` units to `date`", + )); + out.insert(ClickHouseFunction::new( + "date_sub('unit', count, date)", + "Subtract `count` units from `date`", + )); + + // Other + out.insert(ClickHouseFunction::new( + "generateUUIDv4()", + "Generate a random UUID v4", + )); + out.insert(ClickHouseFunction::new("rand()", "Uniform random u32")); + out.insert(ClickHouseFunction::new("rand64()", "Uniform random u64")); + out.insert(ClickHouseFunction::new( + "runningDifference(arr)", + "Difference between adjacent values", + )); + out.insert(ClickHouseFunction::new( + "formatReadableSize(x)", + "Format a byte count for humans", + )); + out.insert(ClickHouseFunction::new( + "formatReadableTimeDelta(x)", + "Format an interval for humans", + )); + out.insert(ClickHouseFunction::new( + "formatReadableQuantity(x)", + "Format a quantity for humans", + )); + out + }) +} + +#[cfg(test)] +mod tests { + use super::Error; + use super::OxdbError; + use super::RestrictedQuery; + use super::SafeSql; + + #[test] + fn test_function_allow_list() { + assert!(RestrictedQuery::new("SELECT bogus()").is_err()); + assert!(matches!( + RestrictedQuery::new("SELECT bogus()").unwrap_err(), + OxdbError::Sql(Error::UnsupportedFunction { .. 
}) + )); + assert!(RestrictedQuery::new("SELECT now()").is_ok()); + } + + #[test] + fn test_ctes_are_not_supported() { + assert!(matches!( + RestrictedQuery::new("WITH nono AS (SELECT 1) SELECT * FROM NONO") + .unwrap_err(), + OxdbError::Sql(Error::UnsupportedSql(_)) + )); + } + + #[test] + fn test_multiple_statements_are_not_supported() { + assert!(matches!( + RestrictedQuery::new("SELECT 1; SELECT 2;").unwrap_err(), + OxdbError::Sql(Error::UnsupportedSql(_)) + )); + } + + #[test] + fn test_query_must_be_select_statement() { + for query in [ + "SHOW TABLES", + "DROP TABLE foo", + "CREATE TABLE foo (x Int4)", + "DESCRIBE foo", + "EXPLAIN SELECT 1", + "INSERT INTO foo VALUES (1)", + ] { + let err = RestrictedQuery::new(query).unwrap_err(); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + } + + #[test] + fn test_cannot_name_database() { + let err = RestrictedQuery::new("SELECT * FROM dbname.a:a").unwrap_err(); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + + #[test] + fn test_with_comma_join_fails() { + let err = RestrictedQuery::new("SELECT * FROM a:a, b:b").unwrap_err(); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + + #[test] + fn test_join_must_be_inner() { + let allowed = ["inner", ""]; + let denied = + ["natural", "cross", "left outer", "right outer", "full outer"]; + for join in allowed.iter() { + RestrictedQuery::new(format!("SELECT * FROM a:a {join} JOIN b:b")) + .unwrap_or_else(|_| { + panic!("Should be able to use join type '{join}'") + }); + } + for join in denied.iter() { + let sql = format!("SELECT * FROM a:a {join} JOIN b:b"); + println!("{sql}"); + let err = RestrictedQuery::new(&sql).expect_err( + format!("Should not be able to use join type '{join}'") + .as_str(), + ); + println!("{err:?}"); + assert!(matches!(err, OxdbError::Sql(Error::UnsupportedSql(_)))); + } + } + + #[test] + fn test_allow_limit_offset() { + let sql = "SELECT * FROM a:b LIMIT 10 OFFSET 10;"; + println!("{sql}"); + RestrictedQuery::new(&sql) + .expect("Should be able to use LIMIT / OFFSET queries"); + } + + #[test] + fn test_require_table_is_timeseries_name() { + assert!(RestrictedQuery::new("SELECT * FROM a:b").is_ok()); + let bad = ["table", "db.table", "no:no:no"]; + for each in bad.iter() { + let sql = format!("SELECT * FROM {each}"); + RestrictedQuery::new(&sql) + .expect_err("Should have validated timeseries name"); + } + } + + #[test] + fn test_allow_subqueries() { + assert!(RestrictedQuery::new("SELECT * FROM (SELECT 1);").is_ok()); + } + + #[test] + fn test_query_with_multiple_timeseries_generates_one_cte() { + let query = "SELECT * FROM a:b JOIN a:b USING (timeseries_key);"; + let res = RestrictedQuery::new(&query).unwrap(); + assert_eq!(res.timeseries.len(), 1); + } + + #[test] + fn test_safe_sql_does_not_modify_original_alias() { + let query = "SELECT * FROM a:b AS ASOF JOIN a:b"; + let query_with_quotes = "SELECT * FROM \"a:b\" AS ASOF JOIN \"a:b\""; + let safe = SafeSql::new(query); + let rewritten = safe.safe_sql(); + println!("{query}"); + println!("{query_with_quotes}"); + println!("{rewritten}"); + + // Check that we've written out the same query words, ignoring + // whitespace. 
+        let words = query_with_quotes
+            .split_ascii_whitespace()
+            .rev()
+            .collect::<Vec<_>>();
+        let rewritten_words = rewritten
+            .split_ascii_whitespace()
+            .rev()
+            .take(words.len())
+            .collect::<Vec<_>>();
+        assert_eq!(words, rewritten_words);
+    }
+}
diff --git a/oximeter/db/test-output/sql/00/query.sql b/oximeter/db/test-output/sql/00/query.sql
new file mode 100644
index 00000000000..e0ac49d1ecf
--- /dev/null
+++ b/oximeter/db/test-output/sql/00/query.sql
@@ -0,0 +1 @@
+SELECT 1;
diff --git a/oximeter/db/test-output/sql/00/result.txt b/oximeter/db/test-output/sql/00/result.txt
new file mode 100644
index 00000000000..925e298e866
--- /dev/null
+++ b/oximeter/db/test-output/sql/00/result.txt
@@ -0,0 +1,10 @@
+{
+  "column_names": [
+    "1"
+  ],
+  "rows": [
+    [
+      1
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/oximeter/db/test-output/sql/01/query.sql b/oximeter/db/test-output/sql/01/query.sql
new file mode 100644
index 00000000000..f3e5549e7f8
--- /dev/null
+++ b/oximeter/db/test-output/sql/01/query.sql
@@ -0,0 +1 @@
+SELECT 1 + 1 AS total;
diff --git a/oximeter/db/test-output/sql/01/result.txt b/oximeter/db/test-output/sql/01/result.txt
new file mode 100644
index 00000000000..ee17f9993e6
--- /dev/null
+++ b/oximeter/db/test-output/sql/01/result.txt
@@ -0,0 +1,10 @@
+{
+  "column_names": [
+    "total"
+  ],
+  "rows": [
+    [
+      2
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/oximeter/db/test-output/sql/02/query.sql b/oximeter/db/test-output/sql/02/query.sql
new file mode 100644
index 00000000000..cd16a883aa3
--- /dev/null
+++ b/oximeter/db/test-output/sql/02/query.sql
@@ -0,0 +1 @@
+SELECT count() FROM service:request_latency WHERE route = '/a';
diff --git a/oximeter/db/test-output/sql/02/result.txt b/oximeter/db/test-output/sql/02/result.txt
new file mode 100644
index 00000000000..7bae246ae86
--- /dev/null
+++ b/oximeter/db/test-output/sql/02/result.txt
@@ -0,0 +1,10 @@
+{
+  "column_names": [
+    "count()"
+  ],
+  "rows": [
+    [
+      12
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/oximeter/db/test-output/sql/03/query.sql b/oximeter/db/test-output/sql/03/query.sql
new file mode 100644
index 00000000000..9d043eda5b4
--- /dev/null
+++ b/oximeter/db/test-output/sql/03/query.sql
@@ -0,0 +1,4 @@
+SELECT
+    count() AS total
+FROM service:request_latency
+GROUP BY name, id, route, method, status_code;
diff --git a/oximeter/db/test-output/sql/03/result.txt b/oximeter/db/test-output/sql/03/result.txt
new file mode 100644
index 00000000000..246b8a224ee
--- /dev/null
+++ b/oximeter/db/test-output/sql/03/result.txt
@@ -0,0 +1,43 @@
+{
+  "column_names": [
+    "total"
+  ],
+  "rows": [
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ],
+    [
+      2
+    ]
+  ]
+}
\ No newline at end of file
diff --git a/oximeter/db/test-output/sql/04/query.sql b/oximeter/db/test-output/sql/04/query.sql
new file mode 100644
index 00000000000..a276475a0dc
--- /dev/null
+++ b/oximeter/db/test-output/sql/04/query.sql
@@ -0,0 +1,5 @@
+SELECT
+    timeseries_key,
+    count() AS total
+FROM service:request_latency
+GROUP BY timeseries_key;
diff --git a/oximeter/db/test-output/sql/04/result.txt b/oximeter/db/test-output/sql/04/result.txt
new file mode 100644
index 00000000000..4eca1fd93df
--- /dev/null
+++ b/oximeter/db/test-output/sql/04/result.txt
@@ -0,0 +1,56 @@
+{
+  "column_names": [
+    "timeseries_key",
+    "total"
+  ],
+  "rows": [
+    [
+      1249464505628069370,
+      2
+    ],
+    [
+      1201872630192423018,
+      2
+    ],
+    [
+      1490072383288995413,
+      2
+    ],
+    [
+      4845785484328932020,
+ 2 + ], + [ + 16162802647654680800, + 2 + ], + [ + 9308844330114997943, + 2 + ], + [ + 5233273748839477731, + 2 + ], + [ + 12759963114254845848, + 2 + ], + [ + 8677807063017961056, + 2 + ], + [ + 17069599562714970297, + 2 + ], + [ + 1477351355909737762, + 2 + ], + [ + 16473879070749258520, + 2 + ] + ] +} \ No newline at end of file diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 3d74bec72c0..eff5c399e3e 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -330,6 +330,11 @@ impl DatumType { | DatumType::HistogramF64 ) } + + /// Return `true` if this datum type is a histogram, and `false` otherwise. + pub const fn is_histogram(&self) -> bool { + matches!(self, DatumType::HistogramF64 | DatumType::HistogramI64) + } } impl std::fmt::Display for DatumType { diff --git a/wicket-dbg/Cargo.toml b/wicket-dbg/Cargo.toml index f9047297af1..f42ed335c8a 100644 --- a/wicket-dbg/Cargo.toml +++ b/wicket-dbg/Cargo.toml @@ -20,7 +20,7 @@ tokio = { workspace = true, features = ["full"] } wicket.workspace = true # used only by wicket-dbg binary -reedline = "0.26.0" +reedline.workspace = true omicron-workspace-hack.workspace = true [[bin]] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1d14b26a69c..00be70aef50 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -32,7 +32,6 @@ console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } @@ -66,6 +65,7 @@ libc = { version = "0.2.150", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } +nom = { version = "7.1.3" } num-bigint = { version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } @@ -133,7 +133,6 @@ console = { version = "0.15.7" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } -crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } @@ -167,6 +166,7 @@ libc = { version = "0.2.150", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } +nom = { version = "7.1.3" } num-bigint = { 
version = "0.4.4", features = ["rand"] } num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] }