From 8238f985dd70f09fd45a4738dcbbcc07e790d2f0 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Mon, 24 Jun 2024 17:12:58 -0400 Subject: [PATCH] Additional tracking on histograms for means, std dev, p* quantile estimates (#5897) Closes https://github.com/oxidecomputer/omicron/issues/4913. This is part 1 of tracking more information within a histogram to compute min, max, means, std_dev, and estimates for p50, p90, and p99 and how to store/retrieve the associated info from Clickhouse. --- Cargo.lock | 19 + Cargo.toml | 2 + docs/how-to-run.adoc | 6 +- openapi/nexus.json | 712 ++++++++++++++++++- oximeter/db/schema/replicated/5/up01.sql | 1 + oximeter/db/schema/replicated/5/up02.sql | 1 + oximeter/db/schema/replicated/5/up03.sql | 25 + oximeter/db/schema/replicated/5/up04.sql | 3 + oximeter/db/schema/replicated/5/up05.sql | 1 + oximeter/db/schema/replicated/5/up06.sql | 1 + oximeter/db/schema/replicated/5/up07.sql | 25 + oximeter/db/schema/replicated/5/up08.sql | 3 + oximeter/db/schema/replicated/5/up09.sql | 1 + oximeter/db/schema/replicated/5/up10.sql | 1 + oximeter/db/schema/replicated/5/up11.sql | 25 + oximeter/db/schema/replicated/5/up12.sql | 3 + oximeter/db/schema/replicated/5/up13.sql | 1 + oximeter/db/schema/replicated/5/up14.sql | 1 + oximeter/db/schema/replicated/5/up15.sql | 25 + oximeter/db/schema/replicated/5/up16.sql | 3 + oximeter/db/schema/replicated/5/up17.sql | 1 + oximeter/db/schema/replicated/5/up18.sql | 1 + oximeter/db/schema/replicated/5/up19.sql | 25 + oximeter/db/schema/replicated/5/up20.sql | 3 + oximeter/db/schema/replicated/5/up21.sql | 1 + oximeter/db/schema/replicated/5/up22.sql | 1 + oximeter/db/schema/replicated/5/up23.sql | 25 + oximeter/db/schema/replicated/5/up24.sql | 3 + oximeter/db/schema/replicated/5/up25.sql | 1 + oximeter/db/schema/replicated/5/up26.sql | 1 + oximeter/db/schema/replicated/5/up27.sql | 25 + oximeter/db/schema/replicated/5/up28.sql | 3 + oximeter/db/schema/replicated/5/up29.sql | 1 + 
oximeter/db/schema/replicated/5/up30.sql | 1 + oximeter/db/schema/replicated/5/up31.sql | 25 + oximeter/db/schema/replicated/5/up32.sql | 3 + oximeter/db/schema/replicated/5/up33.sql | 1 + oximeter/db/schema/replicated/5/up34.sql | 1 + oximeter/db/schema/replicated/5/up35.sql | 25 + oximeter/db/schema/replicated/5/up36.sql | 3 + oximeter/db/schema/replicated/5/up37.sql | 1 + oximeter/db/schema/replicated/5/up38.sql | 1 + oximeter/db/schema/replicated/5/up39.sql | 25 + oximeter/db/schema/replicated/5/up40.sql | 3 + oximeter/db/schema/replicated/db-init.sql | 240 ++++--- oximeter/db/schema/single-node/5/up01.sql | 1 + oximeter/db/schema/single-node/5/up02.sql | 25 + oximeter/db/schema/single-node/5/up03.sql | 1 + oximeter/db/schema/single-node/5/up04.sql | 25 + oximeter/db/schema/single-node/5/up05.sql | 1 + oximeter/db/schema/single-node/5/up06.sql | 25 + oximeter/db/schema/single-node/5/up07.sql | 1 + oximeter/db/schema/single-node/5/up08.sql | 25 + oximeter/db/schema/single-node/5/up09.sql | 1 + oximeter/db/schema/single-node/5/up10.sql | 25 + oximeter/db/schema/single-node/5/up11.sql | 1 + oximeter/db/schema/single-node/5/up12.sql | 25 + oximeter/db/schema/single-node/5/up13.sql | 1 + oximeter/db/schema/single-node/5/up14.sql | 25 + oximeter/db/schema/single-node/5/up15.sql | 1 + oximeter/db/schema/single-node/5/up16.sql | 25 + oximeter/db/schema/single-node/5/up17.sql | 1 + oximeter/db/schema/single-node/5/up18.sql | 25 + oximeter/db/schema/single-node/5/up19.sql | 1 + oximeter/db/schema/single-node/5/up20.sql | 25 + oximeter/db/schema/single-node/db-init.sql | 150 +++- oximeter/db/src/bin/oxdb/oxql.rs | 101 ++- oximeter/db/src/bin/oxdb/sql.rs | 39 +- oximeter/db/src/client/mod.rs | 2 +- oximeter/db/src/client/oxql.rs | 10 +- oximeter/db/src/model.rs | 272 +++++-- oximeter/db/src/oxql/ast/table_ops/filter.rs | 10 +- oximeter/db/src/oxql/point.rs | 280 +++++++- oximeter/db/src/oxql/query/mod.rs | 32 + oximeter/db/src/sql/mod.rs | 23 +- 
oximeter/instruments/src/http.rs | 6 +- oximeter/oximeter/Cargo.toml | 3 + oximeter/oximeter/src/histogram.rs | 639 +++++++++++++---- oximeter/oximeter/src/lib.rs | 3 + oximeter/oximeter/src/quantile.rs | 592 +++++++++++++++ oximeter/oximeter/src/test_util.rs | 2 +- 81 files changed, 3302 insertions(+), 401 deletions(-) create mode 100644 oximeter/db/schema/replicated/5/up01.sql create mode 100644 oximeter/db/schema/replicated/5/up02.sql create mode 100644 oximeter/db/schema/replicated/5/up03.sql create mode 100644 oximeter/db/schema/replicated/5/up04.sql create mode 100644 oximeter/db/schema/replicated/5/up05.sql create mode 100644 oximeter/db/schema/replicated/5/up06.sql create mode 100644 oximeter/db/schema/replicated/5/up07.sql create mode 100644 oximeter/db/schema/replicated/5/up08.sql create mode 100644 oximeter/db/schema/replicated/5/up09.sql create mode 100644 oximeter/db/schema/replicated/5/up10.sql create mode 100644 oximeter/db/schema/replicated/5/up11.sql create mode 100644 oximeter/db/schema/replicated/5/up12.sql create mode 100644 oximeter/db/schema/replicated/5/up13.sql create mode 100644 oximeter/db/schema/replicated/5/up14.sql create mode 100644 oximeter/db/schema/replicated/5/up15.sql create mode 100644 oximeter/db/schema/replicated/5/up16.sql create mode 100644 oximeter/db/schema/replicated/5/up17.sql create mode 100644 oximeter/db/schema/replicated/5/up18.sql create mode 100644 oximeter/db/schema/replicated/5/up19.sql create mode 100644 oximeter/db/schema/replicated/5/up20.sql create mode 100644 oximeter/db/schema/replicated/5/up21.sql create mode 100644 oximeter/db/schema/replicated/5/up22.sql create mode 100644 oximeter/db/schema/replicated/5/up23.sql create mode 100644 oximeter/db/schema/replicated/5/up24.sql create mode 100644 oximeter/db/schema/replicated/5/up25.sql create mode 100644 oximeter/db/schema/replicated/5/up26.sql create mode 100644 oximeter/db/schema/replicated/5/up27.sql create mode 100644 
oximeter/db/schema/replicated/5/up28.sql create mode 100644 oximeter/db/schema/replicated/5/up29.sql create mode 100644 oximeter/db/schema/replicated/5/up30.sql create mode 100644 oximeter/db/schema/replicated/5/up31.sql create mode 100644 oximeter/db/schema/replicated/5/up32.sql create mode 100644 oximeter/db/schema/replicated/5/up33.sql create mode 100644 oximeter/db/schema/replicated/5/up34.sql create mode 100644 oximeter/db/schema/replicated/5/up35.sql create mode 100644 oximeter/db/schema/replicated/5/up36.sql create mode 100644 oximeter/db/schema/replicated/5/up37.sql create mode 100644 oximeter/db/schema/replicated/5/up38.sql create mode 100644 oximeter/db/schema/replicated/5/up39.sql create mode 100644 oximeter/db/schema/replicated/5/up40.sql create mode 100644 oximeter/db/schema/single-node/5/up01.sql create mode 100644 oximeter/db/schema/single-node/5/up02.sql create mode 100644 oximeter/db/schema/single-node/5/up03.sql create mode 100644 oximeter/db/schema/single-node/5/up04.sql create mode 100644 oximeter/db/schema/single-node/5/up05.sql create mode 100644 oximeter/db/schema/single-node/5/up06.sql create mode 100644 oximeter/db/schema/single-node/5/up07.sql create mode 100644 oximeter/db/schema/single-node/5/up08.sql create mode 100644 oximeter/db/schema/single-node/5/up09.sql create mode 100644 oximeter/db/schema/single-node/5/up10.sql create mode 100644 oximeter/db/schema/single-node/5/up11.sql create mode 100644 oximeter/db/schema/single-node/5/up12.sql create mode 100644 oximeter/db/schema/single-node/5/up13.sql create mode 100644 oximeter/db/schema/single-node/5/up14.sql create mode 100644 oximeter/db/schema/single-node/5/up15.sql create mode 100644 oximeter/db/schema/single-node/5/up16.sql create mode 100644 oximeter/db/schema/single-node/5/up17.sql create mode 100644 oximeter/db/schema/single-node/5/up18.sql create mode 100644 oximeter/db/schema/single-node/5/up19.sql create mode 100644 oximeter/db/schema/single-node/5/up20.sql create mode 100644 
oximeter/oximeter/src/quantile.rs diff --git a/Cargo.lock b/Cargo.lock index ac3149c6ff..093061216a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2435,6 +2435,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "float-ord" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + [[package]] name = "flume" version = "0.11.0" @@ -6222,10 +6228,13 @@ dependencies = [ "approx", "bytes", "chrono", + "float-ord", "num", "omicron-common", "omicron-workspace-hack", "oximeter-macro-impl", + "rand 0.8.5", + "rand_distr", "regex", "rstest", "schemars", @@ -7457,6 +7466,16 @@ dependencies = [ "getrandom 0.2.14", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rand_hc" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index fbb301511b..1291f18726 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -280,6 +280,7 @@ expectorate = "1.1.0" fatfs = "0.3.6" filetime = "0.2.23" flate2 = "1.0.30" +float-ord = "0.3.2" flume = "0.11.0" foreign-types = "0.3.2" fs-err = "2.11.0" @@ -408,6 +409,7 @@ proptest = "1.4.0" quote = "1.0" rand = "0.8.5" rand_core = "0.6.4" +rand_distr = "0.4.3" rand_seeder = "0.2.3" ratatui = "0.26.2" rayon = "1.10" diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 097467ef04..9bd99c23d3 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -173,7 +173,7 @@ Then install prerequisite software with the following script: [source,text] ---- -$ pfexec ./tools/install_prerequisites.sh +$ ./tools/install_prerequisites.sh ---- You need to do this step once per workspace and potentially again each time you fetch new changes. If the script reports any PATH problems, you'll need to correct those before proceeding. 
@@ -410,9 +410,9 @@ $ pfexec ./target/release/omicron-package install [WARNING] ==== -**Do not use `pfexec cargo run` directly**; it will cause files in `~/.cargo` and `target/` to be owned by root, which will cause problems down the road. +**Do not use `pfexec cargo run` directly**; it will cause files in `~/.cargo`, `out/`, and `target/` to be owned by root, which will cause problems down the road. -If you've done this already, and you wish to recover, run from the root of this repository `pfexec chown -R $USER:$(id -ng $USER) target ${CARGO_HOME:-~/.cargo}`. +If you've done this already, and you wish to recover, run from the root of this repository `pfexec chown -R $USER:$(id -ng $USER) out target ${CARGO_HOME:-~/.cargo}`. ==== This command installs an SMF service called `svc:/oxide/sled-agent:default`, which itself starts the other required services. This will take a few minutes. You can watch the progress by looking at the Sled Agent log: diff --git a/openapi/nexus.json b/openapi/nexus.json index 42ccb567cf..ab7192d1e0 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -12309,7 +12309,7 @@ ] }, "Distributiondouble": { - "description": "A distribution is a sequence of bins and counts in those bins.", + "description": "A distribution is a sequence of bins and counts in those bins, and some statistical information tracked to compute the mean, standard deviation, and quantile estimates.\n\nMin, max, and the p-* quantiles are treated as optional due to the possibility of distribution operations, like subtraction.", "type": "object", "properties": { "bins": { @@ -12326,15 +12326,59 @@ "format": "uint64", "minimum": 0 } + }, + "max": { + "nullable": true, + "type": "number", + "format": "double" + }, + "min": { + "nullable": true, + "type": "number", + "format": "double" + }, + "p50": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "nullable": true, + "allOf": [ + { + "$ref": 
"#/components/schemas/Quantile" + } + ] + }, + "p99": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "type": "number", + "format": "double" + }, + "sum_of_samples": { + "type": "number", + "format": "double" } }, "required": [ "bins", - "counts" + "counts", + "squared_mean", + "sum_of_samples" ] }, "Distributionint64": { - "description": "A distribution is a sequence of bins and counts in those bins.", + "description": "A distribution is a sequence of bins and counts in those bins, and some statistical information tracked to compute the mean, standard deviation, and quantile estimates.\n\nMin, max, and the p-* quantiles are treated as optional due to the possibility of distribution operations, like subtraction.", "type": "object", "properties": { "bins": { @@ -12351,11 +12395,55 @@ "format": "uint64", "minimum": 0 } + }, + "max": { + "nullable": true, + "type": "integer", + "format": "int64" + }, + "min": { + "nullable": true, + "type": "integer", + "format": "int64" + }, + "p50": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "type": "number", + "format": "double" + }, + "sum_of_samples": { + "type": "integer", + "format": "int64" } }, "required": [ "bins", - "counts" + "counts", + "squared_mean", + "sum_of_samples" ] }, "EphemeralIpCreate": { @@ -13112,25 +13200,79 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Bindouble" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "number", + "format": "double" + }, + "min": { + "description": "The minimum value of all samples in the 
histogram.", + "type": "number", + "format": "double" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "number", + "format": "double" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramfloat": { @@ -13138,25 +13280,79 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binfloat" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "number", + "format": "float" + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "number", + "format": "float" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 
Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "number", + "format": "double" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramint16": { @@ -13164,25 +13360,79 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binint16" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "int16" + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "int16" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's 
algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramint32": { @@ -13190,25 +13440,79 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binint32" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "int32" + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "int32" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": 
"The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramint64": { @@ -13216,25 +13520,79 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binint64" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "int64" + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "int64" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramint8": { @@ -13242,25 +13600,79 @@ "type": "object", "properties": { "bins": { + 
"description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binint8" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "int8" + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "int8" + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramuint16": { @@ -13268,25 +13680,81 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binuint16" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + 
"type": "integer", + "format": "uint16", + "minimum": 0 + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramuint32": { @@ -13294,25 +13762,81 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binuint32" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + 
}, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramuint64": { @@ -13320,25 +13844,81 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binuint64" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for 
variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", "type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Histogramuint8": { @@ -13346,25 +13926,81 @@ "type": "object", "properties": { "bins": { + "description": "The bins of the histogram.", "type": "array", "items": { "$ref": "#/components/schemas/Binuint8" } }, + "max": { + "description": "The maximum value of all samples in the histogram.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "min": { + "description": "The minimum value of all samples in the histogram.", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "n_samples": { + "description": "The total number of samples in the histogram.", "type": "integer", "format": "uint64", "minimum": 0 }, + "p50": { + "description": "p50 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p90": { + "description": "p90 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "p99": { + "description": "p99 Quantile", + "allOf": [ + { + "$ref": "#/components/schemas/Quantile" + } + ] + }, + "squared_mean": { + "description": "M2 for Welford's algorithm for variance calculation.\n\nRead about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) for more information on the algorithm.", + "type": "number", + "format": "double" + }, "start_time": { + "description": "The start time of the histogram.", 
"type": "string", "format": "date-time" + }, + "sum_of_samples": { + "description": "The sum of all samples in the histogram.", + "type": "integer", + "format": "int64" } }, "required": [ "bins", + "max", + "min", "n_samples", - "start_time" + "p50", + "p90", + "p99", + "squared_mean", + "start_time", + "sum_of_samples" ] }, "Hostname": { @@ -15756,6 +16392,54 @@ } } }, + "Quantile": { + "description": "Structure for estimating the p-quantile of a population.\n\nThis is based on the P² algorithm for estimating quantiles using constant space.\n\nThe algorithm consists of maintaining five markers: the minimum, the p/2-, p-, and (1 + p)/2 quantiles, and the maximum.", + "type": "object", + "properties": { + "desired_marker_positions": { + "description": "The desired marker positions.", + "type": "array", + "items": { + "type": "number", + "format": "double" + }, + "minItems": 5, + "maxItems": 5 + }, + "marker_heights": { + "description": "The heights of the markers.", + "type": "array", + "items": { + "type": "number", + "format": "double" + }, + "minItems": 5, + "maxItems": 5 + }, + "marker_positions": { + "description": "The positions of the markers.\n\nWe track sample size in the 5th position, as useful observations won't start until we've filled the heights at the 6th sample anyway This does deviate from the paper, but it's a more useful representation that works according to the paper's algorithm.", + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "minItems": 5, + "maxItems": 5 + }, + "p": { + "description": "The p value for the quantile.", + "type": "number", + "format": "double" + } + }, + "required": [ + "desired_marker_positions", + "marker_heights", + "marker_positions", + "p" + ] + }, "Rack": { "description": "View of an Rack", "type": "object", diff --git a/oximeter/db/schema/replicated/5/up01.sql b/oximeter/db/schema/replicated/5/up01.sql new file mode 100644 index 0000000000..5fd811bcbb --- /dev/null +++ 
b/oximeter/db/schema/replicated/5/up01.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up02.sql b/oximeter/db/schema/replicated/5/up02.sql new file mode 100644 index 0000000000..78f6bf30fc --- /dev/null +++ b/oximeter/db/schema/replicated/5/up02.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up03.sql b/oximeter/db/schema/replicated/5/up03.sql new file mode 100644 index 0000000000..79208d2c45 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up03.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64), + min Int8, + max Int8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up04.sql b/oximeter/db/schema/replicated/5/up04.sql new file mode 100644 index 0000000000..1c8a62d2a5 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up04.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogrami8_local +ENGINE = 
Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up05.sql b/oximeter/db/schema/replicated/5/up05.sql new file mode 100644 index 0000000000..5736e4cfce --- /dev/null +++ b/oximeter/db/schema/replicated/5/up05.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up06.sql b/oximeter/db/schema/replicated/5/up06.sql new file mode 100644 index 0000000000..485c79dd7d --- /dev/null +++ b/oximeter/db/schema/replicated/5/up06.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up07.sql b/oximeter/db/schema/replicated/5/up07.sql new file mode 100644 index 0000000000..3fe9810a9e --- /dev/null +++ b/oximeter/db/schema/replicated/5/up07.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64), + min UInt8, + max UInt8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up08.sql 
b/oximeter/db/schema/replicated/5/up08.sql new file mode 100644 index 0000000000..80cf392978 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up08.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramu8_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up09.sql b/oximeter/db/schema/replicated/5/up09.sql new file mode 100644 index 0000000000..5d43e5db84 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up09.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up10.sql b/oximeter/db/schema/replicated/5/up10.sql new file mode 100644 index 0000000000..ea2776fd06 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up10.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up11.sql b/oximeter/db/schema/replicated/5/up11.sql new file mode 100644 index 0000000000..ac5ef4fbb9 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up11.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64), + min Int16, + max Int16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions 
Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up12.sql b/oximeter/db/schema/replicated/5/up12.sql new file mode 100644 index 0000000000..3e3cdd4e71 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up12.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogrami16_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up13.sql b/oximeter/db/schema/replicated/5/up13.sql new file mode 100644 index 0000000000..cfe3e278d4 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up13.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up14.sql b/oximeter/db/schema/replicated/5/up14.sql new file mode 100644 index 0000000000..4905db35c3 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up14.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up15.sql b/oximeter/db/schema/replicated/5/up15.sql new file mode 100644 index 0000000000..c505bb7892 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up15.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64), + min UInt16, + max UInt16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions 
Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up16.sql b/oximeter/db/schema/replicated/5/up16.sql new file mode 100644 index 0000000000..0a0607de26 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up16.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramu16_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up17.sql b/oximeter/db/schema/replicated/5/up17.sql new file mode 100644 index 0000000000..741cf9bed1 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up17.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up18.sql b/oximeter/db/schema/replicated/5/up18.sql new file mode 100644 index 0000000000..b4c7888e3e --- /dev/null +++ b/oximeter/db/schema/replicated/5/up18.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up19.sql b/oximeter/db/schema/replicated/5/up19.sql new file mode 100644 index 0000000000..391e77a1cf --- /dev/null +++ b/oximeter/db/schema/replicated/5/up19.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster +( + timeseries_name 
String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64), + min Int32, + max Int32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up20.sql b/oximeter/db/schema/replicated/5/up20.sql new file mode 100644 index 0000000000..4680183918 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up20.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogrami32_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up21.sql b/oximeter/db/schema/replicated/5/up21.sql new file mode 100644 index 0000000000..863ebdd677 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up21.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up22.sql b/oximeter/db/schema/replicated/5/up22.sql new file mode 100644 index 0000000000..969d0ec578 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up22.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up23.sql 
b/oximeter/db/schema/replicated/5/up23.sql new file mode 100644 index 0000000000..3b65222ab9 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up23.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64), + min UInt32, + max UInt32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up24.sql b/oximeter/db/schema/replicated/5/up24.sql new file mode 100644 index 0000000000..8e66095c28 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up24.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramu32_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up25.sql b/oximeter/db/schema/replicated/5/up25.sql new file mode 100644 index 0000000000..79f1337bb6 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up25.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up26.sql 
b/oximeter/db/schema/replicated/5/up26.sql new file mode 100644 index 0000000000..be7ac420c3 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up26.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up27.sql b/oximeter/db/schema/replicated/5/up27.sql new file mode 100644 index 0000000000..716d450f18 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up27.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64), + min Int64, + max Int64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up28.sql b/oximeter/db/schema/replicated/5/up28.sql new file mode 100644 index 0000000000..81031811c4 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up28.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogrami64_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up29.sql 
b/oximeter/db/schema/replicated/5/up29.sql new file mode 100644 index 0000000000..e02de80c59 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up29.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up30.sql b/oximeter/db/schema/replicated/5/up30.sql new file mode 100644 index 0000000000..4f938b220c --- /dev/null +++ b/oximeter/db/schema/replicated/5/up30.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up31.sql b/oximeter/db/schema/replicated/5/up31.sql new file mode 100644 index 0000000000..603e78e668 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up31.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64), + min UInt64, + max UInt64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up32.sql b/oximeter/db/schema/replicated/5/up32.sql new file mode 100644 index 0000000000..9fa2e1b730 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up32.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS 
oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramu64_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up33.sql b/oximeter/db/schema/replicated/5/up33.sql new file mode 100644 index 0000000000..36bb01b64b --- /dev/null +++ b/oximeter/db/schema/replicated/5/up33.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up34.sql b/oximeter/db/schema/replicated/5/up34.sql new file mode 100644 index 0000000000..00f8c60701 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up34.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up35.sql b/oximeter/db/schema/replicated/5/up35.sql new file mode 100644 index 0000000000..c1ef9c505c --- /dev/null +++ b/oximeter/db/schema/replicated/5/up35.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64), + min Float32, + max Float32, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL 
toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up36.sql b/oximeter/db/schema/replicated/5/up36.sql new file mode 100644 index 0000000000..935eac5e8c --- /dev/null +++ b/oximeter/db/schema/replicated/5/up36.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramf32_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/5/up37.sql b/oximeter/db/schema/replicated/5/up37.sql new file mode 100644 index 0000000000..7a1a1e19d6 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up37.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up38.sql b/oximeter/db/schema/replicated/5/up38.sql new file mode 100644 index 0000000000..c643d5532c --- /dev/null +++ b/oximeter/db/schema/replicated/5/up38.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster SYNC; diff --git a/oximeter/db/schema/replicated/5/up39.sql b/oximeter/db/schema/replicated/5/up39.sql new file mode 100644 index 0000000000..703832106a --- /dev/null +++ b/oximeter/db/schema/replicated/5/up39.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64), + min Float64, + max Float64, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + 
p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/replicated/5/up40.sql b/oximeter/db/schema/replicated/5/up40.sql new file mode 100644 index 0000000000..4f3e6c58a3 --- /dev/null +++ b/oximeter/db/schema/replicated/5/up40.sql @@ -0,0 +1,3 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster +AS oximeter.measurements_histogramf64_local +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); diff --git a/oximeter/db/schema/replicated/db-init.sql b/oximeter/db/schema/replicated/db-init.sql index 27df02b709..8f651b4510 100644 --- a/oximeter/db/schema/replicated/db-init.sql +++ b/oximeter/db/schema/replicated/db-init.sql @@ -374,21 +374,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER ox start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int8), - counts Array(UInt64) + counts Array(UInt64), + min Int8, + max Int8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS 
oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Int8), - counts Array(UInt64) -) +AS oximeter.measurements_histogrami8_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster @@ -398,21 +404,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER ox start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt8), - counts Array(UInt64) + counts Array(UInt64), + min UInt8, + max UInt8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(UInt8), - counts Array(UInt64) -) +AS oximeter.measurements_histogramu8_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster @@ -422,21 +434,27 @@ CREATE TABLE IF NOT EXISTS 
oximeter.measurements_histogrami16_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int16), - counts Array(UInt64) + counts Array(UInt64), + min Int16, + max Int16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Int16), - counts Array(UInt64) -) +AS oximeter.measurements_histogrami16_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster @@ -446,21 +464,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt16), - counts Array(UInt64) + counts Array(UInt64), + min UInt16, + max UInt16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions 
Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(UInt16), - counts Array(UInt64) -) +AS oximeter.measurements_histogramu16_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster @@ -470,21 +494,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int32), - counts Array(UInt64) + counts Array(UInt64), + min Int32, + max Int32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Int32), - counts Array(UInt64) -) +AS 
oximeter.measurements_histogrami32_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster @@ -494,21 +524,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt32), - counts Array(UInt64) + counts Array(UInt64), + min UInt32, + max UInt32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(UInt32), - counts Array(UInt64) -) +AS oximeter.measurements_histogramu32_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster @@ -518,21 +554,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int64), - counts Array(UInt64) + counts Array(UInt64), + min Int64, + max Int64, + sum_of_samples Int64, + 
squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Int64), - counts Array(UInt64) -) +AS oximeter.measurements_histogrami64_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster @@ -542,21 +584,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt64), - counts Array(UInt64) + counts Array(UInt64), + min UInt64, + max UInt64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL 
toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(UInt64), - counts Array(UInt64) -) +AS oximeter.measurements_histogramu64_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster @@ -566,21 +614,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Float32), - counts Array(UInt64) + counts Array(UInt64), + min Float32, + max Float32, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Float32), - counts Array(UInt64) -) +AS oximeter.measurements_histogramf32_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON 
CLUSTER oximeter_cluster @@ -590,21 +644,27 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER o start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Float64), - counts Array(UInt64) + counts Array(UInt64), + min Float64, + max Float64, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster -( - timeseries_name String, - timeseries_key UInt64, - start_time DateTime64(9, 'UTC'), - timestamp DateTime64(9, 'UTC'), - bins Array(Float64), - counts Array(UInt64) -) +AS oximeter.measurements_histogramf64_local ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); /* The field tables store named dimensions of each timeseries. 
diff --git a/oximeter/db/schema/single-node/5/up01.sql b/oximeter/db/schema/single-node/5/up01.sql new file mode 100644 index 0000000000..41751aac7a --- /dev/null +++ b/oximeter/db/schema/single-node/5/up01.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami8; diff --git a/oximeter/db/schema/single-node/5/up02.sql b/oximeter/db/schema/single-node/5/up02.sql new file mode 100644 index 0000000000..ba9b2c1762 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up02.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64), + min Int8, + max Int8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up03.sql b/oximeter/db/schema/single-node/5/up03.sql new file mode 100644 index 0000000000..dec872f0ae --- /dev/null +++ b/oximeter/db/schema/single-node/5/up03.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu8; diff --git a/oximeter/db/schema/single-node/5/up04.sql b/oximeter/db/schema/single-node/5/up04.sql new file mode 100644 index 0000000000..1337a4ffe2 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up04.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp 
DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64), + min UInt8, + max UInt8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up05.sql b/oximeter/db/schema/single-node/5/up05.sql new file mode 100644 index 0000000000..2a789beaf2 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up05.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami16; diff --git a/oximeter/db/schema/single-node/5/up06.sql b/oximeter/db/schema/single-node/5/up06.sql new file mode 100644 index 0000000000..7aa8fa6696 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up06.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64), + min Int16, + max Int16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git
a/oximeter/db/schema/single-node/5/up07.sql b/oximeter/db/schema/single-node/5/up07.sql new file mode 100644 index 0000000000..3fb69b754b --- /dev/null +++ b/oximeter/db/schema/single-node/5/up07.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu16; diff --git a/oximeter/db/schema/single-node/5/up08.sql b/oximeter/db/schema/single-node/5/up08.sql new file mode 100644 index 0000000000..cbe0aedc05 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up08.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64), + min UInt16, + max UInt16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up09.sql b/oximeter/db/schema/single-node/5/up09.sql new file mode 100644 index 0000000000..c57462016b --- /dev/null +++ b/oximeter/db/schema/single-node/5/up09.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami32; diff --git a/oximeter/db/schema/single-node/5/up10.sql b/oximeter/db/schema/single-node/5/up10.sql new file mode 100644 index 0000000000..571cdfb924 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up10.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 
'UTC'), + bins Array(Int32), + counts Array(UInt64), + min Int32, + max Int32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up11.sql b/oximeter/db/schema/single-node/5/up11.sql new file mode 100644 index 0000000000..5029f357aa --- /dev/null +++ b/oximeter/db/schema/single-node/5/up11.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu32; diff --git a/oximeter/db/schema/single-node/5/up12.sql b/oximeter/db/schema/single-node/5/up12.sql new file mode 100644 index 0000000000..c430ee9046 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up12.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64), + min UInt32, + max UInt32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/single-node/5/up13.sql b/oximeter/db/schema/single-node/5/up13.sql new file mode 100644 index 0000000000..a1669359ef --- /dev/null +++ b/oximeter/db/schema/single-node/5/up13.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogrami64; diff --git a/oximeter/db/schema/single-node/5/up14.sql b/oximeter/db/schema/single-node/5/up14.sql new file mode 100644 index 0000000000..bda2eab542 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up14.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64), + min Int64, + max Int64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up15.sql b/oximeter/db/schema/single-node/5/up15.sql new file mode 100644 index 0000000000..3bd8b61024 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up15.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramu64; diff --git a/oximeter/db/schema/single-node/5/up16.sql b/oximeter/db/schema/single-node/5/up16.sql new file mode 100644 index 0000000000..8d6f07c96e --- /dev/null +++ b/oximeter/db/schema/single-node/5/up16.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 
'UTC'), + bins Array(UInt64), + counts Array(UInt64), + min UInt64, + max UInt64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/5/up17.sql b/oximeter/db/schema/single-node/5/up17.sql new file mode 100644 index 0000000000..91d7743445 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up17.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf32; diff --git a/oximeter/db/schema/single-node/5/up18.sql b/oximeter/db/schema/single-node/5/up18.sql new file mode 100644 index 0000000000..a8335944a7 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up18.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64), + min Float32, + max Float32, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git 
a/oximeter/db/schema/single-node/5/up19.sql b/oximeter/db/schema/single-node/5/up19.sql new file mode 100644 index 0000000000..f690feff00 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up19.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS oximeter.measurements_histogramf64; diff --git a/oximeter/db/schema/single-node/5/up20.sql b/oximeter/db/schema/single-node/5/up20.sql new file mode 100644 index 0000000000..ceed990747 --- /dev/null +++ b/oximeter/db/schema/single-node/5/up20.sql @@ -0,0 +1,25 @@ +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64), + min Float64, + max Float64, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql index 510c1071c8..38e9d0b70c 100644 --- a/oximeter/db/schema/single-node/db-init.sql +++ b/oximeter/db/schema/single-node/db-init.sql @@ -235,7 +235,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 * to figure out another way to represent missing samples here. 
*/ bins Array(Int8), - counts Array(UInt64) + counts Array(UInt64), + min Int8, + max Int8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -248,7 +261,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt8), - counts Array(UInt64) + counts Array(UInt64), + min UInt8, + max UInt8, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -261,7 +287,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int16), - counts Array(UInt64) + counts Array(UInt64), + min Int16, + max Int16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = 
MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -274,7 +313,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt16), - counts Array(UInt64) + counts Array(UInt64), + min UInt16, + max UInt16, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -287,7 +339,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int32), - counts Array(UInt64) + counts Array(UInt64), + min Int32, + max Int32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -300,7 +365,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt32), - counts Array(UInt64) + counts Array(UInt64), + min UInt32, + max UInt32, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights 
Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -313,7 +391,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Int64), - counts Array(UInt64) + counts Array(UInt64), + min Int64, + max Int64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -326,7 +417,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(UInt64), - counts Array(UInt64) + counts Array(UInt64), + min UInt64, + max UInt64, + sum_of_samples Int64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -339,7 +443,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Float32), - counts Array(UInt64) + counts 
Array(UInt64), + min Float32, + max Float32, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) @@ -352,7 +469,20 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 start_time DateTime64(9, 'UTC'), timestamp DateTime64(9, 'UTC'), bins Array(Float64), - counts Array(UInt64) + counts Array(UInt64), + min Float64, + max Float64, + sum_of_samples Float64, + squared_mean Float64, + p50_marker_heights Array(Float64), + p50_marker_positions Array(UInt64), + p50_desired_marker_positions Array(Float64), + p90_marker_heights Array(Float64), + p90_marker_positions Array(UInt64), + p90_desired_marker_positions Array(Float64), + p99_marker_heights Array(Float64), + p99_marker_positions Array(UInt64), + p99_desired_marker_positions Array(Float64) ) ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) diff --git a/oximeter/db/src/bin/oxdb/oxql.rs b/oximeter/db/src/bin/oxdb/oxql.rs index 54e40afa15..ebe55dc7a7 100644 --- a/oximeter/db/src/bin/oxdb/oxql.rs +++ b/oximeter/db/src/bin/oxdb/oxql.rs @@ -11,6 +11,7 @@ use clap::Args; use crossterm::style::Stylize; use dropshot::EmptyScanParams; use dropshot::WhichPage; +use oximeter::TimeseriesSchema; use oximeter_db::oxql::query::special_idents; use oximeter_db::oxql::Table; use oximeter_db::Client; @@ -145,7 +146,70 @@ async fn list_timeseries(client: &Client) -> anyhow::Result<()> { } } -// Describe a single timeseries. +/// Prepare the columns for a timeseries or virtual table. 
+pub(crate) fn prepare_columns(
+    schema: &TimeseriesSchema,
+) -> (Vec<String>, Vec<String>) {
+    let mut cols = Vec::with_capacity(schema.field_schema.len() + 2);
+    let mut types = cols.clone();
+
+    for field in schema.field_schema.iter() {
+        cols.push(field.name.clone());
+        types.push(field.field_type.to_string());
+    }
+
+    cols.push(special_idents::TIMESTAMP.into());
+    types.push(special_idents::DATETIME64.into());
+
+    if schema.datum_type.is_histogram() {
+        cols.push(special_idents::START_TIME.into());
+        types.push(special_idents::DATETIME64.into());
+
+        cols.push(special_idents::BINS.into());
+        types.push(
+            special_idents::array_type_name_from_histogram_type(
+                schema.datum_type,
+            )
+            .unwrap(),
+        );
+
+        cols.push(special_idents::COUNTS.into());
+        types.push(special_idents::ARRAYU64.into());
+
+        cols.push(special_idents::MIN.into());
+        types.push(special_idents::FLOAT64.into());
+
+        cols.push(special_idents::MAX.into());
+        types.push(special_idents::FLOAT64.into());
+
+        cols.push(special_idents::SUM_OF_SAMPLES.into());
+        types.push(special_idents::UINT64.into());
+
+        cols.push(special_idents::SQUARED_MEAN.into());
+        types.push(special_idents::FLOAT64.into());
+
+        for quantile in ["P50", "P90", "P99"].iter() {
+            cols.push(format!("{}_MARKER_HEIGHTS", quantile));
+            types.push(special_idents::ARRAYFLOAT64.into());
+            cols.push(format!("{}_MARKER_POSITIONS", quantile));
+            types.push(special_idents::ARRAYINT64.into());
+            cols.push(format!("{}_DESIRED_MARKER_POSITIONS", quantile));
+            types.push(special_idents::ARRAYFLOAT64.into());
+        }
+    } else if schema.datum_type.is_cumulative() {
+        cols.push(special_idents::START_TIME.into());
+        types.push(special_idents::DATETIME64.into());
+        cols.push(special_idents::DATUM.into());
+        types.push(schema.datum_type.to_string());
+    } else {
+        cols.push(special_idents::DATUM.into());
+        types.push(schema.datum_type.to_string());
+    }
+
+    (cols, types)
+}
+
+/// Describe a single timeseries.
async fn describe_timeseries( client: &Client, timeseries: &str, @@ -158,40 +222,7 @@ async fn describe_timeseries( ), Ok(name) => { if let Some(schema) = client.schema_for_timeseries(&name).await? { - let mut cols = - Vec::with_capacity(schema.field_schema.len() + 2); - let mut types = cols.clone(); - for field in schema.field_schema.iter() { - cols.push(field.name.clone()); - types.push(field.field_type.to_string()); - } - cols.push(special_idents::TIMESTAMP.into()); - types.push(special_idents::DATETIME64.into()); - - if schema.datum_type.is_histogram() { - cols.push(special_idents::START_TIME.into()); - types.push(special_idents::DATETIME64.into()); - - cols.push(special_idents::BINS.into()); - types.push( - special_idents::array_type_name_from_histogram_type( - schema.datum_type, - ) - .unwrap(), - ); - - cols.push(special_idents::COUNTS.into()); - types.push(special_idents::ARRAYU64.into()); - } else if schema.datum_type.is_cumulative() { - cols.push(special_idents::START_TIME.into()); - types.push(special_idents::DATETIME64.into()); - cols.push(special_idents::DATUM.into()); - types.push(schema.datum_type.to_string()); - } else { - cols.push(special_idents::DATUM.into()); - types.push(schema.datum_type.to_string()); - } - + let (cols, types) = prepare_columns(&schema); let mut builder = tabled::builder::Builder::default(); builder.push_record(cols); // first record is the header builder.push_record(types); diff --git a/oximeter/db/src/bin/oxdb/sql.rs b/oximeter/db/src/bin/oxdb/sql.rs index d50a60f4d7..44780592fc 100644 --- a/oximeter/db/src/bin/oxdb/sql.rs +++ b/oximeter/db/src/bin/oxdb/sql.rs @@ -6,6 +6,7 @@ // Copyright 2024 Oxide Computer Company +use super::oxql; use crate::make_client; use clap::Args; use dropshot::EmptyScanParams; @@ -63,43 +64,7 @@ async fn describe_virtual_table( Err(_) => println!("Invalid timeseries name: {table}"), Ok(name) => { if let Some(schema) = client.schema_for_timeseries(&name).await? 
{ - let mut cols = - Vec::with_capacity(schema.field_schema.len() + 2); - let mut types = cols.clone(); - for field in schema.field_schema.iter() { - cols.push(field.name.clone()); - types.push(field.field_type.to_string()); - } - cols.push("timestamp".into()); - types.push("DateTime64".into()); - - if schema.datum_type.is_histogram() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - - cols.push("bins".into()); - types.push(format!( - "Array[{}]", - schema - .datum_type - .to_string() - .strip_prefix("Histogram") - .unwrap() - .to_lowercase(), - )); - - cols.push("counts".into()); - types.push("Array[u64]".into()); - } else if schema.datum_type.is_cumulative() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } else { - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } - + let (cols, types) = oxql::prepare_columns(&schema); let mut builder = tabled::builder::Builder::default(); builder.push_record(cols); // first record is the header builder.push_record(types); diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 0db71d195a..2d6212971e 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -4217,7 +4217,7 @@ mod tests { const FIRST_VERSION: u64 = 3; for version in FIRST_VERSION..=OXIMETER_VERSION { let upgrade_file_contents = Client::read_schema_upgrade_sql_files( - log, false, version, SCHEMA_DIR, + log, replicated, version, SCHEMA_DIR, ) .await .expect("failed to read schema upgrade files"); diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index d1ce131581..29586b8189 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -825,7 +825,13 @@ impl Client { datum_type: oximeter::DatumType, ) -> String { let value_columns = if datum_type.is_histogram() { - "timeseries_key, start_time, timestamp, bins, counts" + 
concat!( + "timeseries_key, start_time, timestamp, bins, counts, min, max, ", + "sum_of_samples, squared_mean, p50_marker_heights, p50_marker_positions, ", + "p50_desired_marker_positions, p90_marker_heights, p90_marker_positions, ", + "p90_desired_marker_positions, p99_marker_heights, p99_marker_positions, ", + "p99_desired_marker_positions" + ) } else if datum_type.is_cumulative() { "timeseries_key, start_time, timestamp, datum" } else { @@ -1203,7 +1209,7 @@ mod tests { // Create the first metric, starting from a count of 0. let mut metric = SomeMetric { foo: *foo, datum }; - // Create all the samples,, incrementing the datum and sample + // Create all the samples, incrementing the datum and sample // time. for i in 0..N_SAMPLES_PER_TIMESERIES { let sample_time = diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 106c347ef6..e7f9f56b63 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -4,7 +4,7 @@ //! Models for timeseries data in ClickHouse -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use crate::DbFieldSource; use crate::FieldSchema; @@ -16,6 +16,7 @@ use crate::TimeseriesSchema; use bytes::Bytes; use chrono::DateTime; use chrono::Utc; +use num::traits::Zero; use oximeter::histogram::Histogram; use oximeter::traits; use oximeter::types::Cumulative; @@ -27,6 +28,7 @@ use oximeter::types::FieldValue; use oximeter::types::Measurement; use oximeter::types::MissingDatum; use oximeter::types::Sample; +use oximeter::Quantile; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; @@ -43,7 +45,7 @@ use uuid::Uuid; /// - [`crate::Client::initialize_db_with_version`] /// - [`crate::Client::ensure_schema`] /// - The `clickhouse-schema-updater` binary in this crate -pub const OXIMETER_VERSION: u64 = 4; +pub const OXIMETER_VERSION: u64 = 5; // Wrapper type to represent a boolean in the database. // @@ -446,15 +448,83 @@ declare_cumulative_measurement_row! 
{ CumulativeU64MeasurementRow, u64, "cumulat declare_cumulative_measurement_row! { CumulativeF32MeasurementRow, f32, "cumulativef32" } declare_cumulative_measurement_row! { CumulativeF64MeasurementRow, f64, "cumulativef64" } +/// A representation of all quantiles for a histogram. +#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq)] +struct AllQuantiles { + p50_marker_heights: [f64; 5], + p50_marker_positions: [u64; 5], + p50_desired_marker_positions: [f64; 5], + + p90_marker_heights: [f64; 5], + p90_marker_positions: [u64; 5], + p90_desired_marker_positions: [f64; 5], + + p99_marker_heights: [f64; 5], + p99_marker_positions: [u64; 5], + p99_desired_marker_positions: [f64; 5], +} + +impl AllQuantiles { + /// Create a flat `AllQuantiles` struct from the given quantiles. + fn flatten(q50: Quantile, q90: Quantile, q99: Quantile) -> Self { + Self { + p50_marker_heights: q50.marker_heights(), + p50_marker_positions: q50.marker_positions(), + p50_desired_marker_positions: q50.desired_marker_positions(), + + p90_marker_heights: q90.marker_heights(), + p90_marker_positions: q90.marker_positions(), + p90_desired_marker_positions: q90.desired_marker_positions(), + + p99_marker_heights: q99.marker_heights(), + p99_marker_positions: q99.marker_positions(), + p99_desired_marker_positions: q99.desired_marker_positions(), + } + } + + /// Split the quantiles into separate `Quantile` structs in order of P. + fn split(&self) -> (Quantile, Quantile, Quantile) { + ( + Quantile::from_parts( + 0.5, + self.p50_marker_heights, + self.p50_marker_positions, + self.p50_desired_marker_positions, + ), + Quantile::from_parts( + 0.9, + self.p90_marker_heights, + self.p90_marker_positions, + self.p90_desired_marker_positions, + ), + Quantile::from_parts( + 0.99, + self.p99_marker_heights, + self.p99_marker_positions, + self.p99_desired_marker_positions, + ), + ) + } +} + // Representation of a histogram in ClickHouse. 
// -// The tables storing measurements of a histogram metric use a pair of arrays to represent them, -// for the bins and counts, respectively. This handles conversion between the type used to -// represent histograms in Rust, [`Histogram`], and this in-database representation. +// The tables storing measurements of a histogram metric use a set of arrays to +// represent them. This handles conversion between the type used to represent +// histograms in Rust, [`Histogram`], and this in-database representation. #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -struct DbHistogram { +struct DbHistogram +where + T: traits::HistogramSupport, +{ pub bins: Vec, pub counts: Vec, + pub min: T, + pub max: T, + pub sum_of_samples: T::Width, + pub squared_mean: f64, + #[serde(flatten)] + pub quantiles: AllQuantiles, } // We use an empty histogram to indicate a missing sample. @@ -467,9 +537,24 @@ struct DbHistogram { // // That means we can currently use an empty array from the database as a // sentinel for a missing sample. 
-impl DbHistogram { +impl DbHistogram +where + T: traits::HistogramSupport, +{ fn null() -> Self { - Self { bins: vec![], counts: vec![] } + let p50 = Quantile::p50(); + let p90 = Quantile::p90(); + let p99 = Quantile::p99(); + + Self { + bins: vec![], + counts: vec![], + min: T::zero(), + max: T::zero(), + sum_of_samples: T::Width::zero(), + squared_mean: 0.0, + quantiles: AllQuantiles::flatten(p50, p90, p99), + } } } @@ -478,8 +563,20 @@ where T: traits::HistogramSupport, { fn from(hist: &Histogram) -> Self { - let (bins, counts) = hist.to_arrays(); - Self { bins, counts } + let (bins, counts) = hist.bins_and_counts(); + Self { + bins, + counts, + min: hist.min(), + max: hist.max(), + sum_of_samples: hist.sum_of_samples(), + squared_mean: hist.squared_mean(), + quantiles: AllQuantiles::flatten( + hist.p50q(), + hist.p90q(), + hist.p99q(), + ), + } } } @@ -1255,7 +1352,10 @@ struct DbTimeseriesScalarCumulativeSample { // A histogram timestamped sample from a timeseries, as extracted from a query to the database. 
#[derive(Debug, Clone, Deserialize)] -struct DbTimeseriesHistogramSample { +struct DbTimeseriesHistogramSample +where + T: traits::HistogramSupport, +{ timeseries_key: TimeseriesKey, #[serde(with = "serde_timestamp")] start_time: DateTime, @@ -1263,6 +1363,12 @@ struct DbTimeseriesHistogramSample { timestamp: DateTime, bins: Vec, counts: Vec, + min: T, + max: T, + sum_of_samples: T::Width, + squared_mean: f64, + #[serde(flatten)] + quantiles: AllQuantiles, } impl From> for Measurement @@ -1314,14 +1420,30 @@ where .unwrap(), ) } else { - Datum::from( - Histogram::from_arrays( - sample.start_time, - sample.bins, - sample.counts, - ) - .unwrap(), + if sample.bins.len() != sample.counts.len() { + panic!( + "Array size mismatch: bins: {}, counts: {}", + sample.bins.len(), + sample.counts.len() + ); + } + + let (p50, p90, p99) = sample.quantiles.split(); + let hist = Histogram::from_parts( + sample.start_time, + sample.bins, + sample.counts, + sample.min, + sample.max, + sample.sum_of_samples, + sample.squared_mean, + p50, + p90, + p99, ) + .unwrap(); + + Datum::from(hist) }; Measurement::new(sample.timestamp, datum) } @@ -1475,12 +1597,16 @@ where (sample.timeseries_key, sample.into()) } -fn parse_timeseries_histogram_measurement( - line: &str, +fn parse_timeseries_histogram_measurement<'a, T>( + line: &'a str, ) -> (TimeseriesKey, Measurement) where - T: Into + traits::HistogramSupport + FromDbHistogram, + T: Into + + traits::HistogramSupport + + FromDbHistogram + + Deserialize<'a>, Datum: From>, + ::Width: Deserialize<'a>, { let sample = serde_json::from_str::>(line).unwrap(); @@ -1741,6 +1867,7 @@ pub(crate) fn parse_field_select_row( mod tests { use super::*; use chrono::Timelike; + use oximeter::histogram::Record; use oximeter::test_util; use oximeter::Datum; @@ -1826,9 +1953,18 @@ mod tests { hist.sample(1).unwrap(); hist.sample(10).unwrap(); let dbhist = DbHistogram::from(&hist); - let (bins, counts) = hist.to_arrays(); + let (bins, counts) = 
hist.bins_and_counts(); assert_eq!(dbhist.bins, bins); assert_eq!(dbhist.counts, counts); + assert_eq!(dbhist.min, hist.min()); + assert_eq!(dbhist.max, hist.max()); + assert_eq!(dbhist.sum_of_samples, hist.sum_of_samples()); + assert_eq!(dbhist.squared_mean, hist.squared_mean()); + + let (p50, p90, p99) = dbhist.quantiles.split(); + assert_eq!(p50, hist.p50q()); + assert_eq!(p90, hist.p90q()); + assert_eq!(p99, hist.p99q()); } #[test] @@ -1877,10 +2013,20 @@ mod tests { assert_eq!(table_name, "oximeter.measurements_histogramf64"); let unpacked: HistogramF64MeasurementRow = serde_json::from_str(&row).unwrap(); - let unpacked_hist = Histogram::from_arrays( + let (unpacked_p50, unpacked_p90, unpacked_p99) = + unpacked.datum.quantiles.split(); + + let unpacked_hist = Histogram::from_parts( unpacked.start_time, unpacked.datum.bins, unpacked.datum.counts, + unpacked.datum.min, + unpacked.datum.max, + unpacked.datum.sum_of_samples, + unpacked.datum.squared_mean, + unpacked_p50, + unpacked_p90, + unpacked_p99, ) .unwrap(); let measurement = &sample.measurement; @@ -1986,7 +2132,27 @@ mod tests { .with_nanosecond(123_456_789) .unwrap(); - let line = r#"{"timeseries_key": 12, "start_time": "2021-01-01 00:00:00.123456789", "timestamp": "2021-01-01 01:00:00.123456789", "bins": [0, 1], "counts": [1, 1] }"#; + let line = r#" + { + "timeseries_key": 12, + "start_time": "2021-01-01 00:00:00.123456789", + "timestamp": "2021-01-01 01:00:00.123456789", + "bins": [0, 1], + "counts": [1, 1], + "min": 0, + "max": 1, + "sum_of_samples": 2, + "squared_mean": 2.0, + "p50_marker_heights": [0.0, 0.0, 0.0, 0.0, 1.0], + "p50_marker_positions": [1, 2, 3, 4, 2], + "p50_desired_marker_positions": [1.0, 3.0, 5.0, 5.0, 5.0], + "p90_marker_heights": [0.0, 0.0, 0.0, 0.0, 1.0], + "p90_marker_positions": [1, 2, 3, 4, 2], + "p90_desired_marker_positions": [1.0, 3.0, 5.0, 5.0, 5.0], + "p99_marker_heights": [0.0, 0.0, 0.0, 0.0, 1.0], + "p99_marker_positions": [1, 2, 3, 4, 2], + 
"p99_desired_marker_positions": [1.0, 3.0, 5.0, 5.0, 5.0] + }"#; let (key, measurement) = parse_measurement_from_row(line, DatumType::HistogramI64); assert_eq!(key, 12); @@ -1997,6 +2163,38 @@ mod tests { }; assert_eq!(hist.n_bins(), 3); assert_eq!(hist.n_samples(), 2); + assert_eq!(hist.min(), 0); + assert_eq!(hist.max(), 1); + assert_eq!(hist.sum_of_samples(), 2); + assert_eq!(hist.squared_mean(), 2.); + assert_eq!( + hist.p50q(), + Quantile::from_parts( + 0.5, + [0.0, 0.0, 0.0, 0.0, 1.0], + [1, 2, 3, 4, 2], + [1.0, 3.0, 5.0, 5.0, 5.0], + ) + ); + assert_eq!( + hist.p90q(), + Quantile::from_parts( + 0.9, + [0.0, 0.0, 0.0, 0.0, 1.0], + [1, 2, 3, 4, 2], + [1.0, 3.0, 5.0, 5.0, 5.0], + ) + ); + + assert_eq!( + hist.p99q(), + Quantile::from_parts( + 0.99, + [0.0, 0.0, 0.0, 0.0, 1.0], + [1, 2, 3, 4, 2], + [1.0, 3.0, 5.0, 5.0, 5.0], + ) + ); } #[test] @@ -2007,32 +2205,6 @@ mod tests { assert_eq!(measurement.datum(), &Datum::from("/some/path")); } - #[test] - fn test_histogram_to_arrays() { - let mut hist = Histogram::new(&[0, 10, 20]).unwrap(); - hist.sample(1).unwrap(); - hist.sample(11).unwrap(); - - let (bins, counts) = hist.to_arrays(); - assert_eq!( - bins.len(), - counts.len(), - "Bins and counts should have the same size" - ); - assert_eq!( - bins.len(), - hist.n_bins(), - "Paired-array bins should be of the same length as the histogram" - ); - assert_eq!(counts, &[0, 1, 1, 0], "Paired-array counts are incorrect"); - - let rebuilt = - Histogram::from_arrays(hist.start_time(), bins, counts).unwrap(); - assert_eq!( - hist, rebuilt, - "Histogram reconstructed from paired arrays is not correct" - ); - } #[test] fn test_parse_bytes_measurement() { let s = r#"{"timeseries_key": 101, "timestamp": "2023-11-21 18:25:21.963714255", "datum": "\u0001\u0002\u0003"}"#; diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index 4e838f3388..9e796bc730 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ 
b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -518,8 +518,9 @@ fn implicit_field_names( MetricType::Gauge, DataType::IntegerDistribution | DataType::DoubleDistribution, ) => { - out.insert(special_idents::BINS); - out.insert(special_idents::COUNTS); + special_idents::DISTRIBUTION_IDENTS.iter().for_each(|ident| { + out.insert(ident); + }); } // Scalars, either delta or cumulatives. ( @@ -534,8 +535,9 @@ fn implicit_field_names( MetricType::Delta | MetricType::Cumulative, DataType::IntegerDistribution | DataType::DoubleDistribution, ) => { - out.insert(special_idents::BINS); - out.insert(special_idents::COUNTS); + special_idents::DISTRIBUTION_IDENTS.iter().for_each(|ident| { + out.insert(ident); + }); out.insert(special_idents::START_TIME); } // Impossible combinations diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/db/src/oxql/point.rs index e12214aaf0..7805ec64be 100644 --- a/oximeter/db/src/oxql/point.rs +++ b/oximeter/db/src/oxql/point.rs @@ -11,12 +11,15 @@ use anyhow::Context; use chrono::DateTime; use chrono::Utc; use num::ToPrimitive; +use oximeter::traits::HistogramSupport; use oximeter::DatumType; use oximeter::Measurement; +use oximeter::Quantile; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use std::fmt; +use std::ops::Sub; /// The type of each individual data point's value in a timeseries. #[derive( @@ -1428,7 +1431,7 @@ impl ValueArray { CumulativeDatum::DoubleDistribution(last), oximeter::Datum::HistogramF32(new), ) => { - let new = Distribution::from(new); + let new = Distribution::::from(new); self.as_double_distribution_mut()? .push(Some(new.checked_sub(&last)?)); } @@ -1436,7 +1439,7 @@ impl ValueArray { CumulativeDatum::DoubleDistribution(last), oximeter::Datum::HistogramF64(new), ) => { - let new = Distribution::from(new); + let new = Distribution::::from(new); self.as_double_distribution_mut()? 
.push(Some(new.checked_sub(&last)?)); } @@ -1517,15 +1520,30 @@ pub trait DistributionSupport: impl DistributionSupport for i64 {} impl DistributionSupport for f64 {} -/// A distribution is a sequence of bins and counts in those bins. +/// A distribution is a sequence of bins and counts in those bins, and some +/// statistical information tracked to compute the mean, standard deviation, and +/// quantile estimates. +/// +/// Min, max, and the p-* quantiles are treated as optional due to the +/// possibility of distribution operations, like subtraction. #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] #[schemars(rename = "Distribution{T}")] pub struct Distribution { bins: Vec, counts: Vec, + min: Option, + max: Option, + sum_of_samples: T, + squared_mean: f64, + p50: Option, + p90: Option, + p99: Option, } -impl fmt::Display for Distribution { +impl fmt::Display for Distribution +where + T: DistributionSupport + HistogramSupport + Sub, +{ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let elems = self .bins @@ -1534,12 +1552,52 @@ impl fmt::Display for Distribution { .map(|(bin, count)| format!("{bin}: {count}")) .collect::>() .join(", "); - write!(f, "{}", elems) + + let unwrap_estimate = |opt: Option| { + opt.map_or("None".to_string(), |v| match v.estimate() { + Ok(v) => v.to_string(), + Err(err) => err.to_string(), + }) + }; + + let p50_estimate = unwrap_estimate(self.p50); + let p90_estimate = unwrap_estimate(self.p90); + let p99_estimate = unwrap_estimate(self.p99); + + write!( + f, + "{}, min: {}, max: {}, mean: {}, std_dev: {}, p50: {}, p90: {}, p99: {}", + elems, + self.min.map_or("none".to_string(), |m| m.to_string()), + self.max.unwrap_or_default(), + self.mean(), + self.std_dev().unwrap_or_default(), + p50_estimate, + p90_estimate, + p99_estimate + ) } } -impl Distribution { - // Subtract two distributions, checking that they have the same bins. 
+impl Distribution +where + T: DistributionSupport + HistogramSupport + Sub, +{ + /// Subtract two distributions, checking that they have the same bins. + /// + /// Min and max values are returned as None, as they lose meaning + /// when subtracting distributions. The same is true for p50, p90, and p99 + /// quantiles. + /// + /// TODO: It's not really clear how to compute the "difference" of two + /// histograms for items like min, max, p*'s. It's certainly not linear, and + /// although we might be able to make some estimates in the case of min and + /// max, we'll defer it for now. Instead, we'll store None for all these + /// values when computing the diff. They will be very useful later, when we + /// start generating distributions in OxQL itself, from a sequence of + /// scalars (similar to a DTrace aggregation). We'll wait to put that in + /// place until we have more data that we want to start aggregating that + /// way. fn checked_sub( &self, rhs: &Distribution, @@ -1548,14 +1606,34 @@ impl Distribution { self.bins == rhs.bins, "Cannot subtract distributions with different bins", ); - let counts = self + let counts: Vec<_> = self .counts .iter() - .zip(rhs.counts.iter().copied()) - .map(|(x, y)| x.checked_sub(y)) + .zip(rhs.counts.iter()) + .map(|(x, y)| x.checked_sub(*y)) .collect::>() .context("Underflow subtracting distributions values")?; - Ok(Self { bins: self.bins.clone(), counts }) + + // Subtract sum_of_samples. + // This can be negative as T is either i64 or f64. + let sum_of_samples = self.sum_of_samples - rhs.sum_of_samples; + + // Squared means are not linear, so we subtract the means and then + // square that number. + let sub_means = self.mean() - rhs.mean(); + let squared_mean = sub_means.powi(2); + + Ok(Self { + bins: self.bins.clone(), + counts, + min: None, + max: None, + sum_of_samples, + squared_mean, + p50: None, + p90: None, + p99: None, + }) } /// Return the slice of bins. 
@@ -1568,6 +1646,85 @@ impl Distribution { &self.counts } + /// Return the number of samples in the distribution. + pub fn n_samples(&self) -> u64 { + self.counts.iter().sum() + } + + /// Return the minimum value in the distribution. + pub fn min(&self) -> Option { + self.min + } + + /// Return the maximum value in the distribution. + pub fn max(&self) -> Option { + self.max + } + + /// Return the mean of the distribution. + pub fn mean(&self) -> f64 { + if self.n_samples() > 0 { + // We can unwrap here because we know n_samples() > 0, + // so the sum_of_samples should convert to f64 without issue. + self.sum_of_samples + .to_f64() + .map(|sum| sum / (self.n_samples() as f64)) + .unwrap() + } else { + 0. + } + } + + /// Return the variance for inputs to the histogram based on the Welford's + /// algorithm, using the squared mean (M2). + /// + /// Returns `None` if there are fewer than two samples. + pub fn variance(&self) -> Option { + (self.n_samples() > 1) + .then(|| self.squared_mean / (self.n_samples() as f64)) + } + + /// Return the sample variance for inputs to the histogram based on the + /// Welford's algorithm, using the squared mean (M2). + /// + /// Returns `None` if there are fewer than two samples. + pub fn sample_variance(&self) -> Option { + (self.n_samples() > 1) + .then(|| self.squared_mean / ((self.n_samples() - 1) as f64)) + } + + /// Return the standard deviation for inputs to the histogram. + /// + /// This is a biased (as a consequence of Jensen’s inequality), estimate of + /// the population deviation that returns the standard deviation of the + /// samples seen by the histogram. + /// + /// Returns `None` if the variance is `None`, i.e., if there are fewer than + /// two samples. + pub fn std_dev(&self) -> Option { + match self.variance() { + Some(variance) => Some(variance.sqrt()), + None => None, + } + } + + /// Return the "corrected" sample standard deviation for inputs to the + /// histogram. 
+ /// + /// This is a less biased estimate of the population deviation, applying + /// Bessel's correction, which corrects the bias in the estimation of the + /// population variance, and some, but not all, of the bias in the estimation + /// of the population standard deviation. + /// + /// Returns `None` if the variance is `None`, i.e., if there are fewer than + /// two samples. + pub fn sample_std_dev(&self) -> Option { + match self.sample_variance() { + Some(variance) => Some(variance.sqrt()), + None => None, + } + } + /// Return an iterator over each bin and count. pub fn iter(&self) -> impl ExactSizeIterator + '_ { self.bins.iter().zip(self.counts.iter()) @@ -1578,8 +1735,18 @@ macro_rules! i64_dist_from { ($t:ty) => { impl From<&oximeter::histogram::Histogram<$t>> for Distribution { fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { - let (bins, counts) = hist.to_arrays(); - Self { bins: bins.into_iter().map(i64::from).collect(), counts } + let (bins, counts) = hist.bins_and_counts(); + Self { + bins: bins.into_iter().map(i64::from).collect(), + counts, + min: Some(hist.min() as i64), + max: Some(hist.max() as i64), + sum_of_samples: hist.sum_of_samples(), + squared_mean: hist.squared_mean(), + p50: Some(hist.p50q()), + p90: Some(hist.p90q()), + p99: Some(hist.p99q()), + } } } @@ -1604,13 +1771,23 @@ impl TryFrom<&oximeter::histogram::Histogram> for Distribution { fn try_from( hist: &oximeter::histogram::Histogram, ) -> Result { - let (bins, counts) = hist.to_arrays(); + let (bins, counts) = hist.bins_and_counts(); let bins = bins .into_iter() .map(i64::try_from) .collect::>() .context("Overflow converting u64 to i64")?; - Ok(Self { bins, counts }) + Ok(Self { + bins, + counts, + min: Some(hist.min() as i64), + max: Some(hist.max() as i64), + sum_of_samples: hist.sum_of_samples(), + squared_mean: hist.squared_mean(), + p50: Some(hist.p50q()), + p90: Some(hist.p90q()), + p99: Some(hist.p99q()), + }) } } @@ -1627,8 +1804,18 @@ macro_rules! 
f64_dist_from { ($t:ty) => { impl From<&oximeter::histogram::Histogram<$t>> for Distribution { fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { - let (bins, counts) = hist.to_arrays(); - Self { bins: bins.into_iter().map(f64::from).collect(), counts } + let (bins, counts) = hist.bins_and_counts(); + Self { + bins: bins.into_iter().map(f64::from).collect(), + counts, + min: Some(hist.min() as f64), + max: Some(hist.max() as f64), + sum_of_samples: hist.sum_of_samples() as f64, + squared_mean: hist.squared_mean(), + p50: Some(hist.p50q()), + p90: Some(hist.p90q()), + p99: Some(hist.p99q()), + } } } @@ -1645,12 +1832,12 @@ f64_dist_from!(f64); #[cfg(test)] mod tests { - use crate::oxql::point::{DataType, ValueArray}; - use super::{Distribution, MetricType, Points, Values}; + use crate::oxql::point::{DataType, ValueArray}; use chrono::{DateTime, Utc}; - use oximeter::types::Cumulative; - use oximeter::Measurement; + use oximeter::{ + histogram::Record, types::Cumulative, Measurement, Quantile, + }; use std::time::Duration; #[test] @@ -1747,6 +1934,38 @@ mod tests { ); } + #[test] + fn test_sub_between_histogram_distributions() { + let now = Utc::now(); + let current1 = now + Duration::from_secs(1); + let mut hist1 = + oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + hist1.sample(1).unwrap(); + hist1.set_start_time(current1); + let current2 = now + Duration::from_secs(2); + let mut hist2 = + oximeter::histogram::Histogram::new(&[0i64, 10, 20]).unwrap(); + hist2.sample(5).unwrap(); + hist2.sample(10).unwrap(); + hist2.sample(15).unwrap(); + hist2.set_start_time(current2); + let dist1 = Distribution::from(&hist1); + let dist2 = Distribution::from(&hist2); + + let diff = dist2.checked_sub(&dist1).unwrap(); + assert_eq!(diff.bins(), &[i64::MIN, 0, 10, 20]); + assert_eq!(diff.counts(), &[0, 0, 2, 0]); + assert_eq!(diff.n_samples(), 2); + assert!(diff.min().is_none()); + assert!(diff.max().is_none()); + assert_eq!(diff.mean(), 14.5); + 
assert_eq!(diff.std_dev(), Some(6.363961030678928)); + assert_eq!(diff.sample_std_dev(), Some(9.0)); + assert!(diff.p50.is_none()); + assert!(diff.p90.is_none()); + assert!(diff.p99.is_none()); + } + fn timestamps(n: usize) -> Vec> { let now = Utc::now(); let mut out = Vec::with_capacity(n); @@ -1972,7 +2191,17 @@ mod tests { timestamps: timestamps(1), values: vec![Values { values: ValueArray::IntegerDistribution(vec![Some( - Distribution { bins: vec![0, 1, 2], counts: vec![0; 3] }, + Distribution { + bins: vec![0, 1, 2], + counts: vec![0; 3], + min: Some(0), + max: Some(2), + sum_of_samples: 0, + squared_mean: 0.0, + p50: Some(Quantile::p50()), + p90: Some(Quantile::p90()), + p99: Some(Quantile::p99()), + }, )]), metric_type: MetricType::Gauge, }], @@ -2012,6 +2241,13 @@ mod tests { Distribution { bins: vec![0.0, 1.0, 2.0], counts: vec![0; 3], + min: Some(0.0), + max: Some(2.0), + sum_of_samples: 0.0, + squared_mean: 0.0, + p50: Some(Quantile::p50()), + p90: Some(Quantile::p90()), + p99: Some(Quantile::p99()), }, )]), metric_type: MetricType::Gauge, diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index 1c4383d68d..40a6c82f93 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -29,13 +29,45 @@ use std::time::Duration; pub mod special_idents { use oximeter::DatumType; + macro_rules! 
gen_marker { + ($p:expr, $field:expr) => { + concat!("p", $p, "_", $field) + }; + } + pub const TIMESTAMP: &str = "timestamp"; pub const START_TIME: &str = "start_time"; pub const DATUM: &str = "datum"; pub const BINS: &str = "bins"; pub const COUNTS: &str = "counts"; + pub const MIN: &str = "min"; + pub const MAX: &str = "max"; + pub const SUM_OF_SAMPLES: &str = "sum_of_samples"; + pub const SQUARED_MEAN: &str = "squared_mean"; pub const DATETIME64: &str = "DateTime64"; pub const ARRAYU64: &str = "Array[u64]"; + pub const ARRAYFLOAT64: &str = "Array[f64]"; + pub const ARRAYINT64: &str = "Array[i64]"; + pub const FLOAT64: &str = "f64"; + pub const UINT64: &str = "u64"; + + pub const DISTRIBUTION_IDENTS: [&str; 15] = [ + "bins", + "counts", + "min", + "max", + "sum_of_samples", + "squared_mean", + gen_marker!("50", "marker_heights"), + gen_marker!("50", "marker_positions"), + gen_marker!("50", "desired_marker_positions"), + gen_marker!("90", "marker_heights"), + gen_marker!("90", "marker_positions"), + gen_marker!("90", "desired_marker_positions"), + gen_marker!("99", "marker_heights"), + gen_marker!("99", "marker_positions"), + gen_marker!("99", "desired_marker_positions"), + ]; pub fn array_type_name_from_histogram_type( type_: DatumType, diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs index f3082dcaa5..e434608b1c 100644 --- a/oximeter/db/src/sql/mod.rs +++ b/oximeter/db/src/sql/mod.rs @@ -610,12 +610,31 @@ impl RestrictedQuery { // Return the required measurement columns for a specific datum type. // // Scalar measurements have only a timestamp and datum. Cumulative counters - // have those plus a start_time. And histograms have those plus the bins. + // have those plus a start_time. And histograms have those plus the bins, + // counts, min, max, sum of samples, sum of squares, and quantile arrays. 
fn datum_type_to_columns( datum_type: &DatumType, ) -> &'static [&'static str] { if datum_type.is_histogram() { - &["start_time", "timestamp", "bins", "counts"] + &[ + "start_time", + "timestamp", + "bins", + "counts", + "min", + "max", + "sum_of_samples", + "squared_mean", + "p50_marker_heights", + "p50_marker_positions", + "p50_desired_marker_positions", + "p90_marker_heights", + "p90_marker_positions", + "p90_desired_marker_positions", + "p99_marker_heights", + "p99_marker_positions", + "p99_desired_marker_positions", + ] } else if datum_type.is_cumulative() { &["start_time", "timestamp", "datum"] } else { diff --git a/oximeter/instruments/src/http.rs b/oximeter/instruments/src/http.rs index dcbaf65c06..4bc6cf8677 100644 --- a/oximeter/instruments/src/http.rs +++ b/oximeter/instruments/src/http.rs @@ -12,8 +12,10 @@ use dropshot::{ use futures::Future; use http::StatusCode; use http::Uri; -use oximeter::histogram::Histogram; -use oximeter::{Metric, MetricsError, Producer, Sample, Target}; +use oximeter::{ + histogram::Histogram, histogram::Record, Metric, MetricsError, Producer, + Sample, Target, +}; use std::collections::BTreeMap; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index 2445e0483a..0fe52bc4ac 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -11,6 +11,7 @@ workspace = true [dependencies] bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true +float-ord.workspace = true num.workspace = true omicron-common.workspace = true oximeter-macro-impl.workspace = true @@ -25,6 +26,8 @@ omicron-workspace-hack.workspace = true [dev-dependencies] approx.workspace = true +rand = { workspace = true, features = ["std_rng"] } +rand_distr.workspace = true rstest.workspace = true serde_json.workspace = true trybuild.workspace = true diff --git a/oximeter/oximeter/src/histogram.rs b/oximeter/oximeter/src/histogram.rs index 
82b9916153..9ce7b65121 100644 --- a/oximeter/oximeter/src/histogram.rs +++ b/oximeter/oximeter/src/histogram.rs @@ -4,23 +4,28 @@ //! Types for managing metrics that are histograms. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company +use super::Quantile; +use super::QuantileError; use chrono::DateTime; use chrono::Utc; use num::traits::Bounded; use num::traits::FromPrimitive; use num::traits::Num; use num::traits::ToPrimitive; +use num::traits::Zero; +use num::CheckedAdd; +use num::CheckedMul; use num::Float; use num::Integer; use num::NumCast; use schemars::JsonSchema; -use serde::de::DeserializeOwned; use serde::Deserialize; use serde::Serialize; use std::cmp::Ordering; use std::num::NonZeroUsize; +use std::ops::AddAssign; use std::ops::Bound; use std::ops::Range; use std::ops::RangeBounds; @@ -37,24 +42,34 @@ pub trait HistogramSupport: + Bounded + JsonSchema + Serialize - + DeserializeOwned + Clone + Num + + Zero + FromPrimitive + ToPrimitive + + AddAssign + NumCast + 'static { type Power; + type Width: HistogramAdditiveWidth; /// Return true if `self` is a finite number, not NAN or infinite. fn is_finite(&self) -> bool; } +/// Used for designating the subset of types that can be used as the width for +/// summing up values in a histogram. +pub trait HistogramAdditiveWidth: HistogramSupport {} + +impl HistogramAdditiveWidth for i64 {} +impl HistogramAdditiveWidth for f64 {} + macro_rules! impl_int_histogram_support { ($($type:ty),+) => { $( impl HistogramSupport for $type { type Power = u16; + type Width = i64; fn is_finite(&self) -> bool { true } @@ -70,6 +85,7 @@ macro_rules! impl_float_histogram_support { $( impl HistogramSupport for $type { type Power = i16; + type Width = f64; fn is_finite(&self) -> bool { <$type>::is_finite(*self) } @@ -93,8 +109,10 @@ pub enum HistogramError { NonmonotonicBins, /// A non-finite was encountered, either as a bin edge or a sample. 
- #[error("Bin edges and samples must be finite values, found: {0:?}")] - NonFiniteValue(String), + #[error( + "Bin edges and samples must be finite values, not Infinity or NaN" + )] + NonFiniteValue, /// Error returned when two neighboring bins are not adjoining (there's space between them) #[error("Neigboring bins {left} and {right} are not adjoining")] @@ -104,8 +122,13 @@ pub enum HistogramError { #[error("Bin and count arrays must have the same size, found {n_bins} and {n_counts}")] ArraySizeMismatch { n_bins: usize, n_counts: usize }, + /// Error returned when a quantization error occurs. #[error("Quantization error")] Quantization(#[from] QuantizationError), + + /// Error returned when a quantile error occurs. + #[error("Quantile error")] + Quantile(#[from] QuantileError), } /// Errors occurring during quantizated bin generation. @@ -272,6 +295,10 @@ pub struct Bin { pub count: u64, } +/// Internal, creation-specific newtype wrapper around `Vec>` to +/// implement conversion(s). +struct Bins(Vec>); + /// Histogram metric /// /// A histogram maintains the count of any number of samples, over a set of bins. Bins are @@ -333,12 +360,139 @@ pub struct Bin { // `Histogram::with_log_linear_bins()` are exactly the ones expected. #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema)] #[schemars(rename = "Histogram{T}")] -pub struct Histogram { +pub struct Histogram +where + T: HistogramSupport, +{ + /// The start time of the histogram. start_time: DateTime, + /// The bins of the histogram. bins: Vec>, + /// The total number of samples in the histogram. n_samples: u64, + /// The minimum value of all samples in the histogram. + min: T, + /// The maximum value of all samples in the histogram. + max: T, + /// The sum of all samples in the histogram. + sum_of_samples: T::Width, + /// M2 for Welford's algorithm for variance calculation. 
+ /// + /// Read about [Welford's algorithm](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm) + /// for more information on the algorithm. + squared_mean: f64, + /// p50 Quantile + p50: Quantile, + /// p90 Quantile + p90: Quantile, + /// p99 Quantile + p99: Quantile, } +/// A trait for recording samples into a histogram. +pub trait Record { + /// Add a new sample into the histogram. + /// + /// This bumps the internal counter at the bin containing `value`. An `Err` is returned if the + /// sample is not within the distribution's support (non-finite). + fn sample(&mut self, value: T) -> Result<(), HistogramError>; +} + +macro_rules! impl_int_sample { + ($($type:ty),+) => { + $( + impl Record<$type> for Histogram<$type> where $type: HistogramSupport + Integer + CheckedAdd + CheckedMul { + fn sample(&mut self, value: $type) -> Result<(), HistogramError> { + ensure_finite(value)?; + + if self.n_samples == 0 { + self.min = <$type>::max_value(); + self.max = <$type>::min_value(); + } + + // For squared mean (M2) calculation, before we update the + // count. + let value_f = value as f64; + let current_mean = self.mean(); + + let index = self + .bins + .binary_search_by(|bin| bin.range.cmp(&value).reverse()) + .unwrap(); // The `ensure_finite` call above catches values that don't end up in a bin + self.bins[index].count += 1; + self.n_samples += 1; + self.min = self.min.min(value); + self.max = self.max.max(value); + self.sum_of_samples = self.sum_of_samples.saturating_add(value as i64); + + let delta = value_f - current_mean; + let updated_mean = current_mean + delta / (self.n_samples as f64); + let delta2 = value_f - updated_mean; + self.squared_mean += (delta * delta2); + + self.p50.append(value)?; + self.p90.append(value)?; + self.p99.append(value)?; + Ok(()) + } + } + )+ + } +} + +impl_int_sample! { i8, u8, i16, u16, i32, u32, i64, u64 } + +macro_rules! 
impl_float_sample { + ($($type:ty),+) => { + $( + impl Record<$type> for Histogram<$type> where $type: HistogramSupport + Float { + fn sample(&mut self, value: $type) -> Result<(), HistogramError> { + ensure_finite(value)?; + + if self.n_samples == 0 { + self.min = <$type as num::Bounded>::max_value(); + self.max = <$type as num::Bounded>::min_value(); + } + + // For squared mean (M2) calculation, before we update the + // count. + let value_f = value as f64; + let current_mean = self.mean(); + + let index = self + .bins + .binary_search_by(|bin| bin.range.cmp(&value).reverse()) + .unwrap(); // The `ensure_finite` call above catches values that don't end up in a bin + self.bins[index].count += 1; + self.n_samples += 1; + + if value < self.min { + self.min = value; + } + if value > self.max { + self.max = value; + } + + self.sum_of_samples += value_f; + + let delta = value_f - current_mean; + let updated_mean = current_mean + delta / (self.n_samples as f64); + let delta2 = value_f - updated_mean; + self.squared_mean += (delta * delta2); + + self.p50.append(value)?; + self.p90.append(value)?; + self.p99.append(value)?; + + Ok(()) + } + } + )+ + } +} + +impl_float_sample! { f32, f64 } + impl Histogram where T: HistogramSupport, @@ -435,67 +589,82 @@ where if let Bound::Excluded(end) = bins_.last().unwrap().range.end_bound() { ensure_finite(*end)?; } - Ok(Self { start_time: Utc::now(), bins: bins_, n_samples: 0 }) + Ok(Self { + start_time: Utc::now(), + bins: bins_, + n_samples: 0, + min: T::zero(), + max: T::zero(), + sum_of_samples: T::Width::zero(), + squared_mean: 0.0, + p50: Quantile::p50(), + p90: Quantile::p90(), + p99: Quantile::p99(), + }) } /// Construct a new histogram from left bin edges. /// - /// The left edges of the bins must be specified as a non-empty, monotonically increasing - /// slice. An `Err` is returned if either constraint is violated. + /// The left edges of the bins must be specified as a non-empty, + /// monotonically increasing slice. 
An `Err` is returned if either + /// constraint is violated. pub fn new(left_edges: &[T]) -> Result { - let mut items = left_edges.iter(); - let mut bins = Vec::with_capacity(left_edges.len() + 1); - let mut current = *items.next().ok_or(HistogramError::EmptyBins)?; - ensure_finite(current)?; - let min = ::min_value(); - if current > min { - // Bin greater than the minimum was specified, insert a new one from `MIN..current`. - bins.push(Bin { range: BinRange::range(min, current), count: 0 }); - } else if current == min { - // An edge *at* the minimum was specified. Consume it, and insert a bin from - // `MIN..next`, if one exists. If one does not, or if this is the last item, the - // following loop will not be entered. - let next = - items.next().cloned().unwrap_or_else(::max_value); - bins.push(Bin { range: BinRange::range(min, next), count: 0 }); - current = next; - } - for &next in items { - if current < next { - ensure_finite(next)?; - bins.push(Bin { - range: BinRange::range(current, next), - count: 0, - }); - current = next; - } else if current >= next { - return Err(HistogramError::NonmonotonicBins); - } else { - return Err(HistogramError::NonFiniteValue(format!( - "{:?}", - current - ))); - } + let bins = Bins::try_from(left_edges)?; + Ok(Self { + start_time: Utc::now(), + bins: bins.0, + n_samples: 0, + min: T::zero(), + max: T::zero(), + sum_of_samples: T::Width::zero(), + squared_mean: 0.0, + p50: Quantile::p50(), + p90: Quantile::p90(), + p99: Quantile::p99(), + }) + } + + /// Construct a new histogram with the given struct information, including + /// bins, counts, and quantiles. 
+ #[allow(clippy::too_many_arguments)] + pub fn from_parts( + start_time: DateTime, + bins: Vec, + counts: Vec, + min: T, + max: T, + sum_of_samples: T::Width, + squared_mean: f64, + p50: Quantile, + p90: Quantile, + p99: Quantile, + ) -> Result { + if bins.len() != counts.len() { + return Err(HistogramError::ArraySizeMismatch { + n_bins: bins.len(), + n_counts: counts.len(), + }); } - if current < ::max_value() { - bins.push(Bin { range: BinRange::from(current), count: 0 }); + + let mut bins = Bins::try_from(bins.as_slice())?.0; + let mut n_samples = 0; + for (bin, count) in bins.iter_mut().zip(counts.into_iter()) { + bin.count = count; + n_samples += count; } - Ok(Self { start_time: Utc::now(), bins, n_samples: 0 }) - } - /// Add a new sample into the histogram. - /// - /// This bumps the internal counter at the bin containing `value`. An `Err` is returned if the - /// sample is not within the distribution's support (non-finite). - pub fn sample(&mut self, value: T) -> Result<(), HistogramError> { - ensure_finite(value)?; - let index = self - .bins - .binary_search_by(|bin| bin.range.cmp(&value).reverse()) - .unwrap(); // The `ensure_finite` call above catches values that don't end up in a bin - self.bins[index].count += 1; - self.n_samples += 1; - Ok(()) + Ok(Self { + start_time, + bins, + n_samples, + min, + max, + sum_of_samples, + squared_mean, + p50, + p90, + p99, + }) } /// Return the total number of samples contained in the histogram. @@ -508,32 +677,18 @@ where self.bins.len() } - /// Iterate over the bins of the histogram. - pub fn iter(&self) -> impl Iterator> { - self.bins.iter() - } - - /// Get the bin at the given index. - pub fn get(&self, index: usize) -> Option<&Bin> { - self.bins.get(index) - } - - /// Generate paired arrays with the left bin edges and the counts, for each bin. - /// - /// The returned edges are always left-inclusive, by construction of the histogram. 
- pub fn to_arrays(&self) -> (Vec, Vec) { + /// Return the bins of the histogram. + pub fn bins_and_counts(&self) -> (Vec, Vec) { let mut bins = Vec::with_capacity(self.n_bins()); let mut counts = Vec::with_capacity(self.n_bins()); - - // The first bin may either be BinRange::To or BinRange::Range. for bin in self.bins.iter() { match bin.range { BinRange::Range { start, .. } => { bins.push(start); - }, - BinRange::RangeFrom{start} => { + } + BinRange::RangeFrom { start} => { bins.push(start); - }, + } _ => unreachable!("No bins in a constructed histogram should be of type RangeTo"), } counts.push(bin.count); @@ -541,33 +696,183 @@ where (bins, counts) } - /// Construct a histogram from a start time and paired arrays with the left bin-edge and counts. - pub fn from_arrays( - start_time: DateTime, - bins: Vec, - counts: Vec, - ) -> Result { - if bins.len() != counts.len() { - return Err(HistogramError::ArraySizeMismatch { - n_bins: bins.len(), - n_counts: counts.len(), - }); + /// Return the minimum value of inputs to the histogram. + pub fn min(&self) -> T { + self.min + } + + /// Return the maximum value of all inputs to the histogram. + pub fn max(&self) -> T { + self.max + } + + /// Return the sum of all inputs to the histogram. + pub fn sum_of_samples(&self) -> T::Width { + self.sum_of_samples + } + + /// Return the squared mean (M2) of all inputs to the histogram. + pub fn squared_mean(&self) -> f64 { + self.squared_mean + } + + /// Return the mean of all inputs/samples in the histogram. + pub fn mean(&self) -> f64 { + if self.n_samples() > 0 { + self.sum_of_samples + .to_f64() + .map(|sum| sum / (self.n_samples() as f64)) + .unwrap() + } else { + 0. 
+ } - let mut hist = Self::new(&bins)?; - hist.start_time = start_time; - let mut n_samples = 0; - for (bin, count) in hist.bins.iter_mut().zip(counts.into_iter()) { - bin.count = count; - n_samples += count; + } + + /// Return the variance for inputs to the histogram based on Welford's + /// algorithm, using the squared mean (M2). + /// + /// Returns `None` if there are fewer than two samples. + pub fn variance(&self) -> Option { + (self.n_samples() > 1) + .then(|| self.squared_mean / (self.n_samples() as f64)) + } + + /// Return the sample variance for inputs to the histogram based on + /// Welford's algorithm, using the squared mean (M2). + /// + /// Returns `None` if there are fewer than two samples. + pub fn sample_variance(&self) -> Option { + (self.n_samples() > 1) + .then(|| self.squared_mean / ((self.n_samples() - 1) as f64)) + } + + /// Return the standard deviation for inputs to the histogram. + /// + /// This is a biased (as a consequence of Jensen’s inequality) estimate of + /// the population deviation that returns the standard deviation of the + /// samples seen by the histogram. + /// + /// Returns `None` if the variance is `None`, i.e., if there are fewer than + /// two samples. + pub fn std_dev(&self) -> Option { + match self.variance() { + Some(variance) => Some(variance.sqrt()), + None => None, } - hist.n_samples = n_samples; - Ok(hist) } - /// Return the start time for this histogram + /// Return the "corrected" sample standard deviation for inputs to the + /// histogram. + /// + /// This is a less biased estimate of the population deviation, applying + /// Bessel's correction, which corrects the bias in the estimation of the + /// population variance, and some, but not all of the bias in the estimation + /// of the population standard deviation. + /// + /// Returns `None` if the variance is `None`, i.e., if there are fewer than + /// two samples.
+ pub fn sample_std_dev(&self) -> Option { + match self.sample_variance() { + Some(variance) => Some(variance.sqrt()), + None => None, + } + } + + /// Iterate over the bins of the histogram. + pub fn iter(&self) -> impl Iterator> { + self.bins.iter() + } + + /// Get the bin at the given index. + pub fn get(&self, index: usize) -> Option<&Bin> { + self.bins.get(index) + } + + /// Return the start time for this histogram. pub fn start_time(&self) -> DateTime { self.start_time } + + /// Set the start time for this histogram. + pub fn set_start_time(&mut self, start_time: DateTime) { + self.start_time = start_time; + } + + /// Return the p50 quantile for the histogram. + pub fn p50q(&self) -> Quantile { + self.p50 + } + + /// Return the p90 quantile for the histogram. + pub fn p90q(&self) -> Quantile { + self.p90 + } + + /// Return the p99 quantile for the histogram. + pub fn p99q(&self) -> Quantile { + self.p99 + } + + /// Return the p50 estimate for the histogram. + pub fn p50(&self) -> Result { + self.p50.estimate() + } + + /// Return the p90 estimate for the histogram. + pub fn p90(&self) -> Result { + self.p90.estimate() + } + + /// Return the p99 estimate for the histogram. + pub fn p99(&self) -> Result { + self.p99.estimate() + } +} + +impl TryFrom<&[T]> for Bins +where + T: HistogramSupport, +{ + type Error = HistogramError; + + fn try_from(left_edges: &[T]) -> Result { + let mut items = left_edges.iter(); + let mut bins: Vec> = Vec::with_capacity(left_edges.len() + 1); + let mut current: T = *items.next().ok_or(HistogramError::EmptyBins)?; + ensure_finite(current)?; + let min: T = ::min_value(); + if current > min { + // Bin greater than the minimum was specified, insert a new one from `MIN..current`. + bins.push(Bin { range: BinRange::range(min, current), count: 0 }); + } else if current == min { + // An edge *at* the minimum was specified. Consume it, and insert a bin from + // `MIN..next`, if one exists. 
If one does not, or if this is the last item, the + // following loop will not be entered. + let next: T = + items.next().cloned().unwrap_or_else(::max_value); + bins.push(Bin { range: BinRange::range(min, next), count: 0 }); + current = next; + } + for &next in items { + if current < next { + ensure_finite(next)?; + bins.push(Bin { + range: BinRange::range(current, next), + count: 0, + }); + current = next; + } else if current >= next { + return Err(HistogramError::NonmonotonicBins); + } else { + return Err(HistogramError::NonFiniteValue); + } + } + if current < ::max_value() { + bins.push(Bin { range: BinRange::from(current), count: 0 }); + } + + Ok(Bins(bins)) + } } impl Histogram @@ -871,7 +1176,7 @@ where if value.is_finite() { Ok(()) } else { - Err(HistogramError::NonFiniteValue(format!("{:?}", value))) + Err(HistogramError::NonFiniteValue) } } @@ -938,20 +1243,77 @@ mod tests { "Histogram should have 1 more bin than bin edges specified" ); assert_eq!(hist.n_samples(), 0, "Histogram should init with 0 samples"); - - let samples = [-10i64, 0, 1, 10, 50]; + let max_sample = 100; + let min_sample = -10i64; + let samples = [min_sample, 0, 1, 10, max_sample]; let expected_counts = [1u64, 2, 1, 1]; for (i, sample) in samples.iter().enumerate() { hist.sample(*sample).unwrap(); let count = i as u64 + 1; + let current_sum = samples[..=i].iter().sum::() as f64; + let current_mean = current_sum / count as f64; + let current_std_dev = (samples[..=i] + .iter() + .map(|x| (*x as f64 - current_mean).powi(2)) + .sum::() + / count as f64) + .sqrt(); + let current_sample_std_dev = (samples[..=i] + .iter() + .map(|x| (*x as f64 - current_mean).powi(2)) + .sum::() + / (count - 1) as f64) + .sqrt(); assert_eq!( hist.n_samples(), count, "Histogram should have {} sample(s)", count ); + + if count > 0 { + assert_eq!( + hist.mean(), + current_mean, + "Histogram should have a mean of {}", + current_mean + ); + } else { + assert!(hist.mean().is_zero()); + } + + if count > 1 { + 
assert_eq!( + hist.std_dev().unwrap(), + current_std_dev, + "Histogram should have a sample standard deviation of {}", + current_std_dev + ); + assert_eq!( + hist.sample_std_dev().unwrap(), + current_sample_std_dev, + "Histogram should have a sample standard deviation of {}", + current_sample_std_dev + ); + } else { + assert!(hist.std_dev().is_none()); + assert!(hist.sample_std_dev().is_none()); + } } + assert_eq!( + hist.min(), + min_sample, + "Histogram should have a minimum value of {}", + min_sample + ); + assert_eq!( + hist.max(), + max_sample, + "Histogram should have a maximum value of {}", + max_sample + ); + for (bin, &expected_count) in hist.iter().zip(expected_counts.iter()) { assert_eq!( bin.count, expected_count, @@ -959,6 +1321,15 @@ mod tests { bin.range, expected_count, bin.count ); } + + let p50 = hist.p50().unwrap(); + assert_eq!(p50, 1.0, "P50 should be 1.0, but found {}", p50); + + let p90 = hist.p90().unwrap(); + assert_eq!(p90, 100.0, "P90 should be 100.0, but found {}", p90); + + let p99 = hist.p99().unwrap(); + assert_eq!(p99, 100.0, "P99 should be 100.0, but found {}", p99); } #[test] @@ -972,6 +1343,45 @@ mod tests { assert_eq!(data[2].range, BinRange::from(10)); } + #[test] + fn test_histogram_construct_with() { + let mut hist = Histogram::new(&[0, 10, 20]).unwrap(); + hist.sample(1).unwrap(); + hist.sample(11).unwrap(); + + let (bins, counts) = hist.bins_and_counts(); + assert_eq!( + bins.len(), + counts.len(), + "Bins and counts should have the same size" + ); + assert_eq!( + bins.len(), + hist.n_bins(), + "Paired-array bins should be of the same length as the histogram" + ); + assert_eq!(counts, &[0, 1, 1, 0], "Paired-array counts are incorrect"); + assert_eq!(hist.n_samples(), 2); + + let rebuilt = Histogram::from_parts( + hist.start_time(), + bins, + counts, + hist.min(), + hist.max(), + hist.sum_of_samples(), + hist.squared_mean(), + hist.p50, + hist.p90, + hist.p99, + ) + .unwrap(); + assert_eq!( + hist, rebuilt, + "Histogram 
reconstructed from paired arrays is not correct" + ); + } + #[test] fn test_histogram_with_overlapping_bins() { let bins = &[(..1_u64).into(), (0..10).into()]; @@ -1082,33 +1492,6 @@ mod tests { ); } - #[test] - fn test_histogram_to_arrays() { - let mut hist = Histogram::new(&[0, 10, 20]).unwrap(); - hist.sample(1).unwrap(); - hist.sample(11).unwrap(); - - let (bins, counts) = hist.to_arrays(); - assert_eq!( - bins.len(), - counts.len(), - "Bins and counts should have the same size" - ); - assert_eq!( - bins.len(), - hist.n_bins(), - "Paired-array bins should be of the same length as the histogram" - ); - assert_eq!(counts, &[0, 1, 1, 0], "Paired-array counts are incorrect"); - - let rebuilt = - Histogram::from_arrays(hist.start_time(), bins, counts).unwrap(); - assert_eq!( - hist, rebuilt, - "Histogram reconstructed from paired arrays is not correct" - ); - } - #[test] fn test_span_decades() { let hist = Histogram::::span_decades(0, 3).unwrap(); diff --git a/oximeter/oximeter/src/lib.rs b/oximeter/oximeter/src/lib.rs index 1855762abe..cd5c5adf8c 100644 --- a/oximeter/oximeter/src/lib.rs +++ b/oximeter/oximeter/src/lib.rs @@ -108,11 +108,14 @@ pub use oximeter_macro_impl::*; extern crate self as oximeter; pub mod histogram; +pub mod quantile; pub mod schema; pub mod test_util; pub mod traits; pub mod types; +pub use quantile::Quantile; +pub use quantile::QuantileError; pub use schema::FieldSchema; pub use schema::TimeseriesName; pub use schema::TimeseriesSchema; diff --git a/oximeter/oximeter/src/quantile.rs b/oximeter/oximeter/src/quantile.rs new file mode 100644 index 0000000000..8bc144bb0a --- /dev/null +++ b/oximeter/oximeter/src/quantile.rs @@ -0,0 +1,592 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Data structure for expressing quantile estimation. +//! 
This is based on the P² heuristic algorithm for dynamic +//! calculation of the median and other quantiles. The estimates +//! are produced dynamically as the observations are generated. +//! The observations are not stored; therefore, the algorithm has +//! a very small and fixed storage requirement regardless of the +//! number of observations. +//! +//! Read the [paper](https://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf) +//! for more specifics. + +// Copyright 2024 Oxide Computer Company + +use crate::traits::HistogramSupport; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use thiserror::Error; + +const FILLED_MARKER_LEN: usize = 5; + +/// Errors related to constructing a `Quantile` instance or estimating the +/// p-quantile. +#[derive( + Debug, Clone, Error, JsonSchema, Serialize, Deserialize, PartialEq, +)] +#[serde(tag = "type", content = "content", rename_all = "snake_case")] +pub enum QuantileError { + /// The p value must be in the range [0, 1]. + #[error("The p value must be in the range [0, 1].")] + InvalidPValue, + /// Quantile estimation is not possible without samples. + #[error("Quantile estimation is not possible without any samples.")] + InsufficientSampleSize, + /// A non-finite was encountered, either as a bin edge or a sample. + #[error("Samples must be finite values, not Infinity or NaN.")] + NonFiniteValue, +} + +/// Structure for estimating the p-quantile of a population. +/// +/// This is based on the P² algorithm for estimating quantiles using +/// constant space. +/// +/// The algorithm consists of maintaining five markers: the +/// minimum, the p/2-, p-, and (1 + p)/2 quantiles, and the maximum. +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] +pub struct Quantile { + /// The p value for the quantile. + p: f64, + /// The heights of the markers. + marker_heights: [f64; FILLED_MARKER_LEN], + /// The positions of the markers. 
+ /// + /// We track sample size in the 5th position, as useful observations won't + /// start until we've filled the heights at the 6th sample anyway + /// This does deviate from the paper, but it's a more useful representation + /// that works according to the paper's algorithm. + marker_positions: [u64; FILLED_MARKER_LEN], + /// The desired marker positions. + desired_marker_positions: [f64; FILLED_MARKER_LEN], +} + +impl Quantile { + /// Create a new `Quantile` instance. + /// + /// Returns a result containing the `Quantile` instance or an error. + /// + /// # Errors + /// + /// Returns [`QuantileError::InvalidPValue`] if the p value is not in the + /// range [0, 1]. + /// + /// # Examples + /// + /// ``` + /// use oximeter::Quantile; + /// let q = Quantile::new(0.5).unwrap(); + /// + /// assert_eq!(q.p(), 0.5); + /// assert_eq!(q.len(), 0); + /// ``` + pub fn new(p: f64) -> Result { + if p < 0. || p > 1. { + return Err(QuantileError::InvalidPValue); + } + + Ok(Self { + p, + marker_heights: [0.; FILLED_MARKER_LEN], + // We start with a sample size of 0. + marker_positions: [1, 2, 3, 4, 0], + // 1-indexed, which is like the paper, but + // used to keep track of the sample size without + // needing to do a separate count, use a Vec, + // or do any other kind of bookkeeping. + desired_marker_positions: [ + 1., + 1. + 2. * p, + 1. + 4. * p, + 3. + 2. * p, + 5., + ], + }) + } + + /// Create a new `Quantile` instance from the given p-value, marker + /// heights and positions.
+ /// + /// # Examples + /// ``` + /// use oximeter::Quantile; + /// let q = Quantile::from_parts( + /// 0.5, + /// [0., 1., 2., 3., 4.], + /// [1, 2, 3, 4, 5], + /// [1., 3., 5., 7., 9.], + /// ); + /// ``` + pub fn from_parts( + p: f64, + marker_heights: [f64; FILLED_MARKER_LEN], + marker_positions: [u64; FILLED_MARKER_LEN], + desired_marker_positions: [f64; FILLED_MARKER_LEN], + ) -> Self { + Self { p, marker_heights, marker_positions, desired_marker_positions } + } + + /// Construct a `Quantile` instance for the 50th percentile (the median). + pub fn p50() -> Self { + Self::new(0.5).unwrap() + } + + /// Construct a `Quantile` instance for the 90th percentile. + pub fn p90() -> Self { + Self::new(0.9).unwrap() + } + + /// Construct a `Quantile` instance for the 95th percentile. + pub fn p95() -> Self { + Self::new(0.95).unwrap() + } + + /// Construct a `Quantile` instance for the 99th percentile. + pub fn p99() -> Self { + Self::new(0.99).unwrap() + } + + /// Get the p value as a float. + pub fn p(&self) -> f64 { + self.p + } + + /// Return the sample size. + pub fn len(&self) -> u64 { + self.marker_positions[4] + } + + /// Determine whether the population contains no samples. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Return the marker heights. + pub fn marker_heights(&self) -> [f64; FILLED_MARKER_LEN] { + self.marker_heights + } + + /// Return the marker positions. + pub fn marker_positions(&self) -> [u64; FILLED_MARKER_LEN] { + self.marker_positions + } + + /// Return the desired marker positions. + pub fn desired_marker_positions(&self) -> [f64; FILLED_MARKER_LEN] { + self.desired_marker_positions + } + + /// Estimate the p-quantile of the population. + /// + /// This is step B.4 in the P² algorithm. + /// + /// Returns a result containing the estimated p-quantile or an error. + /// + /// # Errors + /// + /// Returns [`QuantileError::InsufficientSampleSize`] if the sample size + /// is zero.
+ /// + /// # Examples + /// + /// ``` + /// use oximeter::Quantile; + /// let mut q = Quantile::new(0.5).unwrap(); + /// for o in 1..=100 { + /// q.append(o).unwrap(); + /// } + /// assert_eq!(q.estimate().unwrap(), 50.0); + /// ``` + pub fn estimate(&self) -> Result { + if self.is_empty() { + return Err(QuantileError::InsufficientSampleSize); + } + + if self.len() >= FILLED_MARKER_LEN as u64 { + return Ok(self.marker_heights[2]); + } + + // Try to find an index in heights that is correlated with the p value + // when we have less than 5 samples, but more than 0. + let mut heights = self.marker_heights; + float_ord::sort(&mut heights); + let idx = (heights.len() as f64 - 1.) * self.p(); + return Ok(heights[idx.round() as usize]); + } + + /// Append a value/observation to the population and adjust the heights. + /// + /// This comprises steps B.1, B.2, B.3 (adjust heights) in the P² algorithm, + /// including finding the cell k containing the input value and updating the + /// current and desired marker positions. + /// + /// Returns an empty result or an error. + /// + /// # Errors + /// + /// Returns [`QuantileError::NonFiniteValue`] if the value is not finite + /// when casting to a float. + /// + /// # Examples + /// + /// ``` + /// use oximeter::Quantile; + /// let mut q = Quantile::new(0.9).unwrap(); + /// q.append(10).unwrap(); + /// assert_eq!(q.len(), 1); + /// ``` + pub fn append(&mut self, value: T) -> Result<(), QuantileError> + where + T: HistogramSupport, + { + if !value.is_finite() { + return Err(QuantileError::NonFiniteValue); + } + // We've already checked that the value is finite. + let value_f = value.to_f64().unwrap(); + + if self.len() < FILLED_MARKER_LEN as u64 { + self.marker_heights[self.len() as usize] = value_f; + self.marker_positions[4] += 1; + if self.len() == FILLED_MARKER_LEN as u64 { + float_ord::sort(&mut self.marker_heights); + self.adaptive_init(); + } + return Ok(()); + } + + // Find the cell k containing the new value. 
+ let k = match self.find_cell(value_f) { + Some(4) => { + self.marker_heights[4] = value_f; + 3 + } + Some(i) => i, + None => { + self.marker_heights[0] = value_f; + 0 + } + }; + + // Handle rounding issues as described in + // . + let count = self.len() as f64; + self.desired_marker_positions[1] = count * (self.p() / 2.) + 1.; + self.desired_marker_positions[2] = count * self.p() + 1.; + self.desired_marker_positions[3] = count * ((1. + self.p()) / 2.) + 1.; + self.desired_marker_positions[4] = count + 1.; + + for i in k + 1..FILLED_MARKER_LEN { + self.marker_positions[i] += 1; + } + + // Adjust height of markers adaptively to be more optimal for + // not just higher quantiles, but also lower ones. + // + // This is a deviation from the paper, taken from + // . + if self.p >= 0.5 { + for i in 1..4 { + self.adjust_heights(i) + } + } else { + for i in (1..4).rev() { + self.adjust_heights(i) + } + } + + Ok(()) + } + + /// Find the last marker cell whose height is at most the observation. + /// + /// Returns `None` if the value is less than the first marker height. + fn find_cell(&mut self, value: f64) -> Option { + if value < self.marker_heights[0] { + None + } else { + Some( + self.marker_heights + .partition_point(|&height| height <= value) + .saturating_sub(1), + ) + } + } + + /// Adjust the heights of the markers if necessary. + /// + /// Step B.3 in the P² algorithm. Should be used within a loop + /// after appending a value to the population. + fn adjust_heights(&mut self, i: usize) { + let d = + self.desired_marker_positions[i] - self.marker_positions[i] as f64; + + if (d >= 1. + && self.marker_positions[i + 1] > self.marker_positions[i] + 1) + || (d <= -1.
+ && self.marker_positions[i - 1] < self.marker_positions[i] - 1) + { + let d_signum = d.signum(); + let q_prime = self.parabolic(i, d_signum); + if self.marker_heights[i - 1] < q_prime + && q_prime < self.marker_heights[i + 1] + { + self.marker_heights[i] = q_prime; + } else { + let q_prime = self.linear(i, d_signum); + self.marker_heights[i] = q_prime; + } + + // Update marker positions based on the sign of d. + if d_signum < 0. { + self.marker_positions[i] -= 1; + } else { + self.marker_positions[i] += 1; + } + } + } + + /// An implementation to adaptively initialize the marker heights and + /// positions, particularly useful for extreme quantiles (e.g., 0.99) + /// when estimating on a small sample size. + /// + /// Read + /// for more. + fn adaptive_init(&mut self) { + self.desired_marker_positions[..FILLED_MARKER_LEN] + .copy_from_slice(&self.marker_heights[..FILLED_MARKER_LEN]); + + self.marker_positions[1] = (1. + 2. * self.p()).round() as u64; + self.marker_positions[2] = (1. + 4. * self.p()).round() as u64; + self.marker_positions[3] = (3. + 2. * self.p()).round() as u64; + self.marker_heights[1] = self.desired_marker_positions + [self.marker_positions[1] as usize - 1]; + self.marker_heights[2] = self.desired_marker_positions + [self.marker_positions[2] as usize - 1]; + self.marker_heights[3] = self.desired_marker_positions + [self.marker_positions[3] as usize - 1]; + } + + /// Parabolic prediction for marker height. 
+ fn parabolic(&self, i: usize, d_signum: f64) -> f64 { + let pos_diff1 = (self.marker_positions[i + 1] as i64 + - self.marker_positions[i - 1] as i64) + as f64; + + let pos_diff2 = (self.marker_positions[i + 1] as i64 + - self.marker_positions[i] as i64) as f64; + + let pos_diff3 = (self.marker_positions[i] as i64 + - self.marker_positions[i - 1] as i64) + as f64; + + let term1 = d_signum / pos_diff1; + let term2 = ((self.marker_positions[i] - self.marker_positions[i - 1]) + as f64 + + d_signum) + * (self.marker_heights[i + 1] - self.marker_heights[i]) + / pos_diff2; + let term3 = ((self.marker_positions[i + 1] - self.marker_positions[i]) + as f64 + - d_signum) + * (self.marker_heights[i] - self.marker_heights[i - 1]) + / pos_diff3; + + self.marker_heights[i] + term1 * (term2 + term3) + } + + /// Linear prediction for marker height. + fn linear(&self, i: usize, d_signum: f64) -> f64 { + let idx = if d_signum < 0. { i - 1 } else { i + 1 }; + self.marker_heights[i] + + d_signum * (self.marker_heights[idx] - self.marker_heights[i]) + / (self.marker_positions[idx] as i64 + - self.marker_positions[i] as i64) as f64 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_relative_eq; + use rand::{Rng, SeedableRng}; + use rand_distr::{Distribution, Normal}; + + fn test_quantile_impl( + p: f64, + observations: u64, + assert_on: Option, + ) -> Quantile { + let mut q = Quantile::new(p).unwrap(); + for o in 1..=observations { + q.append(o).unwrap(); + } + assert_eq!(q.p(), p); + assert_eq!(q.estimate().unwrap(), assert_on.unwrap_or(p * 100.)); + q + } + + #[test] + fn test_min_p() { + let observations = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20]; + + let mut q = Quantile::new(0.0).unwrap(); + //assert_eq!(q.p(), 0.1); + for &o in observations.iter() { + q.append(o).unwrap(); + } + assert_eq!(q.estimate().unwrap(), 3.); + } + + /// Compared with C# implementation of P² algorithm. 
+ #[test] + fn test_max_p() { + let observations = [3, 6, 7, 8, 8, 10, 13, 15, 16, 20]; + + let mut q = Quantile::new(1.).unwrap(); + assert_eq!(q.p(), 1.); + + for &o in observations.iter() { + q.append(o).unwrap(); + } + + assert_eq!(q.estimate().unwrap(), 11.66543209876543); + } + + /// Example observations from the P² paper. + #[test] + fn test_float_observations() { + let observations = [ + 0.02, 0.5, 0.74, 3.39, 0.83, 22.37, 10.15, 15.43, 38.62, 15.92, + 34.60, 10.28, 1.47, 0.40, 0.05, 11.39, 0.27, 0.42, 0.09, 11.37, + ]; + let mut q = Quantile::p50(); + for &o in observations.iter() { + q.append(o).unwrap(); + } + assert_eq!(q.marker_positions, [1, 6, 10, 16, 20]); + assert_eq!(q.desired_marker_positions, [0.02, 5.75, 10.5, 15.25, 20.0]); + assert_eq!(q.p(), 0.5); + assert_eq!(q.len(), 20); + assert_relative_eq!(q.estimate().unwrap(), 4.2462394088036435,); + } + + #[test] + fn test_rounding() { + let mut rng = rand::rngs::StdRng::seed_from_u64(42); + let mut estimator = Quantile::new(0.6).unwrap(); + + for _ in 0..100 { + let x: f64 = rng.gen(); + estimator.append(x).unwrap(); + } + + assert_relative_eq!( + estimator.estimate().unwrap(), + 0.552428024067269, + epsilon = f64::EPSILON + ); + } + + #[test] + fn test_integer_observations() { + let observations = 1..=100; + let mut q = Quantile::new(0.3).unwrap(); + for o in observations { + q.append(o).unwrap(); + } + assert_eq!(q.marker_positions, [1, 15, 30, 65, 100]); + assert_eq!( + q.desired_marker_positions, + [1.0, 15.85, 30.7, 65.35000000000001, 100.0] + ); + + assert_eq!(q.p(), 0.3); + assert_eq!(q.estimate().unwrap(), 30.0); + } + + #[test] + fn test_empty_observations() { + let q = Quantile::p50(); + assert_eq!( + q.estimate().err().unwrap(), + QuantileError::InsufficientSampleSize + ); + } + + #[test] + fn test_non_filled_observations() { + let mut q = Quantile::p99(); + let observations = [-10., 0., 1., 10.]; + for &o in observations.iter() { + q.append(o).unwrap(); + } + 
assert_eq!(q.estimate().unwrap(), 10.); + } + + #[test] + fn test_default_percentiles() { + test_quantile_impl(0.5, 100, None); + test_quantile_impl(0.9, 100, None); + test_quantile_impl(0.95, 100, None); + test_quantile_impl(0.99, 100, Some(97.)); + } + + #[test] + fn test_invalid_p_value() { + assert_eq!( + Quantile::new(1.01).err().unwrap(), + QuantileError::InvalidPValue + ); + assert_eq!( + Quantile::new(f64::MAX).err().unwrap(), + QuantileError::InvalidPValue + ); + } + + #[test] + fn test_find_cells() { + let mut q = test_quantile_impl(0.5, 5, Some(3.)); + assert_eq!(q.find_cell(0.), None); + assert_eq!(q.find_cell(7.), Some(4)); + assert_eq!(q.find_cell(4.), Some(3)); + assert_eq!(q.find_cell(3.5), Some(2)); + } + + /// Emulates baseline test in a basic Python implementation of the P² + /// algorithm: + /// . + #[test] + fn test_against_baseline_normal_distribution() { + let mu = 500.; + let sigma = 100.; + let size = 1000; + let p = 0.9; + + let normal = Normal::new(mu, sigma); + let mut observations = (0..size) + .map(|_| normal.unwrap().sample(&mut rand::thread_rng())) + .collect::>(); + float_ord::sort(&mut observations); + let idx = ((f64::from(size) - 1.) * p) as usize; + + let base_p_est = observations[idx]; + + let mut q = Quantile::new(p).unwrap(); + for o in observations.iter() { + q.append(*o).unwrap(); + } + let p_est = q.estimate().unwrap(); + + println!("Base: {}, Est: {}", base_p_est, p_est); + assert!( + (base_p_est - p_est).abs() < 10.0, + "Difference {} is not less than 10", + (base_p_est - p_est).abs() + ); + } +} diff --git a/oximeter/oximeter/src/test_util.rs b/oximeter/oximeter/src/test_util.rs index a9778d03bc..56992623d7 100644 --- a/oximeter/oximeter/src/test_util.rs +++ b/oximeter/oximeter/src/test_util.rs @@ -6,7 +6,7 @@ // Copyright 2021 Oxide Computer Company use crate::histogram; -use crate::histogram::Histogram; +use crate::histogram::{Histogram, Record}; use crate::types::{Cumulative, Sample}; use uuid::Uuid;