Use ClickHouse bools and disable sparse serialization (#6997)
- Use the `Bool` data type in the `oximeter` database tables. Fixes #6994.
- Disable the "sparse serialization" optimization for ClickHouse columns,
which is unlikely to help us and uses an undocumented data format. This
turns off sparse serialization for test and deployed versions of
ClickHouse, both single-node and replicated. Fixes #6995. (A quick
verification sketch follows below.)
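For anyone who wants to double-check both changes against a running server, here is a rough SQL sketch. The `system.merge_tree_settings` query mirrors what the new test in `test-utils/src/dev/clickhouse.rs` runs; the `system.columns` query is an extra convenience for inspecting the column types and is not part of this change.

-- Check that the bool tables now use the Bool type (replicated deployments
-- use the *_local table names).
SELECT name, type
FROM system.columns
WHERE database = 'oximeter'
  AND table IN ('fields_bool', 'measurements_bool')
  AND name IN ('field_value', 'datum');

-- Check that sparse serialization is disabled; `value` should be 1 and
-- `changed` should be 1, as asserted by the new test.
SELECT name, value, changed
FROM system.merge_tree_settings
WHERE name = 'ratio_of_defaults_for_sparse_serialization'
FORMAT JSONEachRow;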
bnaecker authored Nov 6, 2024
1 parent 76bff3f commit 64eb23e
Showing 17 changed files with 113 additions and 17 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 5 additions & 0 deletions clickhouse-admin/types/src/config.rs
@@ -154,6 +154,11 @@ impl ReplicaConfig {
<!-- Controls how many tasks could be in the queue -->
<max_tasks_in_queue>1000</max_tasks_in_queue>
</distributed_ddl>
<!-- Disable sparse column serialization, which we expect to not need -->
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
{macros}
{remote_servers}
{keepers}
5 changes: 5 additions & 0 deletions clickhouse-admin/types/testutils/replica-server-config.xml
@@ -72,6 +72,11 @@
<max_tasks_in_queue>1000</max_tasks_in_queue>
</distributed_ddl>

<!-- Disable sparse column serialization, which we expect to not need -->
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>

<macros>
<shard>1</shard>
<replica>1</replica>
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up1.sql
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.fields_bool_local
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
field_value TYPE Bool;
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up2.sql
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.measurements_bool_local
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
datum TYPE Nullable(Bool);
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up3.sql
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.fields_bool
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
field_value TYPE Bool;
4 changes: 4 additions & 0 deletions oximeter/db/schema/replicated/13/up4.sql
@@ -0,0 +1,4 @@
ALTER TABLE oximeter.measurements_bool
ON CLUSTER oximeter_cluster
ALTER COLUMN IF EXISTS
datum TYPE Nullable(Bool);
4 changes: 2 additions & 2 deletions oximeter/db/schema/replicated/db-init-2.sql
@@ -35,7 +35,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_
timeseries_name String,
timeseries_key UInt64,
timestamp DateTime64(9, 'UTC'),
datum Nullable(UInt8)
datum Nullable(Bool)
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}')
ORDER BY (timeseries_name, timeseries_key, timestamp)
@@ -595,7 +595,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool_local ON CLUSTER oximeter_cluste
timeseries_name String,
timeseries_key UInt64,
field_name String,
field_value UInt8,
field_value Bool,
last_updated_at DateTime MATERIALIZED now()
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/fields_bool_local', '{replica}')
1 change: 1 addition & 0 deletions oximeter/db/schema/single-node/13/up1.sql
@@ -0,0 +1 @@
ALTER TABLE oximeter.fields_bool ALTER COLUMN IF EXISTS field_value TYPE Bool;
1 change: 1 addition & 0 deletions oximeter/db/schema/single-node/13/up2.sql
@@ -0,0 +1 @@
ALTER TABLE oximeter.measurements_bool ALTER COLUMN IF EXISTS datum TYPE Nullable(Bool);
4 changes: 2 additions & 2 deletions oximeter/db/schema/single-node/db-init.sql
@@ -24,7 +24,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool
timeseries_name String,
timeseries_key UInt64,
timestamp DateTime64(9, 'UTC'),
datum Nullable(UInt8)
datum Nullable(Bool)
)
ENGINE = MergeTree()
ORDER BY (timeseries_name, timeseries_key, timestamp)
@@ -518,7 +518,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool
timeseries_name String,
timeseries_key UInt64,
field_name String,
field_value UInt8,
field_value Bool,
last_updated_at DateTime MATERIALIZED now()
)
ENGINE = ReplacingMergeTree()
2 changes: 1 addition & 1 deletion oximeter/db/src/client/mod.rs
@@ -3039,7 +3039,7 @@ mod tests {
client: &Client,
) -> Result<(), Error> {
let field = FieldValue::Bool(true);
let as_json = serde_json::Value::from(1_u64);
let as_json = serde_json::Value::from(true);
test_recall_field_value_impl(field, as_json, client).await?;
Ok(())
}
5 changes: 5 additions & 0 deletions oximeter/db/src/configs/replica_config.xml
@@ -484,4 +484,9 @@
<enabled>false</enabled>
<anonymize>false</anonymize>
</send_crash_reports>

<!-- Disable sparse column serialization -->
<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>
20 changes: 10 additions & 10 deletions oximeter/db/src/model.rs
@@ -45,7 +45,7 @@ use uuid::Uuid;
/// - [`crate::Client::initialize_db_with_version`]
/// - [`crate::Client::ensure_schema`]
/// - The `clickhouse-schema-updater` binary in this crate
pub const OXIMETER_VERSION: u64 = 12;
pub const OXIMETER_VERSION: u64 = 13;

// Wrapper type to represent a boolean in the database.
//
@@ -392,7 +392,7 @@ macro_rules! declare_field_row {
}
}

declare_field_row! {BoolFieldRow, DbBool, "bool"}
declare_field_row! {BoolFieldRow, bool, "bool"}
declare_field_row! {I8FieldRow, i8, "i8"}
declare_field_row! {U8FieldRow, u8, "u8"}
declare_field_row! {I16FieldRow, i16, "i16"}
@@ -630,7 +630,7 @@ fn unroll_from_source(sample: &Sample) -> BTreeMap<String, Vec<String>> {
timeseries_name,
timeseries_key,
field_name,
field_value: DbBool::from(*inner),
field_value: *inner,
};
(row.table_name(), serde_json::to_string(&row).unwrap())
}
@@ -1468,7 +1468,7 @@ trait FromDbScalar {
const DATUM_TYPE: DatumType;
}

impl FromDbScalar for DbBool {
impl FromDbScalar for bool {
const DATUM_TYPE: DatumType = DatumType::Bool;
}

@@ -1633,7 +1633,7 @@ pub(crate) fn parse_measurement_from_row(
) -> (TimeseriesKey, Measurement) {
match datum_type {
DatumType::Bool => {
parse_timeseries_scalar_gauge_measurement::<DbBool>(line)
parse_timeseries_scalar_gauge_measurement::<bool>(line)
}
DatumType::I8 => parse_timeseries_scalar_gauge_measurement::<i8>(line),
DatumType::U8 => parse_timeseries_scalar_gauge_measurement::<u8>(line),
@@ -1757,11 +1757,11 @@ pub(crate) fn parse_field_select_row(
// Parse the field value as the expected type
let value = match expected_field.field_type {
FieldType::Bool => {
FieldValue::Bool(bool::from(DbBool::from(
FieldValue::Bool(
actual_field_value
.as_u64()
.expect("Expected a u64 for a boolean field from the database")
)))
.as_bool()
.expect("Expected a boolean field from the database")
)
}
FieldType::I8 => {
let wide = actual_field_value
@@ -2071,7 +2071,7 @@ mod tests {
assert_eq!(measurement.datum(), datum);
}

let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": 1 }"#;
let line = r#"{"timeseries_key": 12, "timestamp": "2021-01-01 00:00:00.123456789", "datum": true }"#;
let datum = Datum::from(true);
run_test(line, &datum, timestamp);

4 changes: 4 additions & 0 deletions smf/clickhouse/config.xml
@@ -38,4 +38,8 @@
<quotas>
<default/>
</quotas>

<merge_tree>
<ratio_of_defaults_for_sparse_serialization>1.0</ratio_of_defaults_for_sparse_serialization>
</merge_tree>
</clickhouse>
1 change: 1 addition & 0 deletions test-utils/Cargo.toml
@@ -43,6 +43,7 @@ uuid.workspace = true
chrono.workspace = true
expectorate.workspace = true
gethostname.workspace = true
serde.workspace = true

[features]
seed-gen = ["dep:filetime"]
61 changes: 59 additions & 2 deletions test-utils/src/dev/clickhouse.rs
@@ -387,6 +387,9 @@ pub enum ClickHouseError {
Timeout,
}

const SINGLE_NODE_CONFIG_FILE: &str =
concat!(env!("CARGO_MANIFEST_DIR"), "/../smf/clickhouse/config.xml");

impl ClickHouseProcess {
/// Start a new single node ClickHouse server listening on the provided
/// ports.
@@ -395,8 +398,22 @@ impl ClickHouseProcess {
ports: ClickHousePorts,
) -> Result<ClickHouseReplica, anyhow::Error> {
let data_dir = ClickHouseDataDir::new(logctx)?;

// Copy the configuration file into the test directory, so we don't
// leave the preprocessed config file hanging around.
tokio::fs::copy(SINGLE_NODE_CONFIG_FILE, data_dir.config_file_path())
.await
.with_context(|| {
format!(
"failed to copy config file from {} to test data path {}",
SINGLE_NODE_CONFIG_FILE,
data_dir.config_file_path(),
)
})?;
let args = vec![
"server".to_string(),
"--config-file".to_string(),
data_dir.config_file_path().to_string(),
"--log-file".to_string(),
data_dir.log_path().to_string(),
"--errorlog-file".to_string(),
@@ -731,6 +748,10 @@ impl ClickHouseDataDir {
self.dir.path()
}

fn config_file_path(&self) -> Utf8PathBuf {
self.root_path().join("config.xml")
}

fn datastore_path(&self) -> Utf8PathBuf {
self.dir.path().join("datastore/")
}
@@ -1299,10 +1320,11 @@ async fn clickhouse_ready_from_log(
#[cfg(test)]
mod tests {
use super::{
discover_ready, wait_for_ports, ClickHouseError, ClickHousePorts,
CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY,
discover_ready, wait_for_ports, ClickHouseDeployment, ClickHouseError,
ClickHousePorts, CLICKHOUSE_HTTP_PORT_NEEDLE, CLICKHOUSE_READY,
CLICKHOUSE_TCP_PORT_NEEDLE, CLICKHOUSE_TIMEOUT,
};
use crate::dev::test_setup_log;
use camino_tempfile::NamedUtf8TempFile;
use std::process::Stdio;
use std::{io::Write, sync::Arc, time::Duration};
@@ -1458,4 +1480,39 @@ mod tests {
let second = ClickHousePorts { http: 1, native: 1 };
ClickHousePorts { http: 2, native: 2 }.assert_consistent(&second);
}

#[derive(Debug, serde::Deserialize)]
struct Setting {
name: String,
value: String,
changed: u8,
}

#[tokio::test]
async fn sparse_serialization_is_disabled() {
let logctx = test_setup_log("sparse_serialization_is_disabled");
let mut db =
ClickHouseDeployment::new_single_node(&logctx).await.unwrap();
let client = reqwest::Client::new();
let url = format!("http://{}", db.http_address());
let setting: Setting = client
.post(url)
.body(
"SELECT name, value, changed \
FROM system.merge_tree_settings \
WHERE name == 'ratio_of_defaults_for_sparse_serialization' \
FORMAT JSONEachRow",
)
.send()
.await
.unwrap()
.json()
.await
.unwrap();
assert_eq!(setting.name, "ratio_of_defaults_for_sparse_serialization");
assert_eq!(setting.value, "1");
assert_eq!(setting.changed, 1);
db.cleanup().await.unwrap();
logctx.cleanup_successful();
}
}
