From bf635a6c7cd0a4a9bc892a4dfc1196cfbea8659b Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Tue, 26 Dec 2023 15:45:31 +0800 Subject: [PATCH] feat: replace ahash with murmur3 on generating tsid (#3007) * feat: replace ahash with murmur3 on generating tsid Signed-off-by: Ruihang Xia * change random seed Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 8 ++++++- src/metric-engine/Cargo.toml | 2 +- src/metric-engine/src/engine.rs | 3 --- src/metric-engine/src/engine/put.rs | 33 +++++++++++++++-------------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ba1d36fc121b..4892674eb661 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4873,7 +4873,6 @@ dependencies = [ name = "metric-engine" version = "0.5.0" dependencies = [ - "ahash 0.8.6", "api", "aquamarine", "async-trait", @@ -4889,6 +4888,7 @@ dependencies = [ "datatypes", "lazy_static", "mito2", + "mur3", "object-store", "prometheus", "serde_json", @@ -5066,6 +5066,12 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "mur3" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97af489e1e21b68de4c390ecca6703318bc1aa16e9733bcb62c089b73c6fbb1b" + [[package]] name = "mysql-common-derive" version = "0.30.2" diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 2d820b9a2787..def5885cf908 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -5,7 +5,6 @@ edition.workspace = true license.workspace = true [dependencies] -ahash.workspace = true api.workspace = true aquamarine.workspace = true async-trait.workspace = true @@ -20,6 +19,7 @@ datafusion.workspace = true datatypes.workspace = true lazy_static = "1.4" mito2.workspace = true +mur3 = "0.1" object-store.workspace = true prometheus.workspace = true serde_json.workspace = true diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 70038f528b60..5e610c3e91a3 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -40,9 +40,6 @@ use self::state::MetricEngineState; use crate::data_region::DataRegion; use crate::metadata_region::MetadataRegion; -/// Fixed random state for generating tsid -pub(crate) const RANDOM_STATE: ahash::RandomState = ahash::RandomState::with_seeds(1, 2, 3, 4); - #[cfg_attr(doc, aquamarine::aquamarine)] /// # Metric Engine /// diff --git a/src/metric-engine/src/engine/put.rs b/src/metric-engine/src/engine/put.rs index a0f187faaa7a..19f40a509975 100644 --- a/src/metric-engine/src/engine/put.rs +++ b/src/metric-engine/src/engine/put.rs @@ -14,7 +14,6 @@ use std::hash::{BuildHasher, Hash, Hasher}; -use ahash::RandomState; use api::v1::value::ValueData; use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType}; use common_telemetry::{error, info}; @@ -25,13 +24,16 @@ use store_api::metric_engine_consts::{ use store_api::region_request::{AffectedRows, RegionPutRequest}; use store_api::storage::{RegionId, TableId}; -use crate::engine::{MetricEngineInner, RANDOM_STATE}; +use crate::engine::MetricEngineInner; use crate::error::{ ColumnNotFoundSnafu, ForbiddenPhysicalAlterSnafu, LogicalRegionNotFoundSnafu, Result, }; use crate::metrics::FORBIDDEN_OPERATION_COUNT; use crate::utils::{to_data_region_id, to_metadata_region_id}; +// A random number +const TSID_HASH_SEED: u32 = 846793005; + impl MetricEngineInner { /// Dispatch region put request pub async fn put_region( @@ -174,9 +176,8 @@ impl MetricEngineInner { }); // fill internal columns - let mut random_state = RANDOM_STATE.clone(); for row in &mut rows.rows { - Self::fill_internal_columns(&mut random_state, table_id, &tag_col_indices, row); + Self::fill_internal_columns(table_id, &tag_col_indices, row); } Ok(()) @@ -184,12 +185,11 @@ impl MetricEngineInner { /// Fills internal columns of a row with table name and a hash of tag values. fn fill_internal_columns( - random_state: &mut RandomState, table_id: TableId, tag_col_indices: &[(usize, String)], row: &mut Row, ) { - let mut hasher = random_state.build_hasher(); + let mut hasher = mur3::Hasher128::with_seed(TSID_HASH_SEED); for (idx, name) in tag_col_indices { let tag = row.values[*idx].clone(); name.hash(&mut hasher); @@ -198,7 +198,8 @@ impl MetricEngineInner { string.hash(&mut hasher); } } - let hash = hasher.finish(); + // TSID is 64 bits, simply truncate the 128 bits hash + let (hash, _) = hasher.finish128(); // fill table id and tsid row.values.push(ValueData::U32Value(table_id).into()); @@ -247,15 +248,15 @@ mod tests { .unwrap(); let batches = RecordBatches::try_collect(stream).await.unwrap(); let expected = "\ -+-------------------------+----------------+------------+---------------------+-------+ -| greptime_timestamp | greptime_value | __table_id | __tsid | job | -+-------------------------+----------------+------------+---------------------+-------+ -| 1970-01-01T00:00:00 | 0.0 | 3 | 4844750677434873907 | tag_0 | -| 1970-01-01T00:00:00.001 | 1.0 | 3 | 4844750677434873907 | tag_0 | -| 1970-01-01T00:00:00.002 | 2.0 | 3 | 4844750677434873907 | tag_0 | -| 1970-01-01T00:00:00.003 | 3.0 | 3 | 4844750677434873907 | tag_0 | -| 1970-01-01T00:00:00.004 | 4.0 | 3 | 4844750677434873907 | tag_0 | -+-------------------------+----------------+------------+---------------------+-------+"; ++-------------------------+----------------+------------+----------------------+-------+ +| greptime_timestamp | greptime_value | __table_id | __tsid | job | ++-------------------------+----------------+------------+----------------------+-------+ +| 1970-01-01T00:00:00 | 0.0 | 3 | 12881218023286672757 | tag_0 | +| 1970-01-01T00:00:00.001 | 1.0 | 3 | 12881218023286672757 | tag_0 | +| 1970-01-01T00:00:00.002 | 2.0 | 3 | 12881218023286672757 | tag_0 | +| 1970-01-01T00:00:00.003 | 3.0 | 3 | 12881218023286672757 | tag_0 | +| 1970-01-01T00:00:00.004 | 4.0 | 3 | 12881218023286672757 | tag_0 | ++-------------------------+----------------+------------+----------------------+-------+"; assert_eq!(expected, batches.pretty_print().unwrap(), "physical region"); // read data from logical region