[oximeter] Use stable hash, stable format, for deriving timeseries_key #4251

Merged: 5 commits, Oct 12, 2023
Changes from 1 commit
21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -199,6 +199,7 @@ headers = "0.3.9"
heck = "0.4"
hex = "0.4.3"
hex-literal = "0.4.1"
highway = "1.1.0"
hkdf = "0.12.3"
http = "0.2.9"
httptest = "0.15.4"
4 changes: 4 additions & 0 deletions oximeter/db/Cargo.toml
@@ -8,10 +8,12 @@ license = "MPL-2.0"
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
bcs.workspace = true
bytes = { workspace = true, features = [ "serde" ] }
chrono.workspace = true
clap.workspace = true
dropshot.workspace = true
highway.workspace = true
oximeter.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = [ "json" ] }
@@ -28,9 +30,11 @@ uuid.workspace = true
omicron-workspace-hack.workspace = true

[dev-dependencies]
expectorate.workspace = true
itertools.workspace = true
omicron-test-utils.workspace = true
slog-dtrace.workspace = true
strum.workspace = true

[[bin]]
name = "oxdb"
94 changes: 91 additions & 3 deletions oximeter/db/src/lib.rs
@@ -335,11 +335,13 @@ pub(crate) fn timeseries_key_for<'a>(
target_fields: impl Iterator<Item = &'a Field>,
metric_fields: impl Iterator<Item = &'a Field>,
Collaborator
So one other issue I've been ambivalent about is including the timeseries name in the key. This impl will currently generate the same key for instances of two different timeseries that happen to have the same fields. The database currently includes an ordering key on the fields and measurement tables that looks like (timeseries_name, timeseries_key, ...), so the key only needs to be unique within a timeseries. I'm basically wondering whether we want it globally unique. Thoughts?

Contributor
I think it would be nice to disambiguate and I don't see much downside.

@smklein (Collaborator Author), Oct 12, 2023
One other thing -- I think the hashing is currently based purely on the Field structs:

/// A `Field` is a named aspect of a target or metric.
#[derive(
Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
)]
pub struct Field {
pub name: String,
pub value: FieldValue,
}

However, the target and metric also have names, which could be distinct from the fields:

// A helper type for representing the name and fields derived from targets and metrics
#[derive(Clone, Debug, PartialEq, JsonSchema, Deserialize, Serialize)]
pub(crate) struct FieldSet {
pub name: String,
pub fields: BTreeMap<String, Field>,
}

Should I be hashing this value too?

So in the case of:

Sample {
  measurement: N/A
  timeseries_name: "MyTimeseries",
  target: FieldSet {
    name: "MyTarget",
    fields: [ ("a", valueA), ("b", valueB)],
  },
  metric: FieldSet {
    name: "MyMetric",
    fields: [ ("c", valueC), ("d", valueD)],
  },
}

What we have been hashing -- even before this PR -- is the combined list of Field values, which is:

  • [ ("a", valueA), ("b", valueB), ("c", valueC), ("d", valueD)]

(NOTE: This means editing the "MyTarget" or "MyMetric" names will not change the hash, currently)

@smklein (Collaborator Author), Oct 12, 2023
(EDIT: UPDATED BASED ON FEEDBACK BELOW)

WDYT about hashing the following for a sample, in order (sketched below):

  1. The timeseries_name (which itself is derived from "target name + metric name")
  2. The target fields
  3. The metric fields
  4. The measurement datum type
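
For concreteness, a minimal sketch of what that ordering might look like, reusing the bcs-plus-HighwayHasher approach from the diff below. The free function name, the signature, and the assumption that DatumType implements Serialize are all illustrative, not the final implementation:

use highway::HighwayHasher;
use oximeter::{DatumType, Field};
use std::hash::{Hash, Hasher};

// Hypothetical sketch: hash the full identity of a sample in a fixed order.
// bcs provides a canonical byte encoding; HighwayHasher provides a portable,
// stable 64-bit hash of those bytes.
fn timeseries_key_for_sample<'a>(
    timeseries_name: &str,
    target_fields: impl Iterator<Item = &'a Field>,
    metric_fields: impl Iterator<Item = &'a Field>,
    datum_type: DatumType,
) -> u64 {
    let mut hasher = HighwayHasher::default();
    // 1. The timeseries name ("target_name:metric_name").
    bcs::to_bytes(&timeseries_name)
        .expect("serializing a string cannot fail")
        .hash(&mut hasher);
    // 2. The target fields, assumed to arrive sorted by field name.
    for field in target_fields {
        bcs::to_bytes(field)
            .expect("serializing a field cannot fail")
            .hash(&mut hasher);
    }
    // 3. The metric fields, also assumed sorted.
    for field in metric_fields {
        bcs::to_bytes(field)
            .expect("serializing a field cannot fail")
            .hash(&mut hasher);
    }
    // 4. The measurement datum type (assumes DatumType: Serialize).
    bcs::to_bytes(&datum_type)
        .expect("serializing a datum type cannot fail")
        .hash(&mut hasher);
    hasher.finish()
}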

) -> TimeseriesKey {
use std::collections::hash_map::DefaultHasher;
use highway::HighwayHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
let mut hasher = HighwayHasher::default();
for field in target_fields.chain(metric_fields) {
Collaborator
I think we need to sort these before hashing.

With the change in 8ffd476, these should be sorted by the time the oximeter collector gets them, since they're deserialized into a BTreeMap. But one of the pitfalls of that change was that key generation should have been doing the sorting in the first place. Let's do it now, to avoid an implicit reliance on it again.
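
As a rough sketch of that suggestion, the sorting could live inside the key function itself, so the result never depends on the order in which callers supply fields (the PR ultimately made sorting explicit by changing the interface instead, per the reply below). The helper name here is hypothetical:

use highway::HighwayHasher;
use oximeter::Field;
use std::hash::{Hash, Hasher};

// Illustrative only: collect and sort each group of fields by name before
// hashing, so permuting the caller's input cannot change the resulting key.
fn timeseries_key_for_sorted<'a>(
    target_fields: impl Iterator<Item = &'a Field>,
    metric_fields: impl Iterator<Item = &'a Field>,
) -> u64 {
    let mut targets: Vec<_> = target_fields.collect();
    targets.sort_by(|a, b| a.name.cmp(&b.name));
    let mut metrics: Vec<_> = metric_fields.collect();
    metrics.sort_by(|a, b| a.name.cmp(&b.name));

    let mut hasher = HighwayHasher::default();
    for field in targets.into_iter().chain(metrics) {
        bcs::to_bytes(field)
            .expect("failed to serialize field to bytes")
            .hash(&mut hasher);
    }
    hasher.finish()
}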

Collaborator Author
I've added this in 2e91139 -- as expected, this does not change the hashes, since things are already sorted, but it does make it explicit.

I opted to alter the interface rather than re-sorting internally.

field.hash(&mut hasher);
bcs::to_bytes(&field)
.expect("Failed to serialized field to bytes")
.hash(&mut hasher);
}
hasher.finish()
}
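
Read without the diff interleaving, the updated function in this commit amounts to roughly the following; this is a reconstruction from the hunks above (relying on the file's existing Field and TimeseriesKey definitions), not a verbatim copy of the source:

use highway::HighwayHasher;
use std::hash::{Hash, Hasher};

pub(crate) fn timeseries_key_for<'a>(
    target_fields: impl Iterator<Item = &'a Field>,
    metric_fields: impl Iterator<Item = &'a Field>,
) -> TimeseriesKey {
    // HighwayHash is a documented, portable algorithm, whereas DefaultHasher's
    // algorithm is explicitly unspecified and may change across Rust releases,
    // which would silently break keys already persisted in the database.
    let mut hasher = HighwayHasher::default();
    for field in target_fields.chain(metric_fields) {
        // bcs gives a canonical serialization of each field, so the hashed
        // bytes no longer depend on the derived Hash impl of Field.
        bcs::to_bytes(&field)
            .expect("Failed to serialized field to bytes")
            .hash(&mut hasher);
    }
    hasher.finish()
}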
@@ -393,4 +395,90 @@ mod tests {
assert!(TimeseriesName::try_from("a:").is_err());
assert!(TimeseriesName::try_from("123").is_err());
}

#[test]
fn test_timeseries_key_generation_hashes_fields_sequentially() {
use super::timeseries_key_for;
use oximeter::{Field, FieldValue};

let f = |name: &str, value| Field { name: name.to_string(), value };

// Confirm that "targets" and "metrics" are interchangeable,
// we just hash everything sequentially.
assert_eq!(
timeseries_key_for(
[&f("a", FieldValue::String("a".to_string()))].into_iter(),
[&f("b", FieldValue::String("b".to_string()))].into_iter(),
),
timeseries_key_for(
[
&f("a", FieldValue::String("a".to_string())),
&f("b", FieldValue::String("b".to_string())),
]
.into_iter(),
[].into_iter(),
),
);

// However, order still matters ("a, b" != "b, a")
Collaborator
I think this test does accurately reflect the original implementation, but I think we don't want order to matter. What matters is the unordered set of field names, types, and values. Permuting them should not change the key, IMO, because that's really the same timeseries. So if we sort the keys in the function above, we should then assert_eq here.

Collaborator Author
I have removed this test -- "stability of a sample" is more important -- but yes, this is fixed.

For what it's worth, we're sorting "the target fields" and "the metric fields" separately, but I think we're on the same page here.

assert_ne!(
timeseries_key_for(
[&f("a", FieldValue::String("a".to_string()))].into_iter(),
[&f("b", FieldValue::String("b".to_string()))].into_iter(),
),
timeseries_key_for(
[&f("b", FieldValue::String("b".to_string()))].into_iter(),
[&f("a", FieldValue::String("a".to_string()))].into_iter(),
),
);
}

#[test]
fn test_timeseries_key_stability() {
use super::timeseries_key_for;
use oximeter::{Field, FieldValue};
use strum::EnumCount;

let values = [
("string", FieldValue::String(String::default())),
("i8", FieldValue::I8(-0x0A)),
("u8", FieldValue::U8(0x0A)),
("i16", FieldValue::I16(-0x0ABC)),
("u16", FieldValue::U16(0x0ABC)),
("i32", FieldValue::I32(-0x0ABC_0000)),
("u32", FieldValue::U32(0x0ABC_0000)),
("i64", FieldValue::I64(-0x0ABC_0000_0000_0000)),
("u64", FieldValue::U64(0x0ABC_0000_0000_0000)),
(
"ipaddr",
FieldValue::IpAddr(std::net::IpAddr::V4(
std::net::Ipv4Addr::LOCALHOST,
)),
),
("uuid", FieldValue::Uuid(uuid::Uuid::nil())),
("bool", FieldValue::Bool(true)),
];

// Exhaustively testing enums is a bit tricky. Although it's easy to
// check "all variants of an enum are matched", it harder to test "all
// variants of an enum have been supplied".
//
// We use this as a proxy, confirming that each variant is represented
// here for the purposes of tracking stability.
assert_eq!(values.len(), FieldValue::COUNT);

let mut output = vec![];
for (name, value) in values {
let key = timeseries_key_for(
[&Field { name: name.to_string(), value }].into_iter(),
[].into_iter(),
);
output.push(format!("{name} -> {key}"));
}

expectorate::assert_contents(
"test-output/timeseries-keys.txt",
&output.join("\n"),
);
}
}
12 changes: 12 additions & 0 deletions oximeter/db/test-output/timeseries-keys.txt
@@ -0,0 +1,12 @@
string -> 11027713821408113420
i8 -> 11040867383141339877
u8 -> 15607688254753473406
i16 -> 13295860386975871341
u16 -> 2145565374036862015
i32 -> 10931977742674674182
u32 -> 6138968585984979982
i64 -> 981603688282082647
u64 -> 4754979326506678930
ipaddr -> 17297273002898199682
uuid -> 9564253805848631232
bool -> 13392625023595096799
1 change: 1 addition & 0 deletions oximeter/oximeter/Cargo.toml
@@ -13,6 +13,7 @@ omicron-common.workspace = true
oximeter-macro-impl.workspace = true
schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] }
serde.workspace = true
strum.workspace = true
thiserror.workspace = true
uuid.workspace = true
omicron-workspace-hack.workspace = true
10 changes: 9 additions & 1 deletion oximeter/oximeter/src/types.rs
@@ -90,7 +90,15 @@ impl_field_type_from! { bool, FieldType::Bool }

/// The `FieldValue` contains the value of a target or metric field.
#[derive(
Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
Clone,
Debug,
Hash,
PartialEq,
Eq,
JsonSchema,
Serialize,
Deserialize,
strum::EnumCount,
)]
#[serde(tag = "type", content = "value", rename_all = "snake_case")]
pub enum FieldValue {