Add more checks to timeseries schema (#6009)

- Add checks for the length of all field names, timeseries names, and descriptions. All must be non-empty, and below some const limit. Add tests for these as well. - Add extra derives to generated target / metric types. Where possible, derive Copy, Eq, PartialOrd, Ord, and Hash, based on the types in the definitions (e.g., strings mean the type cannot be copy).
oxidecomputer · Jul 8, 2024 · dceb1cb · dceb1cb
1 parent 26b70bc
commit dceb1cb
Show file tree

Hide file tree

Showing 4 changed files with 363 additions and 5 deletions.
diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/impl/src/schema/codegen.rs
@@ -74,6 +74,43 @@ fn emit_target(schema: &TimeseriesSchema) -> TokenStream {
     emit_one(FieldSource::Target, schema)
 }
 
+/// Return true if all the fields in the schema are `Copy`.
+fn fields_are_copyable<'a>(
+    mut fields: impl Iterator<Item = &'a FieldSchema>,
+) -> bool {
+    // Do a positive match, to ensure new variants don't actually derive copy
+    // inappropriately. Better we clone, in that case.
+    fields.all(FieldSchema::is_copyable)
+}
+
+fn compute_extra_derives(
+    source: FieldSource,
+    schema: &TimeseriesSchema,
+) -> TokenStream {
+    match source {
+        FieldSource::Target => {
+            if fields_are_copyable(schema.target_fields()) {
+                quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] }
+            } else {
+                quote! { #[derive(Eq, Hash, Ord, PartialOrd)] }
+            }
+        }
+        FieldSource::Metric => {
+            if schema.datum_type.is_histogram() {
+                // No other traits can be derived, since the Histogram<T> won't
+                // satisfy them.
+                quote! {}
+            } else {
+                if fields_are_copyable(schema.metric_fields()) {
+                    quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] }
+                } else {
+                    quote! { #[derive(Eq, Hash, Ord, PartialOrd)] }
+                }
+            }
+        }
+    }
+}
+
 fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
     let name = match source {
         FieldSource::Target => schema.target_name(),
@@ -98,6 +135,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
             }
         })
         .collect();
+    let extra_derives = compute_extra_derives(source, schema);
     let (oximeter_trait, maybe_datum, type_docstring) = match source {
         FieldSource::Target => (
             quote! {::oximeter::Target },
@@ -116,6 +154,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
     quote! {
         #[doc = #type_docstring]
         #[derive(Clone, Debug, PartialEq, #oximeter_trait)]
+        #extra_derives
         pub struct #struct_name {
             #( #field_defs, )*
             #maybe_datum
@@ -431,13 +470,15 @@ mod tests {
         let expected = quote! {
             #[doc = "a target"]
             #[derive(Clone, Debug, PartialEq, ::oximeter::Target)]
+            #[derive(Eq, Hash, Ord, PartialOrd)]
             pub struct Foo {
                 #[doc = "target field"]
                 pub f0: ::std::borrow::Cow<'static, str>,
             }
 
             #[doc = "a metric"]
             #[derive(Clone, Debug, PartialEq, ::oximeter::Metric)]
+            #[derive(Copy, Eq, Hash, Ord, PartialOrd)]
             pub struct Bar {
                 #[doc = "metric field"]
                 pub f1: ::uuid::Uuid,
@@ -474,13 +515,15 @@ mod tests {
         let expected = quote! {
             #[doc = "a target"]
             #[derive(Clone, Debug, PartialEq, ::oximeter::Target)]
+            #[derive(Eq, Hash, Ord, PartialOrd)]
             pub struct Foo {
                 #[doc = "target field"]
                 pub f0: ::std::borrow::Cow<'static, str>,
             }
 
             #[doc = "a metric"]
             #[derive(Clone, Debug, PartialEq, ::oximeter::Metric)]
+            #[derive(Copy, Eq, Hash, Ord, PartialOrd)]
             pub struct Bar {
                 pub datum: ::oximeter::types::Cumulative<u64>,
             }

diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/impl/src/schema/ir.rs
@@ -28,6 +28,12 @@ use std::collections::BTreeMap;
 use std::collections::BTreeSet;
 use std::num::NonZeroU8;
 
+mod limits {
+    pub const MAX_FIELD_NAME_LENGTH: usize = 64;
+    pub const MAX_DESCRIPTION_LENGTH: usize = 1024;
+    pub const MAX_TIMESERIES_NAME_LENGTH: usize = 128;
+}
+
 #[derive(Debug, Deserialize)]
 pub struct FieldMetadata {
     #[serde(rename = "type")]
@@ -107,6 +113,44 @@ impl TimeseriesDefinition {
         let mut timeseries = BTreeMap::new();
         let target_name = &self.target.name;
 
+        // Validate text length limits on field names and descriptions.
+        if self.target.name.is_empty() {
+            return Err(MetricsError::SchemaDefinition(String::from(
+                "Target name cannot be empty",
+            )));
+        }
+        for (name, metadata) in self.fields.iter() {
+            if name.is_empty() {
+                return Err(MetricsError::SchemaDefinition(String::from(
+                    "Field names cannot be empty",
+                )));
+            }
+            if name.len() > limits::MAX_FIELD_NAME_LENGTH {
+                return Err(MetricsError::SchemaDefinition(format!(
+                    "Field name '{}' is {} characters, which exceeds the \
+                    maximum field name length of {}",
+                    name,
+                    name.len(),
+                    limits::MAX_FIELD_NAME_LENGTH,
+                )));
+            }
+            if metadata.description.is_empty() {
+                return Err(MetricsError::SchemaDefinition(format!(
+                    "Description of field '{}' cannot be empty",
+                    name,
+                )));
+            }
+            if metadata.description.len() > limits::MAX_DESCRIPTION_LENGTH {
+                return Err(MetricsError::SchemaDefinition(format!(
+                    "Description of field '{}' is {} characters, which \
+                    exceeds the maximum description length of {}",
+                    name,
+                    metadata.description.len(),
+                    limits::MAX_DESCRIPTION_LENGTH,
+                )));
+            }
+        }
+
         // At this point, we do not support actually _modifying_ schema.
         // Instead, we're putting in place infrastructure to support multiple
         // versions, while still requiring all schema to define the first and
@@ -177,6 +221,26 @@ impl TimeseriesDefinition {
         // version, along with running some basic lints and checks.
         for metric in self.metrics.iter() {
             let metric_name = &metric.name;
+            if metric_name.is_empty() {
+                return Err(MetricsError::SchemaDefinition(String::from(
+                    "Metric name cannot be empty",
+                )));
+            }
+            let timeseries_name = TimeseriesName::try_from(format!(
+                "{}:{}",
+                target_name, metric_name
+            ))?;
+            if timeseries_name.as_str().len()
+                > limits::MAX_TIMESERIES_NAME_LENGTH
+            {
+                return Err(MetricsError::SchemaDefinition(format!(
+                    "Timeseries name '{}' is {} characters, which \
+                    exceeds the maximum length of {}",
+                    timeseries_name,
+                    timeseries_name.len(),
+                    limits::MAX_TIMESERIES_NAME_LENGTH,
+                )));
+            }
 
             // Store the current version of the metric. This doesn't need to be
             // sequential, but they do need to be monotonic and have a matching
@@ -231,9 +295,6 @@ impl TimeseriesDefinition {
                             self.target.authz_scope,
                             &field_schema,
                         )?;
-                        let timeseries_name = TimeseriesName::try_from(
-                            format!("{}:{}", target_name, metric_name),
-                        )?;
                         let version =
                             NonZeroU8::new(last_target_version).unwrap();
                         let description = TimeseriesDescription {
@@ -251,7 +312,7 @@ impl TimeseriesDefinition {
                             created: Utc::now(),
                         };
                         if let Some(old) = timeseries
-                            .insert((timeseries_name, version), schema)
+                            .insert((timeseries_name.clone(), version), schema)
                         {
                             return Err(MetricsError::SchemaDefinition(
                                 format!(
@@ -1413,4 +1474,198 @@ mod tests {
             have at least one field, but found {msg:?}",
         );
     }
+
+    #[test]
+    fn fail_on_very_long_timeseries_name() {
+        let contents = r#"
+        format_version = 1
+
+        [target]
+        name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_target"
+        description = "some target"
+        authz_scope = "fleet"
+        versions = [
+            { version = 1, fields = [ "foo" ] },
+        ]
+
+        [[metrics]]
+        name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_metric"
+        description = "some metric"
+        datum_type = "u8"
+        units = "count"
+        versions = [
+            { added_in = 1, fields = [ ] },
+        ]
+
+        [fields.foo]
+        type = "string"
+        description = "a field"
+        "#;
+        let res = load_schema(contents);
+        let Err(MetricsError::SchemaDefinition(msg)) = &res else {
+            panic!(
+                "Expected to fail when timeseries name is too long, found {res:#?}"
+            );
+        };
+        assert!(
+            msg.contains("exceeds the maximum"),
+            "Message should complain about a long timeseries name, but found {msg:?}",
+        );
+    }
+
+    #[test]
+    fn fail_on_empty_metric_name() {
+        let contents = r#"
+        format_version = 1
+
+        [target]
+        name = "target"
+        description = "some target"
+        authz_scope = "fleet"
+        versions = [
+            { version = 1, fields = [ "foo" ] },
+        ]
+
+        [[metrics]]
+        name = ""
+        description = "some metric"
+        datum_type = "u8"
+        units = "count"
+        versions = [
+            { added_in = 1, fields = [ ] },
+        ]
+
+        [fields.foo]
+        type = "string"
+        description = "a field"
+        "#;
+        let res = load_schema(contents);
+        let Err(MetricsError::SchemaDefinition(msg)) = &res else {
+            panic!(
+                "Expected to fail when metric name is empty, found {res:#?}"
+            );
+        };
+        assert_eq!(
+            msg, "Metric name cannot be empty",
+            "Message should complain about an empty metric name \
+            but found {msg:?}",
+        );
+    }
+
+    #[test]
+    fn fail_on_empty_target_name() {
+        let contents = r#"
+        format_version = 1
+
+        [target]
+        name = ""
+        description = "some target"
+        authz_scope = "fleet"
+        versions = [
+            { version = 1, fields = [ "foo" ] },
+        ]
+
+        [[metrics]]
+        name = "metric"
+        description = "some metric"
+        datum_type = "u8"
+        units = "count"
+        versions = [
+            { added_in = 1, fields = [ ] },
+        ]
+
+        [fields.foo]
+        type = "string"
+        description = "a field"
+        "#;
+        let res = load_schema(contents);
+        let Err(MetricsError::SchemaDefinition(msg)) = &res else {
+            panic!(
+                "Expected to fail when target name is empty, found {res:#?}"
+            );
+        };
+        assert_eq!(
+            msg, "Target name cannot be empty",
+            "Message should complain about an empty target name \
+            but found {msg:?}",
+        );
+    }
+
+    #[test]
+    fn fail_on_very_long_field_names() {
+        let contents = r#"
+        format_version = 1
+
+        [target]
+        name = "target"
+        description = "some target"
+        authz_scope = "fleet"
+        versions = [
+            { version = 1, fields = [ "foo" ] },
+        ]
+
+        [[metrics]]
+        name = "metric"
+        description = "some metric"
+        datum_type = "u8"
+        units = "count"
+        versions = [
+            { added_in = 1, fields = [ ] },
+        ]
+
+        [fields.this_is_a_reeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeealy_long_field_name]
+        type = "string"
+        description = "a field"
+        "#;
+        let res = load_schema(contents);
+        let Err(MetricsError::SchemaDefinition(msg)) = &res else {
+            panic!(
+                "Expected to fail when field name is too long, found {res:#?}"
+            );
+        };
+        assert!(
+            msg.contains("which exceeds the maximum field name length"),
+            "Message should complain about a field name being \
+            too long, but found {msg:?}",
+        );
+    }
+
+    #[test]
+    fn fail_on_empty_descriptions() {
+        let contents = r#"
+        format_version = 1
+
+        [target]
+        name = "target"
+        description = "some target"
+        authz_scope = "fleet"
+        versions = [
+            { version = 1, fields = [ "foo" ] },
+        ]
+
+        [[metrics]]
+        name = "metric"
+        description = "some metric"
+        datum_type = "u8"
+        units = "count"
+        versions = [
+            { added_in = 1, fields = [ ] },
+        ]
+
+        [fields.foo]
+        type = "string"
+        description = ""
+        "#;
+        let res = load_schema(contents);
+        let Err(MetricsError::SchemaDefinition(msg)) = &res else {
+            panic!(
+                "Expected to fail when field description is empty, found {res:#?}"
+            );
+        };
+        assert_eq!(
+            msg, "Description of field 'foo' cannot be empty",
+            "Message should complain about a field description being \
+            empty, but found {msg:?}",
+        );
+    }
 }