diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/impl/src/schema/codegen.rs index 4aa09cf1361..af1e8e22580 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/impl/src/schema/codegen.rs @@ -74,6 +74,41 @@ fn emit_target(schema: &TimeseriesSchema) -> TokenStream { emit_one(FieldSource::Target, schema) } +/// Return true if all the fields in the schema are `Copy`. +fn fields_are_copyable<'a>( + mut fields: impl Iterator, +) -> bool { + !fields.any(|field| field.field_type == FieldType::String) +} + +fn compute_extra_derives( + source: FieldSource, + schema: &TimeseriesSchema, +) -> TokenStream { + match source { + FieldSource::Target => { + if fields_are_copyable(schema.target_fields()) { + quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] } + } else { + quote! { #[derive(Eq, Hash, Ord, PartialOrd)] } + } + } + FieldSource::Metric => { + if schema.datum_type.is_histogram() { + // No other traits can be derived, since the Histogram won't + // satisfy them. + quote! {} + } else { + if fields_are_copyable(schema.metric_fields()) { + quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] } + } else { + quote! { #[derive(Eq, Hash, Ord, PartialOrd)] } + } + } + } + } +} + fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { let name = match source { FieldSource::Target => schema.target_name(), @@ -98,6 +133,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { } }) .collect(); + let extra_derives = compute_extra_derives(source, schema); let (oximeter_trait, maybe_datum, type_docstring) = match source { FieldSource::Target => ( quote! {::oximeter::Target }, @@ -116,6 +152,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream { quote! { #[doc = #type_docstring] #[derive(Clone, Debug, PartialEq, #oximeter_trait)] + #extra_derives pub struct #struct_name { #( #field_defs, )* #maybe_datum @@ -431,6 +468,7 @@ mod tests { let expected = quote! { #[doc = "a target"] #[derive(Clone, Debug, PartialEq, ::oximeter::Target)] + #[derive(Eq, Hash, PartialOrd)] pub struct Foo { #[doc = "target field"] pub f0: ::std::borrow::Cow<'static, str>, @@ -438,6 +476,7 @@ mod tests { #[doc = "a metric"] #[derive(Clone, Debug, PartialEq, ::oximeter::Metric)] + #[derive(Copy, Eq, Hash, PartialOrd)] pub struct Bar { #[doc = "metric field"] pub f1: ::uuid::Uuid, @@ -474,6 +513,7 @@ mod tests { let expected = quote! { #[doc = "a target"] #[derive(Clone, Debug, PartialEq, ::oximeter::Target)] + #[derive(Eq, Hash, PartialOrd)] pub struct Foo { #[doc = "target field"] pub f0: ::std::borrow::Cow<'static, str>, @@ -481,6 +521,7 @@ mod tests { #[doc = "a metric"] #[derive(Clone, Debug, PartialEq, ::oximeter::Metric)] + #[derive(Copy, Eq, Hash, PartialOrd)] pub struct Bar { pub datum: ::oximeter::types::Cumulative, } diff --git a/oximeter/impl/src/schema/ir.rs b/oximeter/impl/src/schema/ir.rs index 573af9c2b0e..f7a209294f2 100644 --- a/oximeter/impl/src/schema/ir.rs +++ b/oximeter/impl/src/schema/ir.rs @@ -28,6 +28,12 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::num::NonZeroU8; +mod limits { + pub const MAX_FIELD_NAME_LENGTH: usize = 64; + pub const MAX_DESCRIPTION_LENGTH: usize = 1024; + pub const MAX_TIMESERIES_NAME_LENGTH: usize = 128; +} + #[derive(Debug, Deserialize)] pub struct FieldMetadata { #[serde(rename = "type")] @@ -107,6 +113,44 @@ impl TimeseriesDefinition { let mut timeseries = BTreeMap::new(); let target_name = &self.target.name; + // Validate text length limits on field names and descriptions. + if self.target.name.is_empty() { + return Err(MetricsError::SchemaDefinition(String::from( + "Target name cannot be empty", + ))); + } + for (name, metadata) in self.fields.iter() { + if name.is_empty() { + return Err(MetricsError::SchemaDefinition(String::from( + "Field names cannot be empty", + ))); + } + if name.len() > limits::MAX_FIELD_NAME_LENGTH { + return Err(MetricsError::SchemaDefinition(format!( + "Field name '{}' is {} characters, which exceeds the \ + maximum field name length of {}", + name, + name.len(), + limits::MAX_FIELD_NAME_LENGTH, + ))); + } + if metadata.description.is_empty() { + return Err(MetricsError::SchemaDefinition(format!( + "Description of field '{}' cannot be empty", + name, + ))); + } + if metadata.description.len() > limits::MAX_DESCRIPTION_LENGTH { + return Err(MetricsError::SchemaDefinition(format!( + "Description of field '{}' is {} characters, which \ + exceeds the maximum description length of {}", + name, + metadata.description.len(), + limits::MAX_DESCRIPTION_LENGTH, + ))); + } + } + // At this point, we do not support actually _modifying_ schema. // Instead, we're putting in place infrastructure to support multiple // versions, while still requiring all schema to define the first and @@ -177,6 +221,26 @@ impl TimeseriesDefinition { // version, along with running some basic lints and checks. for metric in self.metrics.iter() { let metric_name = &metric.name; + if metric_name.is_empty() { + return Err(MetricsError::SchemaDefinition(String::from( + "Metric name cannot be empty", + ))); + } + let timeseries_name = TimeseriesName::try_from(format!( + "{}:{}", + target_name, metric_name + ))?; + if timeseries_name.as_str().len() + > limits::MAX_TIMESERIES_NAME_LENGTH + { + return Err(MetricsError::SchemaDefinition(format!( + "Timeseries name '{}' is {} characters, which \ + exceeds the maximum length of {}", + timeseries_name, + timeseries_name.len(), + limits::MAX_TIMESERIES_NAME_LENGTH, + ))); + } // Store the current version of the metric. This doesn't need to be // sequential, but they do need to be monotonic and have a matching @@ -231,9 +295,6 @@ impl TimeseriesDefinition { self.target.authz_scope, &field_schema, )?; - let timeseries_name = TimeseriesName::try_from( - format!("{}:{}", target_name, metric_name), - )?; let version = NonZeroU8::new(last_target_version).unwrap(); let description = TimeseriesDescription { @@ -251,7 +312,7 @@ impl TimeseriesDefinition { created: Utc::now(), }; if let Some(old) = timeseries - .insert((timeseries_name, version), schema) + .insert((timeseries_name.clone(), version), schema) { return Err(MetricsError::SchemaDefinition( format!( @@ -1413,4 +1474,198 @@ mod tests { have at least one field, but found {msg:?}", ); } + + #[test] + fn fail_on_very_long_timeseries_name() { + let contents = r#" + format_version = 1 + + [target] + name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_target" + description = "some target" + authz_scope = "fleet" + versions = [ + { version = 1, fields = [ "foo" ] }, + ] + + [[metrics]] + name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_metric" + description = "some metric" + datum_type = "u8" + units = "count" + versions = [ + { added_in = 1, fields = [ ] }, + ] + + [fields.foo] + type = "string" + description = "a field" + "#; + let res = load_schema(contents); + let Err(MetricsError::SchemaDefinition(msg)) = &res else { + panic!( + "Expected to fail when timeseries name is too long, found {res:#?}" + ); + }; + assert!( + msg.contains("exceeds the maximum"), + "Message should complain about a long timeseries name, but found {msg:?}", + ); + } + + #[test] + fn fail_on_empty_metric_name() { + let contents = r#" + format_version = 1 + + [target] + name = "target" + description = "some target" + authz_scope = "fleet" + versions = [ + { version = 1, fields = [ "foo" ] }, + ] + + [[metrics]] + name = "" + description = "some metric" + datum_type = "u8" + units = "count" + versions = [ + { added_in = 1, fields = [ ] }, + ] + + [fields.foo] + type = "string" + description = "a field" + "#; + let res = load_schema(contents); + let Err(MetricsError::SchemaDefinition(msg)) = &res else { + panic!( + "Expected to fail when metric name is empty, found {res:#?}" + ); + }; + assert_eq!( + msg, "Metric name cannot be empty", + "Message should complain about an empty metric name \ + but found {msg:?}", + ); + } + + #[test] + fn fail_on_empty_target_name() { + let contents = r#" + format_version = 1 + + [target] + name = "" + description = "some target" + authz_scope = "fleet" + versions = [ + { version = 1, fields = [ "foo" ] }, + ] + + [[metrics]] + name = "metric" + description = "some metric" + datum_type = "u8" + units = "count" + versions = [ + { added_in = 1, fields = [ ] }, + ] + + [fields.foo] + type = "string" + description = "a field" + "#; + let res = load_schema(contents); + let Err(MetricsError::SchemaDefinition(msg)) = &res else { + panic!( + "Expected to fail when target name is empty, found {res:#?}" + ); + }; + assert_eq!( + msg, "Target name cannot be empty", + "Message should complain about an empty target name \ + but found {msg:?}", + ); + } + + #[test] + fn fail_on_very_long_field_names() { + let contents = r#" + format_version = 1 + + [target] + name = "target" + description = "some target" + authz_scope = "fleet" + versions = [ + { version = 1, fields = [ "foo" ] }, + ] + + [[metrics]] + name = "metric" + description = "some metric" + datum_type = "u8" + units = "count" + versions = [ + { added_in = 1, fields = [ ] }, + ] + + [fields.this_is_a_reeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeealy_long_field_name] + type = "string" + description = "a field" + "#; + let res = load_schema(contents); + let Err(MetricsError::SchemaDefinition(msg)) = &res else { + panic!( + "Expected to fail when field name is too long, found {res:#?}" + ); + }; + assert!( + msg.contains("which exceeds the maximum field name length"), + "Message should complain about a field name being \ + too long, but found {msg:?}", + ); + } + + #[test] + fn fail_on_empty_descriptions() { + let contents = r#" + format_version = 1 + + [target] + name = "target" + description = "some target" + authz_scope = "fleet" + versions = [ + { version = 1, fields = [ "foo" ] }, + ] + + [[metrics]] + name = "metric" + description = "some metric" + datum_type = "u8" + units = "count" + versions = [ + { added_in = 1, fields = [ ] }, + ] + + [fields.foo] + type = "string" + description = "" + "#; + let res = load_schema(contents); + let Err(MetricsError::SchemaDefinition(msg)) = &res else { + panic!( + "Expected to fail when field description is empty, found {res:#?}" + ); + }; + assert_eq!( + msg, "Description of field 'foo' cannot be empty", + "Message should complain about a field description being \ + empty, but found {msg:?}", + ); + } } diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/impl/src/schema/mod.rs index 28dbf38ab85..49f2ccb4f7e 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/impl/src/schema/mod.rs @@ -297,6 +297,24 @@ impl TimeseriesSchema { self.field_schema.iter().find(|field| field.name == name.as_ref()) } + /// Return an iterator over the target fields. + pub fn target_fields(&self) -> impl Iterator { + self.field_iter(FieldSource::Target) + } + + /// Return an iterator over the metric fields. + pub fn metric_fields(&self) -> impl Iterator { + self.field_iter(FieldSource::Metric) + } + + /// Return an iterator over fields from the given source. + fn field_iter( + &self, + source: FieldSource, + ) -> impl Iterator { + self.field_schema.iter().filter(move |field| field.source == source) + } + /// Return the target and metric component names for this timeseries pub fn component_names(&self) -> (&str, &str) { self.timeseries_name diff --git a/oximeter/impl/src/types.rs b/oximeter/impl/src/types.rs index a6518e4ad57..0a19bc14412 100644 --- a/oximeter/impl/src/types.rs +++ b/oximeter/impl/src/types.rs @@ -693,7 +693,19 @@ impl From for omicron_common::api::external::Error { } /// A cumulative or counter data type. -#[derive(Debug, Clone, Copy, PartialEq, JsonSchema, Deserialize, Serialize)] +#[derive( + Debug, + Deserialize, + Clone, + Copy, + Eq, + Hash, + JsonSchema, + Ord, + PartialEq, + PartialOrd, + Serialize, +)] #[schemars(rename = "Cumulative{T}")] pub struct Cumulative { start_time: DateTime,