Skip to content

Commit

Permalink
Add more checks to timeseries schema (#6009)
Browse files Browse the repository at this point in the history
- Add checks for the length of all field names, timeseries names, and
descriptions. All must be non-empty, and below some const limit. Add
tests for these as well.
- Add extra derives to generated target / metric types. Where possible,
derive Copy, Eq, PartialOrd, Ord, and Hash, based on the types in the
definitions (e.g., strings mean the type cannot be copy).
  • Loading branch information
bnaecker authored Jul 8, 2024
1 parent 26b70bc commit dceb1cb
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 5 deletions.
43 changes: 43 additions & 0 deletions oximeter/impl/src/schema/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,43 @@ fn emit_target(schema: &TimeseriesSchema) -> TokenStream {
emit_one(FieldSource::Target, schema)
}

/// Return true if all the fields in the schema are `Copy`.
fn fields_are_copyable<'a>(
mut fields: impl Iterator<Item = &'a FieldSchema>,
) -> bool {
// Do a positive match, to ensure new variants don't actually derive copy
// inappropriately. Better we clone, in that case.
fields.all(FieldSchema::is_copyable)
}

fn compute_extra_derives(
source: FieldSource,
schema: &TimeseriesSchema,
) -> TokenStream {
match source {
FieldSource::Target => {
if fields_are_copyable(schema.target_fields()) {
quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] }
} else {
quote! { #[derive(Eq, Hash, Ord, PartialOrd)] }
}
}
FieldSource::Metric => {
if schema.datum_type.is_histogram() {
// No other traits can be derived, since the Histogram<T> won't
// satisfy them.
quote! {}
} else {
if fields_are_copyable(schema.metric_fields()) {
quote! { #[derive(Copy, Eq, Hash, Ord, PartialOrd)] }
} else {
quote! { #[derive(Eq, Hash, Ord, PartialOrd)] }
}
}
}
}
}

fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
let name = match source {
FieldSource::Target => schema.target_name(),
Expand All @@ -98,6 +135,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
}
})
.collect();
let extra_derives = compute_extra_derives(source, schema);
let (oximeter_trait, maybe_datum, type_docstring) = match source {
FieldSource::Target => (
quote! {::oximeter::Target },
Expand All @@ -116,6 +154,7 @@ fn emit_one(source: FieldSource, schema: &TimeseriesSchema) -> TokenStream {
quote! {
#[doc = #type_docstring]
#[derive(Clone, Debug, PartialEq, #oximeter_trait)]
#extra_derives
pub struct #struct_name {
#( #field_defs, )*
#maybe_datum
Expand Down Expand Up @@ -431,13 +470,15 @@ mod tests {
let expected = quote! {
#[doc = "a target"]
#[derive(Clone, Debug, PartialEq, ::oximeter::Target)]
#[derive(Eq, Hash, Ord, PartialOrd)]
pub struct Foo {
#[doc = "target field"]
pub f0: ::std::borrow::Cow<'static, str>,
}

#[doc = "a metric"]
#[derive(Clone, Debug, PartialEq, ::oximeter::Metric)]
#[derive(Copy, Eq, Hash, Ord, PartialOrd)]
pub struct Bar {
#[doc = "metric field"]
pub f1: ::uuid::Uuid,
Expand Down Expand Up @@ -474,13 +515,15 @@ mod tests {
let expected = quote! {
#[doc = "a target"]
#[derive(Clone, Debug, PartialEq, ::oximeter::Target)]
#[derive(Eq, Hash, Ord, PartialOrd)]
pub struct Foo {
#[doc = "target field"]
pub f0: ::std::borrow::Cow<'static, str>,
}

#[doc = "a metric"]
#[derive(Clone, Debug, PartialEq, ::oximeter::Metric)]
#[derive(Copy, Eq, Hash, Ord, PartialOrd)]
pub struct Bar {
pub datum: ::oximeter::types::Cumulative<u64>,
}
Expand Down
263 changes: 259 additions & 4 deletions oximeter/impl/src/schema/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::num::NonZeroU8;

mod limits {
pub const MAX_FIELD_NAME_LENGTH: usize = 64;
pub const MAX_DESCRIPTION_LENGTH: usize = 1024;
pub const MAX_TIMESERIES_NAME_LENGTH: usize = 128;
}

#[derive(Debug, Deserialize)]
pub struct FieldMetadata {
#[serde(rename = "type")]
Expand Down Expand Up @@ -107,6 +113,44 @@ impl TimeseriesDefinition {
let mut timeseries = BTreeMap::new();
let target_name = &self.target.name;

// Validate text length limits on field names and descriptions.
if self.target.name.is_empty() {
return Err(MetricsError::SchemaDefinition(String::from(
"Target name cannot be empty",
)));
}
for (name, metadata) in self.fields.iter() {
if name.is_empty() {
return Err(MetricsError::SchemaDefinition(String::from(
"Field names cannot be empty",
)));
}
if name.len() > limits::MAX_FIELD_NAME_LENGTH {
return Err(MetricsError::SchemaDefinition(format!(
"Field name '{}' is {} characters, which exceeds the \
maximum field name length of {}",
name,
name.len(),
limits::MAX_FIELD_NAME_LENGTH,
)));
}
if metadata.description.is_empty() {
return Err(MetricsError::SchemaDefinition(format!(
"Description of field '{}' cannot be empty",
name,
)));
}
if metadata.description.len() > limits::MAX_DESCRIPTION_LENGTH {
return Err(MetricsError::SchemaDefinition(format!(
"Description of field '{}' is {} characters, which \
exceeds the maximum description length of {}",
name,
metadata.description.len(),
limits::MAX_DESCRIPTION_LENGTH,
)));
}
}

// At this point, we do not support actually _modifying_ schema.
// Instead, we're putting in place infrastructure to support multiple
// versions, while still requiring all schema to define the first and
Expand Down Expand Up @@ -177,6 +221,26 @@ impl TimeseriesDefinition {
// version, along with running some basic lints and checks.
for metric in self.metrics.iter() {
let metric_name = &metric.name;
if metric_name.is_empty() {
return Err(MetricsError::SchemaDefinition(String::from(
"Metric name cannot be empty",
)));
}
let timeseries_name = TimeseriesName::try_from(format!(
"{}:{}",
target_name, metric_name
))?;
if timeseries_name.as_str().len()
> limits::MAX_TIMESERIES_NAME_LENGTH
{
return Err(MetricsError::SchemaDefinition(format!(
"Timeseries name '{}' is {} characters, which \
exceeds the maximum length of {}",
timeseries_name,
timeseries_name.len(),
limits::MAX_TIMESERIES_NAME_LENGTH,
)));
}

// Store the current version of the metric. This doesn't need to be
// sequential, but they do need to be monotonic and have a matching
Expand Down Expand Up @@ -231,9 +295,6 @@ impl TimeseriesDefinition {
self.target.authz_scope,
&field_schema,
)?;
let timeseries_name = TimeseriesName::try_from(
format!("{}:{}", target_name, metric_name),
)?;
let version =
NonZeroU8::new(last_target_version).unwrap();
let description = TimeseriesDescription {
Expand All @@ -251,7 +312,7 @@ impl TimeseriesDefinition {
created: Utc::now(),
};
if let Some(old) = timeseries
.insert((timeseries_name, version), schema)
.insert((timeseries_name.clone(), version), schema)
{
return Err(MetricsError::SchemaDefinition(
format!(
Expand Down Expand Up @@ -1413,4 +1474,198 @@ mod tests {
have at least one field, but found {msg:?}",
);
}

#[test]
fn fail_on_very_long_timeseries_name() {
let contents = r#"
format_version = 1
[target]
name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_target"
description = "some target"
authz_scope = "fleet"
versions = [
{ version = 1, fields = [ "foo" ] },
]
[[metrics]]
name = "veeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeery_long_metric"
description = "some metric"
datum_type = "u8"
units = "count"
versions = [
{ added_in = 1, fields = [ ] },
]
[fields.foo]
type = "string"
description = "a field"
"#;
let res = load_schema(contents);
let Err(MetricsError::SchemaDefinition(msg)) = &res else {
panic!(
"Expected to fail when timeseries name is too long, found {res:#?}"
);
};
assert!(
msg.contains("exceeds the maximum"),
"Message should complain about a long timeseries name, but found {msg:?}",
);
}

#[test]
fn fail_on_empty_metric_name() {
let contents = r#"
format_version = 1
[target]
name = "target"
description = "some target"
authz_scope = "fleet"
versions = [
{ version = 1, fields = [ "foo" ] },
]
[[metrics]]
name = ""
description = "some metric"
datum_type = "u8"
units = "count"
versions = [
{ added_in = 1, fields = [ ] },
]
[fields.foo]
type = "string"
description = "a field"
"#;
let res = load_schema(contents);
let Err(MetricsError::SchemaDefinition(msg)) = &res else {
panic!(
"Expected to fail when metric name is empty, found {res:#?}"
);
};
assert_eq!(
msg, "Metric name cannot be empty",
"Message should complain about an empty metric name \
but found {msg:?}",
);
}

#[test]
fn fail_on_empty_target_name() {
let contents = r#"
format_version = 1
[target]
name = ""
description = "some target"
authz_scope = "fleet"
versions = [
{ version = 1, fields = [ "foo" ] },
]
[[metrics]]
name = "metric"
description = "some metric"
datum_type = "u8"
units = "count"
versions = [
{ added_in = 1, fields = [ ] },
]
[fields.foo]
type = "string"
description = "a field"
"#;
let res = load_schema(contents);
let Err(MetricsError::SchemaDefinition(msg)) = &res else {
panic!(
"Expected to fail when target name is empty, found {res:#?}"
);
};
assert_eq!(
msg, "Target name cannot be empty",
"Message should complain about an empty target name \
but found {msg:?}",
);
}

#[test]
fn fail_on_very_long_field_names() {
let contents = r#"
format_version = 1
[target]
name = "target"
description = "some target"
authz_scope = "fleet"
versions = [
{ version = 1, fields = [ "foo" ] },
]
[[metrics]]
name = "metric"
description = "some metric"
datum_type = "u8"
units = "count"
versions = [
{ added_in = 1, fields = [ ] },
]
[fields.this_is_a_reeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeealy_long_field_name]
type = "string"
description = "a field"
"#;
let res = load_schema(contents);
let Err(MetricsError::SchemaDefinition(msg)) = &res else {
panic!(
"Expected to fail when field name is too long, found {res:#?}"
);
};
assert!(
msg.contains("which exceeds the maximum field name length"),
"Message should complain about a field name being \
too long, but found {msg:?}",
);
}

#[test]
fn fail_on_empty_descriptions() {
let contents = r#"
format_version = 1
[target]
name = "target"
description = "some target"
authz_scope = "fleet"
versions = [
{ version = 1, fields = [ "foo" ] },
]
[[metrics]]
name = "metric"
description = "some metric"
datum_type = "u8"
units = "count"
versions = [
{ added_in = 1, fields = [ ] },
]
[fields.foo]
type = "string"
description = ""
"#;
let res = load_schema(contents);
let Err(MetricsError::SchemaDefinition(msg)) = &res else {
panic!(
"Expected to fail when field description is empty, found {res:#?}"
);
};
assert_eq!(
msg, "Description of field 'foo' cannot be empty",
"Message should complain about a field description being \
empty, but found {msg:?}",
);
}
}
Loading

0 comments on commit dceb1cb

Please sign in to comment.