Skip to content

Commit

Permalink
Print more information on log line interop test failures (#5659)
Browse files Browse the repository at this point in the history
Print more information on log line interop test failures

(cherry picked from commit 739fb98)
  • Loading branch information
Nathaniel May authored and github-actions[bot] committed Feb 6, 2024
1 parent fb6075d commit c11a26b
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 76 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20220816-122032.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: add more information to log line interop test failures
time: 2022-08-16T12:20:32.119588+01:00
custom:
Author: nathaniel-may
Issue: "5658"
PR: "5659"
192 changes: 116 additions & 76 deletions test/interop/log_parsing/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::env;
Expand All @@ -7,28 +6,35 @@ use std::fs::File;
use std::io::{self, BufRead};
use walkdir::WalkDir;


// Applies schema tests to file input
// if these fail, we either have a problem in dbt that needs to be resolved
// or we have changed our interface and the log_version should be bumped in dbt,
// modeled appropriately here, and publish new docs for the new log_version.
fn main() -> Result<(), Box<dyn Error>> {
let log_name = "dbt.log";
let path = env::var("LOG_DIR").expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`");

let path = env::var("LOG_DIR")
.expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`");

println!("Looking for files named `{}` in {}", log_name, path);
let lines: Vec<String> = get_input(&path, log_name)?;
println!("collected {} log lines.", lines.len());

println!("");

println!("testing type-level schema compliance by deserializing each line...");
let log_lines: Vec<LogLine> = deserialized_input(&lines)
.map_err(|e| format!("schema test failure: json doesn't match type definition\n{}", e))?;
let log_lines: Vec<LogLine> = deserialized_input(&lines).map_err(|e| {
format!(
"schema test failure: json doesn't match type definition\n{}",
e
)
})?;
println!("Done.");

println!("");
println!("because we skip non-json log lines, there are {} collected values to test.", log_lines.len());
println!(
"because we skip non-json log lines, there are {} collected values to test.",
log_lines.len()
);
println!("");

// make sure when we read a string in then output it back to a string the two strings
Expand All @@ -49,7 +55,6 @@ fn main() -> Result<(), Box<dyn Error>> {
Ok(())
}


// each nested type of LogLine should define its own value_test function
// that asserts values are within an expected set of values when possible.
trait ValueTest {
Expand All @@ -68,11 +73,11 @@ struct LogLine {
level: String,
invocation_id: String,
thread_name: String,
data: serde_json::Value, // TODO be more specific
data: serde_json::Value, // TODO be more specific
}

impl ValueTest for LogLine {
fn value_test(&self){
fn value_test(&self) {
assert_eq!(
self.log_version, 2,
"The log version changed. Be sure this was intentional."
Expand All @@ -99,7 +104,7 @@ impl ValueTest for LogLine {
// this requires handling the date with "%Y-%m-%dT%H:%M:%S%.6f" which requires this
// boilerplate-looking module.
mod custom_date_format {
use chrono::{NaiveDateTime, DateTime, Utc};
use chrono::{DateTime, NaiveDateTime, Utc};
use serde::{self, Deserialize, Deserializer, Serializer};

const FORMAT: &'static str = "%Y-%m-%dT%H:%M:%S%.6fZ";
Expand All @@ -117,7 +122,10 @@ mod custom_date_format {
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Ok(DateTime::<Utc>::from_utc(NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?, Utc))
Ok(DateTime::<Utc>::from_utc(
NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?,
Utc,
))
}
}

Expand All @@ -134,16 +142,26 @@ fn get_input(path: &str, file_name: &str) -> Result<Vec<String>, String> {
let f_name = e.file_name().to_string_lossy();
if f_name.ends_with(file_name) {
let contents = File::open(e.path())
.map_err(|e| format!("Something went wrong opening the log file {}\n{}", f_name, e))
.map_err(|e| {
format!(
"Something went wrong opening the log file {}\n{}",
f_name, e
)
})
.and_then(|file| {
io::BufReader::new(file)
.lines()
.map(|l| {
l.map_err(|e| format!("Something went wrong reading lines of the log file {}\n{}", f_name, e))
l.map_err(|e| {
format!(
"Something went wrong reading lines of the log file {}\n{}",
f_name, e
)
})
})
.collect::<Result<Vec<String>, String>>()
});

Some(contents)
} else {
None
Expand All @@ -166,69 +184,91 @@ fn deserialized_input(log_lines: &[String]) -> serde_json::Result<Vec<LogLine>>
.collect()
}

// turn a String into a LogLine and back into a String returning both Strings so
// they can be compared
fn deserialize_serialize_loop(
log_lines: &[String],
) -> serde_json::Result<Vec<(String, String)>> {
log_lines
.into_iter()
.map(|log_line| {
serde_json::from_str::<LogLine>(log_line).and_then(|parsed| {
serde_json::to_string(&parsed).map(|json| (log_line.clone(), json))
})
})
.collect()
// Take a json string, deserialize it to a known value, and a generic json value.
// return the two json values so they can be compared.
// this helps to check if the deserialize-serialize loop for the known value drops necessary information.
//
// This function is used as a helper to check every json logline that dbt outputs for expected values.
// In practice, this often returns an Err value if the logs have unexpected non-json values such as logged
// exceptions, warnings, or printed statements.
fn deserialize_twice<'a, T: Serialize + Deserialize<'a>>(
json_str: &'a str,
) -> Result<(serde_json::Value, serde_json::Value), String> {
// deserialize the string into a JSON value with no knowledge of T's structure
let deserialized_json =
serde_json::from_str::<serde_json::Value>(json_str).map_err(|_| json_str)?;

// deserialize the string into a T
let deserialized_t = serde_json::from_str::<'a, T>(json_str).map_err(|_| json_str)?;

// serialize the T value into a string again
let serialized_t = serde_json::to_string(&deserialized_t).map_err(|_| json_str)?;

// deserialize the string into a JSON value
let deserialized_t_json =
serde_json::from_str::<serde_json::Value>(&serialized_t).map_err(|_| json_str)?;

Ok((deserialized_json, deserialized_t_json))
}

// This is only used to make more useful error messages and avoid assert statements that say that two massive
// json objects are not the same. this will dig into the values to find an inner json value where they differ.
fn compare_json(x: &serde_json::Value, y: &serde_json::Value) -> Result<(), serde_json::Value> {
if x == y {
return Ok(());
}

match (x, y) {
// check for object key mismatches
(serde_json::Value::Object(mx), serde_json::Value::Object(my)) => {
// check if the second map is missing keys from the first
for (xk, xv) in mx {
if !my.contains_key(xk) {
let mut m_err = serde_json::Map::new();
m_err.insert(xk.clone(), xv.clone());
return Err(serde_json::Value::Object(m_err));
}
}

// check if the first map is missing keys from the second
for (yk, yv) in my {
if !mx.contains_key(yk) {
let mut m_err = serde_json::Map::new();
m_err.insert(yk.clone(), yv.clone());
return Err(serde_json::Value::Object(m_err));
}
}

// all the keys are the same, so compare each value recursively.
for k in mx.keys() {
// unwrapping (which panics) since we know all these keys exist in both maps.
let xv = mx.get(k).unwrap();
let yv = my.get(k).unwrap();

compare_json(xv, yv)?;
}

// only reaches here if all the keys are the same and all of the values are the same
// and the top-level equality check on the map failed.
Ok(())
}

// must be a non-object json value. since there are no keys to specify, return the first value.
(x, _) => Err(x.clone()),
}
}

// make sure when we read a string in then output it back to a string the two strings
// contain all the same key-value pairs.
fn test_deserialize_serialize_is_unchanged(lines: &[String]) {
let objects: Result<Vec<(serde_json::Value, serde_json::Value)>, serde_json::Error> =
deserialize_serialize_loop(lines).and_then(|v| {
v.into_iter()
.map(|(s0, s1)| {
serde_json::from_str::<serde_json::Value>(&s0).and_then(|s0v| {
serde_json::from_str::<serde_json::Value>(&s1).map(|s1v| (s0v, s1v))
})
})
.collect()
});

match objects {
Err(e) => assert!(false, "{}", e),
Ok(v) => {
for pair in v {
match pair {
(
serde_json::Value::Object(original),
serde_json::Value::Object(looped),
) => {
// looping through each key of each json value gives us meaningful failure messages
// instead of "this big string" != "this other big string"
for (key, value) in original.clone() {
let looped_val = looped.get(&key);
assert_eq!(
looped_val,
Some(&value),
"original key value ({}, {}) expected in re-serialized result",
key,
value
)
}
for (key, value) in looped.clone() {
let original_val = original.get(&key);
assert_eq!(
original_val,
Some(&value),
"looped key value ({}, {}) not found in original result",
key,
value
)
}
}
_ => assert!(false, "not comparing json objects"),
}
for line in lines {
match deserialize_twice::<LogLine>(line) {
// error if there are not two values to compare
Err(log_line) => assert!(false, "Logline cannot be deserialized into a json LogLine twice for value comparison:\n{}\n", log_line),
// if there are two values to compare, assert they are the same or find the key-value pair where they differ.
Ok((x, y)) => match compare_json(&x, &y) {
Err(json_value) => assert!(false, "LogLine values were inconsistent.\nSpecific difference:\n{}\nWhole log line:\n{}", json_value, line),
Ok(()) => ()
}
}
}
Expand All @@ -247,9 +287,9 @@ mod tests {

#[test]
fn test_values() {
assert!(deserialized_input(&[LOG_LINE.to_owned()]).map(|v| {
v.into_iter().map(|ll| ll.value_test())
}).is_ok())
assert!(deserialized_input(&[LOG_LINE.to_owned()])
.map(|v| { v.into_iter().map(|ll| ll.value_test()) })
.is_ok())
}

#[test]
Expand Down

0 comments on commit c11a26b

Please sign in to comment.