Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 1.2.latest] Print more information on log line interop test failures #9521

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20220816-122032.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: add more information to log line interop test failures
time: 2022-08-16T12:20:32.119588+01:00
custom:
Author: nathaniel-may
Issue: "5658"
PR: "5659"
192 changes: 116 additions & 76 deletions test/interop/log_parsing/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::env;
Expand All @@ -7,28 +6,35 @@ use std::fs::File;
use std::io::{self, BufRead};
use walkdir::WalkDir;


// Applies schema tests to file input
// if these fail, we either have a problem in dbt that needs to be resolved
// or we have changed our interface and the log_version should be bumped in dbt,
// modeled appropriately here, and publish new docs for the new log_version.
fn main() -> Result<(), Box<dyn Error>> {
let log_name = "dbt.log";
let path = env::var("LOG_DIR").expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`");

let path = env::var("LOG_DIR")
.expect("must pass absolute log path to tests with env var `LOG_DIR=/logs/live/here/`");

println!("Looking for files named `{}` in {}", log_name, path);
let lines: Vec<String> = get_input(&path, log_name)?;
println!("collected {} log lines.", lines.len());

println!("");

println!("testing type-level schema compliance by deserializing each line...");
let log_lines: Vec<LogLine> = deserialized_input(&lines)
.map_err(|e| format!("schema test failure: json doesn't match type definition\n{}", e))?;
let log_lines: Vec<LogLine> = deserialized_input(&lines).map_err(|e| {
format!(
"schema test failure: json doesn't match type definition\n{}",
e
)
})?;
println!("Done.");

println!("");
println!("because we skip non-json log lines, there are {} collected values to test.", log_lines.len());
println!(
"because we skip non-json log lines, there are {} collected values to test.",
log_lines.len()
);
println!("");

// make sure when we read a string in then output it back to a string the two strings
Expand All @@ -49,7 +55,6 @@ fn main() -> Result<(), Box<dyn Error>> {
Ok(())
}


// each nested type of LogLine should define its own value_test function
// that asserts values are within an expected set of values when possible.
trait ValueTest {
Expand All @@ -68,11 +73,11 @@ struct LogLine {
level: String,
invocation_id: String,
thread_name: String,
data: serde_json::Value, // TODO be more specific
data: serde_json::Value, // TODO be more specific
}

impl ValueTest for LogLine {
fn value_test(&self){
fn value_test(&self) {
assert_eq!(
self.log_version, 2,
"The log version changed. Be sure this was intentional."
Expand All @@ -99,7 +104,7 @@ impl ValueTest for LogLine {
// this requires handling the date with "%Y-%m-%dT%H:%M:%S%.6f" which requires this
// boilerplate-looking module.
mod custom_date_format {
use chrono::{NaiveDateTime, DateTime, Utc};
use chrono::{DateTime, NaiveDateTime, Utc};
use serde::{self, Deserialize, Deserializer, Serializer};

const FORMAT: &'static str = "%Y-%m-%dT%H:%M:%S%.6fZ";
Expand All @@ -117,7 +122,10 @@ mod custom_date_format {
D: Deserializer<'de>,
{
let s = String::deserialize(deserializer)?;
Ok(DateTime::<Utc>::from_utc(NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?, Utc))
Ok(DateTime::<Utc>::from_utc(
NaiveDateTime::parse_from_str(&s, FORMAT).map_err(serde::de::Error::custom)?,
Utc,
))
}
}

Expand All @@ -134,16 +142,26 @@ fn get_input(path: &str, file_name: &str) -> Result<Vec<String>, String> {
let f_name = e.file_name().to_string_lossy();
if f_name.ends_with(file_name) {
let contents = File::open(e.path())
.map_err(|e| format!("Something went wrong opening the log file {}\n{}", f_name, e))
.map_err(|e| {
format!(
"Something went wrong opening the log file {}\n{}",
f_name, e
)
})
.and_then(|file| {
io::BufReader::new(file)
.lines()
.map(|l| {
l.map_err(|e| format!("Something went wrong reading lines of the log file {}\n{}", f_name, e))
l.map_err(|e| {
format!(
"Something went wrong reading lines of the log file {}\n{}",
f_name, e
)
})
})
.collect::<Result<Vec<String>, String>>()
});

Some(contents)
} else {
None
Expand All @@ -166,69 +184,91 @@ fn deserialized_input(log_lines: &[String]) -> serde_json::Result<Vec<LogLine>>
.collect()
}

// turn a String into a LogLine and back into a String returning both Strings so
// they can be compared
fn deserialize_serialize_loop(
log_lines: &[String],
) -> serde_json::Result<Vec<(String, String)>> {
log_lines
.into_iter()
.map(|log_line| {
serde_json::from_str::<LogLine>(log_line).and_then(|parsed| {
serde_json::to_string(&parsed).map(|json| (log_line.clone(), json))
})
})
.collect()
// Take a json string, deserialize it to a known value, and a generic json value.
// return the two json values so they can be compared.
// this helps to check if the deserialize-serialize loop for the known value drops necessary information.
//
// This function is used as a helper to check every json logline that dbt outputs for expected values.
// In practice, this often returns an Err value if the logs have unexpected non-json values such as logged
// exceptions, warnings, or printed statements.
fn deserialize_twice<'a, T: Serialize + Deserialize<'a>>(
json_str: &'a str,
) -> Result<(serde_json::Value, serde_json::Value), String> {
// deserialize the string into a JSON value with no knowledge of T's structure
let deserialized_json =
serde_json::from_str::<serde_json::Value>(json_str).map_err(|_| json_str)?;

// deserialize the string into a T
let deserialized_t = serde_json::from_str::<'a, T>(json_str).map_err(|_| json_str)?;

// serialize the T value into a string again
let serialized_t = serde_json::to_string(&deserialized_t).map_err(|_| json_str)?;

// deserialize the string into a JSON value
let deserialized_t_json =
serde_json::from_str::<serde_json::Value>(&serialized_t).map_err(|_| json_str)?;

Ok((deserialized_json, deserialized_t_json))
}

// This is only used to make more useful error messages and avoid assert statements that say that two massive
// json objects are not the same. this will dig into the values to find an inner json value where they differ.
fn compare_json(x: &serde_json::Value, y: &serde_json::Value) -> Result<(), serde_json::Value> {
if x == y {
return Ok(());
}

match (x, y) {
// check for object key mismatches
(serde_json::Value::Object(mx), serde_json::Value::Object(my)) => {
// check if the second map is missing keys from the first
for (xk, xv) in mx {
if !my.contains_key(xk) {
let mut m_err = serde_json::Map::new();
m_err.insert(xk.clone(), xv.clone());
return Err(serde_json::Value::Object(m_err));
}
}

// check if the first map is missing keys from the second
for (yk, yv) in my {
if !mx.contains_key(yk) {
let mut m_err = serde_json::Map::new();
m_err.insert(yk.clone(), yv.clone());
return Err(serde_json::Value::Object(m_err));
}
}

// all the keys are the same, so compare each value recursively.
for k in mx.keys() {
// unwrapping (which panics) since we know all these keys exist in both maps.
let xv = mx.get(k).unwrap();
let yv = my.get(k).unwrap();

compare_json(xv, yv)?;
}

// only reaches here if all the keys are the same and all of the values are the same
// and the top-level equality check on the map failed.
Ok(())
}

// must be a non-object json value. since there are no keys to specify, return the first value.
(x, _) => Err(x.clone()),
}
}

// make sure when we read a string in then output it back to a string the two strings
// contain all the same key-value pairs.
fn test_deserialize_serialize_is_unchanged(lines: &[String]) {
let objects: Result<Vec<(serde_json::Value, serde_json::Value)>, serde_json::Error> =
deserialize_serialize_loop(lines).and_then(|v| {
v.into_iter()
.map(|(s0, s1)| {
serde_json::from_str::<serde_json::Value>(&s0).and_then(|s0v| {
serde_json::from_str::<serde_json::Value>(&s1).map(|s1v| (s0v, s1v))
})
})
.collect()
});

match objects {
Err(e) => assert!(false, "{}", e),
Ok(v) => {
for pair in v {
match pair {
(
serde_json::Value::Object(original),
serde_json::Value::Object(looped),
) => {
// looping through each key of each json value gives us meaningful failure messages
// instead of "this big string" != "this other big string"
for (key, value) in original.clone() {
let looped_val = looped.get(&key);
assert_eq!(
looped_val,
Some(&value),
"original key value ({}, {}) expected in re-serialized result",
key,
value
)
}
for (key, value) in looped.clone() {
let original_val = original.get(&key);
assert_eq!(
original_val,
Some(&value),
"looped key value ({}, {}) not found in original result",
key,
value
)
}
}
_ => assert!(false, "not comparing json objects"),
}
for line in lines {
match deserialize_twice::<LogLine>(line) {
// error if there are not two values to compare
Err(log_line) => assert!(false, "Logline cannot be deserialized into a json LogLine twice for value comparison:\n{}\n", log_line),
// if there are two values to compare, assert they are the same or find the key-value pair where they differ.
Ok((x, y)) => match compare_json(&x, &y) {
Err(json_value) => assert!(false, "LogLine values were inconsistent.\nSpecific difference:\n{}\nWhole log line:\n{}", json_value, line),
Ok(()) => ()
}
}
}
Expand All @@ -247,9 +287,9 @@ mod tests {

#[test]
fn test_values() {
assert!(deserialized_input(&[LOG_LINE.to_owned()]).map(|v| {
v.into_iter().map(|ll| ll.value_test())
}).is_ok())
assert!(deserialized_input(&[LOG_LINE.to_owned()])
.map(|v| { v.into_iter().map(|ll| ll.value_test()) })
.is_ok())
}

#[test]
Expand Down
Loading