From 0c13b8427039d95332f1b22c9a436ed392cb67f5 Mon Sep 17 00:00:00 2001 From: Mitsuhiro Nakamura Date: Sat, 29 May 2021 02:04:55 +0900 Subject: [PATCH 1/2] cmd/stats: add tests for --quartiles flag --- tests/test_stats.rs | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/test_stats.rs b/tests/test_stats.rs index b63b396e..37016d49 100644 --- a/tests/test_stats.rs +++ b/tests/test_stats.rs @@ -91,15 +91,31 @@ fn setup(name: S, rows: &[&str], headers: bool, fn get_field_value(wrk: &Workdir, cmd: &mut process::Command, field: &str) -> String { if field == "median" { cmd.arg("--median"); } + if field == "quartiles" { cmd.arg("--quartiles"); } if field == "cardinality" { cmd.arg("--cardinality"); } if field == "mode" { cmd.arg("--mode"); } let mut rows: Vec> = wrk.read_stdout(cmd); let headers = rows.remove(0); + let mut sequence: Vec<&str> = vec![]; for row in rows.iter() { for (h, val) in headers.iter().zip(row.iter()) { - if &**h == field { - return val.clone(); + match field { + "quartiles" => { + match &**h { + "q1" | "q2" => { + sequence.push(val); + }, + "q3" => { + sequence.push(val); + return sequence.join(",").clone(); + }, + _ => {}, + } + }, + _ => { + if &**h == field { return val.clone(); } + }, } } } @@ -127,16 +143,19 @@ stats_tests!(stats_infer_null_int_float_unicode, "type", stats_tests!(stats_no_mean, "mean", &["a"], ""); stats_tests!(stats_no_stddev, "stddev", &["a"], ""); stats_tests!(stats_no_median, "median", &["a"], ""); +stats_tests!(stats_no_quartiles, "quartiles", &["a"], ",,"); stats_tests!(stats_no_mode, "mode", &["a", "b"], "N/A"); stats_tests!(stats_null_mean, "mean", &[""], ""); stats_tests!(stats_null_stddev, "stddev", &[""], ""); stats_tests!(stats_null_median, "median", &[""], ""); +stats_tests!(stats_null_quartiles, "quartiles", &[""], ",,"); stats_tests!(stats_null_mode, "mode", &[""], "N/A"); stats_tests!(stats_includenulls_null_mean, "mean", &[""], "", true); stats_tests!(stats_includenulls_null_stddev, "stddev", &[""], "", true); stats_tests!(stats_includenulls_null_median, "median", &[""], "", true); +stats_tests!(stats_includenulls_null_quartiles, "quartiles", &[""], ",,", true); stats_tests!(stats_includenulls_null_mode, "mode", &[""], "N/A", true); stats_tests!(stats_includenulls_mean, @@ -180,6 +199,11 @@ stats_tests!(stats_median_even, "median", &["1", "2", "3", "4"], "2.5"); stats_tests!(stats_median_even_null, "median", &["", "1", "2", "3", "4"], "2.5"); stats_tests!(stats_median_mix, "median", &["1", "2.5", "3"], "2.5"); +stats_tests!(stats_quartiles, "quartiles", &["1", "2", "3"], "1,2,3"); +stats_tests!(stats_quartiles_null, "quartiles", &["", "1", "2", "3"], "1,2,3"); +stats_tests!(stats_quartiles_even, "quartiles", &["1", "2", "3", "4"], "1.5,2.5,3.5"); +stats_tests!(stats_quartiles_even_null, "quartiles", &["", "1", "2", "3", "4"], "1.5,2.5,3.5"); +stats_tests!(stats_quartiles_mix, "quartiles", &["1", "2.0", "3", "4"], "1.5,2.5,3.5"); mod stats_infer_nothing { // Only test CSV data with headers. @@ -208,6 +232,11 @@ mod stats_zero_median { stats_test_headers!(stats_zero_median, "median", &[], ""); } +mod stats_zero_quartiles { + use super::test_stats; + stats_test_headers!(stats_zero_quartiles, "quartiles", &[], ",,"); +} + mod stats_header_fields { use super::test_stats; stats_test_headers!(stats_header_field_name, "field", &["a"], "header"); From 040e1985a0b7971f396d555a2e730a2ac4bd72f8 Mon Sep 17 00:00:00 2001 From: Mitsuhiro Nakamura Date: Sat, 29 May 2021 01:47:39 +0900 Subject: [PATCH 2/2] cmd/stats: add --quartiles flag See BurntSushi/xsv/issues/87 --- src/cmd/stats.rs | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/src/cmd/stats.rs b/src/cmd/stats.rs index 64572a7b..02ad2270 100644 --- a/src/cmd/stats.rs +++ b/src/cmd/stats.rs @@ -47,6 +47,8 @@ stats options: This requires storing all CSV data in memory. --median Show the median. This requires storing all CSV data in memory. + --quartiles Show the quartiles. + This requires storing all CSV data in memory. --nulls Include NULLs in the population size for computing mean and standard deviation. -j, --jobs The number of jobs to run in parallel. @@ -75,6 +77,7 @@ struct Args { flag_mode: bool, flag_cardinality: bool, flag_median: bool, + flag_quartiles: bool, flag_nulls: bool, flag_jobs: usize, flag_output: Option, @@ -209,7 +212,8 @@ impl Args { range: true, dist: true, cardinality: self.flag_cardinality || self.flag_everything, - median: self.flag_median || self.flag_everything, + median: self.flag_median && !self.flag_quartiles && !self.flag_everything, + quartiles: self.flag_quartiles || self.flag_everything, mode: self.flag_mode || self.flag_everything, })).take(record_len).collect() } @@ -220,7 +224,10 @@ impl Args { "mean", "stddev", ]; let all = self.flag_everything; - if self.flag_median || all { fields.push("median"); } + if self.flag_median && !self.flag_quartiles && !all { fields.push("median"); } + if self.flag_quartiles || all { + fields.push("q1"); fields.push("q2"); fields.push("q3"); + } if self.flag_mode || all { fields.push("mode"); } if self.flag_cardinality || all { fields.push("cardinality"); } csv::StringRecord::from(fields) @@ -235,6 +242,7 @@ struct WhichStats { dist: bool, cardinality: bool, median: bool, + quartiles: bool, mode: bool, } @@ -252,18 +260,20 @@ struct Stats { online: Option, mode: Option>>, median: Option>, + quartiles: Option>, which: WhichStats, } impl Stats { fn new(which: WhichStats) -> Stats { - let (mut sum, mut minmax, mut online, mut mode, mut median) = - (None, None, None, None, None); + let (mut sum, mut minmax, mut online, mut mode, mut median, mut quartiles) = + (None, None, None, None, None, None); if which.sum { sum = Some(Default::default()); } if which.range { minmax = Some(Default::default()); } if which.dist { online = Some(Default::default()); } if which.mode || which.cardinality { mode = Some(Default::default()); } if which.median { median = Some(Default::default()); } + if which.quartiles { quartiles = Some(Default::default()); } Stats { typ: Default::default(), sum: sum, @@ -271,6 +281,7 @@ impl Stats { online: online, mode: mode, median: median, + quartiles: quartiles, which: which, } } @@ -299,6 +310,7 @@ impl Stats { } else { let n = from_bytes::(sample).unwrap(); self.median.as_mut().map(|v| { v.add(n); }); + self.quartiles.as_mut().map(|v| { v.add(n); }); self.online.as_mut().map(|v| { v.add(n); }); } } @@ -343,6 +355,20 @@ impl Stats { } Some(v) => { pieces.push(v.to_string()); } } + match self.quartiles.as_mut().and_then(|v| v.quartiles()) { + None => { + if self.which.quartiles { + pieces.push(empty()); + pieces.push(empty()); + pieces.push(empty()); + } + } + Some((q1, q2, q3)) => { + pieces.push(q1.to_string()); + pieces.push(q2.to_string()); + pieces.push(q3.to_string()); + } + } match self.mode.as_mut() { None => { if self.which.mode { @@ -377,6 +403,7 @@ impl Commute for Stats { self.online.merge(other.online); self.mode.merge(other.mode); self.median.merge(other.median); + self.quartiles.merge(other.quartiles); self.which.merge(other.which); } }