Merge pull request #70 from jonhoo/log

Interval log support
HdrHistogram · Nov 6, 2017 · 0b88329
2 parents 0290f68 + cf9ec24
commit 0b88329
Show file tree

Hide file tree

Showing 24 changed files with 1,956 additions and 270 deletions.
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,4 @@ Cargo.lock
 
 .criterion
 perf.data*
+/tmp
diff --git a/Cargo.toml b/Cargo.toml
@@ -24,13 +24,15 @@ travis-ci = { repository = "jonhoo/hdrsample" }
 
 [features]
 bench_private = [] # for enabling nightly-only feature(test) on the main crate to allow benchmarking private code
-serialization = [ "flate2" ]
+serialization = [ "flate2", "nom", "base64" ]
 default = [ "serialization" ]
 
 [dependencies]
 num-traits = "0.1"
 byteorder = "1.0.0"
 flate2 = { version = "0.2.17", optional = true }
+nom = { version = "^3.2.0", optional = true }
+base64 = { version = "0.7.0", optional = true }
 
 [dev-dependencies]
 rand = "0.3.15"

diff --git a/benches/interval_log.rs b/benches/interval_log.rs
@@ -0,0 +1,88 @@
+#![feature(test)]
+
+extern crate hdrsample;
+extern crate rand;
+extern crate test;
+
+use std::time;
+
+use hdrsample::*;
+use hdrsample::serialization;
+use hdrsample::serialization::interval_log;
+use test::Bencher;
+
+use self::rand_varint::*;
+
+#[path = "../src/serialization/rand_varint.rs"]
+mod rand_varint;
+
+#[bench]
+fn write_interval_log_1k_hist_10k_value(b: &mut Bencher) {
+    let mut log = Vec::new();
+    let mut histograms = Vec::new();
+    let mut rng = rand::weak_rng();
+
+    for _ in 0..1000 {
+        let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
+
+        for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
+            h.record(v).unwrap();
+        }
+
+        histograms.push(h);
+    }
+
+    let mut serializer = serialization::V2Serializer::new();
+
+    b.iter(|| {
+        log.clear();
+
+        let mut writer = interval_log::IntervalLogWriterBuilder::new()
+            .begin_log_with(&mut log, &mut serializer)
+            .unwrap();
+
+        let dur = time::Duration::new(5, 678_000_000);
+        for h in histograms.iter() {
+            writer
+                .write_histogram(h, time::Duration::new(1, 234_000_000), dur, None)
+                .unwrap();
+        }
+    })
+}
+
+#[bench]
+fn parse_interval_log_1k_hist_10k_value(b: &mut Bencher) {
+    let mut log = Vec::new();
+    let mut histograms = Vec::new();
+    let mut rng = rand::weak_rng();
+
+    for _ in 0..1000 {
+        let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
+
+        for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
+            h.record(v).unwrap();
+        }
+
+        histograms.push(h);
+    }
+
+    {
+        let mut serializer = serialization::V2Serializer::new();
+        let mut writer = interval_log::IntervalLogWriterBuilder::new()
+            .begin_log_with(&mut log, &mut serializer)
+            .unwrap();
+
+        let dur = time::Duration::new(5, 678_000_000);
+        for h in histograms.iter() {
+            writer
+                .write_histogram(h, time::Duration::new(1, 234_000_000), dur, None)
+                .unwrap();
+        }
+    }
+
+    b.iter(|| {
+        let iter = interval_log::IntervalLogIterator::new(&log);
+
+        assert_eq!(1000, iter.count());
+    })
+}
diff --git a/benches/record.rs b/benches/record.rs
@@ -5,22 +5,24 @@ extern crate rand;
 extern crate test;
 
 use hdrsample::*;
-use self::rand::Rng;
 use self::test::Bencher;
 
+use self::rand_varint::*;
+
+#[path = "../src/serialization/rand_varint.rs"]
+mod rand_varint;
+
 #[bench]
 fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
     let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
     let mut indices = Vec::<u64>::new();
-    // TODO improve this and similar benchmarks to use a non-uniform distribution (like that used
-    // in serialization tests) so we're not always recording in the top few buckets
     let mut rng = rand::weak_rng();
 
     // same value approach as record_precalc_random_values_with_max_count_u64 so that they are
     // comparable
 
-    for _ in 0..1000_000 {
-        indices.push(rng.gen());
+    for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
+        indices.push(v);
     }
 
     b.iter(|| {
@@ -39,10 +41,9 @@ fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) {
 
     // store values in an array and re-use so we can be sure to hit the overflow case
 
-    for _ in 0..1000_000 {
-        let r = rng.gen();
-        indices.push(r);
-        h.record_n(r, u64::max_value()).unwrap();
+    for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
+        indices.push(v);
+        h.record_n(v, u64::max_value()).unwrap();
     }
 
     b.iter(|| {
@@ -59,8 +60,8 @@ fn record_correct_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
     let mut indices = Vec::<u64>::new();
     let mut rng = rand::weak_rng();
 
-    for _ in 0..10_000 {
-        indices.push(rng.gen());
+    for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
+        indices.push(v);
     }
 
     b.iter(|| {
@@ -79,8 +80,10 @@ fn record_random_values_with_1_count_u64(b: &mut Bencher) {
     // This should be *slower* than the benchmarks above where we pre-calculate the values
     // outside of the hot loop. If it isn't, then those measurements are likely spurious.
 
-    b.iter(|| for _ in 0..1000_000 {
-        h.record(rng.gen()).unwrap()
+    b.iter(|| {
+        for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
+            h.record(v).unwrap()
+        }
     })
 }
 
@@ -136,11 +139,10 @@ fn do_subtract_benchmark<F: Fn() -> Histogram<u64>>(
     for _ in 0..1000 {
         let mut h = addend_factory();
 
-        for _ in 0..1000 {
-            let r = rng.gen();
-            h.record_n(r, count_at_each_addend_value).unwrap();
+        for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
+            h.record_n(v, count_at_each_addend_value).unwrap();
             // ensure there's a count to subtract from
-            accum.record_n(r, count_at_each_addend_value).unwrap();
+            accum.record_n(v, count_at_each_addend_value).unwrap();
         }
 
         subtrahends.push(h);
@@ -166,15 +168,16 @@ fn do_add_benchmark<F: Fn() -> Histogram<u64>>(
     for _ in 0..1000 {
         let mut h = addend_factory();
 
-        for _ in 0..1000 {
-            let r = rng.gen();
-            h.record_n(r, count_at_each_addend_value).unwrap();
+        for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
+            h.record_n(v, count_at_each_addend_value).unwrap();
         }
 
         addends.push(h);
     }
 
-    b.iter(|| for h in addends.iter() {
-        accum.add(h).unwrap();
+    b.iter(|| {
+        for h in addends.iter() {
+            accum.add(h).unwrap();
+        }
     })
 }
diff --git a/benches/serialization.rs b/benches/serialization.rs
@@ -6,11 +6,13 @@ extern crate test;
 
 use hdrsample::*;
 use hdrsample::serialization::*;
-use self::rand::distributions::range::Range;
-use self::rand::distributions::IndependentSample;
 use self::test::Bencher;
-use std::io::{Cursor, Write};
-use std::fmt::Debug;
+use std::io::Cursor;
+
+use self::rand_varint::*;
+
+#[path = "../src/serialization/rand_varint.rs"]
+mod rand_varint;
 
 #[bench]
 fn serialize_tiny_dense_v2(b: &mut Bencher) {
@@ -168,17 +170,18 @@ fn do_serialize_bench<S>(
     digits: u8,
     fraction_of_counts_len: f64,
 ) where
-    S: TestOnlyHypotheticalSerializerInterface,
+    S: Serializer,
 {
     let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
     let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
     let mut vec = Vec::with_capacity(random_counts);
 
-    let range = Range::new(low, high);
-
     let mut rng = rand::weak_rng();
-    for _ in 0..random_counts {
-        h.record(range.ind_sample(&mut rng)).unwrap();
+    for v in RandomVarintEncodedLengthIter::new(&mut rng)
+        .filter(|v| v >= &low && v <= &high)
+        .take(random_counts)
+    {
+        h.record(v).unwrap();
     }
 
     b.iter(|| {
@@ -196,17 +199,18 @@ fn do_deserialize_bench<S>(
     digits: u8,
     fraction_of_counts_len: f64,
 ) where
-    S: TestOnlyHypotheticalSerializerInterface,
+    S: Serializer,
 {
     let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
     let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
     let mut vec = Vec::with_capacity(random_counts);
 
-    let range = Range::new(low, high);
-
     let mut rng = rand::weak_rng();
-    for _ in 0..random_counts {
-        h.record(range.ind_sample(&mut rng)).unwrap();
+    for v in RandomVarintEncodedLengthIter::new(&mut rng)
+        .filter(|v| v >= &low && v <= &high)
+        .take(random_counts)
+    {
+        h.record(v).unwrap();
     }
 
     let _ = s.serialize(&h, &mut vec).unwrap();
@@ -217,5 +221,3 @@ fn do_deserialize_bench<S>(
         let _: Histogram<u64> = d.deserialize(&mut cursor).unwrap();
     });
 }
-
-include!("../src/serialization/test_serialize_trait.rs");
diff --git a/examples/cli.rs b/examples/cli.rs
@@ -9,8 +9,9 @@ use std::fmt::Display;
 use clap::{App, Arg, SubCommand};
 
 use hdrsample::{Histogram, RecordError};
-use hdrsample::serialization::{DeserializeError, Deserializer, V2DeflateSerializeError,
-                               V2DeflateSerializer, V2SerializeError, V2Serializer};
+use hdrsample::serialization::{DeserializeError, Deserializer, Serializer,
+                               V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError,
+                               V2Serializer};
 
 fn main() {
     let default_max = format!("{}", u64::max_value());

diff --git a/src/errors/mod.rs b/src/errors/mod.rs
@@ -1,4 +1,4 @@
-//! Errors types used throughout this library
+//! Error types used throughout this library
 
 /// Errors that can occur when creating a histogram.
 #[derive(Debug, Eq, PartialEq, Clone, Copy)]

diff --git a/src/iterators/linear.rs b/src/iterators/linear.rs
@@ -22,16 +22,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
             value_units_per_bucket > 0,
             "value_units_per_bucket must be > 0"
         );
+
+        let new_lowest = hist.lowest_equivalent(value_units_per_bucket - 1);
         HistogramIterator::new(
             hist,
             Iter {
                 hist,
                 value_units_per_bucket,
                 // won't underflow because value_units_per_bucket > 0
                 current_step_highest_value_reporting_level: value_units_per_bucket - 1,
-                current_step_lowest_value_reporting_level: hist.lowest_equivalent(
-                    value_units_per_bucket - 1,
-                ),
+                current_step_lowest_value_reporting_level: new_lowest,
             },
         )
     }

diff --git a/src/iterators/log.rs b/src/iterators/log.rs
@@ -27,16 +27,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
             "value_units_per_bucket must be > 0"
         );
         assert!(log_base > 1.0, "log_base must be > 1.0");
+
+        let new_lowest = hist.lowest_equivalent(value_units_in_first_bucket - 1);
         HistogramIterator::new(
             hist,
             Iter {
                 hist,
                 log_base,
                 next_value_reporting_level: value_units_in_first_bucket as f64,
                 current_step_highest_value_reporting_level: value_units_in_first_bucket - 1,
-                current_step_lowest_value_reporting_level: hist.lowest_equivalent(
-                    value_units_in_first_bucket - 1,
-                ),
+                current_step_lowest_value_reporting_level: new_lowest,
             },
         )
     }

diff --git a/src/lib.rs b/src/lib.rs
@@ -94,7 +94,7 @@
 //! use hdrsample::Histogram;
 //! let hist = Histogram::<u64>::new(2).unwrap();
 //! // ...
-//! println!("# of samples: {}", hist.count());
+//! println!("# of samples: {}", hist.len());
 //! println!("99.9'th percentile: {}", hist.value_at_quantile(0.999));
 //! ```
 //!
@@ -170,7 +170,6 @@
 //!  - `DoubleHistogram`.
 //!  - The `Recorder` feature of HdrHistogram.
 //!  - Value shifting ("normalization").
-//!  - Timestamps and tags.
 //!  - Textual output methods. These seem almost orthogonal to HdrSample, though it might be
 //!    convenient if we implemented some relevant traits (CSV, JSON, and possibly simple
 //!    `fmt::Display`).
@@ -189,6 +188,10 @@
 
 extern crate num_traits as num;
 
+#[cfg(feature = "serialization")]
+#[macro_use]
+extern crate nom;
+
 use std::borrow::Borrow;
 use std::cmp;
 use std::ops::{AddAssign, SubAssign};
@@ -777,7 +780,8 @@ impl<T: Counter> Histogram<T> {
         };
 
         // Already checked that high >= 2*low
-        h.resize(high).map_err(|_| CreationError::UsizeTypeTooSmall)?;
+        h.resize(high)
+            .map_err(|_| CreationError::UsizeTypeTooSmall)?;
         Ok(h)
     }
 
@@ -1748,9 +1752,6 @@ where
 
 // TODO: shift
 // TODO: hash
-// TODO: encoding/decoding
-// TODO: timestamps and tags
-// TODO: textual output
 
 #[path = "tests/tests.rs"]
 #[cfg(test)]
diff --git a/.gitignore b/.gitignore
@@ -12,3 +12,4 @@ Cargo.lock
 
 .criterion
 perf.data*
+/tmp