Skip to content

Commit

Permalink
Merge pull request #70 from jonhoo/log
Browse files Browse the repository at this point in the history
Interval log support
  • Loading branch information
jonhoo authored Nov 6, 2017
2 parents 0290f68 + cf9ec24 commit 0b88329
Show file tree
Hide file tree
Showing 24 changed files with 1,956 additions and 270 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Cargo.lock

.criterion
perf.data*
/tmp
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ travis-ci = { repository = "jonhoo/hdrsample" }

[features]
bench_private = [] # for enabling nightly-only feature(test) on the main crate to allow benchmarking private code
serialization = [ "flate2" ]
serialization = [ "flate2", "nom", "base64" ]
default = [ "serialization" ]

[dependencies]
num-traits = "0.1"
byteorder = "1.0.0"
flate2 = { version = "0.2.17", optional = true }
nom = { version = "^3.2.0", optional = true }
base64 = { version = "0.7.0", optional = true }

[dev-dependencies]
rand = "0.3.15"
Expand Down
88 changes: 88 additions & 0 deletions benches/interval_log.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#![feature(test)]

extern crate hdrsample;
extern crate rand;
extern crate test;

use std::time;

use hdrsample::*;
use hdrsample::serialization;
use hdrsample::serialization::interval_log;
use test::Bencher;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

/// Benchmarks writing an interval log made of 1,000 histograms, each of which has had 10,000
/// values recorded into it.
#[bench]
fn write_interval_log_1k_hist_10k_value(b: &mut Bencher) {
    let mut rng = rand::weak_rng();

    // Populate every histogram before the timed closure runs so that histogram construction
    // and recording are not part of the measurement.
    let histograms: Vec<_> = (0..1000)
        .map(|_| {
            let mut hist = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();

            for value in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
                hist.record(value).unwrap();
            }

            hist
        })
        .collect();

    let mut serializer = serialization::V2Serializer::new();
    let mut log = Vec::new();

    b.iter(|| {
        // Reuse the same buffer across iterations; it is emptied before each new log is begun.
        log.clear();

        let mut writer = interval_log::IntervalLogWriterBuilder::new()
            .begin_log_with(&mut log, &mut serializer)
            .unwrap();

        // Every histogram is written with the same start timestamp and duration.
        let start = time::Duration::new(1, 234_000_000);
        let duration = time::Duration::new(5, 678_000_000);
        for hist in &histograms {
            writer
                .write_histogram(hist, start, duration, None)
                .unwrap();
        }
    })
}

/// Benchmarks iterating over an interval log that was written from 1,000 histograms, each of
/// which has had 10,000 values recorded into it.
#[bench]
fn parse_interval_log_1k_hist_10k_value(b: &mut Bencher) {
    let mut rng = rand::weak_rng();

    // Populate every histogram up front; none of this setup is inside the timed closure.
    let histograms: Vec<_> = (0..1000)
        .map(|_| {
            let mut hist = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();

            for value in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
                hist.record(value).unwrap();
            }

            hist
        })
        .collect();

    let mut log = Vec::new();

    // Write the log once. The inner scope ends the writer's mutable borrow of `log` so the
    // buffer can be read by the benchmark closure below.
    {
        let mut serializer = serialization::V2Serializer::new();
        let mut writer = interval_log::IntervalLogWriterBuilder::new()
            .begin_log_with(&mut log, &mut serializer)
            .unwrap();

        // Every histogram is written with the same start timestamp and duration.
        let start = time::Duration::new(1, 234_000_000);
        let duration = time::Duration::new(5, 678_000_000);
        for hist in &histograms {
            writer
                .write_histogram(hist, start, duration, None)
                .unwrap();
        }
    }

    b.iter(|| {
        // Walk the whole log and check that the iterator yields 1,000 items.
        assert_eq!(1000, interval_log::IntervalLogIterator::new(&log).count());
    })
}
47 changes: 25 additions & 22 deletions benches/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,24 @@ extern crate rand;
extern crate test;

use hdrsample::*;
use self::rand::Rng;
use self::test::Bencher;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

#[bench]
fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
let mut indices = Vec::<u64>::new();
// TODO improve this and similar benchmarks to use a non-uniform distribution (like that used
// in serialization tests) so we're not always recording in the top few buckets
let mut rng = rand::weak_rng();

// same value approach as record_precalc_random_values_with_max_count_u64 so that they are
// comparable

for _ in 0..1000_000 {
indices.push(rng.gen());
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
indices.push(v);
}

b.iter(|| {
Expand All @@ -39,10 +41,9 @@ fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) {

// store values in an array and re-use so we can be sure to hit the overflow case

for _ in 0..1000_000 {
let r = rng.gen();
indices.push(r);
h.record_n(r, u64::max_value()).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
indices.push(v);
h.record_n(v, u64::max_value()).unwrap();
}

b.iter(|| {
Expand All @@ -59,8 +60,8 @@ fn record_correct_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
let mut indices = Vec::<u64>::new();
let mut rng = rand::weak_rng();

for _ in 0..10_000 {
indices.push(rng.gen());
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
indices.push(v);
}

b.iter(|| {
Expand All @@ -79,8 +80,10 @@ fn record_random_values_with_1_count_u64(b: &mut Bencher) {
// This should be *slower* than the benchmarks above where we pre-calculate the values
// outside of the hot loop. If it isn't, then those measurements are likely spurious.

b.iter(|| for _ in 0..1000_000 {
h.record(rng.gen()).unwrap()
b.iter(|| {
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
h.record(v).unwrap()
}
})
}

Expand Down Expand Up @@ -136,11 +139,10 @@ fn do_subtract_benchmark<F: Fn() -> Histogram<u64>>(
for _ in 0..1000 {
let mut h = addend_factory();

for _ in 0..1000 {
let r = rng.gen();
h.record_n(r, count_at_each_addend_value).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
h.record_n(v, count_at_each_addend_value).unwrap();
// ensure there's a count to subtract from
accum.record_n(r, count_at_each_addend_value).unwrap();
accum.record_n(v, count_at_each_addend_value).unwrap();
}

subtrahends.push(h);
Expand All @@ -166,15 +168,16 @@ fn do_add_benchmark<F: Fn() -> Histogram<u64>>(
for _ in 0..1000 {
let mut h = addend_factory();

for _ in 0..1000 {
let r = rng.gen();
h.record_n(r, count_at_each_addend_value).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
h.record_n(v, count_at_each_addend_value).unwrap();
}

addends.push(h);
}

b.iter(|| for h in addends.iter() {
accum.add(h).unwrap();
b.iter(|| {
for h in addends.iter() {
accum.add(h).unwrap();
}
})
}
34 changes: 18 additions & 16 deletions benches/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ extern crate test;

use hdrsample::*;
use hdrsample::serialization::*;
use self::rand::distributions::range::Range;
use self::rand::distributions::IndependentSample;
use self::test::Bencher;
use std::io::{Cursor, Write};
use std::fmt::Debug;
use std::io::Cursor;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

#[bench]
fn serialize_tiny_dense_v2(b: &mut Bencher) {
Expand Down Expand Up @@ -168,17 +170,18 @@ fn do_serialize_bench<S>(
digits: u8,
fraction_of_counts_len: f64,
) where
S: TestOnlyHypotheticalSerializerInterface,
S: Serializer,
{
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
let mut vec = Vec::with_capacity(random_counts);

let range = Range::new(low, high);

let mut rng = rand::weak_rng();
for _ in 0..random_counts {
h.record(range.ind_sample(&mut rng)).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng)
.filter(|v| v >= &low && v <= &high)
.take(random_counts)
{
h.record(v).unwrap();
}

b.iter(|| {
Expand All @@ -196,17 +199,18 @@ fn do_deserialize_bench<S>(
digits: u8,
fraction_of_counts_len: f64,
) where
S: TestOnlyHypotheticalSerializerInterface,
S: Serializer,
{
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
let mut vec = Vec::with_capacity(random_counts);

let range = Range::new(low, high);

let mut rng = rand::weak_rng();
for _ in 0..random_counts {
h.record(range.ind_sample(&mut rng)).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng)
.filter(|v| v >= &low && v <= &high)
.take(random_counts)
{
h.record(v).unwrap();
}

let _ = s.serialize(&h, &mut vec).unwrap();
Expand All @@ -217,5 +221,3 @@ fn do_deserialize_bench<S>(
let _: Histogram<u64> = d.deserialize(&mut cursor).unwrap();
});
}

include!("../src/serialization/test_serialize_trait.rs");
5 changes: 3 additions & 2 deletions examples/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ use std::fmt::Display;
use clap::{App, Arg, SubCommand};

use hdrsample::{Histogram, RecordError};
use hdrsample::serialization::{DeserializeError, Deserializer, V2DeflateSerializeError,
V2DeflateSerializer, V2SerializeError, V2Serializer};
use hdrsample::serialization::{DeserializeError, Deserializer, Serializer,
V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError,
V2Serializer};

fn main() {
let default_max = format!("{}", u64::max_value());
Expand Down
2 changes: 1 addition & 1 deletion src/errors/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Errors types used throughout this library
//! Error types used throughout this library
/// Errors that can occur when creating a histogram.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
Expand Down
6 changes: 3 additions & 3 deletions src/iterators/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
value_units_per_bucket > 0,
"value_units_per_bucket must be > 0"
);

let new_lowest = hist.lowest_equivalent(value_units_per_bucket - 1);
HistogramIterator::new(
hist,
Iter {
hist,
value_units_per_bucket,
// won't underflow because value_units_per_bucket > 0
current_step_highest_value_reporting_level: value_units_per_bucket - 1,
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
value_units_per_bucket - 1,
),
current_step_lowest_value_reporting_level: new_lowest,
},
)
}
Expand Down
6 changes: 3 additions & 3 deletions src/iterators/log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ impl<'a, T: 'a + Counter> Iter<'a, T> {
"value_units_per_bucket must be > 0"
);
assert!(log_base > 1.0, "log_base must be > 1.0");

let new_lowest = hist.lowest_equivalent(value_units_in_first_bucket - 1);
HistogramIterator::new(
hist,
Iter {
hist,
log_base,
next_value_reporting_level: value_units_in_first_bucket as f64,
current_step_highest_value_reporting_level: value_units_in_first_bucket - 1,
current_step_lowest_value_reporting_level: hist.lowest_equivalent(
value_units_in_first_bucket - 1,
),
current_step_lowest_value_reporting_level: new_lowest,
},
)
}
Expand Down
13 changes: 7 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
//! use hdrsample::Histogram;
//! let hist = Histogram::<u64>::new(2).unwrap();
//! // ...
//! println!("# of samples: {}", hist.count());
//! println!("# of samples: {}", hist.len());
//! println!("99.9'th percentile: {}", hist.value_at_quantile(0.999));
//! ```
//!
Expand Down Expand Up @@ -170,7 +170,6 @@
//! - `DoubleHistogram`.
//! - The `Recorder` feature of HdrHistogram.
//! - Value shifting ("normalization").
//! - Timestamps and tags.
//! - Textual output methods. These seem almost orthogonal to HdrSample, though it might be
//! convenient if we implemented some relevant traits (CSV, JSON, and possibly simple
//! `fmt::Display`).
Expand All @@ -189,6 +188,10 @@

extern crate num_traits as num;

#[cfg(feature = "serialization")]
#[macro_use]
extern crate nom;

use std::borrow::Borrow;
use std::cmp;
use std::ops::{AddAssign, SubAssign};
Expand Down Expand Up @@ -777,7 +780,8 @@ impl<T: Counter> Histogram<T> {
};

// Already checked that high >= 2*low
h.resize(high).map_err(|_| CreationError::UsizeTypeTooSmall)?;
h.resize(high)
.map_err(|_| CreationError::UsizeTypeTooSmall)?;
Ok(h)
}

Expand Down Expand Up @@ -1748,9 +1752,6 @@ where

// TODO: shift
// TODO: hash
// TODO: encoding/decoding
// TODO: timestamps and tags
// TODO: textual output

#[path = "tests/tests.rs"]
#[cfg(test)]
Expand Down
Loading

0 comments on commit 0b88329

Please sign in to comment.