Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interval log support #70

Merged
merged 18 commits into from
Nov 6, 2017
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ Cargo.lock

.criterion
perf.data*
/tmp
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ travis-ci = { repository = "jonhoo/hdrsample" }

[features]
bench_private = [] # for enabling nightly-only feature(test) on the main crate to allow benchmarking private code
serialization = [ "flate2" ]
serialization = [ "flate2", "nom", "base64" ]
default = [ "serialization" ]

[dependencies]
num-traits = "0.1"
byteorder = "1.0.0"
flate2 = { version = "0.2.17", optional = true }
nom = { version = "^3.2.0", optional = true }
base64 = { version = "0.7.0", optional = true }

[dev-dependencies]
rand = "0.3.15"
Expand Down
84 changes: 84 additions & 0 deletions benches/interval_log.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#![feature(test)]

extern crate hdrsample;
extern crate rand;
extern crate test;

use std::time;

use hdrsample::*;
use hdrsample::serialization;
use hdrsample::serialization::interval_log;
use test::Bencher;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

/// Benchmarks writing 1000 histograms to an in-memory interval log.
///
/// Each histogram is filled with 10,000 values taken from
/// `RandomVarintEncodedLengthIter` before timing starts. Every timed iteration
/// clears the output buffer, builds a fresh writer over it and writes all of
/// the histograms.
#[bench]
fn write_interval_log_1k_hist_10k_value(b: &mut Bencher) {
    let mut rng = rand::weak_rng();

    // Prepare the inputs outside of the timed closure.
    let histograms: Vec<_> = (0..1000)
        .map(|_| {
            let mut hist = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();

            for value in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
                hist.record(value).unwrap();
            }

            hist
        })
        .collect();

    let mut serializer = serialization::V2Serializer::new();
    // Output buffer, reused (and emptied) by every iteration.
    let mut log = Vec::new();

    b.iter(|| {
        log.clear();

        let builder = interval_log::IntervalLogWriterBuilder::new();
        let mut writer = builder.build_with(&mut log, &mut serializer).unwrap();

        let interval = time::Duration::new(5, 678_000_000);
        for hist in &histograms {
            writer.write_histogram(hist, 1.234, interval, None).unwrap();
        }
    })
}

/// Benchmarks iterating over an in-memory interval log of 1000 histograms.
///
/// The log is produced once, before timing starts, by writing 1000 histograms
/// (each filled with 10,000 values taken from `RandomVarintEncodedLengthIter`)
/// through a V2 serializer. Every timed iteration creates an
/// `IntervalLogIterator` over the log bytes and asserts that it yields exactly
/// 1000 items.
#[bench]
fn parse_interval_log_1k_hist_10k_value(b: &mut Bencher) {
    let mut rng = rand::weak_rng();

    // Prepare the histograms that will be written into the log.
    let histograms: Vec<_> = (0..1000)
        .map(|_| {
            let mut hist = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();

            for value in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
                hist.record(value).unwrap();
            }

            hist
        })
        .collect();

    let mut log = Vec::new();

    // Scope the writer so its mutable borrow of `log` ends before the log is read.
    {
        let mut serializer = serialization::V2Serializer::new();
        let builder = interval_log::IntervalLogWriterBuilder::new();
        let mut writer = builder.build_with(&mut log, &mut serializer).unwrap();

        let interval = time::Duration::new(5, 678_000_000);
        for hist in &histograms {
            writer.write_histogram(hist, 1.234, interval, None).unwrap();
        }
    }

    b.iter(|| {
        let entries = interval_log::IntervalLogIterator::new(&log).count();

        assert_eq!(1000, entries);
    })
}
41 changes: 21 additions & 20 deletions benches/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,24 @@ extern crate rand;
extern crate test;

use hdrsample::*;
use self::rand::Rng;
use self::test::Bencher;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

#[bench]
fn record_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
let mut h = Histogram::<u64>::new_with_bounds(1, u64::max_value(), 3).unwrap();
let mut indices = Vec::<u64>::new();
// TODO improve this and similar benchmarks to use a non-uniform distribution (like that used
// in serialization tests) so we're not always recording in the top few buckets
let mut rng = rand::weak_rng();

// same value approach as record_precalc_random_values_with_max_count_u64 so that they are
// comparable

for _ in 0..1000_000 {
indices.push(rng.gen());
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
indices.push(v);
}

b.iter(|| {
Expand All @@ -39,10 +41,9 @@ fn record_precalc_random_values_with_max_count_u64(b: &mut Bencher) {

// store values in an array and re-use so we can be sure to hit the overflow case

for _ in 0..1000_000 {
let r = rng.gen();
indices.push(r);
h.record_n(r, u64::max_value()).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
indices.push(v);
h.record_n(v, u64::max_value()).unwrap();
}

b.iter(|| {
Expand All @@ -59,8 +60,8 @@ fn record_correct_precalc_random_values_with_1_count_u64(b: &mut Bencher) {
let mut indices = Vec::<u64>::new();
let mut rng = rand::weak_rng();

for _ in 0..10_000 {
indices.push(rng.gen());
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(10_000) {
indices.push(v);
}

b.iter(|| {
Expand All @@ -79,8 +80,10 @@ fn record_random_values_with_1_count_u64(b: &mut Bencher) {
// This should be *slower* than the benchmarks above where we pre-calculate the values
// outside of the hot loop. If it isn't, then those measurements are likely spurious.

b.iter(|| for _ in 0..1000_000 {
h.record(rng.gen()).unwrap()
b.iter(|| {
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000_000) {
h.record(v).unwrap()
}
})
}

Expand Down Expand Up @@ -136,11 +139,10 @@ fn do_subtract_benchmark<F: Fn() -> Histogram<u64>>(
for _ in 0..1000 {
let mut h = addend_factory();

for _ in 0..1000 {
let r = rng.gen();
h.record_n(r, count_at_each_addend_value).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
h.record_n(v, count_at_each_addend_value).unwrap();
// ensure there's a count to subtract from
accum.record_n(r, count_at_each_addend_value).unwrap();
accum.record_n(v, count_at_each_addend_value).unwrap();
}

subtrahends.push(h);
Expand All @@ -166,9 +168,8 @@ fn do_add_benchmark<F: Fn() -> Histogram<u64>>(
for _ in 0..1000 {
let mut h = addend_factory();

for _ in 0..1000 {
let r = rng.gen();
h.record_n(r, count_at_each_addend_value).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng).take(1_000) {
h.record_n(v, count_at_each_addend_value).unwrap();
}

addends.push(h);
Expand Down
34 changes: 18 additions & 16 deletions benches/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ extern crate test;

use hdrsample::*;
use hdrsample::serialization::*;
use self::rand::distributions::range::Range;
use self::rand::distributions::IndependentSample;
use self::test::Bencher;
use std::io::{Cursor, Write};
use std::fmt::Debug;
use std::io::Cursor;

use self::rand_varint::*;

#[path = "../src/serialization/rand_varint.rs"]
mod rand_varint;

#[bench]
fn serialize_tiny_dense_v2(b: &mut Bencher) {
Expand Down Expand Up @@ -168,17 +170,18 @@ fn do_serialize_bench<S>(
digits: u8,
fraction_of_counts_len: f64,
) where
S: TestOnlyHypotheticalSerializerInterface,
S: Serializer,
{
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
let mut vec = Vec::with_capacity(random_counts);

let range = Range::new(low, high);

let mut rng = rand::weak_rng();
for _ in 0..random_counts {
h.record(range.ind_sample(&mut rng)).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng)
.filter(|v| v >= &low && v <= &high)
.take(random_counts)
{
h.record(v).unwrap();
}

b.iter(|| {
Expand All @@ -196,17 +199,18 @@ fn do_deserialize_bench<S>(
digits: u8,
fraction_of_counts_len: f64,
) where
S: TestOnlyHypotheticalSerializerInterface,
S: Serializer,
{
let mut h = Histogram::<u64>::new_with_bounds(low, high, digits).unwrap();
let random_counts = (fraction_of_counts_len * h.distinct_values() as f64) as usize;
let mut vec = Vec::with_capacity(random_counts);

let range = Range::new(low, high);

let mut rng = rand::weak_rng();
for _ in 0..random_counts {
h.record(range.ind_sample(&mut rng)).unwrap();
for v in RandomVarintEncodedLengthIter::new(&mut rng)
.filter(|v| v >= &low && v <= &high)
.take(random_counts)
{
h.record(v).unwrap();
}

let _ = s.serialize(&h, &mut vec).unwrap();
Expand All @@ -217,5 +221,3 @@ fn do_deserialize_bench<S>(
let _: Histogram<u64> = d.deserialize(&mut cursor).unwrap();
});
}

include!("../src/serialization/test_serialize_trait.rs");
5 changes: 3 additions & 2 deletions examples/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ use std::fmt::Display;
use clap::{App, Arg, SubCommand};

use hdrsample::{Histogram, RecordError};
use hdrsample::serialization::{DeserializeError, Deserializer, V2DeflateSerializeError,
V2DeflateSerializer, V2SerializeError, V2Serializer};
use hdrsample::serialization::{DeserializeError, Deserializer, Serializer,
V2DeflateSerializeError, V2DeflateSerializer, V2SerializeError,
V2Serializer};

fn main() {
let default_max = format!("{}", u64::max_value());
Expand Down
2 changes: 1 addition & 1 deletion src/errors/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Errors types used throughout this library
//! Error types used throughout this library

/// Errors that can occur when creating a histogram.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
Expand Down
10 changes: 5 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
//! use hdrsample::Histogram;
//! let hist = Histogram::<u64>::new(2).unwrap();
//! // ...
//! println!("# of samples: {}", hist.count());
//! println!("# of samples: {}", hist.len());
//! println!("99.9'th percentile: {}", hist.value_at_quantile(0.999));
//! ```
//!
Expand Down Expand Up @@ -170,7 +170,6 @@
//! - `DoubleHistogram`.
//! - The `Recorder` feature of HdrHistogram.
//! - Value shifting ("normalization").
//! - Timestamps and tags.
//! - Textual output methods. These seem almost orthogonal to HdrSample, though it might be
//! convenient if we implemented some relevant traits (CSV, JSON, and possibly simple
//! `fmt::Display`).
Expand All @@ -189,6 +188,10 @@

extern crate num_traits as num;

#[cfg(feature = "serialization")]
#[macro_use]
extern crate nom;

use std::borrow::Borrow;
use std::cmp;
use std::ops::{AddAssign, SubAssign};
Expand Down Expand Up @@ -1748,9 +1751,6 @@ where

// TODO: shift
// TODO: hash
// TODO: encoding/decoding
// TODO: timestamps and tags
// TODO: textual output

#[path = "tests/tests.rs"]
#[cfg(test)]
Expand Down
Loading