Skip to content

Commit

Permalink
benchmarks for convert_to_state
Browse files Browse the repository at this point in the history
  • Loading branch information
korowa committed Aug 1, 2024
1 parent 0249eb7 commit 098d41c
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 0 deletions.
12 changes: 12 additions & 0 deletions datafusion/functions-aggregate/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,15 @@ datafusion-physical-expr-common = { workspace = true }
log = { workspace = true }
paste = "1.0.14"
sqlparser = { workspace = true }

[dev-dependencies]
arrow = { workspace = true, features = ["test_utils"] }
criterion = "0.5"

[[bench]]
name = "count"
harness = false

[[bench]]
name = "sum"
harness = false
95 changes: 95 additions & 0 deletions datafusion/functions-aggregate/benches/count.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, BooleanArray};
use arrow::datatypes::Int32Type;
use arrow::util::bench_util::{create_boolean_array, create_primitive_array};
use arrow_schema::{DataType, Field, Schema};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_common::DFSchema;
use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator};
use datafusion_functions_aggregate::count::Count;
use std::sync::Arc;

/// Builds the groups accumulator of the `COUNT` aggregate for a single
/// nullable `Int32` column named `f`.
///
/// # Panics
///
/// Panics if the `DFSchema` conversion or the accumulator creation fails.
fn prepare_accumulator() -> Box<dyn GroupsAccumulator> {
    let field = Field::new("f", DataType::Int32, true);
    let schema = Arc::new(Schema::new(vec![field]));
    let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap();

    // Named locals keep the borrowed slices alive for as long as `args` is.
    let input_types = [DataType::Int32];
    let input_exprs = [datafusion_expr::col("f")];

    let args = AccumulatorArgs {
        data_type: &DataType::Int64,
        schema: &schema,
        dfschema: &df_schema,
        ignore_nulls: false,
        sort_exprs: &[],
        is_reversed: false,
        name: "COUNT(f)",
        is_distinct: false,
        input_types: &input_types,
        input_exprs: &input_exprs,
    };

    Count::new().create_groups_accumulator(args).unwrap()
}

/// Registers a Criterion benchmark that measures
/// `GroupsAccumulator::convert_to_state` on the `COUNT` accumulator.
///
/// * `c` - Criterion instance the benchmark is registered with.
/// * `name` - identifier passed to `bench_function`.
/// * `values` - the single input array handed to `convert_to_state`.
/// * `opt_filter` - optional filter forwarded unchanged to `convert_to_state`.
///
/// # Panics
///
/// Panics if the accumulator cannot be created or `convert_to_state` returns
/// an error.
fn convert_to_state_bench(
    c: &mut Criterion,
    name: &str,
    values: ArrayRef,
    opt_filter: Option<&BooleanArray>,
) {
    let accumulator = prepare_accumulator();
    // Build the input slice once, outside the timed closure, so the
    // measurement does not include an `Arc` clone and drop per iteration.
    let inputs = [values];
    c.bench_function(name, |b| {
        b.iter(|| {
            black_box(
                accumulator
                    .convert_to_state(&inputs, opt_filter)
                    .unwrap(),
            )
        })
    });
}

/// Runs the `COUNT` `convert_to_state` benchmark over every combination of
/// "input has nulls" and "a filter is supplied", using 8192-row inputs.
fn count_benchmark(c: &mut Criterion) {
    const ROWS: usize = 8192;

    // (benchmark name, null density of the input values, whether to pass a filter)
    let cases = [
        ("count convert state no nulls, no filter", 0.0_f32, false),
        ("count convert state nulls, no filter", 0.3_f32, false),
        ("count convert state no nulls, filter", 0.0_f32, true),
        ("count convert state nulls, filter", 0.3_f32, true),
    ];

    for (name, null_density, with_filter) in cases {
        let values: ArrayRef =
            Arc::new(create_primitive_array::<Int32Type>(ROWS, null_density));
        // The filter has no nulls and a true density of 0.5.
        let filter = with_filter.then(|| create_boolean_array(ROWS, 0.0, 0.5));
        convert_to_state_bench(c, name, values, filter.as_ref());
    }
}

// Register `count_benchmark` in the Criterion group `benches` and use that
// group as the entry point of this bench target (declared with
// `harness = false` in Cargo.toml).
criterion_group!(benches, count_benchmark);
criterion_main!(benches);
93 changes: 93 additions & 0 deletions datafusion/functions-aggregate/benches/sum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, BooleanArray};
use arrow::datatypes::Int64Type;
use arrow::util::bench_util::{create_boolean_array, create_primitive_array};
use arrow_schema::{DataType, Field, Schema};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_common::DFSchema;
use datafusion_expr::{function::AccumulatorArgs, AggregateUDFImpl, GroupsAccumulator};
use datafusion_functions_aggregate::sum::Sum;
use std::sync::Arc;

/// Builds the groups accumulator of the `SUM` aggregate for a single nullable
/// column named `f` of type `data_type`.
///
/// `data_type` is used both as the type of the input column and as the
/// `data_type` field of the accumulator arguments.
///
/// # Panics
///
/// Panics if the `DFSchema` conversion or the accumulator creation fails.
fn prepare_accumulator(data_type: &DataType) -> Box<dyn GroupsAccumulator> {
    let field = Field::new("f", data_type.clone(), true);
    let schema = Arc::new(Schema::new(vec![field]));
    let df_schema = DFSchema::try_from(Arc::clone(&schema)).unwrap();

    // Named locals keep the borrowed slices alive for as long as `args` is.
    let input_types = [data_type.clone()];
    let input_exprs = [datafusion_expr::col("f")];

    let args = AccumulatorArgs {
        data_type,
        schema: &schema,
        dfschema: &df_schema,
        ignore_nulls: false,
        sort_exprs: &[],
        is_reversed: false,
        name: "SUM(f)",
        is_distinct: false,
        input_types: &input_types,
        input_exprs: &input_exprs,
    };

    Sum::new().create_groups_accumulator(args).unwrap()
}

/// Registers a Criterion benchmark that measures
/// `GroupsAccumulator::convert_to_state` on the `SUM` accumulator.
///
/// The accumulator is created for the data type of `values`.
///
/// * `c` - Criterion instance the benchmark is registered with.
/// * `name` - identifier passed to `bench_function`.
/// * `values` - the single input array handed to `convert_to_state`.
/// * `opt_filter` - optional filter forwarded unchanged to `convert_to_state`.
///
/// # Panics
///
/// Panics if the accumulator cannot be created or `convert_to_state` returns
/// an error.
fn convert_to_state_bench(
    c: &mut Criterion,
    name: &str,
    values: ArrayRef,
    opt_filter: Option<&BooleanArray>,
) {
    let accumulator = prepare_accumulator(values.data_type());
    // Build the input slice once, outside the timed closure, so the
    // measurement does not include an `Arc` clone and drop per iteration.
    let inputs = [values];
    c.bench_function(name, |b| {
        b.iter(|| {
            black_box(
                accumulator
                    .convert_to_state(&inputs, opt_filter)
                    .unwrap(),
            )
        })
    });
}

/// Runs the `SUM` (`Int64`) `convert_to_state` benchmark over every
/// combination of "input has nulls" and "a filter is supplied", using
/// 8192-row inputs.
fn sum_benchmark(c: &mut Criterion) {
    const ROWS: usize = 8192;

    let values = Arc::new(create_primitive_array::<Int64Type>(ROWS, 0.0)) as ArrayRef;
    convert_to_state_bench(c, "sum i64 convert state no nulls, no filter", values, None);

    let values = Arc::new(create_primitive_array::<Int64Type>(ROWS, 0.3)) as ArrayRef;
    convert_to_state_bench(c, "sum i64 convert state nulls, no filter", values, None);

    // The filter has no nulls and a true density of 0.5.
    let values = Arc::new(create_primitive_array::<Int64Type>(ROWS, 0.0)) as ArrayRef;
    let filter = create_boolean_array(ROWS, 0.0, 0.5);
    convert_to_state_bench(
        c,
        "sum i64 convert state no nulls, filter",
        values,
        Some(&filter),
    );

    let values = Arc::new(create_primitive_array::<Int64Type>(ROWS, 0.3)) as ArrayRef;
    let filter = create_boolean_array(ROWS, 0.0, 0.5);
    convert_to_state_bench(
        c,
        "sum i64 convert state nulls, filter",
        values,
        Some(&filter),
    );
}

// Register `sum_benchmark` in the Criterion group `benches` and use that
// group as the entry point of this bench target (declared with
// `harness = false` in Cargo.toml).
criterion_group!(benches, sum_benchmark);
criterion_main!(benches);

0 comments on commit 098d41c

Please sign in to comment.