Skip to content

Commit

Permalink
Merge commit 'bab39f78cd7a4aca92c60950812016b4f72798b1' into chunchun…
Browse files Browse the repository at this point in the history
…/update-df-apr-week-4-3
  • Loading branch information
appletreeisyellow committed Apr 30, 2024
2 parents 30fde12 + bab39f7 commit 83b0553
Show file tree
Hide file tree
Showing 11 changed files with 704 additions and 372 deletions.
11 changes: 10 additions & 1 deletion datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ struct Args {
short = 'c',
long,
multiple_values = true,
help = "Execute the given command string(s), then exit"
help = "Execute the given command string(s), then exit. Commands are expected to be non empty.",
validator(is_valid_command)
)]
command: Vec<String>,

Expand Down Expand Up @@ -285,6 +286,14 @@ fn is_valid_memory_pool_size(size: &str) -> Result<(), String> {
}
}

/// Validator for the command strings passed via the `-c` flag.
///
/// Returns `Ok(())` for any string containing at least one character and an
/// `Err` carrying a human-readable message for the empty string.
fn is_valid_command(command: &str) -> Result<(), String> {
    // Guard clause: the empty string is the only rejected input.
    if command.is_empty() {
        return Err("-c flag expects only non empty commands".to_string());
    }
    Ok(())
}

#[derive(Debug, Clone, Copy)]
enum ByteUnit {
Byte,
Expand Down
17 changes: 15 additions & 2 deletions datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,9 +347,22 @@ impl DFSchema {
matches.next()
}

/// Find the index of the column with the given qualifier and name
pub fn index_of_column(&self, col: &Column) -> Result<usize> {
/// Look up the index of the column identified by `col`'s qualifier and
/// name, yielding `None` when no such column is found.
///
/// See [Self::index_of_column] for the variant that reports a missing
/// column as an error instead.
pub fn maybe_index_of_column(&self, col: &Column) -> Option<usize> {
    let qualifier = col.relation.as_ref();
    let name = &col.name;
    self.index_of_column_by_name(qualifier, name)
}

/// Look up the index of the column identified by `col`'s qualifier and
/// name, yielding `Err` when no such column is found.
///
/// See [Self::maybe_index_of_column] for the variant that returns `None`
/// instead of an error.
pub fn index_of_column(&self, col: &Column) -> Result<usize> {
    match self.maybe_index_of_column(col) {
        Some(idx) => Ok(idx),
        // The error is only built on the miss path.
        None => Err(field_not_found(col.relation.clone(), &col.name, self)),
    }
}

Expand Down
17 changes: 8 additions & 9 deletions datafusion/execution/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,30 +157,29 @@ impl SessionConfig {
}

/// Set a configuration option
pub fn set(mut self, key: &str, value: ScalarValue) -> Self {
self.options.set(key, &value.to_string()).unwrap();
self
pub fn set(self, key: &str, value: ScalarValue) -> Self {
self.set_str(key, &value.to_string())
}

/// Set a boolean configuration option
pub fn set_bool(self, key: &str, value: bool) -> Self {
self.set(key, ScalarValue::Boolean(Some(value)))
self.set_str(key, &value.to_string())
}

/// Set a generic `u64` configuration option
pub fn set_u64(self, key: &str, value: u64) -> Self {
self.set(key, ScalarValue::UInt64(Some(value)))
self.set_str(key, &value.to_string())
}

/// Set a generic `usize` configuration option
pub fn set_usize(self, key: &str, value: usize) -> Self {
let value: u64 = value.try_into().expect("convert usize to u64");
self.set(key, ScalarValue::UInt64(Some(value)))
self.set_str(key, &value.to_string())
}

/// Set a generic `str` configuration option
pub fn set_str(self, key: &str, value: &str) -> Self {
self.set(key, ScalarValue::from(value))
pub fn set_str(mut self, key: &str, value: &str) -> Self {
self.options.set(key, value).unwrap();
self
}

/// Customize batch size
Expand Down
5 changes: 5 additions & 0 deletions datafusion/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ harness = false
name = "make_date"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "date_bin"
required-features = ["datetime_expressions"]

[[bench]]
harness = false
name = "to_char"
Expand Down
57 changes: 57 additions & 0 deletions datafusion/functions/benches/date_bin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

extern crate criterion;

use std::sync::Arc;

use arrow::array::{ArrayRef, TimestampSecondArray};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_common::ScalarValue;
use rand::rngs::ThreadRng;
use rand::Rng;

use datafusion_expr::ColumnarValue;
use datafusion_functions::datetime::date_bin;

/// Build a `TimestampSecondArray` of 1000 values, each drawn from `rng` in
/// the half-open range `0..1_000_000`.
fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
    let seconds: Vec<i64> = (0..1000)
        .map(|_| rng.gen_range(0..1_000_000))
        .collect();

    TimestampSecondArray::from(seconds)
}

/// Register the `date_bin_1000` benchmark.
///
/// The inputs (an interval scalar and an array produced by [`timestamps`])
/// are built once, outside the measured closure; each iteration clones them
/// and invokes the `date_bin` UDF.
fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("date_bin_1000", |bencher| {
        let mut rng = rand::thread_rng();
        let stride = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000));
        let values: ArrayRef = Arc::new(timestamps(&mut rng));
        let values = ColumnarValue::Array(values);
        let date_bin_udf = date_bin();

        bencher.iter(|| {
            let args = [stride.clone(), values.clone()];
            let binned = date_bin_udf
                .invoke(&args)
                .expect("date_bin should work on valid values");
            // Keep the optimizer from discarding the computed result.
            black_box(binned)
        })
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
19 changes: 9 additions & 10 deletions datafusion/functions/src/datetime/date_bin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,46 +320,46 @@ fn date_bin_impl(
origin: i64,
stride: i64,
stride_fn: fn(i64, i64, i64) -> i64,
) -> impl Fn(Option<i64>) -> Option<i64> {
) -> impl Fn(i64) -> i64 {
let scale = match T::UNIT {
Nanosecond => 1,
Microsecond => NANOSECONDS / 1_000_000,
Millisecond => NANOSECONDS / 1_000,
Second => NANOSECONDS,
};
move |x: Option<i64>| x.map(|x| stride_fn(stride, x * scale, origin) / scale)
move |x: i64| stride_fn(stride, x * scale, origin) / scale
}

Ok(match array {
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
let apply_stride_fn =
stride_map_fn::<TimestampNanosecondType>(origin, stride, stride_fn);
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
apply_stride_fn(*v),
v.map(apply_stride_fn),
tz_opt.clone(),
))
}
ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
let apply_stride_fn =
stride_map_fn::<TimestampMicrosecondType>(origin, stride, stride_fn);
ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
apply_stride_fn(*v),
v.map(apply_stride_fn),
tz_opt.clone(),
))
}
ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
let apply_stride_fn =
stride_map_fn::<TimestampMillisecondType>(origin, stride, stride_fn);
ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
apply_stride_fn(*v),
v.map(apply_stride_fn),
tz_opt.clone(),
))
}
ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
let apply_stride_fn =
stride_map_fn::<TimestampSecondType>(origin, stride, stride_fn);
ColumnarValue::Scalar(ScalarValue::TimestampSecond(
apply_stride_fn(*v),
v.map(apply_stride_fn),
tz_opt.clone(),
))
}
Expand All @@ -377,14 +377,13 @@ fn date_bin_impl(
{
let array = as_primitive_array::<T>(array)?;
let apply_stride_fn = stride_map_fn::<T>(origin, stride, stride_fn);
let array = array
.iter()
.map(apply_stride_fn)
.collect::<PrimitiveArray<T>>()
let array: PrimitiveArray<T> = array
.unary(apply_stride_fn)
.with_timezone_opt(tz_opt.clone());

Ok(ColumnarValue::Array(Arc::new(array)))
}

match array.data_type() {
Timestamp(Nanosecond, tz_opt) => {
transform_array_with_stride::<TimestampNanosecondType>(
Expand Down
Loading

0 comments on commit 83b0553

Please sign in to comment.