Commit 30fde12
Merge commit 'deebda78a34251b2bddf0c5f66edfaa112c4559b' into chunchun/update-df-apr-week-4-3
appletreeisyellow committed Apr 30, 2024
2 parents 2dba7e4 + deebda7 commit 30fde12
Showing 144 changed files with 177 additions and 488 deletions.
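The change driving this merge is small: Cargo.toml below adds `unused_imports = "deny"` to the workspace lints, and the CI workflow starts checking every target. Nearly every other hunk deletes imports that the newly denied lint flags.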
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
@@ -77,7 +77,7 @@ jobs:
         run: cargo check --all-targets --no-default-features -p datafusion-functions

       - name: Check workspace in debug mode
-        run: cargo check
+        run: cargo check --all-targets --workspace

       - name: Check workspace with avro,json features
         run: cargo check --workspace --benches --features avro,json
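`--all-targets` is what makes the new lint bite everywhere: it extends `cargo check` beyond the default lib and bin targets to tests, benches, and examples, so a stale import in test code now fails this CI step too.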
3 changes: 3 additions & 0 deletions Cargo.toml
@@ -130,3 +130,6 @@ rpath = false
 [workspace.lints.clippy]
 # Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)
 large_futures = "warn"
+
+[workspace.lints.rust]
+unused_imports = "deny"
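For readers unfamiliar with workspace lints: the `[workspace.lints.rust]` table only declares the level; each member crate inherits it via `[lints] workspace = true` in its own Cargo.toml (which the DataFusion crates presumably already have, given the pre-existing `large_futures` entry). Once inherited, an unused import becomes a hard error rather than a warning. A minimal sketch of what now gets rejected — a hypothetical crate, not code from this diff:

// Compiled in a crate that inherits the workspace lints, this fails with
// "error: unused import: `std::collections::HashMap`" because the name is
// never referenced; without the lint it would only be a warning.
use std::collections::HashMap;

fn main() {
    println!("HashMap is imported above but never used");
}

An `#[allow(unused_imports)]` on a specific item can still override the workspace-wide `deny` where an import is intentionally kept.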
3 changes: 1 addition & 2 deletions datafusion-examples/examples/custom_datasource.rs
@@ -24,7 +24,6 @@ use std::time::Duration;
 use datafusion::arrow::array::{UInt64Builder, UInt8Builder};
 use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion::arrow::record_batch::RecordBatch;
-use datafusion::dataframe::DataFrame;
 use datafusion::datasource::{provider_as_source, TableProvider, TableType};
 use datafusion::error::Result;
 use datafusion::execution::context::{SessionState, TaskContext};
@@ -34,7 +33,7 @@ use datafusion::physical_plan::{
     Partitioning, PlanProperties, SendableRecordBatchStream,
 };
 use datafusion::prelude::*;
-use datafusion_expr::{Expr, LogicalPlanBuilder};
+use datafusion_expr::LogicalPlanBuilder;
 use datafusion_physical_expr::EquivalenceProperties;

 use async_trait::async_trait;
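This file is typical of the rest of the merge: `DataFrame` and the `Expr` inside the braced list were no longer referenced by name, and both are in any case re-exported by the `datafusion::prelude::*` glob imported a few lines down. A hypothetical sketch of the prelude pattern (assumes a `datafusion` dependency; `keep_large_ids` is an invented name):

use datafusion::error::Result;
use datafusion::prelude::*; // exports DataFrame, Expr, col, lit, ...

// `DataFrame`, `col`, and `lit` all resolve through the prelude glob,
// so no per-item imports are needed.
fn keep_large_ids(df: DataFrame) -> Result<DataFrame> {
    df.filter(col("id").gt(lit(10)))
}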
1 change: 0 additions & 1 deletion datafusion-examples/examples/flight/flight_client.rs
@@ -16,7 +16,6 @@
 // under the License.

 use std::collections::HashMap;
-use std::convert::TryFrom;
 use std::sync::Arc;

 use datafusion::arrow::datatypes::Schema;
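`use std::convert::TryFrom;` can simply be dropped on edition 2021, where `TryFrom`, `TryInto`, and `FromIterator` are all part of the standard prelude — the same reason behind the parallel removals in dfschema.rs, scalar/mod.rs, and the physical_plan benchmark below. A self-contained illustration:

// Compiles on edition 2021 with no `use std::convert::TryFrom;`:
// the trait is in the standard prelude.
fn main() {
    let narrowed = u8::try_from(300u16); // 300 does not fit in a u8
    assert!(narrowed.is_err());
}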
1 change: 0 additions & 1 deletion datafusion-examples/examples/simple_udaf.rs
@@ -23,7 +23,6 @@ use datafusion::arrow::{
 use datafusion::{error::Result, physical_plan::Accumulator};
 use datafusion::{logical_expr::Volatility, prelude::*, scalar::ScalarValue};
 use datafusion_common::cast::as_float64_array;
-use datafusion_expr::create_udaf;
 use std::sync::Arc;

 // create local session context with an in-memory table
1 change: 0 additions & 1 deletion datafusion-examples/examples/simple_udwf.rs
@@ -22,7 +22,6 @@ use arrow::{
     datatypes::Float64Type,
 };
 use arrow_schema::DataType;
-use datafusion::datasource::file_format::options::CsvReadOptions;

 use datafusion::error::Result;
 use datafusion::prelude::*;
2 changes: 1 addition & 1 deletion datafusion/common/src/column.rs
@@ -373,7 +373,7 @@ impl fmt::Display for Column {
 mod tests {
     use super::*;
     use arrow::datatypes::DataType;
-    use arrow_schema::{Field, SchemaBuilder};
+    use arrow_schema::SchemaBuilder;

     fn create_qualified_schema(qualifier: &str, names: Vec<&str>) -> Result<DFSchema> {
         let mut schema_builder = SchemaBuilder::new();
4 changes: 1 addition & 3 deletions datafusion/common/src/dfschema.rs
@@ -19,7 +19,6 @@
 //! fields with optional relation names.

 use std::collections::{BTreeSet, HashMap, HashSet};
-use std::convert::TryFrom;
 use std::fmt::{Display, Formatter};
 use std::hash::Hash;
 use std::sync::Arc;
@@ -453,7 +452,7 @@ impl DFSchema {
         let matches = self.qualified_fields_with_unqualified_name(name);
         match matches.len() {
             0 => Err(unqualified_field_not_found(name, self)),
-            1 => Ok((matches[0].0, &matches[0].1)),
+            1 => Ok((matches[0].0, (matches[0].1))),
             _ => {
                 // A `matches` length > 1 does not necessarily signal an ambiguous name:
                 // the name may be generated from an alias, in which case it has no qualifier.
@@ -1004,7 +1003,6 @@ mod tests {
     use crate::assert_contains;

     use super::*;
-    use arrow::datatypes::DataType;

     #[test]
     fn qualifier_in_name() -> Result<()> {
2 changes: 0 additions & 2 deletions datafusion/common/src/hash_utils.rs
@@ -24,7 +24,6 @@ use arrow::array::*;
 use arrow::datatypes::*;
 use arrow::row::Rows;
 use arrow::{downcast_dictionary_array, downcast_primitive_array};
-use arrow_buffer::i256;

 use crate::cast::{
     as_boolean_array, as_fixed_size_list_array, as_generic_binary_array,
@@ -450,7 +449,6 @@ pub fn create_row_hashes_v2<'a>(
 #[cfg(test)]
 mod tests {
     use arrow::{array::*, datatypes::*};
-    use std::sync::Arc;

     use super::*;

7 changes: 1 addition & 6 deletions datafusion/common/src/scalar/mod.rs
@@ -22,7 +22,7 @@ mod struct_builder;
 use std::borrow::Borrow;
 use std::cmp::Ordering;
 use std::collections::{HashSet, VecDeque};
-use std::convert::{Infallible, TryFrom, TryInto};
+use std::convert::Infallible;
 use std::fmt;
 use std::hash::Hash;
 use std::iter::repeat;
@@ -52,7 +52,6 @@ use arrow::{
         UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
     },
 };
-use arrow_array::{ArrowNativeTypeOp, Scalar};
 use arrow_buffer::Buffer;
 use arrow_schema::{UnionFields, UnionMode};

@@ -3424,8 +3423,6 @@ impl ScalarType<i32> for Date32Type {

 #[cfg(test)]
 mod tests {
-    use std::cmp::Ordering;
-    use std::sync::Arc;

     use super::*;
     use crate::cast::{
@@ -3435,9 +3432,7 @@ mod tests {
     use crate::assert_batches_eq;
     use arrow::buffer::OffsetBuffer;
     use arrow::compute::{is_null, kernels};
-    use arrow::datatypes::{ArrowNumericType, ArrowPrimitiveType};
     use arrow::util::pretty::pretty_format_columns;
-    use arrow_buffer::Buffer;
     use arrow_schema::Fields;
     use chrono::NaiveDate;
     use rand::Rng;
4 changes: 0 additions & 4 deletions datafusion/common/src/utils.rs
@@ -681,12 +681,8 @@ pub fn find_indices<T: PartialEq, S: Borrow<T>>(

 #[cfg(test)]
 mod tests {
-    use crate::ScalarValue;
     use crate::ScalarValue::Null;
-    use arrow::array::Float64Array;
     use arrow_array::Array;
-    use std::ops::Range;
-    use std::sync::Arc;

     use super::*;

2 changes: 1 addition & 1 deletion datafusion/core/benches/physical_plan.rs
@@ -21,7 +21,7 @@ use criterion::{BatchSize, Criterion};
 extern crate arrow;
 extern crate datafusion;

-use std::{iter::FromIterator, sync::Arc};
+use std::sync::Arc;

 use arrow::{
     array::{ArrayRef, Int64Array, StringArray},
8 changes: 3 additions & 5 deletions datafusion/core/src/dataframe/mod.rs
@@ -1570,19 +1570,17 @@ mod tests {
     use std::vec;

     use super::*;
-    use crate::execution::context::SessionConfig;
+    use crate::assert_batches_sorted_eq;
     use crate::physical_plan::{ColumnarValue, Partitioning, PhysicalExpr};
     use crate::test_util::{register_aggregate_csv, test_table, test_table_with_name};
-    use crate::{assert_batches_sorted_eq, execution::context::SessionContext};

     use arrow::array::{self, Int32Array};
     use arrow::datatypes::DataType;
     use datafusion_common::{Constraint, Constraints};
     use datafusion_common_runtime::SpawnedTask;
     use datafusion_expr::{
-        avg, cast, count, count_distinct, create_udf, expr, lit, max, min, sum,
-        BuiltInWindowFunction, ScalarFunctionImplementation, Volatility, WindowFrame,
-        WindowFunctionDefinition,
+        cast, count_distinct, create_udf, expr, lit, sum, BuiltInWindowFunction,
+        ScalarFunctionImplementation, Volatility, WindowFrame, WindowFunctionDefinition,
     };
     use datafusion_physical_expr::expressions::Column;
     use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/avro.rs
@@ -21,7 +21,7 @@ use std::any::Any;
 use std::sync::Arc;

 use arrow::datatypes::Schema;
-use arrow::{self, datatypes::SchemaRef};
+use arrow::datatypes::SchemaRef;
 use async_trait::async_trait;
 use datafusion_common::FileType;
 use datafusion_physical_expr::PhysicalExpr;
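The `self` removed from `use arrow::{self, datatypes::SchemaRef};` imported the crate name `arrow` itself; on the 2018+ editions the extern prelude already provides that name, and — judging by this hunk and the matching ones in csv.rs and json.rs below — the lint flags the redundant `self` entry as well.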
6 changes: 2 additions & 4 deletions datafusion/core/src/datasource/file_format/csv.rs
@@ -37,8 +37,8 @@ use crate::physical_plan::{ExecutionPlan, SendableRecordBatchStream};

 use arrow::array::RecordBatch;
 use arrow::csv::WriterBuilder;
+use arrow::datatypes::SchemaRef;
 use arrow::datatypes::{DataType, Field, Fields, Schema};
-use arrow::{self, datatypes::SchemaRef};
 use datafusion_common::config::CsvOptions;
 use datafusion_common::file_options::csv_writer::CsvWriterOptions;
 use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType};
@@ -537,12 +537,10 @@ mod tests {
     use arrow::compute::concat_batches;
     use datafusion_common::cast::as_string_array;
     use datafusion_common::stats::Precision;
-    use datafusion_common::{internal_err, FileType, GetExt};
+    use datafusion_common::{internal_err, GetExt};
     use datafusion_expr::{col, lit};

     use bytes::Bytes;
-    use chrono::DateTime;
-    use futures::StreamExt;
     use object_store::local::LocalFileSystem;
     use object_store::path::Path;
     use regex::Regex;
8 changes: 0 additions & 8 deletions datafusion/core/src/datasource/file_format/parquet.rs
@@ -1021,10 +1021,7 @@ pub(crate) mod test_util {
     use super::*;
     use crate::test::object_store::local_unpartitioned_file;

-    use arrow::record_batch::RecordBatch;
-
     use parquet::arrow::ArrowWriter;
-    use parquet::file::properties::WriterProperties;
     use tempfile::NamedTempFile;

     /// How many rows per page should be written
@@ -1112,7 +1109,6 @@ mod tests {
     use crate::physical_plan::metrics::MetricValue;
     use crate::prelude::{SessionConfig, SessionContext};
     use arrow::array::{Array, ArrayRef, StringArray};
-    use arrow::record_batch::RecordBatch;
     use arrow_schema::Field;
     use async_trait::async_trait;
     use bytes::Bytes;
@@ -1121,16 +1117,13 @@
         as_int32_array, as_timestamp_nanosecond_array,
     };
     use datafusion_common::config::ParquetOptions;
-    use datafusion_common::config::TableParquetOptions;
     use datafusion_common::ScalarValue;
     use datafusion_execution::object_store::ObjectStoreUrl;
     use datafusion_execution::runtime_env::RuntimeEnv;
     use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
     use futures::stream::BoxStream;
-    use futures::StreamExt;
     use log::error;
     use object_store::local::LocalFileSystem;
-    use object_store::path::Path;
     use object_store::{
         GetOptions, GetResult, ListResult, MultipartId, PutOptions, PutResult,
     };
Expand All @@ -1139,7 +1132,6 @@ mod tests {
use parquet::file::metadata::{KeyValue, ParquetColumnIndex, ParquetOffsetIndex};
use parquet::file::page_index::index::Index;
use tokio::fs::File;
use tokio::io::AsyncWrite;

#[tokio::test]
async fn read_merged_batches() -> Result<()> {
2 changes: 0 additions & 2 deletions datafusion/core/src/datasource/listing/helpers.rs
@@ -423,8 +423,6 @@ where
 mod tests {
     use std::ops::Not;

-    use futures::StreamExt;
-
     use crate::logical_expr::{case, col, lit};
     use crate::test::object_store::make_test_store_and_state;

5 changes: 0 additions & 5 deletions datafusion/core/src/datasource/listing/table.rs
@@ -1004,23 +1004,18 @@ impl ListingTable {

 #[cfg(test)]
 mod tests {
-    use std::collections::HashMap;
-
     use super::*;
     #[cfg(feature = "parquet")]
     use crate::datasource::file_format::parquet::ParquetFormat;
     use crate::datasource::{provider_as_source, MemTable};
     use crate::execution::options::ArrowReadOptions;
     use crate::physical_plan::collect;
     use crate::prelude::*;
     use crate::{
         assert_batches_eq,
         datasource::file_format::avro::AvroFormat,
         logical_expr::{col, lit},
         test::{columns, object_store::register_test_store},
     };

     use arrow::datatypes::{DataType, Schema};
     use arrow::record_batch::RecordBatch;
     use arrow_schema::SortOptions;
     use datafusion_common::stats::Precision;
3 changes: 0 additions & 3 deletions datafusion/core/src/datasource/memory.rs
@@ -363,7 +363,6 @@ impl DataSink for MemSink {

 #[cfg(test)]
 mod tests {
-    use std::collections::HashMap;

     use super::*;
     use crate::datasource::provider_as_source;
@@ -376,8 +375,6 @@ mod tests {
     use datafusion_common::DataFusionError;
     use datafusion_expr::LogicalPlanBuilder;

-    use futures::StreamExt;
-
     #[tokio::test]
     async fn test_with_projection() -> Result<()> {
         let session_ctx = SessionContext::new();
1 change: 0 additions & 1 deletion datafusion/core/src/datasource/physical_plan/csv.rs
@@ -526,7 +526,6 @@ mod tests {
     use datafusion_common::test_util::arrow_test_data;
     use datafusion_common::FileType;

-    use futures::StreamExt;
     use object_store::chunked::ChunkedStore;
     use object_store::local::LocalFileSystem;
     use rstest::*;
8 changes: 1 addition & 7 deletions datafusion/core/src/datasource/physical_plan/file_stream.rs
@@ -521,19 +521,13 @@ mod tests {
     use super::*;
-    use crate::datasource::file_format::write::BatchSerializer;
     use crate::datasource::object_store::ObjectStoreUrl;
     use crate::datasource::physical_plan::FileMeta;
     use crate::physical_plan::metrics::ExecutionPlanMetricsSet;
     use crate::prelude::SessionContext;
-    use crate::{
-        error::Result,
-        test::{make_partition, object_store::register_test_store},
-    };
+    use crate::test::{make_partition, object_store::register_test_store};

     use arrow_schema::Schema;
     use datafusion_common::{internal_err, Statistics};

-    use bytes::Bytes;
-    use futures::StreamExt;

     /// Test `FileOpener` which will simulate errors during file opening or scanning
     #[derive(Default)]
4 changes: 1 addition & 3 deletions datafusion/core/src/datasource/physical_plan/json.rs
@@ -44,7 +44,7 @@ use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
 use bytes::{Buf, Bytes};
 use futures::{ready, StreamExt, TryStreamExt};
 use object_store::buffered::BufWriter;
-use object_store::{self, GetOptions, GetResultPayload, ObjectStore};
+use object_store::{GetOptions, GetResultPayload, ObjectStore};
 use tokio::io::AsyncWriteExt;
 use tokio::task::JoinSet;
@@ -387,7 +387,6 @@ mod tests {
     use crate::dataframe::DataFrameWriteOptions;
     use crate::datasource::file_format::file_compression_type::FileTypeExt;
     use crate::datasource::file_format::{json::JsonFormat, FileFormat};
-    use crate::datasource::listing::PartitionedFile;
     use crate::datasource::object_store::ObjectStoreUrl;
     use crate::execution::context::SessionState;
     use crate::prelude::{
@@ -401,7 +400,6 @@ mod tests {
     use datafusion_common::FileType;
     use flate2::write::GzEncoder;
     use flate2::Compression;
-    use futures::StreamExt;
     use object_store::chunked::ChunkedStore;
     use object_store::local::LocalFileSystem;
     use rstest::*;
6 changes: 2 additions & 4 deletions datafusion/core/src/datasource/physical_plan/parquet/mod.rs
@@ -773,7 +773,7 @@ mod tests {
     use crate::datasource::file_format::options::CsvReadOptions;
     use crate::datasource::file_format::parquet::test_util::store_parquet;
     use crate::datasource::file_format::test_util::scan_format;
-    use crate::datasource::listing::{FileRange, ListingOptions, PartitionedFile};
+    use crate::datasource::listing::{FileRange, ListingOptions};
     use crate::datasource::object_store::ObjectStoreUrl;
     use crate::execution::context::SessionState;
     use crate::physical_plan::displayable;
@@ -790,7 +790,7 @@
         StructArray,
     };

-    use arrow::datatypes::{DataType, Field, Schema, SchemaBuilder};
+    use arrow::datatypes::{Field, Schema, SchemaBuilder};
     use arrow::record_batch::RecordBatch;
     use arrow_schema::Fields;
     use datafusion_common::{assert_contains, FileType, GetExt, ScalarValue, ToDFSchema};
@@ -799,9 +799,7 @@
     use datafusion_physical_expr::create_physical_expr;

     use chrono::{TimeZone, Utc};
-    use futures::StreamExt;
     use object_store::local::LocalFileSystem;
-    use object_store::path::Path;
     use object_store::ObjectMeta;
     use parquet::arrow::ArrowWriter;
     use tempfile::TempDir;
