chore(deps): bump datafusion to the latest commit (#1967)
* bump deps

Signed-off-by: Ruihang Xia <[email protected]>

* fix compile except pyo3 backend

Signed-off-by: Ruihang Xia <[email protected]>

* fix promql-parser metric name matcher

Signed-off-by: Ruihang Xia <[email protected]>

* fix tests

Signed-off-by: Ruihang Xia <[email protected]>

* fix pyarrow convert

Signed-off-by: Ruihang Xia <[email protected]>

* fix pyo3 compiling

Signed-off-by: Ruihang Xia <[email protected]>

* fix tests

Signed-off-by: Ruihang Xia <[email protected]>

* fix clippy

Signed-off-by: Ruihang Xia <[email protected]>

* remove dead code

Signed-off-by: Ruihang Xia <[email protected]>

* update stream adapter display format

Signed-off-by: Ruihang Xia <[email protected]>

* fix physical optimizer rule

Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
waynexia authored Aug 1, 2023
1 parent 5bd80a7 commit 44f3ed2
Showing 59 changed files with 1,138 additions and 1,045 deletions.
1,454 changes: 747 additions & 707 deletions Cargo.lock

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions Cargo.toml
@@ -56,39 +56,38 @@ edition = "2021"
 license = "Apache-2.0"
 
 [workspace.dependencies]
-arrow = { version = "40.0" }
-arrow-array = "40.0"
-arrow-flight = "40.0"
-arrow-schema = { version = "40.0", features = ["serde"] }
+arrow = { version = "43.0" }
+etcd-client = "0.11"
+arrow-array = "43.0"
+arrow-flight = "43.0"
+arrow-schema = { version = "43.0", features = ["serde"] }
 async-stream = "0.3"
 async-trait = "0.1"
 chrono = { version = "0.4", features = ["serde"] }
-# TODO(ruihang): use arrow-datafusion when it contains https://github.com/apache/arrow-datafusion/pull/6032
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "2ceb7f927c40787773fdc466d6a4b79f3a6c0001" }
 derive_builder = "0.12"
-etcd-client = "0.11"
 futures = "0.3"
 futures-util = "0.3"
 greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "356694a72f12ad9e15008d4245a0b4fe48f982ad" }
 itertools = "0.10"
 lazy_static = "1.4"
 once_cell = "1.18"
 opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
-parquet = "40.0"
+parquet = "43.0"
 paste = "1.0"
 prost = "0.11"
 rand = "0.8"
 regex = "1.8"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 snafu = { version = "0.7", features = ["backtraces"] }
-sqlparser = "0.34"
+sqlparser = "0.35"
 tempfile = "3"
 tokio = { version = "1.28", features = ["full"] }
 tokio-util = { version = "0.7", features = ["io-util", "compat"] }
2 changes: 1 addition & 1 deletion src/catalog/src/information_schema/columns.rs
@@ -22,8 +22,8 @@ use common_error::ext::BoxedError;
 use common_query::physical_plan::TaskContext;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
-use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, DataType};
 use datatypes::scalars::ScalarVectorBuilder;
2 changes: 1 addition & 1 deletion src/catalog/src/information_schema/tables.rs
@@ -20,8 +20,8 @@ use common_error::ext::BoxedError;
 use common_query::physical_plan::TaskContext;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
-use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
+use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
 use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
 use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
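
Both information_schema changes above follow DataFusion's relocation of PartitionStream from datasource::streaming to physical_plan::streaming; the trait shape itself appears unchanged. A minimal sketch of an implementor under that assumption — MemoryPartition and its body are hypothetical, not from this PR:

use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion::execution::context::TaskContext;
use datafusion::physical_plan::streaming::PartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream;

/// Hypothetical single-partition source backed by in-memory batches.
struct MemoryPartition {
    schema: SchemaRef,
}

impl PartitionStream for MemoryPartition {
    fn schema(&self) -> &SchemaRef {
        &self.schema
    }

    fn execute(&self, _ctx: Arc<TaskContext>) -> SendableRecordBatchStream {
        // Build and return a stream of RecordBatches matching `self.schema`.
        unimplemented!()
    }
}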
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format.rs
@@ -30,8 +30,8 @@ use arrow::record_batch::RecordBatch;
 use arrow_schema::{ArrowError, Schema as ArrowSchema};
 use async_trait::async_trait;
 use bytes::{Buf, Bytes};
+use datafusion::datasource::physical_plan::FileOpenFuture;
 use datafusion::error::{DataFusionError, Result as DataFusionResult};
-use datafusion::physical_plan::file_format::FileOpenFuture;
 use datafusion::physical_plan::SendableRecordBatchStream;
 use futures::StreamExt;
 use object_store::ObjectStore;
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/csv.rs
@@ -23,8 +23,8 @@ use arrow::record_batch::RecordBatch;
 use arrow_schema::{Schema, SchemaRef};
 use async_trait::async_trait;
 use common_runtime;
+use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::error::Result as DataFusionResult;
-use datafusion::physical_plan::file_format::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::physical_plan::SendableRecordBatchStream;
 use derive_builder::Builder;
 use object_store::ObjectStore;
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/json.rs
@@ -26,8 +26,8 @@ use arrow::record_batch::RecordBatch;
 use arrow_schema::Schema;
 use async_trait::async_trait;
 use common_runtime;
+use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::error::{DataFusionError, Result as DataFusionResult};
-use datafusion::physical_plan::file_format::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::physical_plan::SendableRecordBatchStream;
 use object_store::ObjectStore;
 use snafu::ResultExt;
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/orc.rs
@@ -20,8 +20,8 @@ use arrow::compute::cast;
 use arrow_schema::{ArrowError, Schema, SchemaRef};
 use async_trait::async_trait;
 use datafusion::arrow::record_batch::RecordBatch as DfRecordBatch;
+use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::error::{DataFusionError, Result as DfResult};
-use datafusion::physical_plan::file_format::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::physical_plan::RecordBatchStream;
 use futures::{Stream, StreamExt, TryStreamExt};
 use object_store::ObjectStore;
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/parquet.rs
@@ -18,13 +18,13 @@ use std::sync::Arc;
 use arrow::record_batch::RecordBatch;
 use arrow_schema::Schema;
 use async_trait::async_trait;
+use datafusion::datasource::physical_plan::{FileMeta, ParquetFileReaderFactory};
 use datafusion::error::Result as DatafusionResult;
 use datafusion::parquet::arrow::async_reader::AsyncFileReader;
 use datafusion::parquet::arrow::{parquet_to_arrow_schema, ArrowWriter};
 use datafusion::parquet::errors::{ParquetError, Result as ParquetResult};
 use datafusion::parquet::file::metadata::ParquetMetaData;
 use datafusion::parquet::format::FileMetaData;
-use datafusion::physical_plan::file_format::{FileMeta, ParquetFileReaderFactory};
 use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
 use futures::future::BoxFuture;
 use object_store::{ObjectStore, Reader};
2 changes: 1 addition & 1 deletion src/common/datasource/src/file_format/tests.rs
@@ -18,8 +18,8 @@ use std::sync::Arc;
 use std::vec;
 
 use datafusion::assert_batches_eq;
+use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileStream, ParquetExec};
 use datafusion::execution::context::TaskContext;
-use datafusion::physical_plan::file_format::{FileOpener, FileScanConfig, FileStream, ParquetExec};
 use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
 use datafusion::physical_plan::ExecutionPlan;
 use datafusion::prelude::SessionContext;
4 changes: 2 additions & 2 deletions src/common/datasource/src/test_util.rs
@@ -19,7 +19,7 @@ use arrow_schema::{DataType, Field, Schema, SchemaRef};
 use common_test_util::temp_dir::{create_temp_dir, TempDir};
 use datafusion::datasource::listing::PartitionedFile;
 use datafusion::datasource::object_store::ObjectStoreUrl;
-use datafusion::physical_plan::file_format::{FileScanConfig, FileStream};
+use datafusion::datasource::physical_plan::{FileScanConfig, FileStream};
 use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
 use object_store::services::Fs;
 use object_store::ObjectStore;
@@ -86,7 +86,7 @@ pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str)
         projection: None,
         limit,
         table_partition_cols: vec![],
-        output_ordering: None,
+        output_ordering: vec![],
         infinite_source: false,
     }
 }
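
The scan_config hunk above reflects a field type change in FileScanConfig: output_ordering went from an Option to a Vec of orderings, so "no known ordering" is now vec![] instead of None. A sketch of a full construction under that assumption (the helper mirrors this repo's scan_config; the path, size, and Default statistics are illustrative):

use arrow_schema::SchemaRef;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::FileScanConfig;

fn scan_config(file_schema: SchemaRef, path: &str, size: u64) -> FileScanConfig {
    FileScanConfig {
        object_store_url: ObjectStoreUrl::local_filesystem(),
        file_schema,
        file_groups: vec![vec![PartitionedFile::new(path.to_string(), size)]],
        statistics: Default::default(),
        projection: None,
        limit: None,
        table_partition_cols: vec![],
        output_ordering: vec![], // was `output_ordering: None` before this bump
        infinite_source: false,
    }
}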
15 changes: 8 additions & 7 deletions src/common/grpc/src/flight.rs
@@ -17,14 +17,15 @@ use std::sync::Arc;
 
 use api::v1::{AffectedRows, FlightMetadata};
 use arrow_flight::utils::flight_data_to_arrow_batch;
-use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
+use arrow_flight::{FlightData, SchemaAsIpc};
 use common_base::bytes::Bytes;
 use common_recordbatch::{RecordBatch, RecordBatches};
 use datatypes::arrow;
 use datatypes::arrow::datatypes::Schema as ArrowSchema;
 use datatypes::arrow::ipc::{root_as_message, writer, MessageHeader};
 use datatypes::schema::{Schema, SchemaRef};
 use flatbuffers::FlatBufferBuilder;
+use prost::bytes::Bytes as ProstBytes;
 use prost::Message;
 use snafu::{OptionExt, ResultExt};
 
@@ -86,12 +86,12 @@ impl FlightEncoder {
                 affected_rows: Some(AffectedRows { value: rows as _ }),
             }
             .encode_to_vec();
-            FlightData::new(
-                None,
-                IpcMessage(build_none_flight_msg().into()),
-                metadata,
-                vec![],
-            )
+            FlightData {
+                flight_descriptor: None,
+                data_header: build_none_flight_msg().into(),
+                app_metadata: metadata.into(),
+                data_body: ProstBytes::default(),
+            }
         }
     }
 }
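
In the encoder change above, FlightData's payload fields are prost-generated Bytes rather than Vec<u8>, which is why the struct is now built literally and metadata is converted with .into(). The conversion itself is cheap; a standalone illustration (to_app_metadata is a hypothetical helper, not from this PR):

use prost::bytes::Bytes;

// Vec<u8> -> Bytes takes ownership of the existing allocation; no copy.
fn to_app_metadata(raw: Vec<u8>) -> Bytes {
    raw.into()
}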
8 changes: 4 additions & 4 deletions src/common/query/src/logical_plan/udaf.rs
@@ -20,8 +20,8 @@ use std::fmt::{self, Debug, Formatter};
 use std::sync::Arc;
 
 use datafusion_expr::{
-    AccumulatorFunctionImplementation as DfAccumulatorFunctionImplementation,
-    AggregateUDF as DfAggregateUdf, StateTypeFunction as DfStateTypeFunction,
+    AccumulatorFactoryFunction, AggregateUDF as DfAggregateUdf,
+    StateTypeFunction as DfStateTypeFunction,
 };
 use datatypes::arrow::datatypes::DataType as ArrowDataType;
 use datatypes::prelude::*;
@@ -103,11 +103,11 @@ impl From<AggregateFunction> for DfAggregateUdf {
 fn to_df_accumulator_func(
     accumulator: AccumulatorFunctionImpl,
     creator: AggregateFunctionCreatorRef,
-) -> DfAccumulatorFunctionImplementation {
+) -> AccumulatorFactoryFunction {
     Arc::new(move |_| {
         let accumulator = accumulator()?;
         let creator = creator.clone();
-        Ok(Box::new(DfAccumulatorAdaptor::new(accumulator, creator)))
+        Ok(Box::new(DfAccumulatorAdaptor::new(accumulator, creator)) as _)
     })
 }
 
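
AccumulatorFunctionImplementation was renamed upstream to AccumulatorFactoryFunction; it remains an Arc'd factory closure, and the new trailing `as _` unsizes the concrete Box into Box<dyn Accumulator>. A minimal sketch with a hypothetical Default-constructible accumulator type:

use std::sync::Arc;

use datafusion_expr::{Accumulator, AccumulatorFactoryFunction};

// Hypothetical helper: wrap any Default-constructible accumulator in a factory.
fn factory_of<A: Accumulator + Default + 'static>() -> AccumulatorFactoryFunction {
    Arc::new(|_return_type| Ok(Box::new(A::default()) as _))
}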
10 changes: 8 additions & 2 deletions src/common/query/src/physical_plan.rs
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 use std::any::Any;
-use std::fmt::Debug;
+use std::fmt::{self, Debug};
 use std::sync::Arc;
 
 use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
@@ -24,7 +24,7 @@ pub use datafusion::execution::context::{SessionContext, TaskContext};
 use datafusion::physical_plan::expressions::PhysicalSortExpr;
 use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
 pub use datafusion::physical_plan::Partitioning;
-use datafusion::physical_plan::Statistics;
+use datafusion::physical_plan::{DisplayAs, DisplayFormatType, Statistics};
 use datatypes::schema::SchemaRef;
 use snafu::ResultExt;
 
@@ -218,6 +218,12 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
     }
 }
 
+impl DisplayAs for DfPhysicalPlanAdapter {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{:?}", self.0)
+    }
+}
+
 #[cfg(test)]
 mod test {
     use async_trait::async_trait;
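
The new impl above exists because this DataFusion version makes DisplayAs part of the ExecutionPlan contract (it drives EXPLAIN output); the adapter satisfies it by delegating to Debug. A custom plan would usually branch on the format type — a sketch, with MyExec hypothetical:

use std::fmt;

use datafusion::physical_plan::{DisplayAs, DisplayFormatType};

struct MyExec;

impl DisplayAs for MyExec {
    fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
        match t {
            DisplayFormatType::Default => write!(f, "MyExec"),
            DisplayFormatType::Verbose => write!(f, "MyExec (verbose)"),
        }
    }
}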
2 changes: 1 addition & 1 deletion src/common/substrait/Cargo.toml
@@ -26,7 +26,7 @@ table = { path = "../../table" }
 
 [dependencies.substrait_proto]
 package = "substrait"
-version = "0.10"
+version = "0.12"
 
 [dev-dependencies]
 datatypes = { path = "../../datatypes" }
2 changes: 1 addition & 1 deletion src/common/substrait/src/df_substrait.rs
@@ -16,7 +16,7 @@ use std::sync::Arc;
 
 use async_trait::async_trait;
 use bytes::{Buf, Bytes, BytesMut};
-use datafusion::catalog::catalog::CatalogList;
+use datafusion::catalog::CatalogList;
 use datafusion::execution::context::SessionState;
 use datafusion::execution::runtime_env::RuntimeEnv;
 use datafusion::prelude::{SessionConfig, SessionContext};
2 changes: 1 addition & 1 deletion src/common/substrait/src/lib.rs
@@ -23,7 +23,7 @@ use std::sync::Arc;
 
 use async_trait::async_trait;
 use bytes::{Buf, Bytes};
-use datafusion::catalog::catalog::CatalogList;
+use datafusion::catalog::CatalogList;
 
 pub use crate::df_substrait::DFLogicalSubstraitConvertor;
 
4 changes: 1 addition & 3 deletions src/datanode/src/instance/grpc.rs
@@ -23,10 +23,8 @@ use async_trait::async_trait;
 use catalog::CatalogManagerRef;
 use common_grpc_expr::insert::to_table_insert_request;
 use common_query::Output;
-use datafusion::catalog::catalog::{
-    CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider,
-};
 use datafusion::catalog::schema::SchemaProvider;
+use datafusion::catalog::{CatalogList, CatalogProvider, MemoryCatalogList, MemoryCatalogProvider};
 use datafusion::datasource::TableProvider;
 use futures::future;
 use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
6 changes: 5 additions & 1 deletion src/datatypes/src/value.rs
@@ -635,7 +635,7 @@ impl TryFrom<ScalarValue> for Value {
             ScalarValue::Binary(b)
             | ScalarValue::LargeBinary(b)
             | ScalarValue::FixedSizeBinary(_, b) => Value::from(b.map(Bytes::from)),
-            ScalarValue::List(vs, field) => {
+            ScalarValue::List(vs, field) | ScalarValue::Fixedsizelist(vs, field, _) => {
                 let items = if let Some(vs) = vs {
                     let vs = vs
                         .into_iter()
@@ -687,6 +687,10 @@
                 .map(|x| Value::Interval(Interval::from_i128(x)))
                 .unwrap_or(Value::Null),
             ScalarValue::Decimal128(_, _, _)
+            | ScalarValue::DurationSecond(_)
+            | ScalarValue::DurationMillisecond(_)
+            | ScalarValue::DurationMicrosecond(_)
+            | ScalarValue::DurationNanosecond(_)
             | ScalarValue::Struct(_, _)
             | ScalarValue::Dictionary(_, _) => {
                 return error::UnsupportedArrowTypeSnafu {
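
The added arms keep this match exhaustive after upstream introduced the Duration* scalar variants; like Decimal128 and Struct, they are rejected rather than converted. If a conversion were ever wanted, normalizing to nanoseconds could look like this sketch (duration_ns is hypothetical, not part of this PR):

use datafusion_common::ScalarValue;

fn duration_ns(v: &ScalarValue) -> Option<i64> {
    match v {
        ScalarValue::DurationSecond(Some(s)) => Some(*s * 1_000_000_000),
        ScalarValue::DurationMillisecond(Some(ms)) => Some(*ms * 1_000_000),
        ScalarValue::DurationMicrosecond(Some(us)) => Some(*us * 1_000),
        ScalarValue::DurationNanosecond(Some(ns)) => Some(*ns),
        _ => None,
    }
}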
5 changes: 2 additions & 3 deletions src/datatypes/src/vectors/datetime.rs
@@ -27,7 +27,6 @@ mod tests {
     use arrow::array::{Array, PrimitiveArray};
     use arrow_array::ArrayRef;
    use common_time::DateTime;
-    use datafusion_common::from_slice::FromSlice;
 
     use super::*;
     use crate::data_type::DataType;
@@ -39,7 +38,7 @@
     #[test]
     fn test_datetime_vector() {
         std::env::set_var("TZ", "Asia/Shanghai");
-        let v = DateTimeVector::new(PrimitiveArray::from_slice([1, 2, 3]));
+        let v = DateTimeVector::new(PrimitiveArray::from(vec![1, 2, 3]));
         assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
         assert_eq!(3, v.len());
         assert_eq!("DateTimeVector", v.vector_type_name());
@@ -57,7 +56,7 @@
         assert_eq!(Some(DateTime::new(2)), iter.next().unwrap());
         assert_eq!(Some(DateTime::new(3)), iter.next().unwrap());
         assert!(!v.is_null(0));
-        assert_eq!(64, v.memory_size());
+        assert_eq!(24, v.memory_size());
 
         if let Value::DateTime(d) = v.get(0) {
            assert_eq!(1, d.val());
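
The expected memory_size drops from 64 to 24 here, plausibly because PrimitiveArray::from(vec![...]) reuses the Vec's exact allocation (three i64 values, 3 × 8 = 24 bytes), while the removed from_slice path allocated a padded 64-byte buffer — an inference from arrow's buffer accounting, not stated in the PR. A standalone check of that reading:

use arrow_array::types::Int64Type;
use arrow_array::{Array, PrimitiveArray};

fn main() {
    let array = PrimitiveArray::<Int64Type>::from(vec![1i64, 2, 3]);
    // The Vec's allocation is 3 * size_of::<i64>() = 24 bytes.
    assert_eq!(24, array.get_buffer_memory_size());
}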
6 changes: 5 additions & 1 deletion src/datatypes/src/vectors/helper.rs
@@ -159,7 +159,7 @@ impl Helper {
             | ScalarValue::FixedSizeBinary(_, v) => {
                 ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
             }
-            ScalarValue::List(v, field) => {
+            ScalarValue::List(v, field) | ScalarValue::Fixedsizelist(v, field, _) => {
                 let item_type = ConcreteDataType::try_from(field.data_type())?;
                 let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
                 if let Some(values) = v {
@@ -219,6 +219,10 @@
                 ConstantVector::new(Arc::new(IntervalMonthDayNanoVector::from(vec![v])), length)
             }
             ScalarValue::Decimal128(_, _, _)
+            | ScalarValue::DurationSecond(_)
+            | ScalarValue::DurationMillisecond(_)
+            | ScalarValue::DurationMicrosecond(_)
+            | ScalarValue::DurationNanosecond(_)
             | ScalarValue::Struct(_, _)
             | ScalarValue::Dictionary(_, _) => {
                 return error::ConversionSnafu {
4 changes: 4 additions & 0 deletions src/datatypes/src/vectors/primitive.rs
@@ -84,24 +84,28 @@
                     .as_any()
                     .downcast_ref::<TimestampSecondArray>()
                     .unwrap()
+                    .clone()
                     .with_timezone_opt(None::<String>)
                     .to_data(),
                 arrow_schema::TimeUnit::Millisecond => array
                     .as_any()
                     .downcast_ref::<TimestampMillisecondArray>()
                     .unwrap()
+                    .clone()
                     .with_timezone_opt(None::<String>)
                     .to_data(),
                 arrow_schema::TimeUnit::Microsecond => array
                     .as_any()
                     .downcast_ref::<TimestampMicrosecondArray>()
                     .unwrap()
+                    .clone()
                     .with_timezone_opt(None::<String>)
                     .to_data(),
                 arrow_schema::TimeUnit::Nanosecond => array
                     .as_any()
                     .downcast_ref::<TimestampNanosecondArray>()
                     .unwrap()
+                    .clone()
                     .with_timezone_opt(None::<String>)
                     .to_data(),
             },
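
The four added .clone() calls compensate for arrow 43's with_timezone_opt taking self by value; the clone is cheap because timestamp arrays share their underlying buffers. The same pattern in isolation (strip_timezone is a hypothetical helper):

use arrow_array::TimestampMillisecondArray;

// `with_timezone_opt` consumes self, so clone the (buffer-shared) array first.
fn strip_timezone(array: &TimestampMillisecondArray) -> TimestampMillisecondArray {
    array.clone().with_timezone_opt(None::<String>)
}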
2 changes: 1 addition & 1 deletion src/datatypes/src/vectors/string.rs
@@ -312,7 +312,7 @@ mod tests {
         assert!(!v.is_const());
         assert!(v.validity().is_all_valid());
         assert!(!v.only_null());
-        assert_eq!(128, v.memory_size());
+        assert_eq!(1088, v.memory_size());
 
         for (i, s) in strs.iter().enumerate() {
             assert_eq!(Value::from(*s), v.get(i));