Skip to content

Commit

Permalink
Merge commit '88c98e1c1ecec357548a89022053d3735568a853' into chunchun/update-df-apr-week-3
Browse files Browse the repository at this point in the history
  • Loading branch information
appletreeisyellow committed Apr 26, 2024
2 parents 37c446a + 88c98e1 commit 05b322e
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 169 deletions.
14 changes: 14 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@ updates:
# arrow is bumped manually
- dependency-name: "arrow*"
update-types: ["version-update:semver-major"]
- package-ecosystem: cargo
directory: "datafusion-cli/"
schedule:
interval: daily
open-pull-requests-limit: 10
target-branch: main
labels: [auto-dependencies]
ignore:
# arrow is bumped manually
- dependency-name: "arrow*"
update-types: ["version-update:semver-major"]
# datafusion is bumped manually
- dependency-name: "datafusion*"
update-types: ["version-update:semver-major"]
- package-ecosystem: "github-actions"
directory: "/"
schedule:
Expand Down
52 changes: 52 additions & 0 deletions .github/workflows/dependencies.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

name: Dependencies

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

on:
push:
paths:
- "**/Cargo.toml"
pull_request:
paths:
- "**/Cargo.toml"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:

jobs:
depcheck:
name: circular dependency check
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Check dependencies
run: |
cd dev/depcheck
cargo run
19 changes: 1 addition & 18 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -195,24 +195,7 @@ jobs:
- name: Verify Working Directory Clean
run: git diff --exit-code

depcheck:
name: circular dependency check
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
image: amd64/rust
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Setup Rust toolchain
uses: ./.github/actions/setup-builder
with:
rust-version: stable
- name: Check dependencies
run: |
cd dev/depcheck
cargo run


# Run `cargo test doc` (test documentation examples)
linux-test-doc:
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
Expand Down
26 changes: 14 additions & 12 deletions datafusion/core/tests/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ enum Scenario {
}

enum Unit {
RowGroup,
Page,
// pass max row per row_group in parquet writer
RowGroup(usize),
// pass max row per page in parquet writer
Page(usize),
}

/// Test fixture that has an execution context that has an external
Expand Down Expand Up @@ -185,13 +187,13 @@ impl ContextWithParquet {
mut config: SessionConfig,
) -> Self {
let file = match unit {
Unit::RowGroup => {
Unit::RowGroup(row_per_group) => {
config = config.with_parquet_bloom_filter_pruning(true);
make_test_file_rg(scenario).await
make_test_file_rg(scenario, row_per_group).await
}
Unit::Page => {
Unit::Page(row_per_page) => {
config = config.with_parquet_page_index_pruning(true);
make_test_file_page(scenario).await
make_test_file_page(scenario, row_per_page).await
}
};
let parquet_path = file.path().to_string_lossy();
Expand Down Expand Up @@ -880,15 +882,15 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
}

/// Create a test parquet file with various data types
async fn make_test_file_rg(scenario: Scenario) -> NamedTempFile {
async fn make_test_file_rg(scenario: Scenario, row_per_group: usize) -> NamedTempFile {
let mut output_file = tempfile::Builder::new()
.prefix("parquet_pruning")
.suffix(".parquet")
.tempfile()
.expect("tempfile creation");

let props = WriterProperties::builder()
.set_max_row_group_size(5)
.set_max_row_group_size(row_per_group)
.set_bloom_filter_enabled(true)
.build();

Expand All @@ -906,17 +908,17 @@ async fn make_test_file_rg(scenario: Scenario) -> NamedTempFile {
output_file
}

async fn make_test_file_page(scenario: Scenario) -> NamedTempFile {
async fn make_test_file_page(scenario: Scenario, row_per_page: usize) -> NamedTempFile {
let mut output_file = tempfile::Builder::new()
.prefix("parquet_page_pruning")
.suffix(".parquet")
.tempfile()
.expect("tempfile creation");

// set row count to 5, should get same result as rowGroup
// set row count to row_per_page, should get same result as rowGroup
let props = WriterProperties::builder()
.set_data_page_row_count_limit(5)
.set_write_batch_size(5)
.set_data_page_row_count_limit(row_per_page)
.set_write_batch_size(row_per_page)
.build();

let batches = create_data_batch(scenario);
Expand Down
Loading

0 comments on commit 05b322e

Please sign in to comment.