From aaaeeb1a97c0b65a01c752c8359969743eb8745f Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Mon, 23 Oct 2023 15:50:11 -0400
Subject: [PATCH] First pass at getting architectured builds working (#350)

* First pass at getting architectured builds working

* Fix RAT failures

* Build add builds on windows/macOS

* Refactor artifacts upload

* Remove windows/macOS builds, include all py versions

* Rename conda package to avoid collision

* Handle licensing

* Update recipes to reflect recent changes

* Build for linux-aarch64

* Add licenses to files

* Add zlib to host deps

* Fixes to build/overlinking errors

* Always include c compiler in build deps
---
Reviewer note: this copy of the patch was rebuilt from a whitespace-collapsed
extraction. Line breaks and the two `cat <<EOF >> ...` heredoc redirections in
build.sh were restored, and every hunk matches the line counts in its @@
header. Leading indentation inside hunks is a reconstruction - TODO confirm
against the upstream commit before applying.

 .github/workflows/conda.yml | 94 +++++++++++++++++++++++++++----------
 conda/recipes/bld.bat       | 26 ++++++++++
 conda/recipes/build.sh      | 84 +++++++++++++++++++++++++++++++++
 conda/recipes/meta.yaml     | 31 ++++++++----
 4 files changed, 202 insertions(+), 33 deletions(-)
 create mode 100644 conda/recipes/bld.bat
 create mode 100644 conda/recipes/build.sh

diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml
index 9853230de..16dfd7a4e 100644
--- a/.github/workflows/conda.yml
+++ b/.github/workflows/conda.yml
@@ -1,7 +1,20 @@
 name: Build conda nightly
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - Cargo.toml
+      - Cargo.lock
+      - pyproject.toml
+      - conda/recipes/**
+      - .github/workflows/conda.yml
+  schedule:
+    - cron: '0 0 * * 0'
 
-# Cancel any already running instances of this build
+# When this workflow is queued, automatically cancel any previous running
+# or pending jobs from the same branch
 concurrency:
   group: conda-${{ github.head_ref }}
   cancel-in-progress: true
@@ -13,9 +26,44 @@ defaults:
 
 jobs:
   conda:
-    name: Build (and optionally upload) the conda nightly
+    name: "Build conda nightlies (python: ${{ matrix.python }}, arch: ${{ matrix.arch }})"
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ["3.8", "3.9", "3.10"]
+        arch: ["linux-64", "linux-aarch64"]
     steps:
+      - name: Manage disk space
+        if: matrix.arch == 'linux-aarch64'
+        run: |
+          sudo mkdir -p /opt/empty_dir || true
+          for d in \
+                   /opt/ghc \
+                   /opt/hostedtoolcache \
+                   /usr/lib/jvm \
+                   /usr/local/.ghcup \
+                   /usr/local/lib/android \
+                   /usr/local/share/powershell \
+                   /usr/share/dotnet \
+                   /usr/share/swift \
+                   ; do
+            sudo rsync --stats -a --delete /opt/empty_dir/ $d || true
+          done
+          sudo apt-get purge -y -f firefox \
+                                   google-chrome-stable \
+                                   microsoft-edge-stable
+          sudo apt-get autoremove -y >& /dev/null
+          sudo apt-get autoclean -y >& /dev/null
+          sudo docker image prune --all --force
+          df -h
+      - name: Create swapfile
+        if: matrix.arch == 'linux-aarch64'
+        run: |
+          sudo fallocate -l 10GiB /swapfile || true
+          sudo chmod 600 /swapfile || true
+          sudo mkswap /swapfile || true
+          sudo swapon /swapfile || true
       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
@@ -24,38 +72,34 @@ jobs:
         with:
           miniforge-variant: Mambaforge
           use-mamba: true
-          python-version: "3.10"
+          python-version: "3.8"
           channel-priority: strict
       - name: Install dependencies
         run: |
-          mamba install boa conda-verify
+          mamba install -c conda-forge boa conda-verify
 
           which python
           pip list
           mamba list
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: stable
-          override: true
-          components: clippy, rustfmt
-      - name: Build conda package
+      - name: Build conda packages
         run: |
           # suffix for nightly package versions
           export VERSION_SUFFIX=a`date +%y%m%d`
 
           conda mambabuild conda/recipes \
+                           --python ${{ matrix.python }} \
+                           --variants "{target_platform: [${{ matrix.arch }}]}" \
+                           --error-overlinking \
+                           --no-test \
                            --no-anaconda-upload \
-                           --output-folder .
-      # - name: Upload conda package
-      #   if: |
-      #     github.event_name == 'push'
-      #     && github.repository == 'apache/arrow-datafusion-python'
-      #   env:
-      #     ANACONDA_API_TOKEN: ${{ secrets.DASK_CONDA_TOKEN }}
-      #     LABEL: ${{ github.ref == 'refs/heads/datafusion-sql-planner' && 'dev_datafusion' || 'dev' }}
-      #   run: |
-      #     # install anaconda for upload
-      #     mamba install anaconda-client
-
-      #     anaconda upload --label $LABEL linux-64/*.tar.bz2
+                           --output-folder packages
+      - name: Test conda packages
+        if: matrix.arch == 'linux-64'  # can only test native platform packages
+        run: |
+          conda mambabuild --test packages/${{ matrix.arch }}/*.tar.bz2
+      - name: Upload conda packages as artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: "conda nightlies (python - ${{ matrix.python }}, arch - ${{ matrix.arch }})"
+          # need to install all conda channel metadata to properly install locally
+          path: packages/
diff --git a/conda/recipes/bld.bat b/conda/recipes/bld.bat
new file mode 100644
index 000000000..fc3ac88b3
--- /dev/null
+++ b/conda/recipes/bld.bat
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+maturin build -vv -j %CPU_COUNT% --release --strip --manylinux off --interpreter=%PYTHON%
+
+FOR /F "delims=" %%i IN ('dir /s /b target\wheels\*.whl') DO set datafusion_wheel=%%i
+
+%PYTHON% -m pip install --no-deps %datafusion_wheel% -vv
+
+cargo-bundle-licenses --format yaml --output THIRDPARTY.yml
diff --git a/conda/recipes/build.sh b/conda/recipes/build.sh
new file mode 100644
index 000000000..af640e6ca
--- /dev/null
+++ b/conda/recipes/build.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -ex
+
+# See https://github.com/conda-forge/rust-feedstock/blob/master/recipe/build.sh for cc env explanation
+if [ "$c_compiler" = gcc ] ; then
+    case "$target_platform" in
+        linux-64) rust_env_arch=X86_64_UNKNOWN_LINUX_GNU ;;
+        linux-aarch64) rust_env_arch=AARCH64_UNKNOWN_LINUX_GNU ;;
+        linux-ppc64le) rust_env_arch=POWERPC64LE_UNKNOWN_LINUX_GNU ;;
+        *) echo "unknown target_platform $target_platform" ; exit 1 ;;
+    esac
+
+    export CARGO_TARGET_${rust_env_arch}_LINKER=$CC
+fi
+
+declare -a _xtra_maturin_args
+
+mkdir -p $SRC_DIR/.cargo
+
+if [ "$target_platform" = "osx-64" ] ; then
+    cat <<EOF >> $SRC_DIR/.cargo/config
+[target.x86_64-apple-darwin]
+linker = "$CC"
+rustflags = [
+  "-C", "link-arg=-undefined",
+  "-C", "link-arg=dynamic_lookup",
+]
+
+EOF
+
+    _xtra_maturin_args+=(--target=x86_64-apple-darwin)
+
+elif [ "$target_platform" = "osx-arm64" ] ; then
+    cat <<EOF >> $SRC_DIR/.cargo/config
+# Required for intermediate codegen stuff
+[target.x86_64-apple-darwin]
+linker = "$CC_FOR_BUILD"
+
+# Required for final binary artifacts for target
+[target.aarch64-apple-darwin]
+linker = "$CC"
+rustflags = [
+  "-C", "link-arg=-undefined",
+  "-C", "link-arg=dynamic_lookup",
+]
+
+EOF
+    _xtra_maturin_args+=(--target=aarch64-apple-darwin)
+
+    # This variable must be set to the directory containing the target's libpython DSO
+    export PYO3_CROSS_LIB_DIR=$PREFIX/lib
+
+    # xref: https://github.com/PyO3/pyo3/commit/7beb2720
+    export PYO3_PYTHON_VERSION=${PY_VER}
+
+    # xref: https://github.com/conda-forge/python-feedstock/issues/621
+    sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/os-patch.py
+    sed -i.bak 's,aarch64,arm64,g' $BUILD_PREFIX/venv/lib/platform-patch.py
+fi
+
+maturin build -vv -j "${CPU_COUNT}" --release --strip --manylinux off --interpreter="${PYTHON}" "${_xtra_maturin_args[@]}"
+
+"${PYTHON}" -m pip install $SRC_DIR/target/wheels/datafusion*.whl --no-deps -vv
+
+cargo-bundle-licenses --format yaml --output THIRDPARTY.yml
diff --git a/conda/recipes/meta.yaml b/conda/recipes/meta.yaml
index e2bb8bee3..113e7a441 100644
--- a/conda/recipes/meta.yaml
+++ b/conda/recipes/meta.yaml
@@ -29,18 +29,27 @@ source:
   git_url: ../..
 
 build:
-  noarch: python
-  script: {{ PYTHON }} -m pip install . -vv
-  number: 0
+  number: {{ GIT_DESCRIBE_NUMBER }}
+  string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
 
 requirements:
-  host:
-    - python >=3.7
+  build:
+    - python                                 # [build_platform != target_platform]
+    - cross-python_{{ target_platform }}     # [build_platform != target_platform]
+    - zlib                                   # [build_platform != target_platform]
+    - {{ compiler('c') }}
+    - {{ compiler('rust') }}
+    - cargo-bundle-licenses
     - maturin >=0.15,<0.16
     - libprotobuf =3
+  host:
+    - python
+    - maturin >=0.15,<0.16
     - pip
+    - zlib
+    - xz  # [linux64]
   run:
-    - python >=3.7
+    - python
     - pyarrow >=11.0.0
 
 test:
@@ -55,5 +64,11 @@ about:
   home: https://arrow.apache.org/datafusion
   license: Apache-2.0
   license_family: APACHE
-  license_file: LICENSE.txt
-  summary: Apache Arrow DataFusion Python Bindings
+  license_file:
+    - LICENSE.txt
+    - THIRDPARTY.yml
+  description: |
+    DataFusion is an extensible query execution framework, written in Rust,
+    that uses Apache Arrow as its in-memory format.
+  doc_url: https://arrow.apache.org/datafusion
+  dev_url: https://github.com/apache/arrow-datafusion