Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Spark 3 #62

Open
wants to merge 3 commits into
base: branch-1.10.0
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Add CI
Khwunchai Jaengsawang committed Nov 28, 2022
commit 1a809bbde419fd2d078333f3f80f67f5f0f02b36
125 changes: 125 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Workflow "core": triggered on every push (no branch or path filters).
name: core
on:
  push:

# Workflow-wide environment, visible to every job and step unless overridden.
env:
  # Disable keepAlive and pool
  # https://github.com/actions/virtual-environments/issues/1499#issuecomment-689467080
  # Folded scalar (>-): the lines below are joined with single spaces into one
  # option string with no trailing newline.
  MAVEN_OPTS: >-
    -Xms1024M -Xmx2048M -XX:MaxMetaspaceSize=1024m -XX:-UseGCOverheadLimit -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn
    -Dhttp.keepAlive=false
    -Dmaven.wagon.http.pool=false
    -Dmaven.wagon.http.retryHandler.count=3
  # Registry (including owner namespace) used for login and image tags.
  CONTAINER_REGISTRY: ghcr.io/khwj

# Use the bash login, because we are using miniconda
# NOTE(review): no step visible in this workflow installs or uses miniconda;
# confirm whether the login shell is still required.
defaults:
  run:
    shell: bash -l {0}

# Default token scope for all jobs; a job that declares its own `permissions`
# replaces this set rather than extending it.
permissions:
  contents: read  # to fetch code (actions/checkout)

jobs:
  # Single job that:
  #   1. builds a patched Hive and installs it into the local Maven repo,
  #   2. builds the Glue Data Catalog client against it,
  #   3. builds a Spark distribution,
  #   4. bakes the client jars into Spark container images and pushes them.
  build:
    # NOTE(review): GitHub has announced retirement of the ubuntu-20.04 hosted
    # image; confirm the runner label is still available before relying on it.
    runs-on: ubuntu-20.04
    permissions:
      # Job-level permissions replace the workflow-level block, so the read
      # scope needed by actions/checkout must be repeated here.
      contents: read
      packages: write  # push images to the container registry
    strategy:
      fail-fast: false
      matrix:
        # NOTE(review): matrix.hadoop is not referenced by any step below;
        # it only affects the job's display name.
        hadoop: [spark3.3]
    env:
      # Quoted so version-like values are never re-typed as numbers.
      SPARK_VERSION: "3.3.1"
      HADOOP_VERSION: "3.3.2"
      HIVE_VERSION: "2.3.9"
      HIVE_REF: rel/release-2.3.9-imetastore
      SCALA_VERSION: "2.12"
      AWS_SDK_VERSION: "1.12.206"
    steps:
      - name: Checkout Hive
        uses: actions/checkout@v3
        with:
          repository: khwj/hive
          # Single source of truth: the ref comes from the job env above.
          ref: ${{ env.HIVE_REF }}
          path: hive
      # - name: Set up JDK 11
      #   uses: actions/setup-java@v3
      #   with:
      #     java-version: '11'
      #     distribution: 'adopt'
      - name: Set up JDK 8
        uses: actions/setup-java@v3
        with:
          java-version: "8"
          distribution: "zulu"
      - name: Cache local Maven repository
        # v3 matches the major version of the other actions/* steps here.
        uses: actions/cache@v3
        with:
          # The "!" entry excludes locally built Hive artifacts from the cache.
          path: |
            ~/.m2/repository
            !~/.m2/repository/org/apache/hive/
            ~/.spark-dist
            ~/.cache
          key: ${{ runner.os }}-hive-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-hive-
      - name: Build Hive
        run: |
          cd hive
          mvn --batch-mode -DskipTests clean install
      - name: Checkout Glue Data Catalog client
        uses: actions/checkout@v3
      - name: Build Glue Data Catalog client
        run: |
          # `install` already runs the `package` phase, so it is not repeated.
          mvn --batch-mode clean install -DskipTests \
            -Dhive2.version="$HIVE_VERSION" \
            -Dspark-hive.version="$HIVE_VERSION" \
            -Dhadoop.version="$HADOOP_VERSION" \
            -Daws.sdk.version="$AWS_SDK_VERSION"
          mkdir -p artifacts
          # Collect every built jar, skipping the Spark/Hive trees and the
          # destination directory itself (avoids copying a file onto itself).
          find . \
            -not -path "./spark/**" \
            -not -path "./hive/**" \
            -not -path "./artifacts/*" \
            -name "*.jar" -exec cp {} artifacts/ \;
      - name: Archive Glue Data Catalog client binary
        uses: actions/upload-artifact@v3
        with:
          name: aws-glue-datacatalog-hive2-client
          path: |
            artifacts/*.jar
      - name: Checkout Spark
        uses: actions/checkout@v3
        with:
          repository: apache/spark
          # Tag is derived from SPARK_VERSION so the two cannot drift apart.
          ref: refs/tags/v${{ env.SPARK_VERSION }}
          path: spark
      - name: Set up JDK 11
        uses: actions/setup-java@v3
        with:
          java-version: "11"
          distribution: "adopt"
      - name: Build Spark
        env:
          # Step-level value overrides the workflow-level MAVEN_OPTS.
          MAVEN_OPTS: -Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g
        run: |
          cd spark
          # NOTE(review): -Dhadoop-3.2 sets a system property rather than
          # activating a Maven profile (-P...); confirm it has the intended
          # effect for this Spark version.
          ./dev/make-distribution.sh --name hadoop3.2-glue-thriftserver -Dhadoop-3.2 -Phive -Phive-thriftserver -Pkubernetes
      - name: Archive Spark binary
        uses: actions/upload-artifact@v3
        with:
          name: spark-${{ env.SPARK_VERSION }}-bin-hadoop3.2-glue-thriftserver
          path: |
            spark/dist/*
      - name: Log in to the Container registry
        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
        with:
          registry: ${{ env.CONTAINER_REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
        with:
          platforms: linux/amd64
      - name: Build Spark container images
        run: |
          # Ship the Glue client jars inside the Spark distribution.
          cp artifacts/*.jar spark/dist/jars/
          cd spark/dist
          IMAGE_TAG="${SPARK_VERSION}-hadoop${HADOOP_VERSION}-glue-thriftserver"
          ./bin/docker-image-tool.sh -nX -r "$CONTAINER_REGISTRY" -t "$IMAGE_TAG" build
          ./bin/docker-image-tool.sh -nX -r "$CONTAINER_REGISTRY" -t "$IMAGE_TAG" -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
          ./bin/docker-image-tool.sh -nX -r "$CONTAINER_REGISTRY" -t "$IMAGE_TAG" push
Empty file.
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -19,14 +19,14 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<guava.version>14.0.1</guava.version>
<hive2.version>2.3.7</hive2.version>
<spark-hive.version>2.3.7</spark-hive.version>
<aws.sdk.version>1.11.901</aws.sdk.version>
<hive2.version>2.3.9</hive2.version>
<spark-hive.version>2.3.9</spark-hive.version>
<aws.sdk.version>1.12.206</aws.sdk.version>
<junit.version>4.11</junit.version>
<mockito.version>1.10.19</mockito.version>
<surefire.version>2.15</surefire.version>
<powermock.version>1.6.4</powermock.version>
<hadoop.version>2.8.3</hadoop.version>
<hadoop.version>2.10.1</hadoop.version>
<maven.eclipse.plugin.version>2.9</maven.eclipse.plugin.version>
<hamcrest.version>1.3</hamcrest.version>
<httpclient.version>4.5.3</httpclient.version>