diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 9ce3042ce88..81184557583 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -24,7 +24,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  COMMON_MVN_FLAGS: >
+  COMMON_MVN_FLAGS: >-
     -Ddist.jar.compress=false
     -DskipTests
     -Dskip
@@ -124,14 +124,22 @@ jobs:
          export JAVA_HOME=${JAVA_HOME_8_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-            -pl integration_tests,tests -am \
-            -P 'individual,pre-merge' \
-            -Dbuildver=${{ matrix.spark-version }} \
-            -Dmaven.scalastyle.skip=true \
-            -Drat.skip=true \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B package \
+              -pl integration_tests,tests -am -P 'individual,pre-merge' \
+              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
+              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
   package-tests-scala213:
     needs: get-shim-versions-from-dist
@@ -170,14 +178,22 @@ jobs:
          fi
          # change to Scala 2.13 Directory
          cd scala2.13
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-            -pl integration_tests,tests -am \
-            -P 'individual,pre-merge' \
-            -Dbuildver=${{ matrix.spark-version }} \
-            -Dmaven.scalastyle.skip=true \
-            -Drat.skip=true \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B package \
+              -pl integration_tests,tests -am -P 'individual,pre-merge' \
+              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
+              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
 
   verify-all-modules:
@@ -206,11 +222,21 @@ jobs:
          export JAVA_HOME=${JAVA_HOME_${{ matrix.java-version }}_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify \
-            -P "individual,pre-merge" \
-            -Dbuildver=${{ matrix.spark-version }} \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=$max_retry \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B verify \
+              -P "individual,pre-merge" -Dbuildver=${{ matrix.spark-version }} \
+              ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
   install-modules:
     needs: get-shim-versions-from-dist
@@ -242,8 +268,19 @@ jobs:
          export JAVA_HOME=${JAVA_HOME_11_X64}
          export PATH=${JAVA_HOME}/bin:${PATH}
          java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          ./mvnw -Dmaven.wagon.http.retryHandler.count=3 -B install \
-            -P "individual,pre-merge" \
-            -Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.defaultSparkVersion }} \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            ./mvnw -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B install \
+              -P "individual,pre-merge" \
+              -Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.defaultSparkVersion }} \
+              ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
diff --git a/docs/_config.yml b/docs/_config.yml
index 6551395b0cd..e5bcfdfdc71 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -18,7 +18,9 @@ remote_theme: pmarsceill/just-the-docs
 
 aux_links:
-  "Spark Rapids Plugin on Github":
+  "RAPIDS Accelerator for Apache Spark User Guide":
+    - "//docs.nvidia.com/spark-rapids/user-guide/latest/index.html"
+  "RAPIDS Accelerator for Apache Spark Plugin on Github":
     - "//github.com/nvidia/spark-rapids"
 
 plugins:
diff --git a/docs/compatibility.md b/docs/compatibility.md
index 493d37efd00..8d18d8b57ca 100644
--- a/docs/compatibility.md
+++ b/docs/compatibility.md
@@ -106,6 +106,10 @@ it and they should be avoided.
 Escaped quote characters `'\"'` are not supported well as described by this
 [issue](https://github.com/NVIDIA/spark-rapids/issues/129).
 
+The GPU-accelerated CSV parser does not replace invalid UTF-8 characters with the Unicode
+replacement character �. Instead, it passes them through unchanged, as described in this
+[issue](https://github.com/NVIDIA/spark-rapids/issues/9560).
+
 ### CSV Dates
 
 Only a limited set of formats are supported when parsing dates.
diff --git a/docs/index.md b/docs/index.md
index 0334ecc5002..724e6b79a82 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,9 @@ permalink: /
 description: This site serves as a collection of documentation about the RAPIDS accelerator for Apache Spark
 ---
 # Overview
+**If you are a customer looking for information on how to adopt the RAPIDS Accelerator for Apache Spark
+for your Spark workloads, please refer to our [User Guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/index.html) for more information.**
+
 The RAPIDS Accelerator for Apache Spark leverages GPUs to accelerate processing via the
 [RAPIDS libraries](http://rapids.ai).
 
@@ -19,5 +22,3 @@ the scale of the Spark distributed computing framework. The RAPIDS Accelerator
 built-in accelerated shuffle based on [UCX](https://github.com/openucx/ucx/) that can be
 configured to leverage GPU-to-GPU communication and RDMA capabilities.
 
-If you are a customer looking for information on how to adopt RAPIDS Accelerator for Apache Spark
-for your Spark workloads, please go to our User Guide for more information: [link](https://docs.nvidia.com/spark-rapids/user-guide/latest/index.html).