From c7c97638d5f368423a9eb9edb320c3f08ae80110 Mon Sep 17 00:00:00 2001
From: Jim Brennan
Date: Tue, 7 Nov 2023 18:32:08 -0600
Subject: [PATCH 1/3] Document problem with handling of invalid characters in CSV reader (#9655)

Signed-off-by: Jim Brennan
---
 docs/compatibility.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/compatibility.md b/docs/compatibility.md
index 493d37efd00..8d18d8b57ca 100644
--- a/docs/compatibility.md
+++ b/docs/compatibility.md
@@ -106,6 +106,10 @@ it and they should be avoided.
 Escaped quote characters `'\"'` are not supported well as described by this
 [issue](https://github.com/NVIDIA/spark-rapids/issues/129).
 
+The GPU accelerated CSV parser does not replace invalid UTF-8 characters with the Unicode
+replacement character �. Instead, it passes them through unchanged, as described in this
+[issue](https://github.com/NVIDIA/spark-rapids/issues/9560).
+
 ### CSV Dates
 Only a limited set of formats are supported when parsing dates.
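To make the documented behavior concrete, the following minimal sketch (not part of the patch; the file path, app name, and column values are hypothetical) writes a CSV file containing an invalid UTF-8 byte and reads it back with Spark. On the CPU, Spark substitutes the Unicode replacement character U+FFFD for the malformed byte; with the GPU-accelerated parser, the raw byte is passed through unchanged.

```python
# Illustrative sketch only: demonstrates the invalid-UTF-8 pass-through
# described in docs/compatibility.md. Path and data are placeholders.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("invalid-utf8-demo").getOrCreate()

path = "/tmp/invalid_utf8.csv"      # hypothetical location
with open(path, "wb") as f:
    f.write(b"col1\nval\xffue\n")   # 0xFF is never valid in UTF-8

df = spark.read.option("header", "true").csv(path)
# CPU Spark shows "val\ufffdue" (replacement character);
# the GPU parser passes the 0xFF byte through as-is.
df.show()
```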
From 9e10f260bcec14afed86f074dcdbc712a712cc18 Mon Sep 17 00:00:00 2001
From: YanxuanLiu <104543031+YanxuanLiu@users.noreply.github.com>
Date: Wed, 8 Nov 2023 09:10:52 +0800
Subject: [PATCH 2/3] Add valid retry solution to mvn-verify [skip ci] (#9609)

* add retry at bash level

* add retry and ttl config for all stages

* use incremental delay

---------

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 91 ++++++++++++++++++--------
 1 file changed, 64 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 9ce3042ce88..81184557583 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -24,7 +24,7 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  COMMON_MVN_FLAGS: >
+  COMMON_MVN_FLAGS: >-
     -Ddist.jar.compress=false
     -DskipTests
     -Dskip
 
           export JAVA_HOME=${JAVA_HOME_8_X64}
           export PATH=${JAVA_HOME}/bin:${PATH}
           java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-            -pl integration_tests,tests -am \
-            -P 'individual,pre-merge' \
-            -Dbuildver=${{ matrix.spark-version }} \
-            -Dmaven.scalastyle.skip=true \
-            -Drat.skip=true \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B package \
+              -pl integration_tests,tests -am -P 'individual,pre-merge' \
+              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
+              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
   package-tests-scala213:
     needs: get-shim-versions-from-dist
 
           fi
           # change to Scala 2.13 Directory
           cd scala2.13
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B package \
-            -pl integration_tests,tests -am \
-            -P 'individual,pre-merge' \
-            -Dbuildver=${{ matrix.spark-version }} \
-            -Dmaven.scalastyle.skip=true \
-            -Drat.skip=true \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B package \
+              -pl integration_tests,tests -am -P 'individual,pre-merge' \
+              -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \
+              -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
   verify-all-modules:
 
           export JAVA_HOME=${JAVA_HOME_${{ matrix.java-version }}_X64}
           export PATH=${JAVA_HOME}/bin:${PATH}
           java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify \
-            -P "individual,pre-merge" \
-            -Dbuildver=${{ matrix.spark-version }} \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            mvn -Dmaven.wagon.http.retryHandler.count=$max_retry \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B verify \
+              -P "individual,pre-merge" -Dbuildver=${{ matrix.spark-version }} \
+              ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
 
   install-modules:
     needs: get-shim-versions-from-dist
 
           export JAVA_HOME=${JAVA_HOME_11_X64}
           export PATH=${JAVA_HOME}/bin:${PATH}
           java -version && mvn --version && echo "ENV JAVA_HOME: $JAVA_HOME, PATH: $PATH"
-          # test command
-          ./mvnw -Dmaven.wagon.http.retryHandler.count=3 -B install \
-            -P "individual,pre-merge" \
-            -Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.defaultSparkVersion }} \
-            ${{ env.COMMON_MVN_FLAGS }}
+          # test command; retries up to 3 times on failure.
+          max_retry=3; delay=30; i=1
+          while true; do
+            ./mvnw -Dmaven.wagon.http.retryHandler.count=3 \
+              -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 \
+              -Daether.connector.http.connectionMaxTtl=30 -B install \
+              -P "individual,pre-merge" \
+              -Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.defaultSparkVersion }} \
+              ${{ env.COMMON_MVN_FLAGS }} && break || {
+              if [[ $i -le $max_retry ]]; then
+                echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
+              else
+                echo "mvn command failed. Exit 1"; exit 1
+              fi
+            }
+          done
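Each of the four retry blocks above follows the same shape: run the Maven command, `break` on success, otherwise sleep and double the delay until `max_retry` retries are exhausted. As a reading aid only, here is that control flow restated as a standalone Python sketch; `run_build()` is a hypothetical stand-in for the workflow's `mvn ... package`/`verify`/`install` invocations, not code from the CI scripts.

```python
# Sketch of the retry-with-doubling-delay pattern used in the workflow above.
# run_build() is a placeholder, not part of the actual CI scripts.
import subprocess
import sys
import time

def run_build() -> bool:
    """Stand-in for the real Maven command; True means success."""
    return subprocess.run(["mvn", "-B", "verify"]).returncode == 0

max_retry, delay = 3, 30
for attempt in range(1, max_retry + 2):  # initial attempt + max_retry retries
    if run_build():
        break
    if attempt <= max_retry:
        print(f"mvn command failed. Retry {attempt}/{max_retry}.")
        time.sleep(delay)
        delay *= 2  # waits of 30s, 60s, 120s, mirroring ((delay=delay*2))
    else:
        sys.exit("mvn command failed. Exit 1")
```

A persistently failing build therefore makes four attempts in total before the job exits nonzero.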
From 25ffa873ad6211f14d5e402805e537b52d60d2dc Mon Sep 17 00:00:00 2001
From: Matt Ahrens
Date: Wed, 8 Nov 2023 10:22:57 -0600
Subject: [PATCH 3/3] Making User Guide info in bold and adding it as top right link in github.io [skip ci] (#9663)

* Making User Guide info in bold and adding it as top right link in github.io

Signed-off-by: mattahrens

* Making User Guide info in bold and adding it as top right link in github.io

Signed-off-by: mattahrens

* Updates based on PR feedback

Signed-off-by: mattahrens

---------

Signed-off-by: mattahrens
---
 docs/_config.yml | 4 +++-
 docs/index.md    | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/_config.yml b/docs/_config.yml
index 6551395b0cd..e5bcfdfdc71 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -18,7 +18,9 @@ remote_theme: pmarsceill/just-the-docs
 
 aux_links:
-  "Spark Rapids Plugin on Github":
+  "RAPIDS Accelerator for Apache Spark User Guide":
+    - "//docs.nvidia.com/spark-rapids/user-guide/latest/index.html"
+  "RAPIDS Accelerator for Apache Spark Plugin on Github":
     - "//github.com/nvidia/spark-rapids"
 
 plugins:

diff --git a/docs/index.md b/docs/index.md
index 0334ecc5002..724e6b79a82 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -6,6 +6,9 @@ permalink: /
 description: This site serves as a collection of documentation about the RAPIDS accelerator for Apache Spark
 ---
 # Overview
+**If you are a customer looking for information on how to adopt the RAPIDS Accelerator for Apache Spark
+for your Spark workloads, see the [User Guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/index.html).**
+
 The RAPIDS Accelerator for Apache Spark leverages GPUs to accelerate processing via the
 [RAPIDS libraries](http://rapids.ai).
 
 the scale of the Spark distributed computing framework. The RAPIDS Accelerator library also has a
 built-in accelerated shuffle based on [UCX](https://github.com/openucx/ucx/) that can be
 configured to leverage GPU-to-GPU communication and RDMA capabilities.
 
-If you are a customer looking for information on how to adopt RAPIDS Accelerator for Apache Spark
-for your Spark workloads, please go to our User Guide for more information: [link](https://docs.nvidia.com/spark-rapids/user-guide/latest/index.html).