Merge pull request #430 from nvliyuan/fix-auto-merge-conflict-429

Fix auto merge conflict 429
NVIDIA · Aug 30, 2024 · 6a1233e · 6a1233e
2 parents f80ad60 + 3147d52
commit 6a1233e
Show file tree

Hide file tree

Showing 19 changed files with 25 additions and 25 deletions.
diff --git a/docs/get-started/xgboost-examples/csp/databricks/databricks.md b/docs/get-started/xgboost-examples/csp/databricks/databricks.md
@@ -21,7 +21,7 @@ Navigate to your home directory in the UI and select **Create** > **File** from
 create an `init.sh` scripts with contents:   
    ```bash
    #!/bin/bash
-   sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar
+   sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar
    ```
 1. Select the Databricks Runtime Version from one of the supported runtimes specified in the
    Prerequisites section.
@@ -68,7 +68,7 @@ create an `init.sh` scripts with contents:
     ```bash
     spark.rapids.sql.python.gpu.enabled true
     spark.python.daemon.module rapids.daemon_databricks
-    spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.06.0.jar:/databricks/spark/python
+    spark.executorEnv.PYTHONPATH /databricks/jars/rapids-4-spark_2.12-24.08.1.jar:/databricks/spark/python
     ```
    Note that since python memory pool require installing the cudf library, so you need to install cudf library in 
    each worker nodes `pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com` or disable python memory pool

diff --git a/docs/get-started/xgboost-examples/csp/databricks/init.sh b/docs/get-started/xgboost-examples/csp/databricks/init.sh
@@ -1,7 +1,7 @@
 sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.5.2.jar
 sudo rm -f /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar
 
-sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar
+sudo wget -O /databricks/jars/rapids-4-spark_2.12-24.08.1.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar
 sudo wget -O /databricks/jars/xgboost4j-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-gpu_2.12/1.7.1/xgboost4j-gpu_2.12-1.7.1.jar
 sudo wget -O /databricks/jars/xgboost4j-spark-gpu_2.12-1.7.1.jar https://repo1.maven.org/maven2/ml/dmlc/xgboost4j-spark-gpu_2.12/1.7.1/xgboost4j-spark-gpu_2.12-1.7.1.jar
 ls -ltr

diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md
@@ -40,7 +40,7 @@ export SPARK_DOCKER_IMAGE=<gpu spark docker image repo and name>
 export SPARK_DOCKER_TAG=<spark docker image tag>
 
 pushd ${SPARK_HOME}
-wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-24.06/dockerfile/Dockerfile
+wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-24.08/dockerfile/Dockerfile
 
 # Optionally install additional jars into ${SPARK_HOME}/jars/
 

diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md
@@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag
 ### Download the jars
 
 Download the RAPIDS Accelerator for Apache Spark plugin jar
-  * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)
+  * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)
 
 ### Build XGBoost Python Examples
 

diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md
@@ -5,7 +5,7 @@ For simplicity export the location to these jars. All examples assume the packag
 ### Download the jars
 
 1. Download the RAPIDS Accelerator for Apache Spark plugin jar
-   * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)
+   * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)
 
 ### Build XGBoost Scala Examples
 

diff --git a/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile b/examples/ML+DL-Examples/Spark-cuML/pca/Dockerfile
@@ -18,7 +18,7 @@
 ARG CUDA_VER=11.8.0
 FROM nvidia/cuda:${CUDA_VER}-devel-ubuntu20.04
 # Please do not update the BRANCH_VER version
-ARG BRANCH_VER=24.06
+ARG BRANCH_VER=24.08
 
 RUN apt-get update
 RUN apt-get install -y wget ninja-build git

diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb
@@ -22,7 +22,7 @@
     "import os\n",
     "# Change to your cluster ip:port and directories\n",
     "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n",
-    "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-24.06.0.jar\")\n"
+    "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-24.08.1.jar\")\n"
    ]
   },
   {

diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/README.md
@@ -186,7 +186,7 @@ then do the following inside the Docker container.
 
 ### Get jars from Maven Central
 
-[rapids-4-spark_2.12-24.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)
+[rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)
 
 
 ### Launch a local mode Spark

diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/pom.xml
@@ -233,4 +233,4 @@
             </build>
         </profile>
     </profiles>
-</project>
+</project>
diff --git a/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt b/examples/UDF-Examples/RAPIDS-accelerated-UDFs/src/main/cpp/CMakeLists.txt
@@ -172,4 +172,4 @@ target_compile_definitions(udfexamplesjni
 ###################################################################################################
 # - link libraries --------------------------------------------------------------------------------
 
-target_link_libraries(udfexamplesjni cudf::cudf)
+target_link_libraries(udfexamplesjni cudf::cudf)
diff --git a/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb b/examples/XGBoost-Examples/agaricus/notebooks/python/agaricus-gpu.ipynb
@@ -73,7 +73,7 @@
       "Setting default log level to \"WARN\".\n",
       "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
       "2022-11-30 06:57:40,550 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-      "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.06.0 using cudf 24.06.0.\n",
+      "2022-11-30 06:57:54,195 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n",
       "2022-11-30 06:57:54,210 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
       "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
       "2022-11-30 06:57:54,214 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n",

diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/MortgageETL.ipynb
@@ -6,18 +6,18 @@
    "source": [
     "## Prerequirement\n",
     "### 1. Download data\n",
-    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-24.06/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
+    "Dataset is derived from Fannie Mae’s [Single-Family Loan Performance Data](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) with all rights reserved by Fannie Mae. Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-24.08/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
     "\n",
     "### 2. Download needed jars\n",
-    "* [rapids-4-spark_2.12-24.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n",
     "\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-24.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n",
     "$ export PYSPARK_DRIVER_PYTHON=jupyter \n",
     "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n",
     "```\n",

diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/cv-mortgage-gpu.ipynb
@@ -63,7 +63,7 @@
       "Setting default log level to \"WARN\".\n",
       "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
       "2022-11-25 09:34:43,952 WARN resource.ResourceUtils: The configuration of cores (exec = 4 task = 1, runnable tasks = 4) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-      "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.06.0 using cudf 24.06.0.\n",
+      "2022-11-25 09:34:58,155 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n",
       "2022-11-25 09:34:58,171 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
       "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
       "2022-11-25 09:34:58,175 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n"

diff --git a/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/python/mortgage-gpu.ipynb
@@ -84,7 +84,7 @@
       "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMaster\n",
       "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering BlockManagerMasterHeartbeat\n",
       "22/11/24 06:14:06 INFO org.apache.spark.SparkEnv: Registering OutputCommitCoordinator\n",
-      "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 24.06.0 using cudf 24.06.0.\n",
+      "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n",
       "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
       "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
       "22/11/24 06:14:07 WARN com.nvidia.spark.rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n"

diff --git a/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb b/examples/XGBoost-Examples/mortgage/notebooks/scala/mortgage-ETL.ipynb
@@ -20,14 +20,14 @@
     "Refer to these [instructions](https://github.com/NVIDIA/spark-rapids-examples/blob/branch-23.12/docs/get-started/xgboost-examples/dataset/mortgage.md) to download the dataset.\n",
     "\n",
     "### 2. Download needed jars\n",
-    "* [rapids-4-spark_2.12-24.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before Running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-24.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n",
     "\n",
     "```\n",
     "\n",

diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/cv-taxi-gpu.ipynb
@@ -62,7 +62,7 @@
       "Setting default log level to \"WARN\".\n",
       "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
       "2022-11-30 08:02:10,103 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-      "2022-11-30 08:02:23,737 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.06.0 using cudf 24.06.0.\n",
+      "2022-11-30 08:02:23,737 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n",
       "2022-11-30 08:02:23,752 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
       "2022-11-30 08:02:23,756 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
       "2022-11-30 08:02:23,757 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n",

diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-ETL.ipynb
@@ -19,14 +19,14 @@
     "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n",
     "\n",
     "### 2. Download needed jars\n",
-    "* [rapids-4-spark_2.12-24.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-24.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n",
     "$ export PYSPARK_DRIVER_PYTHON=jupyter \n",
     "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n",
     "```\n",

diff --git a/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb b/examples/XGBoost-Examples/taxi/notebooks/python/taxi-gpu.ipynb
@@ -73,7 +73,7 @@
       "Setting default log level to \"WARN\".\n",
       "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
       "2022-11-30 07:51:19,480 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
-      "2022-11-30 07:51:33,277 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.06.0 using cudf 24.06.0.\n",
+      "2022-11-30 07:51:33,277 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 24.08.1 using cudf 24.08.1.\n",
       "2022-11-30 07:51:33,292 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
       "2022-11-30 07:51:33,295 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
       "2022-11-30 07:51:33,295 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n",

diff --git a/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb b/examples/XGBoost-Examples/taxi/notebooks/scala/taxi-ETL.ipynb
@@ -19,14 +19,14 @@
     "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n",
     "\n",
     "### 2. Download needed jar\n",
-    "* [rapids-4-spark_2.12-24.06.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar)\n",
+    "* [rapids-4-spark_2.12-24.08.1.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.08.1/rapids-4-spark_2.12-24.08.1.jar)\n",
     "\n",
     "### 3. Start Spark Standalone\n",
     "Before running the script, please setup Spark standalone mode\n",
     "\n",
     "### 4. Add ENV\n",
     "```\n",
-    "$ export SPARK_JARS=rapids-4-spark_2.12-24.06.0.jar\n",
+    "$ export SPARK_JARS=rapids-4-spark_2.12-24.08.1.jar\n",
     "\n",
     "```\n",
     "\n",