From d45d1fec39b5295099278585cee3918ef81457e5 Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 12:29:29 +0800
Subject: [PATCH 01/21] merge jdk steps into one

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 39 +++++++-------------------
 1 file changed, 10 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 57140044cd8..d88194c4bb7 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -120,11 +120,16 @@ jobs:
       -Dskip
       -Dmaven.javadoc.skip
 
-  verify-modules-with-jdk11:
+  verify-modules-with-jdk:
     needs: get-shim-versions-from-dist
     runs-on: ubuntu-latest
     strategy:
-      matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
+      matrix:
+        include:
+          - java-version: 11
+            spark-version: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
+          - jave-version: 17
+            spark-version: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
     steps:
     - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
 
@@ -132,36 +137,12 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: adopt
-        java-version: 11
+        java-version: matrix.java-version
 
-    - name: Build JDK11
+    - name: Build JDK
       run: >
         mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
-        -P 'individual,pre-merge,jdk11'
-        -Dbuildver=${{ matrix.spark-version }}
-        -DskipTests
-        -Dskip
-        -Dmaven.javadoc.skip
-
-  # TODO: use matrix to combine all jdk* jobs
-  verify-modules-with-jdk17:
-    needs: get-shim-versions-from-dist
-    runs-on: ubuntu-latest
-    strategy:
-      matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
-    steps:
-    - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
-
-    - name: Setup Java and Maven Env
-      uses: actions/setup-java@v3
-      with:
-        distribution: adopt
-        java-version: 17
-
-    - name: Build JDK17
-      run: >
-        mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
-        -P 'individual,pre-merge,jdk17'
+        -P "individual,pre-merge,jdk${{ matrix.java-version }}"
        -Dbuildver=${{ matrix.spark-version }}
         -DskipTests
         -Dskip
         -Dmaven.javadoc.skip
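
The two fromJSON(...) expressions above consume JSON strings that the
get-shim-versions-from-dist job publishes as step outputs. As a rough sketch
of the shape involved (the version strings are hypothetical; the real ones
come from the dist build), each output is already a complete matrix document:

    sparkJDK11Versions='{"include":[{"spark-version":"330"},{"spark-version":"331"}]}'
    echo "$sparkJDK11Versions" | jq -r '.include[]."spark-version"'
    # 330
    # 331

Assigning such a whole document to a single spark-version key inside a
hand-written include list, as this first patch does, is what the follow-up
patches below untangle.
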
From c0d334709a53d4e8483a3895a9067d0b0b3625dc Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 14:55:43 +0800
Subject: [PATCH 02/21] fix

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index d88194c4bb7..21fa3fa0187 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -137,7 +137,7 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: adopt
-        java-version: matrix.java-version
+        java-version: ${{ matrix.java-version }}
 
     - name: Build JDK
       run: >

From 76ca72718a231baaf9945fc59fdda61d3946620c Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 15:08:07 +0800
Subject: [PATCH 03/21] fix

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 21fa3fa0187..206d6b26cbd 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -126,10 +126,10 @@ jobs:
     strategy:
       matrix:
         include:
-          - java-version: 11
-            spark-version: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
-          - jave-version: 17
-            spark-version: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
+          - java-ver: 11
+            spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
+          - jave-ver: 17
+            spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
     steps:
     - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
 
@@ -137,13 +137,13 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: adopt
-        java-version: ${{ matrix.java-version }}
+        java-version: ${{ matrix.java-ver }}
 
     - name: Build JDK
       run: >
         mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
-        -P "individual,pre-merge,jdk${{ matrix.java-version }}"
-        -Dbuildver=${{ matrix.spark-version }}
+        -P "individual,pre-merge,jdk${{ matrix.java-ver }}"
+        -Dbuildver=${{ matrix.spark-ver }}
         -DskipTests
         -Dskip
         -Dmaven.javadoc.skip

From d4e3cc8e2a240a09577429985c00ce86be797d48 Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 15:15:13 +0800
Subject: [PATCH 04/21] for test

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 206d6b26cbd..f3e2dee5080 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -137,7 +137,10 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: adopt
-        java-version: ${{ matrix.java-ver }}
+        java-version: 11
+      run: >
+        echo ${{ matrix.java-ver }}
+        echo ${{ matrix.spark-ver }}
 
     - name: Build JDK
       run: >

From c40d3e60d766a608fe65d45656535981cac382e4 Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 15:16:26 +0800
Subject: [PATCH 05/21] for test

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index f3e2dee5080..c7cefeca4fd 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -138,12 +138,11 @@ jobs:
       with:
         distribution: adopt
         java-version: 11
-      run: >
-        echo ${{ matrix.java-ver }}
-        echo ${{ matrix.spark-ver }}
 
     - name: Build JDK
       run: >
+        echo ${{ matrix.java-ver }}
+        echo ${{ matrix.spark-ver }}
         mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
         -P "individual,pre-merge,jdk${{ matrix.java-ver }}"
         -Dbuildver=${{ matrix.spark-ver }}

From 7f46f3185806ba1a7c53882b6e00d55f9234539a Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 15:31:18 +0800
Subject: [PATCH 06/21] for test

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index c7cefeca4fd..8a2e4b04607 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -125,11 +125,10 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        include:
-          - java-ver: 11
-            spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
-          - jave-ver: 17
-            spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
+        java-version: [11, 17]
+        # include:
+          # spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
+          # spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
     steps:
     - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
 
@@ -137,13 +136,12 @@ jobs:
       uses: actions/setup-java@v3
       with:
         distribution: adopt
-        java-version: 11
+        java-version: ${{ matrix.java-version }}
 
     - name: Build JDK
       run: >
-        echo ${{ matrix.java-ver }}
-        echo ${{ matrix.spark-ver }}
         mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
-        -P "individual,pre-merge,jdk${{ matrix.java-ver }}"
+        -P "individual,pre-merge,jdk${{ matrix.java-version }}"
         -Dbuildver=${{ matrix.spark-ver }}
         -DskipTests
         -Dskip
         -Dmaven.javadoc.skip
From 482e6b20bd7ecb1266c96036a52f27afca972dcf Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 16:25:54 +0800
Subject: [PATCH 07/21] merge jdk version

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index 8a2e4b04607..c6a8ecc9370 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -29,8 +29,7 @@ jobs:
     outputs:
       sparkHeadVersion: ${{ steps.allShimVersionsStep.outputs.headVersion }}
      sparkTailVersions: ${{ steps.allShimVersionsStep.outputs.tailVersions }}
-      sparkJDK11Versions: ${{ steps.allShimVersionsStep.outputs.jdk11Versions }}
-      sparkJDK17Versions: ${{ steps.allShimVersionsStep.outputs.jdk17Versions }}
+      sparkJDKVersions: ${{ steps.allShimVersionsStep.outputs.jdkVersions }}
     steps:
     - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
 
@@ -59,15 +58,14 @@ jobs:
          echo "headVersion=$SPARK_BASE_SHIM_VERSION" >> $GITHUB_OUTPUT
          echo "tailVersions=$svJsonStr" >> $GITHUB_OUTPUT
          # jdk11
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\"}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
-          jdkVersionArrBody=${jdkVersionArrBody:1}
-          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
-          echo "jdk11Versions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
+          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\", \"java-version\": 11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
+          jdk11VersionArrBody=${jdkVersionArrBody:1}
          # jdk17
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\"}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
-          jdkVersionArrBody=${jdkVersionArrBody:1}
-          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
-          echo "jdk17Versions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
+          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\", \"java-version\": 17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
+          jdk17VersionArrBody=${jdkVersionArrBody:1}
+          # jdk
+          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdk11VersionArrBody + $jdk17VersionArrBody)
+          echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
 
   package-tests:
     needs: get-shim-versions-from-dist
@@ -124,11 +122,7 @@ jobs:
     needs: get-shim-versions-from-dist
     runs-on: ubuntu-latest
     strategy:
-      matrix:
-        java-version: [11, 17]
-        # include:
-          # spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK11Versions) }}
-          # spark-ver: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDK17Versions) }}
+      matrix: ${{ fromJSON(needs.get-shim-versions-from-dist.outputs.sparkJDKVersions) }}
     steps:
     - uses: actions/checkout@v3 # refs/pull/:prNumber/merge
 
@@ -137,7 +131,7 @@ jobs:
       run: >
         mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify
         -P "individual,pre-merge,jdk${{ matrix.java-version }}"
-        -Dbuildver=${{ matrix.spark-ver }}
+        -Dbuildver=${{ matrix.spark-version }}
         -DskipTests
         -Dskip
         -Dmaven.javadoc.skip
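
The merged printf introduced here is subtly wrong, and the next three patches
chase it down: printf reuses its format string for every remaining argument,
so the literal + and the second fragment string each get wrapped in their own
{"include":[...]}. A minimal sketch of that behavior with toy fragments
(invented for illustration):

    a=',{"x":1}'; b=',{"y":2}'
    printf {\"include\":[%s]} ${a:1} + $b
    # prints: {"include":[{"x":1}]}{"include":[+]}{"include":[,{"y":2}]}
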
From eb45cf1e7df445be84fb9e57c2c76943223c16e3 Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 16:33:53 +0800
Subject: [PATCH 08/21] fix bug

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index c6a8ecc9370..ee0929e3353 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -58,10 +58,10 @@ jobs:
          echo "headVersion=$SPARK_BASE_SHIM_VERSION" >> $GITHUB_OUTPUT
          echo "tailVersions=$svJsonStr" >> $GITHUB_OUTPUT
          # jdk11
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\", \"java-version\": 11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
+          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
          jdk11VersionArrBody=${jdkVersionArrBody:1}
          # jdk17
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\", \"java-version\": 17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
+          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
          jdk17VersionArrBody=${jdkVersionArrBody:1}

From 4de963914426e7f9644dda6d75f0fc113ca05dab Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 16:43:15 +0800
Subject: [PATCH 09/21] fix bug

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index ee0929e3353..ae355dcfd8d 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -64,7 +64,8 @@ jobs:
          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
          jdk17VersionArrBody=${jdkVersionArrBody:1}
          # jdk
-          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdk11VersionArrBody + $jdk17VersionArrBody)
+          jdkVersionArrBody=$jdk11VersionArrBody+','+$jdk17VersionArrBody
+          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
          echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT

From e0c84c61d665df74f7fe757add2699b671ed29ce Mon Sep 17 00:00:00 2001
From: YanxuanLiu
Date: Tue, 23 May 2023 16:46:53 +0800
Subject: [PATCH 10/21] fix bug

Signed-off-by: YanxuanLiu
---
 .github/workflows/mvn-verify-check.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml
index ae355dcfd8d..0b1e18e5cee 100644
--- a/.github/workflows/mvn-verify-check.yml
+++ b/.github/workflows/mvn-verify-check.yml
@@ -58,13 +58,12 @@ jobs:
          echo "headVersion=$SPARK_BASE_SHIM_VERSION" >> $GITHUB_OUTPUT
          echo "tailVersions=$svJsonStr" >> $GITHUB_OUTPUT
          # jdk11
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
-          jdk11VersionArrBody=${jdkVersionArrBody:1}
+          jdk11VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
          # jdk17
-          jdkVersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
-          jdk17VersionArrBody=${jdkVersionArrBody:1}
+          jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
          # jdk
-          jdkVersionArrBody=$jdk11VersionArrBody+','+$jdk17VersionArrBody
+          jdkVersionArrBody=$jdk11VersionArrBody$jdk17VersionArrBody
+          jdkVersionArrBody=${jdkVersionArrBody:1}
          jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
          echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT
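
With patch 10 the construction is sound: each printf emits one ,{...}
fragment per array element, the two fragment strings are joined by plain
juxtaposition (bash has no + string operator, which is what patch 09 tripped
over), and only then is the single leading comma stripped. A standalone
sketch with hypothetical shim versions:

    SPARK_SHIM_VERSIONS_JDK11=(330 331)   # hypothetical
    SPARK_SHIM_VERSIONS_JDK17=(330 340)   # hypothetical
    jdk11VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":11}" "${SPARK_SHIM_VERSIONS_JDK11[@]}")
    jdk17VersionArrBody=$(printf ",{\"spark-version\":\"%s\",\"java-version\":17}" "${SPARK_SHIM_VERSIONS_JDK17[@]}")
    jdkVersionArrBody=$jdk11VersionArrBody$jdk17VersionArrBody
    jdkVersionArrBody=${jdkVersionArrBody:1}
    jdkVersionJsonStr=$(printf {\"include\":[%s]} $jdkVersionArrBody)
    echo "$jdkVersionJsonStr"
    # {"include":[{"spark-version":"330","java-version":11},{"spark-version":"331","java-version":11},
    #             {"spark-version":"330","java-version":17},{"spark-version":"340","java-version":17}]}

(The real output is a single line; it is wrapped above for readability.)
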
{\"include\":[%s]} $jdkVersionArrBody) echo "jdkVersions=$jdkVersionJsonStr" >> $GITHUB_OUTPUT From a4cf442d9323fc87d3cdcf1e64ba281aa65c97da Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 23 May 2023 07:24:50 -0700 Subject: [PATCH 11/21] Speed up github verify checks [skip ci] (#8355) Verify checks increased runtime since introduction from 2- minutes to 5-7 minutes, mostly due to large dependencies. At least stopping re-compressing them - skip compressing dist jar: 30+ seconds - refactor common maven flags Signed-off-by: Gera Shegalov --- .github/workflows/mvn-verify-check.yml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml index 57140044cd8..07a8f827b32 100644 --- a/.github/workflows/mvn-verify-check.yml +++ b/.github/workflows/mvn-verify-check.yml @@ -23,6 +23,13 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true +env: + COMMON_MVN_FLAGS: > + -Ddist.jar.compress=false + -DskipTests + -Dskip + -Dmaven.javadoc.skip + jobs: get-shim-versions-from-dist: runs-on: ubuntu-latest @@ -91,11 +98,9 @@ jobs: -pl integration_tests,tests -am -P 'individual,pre-merge' -Dbuildver=${{ matrix.spark-version }} - -DskipTests - -Dskip - -Dmaven.javadoc.skip -Dmaven.scalastyle.skip=true -Drat.skip=true + $COMMON_MVN_FLAGS verify-all-modules-with-headSparkVersion: @@ -116,9 +121,7 @@ jobs: mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify -P 'individual,pre-merge' -Dbuildver=${{ needs.get-shim-versions-from-dist.outputs.sparkHeadVersion }} - -DskipTests - -Dskip - -Dmaven.javadoc.skip + $COMMON_MVN_FLAGS verify-modules-with-jdk11: needs: get-shim-versions-from-dist @@ -133,15 +136,13 @@ jobs: with: distribution: adopt java-version: 11 - + - name: Build JDK11 run: > mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify -P 'individual,pre-merge,jdk11' -Dbuildver=${{ matrix.spark-version }} - -DskipTests - -Dskip - -Dmaven.javadoc.skip + $COMMON_MVN_FLAGS # TODO: use matrix to combine all jdk* jobs verify-modules-with-jdk17: @@ -163,6 +164,4 @@ jobs: mvn -Dmaven.wagon.http.retryHandler.count=3 -B verify -P 'individual,pre-merge,jdk17' -Dbuildver=${{ matrix.spark-version }} - -DskipTests - -Dskip - -Dmaven.javadoc.skip + $COMMON_MVN_FLAGS From 1f41667ae89c6ce8cb08d85048217c6327244fad Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 23 May 2023 10:28:28 -0500 Subject: [PATCH 12/21] Add in support for filter on empty batch (#8334) Signed-off-by: Robert (Bobby) Evans Co-authored-by: Jason Lowe --- integration_tests/src/main/python/cmp_test.py | 23 +++++- .../spark/rapids/basicPhysicalOperators.scala | 19 ++++- .../sql/rapids/datetimeExpressions.scala | 79 +++++++++---------- 3 files changed, 75 insertions(+), 46 deletions(-) diff --git a/integration_tests/src/main/python/cmp_test.py b/integration_tests/src/main/python/cmp_test.py index de836cc5531..23bba281e8f 100644 --- a/integration_tests/src/main/python/cmp_test.py +++ b/integration_tests/src/main/python/cmp_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 1f41667ae89c6ce8cb08d85048217c6327244fad Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Tue, 23 May 2023 10:28:28 -0500
Subject: [PATCH 12/21] Add in support for filter on empty batch (#8334)

Signed-off-by: Robert (Bobby) Evans
Co-authored-by: Jason Lowe
---
 integration_tests/src/main/python/cmp_test.py | 23 +++++-
 .../spark/rapids/basicPhysicalOperators.scala | 19 ++++-
 .../sql/rapids/datetimeExpressions.scala      | 79 +++++++++----------
 3 files changed, 75 insertions(+), 46 deletions(-)

diff --git a/integration_tests/src/main/python/cmp_test.py b/integration_tests/src/main/python/cmp_test.py
index de836cc5531..23bba281e8f 100644
--- a/integration_tests/src/main/python/cmp_test.py
+++ b/integration_tests/src/main/python/cmp_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -272,6 +272,27 @@ def test_filter_with_project(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : two_col_df(spark, BooleanGen(), data_gen).filter(f.col('a')).selectExpr('*', 'a as a2'))
 
+# It takes quite a bit to get filter to have a column it can filter on, but
+# no columns to actually filter. We are making it happen here with a sub-query
+# and some constants that then make it so all we need is the number of rows
+# of input.
+@pytest.mark.parametrize('op', ['>', '<'])
+def test_empty_filter(op, spark_tmp_path):
+
+    def do_it(spark):
+        df = spark.createDataFrame([(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
+        # we repartition the data to 3 because for some reason spark writes 4 files for 3 rows.
+        # In this case that causes a race condition with the last aggregation which can result
+        # in a null being returned. For some reason this happens a lot on the GPU in local mode
+        # and not on the CPU in local mode.
+        df.repartition(3).write.mode("overwrite").parquet(spark_tmp_path)
+        df = spark.read.parquet(spark_tmp_path)
+        curDate = df.withColumn("current_date", f.current_date())
+        curDate.createOrReplaceTempView("empty_filter_test_curDate")
+        spark.sql("select current_date, ((select last(current_date) from empty_filter_test_curDate) + interval 1 day) as test from empty_filter_test_curDate").createOrReplaceTempView("empty_filter_test2")
+        return spark.sql(f"select * from empty_filter_test2 where test {op} current_date")
+    assert_gpu_and_cpu_are_equal_collect(do_it)
+
 def test_nondeterministic_filter():
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : unary_op_df(spark, LongGen(), 1).filter(f.rand(0) > 0.5))
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/basicPhysicalOperators.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/basicPhysicalOperators.scala
index a8244aa181c..7c2d43fb706 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/basicPhysicalOperators.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/basicPhysicalOperators.scala
@@ -501,10 +501,21 @@ object GpuFilter {
       cb: ColumnarBatch): ColumnarBatch = {
     checkedFilterMask.map { checkedFilterMask =>
       withResource(checkedFilterMask) { checkedFilterMask =>
-        val colTypes = GpuColumnVector.extractTypes(cb)
-        withResource(GpuColumnVector.from(cb)) { tbl =>
-          withResource(tbl.filter(checkedFilterMask)) { filteredData =>
-            GpuColumnVector.from(filteredData, colTypes)
+        if (cb.numCols() <= 0) {
+          val rowCount = withResource(checkedFilterMask.sum(DType.INT32)) { sum =>
+            if (sum.isValid) {
+              sum.getInt
+            } else {
+              0
+            }
+          }
+          new ColumnarBatch(Array(), rowCount)
+        } else {
+          val colTypes = GpuColumnVector.extractTypes(cb)
+          withResource(GpuColumnVector.from(cb)) { tbl =>
+            withResource(tbl.filter(checkedFilterMask)) { filteredData =>
+              GpuColumnVector.from(filteredData, colTypes)
+            }
           }
         }
       }
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
index 9f7928e5224..f845e5458c6 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
@@ -198,51 +198,48 @@ case class GpuDateAddInterval(start: Expression,
 
   override def columnarEval(batch: ColumnarBatch): Any = {
 
-    withResourceIfAllowed(left.columnarEval(batch)) { lhs =>
-      withResourceIfAllowed(right.columnarEval(batch)) { rhs =>
-        (lhs, rhs) match {
-          case (l: GpuColumnVector, intvlS: GpuScalar)
-            if intvlS.dataType.isInstanceOf[CalendarIntervalType] =>
-            // Scalar does not support 'CalendarInterval' now, so use
-            // the Scala value instead.
-            // Skip the null check because it wll be detected by the following calls.
-            val intvl = intvlS.getValue.asInstanceOf[CalendarInterval]
-
-            // ANSI mode checking
-            if(ansiEnabled && intvl.microseconds != 0) {
-              val msg = "IllegalArgumentException: Cannot add hours, minutes or seconds" +
-                ", milliseconds, microseconds to a date. " +
-                "If necessary set spark.sql.ansi.enabled to false to bypass this error."
-              throw new IllegalArgumentException(msg)
-            }
+    withResourceIfAllowed(GpuExpressionsUtils.columnarEvalToColumn(left, batch)) { lhs =>
+      withResourceIfAllowed(right.columnarEval(batch)) {
+        case intvlS: GpuScalar if intvlS.dataType.isInstanceOf[CalendarIntervalType] =>
+          // Scalar does not support 'CalendarInterval' now, so use
+          // the Scala value instead.
+          // Skip the null check because it will be detected by the following calls.
+          val intvl = intvlS.getValue.asInstanceOf[CalendarInterval]
+
+          // ANSI mode checking
+          if (ansiEnabled && intvl.microseconds != 0) {
+            val msg = "IllegalArgumentException: Cannot add hours, minutes or seconds" +
+              ", milliseconds, microseconds to a date. " +
+              "If necessary set spark.sql.ansi.enabled to false to bypass this error."
+            throw new IllegalArgumentException(msg)
+          }
 
-            if (intvl.months != 0) {
-              throw new UnsupportedOperationException("Months aren't supported at the moment")
-            }
-            val microSecToDays = if (intvl.microseconds < 0) {
-              // This is to calculate when subtraction is performed. Need to take into account the
-              // interval( which are less than days). Convert it into days which needs to be
-              // subtracted along with intvl.days(if provided).
-              (intvl.microseconds.abs.toDouble / microSecondsInOneDay).ceil.toInt * -1
-            } else {
-              (intvl.microseconds.toDouble / microSecondsInOneDay).toInt
-            }
-            val daysToAdd = intvl.days + microSecToDays
-            if (daysToAdd != 0) {
-              withResource(Scalar.fromInt(daysToAdd)) { us_s =>
-                withResource(l.getBase.bitCastTo(DType.INT32)) { us =>
-                  withResource(intervalMath(us_s, us)) { intResult =>
-                    GpuColumnVector.from(intResult.castTo(DType.TIMESTAMP_DAYS), dataType)
-                  }
-                }
-              }
-            } else {
-              l.incRefCount()
-            }
-          case _ =>
-            throw new UnsupportedOperationException("GpuDateAddInterval takes column and " +
-              "interval as an argument only")
-        }
+          if (intvl.months != 0) {
+            throw new UnsupportedOperationException("Months aren't supported at the moment")
+          }
+          val microSecToDays = if (intvl.microseconds < 0) {
+            // This is to calculate when subtraction is performed. Need to take into account the
+            // interval( which are less than days). Convert it into days which needs to be
+            // subtracted along with intvl.days(if provided).
+            (intvl.microseconds.abs.toDouble / microSecondsInOneDay).ceil.toInt * -1
+          } else {
+            (intvl.microseconds.toDouble / microSecondsInOneDay).toInt
+          }
+          val daysToAdd = intvl.days + microSecToDays
+          if (daysToAdd != 0) {
+            withResource(Scalar.fromInt(daysToAdd)) { us_s =>
+              withResource(lhs.getBase.bitCastTo(DType.INT32)) { us =>
+                withResource(intervalMath(us_s, us)) { intResult =>
+                  GpuColumnVector.from(intResult.castTo(DType.TIMESTAMP_DAYS), dataType)
+                }
+              }
+            }
+          } else {
+            lhs.incRefCount()
+          }
+        case _ =>
+          throw new UnsupportedOperationException("GpuDateAddInterval requires a scalar " +
+            "for the interval")
       }
     }
   }
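
To exercise only the new test locally, something like the following should
work, assuming the repository's integration-test launcher script and an
already-built plugin jar (both are assumptions, not part of this patch):

    # -k forwards a pytest filter selecting just the empty-filter test
    ./integration_tests/run_pyspark_from_build.sh -k test_empty_filter
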
From 143b16b1a1668ef6226e82afdd2001c26dc338d7 Mon Sep 17 00:00:00 2001
From: Gera Shegalov
Date: Tue, 23 May 2023 09:10:08 -0700
Subject: [PATCH 13/21] Eliminate javac warnings [databricks] (#8342)

- add Werror to javac args
- suppress unavoidable warnings
- disable linter for "unclaimed" annotations

Fixes #6789

Signed-off-by: Gera Shegalov
Co-authored-by: Jason Lowe
---
 .../rapids/tests/udf/hive/EmptyHiveSimpleUDF.java  |  7 +++----
 pom.xml                                            |  8 +++++++-
 .../rapids/iceberg/parquet/ApplyNameMapping.java   | 12 ++++++++----
 .../rapids/iceberg/parquet/GpuParquetReader.java   |  6 +++---
 .../rapids/iceberg/parquet/ParquetConversions.java |  4 +++-
 .../rapids/iceberg/parquet/ParquetTypeVisitor.java |  9 ++++++---
 .../iceberg/parquet/TypeWithSchemaVisitor.java     |  9 ++++++---
 7 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/EmptyHiveSimpleUDF.java b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/EmptyHiveSimpleUDF.java
index 337579c2f79..71f971c14cd 100644
--- a/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/EmptyHiveSimpleUDF.java
+++ b/integration_tests/src/main/java/com/nvidia/spark/rapids/tests/udf/hive/EmptyHiveSimpleUDF.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,9 @@
 
 package com.nvidia.spark.rapids.tests.udf.hive;
 
-import org.apache.hadoop.hive.ql.exec.UDF;
-
 /** An empty Hive simple UDF returning the first input directly for row-based UDF test only. */
-public class EmptyHiveSimpleUDF extends UDF {
+@SuppressWarnings("deprecation")
+public class EmptyHiveSimpleUDF extends org.apache.hadoop.hive.ql.exec.UDF {
   public String evaluate(String in, String in2) {
     return in;
   }
diff --git a/pom.xml b/pom.xml
index 88c5d68b23d..d46c512f9a9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -636,7 +636,13 @@
         2.8.0
         incremental
         2.12.15
-        -Xlint:all,-serial,-path,-try
+        -Xlint:all,-serial,-path,-try,-processing|-Werror
         1.12.1
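
Reading the | in the new property value as the separator between two compiler
arguments, the pom change amounts to compiling with lint enabled,
annotation-processing warnings excluded, and all remaining warnings promoted
to errors; roughly the equivalent of (file name hypothetical):

    javac -Xlint:all,-serial,-path,-try,-processing -Werror SomeClass.java
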