diff --git a/.github/workflows/add-to-project.yml b/.github/workflows/add-to-project.yml index d963cafc7d6..1f84a5c9d7e 100644 --- a/.github/workflows/add-to-project.yml +++ b/.github/workflows/add-to-project.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ jobs: name: Add new issues and pull requests to project runs-on: ubuntu-latest steps: - - uses: actions/add-to-project@v0.4.0 + - uses: actions/add-to-project@v0.6.1 with: project-url: https://github.com/orgs/NVIDIA/projects/4 github-token: ${{ secrets.PROJECT_TOKEN }} diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml index f049bad0a25..3cf2d058f8b 100644 --- a/.github/workflows/auto-merge.yml +++ b/.github/workflows/auto-merge.yml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: branch-24.04 # force to fetch from latest upstream instead of PR ref diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml index 8cd7a2fe6a6..848be4e0a56 100644 --- a/.github/workflows/blossom-ci.yml +++ b/.github/workflows/blossom-ci.yml @@ -35,13 +35,10 @@ jobs: # This job only runs for pull request comments if: contains( '\ abellina,\ - andygrove,\ anfeng,\ firestarman,\ GaryShen2008,\ - jbrennan333, \ jlowe,\ - krajendrannv,\ kuhushukla,\ mythrocks,\ nartal1,\ @@ -49,7 +46,6 @@ jobs: NvTimLiu,\ razajafri,\ revans2,\ - rongou,\ rwlee,\ sameerz,\ tgravescs,\ @@ -72,6 +68,8 @@ jobs: viadea,\ yinqingh,\ parthosa,\ + liurenjie1024,\ + binmahone,\ ', format('{0},', github.actor)) && github.event.comment.body == 'build' steps: - name: Check if comment is issued by authorized person diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml index ad5c3647398..734dd8e6bb9 100644 --- a/.github/workflows/mvn-verify-check.yml +++ b/.github/workflows/mvn-verify-check.yml @@ -45,8 +45,8 @@ jobs: sparkJDKVersions: ${{ steps.allShimVersionsStep.outputs.jdkVersions }} scala213Versions: ${{ steps.allShimVersionsStep.outputs.scala213Versions }} steps: - - uses: actions/checkout@v3 # refs/pull/:prNumber/merge - - uses: actions/setup-java@v3 + - uses: actions/checkout@v4 # refs/pull/:prNumber/merge + - uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: 8 @@ -58,7 +58,7 @@ jobs: echo "dailyCacheKey=$cacheKey" | tee $GITHUB_ENV $GITHUB_OUTPUT - name: Cache local Maven repository id: cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ env.dailyCacheKey }} @@ -146,16 +146,16 @@ jobs: fail-fast: false runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 # refs/pull/:prNumber/merge + - uses: actions/checkout@v4 # refs/pull/:prNumber/merge - name: Setup Java and Maven Env - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: adopt java-version: 8 - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }} @@ -195,16 +195,16 @@ jobs: fail-fast: false runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 # refs/pull/:prNumber/merge + - uses: actions/checkout@v4 # refs/pull/:prNumber/merge - name: Setup Java and Maven Env - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: adopt 
java-version: 8 - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }} @@ -253,16 +253,16 @@ jobs: strategy: matrix: ${{ fromJSON(needs.cache-dependencies.outputs.sparkJDKVersions) }} steps: - - uses: actions/checkout@v3 # refs/pull/:prNumber/merge + - uses: actions/checkout@v4 # refs/pull/:prNumber/merge - name: Setup Java and Maven Env - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: adopt java-version: ${{ matrix.java-version }} - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }} @@ -300,16 +300,16 @@ jobs: matrix: maven-version: [3.6.3, 3.8.8, 3.9.3] steps: - - uses: actions/checkout@v3 # refs/pull/:prNumber/merge + - uses: actions/checkout@v4 # refs/pull/:prNumber/merge - name: Setup Java - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: adopt java-version: 11 - name: Cache local Maven repository - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ needs.cache-dependencies.outputs.dailyCacheKey }} diff --git a/.github/workflows/signoff-check.yml b/.github/workflows/signoff-check.yml index 85cc3697c52..076f72c7f28 100644 --- a/.github/workflows/signoff-check.yml +++ b/.github/workflows/signoff-check.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ jobs: signoff-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: sigoff-check job uses: ./.github/workflows/signoff-check diff --git a/CHANGELOG.md b/CHANGELOG.md index 42324d41aa8..35e8d00ebe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,173 @@ # Change log -Generated on 2024-03-06 +Generated on 2024-04-15 + +## Release 24.04 + +### Features +||| +|:---|:---| +|[#10263](https://github.com/NVIDIA/spark-rapids/issues/10263)|[FEA] Add support for reading JSON containing structs where rows are not consistent| +|[#10436](https://github.com/NVIDIA/spark-rapids/issues/10436)|[FEA] Move Spark 3.5.1 out of snapshot once released| +|[#10430](https://github.com/NVIDIA/spark-rapids/issues/10430)|[FEA] Error out when running on an unsupported GPU architecture| +|[#9750](https://github.com/NVIDIA/spark-rapids/issues/9750)|[FEA] Review `JsonToStruct` and `JsonScan` and consolidate some testing and implementation| +|[#8680](https://github.com/NVIDIA/spark-rapids/issues/8680)|[AUDIT][SPARK-42779][SQL] Allow V2 writes to indicate advisory shuffle partition size| +|[#10429](https://github.com/NVIDIA/spark-rapids/issues/10429)|[FEA] Drop support for Databricks 10.4 ML LTS| +|[#10334](https://github.com/NVIDIA/spark-rapids/issues/10334)|[FEA] Turn on memory limits for parquet reader| +|[#10344](https://github.com/NVIDIA/spark-rapids/issues/10344)|[FEA] support barrier mode for mapInPandas/mapInArrow| + +### Performance +||| +|:---|:---| +|[#10578](https://github.com/NVIDIA/spark-rapids/issues/10578)|[FEA] Support project expression rewrite for the case ```stringinstr(str_col, substr) > 0``` to ```contains(str_col, substr)```| +|[#10570](https://github.com/NVIDIA/spark-rapids/issues/10570)|[FEA] See if we can optimize sort for a single batch| 
+|[#10531](https://github.com/NVIDIA/spark-rapids/issues/10531)|[FEA] Support "WindowGroupLimit" optimization on GPU for Databricks 13.3 ML LTS+| +|[#5553](https://github.com/NVIDIA/spark-rapids/issues/5553)|[FEA][Audit] - Push down StringEndsWith/Contains to Parquet | +|[#8208](https://github.com/NVIDIA/spark-rapids/issues/8208)|[FEA][AUDIT][SPARK-37099][SQL] Introduce the group limit of Window for rank-based filter to optimize top-k computation| +|[#10249](https://github.com/NVIDIA/spark-rapids/issues/10249)|[FEA] Support common subexpression elimination for expand operator| +|[#10301](https://github.com/NVIDIA/spark-rapids/issues/10301)|[FEA] Improve performance of from_json| + +### Bugs Fixed +||| +|:---|:---| +|[#10665](https://github.com/NVIDIA/spark-rapids/issues/10665)|[BUG] Need to update private jar's version to v24.04.1 for spark-rapids v24.04.0 release| +|[#10589](https://github.com/NVIDIA/spark-rapids/issues/10589)|[BUG] ZSTD version mismatch in integration tests| +|[#10255](https://github.com/NVIDIA/spark-rapids/issues/10255)|[BUG] parquet_tests are skipped on Dataproc CI| +|[#10624](https://github.com/NVIDIA/spark-rapids/issues/10624)|[BUG] Deploy script "gpg:sign-and-deploy-file failed: 401 Unauthorized| +|[#10631](https://github.com/NVIDIA/spark-rapids/issues/10631)|[BUG] pending `BlockState` leaks blocks if the shuffle read doesn't finish successfully| +|[#10349](https://github.com/NVIDIA/spark-rapids/issues/10349)|[BUG]Test in json_test.py failed: test_from_json_struct_decimal| +|[#9033](https://github.com/NVIDIA/spark-rapids/issues/9033)|[BUG] GpuGetJsonObject does not expand escaped characters| +|[#10216](https://github.com/NVIDIA/spark-rapids/issues/10216)|[BUG] GetJsonObject fails at spark unit test $.store.book[*].reader| +|[#10217](https://github.com/NVIDIA/spark-rapids/issues/10217)|[BUG] GetJsonObject fails at spark unit test $.store.basket[0][*].b| +|[#10537](https://github.com/NVIDIA/spark-rapids/issues/10537)|[BUG] GetJsonObject throws exception when json path contains a name starting with `'`| +|[#10194](https://github.com/NVIDIA/spark-rapids/issues/10194)|[BUG] GetJsonObject does not validate the input is JSON in the same way as Spark| +|[#10196](https://github.com/NVIDIA/spark-rapids/issues/10196)|[BUG] GetJsonObject does not process escape sequences in returned strings or queries| +|[#10212](https://github.com/NVIDIA/spark-rapids/issues/10212)|[BUG] GetJsonObject should return null for invalid query instead of throwing an exception| +|[#10218](https://github.com/NVIDIA/spark-rapids/issues/10218)|[BUG] GetJsonObject does not normalize non-string output| +|[#10591](https://github.com/NVIDIA/spark-rapids/issues/10591)|[BUG] `test_column_add_after_partition` failed on EGX Standalone cluster| +|[#10277](https://github.com/NVIDIA/spark-rapids/issues/10277)|Add monitoring for GH action deprecations| +|[#10627](https://github.com/NVIDIA/spark-rapids/issues/10627)|[BUG] Integration tests FAILED on: "nvCOMP 2.3/2.4 or newer is required for Zstandard compression"| +|[#10585](https://github.com/NVIDIA/spark-rapids/issues/10585)|[BUG]Test simple pinned blocking alloc Failed nightly tests| +|[#10586](https://github.com/NVIDIA/spark-rapids/issues/10586)|[BUG] YARN EGX IT build failing parquet_testing_test can't find file| +|[#10133](https://github.com/NVIDIA/spark-rapids/issues/10133)|[BUG] test_hash_reduction_collect_set_on_nested_array_type failed in a distributed environment| +|[#10378](https://github.com/NVIDIA/spark-rapids/issues/10378)|[BUG] 
`test_range_running_window_float_decimal_sum_runs_batched` fails intermittently| +|[#10486](https://github.com/NVIDIA/spark-rapids/issues/10486)|[BUG] StructsToJson does not fall back to the CPU for unsupported timeZone options| +|[#10484](https://github.com/NVIDIA/spark-rapids/issues/10484)|[BUG] JsonToStructs does not fallback when columnNameOfCorruptRecord is set| +|[#10460](https://github.com/NVIDIA/spark-rapids/issues/10460)|[BUG] JsonToStructs should reject float numbers for integer types| +|[#10468](https://github.com/NVIDIA/spark-rapids/issues/10468)|[BUG] JsonToStructs and ScanJson should not treat quoted strings as valid integers| +|[#10470](https://github.com/NVIDIA/spark-rapids/issues/10470)|[BUG] ScanJson and JsonToStructs should support parsing quoted decimal strings that are formatted by local (at least for en-US)| +|[#10494](https://github.com/NVIDIA/spark-rapids/issues/10494)|[BUG] JsonToStructs parses INF wrong when nonNumericNumbers is enabled| +|[#10456](https://github.com/NVIDIA/spark-rapids/issues/10456)|[BUG] allowNonNumericNumbers OFF supported for JSON Scan, but not JsonToStructs| +|[#10467](https://github.com/NVIDIA/spark-rapids/issues/10467)|[BUG] JsonToStructs should reject 1. as a valid number| +|[#10469](https://github.com/NVIDIA/spark-rapids/issues/10469)|[BUG] ScanJson should accept "1." as a valid Decimal| +|[#10559](https://github.com/NVIDIA/spark-rapids/issues/10559)|[BUG] test_spark_from_json_date_with_format FAILED on : Part of the plan is not columnar class org.apache.spark.sql.execution.ProjectExec| +|[#10209](https://github.com/NVIDIA/spark-rapids/issues/10209)|[BUG] Test failure hash_aggregate_test.py::test_hash_reduction_collect_set_on_nested_array_type DATAGEN_SEED=1705515231| +|[#10319](https://github.com/NVIDIA/spark-rapids/issues/10319)|[BUG] Shuffled join OOM with 4GB of GPU memory| +|[#10507](https://github.com/NVIDIA/spark-rapids/issues/10507)|[BUG] regexp_test.py FAILED test_regexp_extract_all_idx_positive[DATAGEN_SEED=1709054829, INJECT_OOM]| +|[#10527](https://github.com/NVIDIA/spark-rapids/issues/10527)|[BUG] Build on Databricks failed with GpuGetJsonObject.scala:19: object parsing is not a member of package util| +|[#10509](https://github.com/NVIDIA/spark-rapids/issues/10509)|[BUG] scalar leaks when running nds query51| +|[#10214](https://github.com/NVIDIA/spark-rapids/issues/10214)|[BUG] GetJsonObject does not support unquoted array like notation| +|[#10215](https://github.com/NVIDIA/spark-rapids/issues/10215)|[BUG] GetJsonObject removes leading space characters| +|[#10213](https://github.com/NVIDIA/spark-rapids/issues/10213)|[BUG] GetJsonObject supports array index notation without a root| +|[#10452](https://github.com/NVIDIA/spark-rapids/issues/10452)|[BUG] JsonScan and from_json share fallback checks, but have hard coded names in the results| +|[#10455](https://github.com/NVIDIA/spark-rapids/issues/10455)|[BUG] JsonToStructs and ScanJson do not fall back/support it properly if single quotes are disabled| +|[#10219](https://github.com/NVIDIA/spark-rapids/issues/10219)|[BUG] GetJsonObject sees a double quote in a single quoted string as invalid| +|[#10431](https://github.com/NVIDIA/spark-rapids/issues/10431)|[BUG] test_casting_from_overflow_double_to_timestamp `DID NOT RAISE `| +|[#10499](https://github.com/NVIDIA/spark-rapids/issues/10499)|[BUG] Unit tests core dump as below| +|[#9325](https://github.com/NVIDIA/spark-rapids/issues/9325)|[BUG] test_csv_infer_schema_timestamp_ntz fails| 
+|[#10422](https://github.com/NVIDIA/spark-rapids/issues/10422)|[BUG] test_get_json_object_single_quotes failure| +|[#10411](https://github.com/NVIDIA/spark-rapids/issues/10411)|[BUG] Some fast parquet tests fail if the time zone is not UTC| +|[#10410](https://github.com/NVIDIA/spark-rapids/issues/10410)|[BUG]delta_lake_update_test.py::test_delta_update_partitions[['a', 'b']-False] failed by DATAGEN_SEED=1707683137| +|[#10404](https://github.com/NVIDIA/spark-rapids/issues/10404)|[BUG] GpuJsonTuple memory leak| +|[#10382](https://github.com/NVIDIA/spark-rapids/issues/10382)|[BUG] Complile failed on branch-24.04 : literals.scala:32: object codec is not a member of package org.apache.commons| + +### PRs +||| +|:---|:---| +|[#10681](https://github.com/NVIDIA/spark-rapids/pull/10681)|Update rapids JNI dependency to 24.04.0, private to 24.04.1| +|[#10660](https://github.com/NVIDIA/spark-rapids/pull/10660)|Ensure an executor broadcast is in a single batch| +|[#10676](https://github.com/NVIDIA/spark-rapids/pull/10676)|[DOC] Update docs for 24.04.0 release [skip ci]| +|[#10654](https://github.com/NVIDIA/spark-rapids/pull/10654)|Add a config to switch back to old impl for getJsonObject| +|[#10667](https://github.com/NVIDIA/spark-rapids/pull/10667)|Update rapids private dependency to 24.04.1| +|[#10664](https://github.com/NVIDIA/spark-rapids/pull/10664)|Remove build link from the premerge-CI workflow| +|[#10657](https://github.com/NVIDIA/spark-rapids/pull/10657)|Revert "Host Memory OOM handling for RowToColumnarIterator (#10617)"| +|[#10625](https://github.com/NVIDIA/spark-rapids/pull/10625)|Pin to 3.1.0 maven-gpg-plugin in deploy script [skip ci]| +|[#10637](https://github.com/NVIDIA/spark-rapids/pull/10637)|Cleanup async state when multi-threaded shuffle readers fail| +|[#10617](https://github.com/NVIDIA/spark-rapids/pull/10617)|Host Memory OOM handling for RowToColumnarIterator| +|[#10614](https://github.com/NVIDIA/spark-rapids/pull/10614)|Use random seed for `test_from_json_struct_decimal`| +|[#10581](https://github.com/NVIDIA/spark-rapids/pull/10581)|Use new jni kernel for getJsonObject| +|[#10630](https://github.com/NVIDIA/spark-rapids/pull/10630)|Fix removal of internal metadata information in 350 shim| +|[#10623](https://github.com/NVIDIA/spark-rapids/pull/10623)|Auto merge PRs to branch-24.06 from branch-24.04 [skip ci]| +|[#10616](https://github.com/NVIDIA/spark-rapids/pull/10616)|Pass metadata extractors to FileScanRDD| +|[#10620](https://github.com/NVIDIA/spark-rapids/pull/10620)|Remove unused shared lib in Jenkins files| +|[#10615](https://github.com/NVIDIA/spark-rapids/pull/10615)|Turn off state logging in HostAllocSuite| +|[#10610](https://github.com/NVIDIA/spark-rapids/pull/10610)|Do not replace TableCacheQueryStageExec| +|[#10599](https://github.com/NVIDIA/spark-rapids/pull/10599)|Call globStatus directly via PY4J in hdfs_glob to avoid calling hadoop command| +|[#10602](https://github.com/NVIDIA/spark-rapids/pull/10602)|Remove InMemoryTableScanExec support for Spark 3.5+| +|[#10608](https://github.com/NVIDIA/spark-rapids/pull/10608)|Update perfio.s3.enabled doc to fix build failure [skip ci]| +|[#10598](https://github.com/NVIDIA/spark-rapids/pull/10598)|Update CI script to build and deploy using the same CUDA classifier[skip ci]| +|[#10575](https://github.com/NVIDIA/spark-rapids/pull/10575)|Update JsonToStructs and ScanJson to have white space normalization| +|[#10597](https://github.com/NVIDIA/spark-rapids/pull/10597)|add guardword to hide cloud info| 
+|[#10540](https://github.com/NVIDIA/spark-rapids/pull/10540)|Handle minimum GPU architecture supported| +|[#10584](https://github.com/NVIDIA/spark-rapids/pull/10584)|Add in small optimization for instr comparison| +|[#10590](https://github.com/NVIDIA/spark-rapids/pull/10590)|Turn on transition logging in HostAllocSuite| +|[#10572](https://github.com/NVIDIA/spark-rapids/pull/10572)|Improve performance of Sort for the common single batch use case| +|[#10568](https://github.com/NVIDIA/spark-rapids/pull/10568)|Add configuration to share JNI pinned pool with cuIO| +|[#10550](https://github.com/NVIDIA/spark-rapids/pull/10550)|Enable window-group-limit optimization on| +|[#10542](https://github.com/NVIDIA/spark-rapids/pull/10542)|Make JSON parsing common between JsonToStructs and ScanJson| +|[#10562](https://github.com/NVIDIA/spark-rapids/pull/10562)|Fix test_spark_from_json_date_with_format when run in a non-UTC TZ| +|[#10564](https://github.com/NVIDIA/spark-rapids/pull/10564)|Enable specifying specific integration test methods via TESTS environment| +|[#10563](https://github.com/NVIDIA/spark-rapids/pull/10563)|Append new authorized user to blossom-ci safelist [skip ci]| +|[#10520](https://github.com/NVIDIA/spark-rapids/pull/10520)|Distinct left join| +|[#10538](https://github.com/NVIDIA/spark-rapids/pull/10538)|Move K8s cloud name into common lib for Jenkins CI| +|[#10552](https://github.com/NVIDIA/spark-rapids/pull/10552)|Fix issues when no value can be extracted from a regular expression| +|[#10522](https://github.com/NVIDIA/spark-rapids/pull/10522)|Fix missing scala-parser-combinators dependency on Databricks| +|[#10549](https://github.com/NVIDIA/spark-rapids/pull/10549)|Update to latest branch-24.02 [skip ci]| +|[#10544](https://github.com/NVIDIA/spark-rapids/pull/10544)|Fix merge conflict from branch-24.02| +|[#10503](https://github.com/NVIDIA/spark-rapids/pull/10503)|Distinct inner join| +|[#10512](https://github.com/NVIDIA/spark-rapids/pull/10512)|Move to parsing from_json input preserving quoted strings.| +|[#10528](https://github.com/NVIDIA/spark-rapids/pull/10528)|Fix auto merge conflict 10523| +|[#10519](https://github.com/NVIDIA/spark-rapids/pull/10519)|Replicate HostColumnVector.ColumnBuilder in plugin to enable host memory oom work| +|[#10521](https://github.com/NVIDIA/spark-rapids/pull/10521)|Fix Spark 3.5.1 build| +|[#10516](https://github.com/NVIDIA/spark-rapids/pull/10516)|One more metric for expand| +|[#10500](https://github.com/NVIDIA/spark-rapids/pull/10500)|Support "WindowGroupLimit" optimization on GPU| +|[#10508](https://github.com/NVIDIA/spark-rapids/pull/10508)|Move 351 shims into noSnapshot buildvers| +|[#10510](https://github.com/NVIDIA/spark-rapids/pull/10510)|Fix scalar leak in SumBinaryFixer| +|[#10466](https://github.com/NVIDIA/spark-rapids/pull/10466)|Use parser from spark to normalize json path in GetJsonObject| +|[#10490](https://github.com/NVIDIA/spark-rapids/pull/10490)|Start working on a more complete json test matrix json| +|[#10497](https://github.com/NVIDIA/spark-rapids/pull/10497)|Add minValue overflow check in ORC double-to-timestamp cast| +|[#10501](https://github.com/NVIDIA/spark-rapids/pull/10501)|Fix scalar leak in WindowRetrySuite| +|[#10474](https://github.com/NVIDIA/spark-rapids/pull/10474)|Remove Support for Databricks 10.4| +|[#10418](https://github.com/NVIDIA/spark-rapids/pull/10418)|Enable GpuShuffledSymmetricHashJoin by default| +|[#10450](https://github.com/NVIDIA/spark-rapids/pull/10450)|Improve internal row to columnar host memory by 
using a combined spillable buffer| +|[#10440](https://github.com/NVIDIA/spark-rapids/pull/10440)|Generate CSV data per Spark version for tools| +|[#10449](https://github.com/NVIDIA/spark-rapids/pull/10449)|[DOC] Fix table rendering issue in github.io download UI page [skip ci]| +|[#10438](https://github.com/NVIDIA/spark-rapids/pull/10438)|Integrate perfio.s3 reader| +|[#10423](https://github.com/NVIDIA/spark-rapids/pull/10423)|Disable Integration Test:`test_get_json_object_single_quotes` on DB 10.4| +|[#10419](https://github.com/NVIDIA/spark-rapids/pull/10419)|Export TZ in tests when default TZ is used| +|[#10426](https://github.com/NVIDIA/spark-rapids/pull/10426)|Fix auto merge conflict 10425 [skip ci]| +|[#10427](https://github.com/NVIDIA/spark-rapids/pull/10427)|Update test doc for 24.04 [skip ci]| +|[#10396](https://github.com/NVIDIA/spark-rapids/pull/10396)|Remove inactive user from github workflow [skip ci]| +|[#10421](https://github.com/NVIDIA/spark-rapids/pull/10421)|Use withRetry when manifesting spillable batch in GpuShuffledHashJoinExec| +|[#10420](https://github.com/NVIDIA/spark-rapids/pull/10420)|Disable JsonTuple by default| +|[#10407](https://github.com/NVIDIA/spark-rapids/pull/10407)|Enable Single Quote Support in getJSONObject API with GetJsonObjectOptions| +|[#10415](https://github.com/NVIDIA/spark-rapids/pull/10415)|Avoid comparing Delta logs when writing partitioned tables| +|[#10247](https://github.com/NVIDIA/spark-rapids/pull/10247)|Improve `GpuExpand` by pre-projecting some columns| +|[#10248](https://github.com/NVIDIA/spark-rapids/pull/10248)|Group-by aggregation based optimization for UNBOUNDED `collect_set` window function| +|[#10406](https://github.com/NVIDIA/spark-rapids/pull/10406)|Enabled subPage chunking by default| +|[#10361](https://github.com/NVIDIA/spark-rapids/pull/10361)|Add in basic support for JSON generation in BigDataGen and improve performance of from_json| +|[#10158](https://github.com/NVIDIA/spark-rapids/pull/10158)|Add in framework for unbounded to unbounded window agg optimization| +|[#10394](https://github.com/NVIDIA/spark-rapids/pull/10394)|Fix auto merge conflict 10393 [skip ci]| +|[#10375](https://github.com/NVIDIA/spark-rapids/pull/10375)|Support barrier mode for mapInPandas/mapInArrow| +|[#10356](https://github.com/NVIDIA/spark-rapids/pull/10356)|Update locate_parquet_testing_files function to support hdfs input path for dataproc CI| +|[#10369](https://github.com/NVIDIA/spark-rapids/pull/10369)|Revert "Support barrier mode for mapInPandas/mapInArrow (#10364)"| +|[#10358](https://github.com/NVIDIA/spark-rapids/pull/10358)|Disable Spark UI by default for integration tests| +|[#10360](https://github.com/NVIDIA/spark-rapids/pull/10360)|Fix a memory leak in json tuple| +|[#10364](https://github.com/NVIDIA/spark-rapids/pull/10364)|Support barrier mode for mapInPandas/mapInArrow| +|[#10348](https://github.com/NVIDIA/spark-rapids/pull/10348)|Remove redundant joinOutputRows metric| +|[#10321](https://github.com/NVIDIA/spark-rapids/pull/10321)|Bump up dependency version to 24.04.0-SNAPSHOT| +|[#10330](https://github.com/NVIDIA/spark-rapids/pull/10330)|Add tryAcquire to GpuSemaphore| +|[#10331](https://github.com/NVIDIA/spark-rapids/pull/10331)|Revert "Update to libcudf unsigned sum aggregation types change (#10267)"| +|[#10258](https://github.com/NVIDIA/spark-rapids/pull/10258)|Init project version 24.04.0-SNAPSHOT| ## Release 24.02 @@ -124,6 +292,7 @@ Generated on 2024-03-06 ### PRs ||| |:---|:---| 
+|[#10555](https://github.com/NVIDIA/spark-rapids/pull/10555)|Update change log [skip ci]| |[#10551](https://github.com/NVIDIA/spark-rapids/pull/10551)|Try to make degenerative joins here impossible for these tests| |[#10546](https://github.com/NVIDIA/spark-rapids/pull/10546)|Update changelog [skip ci]| |[#10541](https://github.com/NVIDIA/spark-rapids/pull/10541)|Fix Delta log cache size settings during integration tests| @@ -143,6 +312,7 @@ Generated on 2024-03-06 |[#10387](https://github.com/NVIDIA/spark-rapids/pull/10387)|[DOC] Update docs for 24.02.0 release [skip ci]| |[#10399](https://github.com/NVIDIA/spark-rapids/pull/10399)|Update NOTICE-binary| |[#10389](https://github.com/NVIDIA/spark-rapids/pull/10389)|Change version and branch to 24.02 in docs [skip ci]| +|[#10384](https://github.com/NVIDIA/spark-rapids/pull/10384)|[DOC] Update docs for 23.12.2 release [skip ci] | |[#10309](https://github.com/NVIDIA/spark-rapids/pull/10309)|[DOC] add custom 404 page and fix some document issue [skip ci]| |[#10352](https://github.com/NVIDIA/spark-rapids/pull/10352)|xfail mixed type test| |[#10355](https://github.com/NVIDIA/spark-rapids/pull/10355)|Revert "Support barrier mode for mapInPandas/mapInArrow (#10343)"| @@ -241,6 +411,7 @@ Generated on 2024-03-06 |[#9996](https://github.com/NVIDIA/spark-rapids/pull/9996)|Test full timestamp output range in PySpark| |[#10081](https://github.com/NVIDIA/spark-rapids/pull/10081)|Add a fallback Cloudera Maven repo URL [skip ci]| |[#10065](https://github.com/NVIDIA/spark-rapids/pull/10065)|Improve host memory spill interfaces| +|[#10069](https://github.com/NVIDIA/spark-rapids/pull/10069)|Revert "Support split broadcast join condition into ast and non-ast […| |[#10070](https://github.com/NVIDIA/spark-rapids/pull/10070)|Fix 332db build failure| |[#10060](https://github.com/NVIDIA/spark-rapids/pull/10060)|Fix failed cases for non-utc time zone| |[#10038](https://github.com/NVIDIA/spark-rapids/pull/10038)|Remove spark.rapids.sql.nonUTC.enabled configuration option| @@ -250,6 +421,7 @@ Generated on 2024-03-06 |[#10053](https://github.com/NVIDIA/spark-rapids/pull/10053)|Remove invalid user from CODEOWNER file [skip ci]| |[#10049](https://github.com/NVIDIA/spark-rapids/pull/10049)|Fix out of range error from pySpark in test_timestamp_millis and other two integration test cases| |[#9721](https://github.com/NVIDIA/spark-rapids/pull/9721)|Support date_format via Gpu for non-UTC time zone| +|[#9470](https://github.com/NVIDIA/spark-rapids/pull/9470)|Use float to string kernel| |[#9845](https://github.com/NVIDIA/spark-rapids/pull/9845)|Use parse_url kernel for HOST parsing| |[#10024](https://github.com/NVIDIA/spark-rapids/pull/10024)|Support hour minute second for non-UTC time zone| |[#9973](https://github.com/NVIDIA/spark-rapids/pull/9973)|Batching support for row-based bounded window functions | @@ -259,6 +431,7 @@ Generated on 2024-03-06 |[#10023](https://github.com/NVIDIA/spark-rapids/pull/10023)|GPU supports `yyyyMMdd` format by post process for the `from_unixtime` function| |[#10033](https://github.com/NVIDIA/spark-rapids/pull/10033)|Remove GpuToTimestampImproved and spark.rapids.sql.improvedTimeOps.enabled| |[#10016](https://github.com/NVIDIA/spark-rapids/pull/10016)|Fix infinite loop in test_str_to_map_expr_random_delimiters| +|[#9481](https://github.com/NVIDIA/spark-rapids/pull/9481)|Use parse_url kernel for PROTOCOL parsing| |[#10030](https://github.com/NVIDIA/spark-rapids/pull/10030)|Update links in shims.md| 
|[#10015](https://github.com/NVIDIA/spark-rapids/pull/10015)|Fix array_transform to not recompute the argument| |[#10011](https://github.com/NVIDIA/spark-rapids/pull/10011)|Add cpu oom retry split handling to InternalRowToColumnarBatchIterator| @@ -286,316 +459,14 @@ Generated on 2024-03-06 |[#9852](https://github.com/NVIDIA/spark-rapids/pull/9852)|Avoid generating duplicate nan keys with MapGen(FloatGen)| |[#9674](https://github.com/NVIDIA/spark-rapids/pull/9674)|Add cache action to speed up mvn workflow [skip ci]| |[#9900](https://github.com/NVIDIA/spark-rapids/pull/9900)|Revert "Remove Databricks 13.3 from release 23.12 (#9890)"| +|[#9889](https://github.com/NVIDIA/spark-rapids/pull/9889)|Fix test_cast_string_ts_valid_format test| |[#9888](https://github.com/NVIDIA/spark-rapids/pull/9888)|Update nightly build and deploy script for arm artifacts [skip ci]| +|[#9833](https://github.com/NVIDIA/spark-rapids/pull/9833)|Fix a hang for Pandas UDFs on DB 13.3| |[#9656](https://github.com/NVIDIA/spark-rapids/pull/9656)|Update for new retry state machine JNI APIs| |[#9654](https://github.com/NVIDIA/spark-rapids/pull/9654)|Detect multiple jars on the classpath when init plugin| |[#9857](https://github.com/NVIDIA/spark-rapids/pull/9857)|Skip redundant steps in nightly build [skip ci]| |[#9812](https://github.com/NVIDIA/spark-rapids/pull/9812)|Update JNI and private dep version to 24.02.0-SNAPSHOT| - -## Release 23.12 - -### Features -||| -|:---|:---| -|[#6832](https://github.com/NVIDIA/spark-rapids/issues/6832)|[FEA] Convert Timestamp/Timezone tests/checks to be per operator instead of generic | -|[#9805](https://github.com/NVIDIA/spark-rapids/issues/9805)|[FEA] Support ```current_date``` expression function with CST (UTC + 8) timezone support| -|[#9515](https://github.com/NVIDIA/spark-rapids/issues/9515)|[FEA] Support temporal types in to_json| -|[#9872](https://github.com/NVIDIA/spark-rapids/issues/9872)|[FEA][JSON] Support Decimal type in `to_json`| -|[#9802](https://github.com/NVIDIA/spark-rapids/issues/9802)|[FEA] Support FromUTCTimestamp on the GPU with a non-UTC time zone| -|[#6831](https://github.com/NVIDIA/spark-rapids/issues/6831)|[FEA] Support timestamp transitions to and from UTC for single time zones with no repeating rules| -|[#9590](https://github.com/NVIDIA/spark-rapids/issues/9590)|[FEA][JSON] Support temporal types in `from_json`| -|[#9804](https://github.com/NVIDIA/spark-rapids/issues/9804)|[FEA] Support CPU path for from_utc_timestamp function with timezone| -|[#9461](https://github.com/NVIDIA/spark-rapids/issues/9461)|[FEA] Validate nvcomp-3.0 with spark rapids plugin| -|[#8832](https://github.com/NVIDIA/spark-rapids/issues/8832)|[FEA] rewrite join conditions where only part of it can fit on the AST| -|[#9059](https://github.com/NVIDIA/spark-rapids/issues/9059)|[FEA] Support spark.sql.parquet.datetimeRebaseModeInRead=LEGACY| -|[#9037](https://github.com/NVIDIA/spark-rapids/issues/9037)|[FEA] Support spark.sql.parquet.int96RebaseModeInWrite= LEGACY| -|[#9632](https://github.com/NVIDIA/spark-rapids/issues/9632)|[FEA] Take into account `org.apache.spark.timeZone` in Parquet/Avro from Spark 3.2| -|[#8770](https://github.com/NVIDIA/spark-rapids/issues/8770)|[FEA] add more metrics to Eventlogs or Executor logs| -|[#9597](https://github.com/NVIDIA/spark-rapids/issues/9597)|[FEA][JSON] Support boolean type in `from_json`| -|[#9516](https://github.com/NVIDIA/spark-rapids/issues/9516)|[FEA] Add support for JSON data source option `ignoreNullFields=false` in `to_json`| 
-|[#9520](https://github.com/NVIDIA/spark-rapids/issues/9520)|[FEA] Add support for `LAST()` as running window function| -|[#9518](https://github.com/NVIDIA/spark-rapids/issues/9518)|[FEA] Add support for relevant JSON data source options in `to_json`| -|[#9218](https://github.com/NVIDIA/spark-rapids/issues/9218)|[FEA] Support stack function| -|[#9532](https://github.com/NVIDIA/spark-rapids/issues/9532)|[FEA] Support Delta Lake 2.3.0| -|[#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)|[FEA] Support Scala 2.13| -|[#7279](https://github.com/NVIDIA/spark-rapids/issues/7279)|[FEA] Support OverwriteByExpressionExecV1 for Delta Lake| -|[#9326](https://github.com/NVIDIA/spark-rapids/issues/9326)|[FEA] Specify `recover_with_null` when reading JSON files| -|[#8780](https://github.com/NVIDIA/spark-rapids/issues/8780)|[FEA] Support to_json function| -|[#7278](https://github.com/NVIDIA/spark-rapids/issues/7278)|[FEA] Support AppendDataExecV1 for Delta Lake| -|[#6266](https://github.com/NVIDIA/spark-rapids/issues/6266)|[FEA] Support Percentile| -|[#7277](https://github.com/NVIDIA/spark-rapids/issues/7277)|[FEA] Support AtomicReplaceTableAsSelect for Delta Lake| -|[#7276](https://github.com/NVIDIA/spark-rapids/issues/7276)|[FEA] Support AtomicCreateTableAsSelect for Delta Lake| - -### Performance -||| -|:---|:---| -|[#8137](https://github.com/NVIDIA/spark-rapids/issues/8137)|[FEA] Upgrade to UCX 1.15| -|[#8157](https://github.com/NVIDIA/spark-rapids/issues/8157)|[FEA] Add string comparison to AST expressions| -|[#9398](https://github.com/NVIDIA/spark-rapids/issues/9398)|[FEA] Compress/encrypt spill to disk| - -### Bugs Fixed -||| -|:---|:---| -|[#9687](https://github.com/NVIDIA/spark-rapids/issues/9687)|[BUG] `test_in_set` fails when DATAGEN_SEED=1698940723| -|[#9659](https://github.com/NVIDIA/spark-rapids/issues/9659)|[BUG] executor crash intermittantly in scala2.13-built spark332 integration tests| -|[#9923](https://github.com/NVIDIA/spark-rapids/issues/9923)|[BUG] Failed case about ```test_timestamp_seconds_rounding_necessary[Decimal(20,7)][DATAGEN_SEED=1701412018] – src.main.python.date_time_test```| -|[#9982](https://github.com/NVIDIA/spark-rapids/issues/9982)|[BUG] test "convert large InternalRow iterator to cached batch single col" failed with arena pool| -|[#9683](https://github.com/NVIDIA/spark-rapids/issues/9683)|[BUG] test_map_scalars_supported_key_types fails with DATAGEN_SEED=1698940723| -|[#9976](https://github.com/NVIDIA/spark-rapids/issues/9976)|[BUG] test_part_write_round_trip[Float] Failed on -0.0 partition| -|[#9948](https://github.com/NVIDIA/spark-rapids/issues/9948)|[BUG] parquet reader data corruption in nested schema after https://github.com/rapidsai/cudf/pull/13302| -|[#9867](https://github.com/NVIDIA/spark-rapids/issues/9867)|[BUG] Unable to use Spark Rapids with Spark Thrift Server| -|[#9934](https://github.com/NVIDIA/spark-rapids/issues/9934)|[BUG] test_delta_multi_part_write_round_trip_unmanaged and test_delta_part_write_round_trip_unmanaged failed DATA_SEED=1701608331 | -|[#9933](https://github.com/NVIDIA/spark-rapids/issues/9933)|[BUG] collection_ops_test.py::test_sequence_too_long_sequence[Long(not_null)][DATAGEN_SEED=1701553915, INJECT_OOM]| -|[#9837](https://github.com/NVIDIA/spark-rapids/issues/9837)|[BUG] test_part_write_round_trip failed| -|[#9932](https://github.com/NVIDIA/spark-rapids/issues/9932)|[BUG] Failed test_multi_tier_ast[DATAGEN_SEED=1701445668] on CI| -|[#9829](https://github.com/NVIDIA/spark-rapids/issues/9829)|[BUG] Java OOM when testing 
non-UTC time zone with lots of cases fallback.| -|[#9403](https://github.com/NVIDIA/spark-rapids/issues/9403)|[BUG] test_cogroup_apply_udf[Short(not_null)] failed with pandas 2.1.X| -|[#9684](https://github.com/NVIDIA/spark-rapids/issues/9684)|[BUG] test_coalesce fails with DATAGEN_SEED=1698940723| -|[#9685](https://github.com/NVIDIA/spark-rapids/issues/9685)|[BUG] test_case_when fails with DATAGEN_SEED=1698940723| -|[#9776](https://github.com/NVIDIA/spark-rapids/issues/9776)|[BUG] fastparquet compatibility tests fail with data mismatch if TZ is not set and system timezone is not UTC| -|[#9733](https://github.com/NVIDIA/spark-rapids/issues/9733)|[BUG] Complex AST expressions can crash with non-matching operand type error| -|[#9877](https://github.com/NVIDIA/spark-rapids/issues/9877)|[BUG] Fix resource leak in to_json| -|[#9722](https://github.com/NVIDIA/spark-rapids/issues/9722)|[BUG] test_floor_scale_zero fails with DATAGEN_SEED=1700009407| -|[#9846](https://github.com/NVIDIA/spark-rapids/issues/9846)|[BUG] test_ceil_scale_zero may fail with different datagen_seed| -|[#9781](https://github.com/NVIDIA/spark-rapids/issues/9781)|[BUG] test_cast_string_date_valid_format fails on DATAGEN_SEED=1700250017| -|[#9714](https://github.com/NVIDIA/spark-rapids/issues/9714)|Scala Map class not found when executing the benchmark on Spark 3.5.0 with Scala 2.13| -|[#9856](https://github.com/NVIDIA/spark-rapids/issues/9856)|collection_ops_test.py failed on Dataproc-2.1 with: Column 'None' does not exist| -|[#9397](https://github.com/NVIDIA/spark-rapids/issues/9397)|[BUG] RapidsShuffleManager MULTITHREADED on Databricks, we see loss of executors due to Rpc issues| -|[#9738](https://github.com/NVIDIA/spark-rapids/issues/9738)|[BUG] `test_delta_part_write_round_trip_unmanaged` and `test_delta_multi_part_write_round_trip_unmanaged` fail with `DATAGEN_SEED=1700105176`| -|[#9771](https://github.com/NVIDIA/spark-rapids/issues/9771)|[BUG] ast_test.py::test_X[(String, True)][DATAGEN_SEED=1700205785] failed| -|[#9782](https://github.com/NVIDIA/spark-rapids/issues/9782)|[BUG] Error messages appear in a clean build| -|[#9798](https://github.com/NVIDIA/spark-rapids/issues/9798)|[BUG] GpuCheckOverflowInTableInsert should be added to databricks shim| -|[#9820](https://github.com/NVIDIA/spark-rapids/issues/9820)|[BUG] test_parquet_write_roundtrip_datetime_with_legacy_rebase fails with "year 0 is out of range"| -|[#9817](https://github.com/NVIDIA/spark-rapids/issues/9817)|[BUG] FAILED dpp_test.py::test_dpp_reuse_broadcast_exchange[false-0-parquet][DATAGEN_SEED=1700572856, IGNORE_ORDER]| -|[#9768](https://github.com/NVIDIA/spark-rapids/issues/9768)|[BUG] `cast decimal to string` ScalaTest relies on a side effects | -|[#9711](https://github.com/NVIDIA/spark-rapids/issues/9711)|[BUG] test_lte fails with DATAGEN_SEED=1699987762| -|[#9751](https://github.com/NVIDIA/spark-rapids/issues/9751)|[BUG] cmp_test test_gte failed with DATAGEN_SEED=1700149611| -|[#9469](https://github.com/NVIDIA/spark-rapids/issues/9469)|[BUG] [main] ERROR com.nvidia.spark.rapids.GpuOverrideUtil - Encountered an exception applying GPU overrides java.lang.IllegalStateException: the broadcast must be on the GPU too| -|[#9648](https://github.com/NVIDIA/spark-rapids/issues/9648)|[BUG] Existence default values in schema are not being honored| -|[#9676](https://github.com/NVIDIA/spark-rapids/issues/9676)|Fix Delta Lake Integration tests; `test_delta_atomic_create_table_as_select` and `test_delta_atomic_replace_table_as_select`| 
-|[#9701](https://github.com/NVIDIA/spark-rapids/issues/9701)|[BUG] test_ts_formats_round_trip and test_datetime_roundtrip_with_legacy_rebase fail with DATAGEN_SEED=1699915317| -|[#9691](https://github.com/NVIDIA/spark-rapids/issues/9691)|[BUG] Repeated Maven invocations w/o changes recompile too many Scala sources despite recompileMode=incremental | -|[#9547](https://github.com/NVIDIA/spark-rapids/issues/9547)|Update buildall and doc to generate bloop projects for test debugging| -|[#9697](https://github.com/NVIDIA/spark-rapids/issues/9697)|[BUG] Iceberg multiple file readers can not read files if the file paths contain encoded URL unsafe chars| -|[#9681](https://github.com/NVIDIA/spark-rapids/issues/9681)|Databricks Build Failing For 330db+| -|[#9521](https://github.com/NVIDIA/spark-rapids/issues/9521)|[BUG] Multi Threaded Shuffle Writer needs flow control| -|[#9675](https://github.com/NVIDIA/spark-rapids/issues/9675)|Failing Delta Lake Tests for Databricks 13.3 Due to WriteIntoDeltaCommand| -|[#9669](https://github.com/NVIDIA/spark-rapids/issues/9669)|[BUG] Rebase exception states not in UTC but timezone is Etc/UTC| -|[#7940](https://github.com/NVIDIA/spark-rapids/issues/7940)|[BUG] UCX peer connection issue in multi-nic single node cluster| -|[#9650](https://github.com/NVIDIA/spark-rapids/issues/9650)|[BUG] Github workflow for missing scala2.13 updates fails to detect when pom is new| -|[#9621](https://github.com/NVIDIA/spark-rapids/issues/9621)|[BUG] Scala 2.13 with-classifier profile is picking up Scala2.12 spark.version| -|[#9636](https://github.com/NVIDIA/spark-rapids/issues/9636)|[BUG] All parquet integration tests failed "Part of the plan is not columnar class" in databricks runtimes| -|[#9108](https://github.com/NVIDIA/spark-rapids/issues/9108)|[BUG] nullability on some decimal operations is wrong| -|[#9625](https://github.com/NVIDIA/spark-rapids/issues/9625)|[BUG] Typo in github Maven check install-modules | -|[#9603](https://github.com/NVIDIA/spark-rapids/issues/9603)|[BUG] fastparquet_compatibility_test fails on dataproc| -|[#8729](https://github.com/NVIDIA/spark-rapids/issues/8729)|[BUG] nightly integration test failed OOM kill in JDK11 ENV| -|[#9589](https://github.com/NVIDIA/spark-rapids/issues/9589)|[BUG] Scala 2.13 build hard-codes Java 8 target | -|[#9581](https://github.com/NVIDIA/spark-rapids/issues/9581)|Delta Lake 2.4 missing equals/hashCode override for file format and some metrics for merge| -|[#9507](https://github.com/NVIDIA/spark-rapids/issues/9507)|[BUG] Spark 3.2+/ParquetFilterSuite/Parquet filter pushdown - timestamp/ FAILED | -|[#9540](https://github.com/NVIDIA/spark-rapids/issues/9540)|[BUG] Job failed with SparkUpgradeException no matter which value are set for spark.sql.parquet.datetimeRebaseModeInRead| -|[#9545](https://github.com/NVIDIA/spark-rapids/issues/9545)|[BUG] Dataproc 2.0 test_reading_file_rewritten_with_fastparquet tests failing| -|[#9552](https://github.com/NVIDIA/spark-rapids/issues/9552)|[BUG] Inconsistent CDH dependency overrides across submodules| -|[#9571](https://github.com/NVIDIA/spark-rapids/issues/9571)|[BUG] non-deterministic compiled SQLExecPlugin.class with scala 2.13 deployment| -|[#9569](https://github.com/NVIDIA/spark-rapids/issues/9569)|[BUG] test_window_running failed in 3.1.2+3.1.3| -|[#9480](https://github.com/NVIDIA/spark-rapids/issues/9480)|[BUG] mapInPandas doesn't invoke udf on empty partitions| -|[#8644](https://github.com/NVIDIA/spark-rapids/issues/8644)|[BUG] Parquet file with malformed dictionary does not error 
when loaded| -|[#9310](https://github.com/NVIDIA/spark-rapids/issues/9310)|[BUG] Improve support for reading JSON files with malformed rows| -|[#9457](https://github.com/NVIDIA/spark-rapids/issues/9457)|[BUG] CDH 332 unit tests failing| -|[#9404](https://github.com/NVIDIA/spark-rapids/issues/9404)|[BUG] Spark reports a decimal error when create lit scalar when generate Decimal(34, -5) data.| -|[#9110](https://github.com/NVIDIA/spark-rapids/issues/9110)|[BUG] GPU Reader fails due to partition column creating column larger then cudf column size limit| -|[#8631](https://github.com/NVIDIA/spark-rapids/issues/8631)|[BUG] Parquet load failure on repeated_no_annotation.parquet| -|[#9364](https://github.com/NVIDIA/spark-rapids/issues/9364)|[BUG] CUDA illegal access error is triggering split and retry logic| - -### PRs -||| -|:---|:---| -|[#10384](https://github.com/NVIDIA/spark-rapids/pull/10384)|[DOC] Update docs for 23.12.2 release [skip ci] | -|[#10341](https://github.com/NVIDIA/spark-rapids/pull/10341)|Update changelog for v23.12.2 [skip ci]| -|[#10340](https://github.com/NVIDIA/spark-rapids/pull/10340)|Copyright to 2024 [skip ci]| -|[#10323](https://github.com/NVIDIA/spark-rapids/pull/10323)|Upgrade version to 23.12.2-SNAPSHOT| -|[#10329](https://github.com/NVIDIA/spark-rapids/pull/10329)|update download page for v23.12.2 release [skip ci]| -|[#10274](https://github.com/NVIDIA/spark-rapids/pull/10274)|PythonRunner Changes| -|[#10124](https://github.com/NVIDIA/spark-rapids/pull/10124)|Update changelog for v23.12.1 [skip ci]| -|[#10123](https://github.com/NVIDIA/spark-rapids/pull/10123)|Change version to v23.12.1 [skip ci]| -|[#10122](https://github.com/NVIDIA/spark-rapids/pull/10122)|Init changelog for v23.12.1 [skip ci]| -|[#10121](https://github.com/NVIDIA/spark-rapids/pull/10121)|[DOC] update download page for db hot fix [skip ci]| -|[#10116](https://github.com/NVIDIA/spark-rapids/pull/10116)|Upgrade to 23.12.1-SNAPSHOT| -|[#10069](https://github.com/NVIDIA/spark-rapids/pull/10069)|Revert "Support split broadcast join condition into ast and non-ast […| -|[#9470](https://github.com/NVIDIA/spark-rapids/pull/9470)|Use float to string kernel| -|[#9481](https://github.com/NVIDIA/spark-rapids/pull/9481)|Use parse_url kernel for PROTOCOL parsing| -|[#9935](https://github.com/NVIDIA/spark-rapids/pull/9935)|Init 23.12 changelog [skip ci]| -|[#9943](https://github.com/NVIDIA/spark-rapids/pull/9943)|[DOC] Update docs for 23.12.0 release [skip ci]| -|[#10014](https://github.com/NVIDIA/spark-rapids/pull/10014)|Add documentation for how to run tests with a fixed datagen seed [skip ci]| -|[#9954](https://github.com/NVIDIA/spark-rapids/pull/9954)|Update private and JNI version to released 23.12.0| -|[#10009](https://github.com/NVIDIA/spark-rapids/pull/10009)|Using fix seed to unblock 23.12 release; Move the blocked issues to 24.02| -|[#10007](https://github.com/NVIDIA/spark-rapids/pull/10007)|Fix Java OOM in non-UTC case with lots of xfail (#9944)| -|[#9985](https://github.com/NVIDIA/spark-rapids/pull/9985)|Avoid allocating GPU memory out of RMM managed pool in test| -|[#9970](https://github.com/NVIDIA/spark-rapids/pull/9970)|Avoid leading and trailing zeros in test_timestamp_seconds_rounding_necessary| -|[#9978](https://github.com/NVIDIA/spark-rapids/pull/9978)|Avoid using floating point values as partition values in tests| -|[#9979](https://github.com/NVIDIA/spark-rapids/pull/9979)|Add compatibility notes for writing ORC with lost Gregorian days [skip ci]| 
-|[#9949](https://github.com/NVIDIA/spark-rapids/pull/9949)|Override the seed for `test_map_scalars_supported_key_types ` for version of Spark before 3.4.0 [Databricks]| -|[#9961](https://github.com/NVIDIA/spark-rapids/pull/9961)|Avoid using floating point for partition values in Delta Lake tests| -|[#9960](https://github.com/NVIDIA/spark-rapids/pull/9960)|Fix LongGen accidentally using special cases when none are desired| -|[#9950](https://github.com/NVIDIA/spark-rapids/pull/9950)|Avoid generating NaNs as partition values in test_part_write_round_trip| -|[#9940](https://github.com/NVIDIA/spark-rapids/pull/9940)|Fix 'year 0 is out of range' by setting a fix seed| -|[#9946](https://github.com/NVIDIA/spark-rapids/pull/9946)|Fix test_multi_tier_ast to ignore ordering of output rows| -|[#9928](https://github.com/NVIDIA/spark-rapids/pull/9928)|Test `inset` with `NaN` only for Spark from 3.1.3| -|[#9906](https://github.com/NVIDIA/spark-rapids/pull/9906)|Fix test_initcap to use the intended limited character set| -|[#9831](https://github.com/NVIDIA/spark-rapids/pull/9831)|Skip fastparquet timestamp tests when plugin cannot read/write timestamps| -|[#9893](https://github.com/NVIDIA/spark-rapids/pull/9893)|Add multiple expression tier regression test for AST| -|[#9889](https://github.com/NVIDIA/spark-rapids/pull/9889)|Fix test_cast_string_ts_valid_format test| -|[#9833](https://github.com/NVIDIA/spark-rapids/pull/9833)|Fix a hang for Pandas UDFs on DB 13.3| -|[#9873](https://github.com/NVIDIA/spark-rapids/pull/9873)|Add support for decimal in `to_json`| -|[#9890](https://github.com/NVIDIA/spark-rapids/pull/9890)|Remove Databricks 13.3 from release 23.12| -|[#9874](https://github.com/NVIDIA/spark-rapids/pull/9874)|Fix zero-scale floor and ceil tests| -|[#9879](https://github.com/NVIDIA/spark-rapids/pull/9879)|Fix resource leak in to_json| -|[#9600](https://github.com/NVIDIA/spark-rapids/pull/9600)|Add date and timestamp support to to_json| -|[#9871](https://github.com/NVIDIA/spark-rapids/pull/9871)|Fix test_cast_string_date_valid_format generating year 0| -|[#9885](https://github.com/NVIDIA/spark-rapids/pull/9885)|Preparation for non-UTC nightly CI [skip ci]| -|[#9810](https://github.com/NVIDIA/spark-rapids/pull/9810)|Support from_utc_timestamp on the GPU for non-UTC timezones (non-DST)| -|[#9865](https://github.com/NVIDIA/spark-rapids/pull/9865)|Fix problems with nulls in sequence tests| -|[#9864](https://github.com/NVIDIA/spark-rapids/pull/9864)|Add compatibility documentation with respect to decimal overflow detection [skip ci]| -|[#9860](https://github.com/NVIDIA/spark-rapids/pull/9860)|Fixing FAQ deadlink in plugin code [skip ci]| -|[#9840](https://github.com/NVIDIA/spark-rapids/pull/9840)|Avoid using NaNs as Delta Lake partition values| -|[#9773](https://github.com/NVIDIA/spark-rapids/pull/9773)|xfail all the impacted cases when using non-UTC time zone| -|[#9849](https://github.com/NVIDIA/spark-rapids/pull/9849)|Instantly Delete pre-merge content of stage workspace if success| -|[#9848](https://github.com/NVIDIA/spark-rapids/pull/9848)|Force datagen_seed for test_ceil_scale_zero and test_decimal_round| -|[#9677](https://github.com/NVIDIA/spark-rapids/pull/9677)|Enable build for Databricks 13.3| -|[#9809](https://github.com/NVIDIA/spark-rapids/pull/9809)|Re-enable AST string integration cases| -|[#9835](https://github.com/NVIDIA/spark-rapids/pull/9835)|Avoid pre-Gregorian dates in schema_evolution_test| -|[#9786](https://github.com/NVIDIA/spark-rapids/pull/9786)|Check paths for existence to 
prevent ignorable error messages during build| -|[#9824](https://github.com/NVIDIA/spark-rapids/pull/9824)|UCX 1.15 upgrade| -|[#9800](https://github.com/NVIDIA/spark-rapids/pull/9800)|Add GpuCheckOverflowInTableInsert to Databricks 11.3+| -|[#9821](https://github.com/NVIDIA/spark-rapids/pull/9821)|Update timestamp gens to avoid "year 0 is out of range" errors| -|[#9826](https://github.com/NVIDIA/spark-rapids/pull/9826)|Set seed to 0 for test_hash_reduction_sum| -|[#9720](https://github.com/NVIDIA/spark-rapids/pull/9720)|Support timestamp in `from_json`| -|[#9818](https://github.com/NVIDIA/spark-rapids/pull/9818)|Specify nullable=False when generating filter values in dpp tests| -|[#9689](https://github.com/NVIDIA/spark-rapids/pull/9689)|Support CPU path for from_utc_timestamp function with timezone | -|[#9769](https://github.com/NVIDIA/spark-rapids/pull/9769)|Use withGpuSparkSession to customize SparkConf| -|[#9780](https://github.com/NVIDIA/spark-rapids/pull/9780)|Fix NaN handling in GpuLessThanOrEqual and GpuGreaterThanOrEqual| -|[#9795](https://github.com/NVIDIA/spark-rapids/pull/9795)|xfail AST string tests| -|[#9666](https://github.com/NVIDIA/spark-rapids/pull/9666)|Add support for parsing strings as dates in `from_json`| -|[#9673](https://github.com/NVIDIA/spark-rapids/pull/9673)|Fix the broadcast joins issues caused by InputFileBlockRule| -|[#9785](https://github.com/NVIDIA/spark-rapids/pull/9785)|Force datagen_seed for 9781 and 9784 [skip ci]| -|[#9765](https://github.com/NVIDIA/spark-rapids/pull/9765)|Let GPU scans fall back when default values exist in schema| -|[#9729](https://github.com/NVIDIA/spark-rapids/pull/9729)|Fix Delta Lake atomic table operations on spark341db| -|[#9770](https://github.com/NVIDIA/spark-rapids/pull/9770)|[BUG] Fix the doc for Maven and Scala 2.13 test example [skip ci]| -|[#9761](https://github.com/NVIDIA/spark-rapids/pull/9761)|Fix bug in tagging of JsonToStructs| -|[#9758](https://github.com/NVIDIA/spark-rapids/pull/9758)|Remove forced seed from Delta Lake part_write_round_trip_unmanaged tests| -|[#9652](https://github.com/NVIDIA/spark-rapids/pull/9652)|Add time zone config to set non-UTC| -|[#9736](https://github.com/NVIDIA/spark-rapids/pull/9736)|Fix `TimestampGen` to generate value not too close to the minimum allowed timestamp| -|[#9698](https://github.com/NVIDIA/spark-rapids/pull/9698)|Speed up build: unnecessary invalidation in the incremental recompile mode| -|[#9748](https://github.com/NVIDIA/spark-rapids/pull/9748)|Fix Delta Lake part_write_round_trip_unmanaged tests with floating point| -|[#9702](https://github.com/NVIDIA/spark-rapids/pull/9702)|Support split BroadcastNestedLoopJoin condition for AST and non-AST| -|[#9746](https://github.com/NVIDIA/spark-rapids/pull/9746)|Force test_hypot to be single seed for now| -|[#9745](https://github.com/NVIDIA/spark-rapids/pull/9745)|Avoid generating null filter values in test_delta_dfp_reuse_broadcast_exchange| -|[#9741](https://github.com/NVIDIA/spark-rapids/pull/9741)|Set seed=0 for the delta lake part roundtrip tests| -|[#9660](https://github.com/NVIDIA/spark-rapids/pull/9660)|Fully support date/time legacy rebase for nested input| -|[#9672](https://github.com/NVIDIA/spark-rapids/pull/9672)|Support String type for AST| |[#9716](https://github.com/NVIDIA/spark-rapids/pull/9716)|Initiate project version 24.02.0-SNAPSHOT| -|[#9732](https://github.com/NVIDIA/spark-rapids/pull/9732)|Temporarily force `datagen_seed=0` for `test_re_replace_all` to unblock CI| 
-|[#9726](https://github.com/NVIDIA/spark-rapids/pull/9726)|Fix leak in BatchWithPartitionData| -|[#9717](https://github.com/NVIDIA/spark-rapids/pull/9717)|Encode the file path from Iceberg when converting to a PartitionedFile| -|[#9441](https://github.com/NVIDIA/spark-rapids/pull/9441)|Add a random seed specific to datagen cases| -|[#9649](https://github.com/NVIDIA/spark-rapids/pull/9649)|Support `spark.sql.parquet.datetimeRebaseModeInRead=LEGACY` and `spark.sql.parquet.int96RebaseModeInRead=LEGACY`| -|[#9612](https://github.com/NVIDIA/spark-rapids/pull/9612)|Escape quotes and newlines when converting strings to json format in to_json| -|[#9644](https://github.com/NVIDIA/spark-rapids/pull/9644)|Add Partial Delta Lake Support for Databricks 13.3| -|[#9690](https://github.com/NVIDIA/spark-rapids/pull/9690)|Changed `extractExecutedPlan` to consider ResultQueryStageExec for Databricks 13.3| -|[#9686](https://github.com/NVIDIA/spark-rapids/pull/9686)|Removed Maven Profiles From `tests/pom.xml`| -|[#9509](https://github.com/NVIDIA/spark-rapids/pull/9509)|Fine-grained spill metrics| -|[#9658](https://github.com/NVIDIA/spark-rapids/pull/9658)|Support `spark.sql.parquet.int96RebaseModeInWrite=LEGACY`| -|[#9695](https://github.com/NVIDIA/spark-rapids/pull/9695)|Revert "Support split non-AST-able join condition for BroadcastNested…| -|[#9693](https://github.com/NVIDIA/spark-rapids/pull/9693)|Enable automerge from 23.12 to 24.02 [skip ci]| -|[#9679](https://github.com/NVIDIA/spark-rapids/pull/9679)|[Doc] update the dead link in download page [skip ci]| -|[#9678](https://github.com/NVIDIA/spark-rapids/pull/9678)|Add flow control for multithreaded shuffle writer| -|[#9635](https://github.com/NVIDIA/spark-rapids/pull/9635)|Support split non-AST-able join condition for BroadcastNestedLoopJoin| -|[#9646](https://github.com/NVIDIA/spark-rapids/pull/9646)|Fix Integration Test Failures for Databricks 13.3 Support| -|[#9670](https://github.com/NVIDIA/spark-rapids/pull/9670)|Normalize file timezone and handle missing file timezone in datetimeRebaseUtils| -|[#9657](https://github.com/NVIDIA/spark-rapids/pull/9657)|Update verify check to handle new pom files [skip ci]| -|[#9663](https://github.com/NVIDIA/spark-rapids/pull/9663)|Making User Guide info in bold and adding it as top right link in github.io [skip ci]| -|[#9609](https://github.com/NVIDIA/spark-rapids/pull/9609)|Add valid retry solution to mvn-verify [skip ci]| -|[#9655](https://github.com/NVIDIA/spark-rapids/pull/9655)|Document problem with handling of invalid characters in CSV reader| -|[#9620](https://github.com/NVIDIA/spark-rapids/pull/9620)|Add support for parsing boolean values in `from_json`| -|[#9615](https://github.com/NVIDIA/spark-rapids/pull/9615)|Bloop updates - require JDK11 in buildall + docs, build bloop for all targets.| -|[#9631](https://github.com/NVIDIA/spark-rapids/pull/9631)|Refactor Parquet readers| -|[#9637](https://github.com/NVIDIA/spark-rapids/pull/9637)|Added Support For Various Execs for Databricks 13.3 | -|[#9640](https://github.com/NVIDIA/spark-rapids/pull/9640)|Add support for `ignoreNullFields=false` in `to_json`| -|[#9623](https://github.com/NVIDIA/spark-rapids/pull/9623)|Running window optimization for `LAST()`| -|[#9641](https://github.com/NVIDIA/spark-rapids/pull/9641)|Revert "Support rebase checking for nested dates and timestamps (#9617)"| -|[#9423](https://github.com/NVIDIA/spark-rapids/pull/9423)|Re-enable `from_json` / `JsonToStructs`| -|[#9624](https://github.com/NVIDIA/spark-rapids/pull/9624)|Add jenkins-level 
retry for pre-merge build in databricks runtimes| -|[#9608](https://github.com/NVIDIA/spark-rapids/pull/9608)|Fix nullability issues for some decimal operations| -|[#9617](https://github.com/NVIDIA/spark-rapids/pull/9617)|Support rebase checking for nested dates and timestamps| -|[#9611](https://github.com/NVIDIA/spark-rapids/pull/9611)|Move simple classes after refactoring to sql-plugin-api| -|[#9618](https://github.com/NVIDIA/spark-rapids/pull/9618)|Remove unused dataTypes argument from HostShuffleCoalesceIterator| -|[#9626](https://github.com/NVIDIA/spark-rapids/pull/9626)|Fix ENV typo in pre-merge github actions [skip ci]| -|[#9593](https://github.com/NVIDIA/spark-rapids/pull/9593)|PythonRunner and RapidsErrorUtils Changes For Databricks 13.3| -|[#9607](https://github.com/NVIDIA/spark-rapids/pull/9607)|Integration tests: Install specific fastparquet version.| -|[#9610](https://github.com/NVIDIA/spark-rapids/pull/9610)|Propagate local properties to broadcast execs| -|[#9544](https://github.com/NVIDIA/spark-rapids/pull/9544)|Support batching for `RANGE` running window aggregations. Including on| -|[#9601](https://github.com/NVIDIA/spark-rapids/pull/9601)|Remove usage of deprecated scala.Proxy| -|[#9591](https://github.com/NVIDIA/spark-rapids/pull/9591)|Enable implicit JDK profile activation| -|[#9586](https://github.com/NVIDIA/spark-rapids/pull/9586)|Merge metrics and file format fixes to Delta 2.4 support| -|[#9594](https://github.com/NVIDIA/spark-rapids/pull/9594)|Revert "Ignore failing Parquet filter test to unblock CI (#9519)"| -|[#9454](https://github.com/NVIDIA/spark-rapids/pull/9454)|Support encryption and compression in disk store| -|[#9439](https://github.com/NVIDIA/spark-rapids/pull/9439)|Support stack function| -|[#9583](https://github.com/NVIDIA/spark-rapids/pull/9583)|Fix fastparquet tests to work with HDFS| -|[#9508](https://github.com/NVIDIA/spark-rapids/pull/9508)|Consolidate deps switching in an intermediate pom| -|[#9562](https://github.com/NVIDIA/spark-rapids/pull/9562)|Delta Lake 2.3.0 support| -|[#9576](https://github.com/NVIDIA/spark-rapids/pull/9576)|Move Stack classes to wrapper classes to fix non-deterministic build issue| -|[#9572](https://github.com/NVIDIA/spark-rapids/pull/9572)|Add retry for CrossJoinIterator and ConditionalNestedLoopJoinIterator| -|[#9575](https://github.com/NVIDIA/spark-rapids/pull/9575)|Fix `test_window_running*()` for `NTH_VALUE IGNORE NULLS`.| -|[#9574](https://github.com/NVIDIA/spark-rapids/pull/9574)|Fix broken #endif scala comments [skip ci]| -|[#9568](https://github.com/NVIDIA/spark-rapids/pull/9568)|Enforce Apache 3.3.0+ for Scala 2.13| -|[#9557](https://github.com/NVIDIA/spark-rapids/pull/9557)|Support launching Map Pandas UDF on empty partitions| -|[#9489](https://github.com/NVIDIA/spark-rapids/pull/9489)|Batching support for ROW-based `FIRST()` window function| -|[#9510](https://github.com/NVIDIA/spark-rapids/pull/9510)|Add Databricks 13.3 shim boilerplate code and refactor Databricks 12.2 shim| -|[#9554](https://github.com/NVIDIA/spark-rapids/pull/9554)|Fix fastparquet installation for| -|[#9536](https://github.com/NVIDIA/spark-rapids/pull/9536)|Add CPU POC of TimeZoneDB; Test some time zones by comparing CPU POC and Spark| -|[#9558](https://github.com/NVIDIA/spark-rapids/pull/9558)|Support integration test against scala2.13 spark binaries[skip ci]| -|[#8592](https://github.com/NVIDIA/spark-rapids/pull/8592)|Scala 2.13 Support| -|[#9551](https://github.com/NVIDIA/spark-rapids/pull/9551)|Enable malformed Parquet failure test| 
-|[#9546](https://github.com/NVIDIA/spark-rapids/pull/9546)|Support OverwriteByExpressionExecV1 for Delta Lake tables| -|[#9527](https://github.com/NVIDIA/spark-rapids/pull/9527)|Support Split And Retry for GpuProjectAstExec| -|[#9541](https://github.com/NVIDIA/spark-rapids/pull/9541)|Move simple classes to API| -|[#9548](https://github.com/NVIDIA/spark-rapids/pull/9548)|Append new authorized user to blossom-ci whitelist [skip ci]| -|[#9418](https://github.com/NVIDIA/spark-rapids/pull/9418)|Fix STRUCT comparison between Pandas and Spark dataframes in fastparquet tests| -|[#9468](https://github.com/NVIDIA/spark-rapids/pull/9468)|Add SplitAndRetry to GpuRunningWindowIterator| -|[#9486](https://github.com/NVIDIA/spark-rapids/pull/9486)|Add partial support for `to_json`| -|[#9538](https://github.com/NVIDIA/spark-rapids/pull/9538)|Fix tiered project breaking higher order functions| -|[#9539](https://github.com/NVIDIA/spark-rapids/pull/9539)|Add delta-24x to delta-lake/README.md [skip ci]| -|[#9534](https://github.com/NVIDIA/spark-rapids/pull/9534)|Add pyarrow tests for Databricks runtime| -|[#9444](https://github.com/NVIDIA/spark-rapids/pull/9444)|Remove redundant pass-through shuffle manager classes| -|[#9531](https://github.com/NVIDIA/spark-rapids/pull/9531)|Fix relative path for spark-shell nightly test [skip ci]| -|[#9525](https://github.com/NVIDIA/spark-rapids/pull/9525)|Follow-up to dbdeps consolidation| -|[#9506](https://github.com/NVIDIA/spark-rapids/pull/9506)|Move ProxyShuffleInternalManagerBase to api| -|[#9504](https://github.com/NVIDIA/spark-rapids/pull/9504)|Add a spark-shell smoke test to premerge and nightly| -|[#9519](https://github.com/NVIDIA/spark-rapids/pull/9519)|Ignore failing Parquet filter test to unblock CI| -|[#9478](https://github.com/NVIDIA/spark-rapids/pull/9478)|Support AppendDataExecV1 for Delta Lake tables| -|[#9366](https://github.com/NVIDIA/spark-rapids/pull/9366)|Add tests to check compatibility with `fastparquet`| -|[#9419](https://github.com/NVIDIA/spark-rapids/pull/9419)|Add retry to RoundRobin Partitioner and Range Partitioner| -|[#9502](https://github.com/NVIDIA/spark-rapids/pull/9502)|Install Dependencies Needed For Databricks 13.3| -|[#9296](https://github.com/NVIDIA/spark-rapids/pull/9296)|Implement `percentile` aggregation| -|[#9488](https://github.com/NVIDIA/spark-rapids/pull/9488)|Add Shim JSON Headers for Databricks 13.3| -|[#9443](https://github.com/NVIDIA/spark-rapids/pull/9443)|Add AtomicReplaceTableAsSelectExec support for Delta Lake| -|[#9476](https://github.com/NVIDIA/spark-rapids/pull/9476)|Refactor common Delta Lake test code| -|[#9463](https://github.com/NVIDIA/spark-rapids/pull/9463)|Fix Cloudera 3.3.2 shim for handling CheckOverflowInTableInsert and orc zstd support| -|[#9460](https://github.com/NVIDIA/spark-rapids/pull/9460)|Update links in old release notes to new doc locations [skip ci]| -|[#9405](https://github.com/NVIDIA/spark-rapids/pull/9405)|Wrap scalar generation into spark session in integration test| -|[#9459](https://github.com/NVIDIA/spark-rapids/pull/9459)|Fix 332cdh build [skip ci]| -|[#9425](https://github.com/NVIDIA/spark-rapids/pull/9425)|Add support for AtomicCreateTableAsSelect with Delta Lake| -|[#9434](https://github.com/NVIDIA/spark-rapids/pull/9434)|Add retry support to `HostToGpuCoalesceIterator.concatAllAndPutOnGPU`| -|[#9453](https://github.com/NVIDIA/spark-rapids/pull/9453)|Update codeowner and blossom-ci ACL [skip ci]| -|[#9396](https://github.com/NVIDIA/spark-rapids/pull/9396)|Add support for Cloudera 
CDS-3.3.2| -|[#9380](https://github.com/NVIDIA/spark-rapids/pull/9380)|Fix parsing of Parquet legacy list-of-struct format| -|[#9438](https://github.com/NVIDIA/spark-rapids/pull/9438)|Fix auto merge conflict 9437 [skip ci]| -|[#9424](https://github.com/NVIDIA/spark-rapids/pull/9424)|Refactor aggregate functions| -|[#9414](https://github.com/NVIDIA/spark-rapids/pull/9414)|Add retry to GpuHashJoin.filterNulls| -|[#9388](https://github.com/NVIDIA/spark-rapids/pull/9388)|Add developer documentation about working with data sources [skip ci]| -|[#9369](https://github.com/NVIDIA/spark-rapids/pull/9369)|Improve JSON empty row fix to use less memory| -|[#9373](https://github.com/NVIDIA/spark-rapids/pull/9373)|Fix auto merge conflict 9372| -|[#9308](https://github.com/NVIDIA/spark-rapids/pull/9308)|Initiate arm64 CI support [skip ci]| -|[#9292](https://github.com/NVIDIA/spark-rapids/pull/9292)|Init project version 23.12.0-SNAPSHOT| ## Older Releases Changelog of older releases can be found at [docs/archives](/docs/archives) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a416829e7f2..da0e6d8675a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -130,15 +130,15 @@ mvn -pl dist -PnoSnapshots package -DskipTests Verify that shim-specific classes are hidden from a conventional classloader. ```bash -$ javap -cp dist/target/rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl +$ javap -cp dist/target/rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl Error: class not found: com.nvidia.spark.rapids.shims.SparkShimImpl ``` However, its bytecode can be loaded if prefixed with `spark3XY` not contained in the package name ```bash -$ javap -cp dist/target/rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2 -Warning: File dist/target/rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl +$ javap -cp dist/target/rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2 +Warning: File dist/target/rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl Compiled from "SparkShims.scala" public final class com.nvidia.spark.rapids.shims.SparkShimImpl { ``` @@ -181,7 +181,7 @@ mvn package -pl dist -am -Dbuildver=340 -DallowConventionalDistJar=true Verify `com.nvidia.spark.rapids.shims.SparkShimImpl` is conventionally loadable: ```bash -$ javap -cp dist/target/rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2 +$ javap -cp dist/target/rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2 Compiled from "SparkShims.scala" public final class com.nvidia.spark.rapids.shims.SparkShimImpl { ``` diff --git a/NOTICE b/NOTICE index eb8aaeb9977..657bfecd06a 100644 --- a/NOTICE +++ b/NOTICE @@ -48,6 +48,17 @@ The Apache Software Foundation (http://www.apache.org/). -------------------------------------------------------------------------------- +This project includes software from the Apache Gluten project +(www.github.com/apache/incubator-gluten/). 
+ +Apache Gluten (Incubating) +Copyright (2024) The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +-------------------------------------------------------------------------------- + This project includes code from Kite, developed at Cloudera, Inc. with the following copyright notice: diff --git a/README.md b/README.md index d789cff0488..02e5b4cd95a 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,8 @@ access to any of the memory that RMM is holding. The Qualification and Profiling tools have been moved to [nvidia/spark-rapids-tools](https://github.com/NVIDIA/spark-rapids-tools) repo. -Please refer to [Qualification tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-qualification-tool.html) -and [Profiling tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/spark-profiling-tool.html) +Please refer to [Qualification tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/qualification/overview.html) +and [Profiling tool documentation](https://docs.nvidia.com/spark-rapids/user-guide/latest/profiling/overview.html) for more details on how to use the tools. ## Dependency for External Projects @@ -73,7 +73,7 @@ as a `provided` dependency. com.nvidia rapids-4-spark_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT provided ``` diff --git a/aggregator/pom.xml b/aggregator/pom.xml index 02ebabc1ecd..7d3d4b94c35 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark-aggregator_2.12 RAPIDS Accelerator for Apache Spark Aggregator Creates an aggregated shaded package of the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT aggregator @@ -728,6 +728,23 @@ + + release343 + + + buildver + 343 + + + + + com.nvidia + rapids-4-spark-delta-24x_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + release350 diff --git a/api_validation/pom.xml b/api_validation/pom.xml index f923a21809d..a65993b4e13 100644 --- a/api_validation/pom.xml +++ b/api_validation/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-api-validation_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT api_validation diff --git a/build/build-info b/build/build-info index 88cd8516d13..434e7b498bc 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ set -e echo_build_properties() { echo version=$1 echo cudf_version=$2 - echo user=$USER + echo user=$(whoami) echo revision=$(git rev-parse HEAD) echo branch=$(git rev-parse --abbrev-ref HEAD) echo date=$(date -u +%Y-%m-%dT%H:%M:%SZ) diff --git a/datagen/README.md b/datagen/README.md index 983f2de5e2c..ced310f7c82 100644 --- a/datagen/README.md +++ b/datagen/README.md @@ -24,12 +24,12 @@ Where `$SPARK_VERSION` is a compressed version number, like 330 for Spark 3.3.0. 
After this the jar should be at `target/datagen_2.12-$PLUGIN_VERSION-spark$SPARK_VERSION.jar` -for example a Spark 3.3.0 jar for the 24.04.0 release would be -`target/datagen_2.12-24.04.0-spark330.jar` +for example a Spark 3.3.0 jar for the 24.06.0 release would be +`target/datagen_2.12-24.06.0-spark330.jar` To get a spark shell with this you can run ```shell -spark-shell --jars target/datagen_2.12-24.04.0-spark330.jar +spark-shell --jars target/datagen_2.12-24.06.0-spark330.jar ``` After that you should be good to go. diff --git a/datagen/ScaleTest.md b/datagen/ScaleTest.md index c258c1a11ef..bdd7c2ff5e7 100644 --- a/datagen/ScaleTest.md +++ b/datagen/ScaleTest.md @@ -44,7 +44,7 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.sql.parquet.datetimeRebaseModeInWrite=CORRECTED \ --class com.nvidia.rapids.tests.scaletest.ScaleTestDataGen \ # the main class --jars $SPARK_HOME/examples/jars/scopt_2.12-3.7.1.jar \ # one dependency jar just shipped with Spark under $SPARK_HOME -./target/datagen_2.12-24.04.0-SNAPSHOT-spark332.jar \ +./target/datagen_2.12-24.06.0-SNAPSHOT-spark332.jar \ 1 \ 10 \ parquet \ diff --git a/datagen/pom.xml b/datagen/pom.xml index 7ee1f4d7be1..9acdfaab044 100644 --- a/datagen/pom.xml +++ b/datagen/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml datagen_2.12 Data Generator Tools for generating large amounts of data - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT datagen diff --git a/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala b/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala index da8f9461e2e..91335afe4e6 100644 --- a/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala +++ b/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala @@ -609,6 +609,15 @@ abstract class DataGen(var conf: ColumnConf, this } + def setNullProbabilityRecursively(probability: Double): DataGen = { + this.userProvidedNullGen = Some(NullProbabilityGenerationFunction(probability)) + children.foreach { + case (_, dataGen) => + dataGen.setNullProbabilityRecursively(probability) + } + this + } + /** * Set a specific location to seed mapping for the value generation. */ @@ -672,6 +681,7 @@ abstract class DataGen(var conf: ColumnConf, * Get the default value generator for this specific data gen. */ protected def getValGen: GeneratorFunction + def children: Seq[(String, DataGen)] /** * Get the final ready to use GeneratorFunction for the data generator. 
@@ -823,6 +833,8 @@ class BooleanGen(conf: ColumnConf, override def dataType: DataType = BooleanType override protected def getValGen: GeneratorFunction = BooleanGenFunc() + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -878,6 +890,8 @@ class ByteGen(conf: ColumnConf, extends DataGen(conf, defaultValueRange) { override def getValGen: GeneratorFunction = ByteGenFunc() override def dataType: DataType = ByteType + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -935,6 +949,8 @@ class ShortGen(conf: ColumnConf, override def getValGen: GeneratorFunction = ShortGenFunc() override def dataType: DataType = ShortType + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -991,6 +1007,8 @@ class IntGen(conf: ColumnConf, override def getValGen: GeneratorFunction = IntGenFunc() override def dataType: DataType = IntegerType + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -1045,6 +1063,8 @@ class LongGen(conf: ColumnConf, override def getValGen: GeneratorFunction = LongGenFunc() override def dataType: DataType = LongType + + override def children: Seq[(String, DataGen)] = Seq.empty } case class Decimal32GenFunc( @@ -1284,6 +1304,8 @@ class DecimalGen(dt: DecimalType, val max = DecimalGen.genMaxUnscaled(dt.precision) DecimalGenFunc(dt.precision, dt.scale, -max, max) } + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -1341,6 +1363,8 @@ class TimestampGen(conf: ColumnConf, override protected def getValGen: GeneratorFunction = TimestampGenFunc() override def dataType: DataType = TimestampType + + override def children: Seq[(String, DataGen)] = Seq.empty } object BigDataGenConsts { @@ -1418,6 +1442,8 @@ class DateGen(conf: ColumnConf, override protected def getValGen: GeneratorFunction = DateGenFunc() override def dataType: DataType = DateType + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -1440,6 +1466,8 @@ class DoubleGen(conf: ColumnConf, defaultValueRange: Option[(Any, Any)]) override def dataType: DataType = DoubleType override protected def getValGen: GeneratorFunction = DoubleGenFunc() + + override def children: Seq[(String, DataGen)] = Seq.empty } /** @@ -1462,6 +1490,8 @@ class FloatGen(conf: ColumnConf, defaultValueRange: Option[(Any, Any)]) override def dataType: DataType = FloatType override protected def getValGen: GeneratorFunction = FloatGenFunc() + + override def children: Seq[(String, DataGen)] = Seq.empty } trait JSONType { @@ -1648,6 +1678,8 @@ class StringGen(conf: ColumnConf, defaultValueRange: Option[(Any, Any)]) override def dataType: DataType = StringType override protected def getValGen: GeneratorFunction = ASCIIGenFunc() + + override def children: Seq[(String, DataGen)] = Seq.empty } case class StructGenFunc(childGens: Array[GeneratorFunction]) extends GeneratorFunction { @@ -1752,6 +1784,8 @@ class ArrayGen(child: DataGen, None } } + + override def children: Seq[(String, DataGen)] = Seq(("data", child)) } case class MapGenFunc( @@ -1816,6 +1850,8 @@ class MapGen(key: DataGen, None } } + + override def children: Seq[(String, DataGen)] = Seq(("key", key), ("value", value)) } @@ -1864,6 +1900,11 @@ class ColumnGen(val dataGen: DataGen) { this } + def setNullProbabilityRecursively(probability: Double): ColumnGen = { + dataGen.setNullProbabilityRecursively(probability) + this + } + def setNullGen(f: NullGeneratorFunction): ColumnGen = { dataGen.setNullGen(f) this @@ -1973,6 +2014,14 @@ class TableGen(val columns: Seq[(String, ColumnGen)], numRows: Long) { this 
} + def setNullProbabilityRecursively(probability: Double): TableGen = { + columns.foreach { + case (_, columnGen) => + columnGen.setNullProbabilityRecursively(probability) + } + this + } + /** * Convert this table into a `DataFrame` that can be * written out or used directly. Writing it out to parquet diff --git a/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/datagen/DataGenExprBase.scala b/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/datagen/DataGenExprBase.scala index 421869c395d..ccbb03c4faa 100644 --- a/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/datagen/DataGenExprBase.scala +++ b/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/datagen/DataGenExprBase.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.tests.datagen diff --git a/delta-lake/delta-20x/pom.xml b/delta-lake/delta-20x/pom.xml index 53654a1a823..c180f566cb5 100644 --- a/delta-lake/delta-20x/pom.xml +++ b/delta-lake/delta-20x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.0.x Support Delta Lake 2.0.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-20x diff --git a/delta-lake/delta-21x/pom.xml b/delta-lake/delta-21x/pom.xml index b47e9ba8e59..90dcc723f8f 100644 --- a/delta-lake/delta-21x/pom.xml +++ b/delta-lake/delta-21x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.1.x Support Delta Lake 2.1.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-21x diff --git a/delta-lake/delta-22x/pom.xml b/delta-lake/delta-22x/pom.xml index e94fac38f3a..82d52e2e896 100644 --- a/delta-lake/delta-22x/pom.xml +++ b/delta-lake/delta-22x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.2.x Support Delta Lake 2.2.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-22x diff --git a/delta-lake/delta-23x/pom.xml b/delta-lake/delta-23x/pom.xml index e9036a4d185..bf003810e6f 100644 --- a/delta-lake/delta-23x/pom.xml +++ b/delta-lake/delta-23x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-delta-23x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-23x diff --git a/delta-lake/delta-24x/pom.xml b/delta-lake/delta-24x/pom.xml index f42bb49bec9..1cfcbaa8385 100644 --- a/delta-lake/delta-24x/pom.xml +++ b/delta-lake/delta-24x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.4.x Support Delta Lake 2.4.x support for the RAPIDS Accelerator for Apache Spark 
- 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-24x diff --git a/delta-lake/delta-spark330db/pom.xml b/delta-lake/delta-spark330db/pom.xml index 7a825b6e2af..d33d4b16d67 100644 --- a/delta-lake/delta-spark330db/pom.xml +++ b/delta-lake/delta-spark330db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark330db_2.12 RAPIDS Accelerator for Apache Spark Databricks 11.3 Delta Lake Support Databricks 11.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-spark330db diff --git a/delta-lake/delta-spark332db/pom.xml b/delta-lake/delta-spark332db/pom.xml index dae4a0b96a8..7aaf04dbc7b 100644 --- a/delta-lake/delta-spark332db/pom.xml +++ b/delta-lake/delta-spark332db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark332db_2.12 RAPIDS Accelerator for Apache Spark Databricks 12.2 Delta Lake Support Databricks 12.2 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-spark332db diff --git a/delta-lake/delta-spark341db/pom.xml b/delta-lake/delta-spark341db/pom.xml index ad61dafc738..d4f9ad9c436 100644 --- a/delta-lake/delta-spark341db/pom.xml +++ b/delta-lake/delta-spark341db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark341db_2.12 RAPIDS Accelerator for Apache Spark Databricks 13.3 Delta Lake Support Databricks 13.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT false diff --git a/delta-lake/delta-stub/pom.xml b/delta-lake/delta-stub/pom.xml index 99fd2a836d0..28c6db7a251 100644 --- a/delta-lake/delta-stub/pom.xml +++ b/delta-lake/delta-stub/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-stub_2.12 RAPIDS Accelerator for Apache Spark Delta Lake Stub Delta Lake stub for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-stub diff --git a/dist/pom.xml b/dist/pom.xml index f8ca2ebf9bf..f2f9ea7e17a 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark_2.12 RAPIDS Accelerator for Apache Spark Distribution Creates the distribution package of the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT com.nvidia diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md index f1a53cc539b..cb930f64164 100644 --- a/docs/additional-functionality/advanced_configs.md +++ b/docs/additional-functionality/advanced_configs.md @@ -129,12 +129,12 @@ Name | Description | Default Value | Applicable at spark.rapids.sql.json.read.decimal.enabled|When reading a quoted string as a decimal Spark supports reading non-ascii unicode digits, and the RAPIDS Accelerator does not.|true|Runtime spark.rapids.sql.json.read.double.enabled|JSON reading is not 100% compatible when reading doubles.|true|Runtime spark.rapids.sql.json.read.float.enabled|JSON reading is not 100% compatible when reading floats.|true|Runtime 
-spark.rapids.sql.json.read.mixedTypesAsString.enabled|JSON reading is not 100% compatible when reading mixed types as string.|false|Runtime spark.rapids.sql.mode|Set the mode for the Rapids Accelerator. The supported modes are explainOnly and executeOnGPU. This config cannot be changed at runtime; you must restart the application for it to take effect. The default mode is executeOnGPU, which means the RAPIDS Accelerator plugin converts the Spark operations and executes them on the GPU when possible. The explainOnly mode allows running queries on the CPU and the RAPIDS Accelerator will evaluate the queries as if they were going to run on the GPU. The explanations of what would have run on the GPU and why are output in log messages. When using explainOnly mode, the default explain output is ALL; this can be changed by setting spark.rapids.sql.explain. See that config for more details.|executeongpu|Startup spark.rapids.sql.optimizer.joinReorder.enabled|When enabled, joins may be reordered for improved query performance|true|Runtime spark.rapids.sql.python.gpu.enabled|This is an experimental feature and is likely to change in the future. Enable (true) or disable (false) support for scheduling Python Pandas UDFs with GPU resources. When enabled, pandas UDFs are assumed to share the same GPU that the RAPIDS accelerator uses and will honor the python GPU configs|false|Runtime -spark.rapids.sql.reader.chunked|Enable a chunked reader where possible. A chunked reader allows reading highly compressed data that could not be read otherwise, but at the expense of more GPU memory, and in some cases more GPU computation.|true|Runtime -spark.rapids.sql.reader.chunked.subPage|Enable a chunked reader where possible for reading data that is smaller than the typical row group/page limit. Currently this only works for parquet.|true|Runtime +spark.rapids.sql.reader.chunked|Enable a chunked reader where possible. A chunked reader allows reading highly compressed data that could not be read otherwise, but at the expense of more GPU memory, and in some cases more GPU computation. Currently this only supports ORC and Parquet formats.|true|Runtime +spark.rapids.sql.reader.chunked.limitMemoryUsage|Enable a soft limit on the internal memory usage of the chunked reader (if being used). Such limit is calculated as the multiplication of 'spark.rapids.sql.batchSizeBytes' and 'spark.rapids.sql.reader.chunked.memoryUsageRatio'. For example, if batchSizeBytes is set to 1GB and memoryUsageRatio is 4, the chunked reader will try to keep its memory usage under 4GB.|None|Runtime +spark.rapids.sql.reader.chunked.subPage|Enable a chunked reader where possible for reading data that is smaller than the typical row group/page limit. Currently deprecated and replaced by 'spark.rapids.sql.reader.chunked.limitMemoryUsage'.|None|Runtime spark.rapids.sql.reader.multithreaded.combine.sizeBytes|The target size in bytes to combine multiple small files together when using the MULTITHREADED parquet or orc reader. With combine disabled, the MULTITHREADED reader reads the files in parallel and sends individual files down to the GPU, but that can be inefficient for small files. When combine is enabled, files that are ready within spark.rapids.sql.reader.multithreaded.combine.waitTime together, up to this threshold size, are combined before sending down to GPU. This can be disabled by setting it to 0.
Note that combine also will not go over the spark.rapids.sql.reader.batchSizeRows or spark.rapids.sql.reader.batchSizeBytes limits.|67108864|Runtime spark.rapids.sql.reader.multithreaded.combine.waitTime|When using the multithreaded parquet or orc reader with combine mode, how long to wait, in milliseconds, for more files to finish if they haven't met the size threshold. Note that this will wait this amount of time from when the last file was available, so the total wait time could be larger than this.|200|Runtime spark.rapids.sql.reader.multithreaded.read.keepOrder|When using the MULTITHREADED reader, if this is set to true we read the files in the same order Spark does; otherwise the order may not be the same. Now it is supported only for parquet and orc.|true|Runtime @@ -184,6 +184,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.ArrayContains|`array_contains`|Returns a boolean if the array contains the passed in key|true|None| spark.rapids.sql.expression.ArrayExcept|`array_except`|Returns an array of the elements in array1 but not in array2, without duplicates|true|This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal, but the CPU implementation currently does not (see SPARK-39845). Also, Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+| spark.rapids.sql.expression.ArrayExists|`exists`|Return true if any element satisfies the predicate LambdaFunction|true|None| +spark.rapids.sql.expression.ArrayFilter|`filter`|Filter an input array using a given predicate|true|None| spark.rapids.sql.expression.ArrayIntersect|`array_intersect`|Returns an array of the elements in the intersection of array1 and array2, without duplicates|true|This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal, but the CPU implementation currently does not (see SPARK-39845). Also, Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal.
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+| spark.rapids.sql.expression.ArrayMax|`array_max`|Returns the maximum value in the array|true|None| spark.rapids.sql.expression.ArrayMin|`array_min`|Returns the minimum value in the array|true|None| @@ -248,7 +249,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.FromUnixTime|`from_unixtime`|Get the string from a unix timestamp|true|None| spark.rapids.sql.expression.GetArrayItem| |Gets the field at `ordinal` in the Array|true|None| spark.rapids.sql.expression.GetArrayStructFields| |Extracts the `ordinal`-th fields of all array elements for the data with the type of array of struct|true|None| -spark.rapids.sql.expression.GetJsonObject|`get_json_object`|Extracts a json object from path|false|This is disabled by default because escape sequences are not processed correctly, the input is not validated, and the output is not normalized the same as Spark| +spark.rapids.sql.expression.GetJsonObject|`get_json_object`|Extracts a json object from path|false|This is disabled by default because it is an experimental feature that could be unstable or have performance issues.| spark.rapids.sql.expression.GetMapValue| |Gets Value from a Map based on a key|true|None| spark.rapids.sql.expression.GetStructField| |Gets the named field of the struct|true|None| spark.rapids.sql.expression.GetTimestamp| |Gets timestamps from strings using given pattern.|true|None| @@ -269,7 +270,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.IsNotNull|`isnotnull`|Checks if a value is not null|true|None| spark.rapids.sql.expression.IsNull|`isnull`|Checks if a value is null|true|None| spark.rapids.sql.expression.JsonToStructs|`from_json`|Returns a struct value with the given `jsonStr` and `schema`|false|This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case| -spark.rapids.sql.expression.JsonTuple|`json_tuple`|Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.|false|This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.| +spark.rapids.sql.expression.JsonTuple|`json_tuple`|Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.|false|This is disabled by default because it is an experimental feature that could be unstable or have performance issues.| spark.rapids.sql.expression.KnownFloatingPointNormalized| |Tag to prevent redundant normalization|true|None| spark.rapids.sql.expression.KnownNotNull| |Tag an expression as known to not be null|true|None| spark.rapids.sql.expression.Lag|`lag`|Window function that returns N entries behind this one|true|None| diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma index adf28f5fea2..fe5c64b1dfc 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_no_rdma @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION.
All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ # - ROCKY_VER: Rocky Linux OS version ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 ARG UCX_CUDA_VER=11 ARG UCX_ARCH=x86_64 ARG ROCKY_VER=8 @@ -38,6 +38,5 @@ RUN ls /usr/lib RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \ wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ tar -xvf *.bz2 && \ - rpm -i ucx-$UCX_VER*.rpm && \ - rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \ + rpm -i `ls ucx-[0-9]*.rpm ucx-cuda-[0-9]*.rpm` --nodeps && \ rm -rf /tmp/ucx_install diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma index 9083e1561b5..f88c4212a92 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.rocky_rdma @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ # - ROCKY_VER: Rocky Linux OS version ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 ARG UCX_CUDA_VER=11 ARG UCX_ARCH=x86_64 ARG ROCKY_VER=8 @@ -37,7 +37,5 @@ RUN yum update -y && yum install -y wget bzip2 rdma-core numactl-libs libgomp li RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \ wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \ tar -xvf *.bz2 && \ - rpm -i ucx-$UCX_VER*.rpm && \ - rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \ - rpm -i ucx-ib-$UCX_VER-1.el8.x86_64.rpm ucx-rdmacm-$UCX_VER-1.el8.x86_64.rpm && \ + rpm -i `ls ucx-[0-9]*.rpm ucx-cuda-[0-9]*.rpm ucx-ib-[0-9]*.rpm ucx-rdmacm-[0-9]*.rpm` --nodeps && \ rm -rf /tmp/ucx_install diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma index e0318a0de60..792e7848e56 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_no_rdma @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ # ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 ARG UCX_CUDA_VER=11 ARG UCX_ARCH=x86_64 ARG UBUNTU_VER=20.04 diff --git a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma index 55281fc4b1b..42014c67251 100644 --- a/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma +++ b/docs/additional-functionality/shuffle-docker-examples/Dockerfile.ubuntu_rdma @@ -1,5 +1,5 @@ # -# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ ARG RDMA_CORE_VERSION=32.1 ARG CUDA_VER=11.8.0 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 ARG UCX_CUDA_VER=11 ARG UCX_ARCH=x86_64 ARG UBUNTU_VER=20.04 diff --git a/docs/archive.md b/docs/archive.md index edd270ae5bc..e2e23a26f11 100644 --- a/docs/archive.md +++ b/docs/archive.md @@ -5,6 +5,97 @@ nav_order: 15 --- Below are archived releases for RAPIDS Accelerator for Apache Spark. +## Release v24.02.0 +### Hardware Requirements: + +The plugin is tested on the following architectures: + + GPU Models: NVIDIA V100, T4, A10/A100, L4 and H100 GPUs + +### Software Requirements: + + OS: Ubuntu 20.04, Ubuntu 22.04, CentOS 7, or Rocky Linux 8 + + NVIDIA Driver*: R470+ + + Runtime: + Scala 2.12, 2.13 + Python, Java Virtual Machine (JVM) compatible with your spark-version. + + * Check the Spark documentation for Python and Java version compatibility with your specific + Spark version. For instance, visit `https://spark.apache.org/docs/3.4.1` for Spark 3.4.1. + + Supported Spark versions: + Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 + Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3 + Apache Spark 3.4.0, 3.4.1 + Apache Spark 3.5.0 + + Supported Databricks runtime versions for Azure and AWS: + Databricks 10.4 ML LTS (GPU, Scala 2.12, Spark 3.2.1) + Databricks 11.3 ML LTS (GPU, Scala 2.12, Spark 3.3.0) + Databricks 12.2 ML LTS (GPU, Scala 2.12, Spark 3.3.2) + Databricks 13.3 ML LTS (GPU, Scala 2.12, Spark 3.4.1) + + Supported Dataproc versions: + GCP Dataproc 2.0 + GCP Dataproc 2.1 + + Supported Dataproc Serverless versions: + Spark runtime 1.1 LTS + Spark runtime 2.0 + Spark runtime 2.1 + +*Some hardware may have a minimum driver version greater than R470. Check the GPU spec sheet +for your hardware's minimum driver version. + +*For Cloudera and EMR support, please refer to the +[Distributions](https://docs.nvidia.com/spark-rapids/user-guide/latest/faq.html#which-distributions-are-supported) section of the FAQ. 
+ +### RAPIDS Accelerator's Support Policy for Apache Spark +The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html) + +### Download RAPIDS Accelerator for Apache Spark v24.02.0 + +| Processor | Scala Version | Download Jar | Download Signature | +|-----------|---------------|--------------|--------------------| +| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0.jar.asc) | +| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0.jar.asc) | +| arm64 | Scala 2.12 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0-cuda11-arm64.jar.asc) | +| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0-cuda11-arm64.jar.asc) | + +This package is built against CUDA 11.8. It is tested on V100, T4, A10, A100, L4 and H100 GPUs with +CUDA 11.8 through CUDA 12.0. + +### Verify signature +* Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com). +* Import the public key: `gpg --import PUB_KEY` +* Verify the signature for Scala 2.12 jar: + `gpg --verify rapids-4-spark_2.12-24.02.0.jar.asc rapids-4-spark_2.12-24.02.0.jar` +* Verify the signature for Scala 2.13 jar: + `gpg --verify rapids-4-spark_2.13-24.02.0.jar.asc rapids-4-spark_2.13-24.02.0.jar` + +The output of signature verify: + + gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) " + +### Release Notes +New functionality and performance improvements for this release include: +* Discontinued support for Nvidia GPUs based on Pascal architecture. +* Set get_json_object functionality to disabled by default. +* Implemented string comparison in AST expressions. +* Expanded timezone support to include options beyond UTC. +* Optional checksums for cached files in the file cache. +* Introduced support for Databricks 13.3 ML LTS. +* Added support for parse_url functionality. +* Introducing Lazy Quantifier support for regular expression functions. +* Added support for the format_number function. +* Enhanced batching support for row-based bounded window functions. +* For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases). + +For a detailed list of changes, please refer to the +[CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md). 
+ ## Release v23.12.2 ### Hardware Requirements: diff --git a/docs/archives/CHANGELOG_23.02_to_23.10.md b/docs/archives/CHANGELOG_23.02_to_23.12.md similarity index 80% rename from docs/archives/CHANGELOG_23.02_to_23.10.md rename to docs/archives/CHANGELOG_23.02_to_23.12.md index dc2601adac6..485d394d2d7 100644 --- a/docs/archives/CHANGELOG_23.02_to_23.10.md +++ b/docs/archives/CHANGELOG_23.02_to_23.12.md @@ -1,6 +1,311 @@ # Change log -Generated on 2023-10-24 +Generated on 2024-04-10 +## Release 23.12 + +### Features +||| +|:---|:---| +|[#6832](https://github.com/NVIDIA/spark-rapids/issues/6832)|[FEA] Convert Timestamp/Timezone tests/checks to be per operator instead of generic | +|[#9805](https://github.com/NVIDIA/spark-rapids/issues/9805)|[FEA] Support ```current_date``` expression function with CST (UTC + 8) timezone support| +|[#9515](https://github.com/NVIDIA/spark-rapids/issues/9515)|[FEA] Support temporal types in to_json| +|[#9872](https://github.com/NVIDIA/spark-rapids/issues/9872)|[FEA][JSON] Support Decimal type in `to_json`| +|[#9802](https://github.com/NVIDIA/spark-rapids/issues/9802)|[FEA] Support FromUTCTimestamp on the GPU with a non-UTC time zone| +|[#6831](https://github.com/NVIDIA/spark-rapids/issues/6831)|[FEA] Support timestamp transitions to and from UTC for single time zones with no repeating rules| +|[#9590](https://github.com/NVIDIA/spark-rapids/issues/9590)|[FEA][JSON] Support temporal types in `from_json`| +|[#9804](https://github.com/NVIDIA/spark-rapids/issues/9804)|[FEA] Support CPU path for from_utc_timestamp function with timezone| +|[#9461](https://github.com/NVIDIA/spark-rapids/issues/9461)|[FEA] Validate nvcomp-3.0 with spark rapids plugin| +|[#8832](https://github.com/NVIDIA/spark-rapids/issues/8832)|[FEA] rewrite join conditions where only part of it can fit on the AST| +|[#9059](https://github.com/NVIDIA/spark-rapids/issues/9059)|[FEA] Support spark.sql.parquet.datetimeRebaseModeInRead=LEGACY| +|[#9037](https://github.com/NVIDIA/spark-rapids/issues/9037)|[FEA] Support spark.sql.parquet.int96RebaseModeInWrite= LEGACY| +|[#9632](https://github.com/NVIDIA/spark-rapids/issues/9632)|[FEA] Take into account `org.apache.spark.timeZone` in Parquet/Avro from Spark 3.2| +|[#8770](https://github.com/NVIDIA/spark-rapids/issues/8770)|[FEA] add more metrics to Eventlogs or Executor logs| +|[#9597](https://github.com/NVIDIA/spark-rapids/issues/9597)|[FEA][JSON] Support boolean type in `from_json`| +|[#9516](https://github.com/NVIDIA/spark-rapids/issues/9516)|[FEA] Add support for JSON data source option `ignoreNullFields=false` in `to_json`| +|[#9520](https://github.com/NVIDIA/spark-rapids/issues/9520)|[FEA] Add support for `LAST()` as running window function| +|[#9518](https://github.com/NVIDIA/spark-rapids/issues/9518)|[FEA] Add support for relevant JSON data source options in `to_json`| +|[#9218](https://github.com/NVIDIA/spark-rapids/issues/9218)|[FEA] Support stack function| +|[#9532](https://github.com/NVIDIA/spark-rapids/issues/9532)|[FEA] Support Delta Lake 2.3.0| +|[#1525](https://github.com/NVIDIA/spark-rapids/issues/1525)|[FEA] Support Scala 2.13| +|[#7279](https://github.com/NVIDIA/spark-rapids/issues/7279)|[FEA] Support OverwriteByExpressionExecV1 for Delta Lake| +|[#9326](https://github.com/NVIDIA/spark-rapids/issues/9326)|[FEA] Specify `recover_with_null` when reading JSON files| +|[#8780](https://github.com/NVIDIA/spark-rapids/issues/8780)|[FEA] Support to_json function| +|[#7278](https://github.com/NVIDIA/spark-rapids/issues/7278)|[FEA] Support 
AppendDataExecV1 for Delta Lake| +|[#6266](https://github.com/NVIDIA/spark-rapids/issues/6266)|[FEA] Support Percentile| +|[#7277](https://github.com/NVIDIA/spark-rapids/issues/7277)|[FEA] Support AtomicReplaceTableAsSelect for Delta Lake| +|[#7276](https://github.com/NVIDIA/spark-rapids/issues/7276)|[FEA] Support AtomicCreateTableAsSelect for Delta Lake| + +### Performance +||| +|:---|:---| +|[#8137](https://github.com/NVIDIA/spark-rapids/issues/8137)|[FEA] Upgrade to UCX 1.15| +|[#8157](https://github.com/NVIDIA/spark-rapids/issues/8157)|[FEA] Add string comparison to AST expressions| +|[#9398](https://github.com/NVIDIA/spark-rapids/issues/9398)|[FEA] Compress/encrypt spill to disk| + +### Bugs Fixed +||| +|:---|:---| +|[#9687](https://github.com/NVIDIA/spark-rapids/issues/9687)|[BUG] `test_in_set` fails when DATAGEN_SEED=1698940723| +|[#9659](https://github.com/NVIDIA/spark-rapids/issues/9659)|[BUG] executor crash intermittantly in scala2.13-built spark332 integration tests| +|[#9923](https://github.com/NVIDIA/spark-rapids/issues/9923)|[BUG] Failed case about ```test_timestamp_seconds_rounding_necessary[Decimal(20,7)][DATAGEN_SEED=1701412018] – src.main.python.date_time_test```| +|[#9982](https://github.com/NVIDIA/spark-rapids/issues/9982)|[BUG] test "convert large InternalRow iterator to cached batch single col" failed with arena pool| +|[#9683](https://github.com/NVIDIA/spark-rapids/issues/9683)|[BUG] test_map_scalars_supported_key_types fails with DATAGEN_SEED=1698940723| +|[#9976](https://github.com/NVIDIA/spark-rapids/issues/9976)|[BUG] test_part_write_round_trip[Float] Failed on -0.0 partition| +|[#9948](https://github.com/NVIDIA/spark-rapids/issues/9948)|[BUG] parquet reader data corruption in nested schema after https://github.com/rapidsai/cudf/pull/13302| +|[#9867](https://github.com/NVIDIA/spark-rapids/issues/9867)|[BUG] Unable to use Spark Rapids with Spark Thrift Server| +|[#9934](https://github.com/NVIDIA/spark-rapids/issues/9934)|[BUG] test_delta_multi_part_write_round_trip_unmanaged and test_delta_part_write_round_trip_unmanaged failed DATA_SEED=1701608331 | +|[#9933](https://github.com/NVIDIA/spark-rapids/issues/9933)|[BUG] collection_ops_test.py::test_sequence_too_long_sequence[Long(not_null)][DATAGEN_SEED=1701553915, INJECT_OOM]| +|[#9837](https://github.com/NVIDIA/spark-rapids/issues/9837)|[BUG] test_part_write_round_trip failed| +|[#9932](https://github.com/NVIDIA/spark-rapids/issues/9932)|[BUG] Failed test_multi_tier_ast[DATAGEN_SEED=1701445668] on CI| +|[#9829](https://github.com/NVIDIA/spark-rapids/issues/9829)|[BUG] Java OOM when testing non-UTC time zone with lots of cases fallback.| +|[#9403](https://github.com/NVIDIA/spark-rapids/issues/9403)|[BUG] test_cogroup_apply_udf[Short(not_null)] failed with pandas 2.1.X| +|[#9684](https://github.com/NVIDIA/spark-rapids/issues/9684)|[BUG] test_coalesce fails with DATAGEN_SEED=1698940723| +|[#9685](https://github.com/NVIDIA/spark-rapids/issues/9685)|[BUG] test_case_when fails with DATAGEN_SEED=1698940723| +|[#9776](https://github.com/NVIDIA/spark-rapids/issues/9776)|[BUG] fastparquet compatibility tests fail with data mismatch if TZ is not set and system timezone is not UTC| +|[#9733](https://github.com/NVIDIA/spark-rapids/issues/9733)|[BUG] Complex AST expressions can crash with non-matching operand type error| +|[#9877](https://github.com/NVIDIA/spark-rapids/issues/9877)|[BUG] Fix resource leak in to_json| +|[#9722](https://github.com/NVIDIA/spark-rapids/issues/9722)|[BUG] test_floor_scale_zero fails with 
DATAGEN_SEED=1700009407| +|[#9846](https://github.com/NVIDIA/spark-rapids/issues/9846)|[BUG] test_ceil_scale_zero may fail with different datagen_seed| +|[#9781](https://github.com/NVIDIA/spark-rapids/issues/9781)|[BUG] test_cast_string_date_valid_format fails on DATAGEN_SEED=1700250017| +|[#9714](https://github.com/NVIDIA/spark-rapids/issues/9714)|Scala Map class not found when executing the benchmark on Spark 3.5.0 with Scala 2.13| +|[#9856](https://github.com/NVIDIA/spark-rapids/issues/9856)|collection_ops_test.py failed on Dataproc-2.1 with: Column 'None' does not exist| +|[#9397](https://github.com/NVIDIA/spark-rapids/issues/9397)|[BUG] RapidsShuffleManager MULTITHREADED on Databricks, we see loss of executors due to Rpc issues| +|[#9738](https://github.com/NVIDIA/spark-rapids/issues/9738)|[BUG] `test_delta_part_write_round_trip_unmanaged` and `test_delta_multi_part_write_round_trip_unmanaged` fail with `DATAGEN_SEED=1700105176`| +|[#9771](https://github.com/NVIDIA/spark-rapids/issues/9771)|[BUG] ast_test.py::test_X[(String, True)][DATAGEN_SEED=1700205785] failed| +|[#9782](https://github.com/NVIDIA/spark-rapids/issues/9782)|[BUG] Error messages appear in a clean build| +|[#9798](https://github.com/NVIDIA/spark-rapids/issues/9798)|[BUG] GpuCheckOverflowInTableInsert should be added to databricks shim| +|[#9820](https://github.com/NVIDIA/spark-rapids/issues/9820)|[BUG] test_parquet_write_roundtrip_datetime_with_legacy_rebase fails with "year 0 is out of range"| +|[#9817](https://github.com/NVIDIA/spark-rapids/issues/9817)|[BUG] FAILED dpp_test.py::test_dpp_reuse_broadcast_exchange[false-0-parquet][DATAGEN_SEED=1700572856, IGNORE_ORDER]| +|[#9768](https://github.com/NVIDIA/spark-rapids/issues/9768)|[BUG] `cast decimal to string` ScalaTest relies on a side effects | +|[#9711](https://github.com/NVIDIA/spark-rapids/issues/9711)|[BUG] test_lte fails with DATAGEN_SEED=1699987762| +|[#9751](https://github.com/NVIDIA/spark-rapids/issues/9751)|[BUG] cmp_test test_gte failed with DATAGEN_SEED=1700149611| +|[#9469](https://github.com/NVIDIA/spark-rapids/issues/9469)|[BUG] [main] ERROR com.nvidia.spark.rapids.GpuOverrideUtil - Encountered an exception applying GPU overrides java.lang.IllegalStateException: the broadcast must be on the GPU too| +|[#9648](https://github.com/NVIDIA/spark-rapids/issues/9648)|[BUG] Existence default values in schema are not being honored| +|[#9676](https://github.com/NVIDIA/spark-rapids/issues/9676)|Fix Delta Lake Integration tests; `test_delta_atomic_create_table_as_select` and `test_delta_atomic_replace_table_as_select`| +|[#9701](https://github.com/NVIDIA/spark-rapids/issues/9701)|[BUG] test_ts_formats_round_trip and test_datetime_roundtrip_with_legacy_rebase fail with DATAGEN_SEED=1699915317| +|[#9691](https://github.com/NVIDIA/spark-rapids/issues/9691)|[BUG] Repeated Maven invocations w/o changes recompile too many Scala sources despite recompileMode=incremental | +|[#9547](https://github.com/NVIDIA/spark-rapids/issues/9547)|Update buildall and doc to generate bloop projects for test debugging| +|[#9697](https://github.com/NVIDIA/spark-rapids/issues/9697)|[BUG] Iceberg multiple file readers can not read files if the file paths contain encoded URL unsafe chars| +|[#9681](https://github.com/NVIDIA/spark-rapids/issues/9681)|Databricks Build Failing For 330db+| +|[#9521](https://github.com/NVIDIA/spark-rapids/issues/9521)|[BUG] Multi Threaded Shuffle Writer needs flow control| +|[#9675](https://github.com/NVIDIA/spark-rapids/issues/9675)|Failing Delta Lake Tests for 
Databricks 13.3 Due to WriteIntoDeltaCommand| +|[#9669](https://github.com/NVIDIA/spark-rapids/issues/9669)|[BUG] Rebase exception states not in UTC but timezone is Etc/UTC| +|[#7940](https://github.com/NVIDIA/spark-rapids/issues/7940)|[BUG] UCX peer connection issue in multi-nic single node cluster| +|[#9650](https://github.com/NVIDIA/spark-rapids/issues/9650)|[BUG] Github workflow for missing scala2.13 updates fails to detect when pom is new| +|[#9621](https://github.com/NVIDIA/spark-rapids/issues/9621)|[BUG] Scala 2.13 with-classifier profile is picking up Scala2.12 spark.version| +|[#9636](https://github.com/NVIDIA/spark-rapids/issues/9636)|[BUG] All parquet integration tests failed "Part of the plan is not columnar class" in databricks runtimes| +|[#9108](https://github.com/NVIDIA/spark-rapids/issues/9108)|[BUG] nullability on some decimal operations is wrong| +|[#9625](https://github.com/NVIDIA/spark-rapids/issues/9625)|[BUG] Typo in github Maven check install-modules | +|[#9603](https://github.com/NVIDIA/spark-rapids/issues/9603)|[BUG] fastparquet_compatibility_test fails on dataproc| +|[#8729](https://github.com/NVIDIA/spark-rapids/issues/8729)|[BUG] nightly integration test failed OOM kill in JDK11 ENV| +|[#9589](https://github.com/NVIDIA/spark-rapids/issues/9589)|[BUG] Scala 2.13 build hard-codes Java 8 target | +|[#9581](https://github.com/NVIDIA/spark-rapids/issues/9581)|Delta Lake 2.4 missing equals/hashCode override for file format and some metrics for merge| +|[#9507](https://github.com/NVIDIA/spark-rapids/issues/9507)|[BUG] Spark 3.2+/ParquetFilterSuite/Parquet filter pushdown - timestamp/ FAILED | +|[#9540](https://github.com/NVIDIA/spark-rapids/issues/9540)|[BUG] Job failed with SparkUpgradeException no matter which value are set for spark.sql.parquet.datetimeRebaseModeInRead| +|[#9545](https://github.com/NVIDIA/spark-rapids/issues/9545)|[BUG] Dataproc 2.0 test_reading_file_rewritten_with_fastparquet tests failing| +|[#9552](https://github.com/NVIDIA/spark-rapids/issues/9552)|[BUG] Inconsistent CDH dependency overrides across submodules| +|[#9571](https://github.com/NVIDIA/spark-rapids/issues/9571)|[BUG] non-deterministic compiled SQLExecPlugin.class with scala 2.13 deployment| +|[#9569](https://github.com/NVIDIA/spark-rapids/issues/9569)|[BUG] test_window_running failed in 3.1.2+3.1.3| +|[#9480](https://github.com/NVIDIA/spark-rapids/issues/9480)|[BUG] mapInPandas doesn't invoke udf on empty partitions| +|[#8644](https://github.com/NVIDIA/spark-rapids/issues/8644)|[BUG] Parquet file with malformed dictionary does not error when loaded| +|[#9310](https://github.com/NVIDIA/spark-rapids/issues/9310)|[BUG] Improve support for reading JSON files with malformed rows| +|[#9457](https://github.com/NVIDIA/spark-rapids/issues/9457)|[BUG] CDH 332 unit tests failing| +|[#9404](https://github.com/NVIDIA/spark-rapids/issues/9404)|[BUG] Spark reports a decimal error when create lit scalar when generate Decimal(34, -5) data.| +|[#9110](https://github.com/NVIDIA/spark-rapids/issues/9110)|[BUG] GPU Reader fails due to partition column creating column larger then cudf column size limit| +|[#8631](https://github.com/NVIDIA/spark-rapids/issues/8631)|[BUG] Parquet load failure on repeated_no_annotation.parquet| +|[#9364](https://github.com/NVIDIA/spark-rapids/issues/9364)|[BUG] CUDA illegal access error is triggering split and retry logic| + +### PRs +||| +|:---|:---| +|[#10384](https://github.com/NVIDIA/spark-rapids/pull/10384)|[DOC] Update docs for 23.12.2 release [skip ci] | 
+|[#10341](https://github.com/NVIDIA/spark-rapids/pull/10341)|Update changelog for v23.12.2 [skip ci]| +|[#10340](https://github.com/NVIDIA/spark-rapids/pull/10340)|Copyright to 2024 [skip ci]| +|[#10323](https://github.com/NVIDIA/spark-rapids/pull/10323)|Upgrade version to 23.12.2-SNAPSHOT| +|[#10329](https://github.com/NVIDIA/spark-rapids/pull/10329)|update download page for v23.12.2 release [skip ci]| +|[#10274](https://github.com/NVIDIA/spark-rapids/pull/10274)|PythonRunner Changes| +|[#10124](https://github.com/NVIDIA/spark-rapids/pull/10124)|Update changelog for v23.12.1 [skip ci]| +|[#10123](https://github.com/NVIDIA/spark-rapids/pull/10123)|Change version to v23.12.1 [skip ci]| +|[#10122](https://github.com/NVIDIA/spark-rapids/pull/10122)|Init changelog for v23.12.1 [skip ci]| +|[#10121](https://github.com/NVIDIA/spark-rapids/pull/10121)|[DOC] update download page for db hot fix [skip ci]| +|[#10116](https://github.com/NVIDIA/spark-rapids/pull/10116)|Upgrade to 23.12.1-SNAPSHOT| +|[#10069](https://github.com/NVIDIA/spark-rapids/pull/10069)|Revert "Support split broadcast join condition into ast and non-ast […| +|[#9470](https://github.com/NVIDIA/spark-rapids/pull/9470)|Use float to string kernel| +|[#9481](https://github.com/NVIDIA/spark-rapids/pull/9481)|Use parse_url kernel for PROTOCOL parsing| +|[#9935](https://github.com/NVIDIA/spark-rapids/pull/9935)|Init 23.12 changelog [skip ci]| +|[#9943](https://github.com/NVIDIA/spark-rapids/pull/9943)|[DOC] Update docs for 23.12.0 release [skip ci]| +|[#10014](https://github.com/NVIDIA/spark-rapids/pull/10014)|Add documentation for how to run tests with a fixed datagen seed [skip ci]| +|[#9954](https://github.com/NVIDIA/spark-rapids/pull/9954)|Update private and JNI version to released 23.12.0| +|[#10009](https://github.com/NVIDIA/spark-rapids/pull/10009)|Using fix seed to unblock 23.12 release; Move the blocked issues to 24.02| +|[#10007](https://github.com/NVIDIA/spark-rapids/pull/10007)|Fix Java OOM in non-UTC case with lots of xfail (#9944)| +|[#9985](https://github.com/NVIDIA/spark-rapids/pull/9985)|Avoid allocating GPU memory out of RMM managed pool in test| +|[#9970](https://github.com/NVIDIA/spark-rapids/pull/9970)|Avoid leading and trailing zeros in test_timestamp_seconds_rounding_necessary| +|[#9978](https://github.com/NVIDIA/spark-rapids/pull/9978)|Avoid using floating point values as partition values in tests| +|[#9979](https://github.com/NVIDIA/spark-rapids/pull/9979)|Add compatibility notes for writing ORC with lost Gregorian days [skip ci]| +|[#9949](https://github.com/NVIDIA/spark-rapids/pull/9949)|Override the seed for `test_map_scalars_supported_key_types ` for version of Spark before 3.4.0 [Databricks]| +|[#9961](https://github.com/NVIDIA/spark-rapids/pull/9961)|Avoid using floating point for partition values in Delta Lake tests| +|[#9960](https://github.com/NVIDIA/spark-rapids/pull/9960)|Fix LongGen accidentally using special cases when none are desired| +|[#9950](https://github.com/NVIDIA/spark-rapids/pull/9950)|Avoid generating NaNs as partition values in test_part_write_round_trip| +|[#9940](https://github.com/NVIDIA/spark-rapids/pull/9940)|Fix 'year 0 is out of range' by setting a fix seed| +|[#9946](https://github.com/NVIDIA/spark-rapids/pull/9946)|Fix test_multi_tier_ast to ignore ordering of output rows| +|[#9928](https://github.com/NVIDIA/spark-rapids/pull/9928)|Test `inset` with `NaN` only for Spark from 3.1.3| +|[#9906](https://github.com/NVIDIA/spark-rapids/pull/9906)|Fix test_initcap to use the intended 
limited character set| +|[#9831](https://github.com/NVIDIA/spark-rapids/pull/9831)|Skip fastparquet timestamp tests when plugin cannot read/write timestamps| +|[#9893](https://github.com/NVIDIA/spark-rapids/pull/9893)|Add multiple expression tier regression test for AST| +|[#9889](https://github.com/NVIDIA/spark-rapids/pull/9889)|Fix test_cast_string_ts_valid_format test| +|[#9833](https://github.com/NVIDIA/spark-rapids/pull/9833)|Fix a hang for Pandas UDFs on DB 13.3| +|[#9873](https://github.com/NVIDIA/spark-rapids/pull/9873)|Add support for decimal in `to_json`| +|[#9890](https://github.com/NVIDIA/spark-rapids/pull/9890)|Remove Databricks 13.3 from release 23.12| +|[#9874](https://github.com/NVIDIA/spark-rapids/pull/9874)|Fix zero-scale floor and ceil tests| +|[#9879](https://github.com/NVIDIA/spark-rapids/pull/9879)|Fix resource leak in to_json| +|[#9600](https://github.com/NVIDIA/spark-rapids/pull/9600)|Add date and timestamp support to to_json| +|[#9871](https://github.com/NVIDIA/spark-rapids/pull/9871)|Fix test_cast_string_date_valid_format generating year 0| +|[#9885](https://github.com/NVIDIA/spark-rapids/pull/9885)|Preparation for non-UTC nightly CI [skip ci]| +|[#9810](https://github.com/NVIDIA/spark-rapids/pull/9810)|Support from_utc_timestamp on the GPU for non-UTC timezones (non-DST)| +|[#9865](https://github.com/NVIDIA/spark-rapids/pull/9865)|Fix problems with nulls in sequence tests| +|[#9864](https://github.com/NVIDIA/spark-rapids/pull/9864)|Add compatibility documentation with respect to decimal overflow detection [skip ci]| +|[#9860](https://github.com/NVIDIA/spark-rapids/pull/9860)|Fixing FAQ deadlink in plugin code [skip ci]| +|[#9840](https://github.com/NVIDIA/spark-rapids/pull/9840)|Avoid using NaNs as Delta Lake partition values| +|[#9773](https://github.com/NVIDIA/spark-rapids/pull/9773)|xfail all the impacted cases when using non-UTC time zone| +|[#9849](https://github.com/NVIDIA/spark-rapids/pull/9849)|Instantly Delete pre-merge content of stage workspace if success| +|[#9848](https://github.com/NVIDIA/spark-rapids/pull/9848)|Force datagen_seed for test_ceil_scale_zero and test_decimal_round| +|[#9677](https://github.com/NVIDIA/spark-rapids/pull/9677)|Enable build for Databricks 13.3| +|[#9809](https://github.com/NVIDIA/spark-rapids/pull/9809)|Re-enable AST string integration cases| +|[#9835](https://github.com/NVIDIA/spark-rapids/pull/9835)|Avoid pre-Gregorian dates in schema_evolution_test| +|[#9786](https://github.com/NVIDIA/spark-rapids/pull/9786)|Check paths for existence to prevent ignorable error messages during build| +|[#9824](https://github.com/NVIDIA/spark-rapids/pull/9824)|UCX 1.15 upgrade| +|[#9800](https://github.com/NVIDIA/spark-rapids/pull/9800)|Add GpuCheckOverflowInTableInsert to Databricks 11.3+| +|[#9821](https://github.com/NVIDIA/spark-rapids/pull/9821)|Update timestamp gens to avoid "year 0 is out of range" errors| +|[#9826](https://github.com/NVIDIA/spark-rapids/pull/9826)|Set seed to 0 for test_hash_reduction_sum| +|[#9720](https://github.com/NVIDIA/spark-rapids/pull/9720)|Support timestamp in `from_json`| +|[#9818](https://github.com/NVIDIA/spark-rapids/pull/9818)|Specify nullable=False when generating filter values in dpp tests| +|[#9689](https://github.com/NVIDIA/spark-rapids/pull/9689)|Support CPU path for from_utc_timestamp function with timezone | +|[#9769](https://github.com/NVIDIA/spark-rapids/pull/9769)|Use withGpuSparkSession to customize SparkConf| +|[#9780](https://github.com/NVIDIA/spark-rapids/pull/9780)|Fix NaN handling in 
GpuLessThanOrEqual and GpuGreaterThanOrEqual| +|[#9795](https://github.com/NVIDIA/spark-rapids/pull/9795)|xfail AST string tests| +|[#9666](https://github.com/NVIDIA/spark-rapids/pull/9666)|Add support for parsing strings as dates in `from_json`| +|[#9673](https://github.com/NVIDIA/spark-rapids/pull/9673)|Fix the broadcast joins issues caused by InputFileBlockRule| +|[#9785](https://github.com/NVIDIA/spark-rapids/pull/9785)|Force datagen_seed for 9781 and 9784 [skip ci]| +|[#9765](https://github.com/NVIDIA/spark-rapids/pull/9765)|Let GPU scans fall back when default values exist in schema| +|[#9729](https://github.com/NVIDIA/spark-rapids/pull/9729)|Fix Delta Lake atomic table operations on spark341db| +|[#9770](https://github.com/NVIDIA/spark-rapids/pull/9770)|[BUG] Fix the doc for Maven and Scala 2.13 test example [skip ci]| +|[#9761](https://github.com/NVIDIA/spark-rapids/pull/9761)|Fix bug in tagging of JsonToStructs| +|[#9758](https://github.com/NVIDIA/spark-rapids/pull/9758)|Remove forced seed from Delta Lake part_write_round_trip_unmanaged tests| +|[#9652](https://github.com/NVIDIA/spark-rapids/pull/9652)|Add time zone config to set non-UTC| +|[#9736](https://github.com/NVIDIA/spark-rapids/pull/9736)|Fix `TimestampGen` to generate value not too close to the minimum allowed timestamp| +|[#9698](https://github.com/NVIDIA/spark-rapids/pull/9698)|Speed up build: unnecessary invalidation in the incremental recompile mode| +|[#9748](https://github.com/NVIDIA/spark-rapids/pull/9748)|Fix Delta Lake part_write_round_trip_unmanaged tests with floating point| +|[#9702](https://github.com/NVIDIA/spark-rapids/pull/9702)|Support split BroadcastNestedLoopJoin condition for AST and non-AST| +|[#9746](https://github.com/NVIDIA/spark-rapids/pull/9746)|Force test_hypot to be single seed for now| +|[#9745](https://github.com/NVIDIA/spark-rapids/pull/9745)|Avoid generating null filter values in test_delta_dfp_reuse_broadcast_exchange| +|[#9741](https://github.com/NVIDIA/spark-rapids/pull/9741)|Set seed=0 for the delta lake part roundtrip tests| +|[#9660](https://github.com/NVIDIA/spark-rapids/pull/9660)|Fully support date/time legacy rebase for nested input| +|[#9672](https://github.com/NVIDIA/spark-rapids/pull/9672)|Support String type for AST| +|[#9716](https://github.com/NVIDIA/spark-rapids/pull/9716)|Initiate project version 24.02.0-SNAPSHOT| +|[#9732](https://github.com/NVIDIA/spark-rapids/pull/9732)|Temporarily force `datagen_seed=0` for `test_re_replace_all` to unblock CI| +|[#9726](https://github.com/NVIDIA/spark-rapids/pull/9726)|Fix leak in BatchWithPartitionData| +|[#9717](https://github.com/NVIDIA/spark-rapids/pull/9717)|Encode the file path from Iceberg when converting to a PartitionedFile| +|[#9441](https://github.com/NVIDIA/spark-rapids/pull/9441)|Add a random seed specific to datagen cases| +|[#9649](https://github.com/NVIDIA/spark-rapids/pull/9649)|Support `spark.sql.parquet.datetimeRebaseModeInRead=LEGACY` and `spark.sql.parquet.int96RebaseModeInRead=LEGACY`| +|[#9612](https://github.com/NVIDIA/spark-rapids/pull/9612)|Escape quotes and newlines when converting strings to json format in to_json| +|[#9644](https://github.com/NVIDIA/spark-rapids/pull/9644)|Add Partial Delta Lake Support for Databricks 13.3| +|[#9690](https://github.com/NVIDIA/spark-rapids/pull/9690)|Changed `extractExecutedPlan` to consider ResultQueryStageExec for Databricks 13.3| +|[#9686](https://github.com/NVIDIA/spark-rapids/pull/9686)|Removed Maven Profiles From `tests/pom.xml`| 
+|[#9509](https://github.com/NVIDIA/spark-rapids/pull/9509)|Fine-grained spill metrics| +|[#9658](https://github.com/NVIDIA/spark-rapids/pull/9658)|Support `spark.sql.parquet.int96RebaseModeInWrite=LEGACY`| +|[#9695](https://github.com/NVIDIA/spark-rapids/pull/9695)|Revert "Support split non-AST-able join condition for BroadcastNested…| +|[#9693](https://github.com/NVIDIA/spark-rapids/pull/9693)|Enable automerge from 23.12 to 24.02 [skip ci]| +|[#9679](https://github.com/NVIDIA/spark-rapids/pull/9679)|[Doc] update the dead link in download page [skip ci]| +|[#9678](https://github.com/NVIDIA/spark-rapids/pull/9678)|Add flow control for multithreaded shuffle writer| +|[#9635](https://github.com/NVIDIA/spark-rapids/pull/9635)|Support split non-AST-able join condition for BroadcastNestedLoopJoin| +|[#9646](https://github.com/NVIDIA/spark-rapids/pull/9646)|Fix Integration Test Failures for Databricks 13.3 Support| +|[#9670](https://github.com/NVIDIA/spark-rapids/pull/9670)|Normalize file timezone and handle missing file timezone in datetimeRebaseUtils| +|[#9657](https://github.com/NVIDIA/spark-rapids/pull/9657)|Update verify check to handle new pom files [skip ci]| +|[#9663](https://github.com/NVIDIA/spark-rapids/pull/9663)|Making User Guide info in bold and adding it as top right link in github.io [skip ci]| +|[#9609](https://github.com/NVIDIA/spark-rapids/pull/9609)|Add valid retry solution to mvn-verify [skip ci]| +|[#9655](https://github.com/NVIDIA/spark-rapids/pull/9655)|Document problem with handling of invalid characters in CSV reader| +|[#9620](https://github.com/NVIDIA/spark-rapids/pull/9620)|Add support for parsing boolean values in `from_json`| +|[#9615](https://github.com/NVIDIA/spark-rapids/pull/9615)|Bloop updates - require JDK11 in buildall + docs, build bloop for all targets.| +|[#9631](https://github.com/NVIDIA/spark-rapids/pull/9631)|Refactor Parquet readers| +|[#9637](https://github.com/NVIDIA/spark-rapids/pull/9637)|Added Support For Various Execs for Databricks 13.3 | +|[#9640](https://github.com/NVIDIA/spark-rapids/pull/9640)|Add support for `ignoreNullFields=false` in `to_json`| +|[#9623](https://github.com/NVIDIA/spark-rapids/pull/9623)|Running window optimization for `LAST()`| +|[#9641](https://github.com/NVIDIA/spark-rapids/pull/9641)|Revert "Support rebase checking for nested dates and timestamps (#9617)"| +|[#9423](https://github.com/NVIDIA/spark-rapids/pull/9423)|Re-enable `from_json` / `JsonToStructs`| +|[#9624](https://github.com/NVIDIA/spark-rapids/pull/9624)|Add jenkins-level retry for pre-merge build in databricks runtimes| +|[#9608](https://github.com/NVIDIA/spark-rapids/pull/9608)|Fix nullability issues for some decimal operations| +|[#9617](https://github.com/NVIDIA/spark-rapids/pull/9617)|Support rebase checking for nested dates and timestamps| +|[#9611](https://github.com/NVIDIA/spark-rapids/pull/9611)|Move simple classes after refactoring to sql-plugin-api| +|[#9618](https://github.com/NVIDIA/spark-rapids/pull/9618)|Remove unused dataTypes argument from HostShuffleCoalesceIterator| +|[#9626](https://github.com/NVIDIA/spark-rapids/pull/9626)|Fix ENV typo in pre-merge github actions [skip ci]| +|[#9593](https://github.com/NVIDIA/spark-rapids/pull/9593)|PythonRunner and RapidsErrorUtils Changes For Databricks 13.3| +|[#9607](https://github.com/NVIDIA/spark-rapids/pull/9607)|Integration tests: Install specific fastparquet version.| +|[#9610](https://github.com/NVIDIA/spark-rapids/pull/9610)|Propagate local properties to broadcast execs| 
+|[#9544](https://github.com/NVIDIA/spark-rapids/pull/9544)|Support batching for `RANGE` running window aggregations. Including on| +|[#9601](https://github.com/NVIDIA/spark-rapids/pull/9601)|Remove usage of deprecated scala.Proxy| +|[#9591](https://github.com/NVIDIA/spark-rapids/pull/9591)|Enable implicit JDK profile activation| +|[#9586](https://github.com/NVIDIA/spark-rapids/pull/9586)|Merge metrics and file format fixes to Delta 2.4 support| +|[#9594](https://github.com/NVIDIA/spark-rapids/pull/9594)|Revert "Ignore failing Parquet filter test to unblock CI (#9519)"| +|[#9454](https://github.com/NVIDIA/spark-rapids/pull/9454)|Support encryption and compression in disk store| +|[#9439](https://github.com/NVIDIA/spark-rapids/pull/9439)|Support stack function| +|[#9583](https://github.com/NVIDIA/spark-rapids/pull/9583)|Fix fastparquet tests to work with HDFS| +|[#9508](https://github.com/NVIDIA/spark-rapids/pull/9508)|Consolidate deps switching in an intermediate pom| +|[#9562](https://github.com/NVIDIA/spark-rapids/pull/9562)|Delta Lake 2.3.0 support| +|[#9576](https://github.com/NVIDIA/spark-rapids/pull/9576)|Move Stack classes to wrapper classes to fix non-deterministic build issue| +|[#9572](https://github.com/NVIDIA/spark-rapids/pull/9572)|Add retry for CrossJoinIterator and ConditionalNestedLoopJoinIterator| +|[#9575](https://github.com/NVIDIA/spark-rapids/pull/9575)|Fix `test_window_running*()` for `NTH_VALUE IGNORE NULLS`.| +|[#9574](https://github.com/NVIDIA/spark-rapids/pull/9574)|Fix broken #endif scala comments [skip ci]| +|[#9568](https://github.com/NVIDIA/spark-rapids/pull/9568)|Enforce Apache 3.3.0+ for Scala 2.13| +|[#9557](https://github.com/NVIDIA/spark-rapids/pull/9557)|Support launching Map Pandas UDF on empty partitions| +|[#9489](https://github.com/NVIDIA/spark-rapids/pull/9489)|Batching support for ROW-based `FIRST()` window function| +|[#9510](https://github.com/NVIDIA/spark-rapids/pull/9510)|Add Databricks 13.3 shim boilerplate code and refactor Databricks 12.2 shim| +|[#9554](https://github.com/NVIDIA/spark-rapids/pull/9554)|Fix fastparquet installation for| +|[#9536](https://github.com/NVIDIA/spark-rapids/pull/9536)|Add CPU POC of TimeZoneDB; Test some time zones by comparing CPU POC and Spark| +|[#9558](https://github.com/NVIDIA/spark-rapids/pull/9558)|Support integration test against scala2.13 spark binaries[skip ci]| +|[#8592](https://github.com/NVIDIA/spark-rapids/pull/8592)|Scala 2.13 Support| +|[#9551](https://github.com/NVIDIA/spark-rapids/pull/9551)|Enable malformed Parquet failure test| +|[#9546](https://github.com/NVIDIA/spark-rapids/pull/9546)|Support OverwriteByExpressionExecV1 for Delta Lake tables| +|[#9527](https://github.com/NVIDIA/spark-rapids/pull/9527)|Support Split And Retry for GpuProjectAstExec| +|[#9541](https://github.com/NVIDIA/spark-rapids/pull/9541)|Move simple classes to API| +|[#9548](https://github.com/NVIDIA/spark-rapids/pull/9548)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#9418](https://github.com/NVIDIA/spark-rapids/pull/9418)|Fix STRUCT comparison between Pandas and Spark dataframes in fastparquet tests| +|[#9468](https://github.com/NVIDIA/spark-rapids/pull/9468)|Add SplitAndRetry to GpuRunningWindowIterator| +|[#9486](https://github.com/NVIDIA/spark-rapids/pull/9486)|Add partial support for `to_json`| +|[#9538](https://github.com/NVIDIA/spark-rapids/pull/9538)|Fix tiered project breaking higher order functions| +|[#9539](https://github.com/NVIDIA/spark-rapids/pull/9539)|Add delta-24x to 
delta-lake/README.md [skip ci]| +|[#9534](https://github.com/NVIDIA/spark-rapids/pull/9534)|Add pyarrow tests for Databricks runtime| +|[#9444](https://github.com/NVIDIA/spark-rapids/pull/9444)|Remove redundant pass-through shuffle manager classes| +|[#9531](https://github.com/NVIDIA/spark-rapids/pull/9531)|Fix relative path for spark-shell nightly test [skip ci]| +|[#9525](https://github.com/NVIDIA/spark-rapids/pull/9525)|Follow-up to dbdeps consolidation| +|[#9506](https://github.com/NVIDIA/spark-rapids/pull/9506)|Move ProxyShuffleInternalManagerBase to api| +|[#9504](https://github.com/NVIDIA/spark-rapids/pull/9504)|Add a spark-shell smoke test to premerge and nightly| +|[#9519](https://github.com/NVIDIA/spark-rapids/pull/9519)|Ignore failing Parquet filter test to unblock CI| +|[#9478](https://github.com/NVIDIA/spark-rapids/pull/9478)|Support AppendDataExecV1 for Delta Lake tables| +|[#9366](https://github.com/NVIDIA/spark-rapids/pull/9366)|Add tests to check compatibility with `fastparquet`| +|[#9419](https://github.com/NVIDIA/spark-rapids/pull/9419)|Add retry to RoundRobin Partitioner and Range Partitioner| +|[#9502](https://github.com/NVIDIA/spark-rapids/pull/9502)|Install Dependencies Needed For Databricks 13.3| +|[#9296](https://github.com/NVIDIA/spark-rapids/pull/9296)|Implement `percentile` aggregation| +|[#9488](https://github.com/NVIDIA/spark-rapids/pull/9488)|Add Shim JSON Headers for Databricks 13.3| +|[#9443](https://github.com/NVIDIA/spark-rapids/pull/9443)|Add AtomicReplaceTableAsSelectExec support for Delta Lake| +|[#9476](https://github.com/NVIDIA/spark-rapids/pull/9476)|Refactor common Delta Lake test code| +|[#9463](https://github.com/NVIDIA/spark-rapids/pull/9463)|Fix Cloudera 3.3.2 shim for handling CheckOverflowInTableInsert and orc zstd support| +|[#9460](https://github.com/NVIDIA/spark-rapids/pull/9460)|Update links in old release notes to new doc locations [skip ci]| +|[#9405](https://github.com/NVIDIA/spark-rapids/pull/9405)|Wrap scalar generation into spark session in integration test| +|[#9459](https://github.com/NVIDIA/spark-rapids/pull/9459)|Fix 332cdh build [skip ci]| +|[#9425](https://github.com/NVIDIA/spark-rapids/pull/9425)|Add support for AtomicCreateTableAsSelect with Delta Lake| +|[#9434](https://github.com/NVIDIA/spark-rapids/pull/9434)|Add retry support to `HostToGpuCoalesceIterator.concatAllAndPutOnGPU`| +|[#9453](https://github.com/NVIDIA/spark-rapids/pull/9453)|Update codeowner and blossom-ci ACL [skip ci]| +|[#9396](https://github.com/NVIDIA/spark-rapids/pull/9396)|Add support for Cloudera CDS-3.3.2| +|[#9380](https://github.com/NVIDIA/spark-rapids/pull/9380)|Fix parsing of Parquet legacy list-of-struct format| +|[#9438](https://github.com/NVIDIA/spark-rapids/pull/9438)|Fix auto merge conflict 9437 [skip ci]| +|[#9424](https://github.com/NVIDIA/spark-rapids/pull/9424)|Refactor aggregate functions| +|[#9414](https://github.com/NVIDIA/spark-rapids/pull/9414)|Add retry to GpuHashJoin.filterNulls| +|[#9388](https://github.com/NVIDIA/spark-rapids/pull/9388)|Add developer documentation about working with data sources [skip ci]| +|[#9369](https://github.com/NVIDIA/spark-rapids/pull/9369)|Improve JSON empty row fix to use less memory| +|[#9373](https://github.com/NVIDIA/spark-rapids/pull/9373)|Fix auto merge conflict 9372| +|[#9308](https://github.com/NVIDIA/spark-rapids/pull/9308)|Initiate arm64 CI support [skip ci]| +|[#9292](https://github.com/NVIDIA/spark-rapids/pull/9292)|Init project version 23.12.0-SNAPSHOT| + ## Release 23.10 diff --git 
a/docs/compatibility.md b/docs/compatibility.md index b482ac70ffb..574465b4496 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -368,10 +368,8 @@ In versions of Spark before 3.5.0 there is no maximum to how deeply nested JSON no matter what version of Spark is used. If the nesting level is over this the JSON is considered invalid and all values will be returned as nulls. -Only structs are supported for nested types. There are also some issues with arrays of structs. If -your data includes this, even if you are not reading it, you might get an exception. You can -try to set `spark.rapids.sql.json.read.mixedTypesAsString.enabled` to true to work around this, -but it also has some issues with it. +Mixed types can have some problems. If an item being read could have some lines that are arrays +and others that are structs/dictionaries it is possible an error will be thrown. Dates and Timestamps have some issues and may return values for technically invalid inputs. @@ -439,31 +437,8 @@ Known issues are: ### get_json_object -The `GetJsonObject` operator takes a JSON formatted string and a JSON path string as input. The -code base for this is currently separate from GPU parsing of JSON for files and `FromJsonObject`. -Because of this the results can be different from each other. Because of several incompatibilities -and bugs in the GPU version of `GetJsonObject` it will be on the CPU by default. If you are -aware of the current limitations with the GPU version, you might see a significant performance -speedup if you enable it by setting `spark.rapids.sql.expression.GetJsonObject` to `true`. - -The following is a list of known differences. - * [No input validation](https://github.com/NVIDIA/spark-rapids/issues/10218). If the input string - is not valid JSON Apache Spark returns a null result, but ours will still try to find a match. - * [Escapes are not properly processed for Strings](https://github.com/NVIDIA/spark-rapids/issues/10196). - When returning a result for a quoted string Apache Spark will remove the quotes and replace - any escape sequences with the proper characters. The escape sequence processing does not happen - on the GPU. - * [Invalid JSON paths could throw exceptions](https://github.com/NVIDIA/spark-rapids/issues/10212) - If a JSON path is not valid Apache Spark returns a null result, but ours may throw an exception - and fail the query. - * [Non-string output is not normalized](https://github.com/NVIDIA/spark-rapids/issues/10218) - When returning a result for things other than strings, a number of things are normalized by - Apache Spark, but are not normalized by the GPU, like removing unnecessary white space, - parsing and then serializing floating point numbers, turning single quotes to double quotes, - and removing unneeded escapes for single quotes. - -The following is a list of bugs in either the GPU version or arguably in Apache Spark itself. - * https://github.com/NVIDIA/spark-rapids/issues/10219 non-matching quotes in quoted strings +Known issue: +- [Floating-point number normalization error](https://github.com/NVIDIA/spark-rapids-jni/issues/1922). `get_json_object` floating-point number normalization on the GPU could sometimes return incorrect results if the string contains high-precision values, see the String to Float and Float to String section for more details. 
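To make the mixed-types limitation above concrete, the following is a minimal PySpark sketch (the file path and session setup are illustrative assumptions, not taken from the plugin docs) where the same field is an array on one line and a struct on another. Reading such data with the GPU-accelerated JSON reader may throw an error, while CPU Spark typically falls back to inferring the conflicting field as a string column.

```python
# Hypothetical example of "mixed type" JSON lines: the field "a" is an array in
# one row and a struct/dictionary in another. The path is an assumption.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

path = "/tmp/mixed_types_example.json"
with open(path, "w") as f:
    f.write('{"id": 1, "a": [1, 2, 3]}\n')         # "a" is an array here
    f.write('{"id": 2, "a": {"x": 1, "y": 2}}\n')  # ...but a struct here

# On the CPU, Spark typically infers the conflicting field as a string column;
# with the RAPIDS Accelerator enabled, this read may instead throw an error.
spark.read.json(path).show(truncate=False)
```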
## Avro diff --git a/docs/configs.md b/docs/configs.md index 8759e8268e3..57517bc2b32 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -10,7 +10,7 @@ The following is the list of options that `rapids-plugin-4-spark` supports. On startup use: `--conf [conf key]=[conf value]`. For example: ``` -${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar \ +${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.concurrentGpuTasks=2 ``` diff --git a/docs/dev/get-json-object-dump-tool.md b/docs/dev/get-json-object-dump-tool.md new file mode 100644 index 00000000000..6cbf5ad7c9f --- /dev/null +++ b/docs/dev/get-json-object-dump-tool.md @@ -0,0 +1,99 @@ +--- +layout: page +title: Dump tool for get_json_object +nav_order: 12 +parent: Developer Overview +--- + +# Dump tool for get_json_object + +## Overview +To help debug issues with the `get_json_object` function, the RAPIDS Accelerator provides a +dump tool to save debug information to try and reproduce the issues. Note that the dumped data will be masked +to protect customer data. + +## How to enable +This assumes that the RAPIDS Accelerator has already been enabled. + +The `get_json_object` expression may be off by default, so enable it first: +``` +'spark.rapids.sql.expression.GetJsonObject': 'true' +``` + +To enable debugging, set the path to dump the data to. Note that this +path is interpreted using the Hadoop FileSystem APIs. This means that +a path with no scheme will go to the default file system. + +``` +'spark.rapids.sql.expression.GetJsonObject.debugPath': '/tmp/DEBUG_JSON_DUMP/' +``` + +This path should be a directory, or a location where we can create a directory to +store files in. Multiple files may be written out. Note that each instance of +`get_json_object` will mask the data in different ways, but the same +instance should mask the data in the same way. + +You may also set the max number of rows for each file/batch. Each time a new +batch of data comes into the `get_json_object` expression a new file is written, +and this controls the maximum number of rows that may be written out. +``` +'spark.rapids.sql.test.get_json_object.saveRows': '1024' +``` +This config can be skipped because the default value works. + +## Masking +Please note that masking cannot currently be disabled. +This tool should not dump the original input data. +The goal is to find out what types of issues are showing up, and ideally +give the RAPIDS team enough information to reproduce them. + +Digits `[0-9]` will be remapped to `[0-9]`; the mapping is chosen +randomly for each instance of the expression. This is done to preserve +the format of the numbers, even if they are not 100% the same. + +The characters `[a-zA-Z]` are also randomly remapped to `[a-zA-Z]`, similar +to the numbers, but many of these are preserved because they are part of +special cases. + +The letters that are preserved are `a, b, c, d, e, f, l, n, r, s, t, u, A, B, C, D, E, F` + +These are preserved because they could be used as part of + * special keywords like `true`, `false`, or `null` + * number formatting like `1.0E-3` + * escape characters defined in the JSON standard `\b\f\n\r\t\u` + * or hexadecimal numbers that are a part of the `\u` escape sequence + +All other characters are mapped to the letter `s` unless they are one of the following.
+ + * ASCII `[0 to 31]` are considered to be control characters in the JSON spec and in some cases are not allowed + * `-` for negative numbers + * `{ } [ ] , : " '` are part of the structure of JSON, or at least are considered that way + * `\` for escape sequences + * `$ [ ] . * '` which are part of JSON paths + * `?` which Spark has as a special case for JSON path, but no one else does. + +## Stored Data +The dumped data is stored in a CSV file that should be compatible with Spark +and most other CSV readers. CSV is not a great format for storing complex +data like JSON, so there are likely to be some small compatibility issues. +The following shows you how to read the stored data using Spark with Scala. + +Spark wants the data to be stored with no line separators, but JSON can contain them. +So we replace `\r` and `\n` with character sequences that are not likely to show up +in practice. JSON data can also conflict with CSV escape handling, especially if the +input data is not valid JSON. As such, we also replace double quotes and commas just in +case. + +```scala +// Replace this with the actual path to read from +val readPath = "/data/tmp/DEBUG_JSON_DUMP" + +val df = spark.read. + schema("isLegacy boolean, path string, originalInput string, cpuOutput string, gpuOutput string"). + csv(readPath) + +val strUnescape = Seq("isLegacy") ++ Seq("path", "originalInput", "cpuOutput", "gpuOutput"). + map(c => s"""replace(replace(replace(replace($c, '**CR**', '\r'), '**LF**', '\n'), '**QT**', '"'), '**COMMA**', ',') as $c""") + +val data = df.selectExpr(strUnescape : _*) +``` \ No newline at end of file diff --git a/docs/dev/shims.md b/docs/dev/shims.md index 0315e5bd963..9a8e09d8295 100644 --- a/docs/dev/shims.md +++ b/docs/dev/shims.md @@ -68,17 +68,17 @@ Using JarURLConnection URLs we create a Parallel World of the current version wi Spark 3.0.2's URLs: ```text -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/ -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark3xx-common/ -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark302/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark3xx-common/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark302/ ``` Spark 3.2.0's URLs : ```text -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/ -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark3xx-common/ -jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark320/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark3xx-common/ +jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark320/ ``` ### Late Inheritance in Public Classes diff --git a/docs/dev/testing.md b/docs/dev/testing.md index ed68f08392d..6a6c6a378eb 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -5,5 +5,5 @@ nav_order: 2 parent: Developer Overview --- An overview of testing can be found within the repository at: -* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-24.04/tests#readme) -* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-24.04/integration_tests#readme) +* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-24.06/tests#readme) +* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-24.06/integration_tests#readme) diff --git a/docs/download.md b/docs/download.md index 7a0b5b93adf..879787c2cca 100644 --- a/docs/download.md +++ b/docs/download.md @@ -18,7
+18,7 @@ cuDF jar, that is either preinstalled in the Spark classpath on all nodes or sub that uses the RAPIDS Accelerator For Apache Spark. See the [getting-started guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html) for more details. -## Release v24.02.0 +## Release v24.04.0 ### Hardware Requirements: The plugin is tested on the following architectures: @@ -40,17 +40,16 @@ The plugin is tested on the following architectures: Supported Spark versions: Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 - Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3 - Apache Spark 3.4.0, 3.4.1 - Apache Spark 3.5.0 + Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3, 3.3.4 + Apache Spark 3.4.0, 3.4.1, 3.4.2 + Apache Spark 3.5.0, 3.5.1 Supported Databricks runtime versions for Azure and AWS: - Databricks 10.4 ML LTS (GPU, Scala 2.12, Spark 3.2.1) Databricks 11.3 ML LTS (GPU, Scala 2.12, Spark 3.3.0) Databricks 12.2 ML LTS (GPU, Scala 2.12, Spark 3.3.2) Databricks 13.3 ML LTS (GPU, Scala 2.12, Spark 3.4.1) - Supported Dataproc versions: + Supported Dataproc versions (Debian/Ubuntu): GCP Dataproc 2.0 GCP Dataproc 2.1 @@ -68,14 +67,14 @@ for your hardware's minimum driver version. ### RAPIDS Accelerator's Support Policy for Apache Spark The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html) -### Download RAPIDS Accelerator for Apache Spark v24.02.0 +### Download RAPIDS Accelerator for Apache Spark v24.04.0 | Processor | Scala Version | Download Jar | Download Signature | |-----------|---------------|--------------|--------------------| -| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0.jar.asc) | -| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0.jar.asc) | -| arm64 | Scala 2.12 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.02.0/rapids-4-spark_2.12-24.02.0-cuda11-arm64.jar.asc) | -| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.02.0/rapids-4-spark_2.13-24.02.0-cuda11-arm64.jar.asc) | +| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.04.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.0/rapids-4-spark_2.12-24.04.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.0/rapids-4-spark_2.12-24.04.0.jar.asc) | +| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.04.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.0/rapids-4-spark_2.13-24.04.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.0/rapids-4-spark_2.13-24.04.0.jar.asc) | +| arm64 | Scala 2.12 | [RAPIDS Accelerator 
v24.04.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.0/rapids-4-spark_2.12-24.04.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.0/rapids-4-spark_2.12-24.04.0-cuda11-arm64.jar.asc) | +| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.04.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.0/rapids-4-spark_2.13-24.04.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.0/rapids-4-spark_2.13-24.04.0-cuda11-arm64.jar.asc) | This package is built against CUDA 11.8. It is tested on V100, T4, A10, A100, L4 and H100 GPUs with CUDA 11.8 through CUDA 12.0. @@ -84,26 +83,23 @@ CUDA 11.8 through CUDA 12.0. * Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com). * Import the public key: `gpg --import PUB_KEY` * Verify the signature for Scala 2.12 jar: - `gpg --verify rapids-4-spark_2.12-24.02.0.jar.asc rapids-4-spark_2.12-24.02.0.jar` + `gpg --verify rapids-4-spark_2.12-24.04.0.jar.asc rapids-4-spark_2.12-24.04.0.jar` * Verify the signature for Scala 2.13 jar: - `gpg --verify rapids-4-spark_2.13-24.02.0.jar.asc rapids-4-spark_2.13-24.02.0.jar` + `gpg --verify rapids-4-spark_2.13-24.04.0.jar.asc rapids-4-spark_2.13-24.04.0.jar` The output of signature verify: gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) " ### Release Notes -New functionality and performance improvements for this release include: -* Discontinued support for Nvidia GPUs based on Pascal architecture. -* Set get_json_object functionality to disabled by default. -* Implemented string comparison in AST expressions. -* Expanded timezone support to include options beyond UTC. -* Optional checksums for cached files in the file cache. -* Introduced support for Databricks 13.3 ML LTS. -* Added support for parse_url functionality. -* Introducing Lazy Quantifier support for regular expression functions. -* Added support for the format_number function. -* Enhanced batching support for row-based bounded window functions. +* New functionality and performance improvements for this release include: +* Performance improvements for S3 reading. +Refer to perfio.s3.enabled in [advanced_configs](./additional-functionality/advanced_configs.md) for more details. +* Performance improvements when doing a joins on unique keys. +* Enhanced decompression kernels for zstd and snappy. +* Enhanced Parquet reading performance with modular kernels. +* Added compatibility with Spark version 3.5.1. +* Deprecated support for Databricks 10.4 ML LTS. * For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases). For a detailed list of changes, please refer to the diff --git a/docs/supported_ops.md b/docs/supported_ops.md index 65873a40a5a..abba4a34d0c 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -2288,6 +2288,74 @@ are limited. +ArrayFilter +`filter` +Filter an input array using a given predicate +None +project +argument + + + + + + + + + + + + + + +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, UDT
+ + + + + +function +S + + + + + + + + + + + + + + + + + + + +result + + + + + + + + + + + + + + +PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types BINARY, CALENDAR, UDT
+ + + + + ArrayIntersect `array_intersect` Returns an array of the elements in the intersection of array1 and array2, without duplicates @@ -2518,6 +2586,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + ArrayRepeat `array_repeat` Returns the array containing the given input value (left) count (right) times @@ -2586,32 +2680,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - ArrayTransform `transform` Transform elements in an array using the transform function. This is similar to a `map` in functional programming @@ -2910,6 +2978,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + Asin `asin` Inverse sine @@ -3000,32 +3094,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - Asinh `asinh` Inverse hyperbolic sine @@ -3343,6 +3411,32 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT + + AttributeReference References an input column @@ -3391,32 +3485,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT - - BRound `bround` Round an expression to d decimal places using HALF_EVEN rounding mode @@ -6856,7 +6924,7 @@ are limited. GetJsonObject `get_json_object` Extracts a json object from path -This is disabled by default because escape sequences are not processed correctly, the input is not validated, and the output is not normalized the same as Spark +This is disabled by default because Experimental feature that could be unstable or have performance issues. project json @@ -8222,7 +8290,7 @@ are limited. JsonTuple `json_tuple` Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string. -This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports. +This is disabled by default because Experimental feature that could be unstable or have performance issues. project json @@ -10817,7 +10885,7 @@ are limited. -PS
only support partToExtract = PROTOCOL | HOST | QUERY;
Literal value only
+PS
only support partToExtract = PROTOCOL | HOST | QUERY | PATH;
Literal value only
diff --git a/integration_tests/README.md b/integration_tests/README.md index 7237720a114..9493fbb07d1 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -263,7 +263,7 @@ individually, so you don't risk running unit tests along with the integration te http://www.scalatest.org/user_guide/using_the_scalatest_shell ```shell -spark-shell --jars rapids-4-spark-tests_2.12-24.04.0-SNAPSHOT-tests.jar,rapids-4-spark-integration-tests_2.12-24.04.0-SNAPSHOT-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar +spark-shell --jars rapids-4-spark-tests_2.12-24.06.0-SNAPSHOT-tests.jar,rapids-4-spark-integration-tests_2.12-24.06.0-SNAPSHOT-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar ``` First you import the `scalatest_shell` and tell the tests where they can find the test files you @@ -286,7 +286,7 @@ If you just want to verify the SQL replacement is working you will need to add t assumes CUDA 11.0 is being used and the Spark distribution is built with Scala 2.12. ``` -$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar" ./runtests.py +$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar" ./runtests.py ``` You don't have to enable the plugin for this to work, the test framework will do that for you. @@ -409,6 +409,19 @@ the SHS supported values for the config key With `zstd` it's easy to view / decompress event logs using the CLI `zstd -d [--stdout] ` even without the SHS webUI. +### Worker Logs + +NOTE: Available only in local mode i.e. master URL = local[K, F] + +By default, when using xdist the integration tests will write the tests output to console and to a text file +that will appear under the run directory of the form +`integration_tests/target/run_dir--xxxx/WORKERID_worker_logs.log`. The output format of the log and the log level +can be changed by modifying the file `integration_tests/src/test/resources/pytest_log4j.properties`. + +If xdist is not used (e.g., `TEST_PARALLEL=1`) +the worker log will be `integration_tests/target/run_dir--xxxx/gw0_worker_logs.log` as if executed by +worker 0 under xdist. + ### Enabling cudf_udf Tests The cudf_udf tests in this framework are testing Pandas UDF(user-defined function) with cuDF. They are disabled by default not only because of the complicated environment setup, but also because GPU resources scheduling for Pandas UDF is an experimental feature now, the performance may not always be better. 
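For context, the cudf_udf tests exercise Pandas UDFs of roughly the following shape. This is a minimal sketch rather than an actual test from the suite; the column name, UDF body, and session setup are illustrative assumptions. With the `spark.rapids.python.*` settings shown in the `spark-submit` example that follows, the plugin can apply GPU resource scheduling (an experimental feature, as noted above) to the Python workers running such UDFs.

```python
# A minimal, hypothetical Pandas UDF in the style exercised by the cudf_udf tests.
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import LongType

spark = SparkSession.builder.getOrCreate()

@pandas_udf(LongType())
def plus_one(v: pd.Series) -> pd.Series:
    # Runs in a Python worker; with the cudf_udf setup enabled the data may be
    # processed with cuDF on the GPU instead of pandas on the CPU.
    return v + 1

spark.range(0, 1000).withColumn("out", plus_one("id")).show(5)
```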
@@ -430,7 +443,7 @@ To run cudf_udf tests, need following configuration changes: As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 11.0: ``` -$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar,rapids-4-spark-tests_2.12-24.04.0-SNAPSHOT.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar" ./runtests.py --cudf_udf +$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar,rapids-4-spark-tests_2.12-24.06.0-SNAPSHOT.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar" ./runtests.py --cudf_udf ``` ### Enabling fuzz tests diff --git a/integration_tests/ScaleTest.md b/integration_tests/ScaleTest.md index 61031f3c05e..cd99b7cc7e9 100644 --- a/integration_tests/ScaleTest.md +++ b/integration_tests/ScaleTest.md @@ -97,7 +97,7 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.sql.parquet.datetimeRebaseModeInWrite=CORRECTED \ --jars $SPARK_HOME/examples/jars/scopt_2.12-3.7.1.jar \ --class com.nvidia.spark.rapids.tests.scaletest.ScaleTest \ -./target/rapids-4-spark-integration-tests_2.12-24.04.0-SNAPSHOT-spark332.jar \ +./target/rapids-4-spark-integration-tests_2.12-24.06.0-SNAPSHOT-spark332.jar \ 10 \ 100 \ parquet \ diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index aa34a5a8ccd..f413d24b70a 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-integration-tests_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT integration_tests diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 713c06c31e3..dec93e6f22a 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -205,10 +205,11 @@ else fi REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror + STD_INPUT_PATH="$INPUT_PATH"/src/test/resources TEST_COMMON_OPTS=(-v -r"$REPORT_CHARS" "$TEST_TAGS" - --std_input_path="$INPUT_PATH"/src/test/resources + --std_input_path="$STD_INPUT_PATH" --color=yes $TEST_TYPE_PARAM "$TEST_ARGS" @@ -241,7 +242,8 @@ else # Set the Delta log cache size to prevent the driver from caching every Delta log indefinitely export PYSP_TEST_spark_databricks_delta_delta_log_cacheSize=${PYSP_TEST_spark_databricks_delta_delta_log_cacheSize:-10} deltaCacheSize=$PYSP_TEST_spark_databricks_delta_delta_log_cacheSize - export PYSP_TEST_spark_driver_extraJavaOptions="-ea -Duser.timezone=$TZ -Ddelta.log.cacheSize=$deltaCacheSize $COVERAGE_SUBMIT_FLAGS" + DRIVER_EXTRA_JAVA_OPTIONS="-ea -Duser.timezone=$TZ -Ddelta.log.cacheSize=$deltaCacheSize" + export PYSP_TEST_spark_driver_extraJavaOptions="$DRIVER_EXTRA_JAVA_OPTIONS $COVERAGE_SUBMIT_FLAGS" export PYSP_TEST_spark_executor_extraJavaOptions="-ea -Duser.timezone=$TZ" export PYSP_TEST_spark_ui_showConsoleProgress='false' export PYSP_TEST_spark_sql_session_timeZone=$TZ @@ -313,7 +315,11 @@ EOF export 
PYSP_TEST_spark_master="local[$LOCAL_PARALLEL,$SPARK_TASK_MAXFAILURES]" fi fi - + if [[ "$SPARK_SUBMIT_FLAGS" == *"--master local"* || "$PYSP_TEST_spark_master" == "local"* ]]; then + # The only case where we want worker logs is in local mode so we set the value here explicitly + # We can't use the PYSP_TEST_spark_master as it's not always set e.g. when using --master + export USE_WORKER_LOGS=1 + fi # Set a seed to be used in the tests, for datagen export SPARK_RAPIDS_TEST_DATAGEN_SEED=${SPARK_RAPIDS_TEST_DATAGEN_SEED:-${DATAGEN_SEED:-`date +%s`}} echo "SPARK_RAPIDS_TEST_DATAGEN_SEED used: $SPARK_RAPIDS_TEST_DATAGEN_SEED" @@ -370,6 +376,15 @@ EOF then exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}" else + if [[ "$USE_WORKER_LOGS" == "1" ]]; then + # Setting the extraJavaOptions again to set the log4j confs that will be needed for writing logs in the expected location + # We have to export it again because we want to be able to let the user override these confs by setting them on the + # command-line using the COVERAGE_SUBMIT_FLAGS which won't be possible if we were to just say + # export $PYSP_TEST_spark_driver_extraJavaOptions = "$PYSP_TEST_spark_driver_extraJavaOptions $LOG4J_CONF" + LOG4J_CONF="-Dlog4j.configuration=file://$STD_INPUT_PATH/pytest_log4j.properties -Dlogfile=$RUN_DIR/gw0_worker_logs.log" + export PYSP_TEST_spark_driver_extraJavaOptions="$DRIVER_EXTRA_JAVA_OPTIONS $LOG4J_CONF $COVERAGE_SUBMIT_FLAGS" + fi + # We set the GPU memory size to be a constant value even if only running with a parallelism of 1 # because it helps us have consistent test runs. jarOpts=() diff --git a/integration_tests/src/main/python/aqe_test.py b/integration_tests/src/main/python/aqe_test.py index b7968f8e902..ba0553912d4 100755 --- a/integration_tests/src/main/python/aqe_test.py +++ b/integration_tests/src/main/python/aqe_test.py @@ -298,3 +298,40 @@ def do_it(spark): assert_gpu_and_cpu_are_equal_collect(do_it, conf=bhj_disable_conf) + +# See https://github.com/NVIDIA/spark-rapids/issues/10645. Sometimes the exchange can provide multiple +# batches, so we to coalesce them into a single batch for the broadcast hash join. 
+@ignore_order(local=True) +@pytest.mark.skipif(not (is_databricks_runtime()), \ + reason="Executor side broadcast only supported on Databricks") +def test_aqe_join_executor_broadcast_enforce_single_batch(): + # Use a small batch to see if Databricks could send multiple batches + conf = copy_and_update(_adaptive_conf, { "spark.rapids.sql.batchSizeBytes": "25" }) + def prep(spark): + id_gen = RepeatSeqGen(IntegerGen(nullable=False), length=250) + name_gen = RepeatSeqGen(["Adam", "Bob", "Cathy"], data_type=StringType()) + school_gen = RepeatSeqGen(["School1", "School2", "School3"], data_type=StringType()) + + df = gen_df(spark, StructGen([('id', id_gen), ('name', name_gen)], nullable=False), length=1000) + df.createOrReplaceTempView("df") + + df_school = gen_df(spark, StructGen([('id', id_gen), ('school', school_gen)], nullable=False), length=250) + df.createOrReplaceTempView("df_school") + + with_cpu_session(prep) + + def do_it(spark): + res = spark.sql( + """ + select /*+ BROADCAST(df_school) */ * from df, df_school where df.id == df_school.id + """ + ) + res.explain() + return res + # Ensure this is an EXECUTOR_BROADCAST + assert_cpu_and_gpu_are_equal_collect_with_capture( + do_it, + exist_classes="GpuShuffleExchangeExec,GpuBroadcastHashJoinExec", + non_exist_classes="GpuBroadcastExchangeExec", + conf=conf) + diff --git a/integration_tests/src/main/python/array_test.py b/integration_tests/src/main/python/array_test.py index e2d7d1b5c81..0b98bd23439 100644 --- a/integration_tests/src/main/python/array_test.py +++ b/integration_tests/src/main/python/array_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -449,6 +449,33 @@ def do_it(spark): }) +@pytest.mark.parametrize('data_gen', [ + ArrayGen(string_gen), + ArrayGen(int_gen), + ArrayGen(ArrayGen(int_gen)), + ArrayGen(ArrayGen(StructGen([["A", int_gen], ["B", string_gen]])))], ids=idfn) +def test_array_filter(data_gen): + def do_it(spark): + columns = ['a'] + element_type = data_gen.data_type.elementType + if isinstance(element_type, IntegralType): + columns.extend([ + 'filter(a, item -> item % 2 = 0) as filter_even', + 'filter(a, item -> item < 0) as filter_negative', + 'filter(a, item -> item >= 0) as filter_non_negative' + ]) + + if isinstance(element_type, StringType): + columns.extend(['filter(a, entry -> length(entry) > 5) as filter_longer_than_5']) + + if isinstance(element_type, ArrayType): + columns.extend(['filter(a, entry -> size(entry) < 5) as filter_shorter_than_5']) + + return unary_op_df(spark, data_gen).selectExpr(columns) + + assert_gpu_and_cpu_are_equal_collect(do_it) + + array_zips_gen = array_gens_sample + [ArrayGen(map_string_string_gen[0], max_length=5), ArrayGen(BinaryGen(max_length=5), max_length=5)] diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py index c9f0eadab1a..cd4610cf95c 100644 --- a/integration_tests/src/main/python/dpp_test.py +++ b/integration_tests/src/main/python/dpp_test.py @@ -19,7 +19,7 @@ from asserts import assert_cpu_and_gpu_are_equal_collect_with_capture, assert_gpu_and_cpu_are_equal_collect from conftest import spark_tmp_table_factory from data_gen import * -from marks import ignore_order, allow_non_gpu +from marks import ignore_order, allow_non_gpu, datagen_overrides from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later # non-positive values here can produce a degenerative join, so here we ensure that most values are @@ -171,6 +171,7 @@ def fn(spark): # When BroadcastExchangeExec is available on filtering side, and it can be reused: # DynamicPruningExpression(InSubqueryExec(value, GpuSubqueryBroadcastExec))) @ignore_order +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10147") @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn) @pytest.mark.parametrize('s_index', list(range(len(_statements))), ids=idfn) @pytest.mark.parametrize('aqe_enabled', [ diff --git a/integration_tests/src/main/python/get_json_test.py b/integration_tests/src/main/python/get_json_test.py index 935a61e0562..ef405db7e33 100644 --- a/integration_tests/src/main/python/get_json_test.py +++ b/integration_tests/src/main/python/get_json_test.py @@ -14,7 +14,7 @@ import pytest -from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect +from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, with_gpu_session from data_gen import * from pyspark.sql.types import * from marks import * @@ -38,7 +38,7 @@ def test_get_json_object(json_str_pattern): 'get_json_object(\'%s\', "$.store.fruit[0]")' % scalar_json, ), conf={'spark.sql.parser.escapedStringLiterals': 'true', - 'spark.rapids.sql.expression.GetJsonObject': 'true'}) + 'spark.rapids.sql.expression.GetJsonObject': 'true'}) def test_get_json_object_quoted_index(): schema = StructType([StructField("jsonStr", StringType())]) @@ -55,9 +55,7 @@ def test_get_json_object_quoted_index(): DB 10.4 shows incorrect behaviour with single quotes") def test_get_json_object_single_quotes(): schema = StructType([StructField("jsonStr", StringType())]) - 
data = [[r'''{'a':'A'}'''], - [r'''{'b':'"B'}'''], - [r'''{"c":"'C"}''']] + data = [[r'''{'a':'A'}''']] assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data,schema=schema).select( @@ -73,35 +71,17 @@ def test_get_json_object_single_quotes(): "$['key with spaces']", "$.store.book", "$.store.book[0]", - pytest.param("$",marks=[ - pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218'), - pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10196'), - pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10194')]), + "$", "$.store.book[0].category", "$.store.basket[0][1]", "$.store.basket[0][2].b", "$.zip code", "$.fb:testid", - pytest.param("$.a",marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10196')), + "$.a", "$.non_exist_key", "$..no_recursive", - "$.store.book[0].non_exist_key"]) -def test_get_json_object_spark_unit_tests(query): - schema = StructType([StructField("jsonStr", StringType())]) - data = [ - ['''{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025","fb:testid":"1234"}'''], - ['''{ "key with spaces": "it works" }'''], - ['''{"a":"b\nc"}'''], - ['''{"a":"b\"c"}'''], - ["\u0000\u0000\u0000A\u0001AAA"], - ['{"big": "' + ('x' * 3000) + '"}']] - assert_gpu_and_cpu_are_equal_collect( - lambda spark: spark.createDataFrame(data,schema=schema).select( - f.get_json_object('jsonStr', query)), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) - -@allow_non_gpu("ProjectExec", "GetJsonObject") -@pytest.mark.parametrize('query',["$.store.basket[0][*].b", + "$.store.book[0].non_exist_key", + "$.store.basket[0][*].b", "$.store.book[*].reader", "$.store.book[*]", "$.store.book[*].category", @@ -111,16 +91,20 @@ def test_get_json_object_spark_unit_tests(query): "$.store.basket[0][*]", "$.store.basket[*][*]", "$.store.basket[*].non_exist_key"]) -def test_get_json_object_spark_unit_tests_fallback(query): +def test_get_json_object_spark_unit_tests(query): schema = StructType([StructField("jsonStr", StringType())]) - data = [['''{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. 
Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025","fb:testid":"1234"}''']] - assert_gpu_fallback_collect( + data = [ + ['''{"store":{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}},"email":"amy@only_for_json_udf_test.net","owner":"amy","zip code":"94025","fb:testid":"1234"}'''], + ['''{ "key with spaces": "it works" }'''], + ['''{"a":"b\nc"}'''], + ['''{"a":"b\"c"}'''], + ["\u0000\u0000\u0000A\u0001AAA"], + ['{"big": "' + ('x' * 3000) + '"}']] + assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data,schema=schema).select( f.get_json_object('jsonStr', query)), - "GetJsonObject", - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/10218") def test_get_json_object_normalize_non_string_output(): schema = StructType([StructField("jsonStr", StringType())]) data = [[' { "a": "A" } '], @@ -140,7 +124,7 @@ def test_get_json_object_normalize_non_string_output(): lambda spark: spark.createDataFrame(data,schema=schema).select( f.col('jsonStr'), f.get_json_object('jsonStr', '$')), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) def test_get_json_object_quoted_question(): schema = StructType([StructField("jsonStr", StringType())]) @@ -149,9 +133,8 @@ def test_get_json_object_quoted_question(): assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data,schema=schema).select( f.get_json_object('jsonStr',r'''$['?']''').alias('question')), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/10196") def test_get_json_object_escaped_string_data(): schema = StructType([StructField("jsonStr", StringType())]) data = [[r'{"a":"A\"B"}'], @@ -167,7 +150,6 @@ def test_get_json_object_escaped_string_data(): lambda spark: spark.createDataFrame(data,schema=schema).selectExpr('get_json_object(jsonStr,"$.a")'), conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/10196") def test_get_json_object_escaped_key(): schema = StructType([StructField("jsonStr", StringType())]) data = [ @@ -204,9 +186,8 @@ def test_get_json_object_escaped_key(): f.get_json_object('jsonStr', r'$.a\t').alias('qat1'), f.get_json_object('jsonStr','$.a\t').alias('qat2') ), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) -@pytest.mark.xfail(reason="https://github.com/NVIDIA/spark-rapids/issues/10212") def test_get_json_object_invalid_path(): schema = StructType([StructField("jsonStr", 
StringType())]) data = [['{"a":"A"}'], @@ -228,7 +209,7 @@ def test_get_json_object_invalid_path(): f.get_json_object('jsonStr', '[-1]').alias('neg_one_index'), f.get_json_object('jsonStr', '$.c[-1]').alias('c_neg_one_index'), ), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) def test_get_json_object_top_level_array_notation(): # This is a special version of invalid path. It is something that the GPU supports @@ -245,7 +226,7 @@ def test_get_json_object_top_level_array_notation(): f.get_json_object('jsonStr', '''['a']''').alias('sub_a'), f.get_json_object('jsonStr', '''$['b']''').alias('sub_b'), ), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) def test_get_json_object_unquoted_array_notation(): # This is a special version of invalid path. It is something that the GPU supports @@ -261,7 +242,7 @@ def test_get_json_object_unquoted_array_notation(): f.get_json_object('jsonStr', '$[1]').alias('one_index'), f.get_json_object('jsonStr', '''$['1']''').alias('quoted_one_index'), f.get_json_object('jsonStr', '$[a1]').alias('a_one_index')), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) def test_get_json_object_white_space_removal(): @@ -299,8 +280,63 @@ def test_get_json_object_white_space_removal(): f.get_json_object('jsonStr', "$['a .a ']").alias('a_space_dot_a_space'), f.get_json_object('jsonStr', "$[' a . a ']").alias('space_a_space_dot_space_a_space'), ), - conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + + +def test_get_json_object_jni_java_tests(): + schema = StructType([StructField("jsonStr", StringType())]) + data = [['\'abc\''], + ['[ [11, 12], [21, [221, [2221, [22221, 22222]]]], [31, 32] ]'], + ['123'], + ['{ \'k\' : \'v\' }'], + ['[ [[[ {\'k\': \'v1\'} ], {\'k\': \'v2\'}]], [[{\'k\': \'v3\'}], {\'k\': \'v4\'}], {\'k\': \'v5\'} ]'], + ['[1, [21, 22], 3]'], + ['[ {\'k\': [0, 1, 2]}, {\'k\': [10, 11, 12]}, {\'k\': [20, 21, 22]} ]'], + ['[ [0], [10, 11, 12], [2] ]'], + ['[[0, 1, 2], [10, [111, 112, 113], 12], [20, 21, 22]]'], + ['[[0, 1, 2], [10, [], 12], [20, 21, 22]]'], + ['{\'k\' : [0,1,2]}'], + ['{\'k\' : null}'] + ] + + assert_gpu_and_cpu_are_equal_collect( + lambda spark: spark.createDataFrame(data,schema=schema).select( + f.col('jsonStr'), + f.get_json_object('jsonStr', '$').alias('dollor'), + f.get_json_object('jsonStr', '$[*][*]').alias('s_w_s_w'), + f.get_json_object('jsonStr', '$.k').alias('dot_k'), + f.get_json_object('jsonStr', '$[*]').alias('s_w'), + f.get_json_object('jsonStr', '$[*].k[*]').alias('s_w_k_s_w'), + f.get_json_object('jsonStr', '$[1][*]').alias('s_1_s_w'), + f.get_json_object('jsonStr', "$[1][1][*]").alias('s_1_s_1_s_w'), + f.get_json_object('jsonStr', "$.k[1]").alias('dot_k_s_1'), + f.get_json_object('jsonStr', "$.*").alias('w'), + ), + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + +@allow_non_gpu('ProjectExec') +def test_get_json_object_deep_nested_json(): + schema = StructType([StructField("jsonStr", StringType())]) + data = [['{"a":{"b":{"c":{"d":{"e":{"f":{"g":{"h":{"i":{"j":{"k":{"l":{"m":{"n":{"o":{"p":{"q":{"r":{"s":{"t":{"u":{"v":{"w":{"x":{"y":{"z":"A"}}' + ]] + assert_gpu_and_cpu_are_equal_collect( + lambda spark: spark.createDataFrame(data,schema=schema).select( + f.get_json_object('jsonStr', '$.a.b.c.d.e.f.g.h.i').alias('i'), + 
f.get_json_object('jsonStr', '$.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p').alias('p') + ), + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) + +@allow_non_gpu('ProjectExec') +def test_get_json_object_deep_nested_json_fallback(): + schema = StructType([StructField("jsonStr", StringType())]) + data = [['{"a":{"b":{"c":{"d":{"e":{"f":{"g":{"h":{"i":{"j":{"k":{"l":{"m":{"n":{"o":{"p":{"q":{"r":{"s":{"t":{"u":{"v":{"w":{"x":{"y":{"z":"A"}}' + ]] + assert_gpu_fallback_collect( + lambda spark: spark.createDataFrame(data,schema=schema).select( + f.get_json_object('jsonStr', '$.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z').alias('z')), + 'GetJsonObject', + conf={'spark.rapids.sql.expression.GetJsonObject': 'true'}) @allow_non_gpu('ProjectExec') @pytest.mark.parametrize('json_str_pattern', [r'\{"store": \{"fruit": \[\{"weight":\d,"type":"[a-z]{1,9}"\}\], ' \ @@ -316,8 +352,42 @@ def assert_gpu_did_fallback(sql_text): gen_df(spark, [('a', gen), ('b', pattern)], length=10).selectExpr(sql_text), 'GetJsonObject', conf={'spark.sql.parser.escapedStringLiterals': 'true', - 'spark.rapids.sql.expression.GetJsonObject': 'true'}) + 'spark.rapids.sql.expression.GetJsonObject': 'true'}) assert_gpu_did_fallback('get_json_object(a, b)') assert_gpu_did_fallback('get_json_object(\'%s\', b)' % scalar_json) +@pytest.mark.parametrize('json_str_pattern', [r'\{"store": \{"fruit": \[\{"weight":\d,"type":"[a-z]{1,9}"\}\], ' \ + r'"bicycle":\{"price":[1-9]\d\.\d\d,"color":"[a-z]{0,4}"\}\},' \ + r'"email":"[a-z]{1,5}\@[a-z]{3,10}\.com","owner":"[a-z]{3,8}"\}', + r'\{"a": "[a-z]{1,3}"\}'], ids=idfn) +def test_get_json_object_legacy(json_str_pattern): + gen = mk_json_str_gen(json_str_pattern) + scalar_json = '{"store": {"fruit": [{"name": "test"}]}}' + assert_gpu_and_cpu_are_equal_collect( + lambda spark: unary_op_df(spark, gen, length=10).selectExpr( + 'get_json_object(a,"$.a")', + 'get_json_object(a, "$.owner")', + 'get_json_object(a, "$.store.fruit[0]")', + 'get_json_object(\'%s\', "$.store.fruit[0]")' % scalar_json, + ), + conf={'spark.rapids.sql.expression.GetJsonObject': 'true', + 'spark.sql.parser.escapedStringLiterals': 'true', + 'spark.rapids.sql.getJsonObject.legacy.enabled': 'true'}) + +# In the legacy mode, the output of get_json_object is not normalized. +# Verify that the output is not normalized for floating point to check the legacy mode is working. 
+def test_get_json_object_number_normalization_legacy(): + schema = StructType([StructField("jsonStr", StringType())]) + data = [['[100.0,200.000,351.980]'], + ['[12345678900000000000.0]'], + ['[12345678900000000000]'], + ['[1' + '0'* 400 + ']'], + ['[1E308]'], + ['[1.0E309,-1E309,1E5000]']] + gpu_result = with_gpu_session(lambda spark: spark.createDataFrame(data,schema=schema).select( + f.col('jsonStr'), + f.get_json_object('jsonStr', '$')).collect(), + conf={'spark.rapids.sql.expression.GetJsonObject': 'true', + 'spark.rapids.sql.getJsonObject.legacy.enabled': 'true'}) + assert([[row[1]] for row in gpu_result] == data) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 5d2bb7d658d..4b28574b677 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -917,6 +917,7 @@ def exact_percentile_reduction(df): 'percentile(val, array(0, 0.0001, 0.5, 0.9999, 1), abs(freq))' ) +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10233") @pytest.mark.parametrize('data_gen', exact_percentile_reduction_data_gen, ids=idfn) def test_exact_percentile_reduction(data_gen): assert_gpu_and_cpu_are_equal_collect( @@ -993,6 +994,7 @@ def exact_percentile_groupby(df): ) @ignore_order +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10719") @pytest.mark.parametrize('data_gen', exact_percentile_groupby_data_gen, ids=idfn) def test_exact_percentile_groupby(data_gen): assert_gpu_and_cpu_are_equal_collect( @@ -1010,6 +1012,7 @@ def test_exact_percentile_groupby(data_gen): @allow_non_gpu('ObjectHashAggregateExec', 'SortAggregateExec', 'ShuffleExchangeExec', 'HashPartitioning', 'AggregateExpression', 'Alias', 'Cast', 'Literal', 'ProjectExec', 'Percentile') +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10738") @pytest.mark.parametrize('data_gen', exact_percentile_groupby_cpu_fallback_data_gen, ids=idfn) @pytest.mark.parametrize('replace_mode', ['partial', 'final|complete'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) @@ -1080,6 +1083,7 @@ def test_hash_multiple_mode_query(data_gen, conf): @approximate_float @ignore_order @incompat +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10234") @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @@ -1093,6 +1097,7 @@ def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @approximate_float @ignore_order @incompat +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10388") @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): diff --git a/integration_tests/src/main/python/json_matrix_test.py b/integration_tests/src/main/python/json_matrix_test.py index 0156a5c1c9a..83b741491b7 100644 --- a/integration_tests/src/main/python/json_matrix_test.py +++ b/integration_tests/src/main/python/json_matrix_test.py @@ -52,8 +52,7 @@ def read_json_as_text(spark, data_path, column_name): 'spark.rapids.sql.format.json.read.enabled': 'true', 'spark.rapids.sql.json.read.float.enabled': 'true', 'spark.rapids.sql.json.read.double.enabled': 'true', - 
'spark.rapids.sql.json.read.decimal.enabled': 'true', - 'spark.rapids.sql.json.read.mixedTypesAsString.enabled': 'true' + 'spark.rapids.sql.json.read.decimal.enabled': 'true' } _enable_json_to_structs_conf = { @@ -61,7 +60,7 @@ def read_json_as_text(spark, data_path, column_name): 'spark.rapids.sql.json.read.float.enabled': 'true', 'spark.rapids.sql.json.read.double.enabled': 'true', 'spark.rapids.sql.json.read.decimal.enabled': 'true', - 'spark.rapids.sql.json.read.mixedTypesAsString.enabled': 'true' + 'spark.rapids.sql.json.read.decimal.enabled': 'true' } _enable_get_json_object_conf = { @@ -114,15 +113,13 @@ def test_from_json_allow_comments_off(std_input_path): # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10194') def test_get_json_object_allow_comments_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_COMMENTS_FILE, "json").selectExpr('''get_json_object(json, "$.str")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_json_tuple_allow_comments_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_COMMENTS_FILE, "json").selectExpr('''json_tuple(json, "str")'''), @@ -173,7 +170,7 @@ def test_from_json_allow_single_quotes_on(std_input_path): def test_get_json_object_allow_single_quotes_on(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").selectExpr('''get_json_object(json, "$.str")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # On is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) @@ -224,15 +221,13 @@ def test_from_json_allow_unquoted_field_names_on(std_input_path): # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_get_json_object_allow_unquoted_field_names_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTE_FIELD_NAMES_FILE, "json").selectExpr('''get_json_object(json, "$.str")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_json_tuple_allow_unquoted_field_names_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTE_FIELD_NAMES_FILE, "json").selectExpr('''json_tuple(json, "str")'''), @@ -285,16 +280,14 @@ def test_from_json_allow_numeric_leading_zeros_off(std_input_path): # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_get_json_object_allow_numeric_leading_zeros_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").selectExpr('''get_json_object(json, "$.byte")''', 
'''get_json_object(json, "$.int")''', '''get_json_object(json, "$.float")''','''get_json_object(json, "$.decimal")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_json_tuple_allow_numeric_leading_zeros_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").selectExpr('''json_tuple(json, "byte", "int", "float", "decimal")'''), @@ -349,16 +342,14 @@ def test_from_json_allow_nonnumeric_numbers_on(std_input_path): # Off is the default for get_json_object so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_get_json_object_allow_nonnumeric_numbers_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").selectExpr('''get_json_object(json, "$.float")''', '''get_json_object(json, "$.double")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # Off is the default for json_tuple, so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_json_tuple_allow_nonnumeric_numbers_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").selectExpr('''json_tuple(json, "float", "double")'''), @@ -407,15 +398,13 @@ def test_from_json_allow_backslash_escape_any_on(std_input_path): # Off is the default for get_json_object so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_get_json_object_allow_backslash_escape_any_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_BS_ESC_FILE, "json").selectExpr('''get_json_object(json, "$.str")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # Off is the default for json_tuple, so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10454') def test_json_tuple_allow_backslash_escape_any_off(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_BS_ESC_FILE, "json").selectExpr('''json_tuple(json, "str")'''), @@ -466,7 +455,7 @@ def test_from_json_allow_unquoted_control_chars_on(std_input_path): def test_get_json_object_allow_unquoted_control_chars_on(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTED_CONTROL_FILE, "json").selectExpr('''get_json_object(json, "$.str")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) # On is the default for json_tuple, so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) @@ -525,7 +514,7 @@ def test_from_json_dec_locale(std_input_path, locale): def test_get_json_object_dec_locale(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_FILE, "json").selectExpr('''get_json_object(json, "$.data")'''), 
- conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) #There is no way to set a locale for these, and it really should not matter @allow_non_gpu(TEXT_INPUT_EXEC) @@ -584,7 +573,7 @@ def test_from_json_dec_locale_non_aribic(std_input_path, locale): def test_get_json_object_dec_locale_non_aribic(std_input_path): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_NON_ARIBIC_FILE, "json").selectExpr('''get_json_object(json, "$.data")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) #There is no way to set a locale for these, and it really should not matter @allow_non_gpu(TEXT_INPUT_EXEC) @@ -766,39 +755,39 @@ def test_from_json_strings(std_input_path, input_file): conf =_enable_json_to_structs_conf) @pytest.mark.parametrize('input_file', [ - pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("float_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("sci_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), + "int_formatted.json", + "float_formatted.json", + "sci_formatted.json", "int_formatted_strings.json", "float_formatted_strings.json", "sci_formatted_strings.json", "decimal_locale_formatted_strings.json", "single_quoted_strings.json", - pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("invalid_ridealong_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), - pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218'))]) + "boolean_formatted.json", + "invalid_ridealong_columns.json", + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json"]) @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_formats(std_input_path, input_file): assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", '''get_json_object(json, "$.data")'''), - conf =_enable_get_json_object_conf) + conf=_enable_get_json_object_conf) @pytest.mark.parametrize('input_file', [ - pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("float_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("sci_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), + "int_formatted.json", + "float_formatted.json", + "sci_formatted.json", "int_formatted_strings.json", "float_formatted_strings.json", "sci_formatted_strings.json", "decimal_locale_formatted_strings.json", "single_quoted_strings.json", - pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("invalid_ridealong_columns.json", 
marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), - pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218')), - pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10218'))]) + "boolean_formatted.json", + "invalid_ridealong_columns.json", + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json"]) @allow_non_gpu(TEXT_INPUT_EXEC) def test_json_tuple_formats(std_input_path, input_file): assert_gpu_and_cpu_are_equal_collect( diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index 6ddebd1b9eb..e5365992fa4 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -382,8 +382,7 @@ def test_read_invalid_json(spark_tmp_table_factory, std_input_path, read_func, f @pytest.mark.parametrize('v1_enabled_list', ["", "json"]) def test_read_valid_json(spark_tmp_table_factory, std_input_path, read_func, filename, schema, v1_enabled_list): conf = copy_and_update(_enable_all_types_conf, - {'spark.sql.sources.useV1SourceList': v1_enabled_list, - 'spark.rapids.sql.json.read.mixedTypesAsString.enabled': True}) + {'spark.sql.sources.useV1SourceList': v1_enabled_list}) assert_gpu_and_cpu_are_equal_collect( read_func(std_input_path + '/' + filename, schema, @@ -689,7 +688,6 @@ def test_from_json_struct_boolean(pattern): conf=_enable_all_types_conf) @allow_non_gpu(*non_utc_allow) -@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10349') def test_from_json_struct_decimal(): json_string_gen = StringGen(r'{ "a": "[+-]?([0-9]{0,5})?(\.[0-9]{0,2})?([eE][+-]?[0-9]{1,2})?" 
}') \ .with_special_pattern('', weight=50) \ @@ -899,11 +897,10 @@ def test_from_json_struct_of_list(schema): @pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10351') def test_from_json_mixed_types_list_struct(schema): json_string_gen = StringGen(r'{"a": (\[1,2,3\]|{"b":"[a-z]{2}"}) }') - conf = copy_and_update(_enable_all_types_conf, {'spark.rapids.sql.json.read.mixedTypesAsString.enabled': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, json_string_gen) \ .select('a', f.from_json('a', schema)), - conf=conf) + conf=_enable_all_types_conf) @pytest.mark.parametrize('schema', ['struct', 'struct']) @allow_non_gpu(*non_utc_allow) diff --git a/integration_tests/src/main/python/json_tuple_test.py b/integration_tests/src/main/python/json_tuple_test.py index a53e543150f..7de605dda8a 100644 --- a/integration_tests/src/main/python/json_tuple_test.py +++ b/integration_tests/src/main/python/json_tuple_test.py @@ -58,16 +58,26 @@ def test_json_tuple_with_large_number_of_fields_fallback(json_str_pattern): "JsonTuple", conf={'spark.sql.parser.escapedStringLiterals': 'true', 'spark.rapids.sql.expression.JsonTuple': 'true'}) - -@allow_non_gpu('GenerateExec', 'JsonTuple') + @pytest.mark.parametrize('json_str_pattern', json_str_patterns, ids=idfn) -def test_json_tuple_with_special_characters_fallback(json_str_pattern): +def test_json_tuple_with_special_characters(json_str_pattern): gen = mk_json_str_gen(json_str_pattern) special_characters = ['.', '[', ']', '{', '}', '\\\\', '\'', '\\\"'] for special_character in special_characters: - assert_gpu_fallback_collect( + assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, gen, length=10).selectExpr( 'json_tuple(a, "a", "a' + special_character + '")'), - "JsonTuple", conf={'spark.sql.parser.escapedStringLiterals': 'true', 'spark.rapids.sql.expression.JsonTuple': 'true'}) + +def test_json_tuple_with_slash_backslash(): + schema = StructType([StructField("jsonStr", StringType())]) + data = [['{"url":"https:\/\/www.nvidia.com\/1\/pic\/-1234.jpg","item":[],"info":{"id":12345}}'], + ['{"info":[{"foo":0}],"from":"bar","url":[{"title":"测试\\\\\测试 测试","value_3":"测试;测试;测试"}]}'], + ['{"number":"1234567890","info":[{"foo":0}],"from":"bar"}']] + + assert_gpu_and_cpu_are_equal_collect( + lambda spark: spark.createDataFrame(data, schema).selectExpr( + 'json_tuple(jsonStr, "url", "info")'), + conf={'spark.sql.parser.escapedStringLiterals': 'true', + 'spark.rapids.sql.expression.JsonTuple': 'true'}) \ No newline at end of file diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index ff47d0020f3..e14a465d8e0 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -563,6 +563,7 @@ def test_character_classes(): ), conf=_regexp_conf) +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10641") def test_regexp_choice(): gen = mk_str_gen('[abcd]{1,3}[0-9]{1,3}[abcd]{1,3}[ \n\t\r]{0,2}') assert_gpu_and_cpu_are_equal_collect( diff --git a/integration_tests/src/main/python/spark_init_internal.py b/integration_tests/src/main/python/spark_init_internal.py index 7be5291e471..6cb5c49fc71 100644 --- a/integration_tests/src/main/python/spark_init_internal.py +++ b/integration_tests/src/main/python/spark_init_internal.py @@ -14,9 +14,9 @@ import logging import os +import pytest import re import stat -import sys logging.basicConfig( format="%(asctime)s %(levelname)-8s 
%(message)s", @@ -96,6 +96,7 @@ def create_tmp_hive(): except Exception as e: logging.warn(f"Failed to setup the hive scratch dir {path}. Error {e}") +# Entry point into this file def pytest_sessionstart(session): # initializations that must happen globally once before tests start # if xdist in the coordinator, if not xdist in the pytest process @@ -131,10 +132,12 @@ def pytest_sessionstart(session): if ('PYTEST_XDIST_WORKER' in os.environ): wid = os.environ['PYTEST_XDIST_WORKER'] - _handle_derby_dir(_sb, driver_opts, wid) _handle_event_log_dir(_sb, wid) + driver_opts += _get_driver_opts_for_worker_logs(_sb, wid) + _handle_derby_dir(_sb, driver_opts, wid) _handle_ivy_cache_dir(_sb, wid) else: + driver_opts += _get_driver_opts_for_worker_logs(_sb, 'gw0') _sb.config('spark.driver.extraJavaOptions', driver_opts) _handle_event_log_dir(_sb, 'gw0') @@ -154,6 +157,45 @@ def _handle_derby_dir(sb, driver_opts, wid): os.makedirs(d) sb.config('spark.driver.extraJavaOptions', driver_opts + ' -Dderby.system.home={}'.format(d)) +def _use_worker_logs(): + return os.environ.get('USE_WORKER_LOGS') == '1' + +# Create a named logger to be used for only logging test name in `log_test_name` +logger = logging.getLogger('__pytest_worker_logger__') +def _get_driver_opts_for_worker_logs(_sb, wid): + if not _use_worker_logs(): + logging.info("Not setting worker logs. Worker logs on non-local mode are sent to the location pre-configured " + "by the user") + return "" + + current_directory = os.path.abspath(os.path.curdir) + log_file = '{}/{}_worker_logs.log'.format(current_directory, wid) + + from conftest import get_std_input_path + std_input_path = get_std_input_path() + # This is not going to take effect when TEST_PARALLEL=1 as it's set as a conf when calling spark-submit + driver_opts = ' -Dlog4j.configuration=file://{}/pytest_log4j.properties '.format(std_input_path) + \ + ' -Dlogfile={}'.format(log_file) + + # Set up Logging to the WORKERID_worker_logs + # Note: This logger is only used for logging the test name in method `log_test_name`. 
+ global logger + logger.setLevel(logging.INFO) + # Create file handler to output logs into corresponding worker log file + # This file_handler is modifying the worker_log file that the plugin will also write to + # The reason for doing this is to get all test logs in one place from where we can do other analysis + # that might be needed in future to look at the execs that were used in our integration tests + file_handler = logging.FileHandler(log_file) + # Set the formatter for the file handler, we match the formatter from the basicConfig for consistency in logs + formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s", + datefmt="%Y-%m-%d %H:%M:%S") + + file_handler.setFormatter(formatter) + + # Add the file handler to the logger + logger.addHandler(file_handler) + + return driver_opts def _handle_event_log_dir(sb, wid): if os.environ.get('SPARK_EVENTLOG_ENABLED', str(True)).lower() in [ @@ -208,3 +250,7 @@ def get_spark_i_know_what_i_am_doing(): def spark_version(): return _spark.version + +@pytest.fixture(scope='function', autouse=_use_worker_logs()) +def log_test_name(request): + logger.info("Running test '{}'".format(request.node.nodeid)) diff --git a/integration_tests/src/main/python/udf_test.py b/integration_tests/src/main/python/udf_test.py index 4060166ebff..e28b8512108 100644 --- a/integration_tests/src/main/python/udf_test.py +++ b/integration_tests/src/main/python/udf_test.py @@ -464,3 +464,13 @@ def func(iterator): assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.range(0, 10, 1, 1).mapInArrow(func, "id long", is_barrier)) + + +def test_pandas_udf_rows_only(): + def add_one(a): + return a + 1 + my_udf = f.pandas_udf(add_one, returnType=IntegerType()) + assert_gpu_and_cpu_are_equal_collect( + lambda spark: unary_op_df(spark, int_gen, num_slices=4, length=52345) + .select(my_udf(f.lit(0))), + conf=arrow_udf_conf) diff --git a/integration_tests/src/main/python/url_test.py b/integration_tests/src/main/python/url_test.py index ca6bae1853f..157056ee6b8 100644 --- a/integration_tests/src/main/python/url_test.py +++ b/integration_tests/src/main/python/url_test.py @@ -148,10 +148,8 @@ url_gen = StringGen(url_pattern) -supported_parts = ['PROTOCOL', 'HOST', 'QUERY'] -unsupported_parts = ['PATH', 'REF', 'FILE', 'AUTHORITY', 'USERINFO'] -supported_with_key_parts = ['PROTOCOL', 'HOST', 'QUERY'] -unsupported_with_key_parts = ['PATH', 'REF', 'FILE', 'AUTHORITY', 'USERINFO'] +supported_parts = ['PROTOCOL', 'HOST', 'QUERY', 'PATH'] +unsupported_parts = ['REF', 'FILE', 'AUTHORITY', 'USERINFO'] @pytest.mark.parametrize('data_gen', [url_gen, edge_cases_gen], ids=idfn) @pytest.mark.parametrize('part', supported_parts, ids=idfn) @@ -190,13 +188,13 @@ def test_parse_url_query_with_key_regex_fallback(key): .selectExpr("a", "parse_url(a, 'QUERY', '" + key + "')"), 'ParseUrl') -@pytest.mark.parametrize('part', supported_with_key_parts, ids=idfn) +@pytest.mark.parametrize('part', supported_parts, ids=idfn) def test_parse_url_with_key(part): assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, url_gen).selectExpr("parse_url(a, '" + part + "', 'key')")) @allow_non_gpu('ProjectExec', 'ParseUrl') -@pytest.mark.parametrize('part', unsupported_with_key_parts, ids=idfn) +@pytest.mark.parametrize('part', unsupported_parts, ids=idfn) def test_parse_url_with_key_fallback(part): assert_gpu_fallback_collect( lambda spark: unary_op_df(spark, url_gen).selectExpr("parse_url(a, '" + part + "', 'key')"), diff --git a/integration_tests/src/test/resources/log4j.properties 
b/integration_tests/src/test/resources/log4j.properties index e50b3a70971..6cc51f4894f 100644 --- a/integration_tests/src/test/resources/log4j.properties +++ b/integration_tests/src/test/resources/log4j.properties @@ -1,5 +1,5 @@ # -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,9 @@ # limitations under the License. # +## This properties file configures the logs generated by scala-test while running scala tests +## TODO: This file may not be needed as there is a log4j2.properties file which might take precedence + log4j.rootCategory=INFO, file log4j.appender.file=org.apache.log4j.FileAppender log4j.appender.file.append=true @@ -25,4 +28,4 @@ log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{ log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n \ No newline at end of file +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n diff --git a/integration_tests/src/test/resources/log4j2.properties b/integration_tests/src/test/resources/log4j2.properties index 90d7dd3d469..778f2e7e8e9 100644 --- a/integration_tests/src/test/resources/log4j2.properties +++ b/integration_tests/src/test/resources/log4j2.properties @@ -1,5 +1,5 @@ # -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,8 @@ # limitations under the License. # +## This properties file configures the logs generated by scala-test while running scala tests + # log level of log4j itself status=warn diff --git a/integration_tests/src/test/resources/pytest_log4j.properties b/integration_tests/src/test/resources/pytest_log4j.properties new file mode 100644 index 00000000000..6927ab2c1a7 --- /dev/null +++ b/integration_tests/src/test/resources/pytest_log4j.properties @@ -0,0 +1,42 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +## This properties file is used to configure logs generated when running integration tests using xdist + +log4j.appender.myConsoleAppender=org.apache.log4j.ConsoleAppender +log4j.appender.myConsoleAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.myConsoleAppender.layout.ConversionPattern=%d [%t] %-5p %c - %m%n + +log4j.appender.RollingAppender=org.apache.log4j.DailyRollingFileAppender +log4j.appender.RollingAppender.File=${logfile} +log4j.appender.RollingAppender.DatePattern='.'yyyy-MM-dd +log4j.appender.RollingAppender.layout=org.apache.log4j.PatternLayout +log4j.appender.RollingAppender.layout.ConversionPattern=[%p] %d %c %M - %m%n + +log4j.rootLogger=INFO, RollingAppender, myConsoleAppender + +log4j.logger.spark.storage=INFO, RollingAppender +log4j.additivity.spark.storage=false +log4j.logger.spark.scheduler=INFO, RollingAppender +log4j.additivity.spark.scheduler=false +log4j.logger.spark.CacheTracker=INFO, RollingAppender +log4j.additivity.spark.CacheTracker=false +log4j.logger.spark.CacheTrackerActor=INFO, RollingAppender +log4j.additivity.spark.CacheTrackerActor=false +log4j.logger.spark.MapOutputTrackerActor=INFO, RollingAppender +log4j.additivity.spark.MapOutputTrackerActor=false +log4j.logger.spark.MapOutputTracker=INFO, RollingAppender +log4j.additivity.spark.MapOutputTracker=false diff --git a/jdk-profiles/pom.xml b/jdk-profiles/pom.xml index e3e9817d4e6..ad865205946 100644 --- a/jdk-profiles/pom.xml +++ b/jdk-profiles/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT com.nvidia rapids-4-spark-jdk-profiles_2.12 pom Shim JDK Profiles - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT jdk9plus diff --git a/jenkins/Dockerfile-blossom.multi b/jenkins/Dockerfile-blossom.multi index b5897f01881..3884c437ff7 100644 --- a/jenkins/Dockerfile-blossom.multi +++ b/jenkins/Dockerfile-blossom.multi @@ -1,5 +1,5 @@ # -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ ARG CUDA_VER=11.8.0 ARG UBUNTU_VER=20.04 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 # multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host # check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) FROM --platform=$TARGETPLATFORM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} diff --git a/jenkins/Dockerfile-blossom.ubuntu b/jenkins/Dockerfile-blossom.ubuntu index b3366a5362f..64b0f4f26a1 100644 --- a/jenkins/Dockerfile-blossom.ubuntu +++ b/jenkins/Dockerfile-blossom.ubuntu @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ ARG CUDA_VER=11.0.3 ARG UBUNTU_VER=20.04 -ARG UCX_VER=1.15.0 +ARG UCX_VER=1.16.0 ARG UCX_CUDA_VER=11 FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG CUDA_VER diff --git a/jenkins/Jenkinsfile-blossom.premerge b/jenkins/Jenkinsfile-blossom.premerge index 1656d3d2a3a..e7bb8af2cdd 100755 --- a/jenkins/Jenkinsfile-blossom.premerge +++ b/jenkins/Jenkinsfile-blossom.premerge @@ -95,7 +95,7 @@ pipeline { def title = githubHelper.getIssue().title.toLowerCase() if (title ==~ /.*\[skip ci\].*/) { - githubHelper.updateCommitStatus("$BUILD_URL", "Skipped", GitHubCommitState.SUCCESS) + githubHelper.updateCommitStatus("", "Skipped", GitHubCommitState.SUCCESS) currentBuild.result == "SUCCESS" skipped = true return @@ -129,7 +129,7 @@ pipeline { steps { script { - githubHelper.updateCommitStatus("$BUILD_URL", "Running", GitHubCommitState.PENDING) + githubHelper.updateCommitStatus("", "Running", GitHubCommitState.PENDING) unstash "source_tree" container('cpu') { // check if pre-merge dockerfile modified @@ -279,7 +279,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" } steps { script { - githubHelper.updateCommitStatus("$BUILD_URL", "Running - includes databricks", GitHubCommitState.PENDING) + githubHelper.updateCommitStatus("", "Running - includes databricks", GitHubCommitState.PENDING) def DBJob = build(job: 'rapids-databricks_premerge-github', propagate: false, wait: true, parameters: [ @@ -313,7 +313,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" } if (currentBuild.currentResult == "SUCCESS") { - githubHelper.updateCommitStatus("$BUILD_URL", "Success", GitHubCommitState.SUCCESS) + githubHelper.updateCommitStatus("", "Success", GitHubCommitState.SUCCESS) } else { // upload log only in case of build failure def guardWords = ["gitlab.*?\\.com", "urm.*?\\.com", @@ -323,7 +323,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" githubHelper.uploadParallelLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords) if (currentBuild.currentResult != "ABORTED") { // skip ABORTED result to avoid status overwrite - githubHelper.updateCommitStatus("$BUILD_URL", "Fail", GitHubCommitState.FAILURE) + githubHelper.updateCommitStatus("", "Fail", GitHubCommitState.FAILURE) } } diff --git a/jenkins/databricks/create.py b/jenkins/databricks/create.py index 8e11abeb6ed..4da25a328b8 100644 --- a/jenkins/databricks/create.py +++ b/jenkins/databricks/create.py @@ -27,7 +27,7 @@ def main(): workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com' token = '' sshkey = '' - cluster_name = 'CI-GPU-databricks-24.04.0-SNAPSHOT' + cluster_name = 'CI-GPU-databricks-24.06.0-SNAPSHOT' idletime = 240 runtime = '7.0.x-gpu-ml-scala2.12' num_workers = 1 diff --git a/jenkins/databricks/init_cudf_udf.sh b/jenkins/databricks/init_cudf_udf.sh index 529c071d6e9..f214c7d27e9 100755 --- a/jenkins/databricks/init_cudf_udf.sh +++ b/jenkins/databricks/init_cudf_udf.sh @@ -20,7 +20,7 @@ set -ex -CUDF_VER=${CUDF_VER:-24.04} +CUDF_VER=${CUDF_VER:-24.06} CUDA_VER=${CUDA_VER:-11.8} # Need to explicitly add conda into PATH environment, to activate conda environment. 
diff --git a/jenkins/deploy.sh b/jenkins/deploy.sh index 75b52fb94d9..522510cbc6a 100755 --- a/jenkins/deploy.sh +++ b/jenkins/deploy.sh @@ -80,14 +80,15 @@ cp $JS_FPATH-javadoc.jar $FPATH-javadoc.jar echo "Plan to deploy ${FPATH}.jar to $SERVER_URL (ID:$SERVER_ID)" +GPG_PLUGIN="org.apache.maven.plugins:maven-gpg-plugin:3.1.0:sign-and-deploy-file" ###### Choose the deploy command ###### if [ "$SIGN_FILE" == true ]; then case $SIGN_TOOL in nvsec) - DEPLOY_CMD="$MVN gpg:sign-and-deploy-file -Dgpg.executable=nvsec_sign" + DEPLOY_CMD="$MVN $GPG_PLUGIN -Dgpg.executable=nvsec_sign" ;; gpg) - DEPLOY_CMD="$MVN gpg:sign-and-deploy-file -Dgpg.passphrase=$GPG_PASSPHRASE " + DEPLOY_CMD="$MVN $GPG_PLUGIN -Dgpg.passphrase=$GPG_PASSPHRASE " ;; *) echo "Error unsupported sign type : $SIGN_TYPE !" diff --git a/jenkins/version-def.sh b/jenkins/version-def.sh index c31c888002a..45f46940c37 100755 --- a/jenkins/version-def.sh +++ b/jenkins/version-def.sh @@ -26,11 +26,11 @@ for VAR in $OVERWRITE_PARAMS; do done IFS=$PRE_IFS -CUDF_VER=${CUDF_VER:-"24.04.0-SNAPSHOT"} +CUDF_VER=${CUDF_VER:-"24.06.0-SNAPSHOT"} CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-"cuda11"} CLASSIFIER=${CLASSIFIER:-"$CUDA_CLASSIFIER"} # default as CUDA_CLASSIFIER for compatibility -PROJECT_VER=${PROJECT_VER:-"24.04.0-SNAPSHOT"} -PROJECT_TEST_VER=${PROJECT_TEST_VER:-"24.04.0-SNAPSHOT"} +PROJECT_VER=${PROJECT_VER:-"24.06.0-SNAPSHOT"} +PROJECT_TEST_VER=${PROJECT_TEST_VER:-"24.06.0-SNAPSHOT"} SPARK_VER=${SPARK_VER:-"3.1.1"} SPARK_VER_213=${SPARK_VER_213:-"3.3.0"} # Make a best attempt to set the default value for the shuffle shim. diff --git a/pom.xml b/pom.xml index 349dd792177..c939f8d5891 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ rapids-4-spark-parent_2.12 RAPIDS Accelerator for Apache Spark Root Project The root project of the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT pom https://nvidia.github.io/spark-rapids/ @@ -428,6 +428,26 @@ delta-lake/delta-24x + + release343 + + + buildver + 343 + + + + 343 + ${spark343.version} + ${spark343.version} + 1.12.3 + ${spark330.iceberg.version} + 2.0.6 + + + delta-lake/delta-24x + + release330cdh @@ -629,6 +649,7 @@ arm64 ${cuda.version}-arm64 + ${ucx.baseVersion}-aarch64 @@ -698,8 +719,8 @@ spark${buildver} cuda11 ${cuda.version} - 24.04.0-SNAPSHOT - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT + 24.06.0-SNAPSHOT 2.12 2.8.0 incremental @@ -711,7 +732,9 @@ https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994 --> -Xlint:all,-serial,-path,-try,-processing|-Werror - 1.15.0 + 1.16.0 + + ${ucx.baseVersion} true package @@ -751,6 +774,7 @@ 3.4.0 3.4.1 3.4.2 + 3.4.3 3.3.0.3.3.7180.0-274 3.3.2.3.3.7190.0-91 3.3.0-databricks @@ -806,6 +830,7 @@ 340, 341, 342, + 343, 350, 351 @@ -849,6 +874,8 @@ ${noSnapshot.buildvers}, ${snapshot.buildvers}, ${databricks.buildvers}, + + 400 330, @@ -1012,6 +1039,53 @@ ${mockito.version} test
+ + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + test-jar + test + + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-client-api + + + org.apache.hadoop + hadoop-client-runtime + + + org.apache.curator + curator-recipes + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + test-jar + test + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + test-jar + test +
diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index 8809dc0629e..f3bbfc1d7dc 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark-aggregator_2.13 RAPIDS Accelerator for Apache Spark Aggregator Creates an aggregated shaded package of the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT aggregator @@ -728,6 +728,23 @@
+ + release343 + + + buildver + 343 + + + + + com.nvidia + rapids-4-spark-delta-24x_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + release350 diff --git a/scala2.13/api_validation/pom.xml b/scala2.13/api_validation/pom.xml index de4986652d6..c934b610e73 100644 --- a/scala2.13/api_validation/pom.xml +++ b/scala2.13/api_validation/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-api-validation_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT api_validation diff --git a/scala2.13/datagen/pom.xml b/scala2.13/datagen/pom.xml index 44aa01a1439..65c0e4cc98b 100644 --- a/scala2.13/datagen/pom.xml +++ b/scala2.13/datagen/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml datagen_2.13 Data Generator Tools for generating large amounts of data - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT datagen diff --git a/scala2.13/delta-lake/delta-20x/pom.xml b/scala2.13/delta-lake/delta-20x/pom.xml index a14b97eecf2..9d86042c395 100644 --- a/scala2.13/delta-lake/delta-20x/pom.xml +++ b/scala2.13/delta-lake/delta-20x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.0.x Support Delta Lake 2.0.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-20x diff --git a/scala2.13/delta-lake/delta-21x/pom.xml b/scala2.13/delta-lake/delta-21x/pom.xml index bc7c836fedc..83bf7f1f56d 100644 --- a/scala2.13/delta-lake/delta-21x/pom.xml +++ b/scala2.13/delta-lake/delta-21x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.1.x Support Delta Lake 2.1.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-21x diff --git a/scala2.13/delta-lake/delta-22x/pom.xml b/scala2.13/delta-lake/delta-22x/pom.xml index 7ec7293f72c..c0cf82529ad 100644 --- a/scala2.13/delta-lake/delta-22x/pom.xml +++ b/scala2.13/delta-lake/delta-22x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.2.x Support Delta Lake 2.2.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-22x diff --git a/scala2.13/delta-lake/delta-23x/pom.xml b/scala2.13/delta-lake/delta-23x/pom.xml index 96eb4cf357a..8eb2a09375f 100644 --- a/scala2.13/delta-lake/delta-23x/pom.xml +++ b/scala2.13/delta-lake/delta-23x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-delta-23x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-23x diff --git a/scala2.13/delta-lake/delta-24x/pom.xml b/scala2.13/delta-lake/delta-24x/pom.xml index 12e5cd232c5..505e2eb04d7 100644 --- a/scala2.13/delta-lake/delta-24x/pom.xml +++ b/scala2.13/delta-lake/delta-24x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 
- 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.4.x Support Delta Lake 2.4.x support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-24x diff --git a/scala2.13/delta-lake/delta-spark330db/pom.xml b/scala2.13/delta-lake/delta-spark330db/pom.xml index 5c87d25cb09..db71dd83111 100644 --- a/scala2.13/delta-lake/delta-spark330db/pom.xml +++ b/scala2.13/delta-lake/delta-spark330db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark330db_2.13 RAPIDS Accelerator for Apache Spark Databricks 11.3 Delta Lake Support Databricks 11.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-spark330db diff --git a/scala2.13/delta-lake/delta-spark332db/pom.xml b/scala2.13/delta-lake/delta-spark332db/pom.xml index d2cf31c1378..ab9ac45d775 100644 --- a/scala2.13/delta-lake/delta-spark332db/pom.xml +++ b/scala2.13/delta-lake/delta-spark332db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark332db_2.13 RAPIDS Accelerator for Apache Spark Databricks 12.2 Delta Lake Support Databricks 12.2 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-spark332db diff --git a/scala2.13/delta-lake/delta-spark341db/pom.xml b/scala2.13/delta-lake/delta-spark341db/pom.xml index 7665f08b828..78be4fad4b8 100644 --- a/scala2.13/delta-lake/delta-spark341db/pom.xml +++ b/scala2.13/delta-lake/delta-spark341db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-spark341db_2.13 RAPIDS Accelerator for Apache Spark Databricks 13.3 Delta Lake Support Databricks 13.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT false diff --git a/scala2.13/delta-lake/delta-stub/pom.xml b/scala2.13/delta-lake/delta-stub/pom.xml index 9b749b879d9..81731f59c4f 100644 --- a/scala2.13/delta-lake/delta-stub/pom.xml +++ b/scala2.13/delta-lake/delta-stub/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../jdk-profiles/pom.xml rapids-4-spark-delta-stub_2.13 RAPIDS Accelerator for Apache Spark Delta Lake Stub Delta Lake stub for the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../delta-lake/delta-stub diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index 9eec252c9ca..0fd9d2795b6 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark_2.13 RAPIDS Accelerator for Apache Spark Distribution Creates the distribution package of the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT com.nvidia diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 6a355a243fb..8dfeda2eeb6 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml 
rapids-4-spark-integration-tests_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT integration_tests diff --git a/scala2.13/jdk-profiles/pom.xml b/scala2.13/jdk-profiles/pom.xml index 91804499ff0..c41c5ef38f4 100644 --- a/scala2.13/jdk-profiles/pom.xml +++ b/scala2.13/jdk-profiles/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT com.nvidia rapids-4-spark-jdk-profiles_2.13 pom Shim JDK Profiles - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT jdk9plus diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 446d3b11e55..cf3bfb48373 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -23,7 +23,7 @@ rapids-4-spark-parent_2.13 RAPIDS Accelerator for Apache Spark Root Project The root project of the RAPIDS Accelerator for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT pom https://nvidia.github.io/spark-rapids/ @@ -428,6 +428,26 @@ delta-lake/delta-24x + + release343 + + + buildver + 343 + + + + 343 + ${spark343.version} + ${spark343.version} + 1.12.3 + ${spark330.iceberg.version} + 2.0.6 + + + delta-lake/delta-24x + + release330cdh @@ -629,6 +649,7 @@ arm64 ${cuda.version}-arm64 + ${ucx.baseVersion}-aarch64 @@ -698,8 +719,8 @@ spark${buildver} cuda11 ${cuda.version} - 24.04.0-SNAPSHOT - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT + 24.06.0-SNAPSHOT 2.13 2.8.0 incremental @@ -711,7 +732,9 @@ https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994 --> -Xlint:all,-serial,-path,-try,-processing|-Werror - 1.15.0 + 1.16.0 + + ${ucx.baseVersion} true package @@ -751,6 +774,7 @@ 3.4.0 3.4.1 3.4.2 + 3.4.3 3.3.0.3.3.7180.0-274 3.3.2.3.3.7190.0-91 3.3.0-databricks @@ -806,6 +830,7 @@ 340, 341, 342, + 343, 350, 351 @@ -849,6 +874,8 @@ ${noSnapshot.buildvers}, ${snapshot.buildvers}, ${databricks.buildvers}, + + 400 330, @@ -1012,6 +1039,53 @@ ${mockito.version} test + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + test-jar + test + + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-client-api + + + org.apache.hadoop + hadoop-client-runtime + + + org.apache.curator + curator-recipes + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + test-jar + test + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + test-jar + test + diff --git a/scala2.13/shim-deps/cloudera/pom.xml b/scala2.13/shim-deps/cloudera/pom.xml index 15e08712a4b..bce1e909069 100644 --- a/scala2.13/shim-deps/cloudera/pom.xml +++ b/scala2.13/shim-deps/cloudera/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-cdh-bom pom CDH Shim Dependencies - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/cloudera diff --git a/scala2.13/shim-deps/databricks/pom.xml b/scala2.13/shim-deps/databricks/pom.xml index 79583498519..8b8e9403c30 100644 --- a/scala2.13/shim-deps/databricks/pom.xml +++ b/scala2.13/shim-deps/databricks/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-db-bom pom Databricks Shim Dependencies - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/databricks diff --git a/scala2.13/shim-deps/pom.xml b/scala2.13/shim-deps/pom.xml index f8815d992b1..6ab6f90130e 100644 --- a/scala2.13/shim-deps/pom.xml +++ b/scala2.13/shim-deps/pom.xml @@ -22,13 +22,13 @@ com.nvidia 
rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark-shim-deps-parent_2.13 pom Shim Dependencies Profiles - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT release321cdh diff --git a/scala2.13/shuffle-plugin/pom.xml b/scala2.13/shuffle-plugin/pom.xml index f0ac96b8b01..a3ee1ffb638 100644 --- a/scala2.13/shuffle-plugin/pom.xml +++ b/scala2.13/shuffle-plugin/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-shuffle_2.13 RAPIDS Accelerator for Apache Spark Shuffle Plugin Accelerated shuffle plugin for the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT shuffle-plugin diff --git a/scala2.13/sql-plugin-api/pom.xml b/scala2.13/sql-plugin-api/pom.xml index 6b4dd52f75e..22736968bf4 100644 --- a/scala2.13/sql-plugin-api/pom.xml +++ b/scala2.13/sql-plugin-api/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-sql-plugin-api_2.13 Module for Non-Shimmable API - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT sql-plugin-api false diff --git a/scala2.13/sql-plugin/pom.xml b/scala2.13/sql-plugin/pom.xml index e3f68550ed8..5bee34752c7 100644 --- a/scala2.13/sql-plugin/pom.xml +++ b/scala2.13/sql-plugin/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-sql_2.13 RAPIDS Accelerator for Apache Spark SQL Plugin The RAPIDS SQL plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT sql-plugin diff --git a/scala2.13/tests/pom.xml b/scala2.13/tests/pom.xml index 0e1f41b11ae..a9c1d707b27 100644 --- a/scala2.13/tests/pom.xml +++ b/scala2.13/tests/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-tests_2.13 RAPIDS Accelerator for Apache Spark Tests RAPIDS plugin for Apache Spark integration tests - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT tests @@ -103,6 +103,27 @@ org.apache.spark spark-avro_${scala.binary.version} + + org.apache.spark + spark-core_${scala.binary.version} + test-jar + + + org.apache.spark + spark-sql_${scala.binary.version} + test-jar + + + org.apache.spark + spark-catalyst_${scala.binary.version} + test-jar + + + org.scalatestplus + scalatestplus-scalacheck_${scala.binary.version} + 3.1.0.0-RC2 + test + diff --git a/scala2.13/tools/pom.xml b/scala2.13/tools/pom.xml index d4c6a91737a..43796a41532 100644 --- a/scala2.13/tools/pom.xml +++ b/scala2.13/tools/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark-tools-support pom RAPIDS Accelerator for Apache Spark Tools Support Supporting code for RAPIDS Accelerator tools - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT com.nvidia diff --git a/scala2.13/udf-compiler/pom.xml b/scala2.13/udf-compiler/pom.xml index d4c81dfd20d..89d96dd41fd 100644 --- a/scala2.13/udf-compiler/pom.xml +++ b/scala2.13/udf-compiler/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-udf_2.13 RAPIDS Accelerator for Apache Spark Scala UDF Plugin The RAPIDS Scala UDF plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT udf-compiler diff --git a/scripts/README.md b/scripts/README.md index 1d40483e206..df703b919c0 100644 --- a/scripts/README.md +++ 
b/scripts/README.md @@ -13,7 +13,7 @@ steps can be helpful in narrowing down which files in the changeset are directly a77ae27f15 [SPARK-41442][SQL][FOLLOWUP] SQLMetric should not expose -1 value as it's invalid ``` 3. Run the following command from spark-rapids project-root, and you should get a file called - `audit-plugin.log` at location pointed by `$WORKSPACE`. + `audit-plugin.log` at location pointed by `$WORKSPACE`. The environment variables must be absolute paths. ``` WORKSPACE=~/workspace SPARK_TREE=~/workspace/spark COMMIT_DIFF_LOG=~/workspace/commits.log ./scripts/prioritize-commits.sh diff --git a/scripts/audit-spark.sh b/scripts/audit-spark.sh index d2b68e25807..4b5702f8009 100755 --- a/scripts/audit-spark.sh +++ b/scripts/audit-spark.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -55,7 +55,8 @@ if [ -f "$lastcommit" ]; then cd ${SPARK_TREE} latestcommit=`cat ${lastcommit}` git checkout $basebranch - git log --oneline HEAD...$latestcommit -- sql/core/src/main sql/catalyst/src/main | tee ${COMMIT_DIFF_LOG} + git log --oneline HEAD...$latestcommit -- sql/core/src/main sql/catalyst/src/main \ + core/src/main/scala/org/apache/spark/shuffle core/src/main/scala/org/apache/spark/storage | tee ${COMMIT_DIFF_LOG} git log HEAD -n 1 --pretty="%h" > ${lastcommit} cd $WORKSPACE diff --git a/shim-deps/cloudera/pom.xml b/shim-deps/cloudera/pom.xml index 8a9e573d570..787975be559 100644 --- a/shim-deps/cloudera/pom.xml +++ b/shim-deps/cloudera/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-cdh-bom pom CDH Shim Dependencies - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/cloudera diff --git a/shim-deps/databricks/pom.xml b/shim-deps/databricks/pom.xml index fb7bbf63ae3..189faf97a60 100644 --- a/shim-deps/databricks/pom.xml +++ b/shim-deps/databricks/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../../pom.xml rapids-4-spark-db-bom pom Databricks Shim Dependencies - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/databricks diff --git a/shim-deps/pom.xml b/shim-deps/pom.xml index 13fd972393d..2859bbff28d 100644 --- a/shim-deps/pom.xml +++ b/shim-deps/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../jdk-profiles/pom.xml rapids-4-spark-shim-deps-parent_2.12 pom Shim Dependencies Profiles - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT release321cdh diff --git a/shuffle-plugin/pom.xml b/shuffle-plugin/pom.xml index 99656b379a3..60d7580c237 100644 --- a/shuffle-plugin/pom.xml +++ b/shuffle-plugin/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-shuffle_2.12 RAPIDS Accelerator for Apache Spark Shuffle Plugin Accelerated shuffle plugin for the RAPIDS plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT shuffle-plugin diff --git a/sql-plugin-api/pom.xml b/sql-plugin-api/pom.xml index 0b1a2cedbe5..68e20c10f77 100644 --- a/sql-plugin-api/pom.xml +++ b/sql-plugin-api/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-sql-plugin-api_2.12 Module for Non-Shimmable API - 24.04.0-SNAPSHOT + 
24.06.0-SNAPSHOT sql-plugin-api false diff --git a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala index 76440bcd4c6..cdcb3f73423 100644 --- a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala +++ b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala @@ -49,11 +49,11 @@ import org.apache.spark.util.MutableURLClassLoader Each shim can see a consistent parallel world without conflicts by referencing only one conflicting directory. E.g., Spark 3.2.0 Shim will use - jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark3xx-common/ - jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark320/ + jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark3xx-common/ + jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark320/ Spark 3.1.1 will use - jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark3xx-common/ - jar:file:/home/spark/rapids-4-spark_2.12-24.04.0.jar!/spark311/ + jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark3xx-common/ + jar:file:/home/spark/rapids-4-spark_2.12-24.06.0.jar!/spark311/ Using these Jar URL's allows referencing different bytecode produced from identical sources by incompatible Scala / Spark dependencies. */ diff --git a/sql-plugin/pom.xml b/sql-plugin/pom.xml index 9e4c6b1b461..cbe7d873dff 100644 --- a/sql-plugin/pom.xml +++ b/sql-plugin/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-sql_2.12 RAPIDS Accelerator for Apache Spark SQL Plugin The RAPIDS SQL plugin for Apache Spark - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT sql-plugin diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquet.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquet.java index 683902941e9..2c92d92f854 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquet.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquet.java @@ -69,7 +69,7 @@ public static class ReadBuilder { private long maxBatchSizeBytes = Integer.MAX_VALUE; private long targetBatchSizeBytes = Integer.MAX_VALUE; private boolean useChunkedReader = false; - private boolean useSubPageChunked = false; + private long maxChunkedReaderMemoryUsageSizeBytes = 0; private scala.Option debugDumpPrefix = null; private boolean debugDumpAlways = false; private scala.collection.immutable.Map metrics = null; @@ -141,9 +141,10 @@ public ReadBuilder withTargetBatchSizeBytes(long targetBatchSizeBytes) { return this; } - public ReadBuilder withUseChunkedReader(boolean useChunkedReader, boolean useSubPageChunked) { + public ReadBuilder withUseChunkedReader(boolean useChunkedReader, + long maxChunkedReaderMemoryUsageSizeBytes) { this.useChunkedReader = useChunkedReader; - this.useSubPageChunked = useSubPageChunked; + this.maxChunkedReaderMemoryUsageSizeBytes = maxChunkedReaderMemoryUsageSizeBytes; return this; } @@ -164,8 +165,8 @@ public CloseableIterable build() { InternalRow.empty(), file.location(), start, length); return new GpuParquetReader(file, projectSchema, options, nameMapping, filter, caseSensitive, idToConstant, deleteFilter, partFile, conf, maxBatchSizeRows, maxBatchSizeBytes, - targetBatchSizeBytes, useChunkedReader, useSubPageChunked, debugDumpPrefix, - debugDumpAlways, metrics); + targetBatchSizeBytes, useChunkedReader, 
maxChunkedReaderMemoryUsageSizeBytes, + debugDumpPrefix, debugDumpAlways, metrics); } } diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquetReader.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquetReader.java index 724e32707db..47b649af6ed 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquetReader.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/parquet/GpuParquetReader.java @@ -87,7 +87,7 @@ public class GpuParquetReader extends CloseableGroup implements CloseableIterabl private final long maxBatchSizeBytes; private final long targetBatchSizeBytes; private final boolean useChunkedReader; - private final boolean useSubPageChunked; + private final long maxChunkedReaderMemoryUsageSizeBytes; private final scala.Option debugDumpPrefix; private final boolean debugDumpAlways; private final scala.collection.immutable.Map metrics; @@ -98,7 +98,7 @@ public GpuParquetReader( Map idToConstant, GpuDeleteFilter deleteFilter, PartitionedFile partFile, Configuration conf, int maxBatchSizeRows, long maxBatchSizeBytes, long targetBatchSizeBytes, boolean useChunkedReader, - boolean useSubPageChunked, + long maxChunkedReaderMemoryUsageSizeBytes, scala.Option debugDumpPrefix, boolean debugDumpAlways, scala.collection.immutable.Map metrics) { this.input = input; @@ -115,7 +115,7 @@ public GpuParquetReader( this.maxBatchSizeBytes = maxBatchSizeBytes; this.targetBatchSizeBytes = targetBatchSizeBytes; this.useChunkedReader = useChunkedReader; - this.useSubPageChunked = useSubPageChunked; + this.maxChunkedReaderMemoryUsageSizeBytes = maxChunkedReaderMemoryUsageSizeBytes; this.debugDumpPrefix = debugDumpPrefix; this.debugDumpAlways = debugDumpAlways; this.metrics = metrics; @@ -143,7 +143,7 @@ public org.apache.iceberg.io.CloseableIterator iterator() { new Path(input.location()), clippedBlocks, fileReadSchema, caseSensitive, partReaderSparkSchema, debugDumpPrefix, debugDumpAlways, maxBatchSizeRows, maxBatchSizeBytes, targetBatchSizeBytes, useChunkedReader, - useSubPageChunked, + maxChunkedReaderMemoryUsageSizeBytes, metrics, DateTimeRebaseCorrected$.MODULE$, // dateRebaseMode DateTimeRebaseCorrected$.MODULE$, // timestampRebaseMode diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuBatchDataReader.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuBatchDataReader.java index d9ff9c157fa..202ba2c91b3 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuBatchDataReader.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuBatchDataReader.java @@ -48,14 +48,15 @@ class GpuBatchDataReader extends BaseDataReader { private final long maxBatchSizeBytes; private final long targetBatchSizeBytes; private final boolean useChunkedReader; - private final boolean useSubPageChunked; + private final long maxChunkedReaderMemoryUsageSizeBytes; private final scala.Option parquetDebugDumpPrefix; private final boolean parquetDebugDumpAlways; private final scala.collection.immutable.Map metrics; GpuBatchDataReader(CombinedScanTask task, Table table, Schema expectedSchema, boolean caseSensitive, Configuration conf, int maxBatchSizeRows, long maxBatchSizeBytes, - long targetBatchSizeBytes, boolean useChunkedReader, boolean useSubPageChunked, + long targetBatchSizeBytes, + boolean useChunkedReader, long maxChunkedReaderMemoryUsageSizeBytes, scala.Option parquetDebugDumpPrefix, boolean 
parquetDebugDumpAlways, scala.collection.immutable.Map metrics) { super(table, task); @@ -67,7 +68,7 @@ class GpuBatchDataReader extends BaseDataReader { this.maxBatchSizeBytes = maxBatchSizeBytes; this.targetBatchSizeBytes = targetBatchSizeBytes; this.useChunkedReader = useChunkedReader; - this.useSubPageChunked = useSubPageChunked; + this.maxChunkedReaderMemoryUsageSizeBytes = maxChunkedReaderMemoryUsageSizeBytes; this.parquetDebugDumpPrefix = parquetDebugDumpPrefix; this.parquetDebugDumpAlways = parquetDebugDumpAlways; this.metrics = metrics; @@ -102,7 +103,7 @@ CloseableIterator open(FileScanTask task) { .withMaxBatchSizeRows(maxBatchSizeRows) .withMaxBatchSizeBytes(maxBatchSizeBytes) .withTargetBatchSizeBytes(targetBatchSizeBytes) - .withUseChunkedReader(useChunkedReader, useSubPageChunked) + .withUseChunkedReader(useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes) .withDebugDump(parquetDebugDumpPrefix, parquetDebugDumpAlways) .withMetrics(metrics); diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuMultiFileBatchReader.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuMultiFileBatchReader.java index b27dd01daf6..9c36fe76020 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuMultiFileBatchReader.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuMultiFileBatchReader.java @@ -68,7 +68,7 @@ class GpuMultiFileBatchReader extends BaseDataReader { private final long maxGpuColumnSizeBytes; private final boolean useChunkedReader; - private final boolean useSubPageChunked; + private final long maxChunkedReaderMemoryUsageSizeBytes; private final scala.Option parquetDebugDumpPrefix; private final boolean parquetDebugDumpAlways; private final scala.collection.immutable.Map metrics; @@ -87,7 +87,7 @@ class GpuMultiFileBatchReader extends BaseDataReader { GpuMultiFileBatchReader(CombinedScanTask task, Table table, Schema expectedSchema, boolean caseSensitive, Configuration conf, int maxBatchSizeRows, long maxBatchSizeBytes, long targetBatchSizeBytes, long maxGpuColumnSizeBytes, - boolean useChunkedReader, boolean useSubPageChunked, + boolean useChunkedReader, long maxChunkedReaderMemoryUsageSizeBytes, scala.Option parquetDebugDumpPrefix, boolean parquetDebugDumpAlways, int numThreads, int maxNumFileProcessed, boolean useMultiThread, FileFormat fileFormat, @@ -102,7 +102,7 @@ class GpuMultiFileBatchReader extends BaseDataReader { this.targetBatchSizeBytes = targetBatchSizeBytes; this.maxGpuColumnSizeBytes = maxGpuColumnSizeBytes; this.useChunkedReader = useChunkedReader; - this.useSubPageChunked = useSubPageChunked; + this.maxChunkedReaderMemoryUsageSizeBytes = maxChunkedReaderMemoryUsageSizeBytes; this.parquetDebugDumpPrefix = parquetDebugDumpPrefix; this.parquetDebugDumpAlways = parquetDebugDumpAlways; this.useMultiThread = useMultiThread; @@ -352,7 +352,7 @@ protected FilePartitionReaderBase createRapidsReader(PartitionedFile[] pFiles, return new MultiFileCloudParquetPartitionReader(conf, pFiles, this::filterParquetBlocks, caseSensitive, parquetDebugDumpPrefix, parquetDebugDumpAlways, maxBatchSizeRows, maxBatchSizeBytes, targetBatchSizeBytes, maxGpuColumnSizeBytes, - useChunkedReader, useSubPageChunked, metrics, partitionSchema, + useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, metrics, partitionSchema, numThreads, maxNumFileProcessed, false, // ignoreMissingFiles false, // ignoreCorruptFiles @@ -428,9 +428,9 @@ protected FilePartitionReaderBase 
createRapidsReader(PartitionedFile[] pFiles, return new MultiFileParquetPartitionReader(conf, pFiles, JavaConverters.asScalaBuffer(clippedBlocks).toSeq(), - caseSensitive, parquetDebugDumpPrefix, parquetDebugDumpAlways, useChunkedReader, - useSubPageChunked, + caseSensitive, parquetDebugDumpPrefix, parquetDebugDumpAlways, maxBatchSizeRows, maxBatchSizeBytes, targetBatchSizeBytes, maxGpuColumnSizeBytes, + useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, metrics, partitionSchema, numThreads, false, // ignoreMissingFiles false, // ignoreCorruptFiles diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuSparkScan.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuSparkScan.java index 3def72b537b..b7544675d41 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuSparkScan.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/iceberg/spark/source/GpuSparkScan.java @@ -283,7 +283,7 @@ private static class MultiFileBatchReader super(task.task, task.table(), task.expectedSchema(), task.isCaseSensitive(), task.getConfiguration(), task.getMaxBatchSizeRows(), task.getMaxBatchSizeBytes(), task.getTargetBatchSizeBytes(), task.getMaxGpuColumnSizeBytes(), task.useChunkedReader(), - task.useSubPageChunked(), + task.maxChunkedReaderMemoryUsageSizeBytes(), task.getParquetDebugDumpPrefix(), task.getParquetDebugDumpAlways(), task.getNumThreads(), task.getMaxNumFileProcessed(), useMultiThread, ff, metrics, queryUsesInputFile); @@ -294,7 +294,7 @@ private static class BatchReader extends GpuBatchDataReader implements Partition BatchReader(ReadTask task, scala.collection.immutable.Map metrics) { super(task.task, task.table(), task.expectedSchema(), task.isCaseSensitive(), task.getConfiguration(), task.getMaxBatchSizeRows(), task.getMaxBatchSizeBytes(), - task.getTargetBatchSizeBytes(), task.useChunkedReader(), task.useSubPageChunked(), + task.getTargetBatchSizeBytes(), task.useChunkedReader(), task.maxChunkedReaderMemoryUsageSizeBytes(), task.getParquetDebugDumpPrefix(), task.getParquetDebugDumpAlways(), metrics); } } @@ -305,7 +305,7 @@ static class ReadTask implements InputPartition, Serializable { private final String expectedSchemaString; private final boolean caseSensitive; private final boolean useChunkedReader; - private final boolean useSubPageChunked; + private final long maxChunkedReaderMemoryUsageSizeBytes; private final Broadcast confBroadcast; private final int maxBatchSizeRows; private final long maxBatchSizeBytes; @@ -343,7 +343,12 @@ static class ReadTask implements InputPartition, Serializable { this.numThreads = rapidsConf.multiThreadReadNumThreads(); this.maxNumFileProcessed = rapidsConf.maxNumParquetFilesParallel(); this.useChunkedReader = rapidsConf.chunkedReaderEnabled(); - this.useSubPageChunked = rapidsConf.chunkedSubPageReaderEnabled(); + if(rapidsConf.limitChunkedReaderMemoryUsage()) { + double limitRatio = rapidsConf.chunkedReaderMemoryUsageRatio(); + this.maxChunkedReaderMemoryUsageSizeBytes = (long)(limitRatio * this.targetBatchSizeBytes); + } else { + this.maxChunkedReaderMemoryUsageSizeBytes = 0L; + } } @Override @@ -410,8 +415,8 @@ public boolean useChunkedReader() { return useChunkedReader; } - public boolean useSubPageChunked() { - return useSubPageChunked; + public long maxChunkedReaderMemoryUsageSizeBytes() { + return maxChunkedReaderMemoryUsageSizeBytes; } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuGetJsonObject.scala 
b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuGetJsonObject.scala index 16950368ab0..882c8fec13d 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuGetJsonObject.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuGetJsonObject.scala @@ -19,15 +19,19 @@ package com.nvidia.spark.rapids import scala.util.parsing.combinator.RegexParsers import ai.rapids.cudf.{ColumnVector, GetJsonObjectOptions, Scalar} -import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} +import com.nvidia.spark.rapids.jni.JSONUtils +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, GetJsonObject} +import org.apache.spark.sql.rapids.test.CpuGetJsonObject import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.SerializableConfiguration // Copied from Apache Spark org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -private[this] sealed trait PathInstruction -private[this] object PathInstruction { +sealed trait PathInstruction +object PathInstruction { case object Subscript extends PathInstruction case object Wildcard extends PathInstruction case object Key extends PathInstruction @@ -35,7 +39,7 @@ private[this] object PathInstruction { case class Named(name: String) extends PathInstruction } -private[this] object JsonPathParser extends RegexParsers { +object JsonPathParser extends RegexParsers { import PathInstruction._ def root: Parser[Char] = '$' @@ -83,12 +87,41 @@ private[this] object JsonPathParser extends RegexParsers { } } + def fallbackCheck(instructions: List[PathInstruction]): Boolean = { + // JNI kernel has a limit of 16 nested nodes, fallback to CPU if we exceed that + instructions.length > 16 + } + + def unzipInstruction(instruction: PathInstruction): (String, String, Long) = { + instruction match { + case Subscript => ("subscript", "", -1) + case Key => ("key", "", -1) + case Wildcard => ("wildcard", "", -1) + case Index(index) => ("index", "", index) + case Named(name) => ("named", name, -1) + } + } + + def convertToJniObject(instructions: List[PathInstruction]): + Array[JSONUtils.PathInstructionJni] = { + instructions.map { instruction => + val (tpe, name, index) = unzipInstruction(instruction) + new JSONUtils.PathInstructionJni(tpe match { + case "subscript" => JSONUtils.PathInstructionType.SUBSCRIPT + case "key" => JSONUtils.PathInstructionType.KEY + case "wildcard" => JSONUtils.PathInstructionType.WILDCARD + case "index" => JSONUtils.PathInstructionType.INDEX + case "named" => JSONUtils.PathInstructionType.NAMED + }, name, index) + }.toArray + } + def containsUnsupportedPath(instructions: List[PathInstruction]): Boolean = { // Gpu GetJsonObject is not supported if JSON path contains wildcard [*] // see https://github.com/NVIDIA/spark-rapids/issues/10216 instructions.exists { case Wildcard => true - case Named(name) if name == "*" => true + case Named("*") => true case _ => false } } @@ -114,21 +147,154 @@ class GpuGetJsonObjectMeta( override def tagExprForGpu(): Unit = { val lit = GpuOverrides.extractLit(expr.right) - lit.map { l => + lit.foreach { l => val instructions = JsonPathParser.parse(l.value.asInstanceOf[UTF8String].toString) - if (instructions.exists(JsonPathParser.containsUnsupportedPath)) { - willNotWorkOnGpu("get_json_object on GPU does not support wildcard [*] in path") + if (!conf.isLegacyGetJsonObjectEnabled) { + if 
(instructions.exists(JsonPathParser.fallbackCheck)) { + willNotWorkOnGpu("get_json_object on GPU does not support more than 16 nested paths") + } + } else { + if (instructions.exists(JsonPathParser.containsUnsupportedPath)) { + willNotWorkOnGpu("get_json_object on GPU does not support wildcard [*] in path") + } + } + } + } + + override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = { + if (!conf.isLegacyGetJsonObjectEnabled) { + GpuGetJsonObject(lhs, rhs, + conf.testGetJsonObjectSavePath, conf.testGetJsonObjectSaveRows) + } else { + GpuGetJsonObjectLegacy(lhs, rhs, + conf.testGetJsonObjectSavePath, conf.testGetJsonObjectSaveRows) + } + } +} + +case class GpuGetJsonObject( + json: Expression, + path: Expression, + savePathForVerify: Option[String], + saveRowsForVerify: Int) + extends GpuBinaryExpressionArgsAnyScalar + with ExpectsInputTypes { + // Get a Hadoop conf for the JSON Object + val hconf: Option[SerializableConfiguration] = savePathForVerify.map { _ => + val spark = SparkSession.active + new SerializableConfiguration(spark.sparkContext.hadoopConfiguration) + } + val seed = System.nanoTime() + + override def left: Expression = json + override def right: Expression = path + override def dataType: DataType = StringType + override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def nullable: Boolean = true + override def prettyName: String = "get_json_object" + + private var cachedInstructions: + Option[Option[List[PathInstruction]]] = None + + def parseJsonPath(path: GpuScalar): Option[List[PathInstruction]] = { + if (path.isValid) { + val pathStr = path.getValue.toString() + JsonPathParser.parse(pathStr) + } else { + None + } + } + + /** + * get_json_object(any_json, '$.*') always return null. + * '$.*' will be parsed to be a single `Wildcard`. + * Check whether has separated `Wildcard` + * + * @param instructions query path instructions + * @return true if has separated `Wildcard`, false otherwise. 
+ */ + private def hasSeparateWildcard(instructions: Option[List[PathInstruction]]): Boolean = { + import PathInstruction._ + def hasSeparate(ins: List[PathInstruction], idx: Int): Boolean = { + if (idx == 0) { + ins(0) match { + case Wildcard => true + case _ => false + } + } else { + (ins(idx - 1), ins(idx)) match { + case (Key, Wildcard) => false + case (Subscript, Wildcard) => false + case (_, Wildcard) => true + case _ => false + } + } + } + + if (instructions.isEmpty) { + false + } else { + val list = instructions.get + list.indices.exists { idx => hasSeparate(list, idx) } + } + } + + override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = { + val fromGpu = cachedInstructions.getOrElse { + val pathInstructions = parseJsonPath(rhs) + val checkedPathInstructions = if (hasSeparateWildcard(pathInstructions)) { + // If has separate wildcard path, should return all nulls + None + } else { + pathInstructions + } + cachedInstructions = Some(checkedPathInstructions) + checkedPathInstructions + } match { + case Some(instructions) => { + val jniInstructions = JsonPathParser.convertToJniObject(instructions) + JSONUtils.getJsonObject(lhs.getBase, jniInstructions) + } + case None => GpuColumnVector.columnVectorFromNull(lhs.getRowCount.toInt, StringType) + } + + // Below is only for testing purpose + savePathForVerify.foreach { debugPath => + closeOnExcept(fromGpu) { _ => + val path = rhs.getValue.asInstanceOf[UTF8String] + withResource(CpuGetJsonObject.getJsonObjectOnCpu(lhs, path)) { fromCpu => + // verify result, save diffs if have + CpuGetJsonObject.verify(isLegacy = false, seed, + lhs.getBase, path, fromGpu, fromCpu, debugPath, saveRowsForVerify, + hconf.get.value) + } } } + + fromGpu } - override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = - GpuGetJsonObject(lhs, rhs) + override def doColumnar(numRows: Int, lhs: GpuScalar, rhs: GpuScalar): ColumnVector = { + withResource(GpuColumnVector.from(lhs, numRows, left.dataType)) { expandedLhs => + doColumnar(expandedLhs, rhs) + } + } } -case class GpuGetJsonObject(json: Expression, path: Expression) +case class GpuGetJsonObjectLegacy( + json: Expression, + path: Expression, + savePathForVerify: Option[String], + saveRowsForVerify: Int) extends GpuBinaryExpressionArgsAnyScalar with ExpectsInputTypes { + // Get a Hadoop conf for the JSON Object + val hconf: Option[SerializableConfiguration] = savePathForVerify.map { _ => + val spark = SparkSession.active + new SerializableConfiguration(spark.sparkContext.hadoopConfiguration) + } + val seed = System.nanoTime() + override def left: Expression = json override def right: Expression = path override def dataType: DataType = StringType @@ -148,7 +314,7 @@ case class GpuGetJsonObject(json: Expression, path: Expression) } override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = { - cachedNormalizedPath.getOrElse { + val fromGpu = cachedNormalizedPath.getOrElse { val normalizedPath: Option[String] = normalizeJsonPath(rhs) cachedNormalizedPath = Some(normalizedPath) normalizedPath @@ -160,6 +326,21 @@ case class GpuGetJsonObject(json: Expression, path: Expression) } case None => GpuColumnVector.columnVectorFromNull(lhs.getRowCount.toInt, StringType) } + + // Below is only for testing purpose + savePathForVerify.foreach { debugPath => + closeOnExcept(fromGpu) { _ => + val path = rhs.getValue.asInstanceOf[UTF8String] + withResource(CpuGetJsonObject.getJsonObjectOnCpu(lhs, path)) { fromCpu => + // verify result, save diffs if have + 
CpuGetJsonObject.verify(isLegacy = true, seed, + lhs.getBase, path, fromGpu, fromCpu, debugPath, saveRowsForVerify, + hconf.get.value) + } + } + } + + fromGpu } override def doColumnar(numRows: Int, lhs: GpuScalar, rhs: GpuScalar): ColumnVector = { diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala index ae539820331..7b4ae4c2a43 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala @@ -16,10 +16,10 @@ package com.nvidia.spark.rapids -import ai.rapids.cudf.{GetJsonObjectOptions,Scalar} import com.nvidia.spark.rapids.Arm._ import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.RmmRapidsRetryIterator.{splitSpillableInHalfByRows, withRetry} +import com.nvidia.spark.rapids.jni.JSONUtils import com.nvidia.spark.rapids.shims.ShimExpression import org.apache.spark.sql.catalyst.analysis.TypeCheckResult @@ -59,26 +59,28 @@ case class GpuJsonTuple(children: Seq[Expression]) extends GpuGenerator val json = inputBatch.column(generatorOffset).asInstanceOf[GpuColumnVector].getBase val schema = Array.fill[DataType](fieldExpressions.length)(StringType) - val fieldScalars = fieldExpressions.safeMap { field => + val fieldInstructions = fieldExpressions.map { field => withResourceIfAllowed(field.columnarEvalAny(inputBatch)) { case fieldScalar: GpuScalar => - // Specials characters like '.', '[', ']' are not supported in field names - Scalar.fromString("$." + fieldScalar.getBase.getJavaString) + val fieldString = fieldScalar.getBase.getJavaString + val key = new JSONUtils.PathInstructionJni( + JSONUtils.PathInstructionType.KEY, "", -1) + val named = new JSONUtils.PathInstructionJni( + JSONUtils.PathInstructionType.NAMED, fieldString, -1) + Array(key, named) case _ => throw new UnsupportedOperationException(s"JSON field must be a scalar value") } } - withResource(fieldScalars) { fieldScalars => - withResource(fieldScalars.safeMap(field => json.getJSONObject(field, - GetJsonObjectOptions.builder().allowSingleQuotes(true).build()))) { resultCols => - val generatorCols = resultCols.safeMap(_.incRefCount).zip(schema).safeMap { - case (col, dataType) => GpuColumnVector.from(col, dataType) - } - val nonGeneratorCols = (0 until generatorOffset).safeMap { i => - inputBatch.column(i).asInstanceOf[GpuColumnVector].incRefCount - } - new ColumnarBatch((nonGeneratorCols ++ generatorCols).toArray, inputBatch.numRows) + withResource(fieldInstructions.safeMap(field => JSONUtils.getJsonObject(json, field))) { + resultCols => + val generatorCols = resultCols.safeMap(_.incRefCount).zip(schema).safeMap { + case (col, dataType) => GpuColumnVector.from(col, dataType) } + val nonGeneratorCols = (0 until generatorOffset).safeMap { i => + inputBatch.column(i).asInstanceOf[GpuColumnVector].incRefCount + } + new ColumnarBatch((nonGeneratorCols ++ generatorCols).toArray, inputBatch.numRows) } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala index f64ed1097b0..faca6d8e3c7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuMultiFileReader.scala @@ -18,7 +18,7 @@ package com.nvidia.spark.rapids import java.io.{File, IOException} import java.net.{URI, URISyntaxException} -import 
java.util.concurrent.{Callable, ConcurrentLinkedQueue, ExecutorCompletionService, Future, LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} +import java.util.concurrent.{Callable, ConcurrentLinkedQueue, ExecutorCompletionService, Future, ThreadPoolExecutor, TimeUnit} import scala.annotation.tailrec import scala.collection.JavaConverters._ @@ -123,20 +123,11 @@ object MultiFileReaderThreadPool extends Logging { private def initThreadPool( maxThreads: Int, - keepAliveSeconds: Long = 60): ThreadPoolExecutor = synchronized { + keepAliveSeconds: Int = 60): ThreadPoolExecutor = synchronized { if (threadPool.isEmpty) { - val threadFactory = new ThreadFactoryBuilder() - .setNameFormat(s"multithreaded file reader worker-%d") - .setDaemon(true) - .build() - - val threadPoolExecutor = new ThreadPoolExecutor( - maxThreads, // corePoolSize: max number of threads to create before queuing the tasks - maxThreads, // maximumPoolSize: because we use LinkedBlockingDeque, this is not used - keepAliveSeconds, - TimeUnit.SECONDS, - new LinkedBlockingQueue[Runnable], - threadFactory) + val threadPoolExecutor = + TrampolineUtil.newDaemonCachedThreadPool("multithreaded file reader worker", maxThreads, + keepAliveSeconds) threadPoolExecutor.allowCoreThreadTimeOut(true) logDebug(s"Using $maxThreads for the multithreaded reader thread pool") threadPool = Some(threadPoolExecutor) @@ -214,8 +205,14 @@ abstract class MultiFilePartitionReaderFactoryBase( protected val maxReadBatchSizeRows: Int = rapidsConf.maxReadBatchSizeRows protected val maxReadBatchSizeBytes: Long = rapidsConf.maxReadBatchSizeBytes protected val targetBatchSizeBytes: Long = rapidsConf.gpuTargetBatchSizeBytes - protected val subPageChunked: Boolean = rapidsConf.chunkedSubPageReaderEnabled protected val maxGpuColumnSizeBytes: Long = rapidsConf.maxGpuColumnSizeBytes + protected val useChunkedReader: Boolean = rapidsConf.chunkedReaderEnabled + protected val maxChunkedReaderMemoryUsageSizeBytes: Long = + if(rapidsConf.limitChunkedReaderMemoryUsage) { + (rapidsConf.chunkedReaderMemoryUsageRatio * targetBatchSizeBytes).toLong + } else { + 0L + } private val allCloudSchemes = rapidsConf.getCloudSchemes.toSet override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 8f1a720b92b..27bdb018284 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2737,6 +2737,25 @@ object GpuOverrides extends Logging { ) } }), + expr[ArrayFilter]( + "Filter an input array using a given predicate", + ExprChecks.projectOnly(TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + + TypeSig.DECIMAL_128 + TypeSig.NULL + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.MAP), + TypeSig.ARRAY.nested(TypeSig.all), + Seq( + ParamCheck("argument", + TypeSig.ARRAY.nested(TypeSig.commonCudfTypes + TypeSig.DECIMAL_128 + TypeSig.NULL + + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.MAP), + TypeSig.ARRAY.nested(TypeSig.all)), + ParamCheck("function", TypeSig.BOOLEAN, TypeSig.BOOLEAN))), + (in, conf, p, r) => new ExprMeta[ArrayFilter](in, conf, p, r) { + override def convertToGpu(): GpuExpression = { + GpuArrayFilter( + childExprs.head.convertToGpu(), + childExprs(1).convertToGpu() + ) + } + }), // TODO: fix the signature https://github.com/NVIDIA/spark-rapids/issues/5327 expr[ArraysZip]( "Returns a merged array of structs in 
which the N-th struct contains" + @@ -3257,7 +3276,7 @@ object GpuOverrides extends Logging { ExprChecks.projectOnly(TypeSig.STRING, TypeSig.STRING, Seq(ParamCheck("url", TypeSig.STRING, TypeSig.STRING), ParamCheck("partToExtract", TypeSig.lit(TypeEnum.STRING).withPsNote( - TypeEnum.STRING, "only support partToExtract = PROTOCOL | HOST | QUERY"), + TypeEnum.STRING, "only support partToExtract = PROTOCOL | HOST | QUERY | PATH"), TypeSig.STRING)), // Should really be an OptionalParam Some(RepeatingParamCheck("key", TypeSig.STRING, TypeSig.STRING))), @@ -3651,9 +3670,8 @@ object GpuOverrides extends Logging { ExprChecks.projectOnly( TypeSig.STRING, TypeSig.STRING, Seq(ParamCheck("json", TypeSig.STRING, TypeSig.STRING), ParamCheck("path", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING))), - (a, conf, p, r) => new GpuGetJsonObjectMeta(a, conf, p, r) - ).disabledByDefault("escape sequences are not processed correctly, the input is not " + - "validated, and the output is not normalized the same as Spark"), + (a, conf, p, r) => new GpuGetJsonObjectMeta(a, conf, p, r)).disabledByDefault( + "Experimental feature that could be unstable or have performance issues."), expr[JsonToStructs]( "Returns a struct value with the given `jsonStr` and `schema`", ExprChecks.projectOnly( @@ -3690,8 +3708,7 @@ object GpuOverrides extends Logging { override def convertToGpu(child: Expression): GpuExpression = // GPU implementation currently does not support duplicated json key names in input - GpuJsonToStructs(a.schema, a.options, child, conf.isJsonMixedTypesAsStringEnabled, - a.timeZoneId) + GpuJsonToStructs(a.schema, a.options, child, a.timeZoneId) }).disabledByDefault("it is currently in beta and undergoes continuous enhancements."+ " Please consult the "+ "[compatibility documentation](../compatibility.md#json-supporting-types)"+ @@ -3731,24 +3748,10 @@ object GpuOverrides extends Logging { // potential performance problems. willNotWorkOnGpu("JsonTuple with large number of fields is not supported on GPU") } - // If any field argument contains special characters as follows, fall back to CPU. 
- (a.children.tail).map { fieldExpr => - extractLit(fieldExpr).foreach { field => - if (field.value != null) { - val fieldStr = field.value.asInstanceOf[UTF8String].toString - val specialCharacters = List(".", "[", "]", "{", "}", "\\", "\'", "\"") - if (specialCharacters.exists(fieldStr.contains(_))) { - willNotWorkOnGpu(s"""JsonTuple with special character in field \"$fieldStr\" """ - + "is not supported on GPU") - } - } - } - } } override def convertToGpu(): GpuExpression = GpuJsonTuple(childExprs.map(_.convertToGpu())) } - ).disabledByDefault("JsonTuple on the GPU does not support all of the normalization " + - "that the CPU supports."), + ).disabledByDefault("Experimental feature that could be unstable or have performance issues."), expr[org.apache.spark.sql.execution.ScalarSubquery]( "Subquery that will return only one row and one column", ExprChecks.projectOnly( @@ -3884,8 +3887,7 @@ object GpuOverrides extends Logging { a.dataFilters, conf.maxReadBatchSizeRows, conf.maxReadBatchSizeBytes, - conf.maxGpuColumnSizeBytes, - conf.isJsonMixedTypesAsStringEnabled) + conf.maxGpuColumnSizeBytes) })).map(r => (r.getClassFor.asSubclass(classOf[Scan]), r)).toMap val scans: Map[Class[_ <: Scan], ScanRule[_ <: Scan]] = diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala index 4f140f27bf3..7e75940869b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala @@ -38,7 +38,7 @@ import com.nvidia.spark.rapids.RapidsConf.ParquetFooterReaderType import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.filecache.FileCache import com.nvidia.spark.rapids.jni.{DateTimeRebase, ParquetFooter} -import com.nvidia.spark.rapids.shims.{ColumnDefaultValuesShims, GpuParquetCrypto, GpuTypeShims, ParquetLegacyNanoAsLongShims, ParquetSchemaClipShims, ParquetStringPredShims, ReaderUtils, ShimFilePartitionReaderFactory, SparkShimImpl} +import com.nvidia.spark.rapids.shims.{ColumnDefaultValuesShims, GpuParquetCrypto, GpuTypeShims, ParquetLegacyNanoAsLongShims, ParquetSchemaClipShims, ParquetStringPredShims, ShimFilePartitionReaderFactory, SparkShimImpl} import org.apache.commons.io.IOUtils import org.apache.commons.io.output.{CountingOutputStream, NullOutputStream} import org.apache.hadoop.conf.Configuration @@ -683,12 +683,10 @@ private case class GpuParquetFileFilterHandler( conf.unset(encryptConf) } } - val fileHadoopConf = - ReaderUtils.getHadoopConfForReaderThread(new Path(file.filePath.toString), conf) val footer: ParquetMetadata = try { footerReader match { case ParquetFooterReaderType.NATIVE => - val serialized = withResource(readAndFilterFooter(file, fileHadoopConf, + val serialized = withResource(readAndFilterFooter(file, conf, readDataSchema, filePath)) { tableFooter => if (tableFooter.getNumColumns <= 0) { // Special case because java parquet reader does not like having 0 columns. 
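For context on the GpuJsonTuple and GpuGetJsonObject hunks above: each requested field is now translated into JNI path instructions instead of a `$.field` string path. A minimal sketch of that mapping, mirroring the constructor calls shown in the diff (illustrative only, not the plugin's own helper):

```scala
import com.nvidia.spark.rapids.jni.JSONUtils

// Build the [Key, Named(field)] instruction pair that GpuJsonTuple now creates for each
// field; the field string is treated as a literal name, so characters such as '.' or '['
// no longer force the CPU fallback that the removed check above guarded against.
def fieldToInstructions(field: String): Array[JSONUtils.PathInstructionJni] = Array(
  new JSONUtils.PathInstructionJni(JSONUtils.PathInstructionType.KEY, "", -1),
  new JSONUtils.PathInstructionJni(JSONUtils.PathInstructionType.NAMED, field, -1)
)
```

The resulting array is what `JSONUtils.getJsonObject` consumes in the GpuJsonTuple hunk, which is why the special-character fallback could be dropped.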
@@ -712,7 +710,7 @@ private case class GpuParquetFileFilterHandler( } } case _ => - readAndSimpleFilterFooter(file, fileHadoopConf, filePath) + readAndSimpleFilterFooter(file, conf, filePath) } } catch { case e if GpuParquetCrypto.isColumnarCryptoException(e) => @@ -739,9 +737,9 @@ private case class GpuParquetFileFilterHandler( val blocks = if (pushedFilters.isDefined) { withResource(new NvtxRange("getBlocksWithFilter", NvtxColor.CYAN)) { _ => // Use the ParquetFileReader to perform dictionary-level filtering - ParquetInputFormat.setFilterPredicate(fileHadoopConf, pushedFilters.get) + ParquetInputFormat.setFilterPredicate(conf, pushedFilters.get) //noinspection ScalaDeprecation - withResource(new ParquetFileReader(fileHadoopConf, footer.getFileMetaData, filePath, + withResource(new ParquetFileReader(conf, footer.getFileMetaData, filePath, footer.getBlocks, Collections.emptyList[ColumnDescriptor])) { parquetReader => parquetReader.getRowGroups } @@ -1084,8 +1082,6 @@ case class GpuParquetMultiFilePartitionReaderFactory( rapidsConf, alluxioPathReplacementMap) { private val isCaseSensitive = sqlConf.caseSensitiveAnalysis - private val useChunkedReader = rapidsConf.chunkedReaderEnabled - private val useSubPageChunked = rapidsConf.chunkedSubPageReaderEnabled private val debugDumpPrefix = rapidsConf.parquetDebugDumpPrefix private val debugDumpAlways = rapidsConf.parquetDebugDumpAlways private val numThreads = rapidsConf.multiThreadReadNumThreads @@ -1151,10 +1147,11 @@ case class GpuParquetMultiFilePartitionReaderFactory( val combineConf = CombineConf(combineThresholdSize, combineWaitTime) new MultiFileCloudParquetPartitionReader(conf, files, filterFunc, isCaseSensitive, debugDumpPrefix, debugDumpAlways, maxReadBatchSizeRows, maxReadBatchSizeBytes, - targetBatchSizeBytes, maxGpuColumnSizeBytes, useChunkedReader, subPageChunked, metrics, - partitionSchema, numThreads, maxNumFileProcessed, ignoreMissingFiles, ignoreCorruptFiles, - readUseFieldId, alluxioPathReplacementMap.getOrElse(Map.empty), alluxioReplacementTaskTime, - queryUsesInputFile, keepReadsInOrderFromConf, combineConf) + targetBatchSizeBytes, maxGpuColumnSizeBytes, + useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, + metrics, partitionSchema, numThreads, maxNumFileProcessed, ignoreMissingFiles, + ignoreCorruptFiles, readUseFieldId, alluxioPathReplacementMap.getOrElse(Map.empty), + alluxioReplacementTaskTime, queryUsesInputFile, keepReadsInOrderFromConf, combineConf) } private def filterBlocksForCoalescingReader( @@ -1266,9 +1263,10 @@ case class GpuParquetMultiFilePartitionReaderFactory( _ += TimeUnit.NANOSECONDS.toMillis(filterTime) } new MultiFileParquetPartitionReader(conf, files, clippedBlocks.toSeq, isCaseSensitive, - debugDumpPrefix, debugDumpAlways, useChunkedReader, useSubPageChunked, maxReadBatchSizeRows, - maxReadBatchSizeBytes, targetBatchSizeBytes, maxGpuColumnSizeBytes, metrics, partitionSchema, - numThreads, ignoreMissingFiles, ignoreCorruptFiles, readUseFieldId) + debugDumpPrefix, debugDumpAlways, maxReadBatchSizeRows, maxReadBatchSizeBytes, + targetBatchSizeBytes, maxGpuColumnSizeBytes, + useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, + metrics, partitionSchema, numThreads, ignoreMissingFiles, ignoreCorruptFiles, readUseFieldId) } /** @@ -1302,7 +1300,12 @@ case class GpuParquetPartitionReaderFactory( private val targetSizeBytes = rapidsConf.gpuTargetBatchSizeBytes private val maxGpuColumnSizeBytes = rapidsConf.maxGpuColumnSizeBytes private val useChunkedReader = rapidsConf.chunkedReaderEnabled - 
private val useSubPageChunked = rapidsConf.chunkedSubPageReaderEnabled + private val maxChunkedReaderMemoryUsageSizeBytes = + if(rapidsConf.limitChunkedReaderMemoryUsage) { + (rapidsConf.chunkedReaderMemoryUsageRatio * targetSizeBytes).toLong + } else { + 0L + } private val filterHandler = GpuParquetFileFilterHandler(sqlConf, metrics) private val readUseFieldId = ParquetSchemaClipShims.useFieldId(sqlConf) private val footerReadType = GpuParquetScan.footerReaderHeuristic( @@ -1333,7 +1336,8 @@ case class GpuParquetPartitionReaderFactory( new ParquetPartitionReader(conf, file, singleFileInfo.filePath, singleFileInfo.blocks, singleFileInfo.schema, isCaseSensitive, readDataSchema, debugDumpPrefix, debugDumpAlways, maxReadBatchSizeRows, maxReadBatchSizeBytes, targetSizeBytes, - useChunkedReader, useSubPageChunked, metrics, singleFileInfo.dateRebaseMode, + useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, + metrics, singleFileInfo.dateRebaseMode, singleFileInfo.timestampRebaseMode, singleFileInfo.hasInt96Timestamps, readUseFieldId) } } @@ -1545,14 +1549,13 @@ trait ParquetPartitionReaderBase extends Logging with ScanWithMetrics val filePathString: String = filePath.toString val remoteItems = new ArrayBuffer[CopyRange](blocks.length) var totalBytesToCopy = 0L - val fileHadoopConf = ReaderUtils.getHadoopConfForReaderThread(filePath, conf) withResource(new ArrayBuffer[LocalCopy](blocks.length)) { localItems => blocks.foreach { block => block.getColumns.asScala.foreach { column => val columnSize = column.getTotalSize val outputOffset = totalBytesToCopy + startPos val channel = FileCache.get.getDataRangeChannel(filePathString, - column.getStartingPos, columnSize, fileHadoopConf) + column.getStartingPos, columnSize, conf) if (channel.isDefined) { localItems += LocalCopy(channel.get, columnSize, outputOffset) } else { @@ -1583,14 +1586,13 @@ trait ParquetPartitionReaderBase extends Logging with ScanWithMetrics return 0L } - val fileHadoopConf = ReaderUtils.getHadoopConfForReaderThread(filePath, conf) val coalescedRanges = coalesceReads(remoteCopies) val totalBytesCopied = PerfIO.readToHostMemory( - fileHadoopConf, out.buffer, filePath.toUri, + conf, out.buffer, filePath.toUri, coalescedRanges.map(r => IntRangeWithOffset(r.offset, r.length, r.outputOffset)) ).getOrElse { - withResource(filePath.getFileSystem(fileHadoopConf).open(filePath)) { in => + withResource(filePath.getFileSystem(conf).open(filePath)) { in => val copyBuffer: Array[Byte] = new Array[Byte](copyBufferSize) coalescedRanges.foldLeft(0L) { (acc, blockCopy) => acc + copyDataRange(blockCopy, in, out, copyBuffer) @@ -1602,7 +1604,7 @@ trait ParquetPartitionReaderBase extends Logging with ScanWithMetrics metrics.getOrElse(GpuMetric.FILECACHE_DATA_RANGE_MISSES, NoopMetric) += 1 metrics.getOrElse(GpuMetric.FILECACHE_DATA_RANGE_MISSES_SIZE, NoopMetric) += range.length val cacheToken = FileCache.get.startDataRangeCache( - filePathString, range.offset, range.length, fileHadoopConf) + filePathString, range.offset, range.length, conf) // If we get a filecache token then we can complete the caching by providing the data. // If we do not get a token then we should not cache this data. 
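The reader factories above all derive the new memory cap the same way: when `spark.rapids.sql.reader.chunked.limitMemoryUsage` is enabled, the cap is the usage ratio multiplied by the target batch size, and a cap of 0 means no limit. A minimal sketch of that calculation (assuming the values shown later in this diff: a ratio default of 4 and a 1 GiB default target batch size):

```scala
// Illustrative only: compute the chunked reader's soft memory cap the way the diff does.
// A returned value of 0 disables the limit.
def chunkedReaderMemoryCap(
    limitMemoryUsage: Boolean,   // spark.rapids.sql.reader.chunked.limitMemoryUsage
    memoryUsageRatio: Double,    // spark.rapids.sql.reader.chunked.memoryUsageRatio
    targetBatchSizeBytes: Long): Long = {
  if (limitMemoryUsage) (memoryUsageRatio * targetBatchSizeBytes).toLong else 0L
}

// With a ratio of 4 and a 1 GiB target batch size this yields a 4 GiB cap:
assert(chunkedReaderMemoryCap(true, 4.0, 1L << 30) == (4L << 30))
```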
cacheToken.foreach { token => @@ -1851,6 +1853,11 @@ private case class ParquetSingleDataBlockMeta( * @param debugDumpAlways whether to debug dump always or only on errors * @param maxReadBatchSizeRows soft limit on the maximum number of rows the reader reads per batch * @param maxReadBatchSizeBytes soft limit on the maximum number of bytes the reader reads per batch + * @param targetBatchSizeBytes the target size of a batch + * @param maxGpuColumnSizeBytes the maximum size of a GPU column + * @param useChunkedReader whether to read Parquet by chunks or read all at once + * @param maxChunkedReaderMemoryUsageSizeBytes soft limit on the number of bytes of internal memory + * usage that the reader will use * @param execMetrics metrics * @param partitionSchema Schema of partitions. * @param numThreads the size of the threadpool @@ -1864,12 +1871,12 @@ class MultiFileParquetPartitionReader( override val isSchemaCaseSensitive: Boolean, debugDumpPrefix: Option[String], debugDumpAlways: Boolean, - useChunkedReader: Boolean, - useSubPageChunked: Boolean, maxReadBatchSizeRows: Integer, maxReadBatchSizeBytes: Long, targetBatchSizeBytes: Long, maxGpuColumnSizeBytes: Long, + useChunkedReader: Boolean, + maxChunkedReaderMemoryUsageSizeBytes: Long, override val execMetrics: Map[String, GpuMetric], partitionSchema: StructType, numThreads: Int, @@ -1975,7 +1982,7 @@ class MultiFileParquetPartitionReader( // About to start using the GPU GpuSemaphore.acquireIfNecessary(TaskContext.get()) - MakeParquetTableProducer(useChunkedReader, useSubPageChunked, + MakeParquetTableProducer(useChunkedReader, maxChunkedReaderMemoryUsageSizeBytes, conf, currentTargetBatchSize, parseOpts, dataBuffer, 0, dataSize, metrics, extraInfo.dateRebaseMode, extraInfo.timestampRebaseMode, @@ -2033,6 +2040,11 @@ class MultiFileParquetPartitionReader( * @param debugDumpAlways whether to debug dump always or only on errors * @param maxReadBatchSizeRows soft limit on the maximum number of rows the reader reads per batch * @param maxReadBatchSizeBytes soft limit on the maximum number of bytes the reader reads per batch + * @param targetBatchSizeBytes the target size of the batch + * @param maxGpuColumnSizeBytes the maximum size of a GPU column + * @param useChunkedReader whether to read Parquet by chunks or read all at once + * @param maxChunkedReaderMemoryUsageSizeBytes soft limit on the number of bytes of internal memory + * usage that the reader will use * @param execMetrics metrics * @param partitionSchema Schema of partitions. 
* @param numThreads the size of the threadpool @@ -2060,7 +2072,7 @@ class MultiFileCloudParquetPartitionReader( targetBatchSizeBytes: Long, maxGpuColumnSizeBytes: Long, useChunkedReader: Boolean, - subPageChunked: Boolean, + maxChunkedReaderMemoryUsageSizeBytes: Long, override val execMetrics: Map[String, GpuMetric], partitionSchema: StructType, numThreads: Int, @@ -2549,7 +2561,8 @@ class MultiFileCloudParquetPartitionReader( // The MakeParquetTableProducer will close the input buffer, and that would be bad // because we don't want to close it until we know that we are done with it hostBuffer.incRefCount() - val tableReader = MakeParquetTableProducer(useChunkedReader, subPageChunked, + val tableReader = MakeParquetTableProducer(useChunkedReader, + maxChunkedReaderMemoryUsageSizeBytes, conf, targetBatchSizeBytes, parseOpts, hostBuffer, 0, dataSize, metrics, @@ -2581,7 +2594,7 @@ class MultiFileCloudParquetPartitionReader( object MakeParquetTableProducer extends Logging { def apply( useChunkedReader: Boolean, - useSubPageChunked: Boolean, + maxChunkedReaderMemoryUsageSizeBytes: Long, conf: Configuration, chunkSizeByteLimit: Long, opts: ParquetOptions, @@ -2601,12 +2614,8 @@ object MakeParquetTableProducer extends Logging { debugDumpAlways: Boolean ): GpuDataProducer[Table] = { if (useChunkedReader) { - val passReadLimit = if (useSubPageChunked) { - 4 * chunkSizeByteLimit - } else { - 0L - } - ParquetTableReader(conf, chunkSizeByteLimit, passReadLimit, opts, buffer, offset, + ParquetTableReader(conf, chunkSizeByteLimit, maxChunkedReaderMemoryUsageSizeBytes, + opts, buffer, offset, len, metrics, dateRebaseMode, timestampRebaseMode, hasInt96Timestamps, isSchemaCaseSensitive, useFieldId, readDataSchema, clippedParquetSchema, splits, debugDumpPrefix, debugDumpAlways) @@ -2655,7 +2664,7 @@ object MakeParquetTableProducer extends Logging { case class ParquetTableReader( conf: Configuration, chunkSizeByteLimit: Long, - passReadLimit: Long, + maxChunkedReaderMemoryUsageSizeBytes: Long, opts: ParquetOptions, buffer: HostMemoryBuffer, offset: Long, @@ -2671,8 +2680,8 @@ case class ParquetTableReader( splits: Array[PartitionedFile], debugDumpPrefix: Option[String], debugDumpAlways: Boolean) extends GpuDataProducer[Table] with Logging { - private[this] val reader = new ParquetChunkedReader(chunkSizeByteLimit, passReadLimit, opts, - buffer, offset, len) + private[this] val reader = new ParquetChunkedReader(chunkSizeByteLimit, + maxChunkedReaderMemoryUsageSizeBytes, opts, buffer, offset, len) private[this] lazy val splitsString = splits.mkString("; ") @@ -2750,7 +2759,7 @@ class ParquetPartitionReader( maxReadBatchSizeBytes: Long, targetBatchSizeBytes: Long, useChunkedReader: Boolean, - useSubPageChunked: Boolean, + maxChunkedReaderMemoryUsageSizeBytes: Long, override val execMetrics: Map[String, GpuMetric], dateRebaseMode: DateTimeRebaseMode, timestampRebaseMode: DateTimeRebaseMode, @@ -2818,7 +2827,8 @@ class ParquetPartitionReader( // Inc the ref count because MakeParquetTableProducer will try to close the dataBuffer // which we don't want until we know that the retry is done with it. 
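In the `MakeParquetTableProducer` and `ParquetTableReader` hunks above, the computed cap is handed directly to cudf's chunked Parquet reader instead of being derived from a boolean as `4 * chunkSizeByteLimit`. A rough sketch of how such a reader is typically driven, assuming the cudf Java `ParquetChunkedReader` API (`hasNext`/`readChunk`/`close`); this is illustrative and not the plugin's code:

```scala
import ai.rapids.cudf.{HostMemoryBuffer, ParquetChunkedReader, ParquetOptions, Table}

// Read a Parquet buffer in memory-bounded chunks. Passing memoryCapBytes = 0 is assumed
// to mean "no pass read limit", matching how the diff plumbs the value through.
def readInChunks(opts: ParquetOptions, buffer: HostMemoryBuffer, dataSize: Long,
    chunkSizeByteLimit: Long, memoryCapBytes: Long)(handleTable: Table => Unit): Unit = {
  val reader = new ParquetChunkedReader(chunkSizeByteLimit, memoryCapBytes,
    opts, buffer, 0, dataSize)
  try {
    while (reader.hasNext) {
      val table = reader.readChunk()
      try handleTable(table) finally table.close()
    }
  } finally {
    reader.close()
  }
}
```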
dataBuffer.incRefCount() - val producer = MakeParquetTableProducer(useChunkedReader, useSubPageChunked, conf, + val producer = MakeParquetTableProducer(useChunkedReader, + maxChunkedReaderMemoryUsageSizeBytes, conf, targetBatchSizeBytes, parseOpts, dataBuffer, 0, dataSize, metrics, dateRebaseMode, timestampRebaseMode, diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index aabbacccfe3..d0e483c00cb 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -27,6 +27,7 @@ import scala.util.Try import ai.rapids.cudf.{Cuda, CudaException, CudaFatalException, CudfException, MemoryCleaner} import com.nvidia.spark.rapids.RapidsConf.AllowMultipleJars +import com.nvidia.spark.rapids.RapidsPluginUtils.buildInfoEvent import com.nvidia.spark.rapids.filecache.{FileCache, FileCacheLocalityManager, FileCacheLocalityMsg} import com.nvidia.spark.rapids.jni.GpuTimeZoneDB import com.nvidia.spark.rapids.python.PythonWorkerSemaphore @@ -35,6 +36,7 @@ import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.{ExceptionFailure, SparkConf, SparkContext, TaskFailedReason} import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin} import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.SparkListenerEvent import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ @@ -75,18 +77,23 @@ object RapidsPluginUtils extends Logging { private val SPARK_MASTER = "spark.master" private val SPARK_RAPIDS_REPO_URL = "https://github.com/NVIDIA/spark-rapids" + lazy val buildInfoEvent = SparkRapidsBuildInfoEvent( + sparkRapidsBuildInfo = loadProps(PLUGIN_PROPS_FILENAME), + sparkRapidsJniBuildInfo = loadProps(JNI_PROPS_FILENAME), + cudfBuildInfo = loadProps(CUDF_PROPS_FILENAME), + sparkRapidsPrivateBuildInfo =loadProps(PRIVATE_PROPS_FILENAME) + ) + + + { - val pluginProps = loadProps(PLUGIN_PROPS_FILENAME) - logInfo(s"RAPIDS Accelerator build: $pluginProps") - val jniProps = loadProps(JNI_PROPS_FILENAME) - logInfo(s"RAPIDS Accelerator JNI build: $jniProps") - val cudfProps = loadProps(CUDF_PROPS_FILENAME) - logInfo(s"cudf build: $cudfProps") - val privateProps = loadProps(PRIVATE_PROPS_FILENAME) - logInfo(s"RAPIDS Accelerator Private ${privateProps}") - val pluginVersion = pluginProps.getProperty("version", "UNKNOWN") - val cudfVersion = cudfProps.getProperty("version", "UNKNOWN") - val privateRev = privateProps.getProperty("revision", "UNKNOWN") + logInfo(s"RAPIDS Accelerator build: ${buildInfoEvent.sparkRapidsBuildInfo}") + logInfo(s"RAPIDS Accelerator JNI build: ${buildInfoEvent.sparkRapidsJniBuildInfo}") + logInfo(s"cudf build: ${buildInfoEvent.cudfBuildInfo}") + logInfo(s"RAPIDS Accelerator Private ${buildInfoEvent.sparkRapidsPrivateBuildInfo}") + val pluginVersion = buildInfoEvent.sparkRapidsBuildInfo.getOrElse("version", "UNKNOWN") + val cudfVersion = buildInfoEvent.cudfBuildInfo.getOrElse("version", "UNKNOWN") + val privateRev = buildInfoEvent.sparkRapidsPrivateBuildInfo.getOrElse("revision", "UNKNOWN") logWarning(s"RAPIDS Accelerator $pluginVersion using cudf ${cudfVersion}, " + s"private revision ${privateRev}") } @@ -126,11 +133,11 @@ object RapidsPluginUtils extends Logging { val possibleRapidsJarURLs = classloader.getResources(propName).asScala.toSet.toSeq.filter { url => { 
val urlPath = url.toString - // Filter out submodule jars, e.g. rapids-4-spark-aggregator_2.12-24.04.0-spark341.jar, + // Filter out submodule jars, e.g. rapids-4-spark-aggregator_2.12-24.06.0-spark341.jar, // and files stored under subdirs of '!/', e.g. - // rapids-4-spark_2.12-24.04.0-cuda11.jar!/spark330/rapids4spark-version-info.properties + // rapids-4-spark_2.12-24.06.0-cuda11.jar!/spark330/rapids4spark-version-info.properties // We only want to find the main jar, e.g. - // rapids-4-spark_2.12-24.04.0-cuda11.jar!/rapids4spark-version-info.properties + // rapids-4-spark_2.12-24.06.0-cuda11.jar!/rapids4spark-version-info.properties !urlPath.contains("rapids-4-spark-") && urlPath.endsWith("!/" + propName) } } @@ -156,18 +163,17 @@ object RapidsPluginUtils extends Logging { lazy val msg = s"Multiple $jarName jars found in the classpath:\n$rapidsJarsVersMsg" + s"Please make sure there is only one $jarName jar in the classpath. " - require(revisionMap.size > 0, s"Could not find any $jarName jars in the classpath") - + // revisionMap.size could be 0 when debugging in IDE, so allow it in that case conf.allowMultipleJars match { case AllowMultipleJars.ALWAYS => - if (revisionMap.size != 1 || revisionMap.values.exists(_.size != 1)) { + if (revisionMap.size > 1 || revisionMap.values.exists(_.size != 1)) { logWarning(msg) } case AllowMultipleJars.SAME_REVISION => val recommended = "If it is impossible to fix the classpath you can suppress the " + s"error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to ALWAYS, but this " + s"can cause unpredictable behavior as the plugin may pick up the wrong jar." - require(revisionMap.size == 1, msg + recommended) + require(revisionMap.size <= 1, msg + recommended) if (revisionMap.values.exists(_.size != 1)) { logWarning(msg + recommended) } @@ -176,7 +182,7 @@ object RapidsPluginUtils extends Logging { s"error by setting ${RapidsConf.ALLOW_MULTIPLE_JARS.key} to SAME_REVISION or ALWAYS." + " But setting it to ALWAYS can cause unpredictable behavior as the plugin may pick " + "up the wrong jar." 
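The Plugin.scala changes above consolidate the build metadata into a single `SparkRapidsBuildInfoEvent` (the case class appears further down in this diff) and post it to the Spark listener bus at driver startup. A minimal, hypothetical listener that surfaces the event on the user side; the field names follow the case class in the diff, everything else is illustrative:

```scala
import com.nvidia.spark.rapids.SparkRapidsBuildInfoEvent
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}

// Log the plugin and cudf build info when the RAPIDS driver plugin posts it.
class SparkRapidsBuildInfoListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case e: SparkRapidsBuildInfoEvent =>
      println(s"spark-rapids build: ${e.sparkRapidsBuildInfo}")
      println(s"cudf build: ${e.cudfBuildInfo}")
    case _ => // other events are ignored
  }
}
```

Such a listener would be registered with `spark.extraListeners`; nothing in the plugin requires it, it simply consumes the newly posted event.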
- require(revisionMap.size == 1 && revisionMap.values.forall(_.size == 1), msg + recommended) + require(revisionMap.size <= 1 && revisionMap.values.forall(_.size == 1), msg + recommended) } } @@ -293,7 +299,7 @@ object RapidsPluginUtils extends Logging { } } - def loadProps(resourceName: String): Properties = { + def loadProps(resourceName: String): Map[String, String] = { val classLoader = RapidsPluginUtils.getClass.getClassLoader val resource = classLoader.getResourceAsStream(resourceName) if (resource == null) { @@ -301,7 +307,7 @@ object RapidsPluginUtils extends Logging { } val props = new Properties props.load(resource) - props + props.asScala.toMap } private def loadExtensions[T <: AnyRef](extClass: Class[T], classes: Seq[String]): Seq[T] = { @@ -351,10 +357,8 @@ object RapidsPluginUtils extends Logging { /** * Extracts supported GPU architectures from the given properties file */ - private def getSupportedGpuArchitectures(propFileName: String): Set[Int] = { - val props = RapidsPluginUtils.loadProps(propFileName) - Option(props.getProperty("gpu_architectures")) - .getOrElse(throw new RuntimeException(s"GPU architectures not found in $propFileName")) + private def getSupportedGpuArchitectures(props: Map[String, String], origin: String): Set[Int] = { + props.getOrElse("gpu_architectures", sys.error(s"GPU architectures not found in $origin")) .split(";") .map(_.toInt) .toSet @@ -366,8 +370,9 @@ object RapidsPluginUtils extends Logging { */ def validateGpuArchitecture(): Unit = { val gpuArch = Cuda.getComputeCapabilityMajor * 10 + Cuda.getComputeCapabilityMinor - validateGpuArchitectureInternal(gpuArch, getSupportedGpuArchitectures(JNI_PROPS_FILENAME), - getSupportedGpuArchitectures(CUDF_PROPS_FILENAME)) + validateGpuArchitectureInternal(gpuArch, + getSupportedGpuArchitectures(buildInfoEvent.sparkRapidsJniBuildInfo, JNI_PROPS_FILENAME), + getSupportedGpuArchitectures(buildInfoEvent.cudfBuildInfo, CUDF_PROPS_FILENAME)) } /** @@ -406,6 +411,14 @@ object RapidsPluginUtils extends Logging { } } + +case class SparkRapidsBuildInfoEvent( + sparkRapidsBuildInfo: Map[String, String], + sparkRapidsJniBuildInfo: Map[String, String], + cudfBuildInfo: Map[String, String], + sparkRapidsPrivateBuildInfo: Map[String, String] +) extends SparkListenerEvent + /** * The Spark driver plugin provided by the RAPIDS Spark plugin. 
*/ @@ -460,6 +473,7 @@ class RapidsDriverPlugin extends DriverPlugin with Logging { logDebug("Loading extra driver plugins: " + s"${extraDriverPlugins.map(_.getClass.getName).mkString(",")}") extraDriverPlugins.foreach(_.init(sc, pluginContext)) + TrampolineUtil.postEvent(sc, buildInfoEvent) conf.rapidsConfMap } @@ -578,16 +592,14 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { private def checkCudfVersion(conf: RapidsConf): Unit = { try { - val pluginProps = RapidsPluginUtils.loadProps(RapidsPluginUtils.PLUGIN_PROPS_FILENAME) - val expectedCudfVersion = Option(pluginProps.getProperty("cudf_version")).getOrElse { + val expectedCudfVersion = buildInfoEvent.sparkRapidsBuildInfo.getOrElse("cudf_version", throw CudfVersionMismatchException("Could not find cudf version in " + - RapidsPluginUtils.PLUGIN_PROPS_FILENAME) - } - val cudfProps = RapidsPluginUtils.loadProps(RapidsPluginUtils.CUDF_PROPS_FILENAME) - val cudfVersion = Option(cudfProps.getProperty("version")).getOrElse { + RapidsPluginUtils.PLUGIN_PROPS_FILENAME)) + + val cudfVersion = buildInfoEvent.cudfBuildInfo.getOrElse("version", throw CudfVersionMismatchException("Could not find cudf version in " + - RapidsPluginUtils.CUDF_PROPS_FILENAME) - } + RapidsPluginUtils.CUDF_PROPS_FILENAME)) + // compare cudf version in the classpath with the cudf version expected by plugin if (!RapidsExecutorPlugin.cudfVersionSatisfied(expectedCudfVersion, cudfVersion)) { throw CudfVersionMismatchException(s"Found cudf version $cudfVersion, RAPIDS Accelerator " + diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 2971d9dae51..1ec31c003a4 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -566,6 +566,40 @@ val GPU_COREDUMP_PIPE_PATTERN = conf("spark.rapids.gpu.coreDump.pipePattern") s"Batch size must be positive and not exceed ${Integer.MAX_VALUE} bytes.") .createWithDefault(1 * 1024 * 1024 * 1024) // 1 GiB is the default + val CHUNKED_READER = conf("spark.rapids.sql.reader.chunked") + .doc("Enable a chunked reader where possible. A chunked reader allows " + + "reading highly compressed data that could not be read otherwise, but at the expense " + + "of more GPU memory, and in some cases more GPU computation. "+ + "Currently this only supports ORC and Parquet formats.") + .booleanConf + .createWithDefault(true) + + val CHUNKED_READER_MEMORY_USAGE_RATIO = conf("spark.rapids.sql.reader.chunked.memoryUsageRatio") + .doc("A value to compute soft limit on the internal memory usage of the chunked reader " + + "(if being used). Such limit is calculated as the multiplication of this value and " + + s"'${GPU_BATCH_SIZE_BYTES.key}'.") + .internal() + .startupOnly() + .doubleConf + .checkValue(v => v > 0, "The ratio value must be positive.") + .createWithDefault(4) + + val LIMIT_CHUNKED_READER_MEMORY_USAGE = conf("spark.rapids.sql.reader.chunked.limitMemoryUsage") + .doc("Enable a soft limit on the internal memory usage of the chunked reader " + + "(if being used). Such limit is calculated as the multiplication of " + + s"'${GPU_BATCH_SIZE_BYTES.key}' and '${CHUNKED_READER_MEMORY_USAGE_RATIO.key}'." 
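Expressed as SparkConf settings, a minimal sketch of these reader knobs follows; the reader-side keys are taken from the confs in this patch, while the batch size key is an assumption (its definition is not shown here):

```scala
import org.apache.spark.SparkConf

// Sketch only: enable the chunked reader and its soft memory limit.
// "spark.rapids.sql.batchSizeBytes" is assumed to be the key behind GPU_BATCH_SIZE_BYTES.
val conf = new SparkConf()
  .set("spark.rapids.sql.reader.chunked", "true")                  // chunked reads (default)
  .set("spark.rapids.sql.reader.chunked.limitMemoryUsage", "true") // opt in to the soft limit
  .set("spark.rapids.sql.batchSizeBytes", (1L << 30).toString)     // 1 GiB target batch size
// With the internal memoryUsageRatio left at its default of 4, the chunked reader
// aims to keep its internal memory usage under roughly 4 GiB (1 GiB * 4).
```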
+ + "For example, if batchSizeBytes is set to 1GB and memoryUsageRatio is 4, " + + "the chunked reader will try to keep its memory usage under 4GB.") + .booleanConf + .createOptional + + val CHUNKED_SUBPAGE_READER = conf("spark.rapids.sql.reader.chunked.subPage") + .doc("Enable a chunked reader where possible for reading data that is smaller " + + "than the typical row group/page limit. Currently deprecated and replaced by " + + s"'${LIMIT_CHUNKED_READER_MEMORY_USAGE}'.") + .booleanConf + .createOptional + val MAX_GPU_COLUMN_SIZE_BYTES = conf("spark.rapids.sql.columnSizeBytes") .doc("Limit the max number of bytes for a GPU column. It is same as the cudf " + "row count limit of a column. It is used by the multi-file readers. " + @@ -584,19 +618,6 @@ val GPU_COREDUMP_PIPE_PATTERN = conf("spark.rapids.gpu.coreDump.pipePattern") .integerConf .createWithDefault(Integer.MAX_VALUE) - val CHUNKED_READER = conf("spark.rapids.sql.reader.chunked") - .doc("Enable a chunked reader where possible. A chunked reader allows " + - "reading highly compressed data that could not be read otherwise, but at the expense " + - "of more GPU memory, and in some cases more GPU computation.") - .booleanConf - .createWithDefault(true) - - val CHUNKED_SUBPAGE_READER = conf("spark.rapids.sql.reader.chunked.subPage") - .doc("Enable a chunked reader where possible for reading data that is smaller " + - "than the typical row group/page limit. Currently this only works for parquet.") - .booleanConf - .createWithDefault(true) - val MAX_READER_BATCH_SIZE_BYTES = conf("spark.rapids.sql.reader.batchSizeBytes") .doc("Soft limit on the maximum number of bytes the reader reads per batch. " + "The readers will read chunks of data until this limit is met or exceeded. " + @@ -902,6 +923,15 @@ val GPU_COREDUMP_PIPE_PATTERN = conf("spark.rapids.gpu.coreDump.pipePattern") .booleanConf .createWithDefault(true) + val ENABLE_GETJSONOBJECT_LEGACY = conf("spark.rapids.sql.getJsonObject.legacy.enabled") + .doc("When set to true, the get_json_object function will use the legacy implementation " + + "on the GPU. The legacy implementation is faster than the current implementation, but " + + "it has several incompatibilities and bugs, including no input validation, escapes are " + + "not properly processed for Strings, and non-string output is not normalized.") + .internal() + .booleanConf + .createWithDefault(false) + // FILE FORMATS val MULTITHREAD_READ_NUM_THREADS = conf("spark.rapids.sql.multiThreadedRead.numThreads") .doc("The maximum number of threads on each executor to use for reading small " + @@ -1230,12 +1260,6 @@ val GPU_COREDUMP_PIPE_PATTERN = conf("spark.rapids.gpu.coreDump.pipePattern") .booleanConf .createWithDefault(true) - val ENABLE_READ_JSON_MIXED_TYPES_AS_STRING = - conf("spark.rapids.sql.json.read.mixedTypesAsString.enabled") - .doc("JSON reading is not 100% compatible when reading mixed types as string.") - .booleanConf - .createWithDefault(false) - val ENABLE_AVRO = conf("spark.rapids.sql.format.avro.enabled") .doc("When set to true enables all avro input and output acceleration. " + "(only input is currently supported anyways)") @@ -2148,6 +2172,22 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. .booleanConf .createWithDefault(false) + val TEST_GET_JSON_OBJECT_SAVE_PATH = conf("spark.rapids.sql.expression.GetJsonObject.debugPath") + .doc("Only for tests: specify a directory to save CSV debug output for get_json_object " + + "if the output differs from the CPU version. 
Multiple files may be saved") + .internal() + .stringConf + .createOptional + + val TEST_GET_JSON_OBJECT_SAVE_ROWS = + conf("spark.rapids.sql.expression.GetJsonObject.debugSaveRows") + .doc("Only for tests: when a debugPath is provided this is the number " + + "of rows that is saved per file. There may be multiple files if there " + + "are multiple tasks or multiple batches within a task") + .internal() + .integerConf + .createWithDefault(1024) + private def printSectionHeader(category: String): Unit = println(s"\n### $category") @@ -2183,7 +2223,7 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. |On startup use: `--conf [conf key]=[conf value]`. For example: | |``` - |${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.04.0-SNAPSHOT-cuda11.jar \ + |${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.06.0-SNAPSHOT-cuda11.jar \ |--conf spark.plugins=com.nvidia.spark.SQLPlugin \ |--conf spark.rapids.sql.concurrentGpuTasks=2 |``` @@ -2506,7 +2546,21 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val chunkedReaderEnabled: Boolean = get(CHUNKED_READER) - lazy val chunkedSubPageReaderEnabled: Boolean = get(CHUNKED_SUBPAGE_READER) + lazy val limitChunkedReaderMemoryUsage: Boolean = { + val hasLimit = get(LIMIT_CHUNKED_READER_MEMORY_USAGE) + val deprecatedConf = get(CHUNKED_SUBPAGE_READER) + if (deprecatedConf.isDefined) { + logWarning(s"'${CHUNKED_SUBPAGE_READER.key}' is deprecated and is replaced by " + + s"'${LIMIT_CHUNKED_READER_MEMORY_USAGE}'.") + if (hasLimit.isDefined && hasLimit.get != deprecatedConf.get) { + throw new IllegalStateException(s"Both '${CHUNKED_SUBPAGE_READER.key}' and " + + s"'${LIMIT_CHUNKED_READER_MEMORY_USAGE.key}' are set but using different values.") + } + } + hasLimit.getOrElse(deprecatedConf.getOrElse(true)) + } + + lazy val chunkedReaderMemoryUsageRatio: Double = get(CHUNKED_READER_MEMORY_USAGE_RATIO) lazy val maxReadBatchSizeRows: Int = get(MAX_READER_BATCH_SIZE_ROWS) @@ -2570,6 +2624,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val isTieredProjectEnabled: Boolean = get(ENABLE_TIERED_PROJECT) + lazy val isLegacyGetJsonObjectEnabled: Boolean = get(ENABLE_GETJSONOBJECT_LEGACY) + lazy val isExpandPreprojectEnabled: Boolean = get(ENABLE_EXPAND_PREPROJECT) lazy val multiThreadReadNumThreads: Int = { @@ -2675,8 +2731,6 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val isJsonDecimalReadEnabled: Boolean = get(ENABLE_READ_JSON_DECIMALS) - lazy val isJsonMixedTypesAsStringEnabled: Boolean = get(ENABLE_READ_JSON_MIXED_TYPES_AS_STRING) - lazy val isAvroEnabled: Boolean = get(ENABLE_AVRO) lazy val isAvroReadEnabled: Boolean = get(ENABLE_AVRO_READ) @@ -2915,6 +2969,10 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val skipGpuArchCheck: Boolean = get(SKIP_GPU_ARCH_CHECK) + lazy val testGetJsonObjectSavePath: Option[String] = get(TEST_GET_JSON_OBJECT_SAVE_PATH) + + lazy val testGetJsonObjectSaveRows: Int = get(TEST_GET_JSON_OBJECT_SAVE_ROWS) + private val optimizerDefaults = Map( // this is not accurate because CPU projections do have a cost due to appending values // to each row that is produced, but this needs to be a really small number because @@ -2985,4 +3043,4 @@ case class OomInjectionConf( skipCount: Int, withSplit: Boolean, oomInjectionFilter: OomInjectionType -) \ No newline at end of file +) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/higherOrderFunctions.scala 
b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/higherOrderFunctions.scala index 2093b52256e..3a4e3eeb321 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/higherOrderFunctions.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/higherOrderFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import scala.collection.mutable import ai.rapids.cudf import ai.rapids.cudf.{DType, Table} -import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} import com.nvidia.spark.rapids.RapidsPluginImplicits.ReallyAGpuExpression import com.nvidia.spark.rapids.shims.ShimExpression @@ -275,10 +275,13 @@ trait GpuArrayTransformBase extends GpuSimpleHigherOrderFunction { } /* - * Post-process the column view of the array after applying the function parameter + * Post-process the column view of the array after applying the function parameter. + * @param lambdaTransformedCV the results of the lambda expression running + * @param arg the original input array from the expression. */ protected def transformListColumnView( - lambdaTransformedCV: cudf.ColumnView): GpuColumnVector + lambdaTransformedCV: cudf.ColumnView, + arg: cudf.ColumnView): GpuColumnVector override def columnarEval(batch: ColumnarBatch): GpuColumnVector = { withResource(argument.columnarEval(batch)) { arg => @@ -287,13 +290,14 @@ trait GpuArrayTransformBase extends GpuSimpleHigherOrderFunction { } withResource(dataCol) { _ => val cv = GpuListUtils.replaceListDataColumnAsView(arg.getBase, dataCol.getBase) - withResource(cv)(transformListColumnView) + withResource(cv) { cv => + transformListColumnView(cv, arg.getBase) + } } } } } - case class GpuArrayTransform( argument: Expression, function: Expression, @@ -311,12 +315,11 @@ case class GpuArrayTransform( } override protected def transformListColumnView( - lambdaTransformedCV: cudf.ColumnView): GpuColumnVector = { + lambdaTransformedCV: cudf.ColumnView, arg: cudf.ColumnView): GpuColumnVector = { GpuColumnVector.from(lambdaTransformedCV.copyToColumnVector(), dataType) } } - case class GpuArrayExists( argument: Expression, function: Expression, @@ -402,7 +405,8 @@ case class GpuArrayExists( } override protected def transformListColumnView( - lambdaTransformedCV: cudf.ColumnView + lambdaTransformedCV: cudf.ColumnView, + arg: cudf.ColumnView ): GpuColumnVector = { withResource(exists(lambdaTransformedCV)) { existsCV => // exists is false for empty arrays @@ -415,6 +419,29 @@ case class GpuArrayExists( } +case class GpuArrayFilter( + argument: Expression, + function: Expression, + isBound: Boolean = false, + boundIntermediate: Seq[GpuExpression] = Seq.empty) extends GpuArrayTransformBase { + + override def dataType: DataType = argument.dataType + + override def nodeName: String = "filter" + + override def bind(input: AttributeSeq): GpuExpression = { + val (boundFunc, boundArg, boundIntermediate) = bindLambdaFunc(input) + GpuArrayFilter(boundArg, boundFunc,isBound = true, boundIntermediate) + } + + override protected def transformListColumnView(lambdaTransformedCV: cudf.ColumnView, + arg: cudf.ColumnView): GpuColumnVector = { + closeOnExcept(arg.applyBooleanMask(lambdaTransformedCV)) { ret => + GpuColumnVector.from(ret, dataType) + } + } +} + trait GpuMapSimpleHigherOrderFunction extends 
GpuSimpleHigherOrderFunction with GpuBind { protected def isBound: Boolean diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala index e5b34f82973..2024fb5891d 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala @@ -246,8 +246,7 @@ case class GpuJsonScan( dataFilters: Seq[Expression], maxReaderBatchSizeRows: Integer, maxReaderBatchSizeBytes: Long, - maxGpuColumnSizeBytes: Long, - mixedTypesAsStringEnabled: Boolean) + maxGpuColumnSizeBytes: Long) extends TextBasedFileScan(sparkSession, options) with GpuScan { private lazy val parsedOptions: JSONOptions = new JSONOptions( @@ -270,8 +269,7 @@ case class GpuJsonScan( GpuJsonPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, dataSchema, readDataSchema, readPartitionSchema, parsedOptions, maxReaderBatchSizeRows, - maxReaderBatchSizeBytes, maxGpuColumnSizeBytes, metrics, options.asScala.toMap, - mixedTypesAsStringEnabled) + maxReaderBatchSizeBytes, maxGpuColumnSizeBytes, metrics, options.asScala.toMap) } override def withInputFile(): GpuScan = this @@ -289,8 +287,7 @@ case class GpuJsonPartitionReaderFactory( maxReaderBatchSizeBytes: Long, maxGpuColumnSizeBytes: Long, metrics: Map[String, GpuMetric], - @transient params: Map[String, String], - mixedTypesAsStringEnabled: Boolean) extends ShimFilePartitionReaderFactory(params) { + @transient params: Map[String, String]) extends ShimFilePartitionReaderFactory(params) { override def buildReader(partitionedFile: PartitionedFile): PartitionReader[InternalRow] = { throw new IllegalStateException("ROW BASED PARSING IS NOT SUPPORTED ON THE GPU...") @@ -300,7 +297,7 @@ case class GpuJsonPartitionReaderFactory( val conf = broadcastedConf.value.value val reader = new PartitionReaderWithBytesRead(new JsonPartitionReader(conf, partFile, dataSchema, readDataSchema, parsedOptions, maxReaderBatchSizeRows, maxReaderBatchSizeBytes, - metrics, mixedTypesAsStringEnabled)) + metrics)) ColumnarPartitionReaderWithPartitionValues.newReader(partFile, reader, partitionSchema, maxGpuColumnSizeBytes) } @@ -346,14 +343,13 @@ class JsonPartitionReader( parsedOptions: JSONOptions, maxRowsPerChunk: Integer, maxBytesPerChunk: Long, - execMetrics: Map[String, GpuMetric], - enableMixedTypesAsString: Boolean) + execMetrics: Map[String, GpuMetric]) extends GpuTextBasedPartitionReader[HostLineBufferer, HostLineBuffererFactory.type](conf, partFile, dataSchema, readDataSchema, parsedOptions.lineSeparatorInRead, maxRowsPerChunk, maxBytesPerChunk, execMetrics, HostLineBuffererFactory) { def buildJsonOptions(parsedOptions: JSONOptions): cudf.JSONOptions = - GpuJsonReadCommon.cudfJsonOptions(parsedOptions, enableMixedTypesAsString) + GpuJsonReadCommon.cudfJsonOptions(parsedOptions) /** * Read the host buffer to GPU table diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuReadJsonFileFormat.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuReadJsonFileFormat.scala index 2c31ccacfcb..b7b7d63c3cb 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuReadJsonFileFormat.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuReadJsonFileFormat.scala @@ -68,8 +68,7 @@ class GpuReadJsonFileFormat extends JsonFileFormat with 
GpuReadFileFormatWithMet rapidsConf.maxReadBatchSizeBytes, rapidsConf.maxGpuColumnSizeBytes, metrics, - options, - rapidsConf.isJsonMixedTypesAsStringEnabled) + options) PartitionReaderIterator.buildReader(factory) } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonReadCommon.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonReadCommon.scala index cf908fbb557..9acc9063750 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonReadCommon.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonReadCommon.scala @@ -362,11 +362,10 @@ object GpuJsonReadCommon { } } - def cudfJsonOptions(options: JSONOptions, - enableMixedTypes: Boolean): ai.rapids.cudf.JSONOptions = { + def cudfJsonOptions(options: JSONOptions): ai.rapids.cudf.JSONOptions = { ai.rapids.cudf.JSONOptions.builder() .withRecoverWithNull(true) - .withMixedTypesAsStrings(enableMixedTypes) + .withMixedTypesAsStrings(true) .withNormalizeWhitespace(true) .withKeepQuotes(true) .withNormalizeSingleQuotes(options.allowSingleQuotes) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala index cfd34770184..464941d4dbf 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala @@ -69,7 +69,6 @@ case class GpuJsonToStructs( schema: DataType, options: Map[String, String], child: Expression, - enableMixedTypesAsString: Boolean, timeZoneId: Option[String] = None) extends GpuUnaryExpression with TimeZoneAwareExpression with ExpectsInputTypes with NullIntolerant { @@ -155,7 +154,7 @@ case class GpuJsonToStructs( SQLConf.get.columnNameOfCorruptRecord) private lazy val jsonOptions = - GpuJsonReadCommon.cudfJsonOptions(parsedOptions, enableMixedTypesAsString) + GpuJsonReadCommon.cudfJsonOptions(parsedOptions) override protected def doColumnar(input: GpuColumnVector): cudf.ColumnVector = { schema match { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala index 6a72c9e76c2..9a8f65ac821 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuParseUrl.scala @@ -40,7 +40,7 @@ object GpuParseUrl { def isSupportedPart(part: String): Boolean = { part match { - case PROTOCOL | HOST | QUERY => + case PROTOCOL | HOST | QUERY | PATH => true case _ => false @@ -67,9 +67,11 @@ case class GpuParseUrl(children: Seq[Expression]) ParseURI.parseURIHost(url.getBase) case QUERY => ParseURI.parseURIQuery(url.getBase) - case PATH | REF | FILE | AUTHORITY | USERINFO => + case PATH => + ParseURI.parseURIPath(url.getBase) + case REF | FILE | AUTHORITY | USERINFO => throw new UnsupportedOperationException(s"$this is not supported partToExtract=$part. 
" + - s"Only PROTOCOL, HOST and QUERY without a key are supported") + s"Only PROTOCOL, HOST, QUERY and PATH are supported") case _ => throw new IllegalArgumentException(s"Invalid partToExtract: $partToExtract") } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala index f02107e1458..36236b23936 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,7 +80,7 @@ object GpuMurmur3Hash { HashUtils.normalizeInput(cv).asInstanceOf[ColumnView] } withResource(normalized) { _ => - ColumnVector.spark32BitMurmurHash3(seed, normalized) + Hash.murmurHash32(seed, normalized) } } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala index 80bfbf69c7e..8fee4144270 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/RapidsShuffleInternalManagerBase.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ package org.apache.spark.sql.rapids import java.io.{File, FileInputStream} import java.util.Optional -import java.util.concurrent.{Callable, ConcurrentHashMap, ExecutionException, Executors, Future, LinkedBlockingQueue} +import java.util.concurrent.{Callable, ConcurrentHashMap, ExecutionException, Executors, Future, LinkedBlockingQueue, TimeUnit} import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} import scala.collection @@ -28,6 +28,7 @@ import scala.collection.mutable.ListBuffer import ai.rapids.cudf.{NvtxColor, NvtxRange} import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.ScalableTaskCompletion.onTaskCompletion import com.nvidia.spark.rapids.format.TableMeta import com.nvidia.spark.rapids.shuffle.{RapidsShuffleRequestHandler, RapidsShuffleServer, RapidsShuffleTransport} @@ -644,40 +645,107 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( private val futures = new mutable.Queue[Future[Option[BlockState]]]() private val serializerInstance = serializer.newInstance() private val limiter = new BytesInFlightLimiter(maxBytesInFlight) - private val fallbackIter: Iterator[(Any, Any)] = if (numReaderThreads == 1) { - // this is the non-optimized case, where we add metrics to capture the blocked - // time and the deserialization time as part of the shuffle read time. 
- new Iterator[(Any, Any)]() { - private var currentIter: Iterator[(Any, Any)] = _ - override def hasNext: Boolean = fetcherIterator.hasNext || ( - currentIter != null && currentIter.hasNext) - - override def next(): (Any, Any) = { - val fetchTimeStart = System.nanoTime() - var readBlockedTime = 0L - if (currentIter == null || !currentIter.hasNext) { - val readBlockedStart = System.nanoTime() - val (_, stream) = fetcherIterator.next() - readBlockedTime = System.nanoTime() - readBlockedStart - currentIter = serializerInstance.deserializeStream(stream).asKeyValueIterator + private val fallbackIter: Iterator[(Any, Any)] with AutoCloseable = + if (numReaderThreads == 1) { + // this is the non-optimized case, where we add metrics to capture the blocked + // time and the deserialization time as part of the shuffle read time. + new Iterator[(Any, Any)]() with AutoCloseable { + private var currentIter: Iterator[(Any, Any)] = _ + private var currentStream: AutoCloseable = _ + override def hasNext: Boolean = fetcherIterator.hasNext || ( + currentIter != null && currentIter.hasNext) + + override def close(): Unit = { + if (currentStream != null) { + currentStream.close() + currentStream = null + } + } + + override def next(): (Any, Any) = { + val fetchTimeStart = System.nanoTime() + var readBlockedTime = 0L + if (currentIter == null || !currentIter.hasNext) { + val readBlockedStart = System.nanoTime() + val (_, stream) = fetcherIterator.next() + readBlockedTime = System.nanoTime() - readBlockedStart + // this is stored only to call close on it + currentStream = stream + currentIter = serializerInstance.deserializeStream(stream).asKeyValueIterator + } + val res = currentIter.next() + val fetchTime = System.nanoTime() - fetchTimeStart + deserializationTimeNs.foreach(_ += (fetchTime - readBlockedTime)) + shuffleReadTimeNs.foreach(_ += fetchTime) + res } - val res = currentIter.next() - val fetchTime = System.nanoTime() - fetchTimeStart - deserializationTimeNs.foreach(_ += (fetchTime - readBlockedTime)) - shuffleReadTimeNs.foreach(_ += fetchTime) - res } + } else { + null } - } else { - null - } - // Register a completion handler to close any queued cbs. + // Register a completion handler to close any queued cbs, + // pending iterators, or futures onTaskCompletion(context) { + // remove any materialized batches queued.forEach { case (_, cb:ColumnarBatch) => cb.close() } queued.clear() + + // close any materialized BlockState objects that are holding onto netty buffers or + // file descriptors + pendingIts.safeClose() + pendingIts.clear() + + // we could have futures left that are either done or in flight + // we need to cancel them and then close out any `BlockState` + // objects that were created (to remove netty buffers or file descriptors) + val futuresAndCancellations = futures.map { f => + val didCancel = f.cancel(true) + (f, didCancel) + } + + // if we weren't able to cancel, we are going to make a best attempt at getting the future + // and we are going to close it. The timeout is to prevent an (unlikely) infinite wait. + // If we do timeout then this handler is going to throw. + var failedFuture: Option[Throwable] = None + futuresAndCancellations + .filter { case (_, didCancel) => !didCancel } + .foreach { case (future, _) => + try { + // this could either be a successful future, or it finished with exception + // the case when it will fail with exception is when the underlying stream is closed + // as part of the shutdown process of the task. 
+ future.get(10, TimeUnit.MILLISECONDS) + .foreach(_.close()) + } catch { + case t: Throwable => + // this is going to capture the first exception and not worry about others + // because we probably don't want to spam the UI or log with an exception per + // block we are fetching + if (failedFuture.isEmpty) { + failedFuture = Some(t) + } + } + } + futures.clear() + try { + if (fallbackIter != null) { + fallbackIter.close() + } + } catch { + case t: Throwable => + if (failedFuture.isEmpty) { + failedFuture = Some(t) + } else { + failedFuture.get.addSuppressed(t) + } + } finally { + failedFuture.foreach { e => + throw e + } + } } override def hasNext: Boolean = { @@ -689,9 +757,26 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( } } - case class BlockState(blockId: BlockId, batchIter: SerializedBatchIterator) - extends Iterator[(Any, Any)] { - private var nextBatchSize = batchIter.tryReadNextHeader().getOrElse(0L) + case class BlockState( + blockId: BlockId, + batchIter: SerializedBatchIterator, + origStream: AutoCloseable) + extends Iterator[(Any, Any)] with AutoCloseable { + + private var nextBatchSize = { + var success = false + try { + val res = batchIter.tryReadNextHeader().getOrElse(0L) + success = true + res + } finally { + if (!success) { + // we tried to read from a stream, but something happened + // lets close it + close() + } + } + } def getNextBatchSize: Long = nextBatchSize @@ -699,8 +784,23 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( override def next(): (Any, Any) = { val nextBatch = batchIter.next() - nextBatchSize = batchIter.tryReadNextHeader().getOrElse(0L) - nextBatch + var success = false + try { + nextBatchSize = batchIter.tryReadNextHeader().getOrElse(0L) + success = true + nextBatch + } finally { + if (!success) { + // the call to get a next header threw. We need to close `nextBatch`. 
+ nextBatch match { + case (_, cb: ColumnarBatch) => cb.close() + } + } + } + } + + override def close(): Unit = { + origStream.close() // make sure we call this on error } } @@ -723,7 +823,7 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( waitTime += System.nanoTime() - waitTimeStart // if the future returned a block state, we have more work to do pending match { - case Some(leftOver@BlockState(_, _)) => + case Some(leftOver@BlockState(_, _, _)) => pendingIts.enqueue(leftOver) case _ => // done } @@ -771,19 +871,27 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( private def deserializeTask(blockState: BlockState): Unit = { val slot = RapidsShuffleInternalManagerBase.getNextReaderSlot futures += RapidsShuffleInternalManagerBase.queueReadTask(slot, () => { - var currentBatchSize = blockState.getNextBatchSize - var didFit = true - while (blockState.hasNext && didFit) { - val batch = blockState.next() - queued.offer(batch) - // peek at the next batch - currentBatchSize = blockState.getNextBatchSize - didFit = limiter.acquire(currentBatchSize) - } - if (!didFit) { - Some(blockState) - } else { - None // no further batches + var success = false + try { + var currentBatchSize = blockState.getNextBatchSize + var didFit = true + while (blockState.hasNext && didFit) { + val batch = blockState.next() + queued.offer(batch) + // peek at the next batch + currentBatchSize = blockState.getNextBatchSize + didFit = limiter.acquire(currentBatchSize) + } + success = true + if (!didFit) { + Some(blockState) + } else { + None // no further batches + } + } finally { + if (!success) { + blockState.close() + } } }) } @@ -830,7 +938,7 @@ abstract class RapidsShuffleThreadedReaderBase[K, C]( val deserStream = serializerInstance.deserializeStream(inputStream) val batchIter = deserStream.asKeyValueIterator.asInstanceOf[SerializedBatchIterator] - val blockState = BlockState(blockId, batchIter) + val blockState = BlockState(blockId, batchIter, inputStream) // get the next known batch size (there could be multiple batches) if (limiter.acquire(blockState.getNextBatchSize)) { // we can fit at least the first batch in this block diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala index be30e67ad7b..9e03dc26141 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
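A minimal sketch (not part of this patch) of how the new SparkRapidsBuildInfoEvent could be consumed: the driver plugin above posts it to the Spark listener bus via the TrampolineUtil.postEvent helper added in this patch, so a plain SparkListener can pick it up in onOtherEvent. The listener class name and output format below are illustrative only.

```scala
import com.nvidia.spark.rapids.SparkRapidsBuildInfoEvent
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}

// Illustrative listener: logs the build info maps carried by the event.
class SparkRapidsBuildInfoListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case e: SparkRapidsBuildInfoEvent =>
      // Each field is a Map[String, String] loaded from a *-version-info.properties file.
      println(s"spark-rapids version: ${e.sparkRapidsBuildInfo.getOrElse("version", "UNKNOWN")}")
      println(s"cudf version: ${e.cudfBuildInfo.getOrElse("version", "UNKNOWN")}")
      println(s"private revision: ${e.sparkRapidsPrivateBuildInfo.getOrElse("revision", "UNKNOWN")}")
    case _ => // ignore other events
  }
}

// e.g. sc.addSparkListener(new SparkRapidsBuildInfoListener)
```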
@@ -16,6 +16,8 @@ package org.apache.spark.sql.rapids.execution +import java.util.concurrent.ThreadPoolExecutor + import org.json4s.JsonAST import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkMasterRegex, SparkUpgradeException, TaskContext} @@ -26,6 +28,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.EXECUTOR_ID import org.apache.spark.io.CompressionCodec import org.apache.spark.memory.TaskMemoryManager +import org.apache.spark.scheduler.SparkListenerEvent import org.apache.spark.security.CryptoStreamUtils import org.apache.spark.serializer.{JavaSerializer, SerializerManager} import org.apache.spark.sql.{AnalysisException, SparkSession} @@ -42,7 +45,7 @@ import org.apache.spark.util.{ShutdownHookManager, Utils} object TrampolineUtil { def doExecuteBroadcast[T](child: SparkPlan): Broadcast[T] = child.doExecuteBroadcast() - def isSupportedRelation(mode: BroadcastMode): Boolean = + def isSupportedRelation(mode: BroadcastMode): Boolean = ShimTrampolineUtil.isSupportedRelation(mode) def unionLikeMerge(left: DataType, right: DataType): DataType = @@ -217,4 +220,16 @@ object TrampolineUtil { 1 } } + + def newDaemonCachedThreadPool( + prefix: String, + maxThreadNumber: Int, + keepAliveSeconds: Int): ThreadPoolExecutor = { + org.apache.spark.util.ThreadUtils.newDaemonCachedThreadPool(prefix, maxThreadNumber, + keepAliveSeconds) + } + + def postEvent(sc: SparkContext, sparkEvent: SparkListenerEvent): Unit = { + sc.listenerBus.post(sparkEvent) + } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala index 5e588cae7bd..182d7d1b6c6 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -71,13 +71,31 @@ class RebatchingRoundoffIterator( } } + private[this] def concatRowsOnlyBatch(cbs: ColumnarBatch*): ColumnarBatch = { + if (cbs.length == 1) { + return cbs.head + } + withResource(cbs) { _ => + val totalRowsNum = cbs.map(_.numRows().toLong).sum + if (totalRowsNum != totalRowsNum.toInt) { + throw new IllegalStateException("Cannot support a batch larger that MAX INT rows") + } + new ColumnarBatch(Array.empty, totalRowsNum.toInt) + } + } + private[this] def concat(l: ColumnarBatch, r: ColumnarBatch): ColumnarBatch = { - withResource(GpuColumnVector.from(l)) { lTable => - withResource(GpuColumnVector.from(r)) { rTable => - withResource(Table.concatenate(lTable, rTable)) { concatTable => - GpuColumnVector.from(concatTable, GpuColumnVector.extractTypes(l)) + assert(l.numCols() == r.numCols()) + if (l.numCols() > 0) { + withResource(GpuColumnVector.from(l)) { lTable => + withResource(GpuColumnVector.from(r)) { rTable => + withResource(Table.concatenate(lTable, rTable)) { concatTable => + GpuColumnVector.from(concatTable, GpuColumnVector.extractTypes(l)) + } } } + } else { // rows only batches + concatRowsOnlyBatch(l, r) } } @@ -91,7 +109,13 @@ class RebatchingRoundoffIterator( batches.append(SpillableColumnarBatch(got, SpillPriorities.ACTIVE_BATCHING_PRIORITY)) } val toConcat = batches.toArray.safeMap(_.getColumnarBatch()) - ConcatAndConsumeAll.buildNonEmptyBatch(toConcat, schema) + assert(toConcat.nonEmpty, "no batches to be concatenated") + // expect all batches have the same number of columns + if (toConcat.head.numCols() > 0) { + ConcatAndConsumeAll.buildNonEmptyBatch(toConcat, schema) + } else { + concatRowsOnlyBatch(toConcat: _*) + } } override def next(): ColumnarBatch = { @@ -148,8 +172,9 @@ class RebatchingRoundoffIterator( } val rc: Long = combined.numRows() + val numCols = combined.numCols() - if (rc % targetRoundoff == 0 || rc < targetRoundoff) { + if (rc % targetRoundoff == 0 || rc < targetRoundoff || numCols == 0) { return combined } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/test/cpuJsonExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/test/cpuJsonExpressions.scala new file mode 100644 index 00000000000..97d271b076f --- /dev/null +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/test/cpuJsonExpressions.scala @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.rapids.test + +import java.time.LocalDate +import java.time.format.DateTimeFormatter +import java.util.{Objects, UUID} + +import scala.collection.mutable +import scala.util.Random + +import ai.rapids.cudf.{ColumnVector, DType, HostColumnVector} +import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.GpuColumnVector +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataOutputStream, Path} + +import org.apache.spark.TaskContext +import org.apache.spark.sql.catalyst.expressions.{GetJsonObject, Literal} +import org.apache.spark.sql.types.StringType +import org.apache.spark.unsafe.types.UTF8String + +case class CsvWriterWrapper(filePath: String, conf: Configuration) extends AutoCloseable { + + // This is implemented as a method to make it easier to subclass + // ColumnarOutputWriter in the tests, and override this behavior. + private def getOutputStream: FSDataOutputStream = { + val hadoopPath = new Path(filePath) + val fs = hadoopPath.getFileSystem(conf) + fs.create(hadoopPath, false) + } + private var fileStream: FSDataOutputStream = getOutputStream + + override def close(): Unit = { + if (fileStream != null) { + // csv writer will close under file writer + fileStream.close() + fileStream = null + } + } + + def escape(str: String): String = { + if (str == null) { + "" + } else { + "\"" + str.replace("\n", "**LF**") + .replace("\r", "**CR**") + .replace("\"", "**QT**") + .replace(",", "**COMMA**")+ "\"" + } + } + + def writeRow(isLegacy: Boolean, row: Array[String]): Unit = { + val fullSeq = Seq(isLegacy.toString) ++ row + fileStream.write(fullSeq.map(escape).mkString("", ",", "\n").getBytes("UTF8")) + } +} + +/** + * Used to mask customer data to avoid customer data leakage. + */ +object GetJsonObjectMask { + + /** + * Used by mask data + */ + private def getRetainChars: Set[Char] = { + val s = mutable.Set[Char]() + for (i <- 0 to 32) { + s += i.toChar + } + val others = Array[Char]( + '{', '}', '[', ']', ',', ':', '"', '\'', + '\\', '/', 'b', 'f', 'n', 'r', 't', 'u', + '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', + 'u', 'A', 'a', 'B', 'b', 'C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', + 't', 'r', 'u', 'e', + 'f', 'a', 'l', 's', 'e', + 'n', 'u', 'l', 'l', + '$', '[', ']', '.', '*', '\'', '?' + ) + s ++= others + s.toSet + } + + private val RETAIN_CHARS = getRetainChars + + private def getCharsForKey: Set[Char] = { + val buf = new mutable.ArrayBuffer[Char] + for (c <- 'A' to 'Z') { + buf.append(c) + } + for (c <- 'a' to 'z') { + buf.append(c) + } + buf.toSet + } + + private val oneToOneMappingChars: Set[Char] = getCharsForKey -- RETAIN_CHARS + + /** + * Mask data. RAPIDS Accelerator should not dump the original Customer data. + * This dump tool only care about the functionality of get-json-object, the masked data should + * reproduce issues if original data/path can reproduce issues. The mask is to find a way to + * mask data and reproduce issues by using masked data. 
+ * + * Special/retain chars, the following chars will not be masked: + * ASCII chars [0, 31] including space char + * { } [ ] , : " ' : JSON structure chars, should not mask + * \ : escape char, should not mask + * / b f n r t u : can follow \, should not mask + * - : used by number, should not mask + * 0-9 : used by number, it's special char, mask method refers to the following + * e E : used by number, e.g.: 1.0E-3, should not mask + * u A-F a-f : used by JSON string by unicode, e.g.: \u1e2F + * true : should not mask + * false : should not mask + * null : should not mask + * $ [ ] . * ' : used by path, should not mask + * ? : json path supports although Spark does not support, also add this because has no side + * effect + * Above special/retain chars should not be masked, or the JSON will be invalid. + * + * Mask logic: + * - Assume path only contains a-z, A-Z, '_' and [0-9] + * - For digits [1-9] create a random one to one mapping and replace, note do not touch '0' + * Because 00 number is invalid. + * - For above special/retain chars do not change + * - For char set [a-z, A-Z] minus special/retain chars like [eE1-9], create a random one to + * one mapping to mask data. e.g.: a -> b, b -> c, ..., z -> a + * - For other chars, e.g.: Chinese chars, map to a const char 's' + * + * @return masked data + */ + def mask( + seed: Long, + pathStr: String, + jsonStr: String, + cpuResult: String, + gpuResult: String): Array[String] = { + val random = new Random(seed) + // generate one to one map + // Note: path/json/result should use the same mask way + val randomInt = random.nextInt() + val charMap = getMap(randomInt) + val digitMap = getDigitMap(randomInt) + Array( + doMask(pathStr, RETAIN_CHARS, charMap, digitMap), + doMask(jsonStr, RETAIN_CHARS, charMap, digitMap), + doMask(cpuResult, RETAIN_CHARS, charMap, digitMap), + doMask(gpuResult, RETAIN_CHARS, charMap, digitMap) + ) + } + + private def getMap(seed: Int): Map[Char, Char] = { + val random = new Random(seed) + val charsFrom = oneToOneMappingChars.toList + val charsTo = random.shuffle(oneToOneMappingChars.toList) + val map = mutable.Map[Char, Char]() + for( i <- charsFrom.indices) { + map(charsFrom(i)) = charsTo(i) + } + map.toMap + } + + private def getDigitMap(seed: Int): Map[Char, Char] = { + val random = new Random(seed) + val digits = '1' to '9' + val from = digits.toList + val to = random.shuffle(digits.toList) + val map = mutable.Map[Char, Char]() + for (i <- from.indices) { + map(from(i)) = to(i) + } + map.toMap + } + + + /** + * Mask chars + * @param originStr origin json/path/result string + * @param retainChars retain chars, should not be masked + * @param oneToOneMap char to char map for masking + * @return masked string + */ + private def doMask( + originStr: String, + retainChars: Set[Char], + oneToOneMap: Map[Char, Char], + digitMap: Map[Char, Char]): String = { + if (originStr != null) { + val buf = new StringBuffer(originStr.length) + var idx = 0 + while (idx < originStr.length) { + val originChar = originStr(idx) + idx += 1 + if (originChar >= '1' && originChar <= '9') { + // digits need to one to one map + val toDigit = digitMap(originChar) + buf.append(toDigit) + } else { + // not in [1-9] + if (oneToOneMappingChars.contains(originChar)) { + // chars need one to one map + val toChar = oneToOneMap(originChar) + buf.append(toChar) + } else { + if (!retainChars.contains(originChar)) { + // if it's not a retain char, replace to a const char 's' + buf.append('s') + } else { + // retain char, do not change + 
buf.append(originChar) + } + } + } + } + buf.toString + } else { + null + } + } +} + +object CpuGetJsonObject { + /** + * verify results from Cpu and Gpu, save diffs if have + * @param dataCv original JSON data + * @param path the path to extract JSON data + * @param fromGpuCv result from GPU + * @param fromCpuHCV result from CPU + * @param savePathForVerify save path if have diffs + * @param saveRowsForVerify max diff rows to save, Note: only take effective for current data + */ + def verify( + isLegacy: Boolean, + seed: Long, + dataCv: ColumnVector, + path: UTF8String, + fromGpuCv: ColumnVector, + fromCpuHCV: HostColumnVector, + savePathForVerify: String, + saveRowsForVerify: Int, + conf: Configuration): Unit = { + withResource(dataCv.copyToHost()) { dataHCV => + withResource(fromGpuCv.copyToHost()) { fromGpuHCV => + val tcId = TaskContext.get.taskAttemptId() + val date = DateTimeFormatter.ofPattern("yyyyMMdd").format(LocalDate.now()) + val uuid = UUID.randomUUID() + val savePath = s"$savePathForVerify/${date}_${tcId}_${uuid}.csv" + withResource(CsvWriterWrapper(savePath, conf)) { csvWriter => + val pathStr = if (path == null) "null" else path.toString + var currRow = 0 + var diffRowsNum = 0 + while (currRow < dataCv.getRowCount.toInt && + diffRowsNum < saveRowsForVerify + ) { + val str = dataHCV.getJavaString(currRow) + val cpuStr = if (fromCpuHCV.isNull(currRow)) null else fromCpuHCV.getJavaString(currRow) + val gpuStr = if (fromGpuHCV.isNull(currRow)) null else fromGpuHCV.getJavaString(currRow) + if (!Objects.equals(cpuStr, gpuStr)) { // if have diff + diffRowsNum += 1 + // mask customer data + val masked = GetJsonObjectMask.mask(seed, pathStr, str, cpuStr, gpuStr) + csvWriter.writeRow(isLegacy, masked) + } + currRow += 1 + } + } + } + } + } + + /** + * Run get-json-object on CPU + * @param dataCv original JSON data + * @param path path scalar + * @return CPU result of get-json-object + */ + def getJsonObjectOnCpu(dataCv: GpuColumnVector, path: UTF8String): HostColumnVector = { + withResource(dataCv.copyToHost()) { dataHCV => + withResource(HostColumnVector.builder(DType.STRING, dataHCV.getRowCount.toInt)) { + resultBuilder => + val pathLiteral = Literal.create(path, StringType) + for (i <- 0 until dataHCV.getRowCount.toInt) { + val json = dataHCV.getUTF8String(i) + // In order to use `GetJsonObject` directly, + // here use a literal json and a literal path + val jsonLiteral = Literal.create(json, StringType) + val cpuGetJsonObject = GetJsonObject(jsonLiteral, pathLiteral) + // input null is safe because both json and path are literal + val utf8String = cpuGetJsonObject.eval(null) + if (utf8String == null) { + resultBuilder.appendNull() + } else { + resultBuilder.append(utf8String.toString) + } + } + resultBuilder.build() + } + } + } +} diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AQEUtils.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AQEUtils.scala index 864b9175d4e..9535bd4caf9 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AQEUtils.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AQEUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
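A rough sketch of how the test-only get_json_object debug settings added to RapidsConf above and the GetJsonObjectMask helper fit together; the directory, seed, and sample strings below are made up:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.rapids.test.GetJsonObjectMask

// Internal, test-only settings from this patch: rows whose GPU get_json_object result
// differs from the CPU result are masked and written as CSV under the debug path,
// up to debugSaveRows rows per file.
val conf = new SparkConf()
  .set("spark.rapids.sql.expression.GetJsonObject.debugPath", "/tmp/get_json_object_debug")
  .set("spark.rapids.sql.expression.GetJsonObject.debugSaveRows", "1024")

// The masking helper can also be called directly: the same seeded one-to-one mapping is
// applied to the path, the JSON, and both results, so a CPU/GPU difference stays
// reproducible without exposing the original data.
val Array(maskedPath, maskedJson, maskedCpu, maskedGpu) =
  GetJsonObjectMask.mask(42L, "$.store.book", """{"store":{"book":"abc"}}""", "abc", null)
```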
@@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AggregationTagging.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AggregationTagging.scala index e97126ba478..d024f58d152 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AggregationTagging.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/AggregationTagging.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/DeltaLakeUtils.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/DeltaLakeUtils.scala index 4dc843f3ef3..6bdce1011fa 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/DeltaLakeUtils.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/DeltaLakeUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala index 6d976475a23..7bb97012966 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala @@ -36,6 +36,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuFileFormatDataWriterShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuFileFormatDataWriterShim.scala index fb108f3c54a..3f4829cc9d2 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuFileFormatDataWriterShim.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuFileFormatDataWriterShim.scala @@ -35,8 +35,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuWindowInPandasExec.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuWindowInPandasExec.scala index f1b2193a5ff..5ec195bd554 100644 --- 
a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuWindowInPandasExec.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuWindowInPandasExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala index 8ae1358fe35..781cac2c580 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala @@ -37,6 +37,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala index a4493b2d24d..bdb732e54b2 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala @@ -37,6 +37,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala index 107d8268d3d..cd1ebe4b59c 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala @@ -36,6 +36,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ShimLeafExecNode.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ShimLeafExecNode.scala index cca77aec318..5e569c484cd 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ShimLeafExecNode.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ShimLeafExecNode.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/extractValueShims.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/extractValueShims.scala index c690ebda936..1f2514a10d4 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/extractValueShims.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/extractValueShims.scala @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala index 39ac3fd30e5..d94c8e54683 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala index 919ad4e0788..aabb2dd5e36 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala @@ -37,6 +37,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala index 321fce4a4e6..9bceecac524 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala @@ -37,6 +37,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala index 1e78be8f608..fe4c37c9ef9 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala @@ -35,6 +35,7 @@ 
{"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala index b5a8a3fb0c0..84d653f1d05 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala @@ -37,6 +37,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala index d2065aab794..84f3313452f 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala @@ -34,6 +34,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala index 243ef430187..761d84b4667 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala @@ -36,6 +36,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} spark-rapids-shim-json-lines ***/ diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala index 8a081ac907b..e7245db64e4 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala @@ -36,6 +36,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala index 4fdcead437c..adb28725ba1 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala @@ -36,6 +36,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} spark-rapids-shim-json-lines ***/ diff --git 
a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala index 4186effcf84..ad3d2522475 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala index bc2cdb6d78e..63f132bd2f8 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala @@ -37,6 +37,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala index 2393591fbf7..41aa0807646 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala @@ -37,6 +37,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala index f3e7ec523b3..05096f6a41e 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala @@ -37,6 +37,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/ShimSupportsRuntimeFiltering.java b/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/ShimSupportsRuntimeFiltering.java index dec5d603d46..c3706f9b51e 100644 --- a/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/ShimSupportsRuntimeFiltering.java +++ b/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/ShimSupportsRuntimeFiltering.java @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims; diff --git 
a/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/XxHash64Shims.scala b/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/XxHash64Shims.scala index 22353f50f96..d286f8f7ffa 100644 --- a/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/XxHash64Shims.scala +++ b/sql-plugin/src/main/spark320/java/com/nvidia/spark/rapids/shims/XxHash64Shims.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/FileSourceScanExecMeta.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/FileSourceScanExecMeta.scala index a318bef159d..03365af971e 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/FileSourceScanExecMeta.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/FileSourceScanExecMeta.scala @@ -31,8 +31,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExecBase.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExecBase.scala index 33dbbcc7549..43c9e326653 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExecBase.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExecBase.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala index e70d532ff52..8c82074b8f5 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala @@ -30,8 +30,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReaderBase.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReaderBase.scala index b5f2ad58ca3..c905330d1ff 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReaderBase.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReaderBase.scala @@ -33,8 +33,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuParquetCrypto.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuParquetCrypto.scala index b29c0fbfcab..ffc8c71349e 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuParquetCrypto.scala +++ 
b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuParquetCrypto.scala @@ -30,8 +30,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HashUtils.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HashUtils.scala index ccbabc7841b..c24d2faf1c7 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HashUtils.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HashUtils.scala @@ -33,8 +33,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OffsetWindowFunctionMeta.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OffsetWindowFunctionMeta.scala index 9ed90975779..c403ec6e73d 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OffsetWindowFunctionMeta.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OffsetWindowFunctionMeta.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcCastingShims.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcCastingShims.scala index 3c84810b59d..f346fce582c 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcCastingShims.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcCastingShims.scala @@ -31,8 +31,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcShims320untilAllBase.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcShims320untilAllBase.scala index 236260dd82c..44542a464c6 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcShims320untilAllBase.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/OrcShims320untilAllBase.scala @@ -31,8 +31,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/PlanShimsImpl.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/PlanShimsImpl.scala index 17a6de5c9a0..154c1464a19 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/PlanShimsImpl.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/PlanShimsImpl.scala @@ -33,8 +33,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git 
a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RapidsCsvScanMeta.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RapidsCsvScanMeta.scala index 6b000a98191..59683200f7d 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RapidsCsvScanMeta.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RapidsCsvScanMeta.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RebaseShims.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RebaseShims.scala index bf8925b8754..a0d1fb27ccc 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RebaseShims.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/RebaseShims.scala @@ -33,8 +33,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimAQEShuffleReadExec.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimAQEShuffleReadExec.scala index d65cf183ca3..bfa8152e379 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimAQEShuffleReadExec.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimAQEShuffleReadExec.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBaseSubqueryExec.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBaseSubqueryExec.scala index 40ef20d6e52..5cad6b02633 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBaseSubqueryExec.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBaseSubqueryExec.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBroadcastExchangeLike.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBroadcastExchangeLike.scala index e65e2c8d907..f59b3310c30 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBroadcastExchangeLike.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimBroadcastExchangeLike.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,8 +31,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimPredicateHelper.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimPredicateHelper.scala index 137174d1f34..235827288ef 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimPredicateHelper.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ShimPredicateHelper.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusNonDBShims.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusNonDBShims.scala index d6691882663..21ccbbdb965 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusNonDBShims.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusNonDBShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,8 +30,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala index 551ba464539..438e6f5a5b9 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/Spark320PlusShims.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TreeNode.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TreeNode.scala index 6c17bcf2438..c219ebe74ba 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TreeNode.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TreeNode.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TypeSigUtil.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TypeSigUtil.scala index 15a86a7718c..65a191f386a 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TypeSigUtil.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/TypeSigUtil.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} 
spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/YearParseUtil.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/YearParseUtil.scala index f155d13a863..8b73d74b78b 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/YearParseUtil.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/YearParseUtil.scala @@ -33,8 +33,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/gpuWindows.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/gpuWindows.scala index 8647a59efc3..65632ffaf47 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/gpuWindows.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/gpuWindows.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/v1FallbackWriters.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/v1FallbackWriters.scala index b0118d36432..f8c4d816fcd 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/v1FallbackWriters.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/v1FallbackWriters.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/GpuShuffleBlockResolver.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/GpuShuffleBlockResolver.scala index 54842f1adcf..e6f939d7b77 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/GpuShuffleBlockResolver.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/GpuShuffleBlockResolver.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/ShuffledBatchRDDUtil.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/ShuffledBatchRDDUtil.scala index 3c2a2bfa2e9..3359c54a922 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/ShuffledBatchRDDUtil.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/ShuffledBatchRDDUtil.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/storage/ShimDiskBlockManager.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/storage/ShimDiskBlockManager.scala index 8d713d442ab..4b284d430dd 100644 --- 
a/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/storage/ShimDiskBlockManager.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/rapids/shims/storage/ShimDiskBlockManager.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.rapids.shims.storage diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala index 67895d5eca8..4361716f881 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala @@ -33,6 +33,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} spark-rapids-shim-json-lines ***/ diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/AvroUtils.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/AvroUtils.scala index 68ffbc438f0..e06e113226f 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/AvroUtils.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/AvroUtils.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedWriter.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedWriter.scala index a820bd24107..9dbb6f926af 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedWriter.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedWriter.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/Spark32XShimsUtils.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/Spark32XShimsUtils.scala index 2fc7799a64a..1319a3788b4 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/Spark32XShimsUtils.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/Spark32XShimsUtils.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/datetimeExpressions.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/datetimeExpressions.scala index 1593314048e..21fcbbf5f9b 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/datetimeExpressions.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/datetimeExpressions.scala @@ -34,8 +34,10 @@ {"spark": 
"341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsPushBasedFetchHelper.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsPushBasedFetchHelper.scala index 64d37ff3fab..6bea4285492 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsPushBasedFetchHelper.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsPushBasedFetchHelper.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.storage diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsShuffleBlockFetcherIterator.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsShuffleBlockFetcherIterator.scala index 7ea7cde1c2b..cd331c9d0d8 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsShuffleBlockFetcherIterator.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/storage/RapidsShuffleBlockFetcherIterator.scala @@ -34,8 +34,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.storage diff --git a/sql-plugin/src/main/spark321/scala/com/nvidia/spark/rapids/shims/Spark321PlusShims.scala b/sql-plugin/src/main/spark321/scala/com/nvidia/spark/rapids/shims/Spark321PlusShims.scala index d3b7bcedfb8..4f705434d60 100644 --- a/sql-plugin/src/main/spark321/scala/com/nvidia/spark/rapids/shims/Spark321PlusShims.scala +++ b/sql-plugin/src/main/spark321/scala/com/nvidia/spark/rapids/shims/Spark321PlusShims.scala @@ -33,8 +33,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark323/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala b/sql-plugin/src/main/spark323/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala index 6233fc40d00..f36454a42fb 100644 --- a/sql-plugin/src/main/spark323/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala +++ b/sql-plugin/src/main/spark323/scala/org/apache/spark/sql/rapids/shims/GpuAscii.scala @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilter.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilter.scala index 4274b24fc6b..1503da12afa 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilter.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilterMightContain.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilterMightContain.scala index 15cb6a52676..64800bcd679 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilterMightContain.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuBloomFilterMightContain.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuInSubqueryExec.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuInSubqueryExec.scala index 4d229d547a1..5a7de3b2e20 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuInSubqueryExec.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/GpuInSubqueryExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/AnsiUtil.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/AnsiUtil.scala index 2d57fc2d950..abe29315fc5 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/AnsiUtil.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/AnsiUtil.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/BloomFilterShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/BloomFilterShims.scala index 251f3eeb175..fcb06ca392b 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/BloomFilterShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/BloomFilterShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/CharVarcharUtilsShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/CharVarcharUtilsShims.scala index 6e4944b969f..ebd09a18e41 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/CharVarcharUtilsShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/CharVarcharUtilsShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DayTimeIntervalShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DayTimeIntervalShims.scala index 5d4bece1cba..7ed95214c2a 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DayTimeIntervalShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DayTimeIntervalShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DistributionUtil.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DistributionUtil.scala index 7d26b91e4be..76d4e9630f9 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DistributionUtil.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/DistributionUtil.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/FilteredPartitions.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/FilteredPartitions.scala index 4836b1e3b4d..515f8dd3800 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/FilteredPartitions.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/FilteredPartitions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuDataSourceRDD.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuDataSourceRDD.scala index a84b8f0f3a3..a48d6380ab0 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuDataSourceRDD.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuDataSourceRDD.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuHashPartitioning.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuHashPartitioning.scala index c6fc7d9a174..67ab48ad5bd 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuHashPartitioning.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuHashPartitioning.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala index 98995383327..20bcd108bcd 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtilsBase.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtilsBase.scala index 56bf9203c07..0ee3a60658c 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtilsBase.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtilsBase.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuRangePartitioning.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuRangePartitioning.scala index 889aa414f6b..a2e2355b7fe 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuRangePartitioning.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuRangePartitioning.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuTypeShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuTypeShims.scala index 11d03ce8f68..055c81ef913 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuTypeShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/GpuTypeShims.scala @@ -27,8 +27,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/InSubqueryShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/InSubqueryShims.scala index 94ca8f0a913..8960cf10c42 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/InSubqueryShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/InSubqueryShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcReadingShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcReadingShims.scala index cc8e69869c4..97409821aac 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcReadingShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcReadingShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcShims.scala index cc09214d8b1..c41d73b3dd4 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/OrcShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetFieldIdShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetFieldIdShims.scala index 20e483baee3..9c772184973 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetFieldIdShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetFieldIdShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,8 +27,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala index 21f84bfe8b9..56708017a23 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala index 67a3b0913a7..c80e7a40942 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala @@ -28,6 +28,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsFileSourceMetaUtils.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsFileSourceMetaUtils.scala index 865ea22daa2..3595da246d5 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsFileSourceMetaUtils.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsFileSourceMetaUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsOrcScanMeta.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsOrcScanMeta.scala index 5a4eb2db9fb..f705df075c6 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsOrcScanMeta.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsOrcScanMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsParquetScanMeta.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsParquetScanMeta.scala index 1bb4c4d4e6a..86da731d532 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsParquetScanMeta.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RapidsParquetScanMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RoundingShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RoundingShims.scala index 5e943bcaa16..0631fca0f93 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RoundingShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/RoundingShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ScanExecShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ScanExecShims.scala index e0f22f16440..dcba8961a32 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ScanExecShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ScanExecShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusNonDBShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusNonDBShims.scala index 1864b6dca38..bd2e28428fa 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusNonDBShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusNonDBShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusShims.scala index d048a10821b..180bcba1aab 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/Spark330PlusShims.scala @@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/ShimCurrentBatchIterator.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/ShimCurrentBatchIterator.scala index 27cce9b3072..32c07dfd51e 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/ShimCurrentBatchIterator.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/ShimCurrentBatchIterator.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.parquet diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala index d9cba34d541..85b17788f41 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.parquet.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/aggregate/GpuBloomFilterAggregate.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/aggregate/GpuBloomFilterAggregate.scala index 512e159ac93..eb776825c3e 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/aggregate/GpuBloomFilterAggregate.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/aggregate/GpuBloomFilterAggregate.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.aggregate diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala index 5eca8b18294..768261cbc89 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala index e9d711315a9..4739d3859c4 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala @@ -28,6 +28,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtilsFor330plus.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtilsFor330plus.scala index 81eb26e3078..320fb0737b7 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtilsFor330plus.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtilsFor330plus.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedReader.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedReader.scala index 6d25fb4da29..60211f5e6f6 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedReader.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsShuffleThreadedReader.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/intervalExpressions.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/intervalExpressions.scala index 39262064ef2..04eaa80bb46 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/intervalExpressions.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/intervalExpressions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/types/shims/PartitionValueCastShims.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/types/shims/PartitionValueCastShims.scala index 783417efb97..3d8c6100dcf 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/types/shims/PartitionValueCastShims.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/types/shims/PartitionValueCastShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,8 +28,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.types.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/AnsiCastShim.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/AnsiCastShim.scala index 569ba8672f2..d7a843f83f3 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/AnsiCastShim.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/AnsiCastShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CastingConfigShim.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CastingConfigShim.scala index 80d1209d575..8b6d2b14af1 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CastingConfigShim.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/CastingConfigShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ColumnDefaultValuesShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ColumnDefaultValuesShims.scala index ae118fd288d..19217339c0a 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ColumnDefaultValuesShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ColumnDefaultValuesShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/DecimalArithmeticOverrides.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/DecimalArithmeticOverrides.scala index ba1df975366..35a10840306 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/DecimalArithmeticOverrides.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/DecimalArithmeticOverrides.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GetMapValueMeta.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GetMapValueMeta.scala index b6531828f70..dcdd27e6e50 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GetMapValueMeta.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GetMapValueMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GpuCastShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GpuCastShims.scala index 1d8aeda4b63..8c0613ec762 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GpuCastShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/GpuCastShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ParquetStringPredShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ParquetStringPredShims.scala index e1f3ddbd43f..667475c6d99 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ParquetStringPredShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ParquetStringPredShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala deleted file mode 100644 index 2f4360424fb..00000000000 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*** spark-rapids-shim-json-lines -{"spark": "330db"} -{"spark": "332db"} -{"spark": "341db"} -spark-rapids-shim-json-lines ***/ -package com.nvidia.spark.rapids.shims - -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path - -import org.apache.spark.internal.Logging - -object ReaderUtils extends Logging { - - private lazy val isUnityCatalogEnabled = com.databricks.unity.UnityConf.isEnabled - - /* - * Databricks has the Unity Catalog that allows accessing files across multiple metastores and - * catalogs. 
When our readers run in different threads, the credentials don't get setup - * properly. Here we get the Hadoop configuration associated specifically with that file which - * seems to contain the necessary credentials. This conf will be used when creating the - * Hadoop Filesystem, which with Unity ends up being a special Credentials file system. - */ - def getHadoopConfForReaderThread(filePath: Path, conf: Configuration): Configuration = { - if (isUnityCatalogEnabled) { - try { - com.databricks.unity.ClusterDefaultSAM.createDelegateHadoopConf(filePath, conf) - } catch { - case a: AssertionError => - // ignore this and just return the regular conf, it might be a filesystem not supported - // and I don't have a good way to check this - logWarning("Assertion error calling createDelegateHadoopConf, skipping.", a) - conf - } - } else { - conf - } - } -} diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ShimFilePartitionReaderFactory.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ShimFilePartitionReaderFactory.scala index c52f2b8498d..f19fae66b51 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ShimFilePartitionReaderFactory.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/ShimFilePartitionReaderFactory.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/TypeUtilsShims.scala b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/TypeUtilsShims.scala index 6ed589eaffa..3f233cac24f 100644 --- a/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/TypeUtilsShims.scala +++ b/sql-plugin/src/main/spark330db/scala/com/nvidia/spark/rapids/shims/TypeUtilsShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala index 28af2c27432..dd04d7cfd26 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.parquet diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/rapids/DataSourceStrategyUtils.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/rapids/DataSourceStrategyUtils.scala index 04f0b220b5b..1367d2fe1ae 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/rapids/DataSourceStrategyUtils.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/execution/datasources/rapids/DataSourceStrategyUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.rapids diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala index 07e713abcdf..ed967590fa7 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/GpuCheckOverflowInTableInsert.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,8 +26,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala index 40cc9b418b8..b370b230d11 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/aggregate/aggregateFunctions.scala @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.aggregate diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala index 7913602d2bf..8387ab12326 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/arithmetic.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/GpuExecutorBroadcastHelper.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/GpuExecutorBroadcastHelper.scala index 2522de85169..5fabf05069f 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/GpuExecutorBroadcastHelper.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/GpuExecutorBroadcastHelper.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution -import com.nvidia.spark.rapids.{ConcatAndConsumeAll, GpuColumnVector, GpuMetric, GpuShuffleCoalesceIterator, HostShuffleCoalesceIterator} +import com.nvidia.spark.rapids.{ConcatAndConsumeAll, GpuCoalesceIterator, GpuColumnVector, GpuMetric, GpuShuffleCoalesceIterator, HostShuffleCoalesceIterator, NoopMetric, RequireSingleBatch} import com.nvidia.spark.rapids.Arm.withResource import org.apache.spark.TaskContext @@ -40,6 +40,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * which means they require a format of the data that can be used on the GPU. */ object GpuExecutorBroadcastHelper { + import GpuMetric._ // This reads the shuffle data that we have retrieved using `getShuffleRDD` from the shuffle // exchange. WARNING: Do not use this method outside of this context. This method can only be @@ -65,11 +66,28 @@ object GpuExecutorBroadcastHelper { // Use the GPU Shuffle Coalesce iterator to concatenate and load batches onto the // host as needed. Since we don't have GpuShuffleCoalesceExec in the plan for the // executor broadcast scenario, we have to use that logic here to efficiently - // grab and release the semaphore while doing I/O + // grab and release the semaphore while doing I/O. We wrap this with GpuCoalesceIterator + // to ensure this is always a single batch for the following step.
+ val shuffleMetrics = Map( + CONCAT_TIME -> metricsMap(CONCAT_TIME), + OP_TIME -> metricsMap(OP_TIME) + ).withDefaultValue(NoopMetric) + val iter = shuffleDataIterator(shuffleData) - new GpuShuffleCoalesceIterator( - new HostShuffleCoalesceIterator(iter, targetSize, metricsMap), - dataTypes, metricsMap).asInstanceOf[Iterator[ColumnarBatch]] + new GpuCoalesceIterator( + new GpuShuffleCoalesceIterator( + new HostShuffleCoalesceIterator(iter, targetSize, shuffleMetrics), + dataTypes, shuffleMetrics).asInstanceOf[Iterator[ColumnarBatch]], + dataTypes, + RequireSingleBatch, + NoopMetric, // numInputRows + NoopMetric, // numInputBatches + NoopMetric, // numOutputRows + NoopMetric, // numOutputBatches + NoopMetric, // collectTime + metricsMap(CONCAT_TIME), // concatTime + metricsMap(OP_TIME), // opTime + "GpuBroadcastHashJoinExec").asInstanceOf[Iterator[ColumnarBatch]] } /** diff --git a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala index 046e71dbaa8..913df0cab6e 100644 --- a/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala +++ b/sql-plugin/src/main/spark331/scala/com/nvidia/spark/rapids/shims/Spark331PlusNonDBShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,8 +23,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/CreateDataSourceTableAsSelectCommandMetaShims.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/CreateDataSourceTableAsSelectCommandMetaShims.scala index 9240a226ecf..faa550c0cb6 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/CreateDataSourceTableAsSelectCommandMetaShims.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/CreateDataSourceTableAsSelectCommandMetaShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/FileIndexOptionsShims.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/FileIndexOptionsShims.scala index 9802376697b..1a4d530767a 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/FileIndexOptionsShims.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/FileIndexOptionsShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala index 3da3abea1eb..9105ab50e1e 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuKnownNullable.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuKnownNullable.scala index e0bd36696bd..10cb1b94373 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuKnownNullable.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuKnownNullable.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala index f1abe457800..53c17d2f946 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala index ec48578d56d..7cc94359daa 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/execution/datasources/GpuWriteFiles.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala index bc54b45ccac..96046aed76c 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala index 78b0a216ae2..4c3d6787d91 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/GpuRowBasedHiveGenericUDFShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveFileUtil.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveFileUtil.scala index 681cf79e1e7..be39c07414e 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveFileUtil.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveFileUtil.scala @@ -21,8 +21,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveProviderCmdShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveProviderCmdShims.scala index 64b0eea741f..126244492f9 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveProviderCmdShims.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/hive/rapids/shims/HiveProviderCmdShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala index c683f71552a..78daa0bf6f1 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala index b8080ffb810..9e36cf41fad 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims @@ -89,7 +91,7 @@ case class GpuCreateDataSourceTableAsSelectCommand( // We will use the schema of resolved.relation as the schema of the table (instead of // the schema of df). It is important since the nullability may be changed by the relation // provider (for example, see org.apache.spark.sql.parquet.DefaultSource). - schema = result.schema) + schema = SchemaMetadataShims.getCleanedSchema(result.schema)) // Table location is already validated. No need to check it again during table creation. sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false) diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuDataSource.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuDataSource.scala index e8140b318b1..93cb0e8dfa5 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuDataSource.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuDataSource.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala similarity index 53% rename from sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala rename to sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala index 043105f4550..bff6af379b7 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ReaderUtils.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,35 +15,20 @@ */ /*** spark-rapids-shim-json-lines -{"spark": "311"} -{"spark": "312"} -{"spark": "313"} -{"spark": "320"} -{"spark": "321"} -{"spark": "321cdh"} -{"spark": "322"} -{"spark": "323"} -{"spark": "324"} -{"spark": "330"} -{"spark": "330cdh"} -{"spark": "331"} -{"spark": "332"} -{"spark": "332cdh"} -{"spark": "333"} -{"spark": "334"} +{"spark": "332db"} {"spark": "340"} {"spark": "341"} +{"spark": "341db"} {"spark": "342"} -{"spark": "350"} -{"spark": "351"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ -package com.nvidia.spark.rapids.shims +package org.apache.spark.sql.rapids.shims -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.spark.sql.types.StructType -object ReaderUtils { - def getHadoopConfForReaderThread(filePath: Path, conf: Configuration): Configuration = { - conf +object SchemaMetadataShims { + // SPARK-43123 removes the internal metadata from the schema. 
Not available for Spark < 3.5.0. + def getCleanedSchema(inputSchema: StructType): StructType = { + inputSchema } } diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaUtilsShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaUtilsShims.scala index f99b4e41931..783e14f4a83 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaUtilsShims.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SchemaUtilsShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala index 58aba9822e3..d74254b190d 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkDateTimeExceptionShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala index 582a473ca34..91dc2b8e637 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/SparkUpgradeExceptionShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala index f634ba8b4a8..3dea88e94d3 100644 --- a/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala +++ b/sql-plugin/src/main/spark334/scala/com/nvidia/spark/rapids/shims/GetSequenceSize.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,9 @@ /*** spark-rapids-shim-json-lines {"spark": "334"} {"spark": "342"} +{"spark": "343"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala index 0dabfd1e46d..8e5d64017df 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala @@ -19,6 +19,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/CastCheckShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/CastCheckShims.scala index 74dab178deb..a6af3172b74 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/CastCheckShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/CastCheckShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GlobalLimitShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GlobalLimitShims.scala index d8cb80d1d2f..761b4d8c4b9 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GlobalLimitShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GlobalLimitShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala index 4d8b45e038b..39f42d8b833 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBroadcastJoinMeta.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBroadcastJoinMeta.scala index 7ed79194f8d..bf5bb272889 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBroadcastJoinMeta.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBroadcastJoinMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala index e26648b96ec..18da13783a3 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/OrcProtoWriterShim.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/OrcProtoWriterShim.scala index b93f43417b6..3f52b775f69 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/OrcProtoWriterShim.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/OrcProtoWriterShim.scala @@ -1,6 +1,6 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetLegacyNanoAsLongShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetLegacyNanoAsLongShims.scala index ad6355d8281..b3418a35691 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetLegacyNanoAsLongShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetLegacyNanoAsLongShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampAnnotationShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampAnnotationShims.scala index 89ff642e6d3..996ec44ca83 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampAnnotationShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampAnnotationShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampNTZShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampNTZShims.scala index e0b73482e3f..a99ab808db5 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampNTZShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ParquetTimestampNTZShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala index 8aab7f55adf..ca2fa215892 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ShuffleOriginUtil.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ShuffleOriginUtil.scala index 3369f813f72..a042a62f053 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ShuffleOriginUtil.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/ShuffleOriginUtil.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala index a4af7318028..08a7b443313 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/Spark340PlusNonDBShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index d11d3d4401f..c2946ca8c1b 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/TagScanForRuntimeFiltering.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/TagScanForRuntimeFiltering.scala index 5c80cdf1407..e3728ef100b 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/TagScanForRuntimeFiltering.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/TagScanForRuntimeFiltering.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala index 6e715e22d92..29f36d6f2f7 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleIterator.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shuffle diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala index 393b7efb244..59b1f570ac6 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/csv/GpuCsvUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.catalyst.csv diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala index fa8649cf1d2..85150c8e8fc 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/catalyst/json/GpuJsonUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.catalyst.json diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala index 013da3896d8..a655a0dc1b2 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.v2.rapids diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala index 590cb60b1e6..d22058a3e5e 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.v2.rapids diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/RapidsCachingReader.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/RapidsCachingReader.scala index 3694af18c18..74d56abf740 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/RapidsCachingReader.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/RapidsCachingReader.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala index 22125e829a9..c27809804c5 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastHashJoinExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala index 9ff5acb5b16..b6ae72a8493 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/GpuBroadcastNestedLoopJoinExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/ShimTrampolineUtil.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/ShimTrampolineUtil.scala index 8eb5aa9453f..5fd1d04828f 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/ShimTrampolineUtil.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/execution/ShimTrampolineUtil.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/GpuJsonToStructsShim.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/GpuJsonToStructsShim.scala index 5e7b28b3a7b..9b55a0ff935 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/GpuJsonToStructsShim.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/GpuJsonToStructsShim.scala @@ -18,8 +18,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index 30d18974793..8ee0485ab36 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,8 +18,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala index 6873237d244..2a7d900c9d0 100644 --- a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala +++ b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuAggregateInPandasExecMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuToPrettyString.scala b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuToPrettyString.scala index 23ff476ebc6..a2b3569cdb2 100644 --- a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuToPrettyString.scala +++ b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuToPrettyString.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuWindowGroupLimitExec.scala b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuWindowGroupLimitExec.scala index 3406186a9d0..e3a38415692 100644 --- a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuWindowGroupLimitExec.scala +++ b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/GpuWindowGroupLimitExec.scala @@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala index e79730fc13f..c7f5aa9995d 100644 --- a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala +++ b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PythonUDFShim.scala @@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala index c76ea303ea4..3b94d5a5201 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala +++ 
b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala index c7c64830447..0841384c5bb 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/CreateFunctions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala index 1d43bf40206..c3578f7e961 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/FileSinkDescShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala index d6a30ae671b..66a82881020 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/hive/rapids/shims/HiveInspectorsShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.hive.rapids.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala index 2a351f66329..a7c4d320fc4 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonOutput.scala @@ -16,6 +16,7 @@ /*** spark-rapids-shim-json-lines {"spark": "341db"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala index 17106833a99..ac58baa2eb7 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala @@ -15,6 +15,7 @@ */ /*** spark-rapids-shim-json-lines {"spark": "341db"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala index 13df7923cd4..c149063bcf5 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuBasePythonRunner.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala index 7e52ce7a3d0..aad1eb52c02 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala @@ -16,6 +16,7 @@ /*** spark-rapids-shim-json-lines {"spark": "341db"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution.python.shims diff --git a/sql-plugin/src/main/spark342/scala/com/nvidia/spark/rapids/shims/DecimalMultiply128.scala b/sql-plugin/src/main/spark342/scala/com/nvidia/spark/rapids/shims/DecimalMultiply128.scala index cafcc9f21ef..40547711ed3 100644 --- a/sql-plugin/src/main/spark342/scala/com/nvidia/spark/rapids/shims/DecimalMultiply128.scala +++ b/sql-plugin/src/main/spark342/scala/com/nvidia/spark/rapids/shims/DecimalMultiply128.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,9 @@ /*** spark-rapids-shim-json-lines {"spark": "342"} +{"spark": "343"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimServiceProvider.scala b/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..ff35d06a9c7 --- /dev/null +++ b/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimServiceProvider.scala @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "343"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims.spark343 + +import com.nvidia.spark.rapids.SparkShimVersion + +object SparkShimServiceProvider { + val VERSION = SparkShimVersion(3, 4, 3) + val VERSIONNAMES = Seq(s"$VERSION") +} + +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + override def getShimVersion: SparkShimVersion = SparkShimServiceProvider.VERSION + + override def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } +} diff --git a/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/spark343/RapidsShuffleManager.scala b/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/spark343/RapidsShuffleManager.scala new file mode 100644 index 00000000000..182a3397bd8 --- /dev/null +++ b/sql-plugin/src/main/spark343/scala/com/nvidia/spark/rapids/spark343/RapidsShuffleManager.scala @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "343"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.spark343 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.ProxyRapidsShuffleInternalManagerBase + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean +) extends ProxyRapidsShuffleInternalManagerBase(conf, isDriver) diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala index b9999dee246..4bbc4644241 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala index 744bd93b029..d03493272c4 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/GpuIntervalUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala index 1d238be84b0..f539917437d 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/KeyGroupedPartitioningShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala index 99728ef9e74..710c7121216 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/LegacyBehaviorPolicyShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala index f910f951d7a..a0335827612 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/NullOutputStreamShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala index 8f9bc5c1573..833767558c6 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala index 6238cfbc159..059de3f8edb 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/SparkShims.scala @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala index c15dc7907bb..eb8839a3a6f 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ParquetCVShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.parquet diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala index e495dba634a..077547eec4b 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/parquet/rapids/shims/ShimVectorizedColumnReader.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.parquet.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala index a5faad5122f..13bb17136f3 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicCreateTableAsSelectExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.v2.rapids diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala index d32ab59db95..aaf6408250e 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/datasources/v2/rapids/GpuAtomicReplaceTableAsSelectExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.datasources.v2.rapids diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala index cfed5b884c6..bd33ec2344d 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/execution/rapids/shims/FilePartitionShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala index 4876d31bb12..3a4577a0573 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/execution/GpuShuffleMeta.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.execution diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala index edd4bfab7fb..79d468d285d 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/ArrowUtilsShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala index c3918c6f72f..50ac5f5dd94 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/DataTypeUtilsShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala index 54e3516da5a..564b6ae53c2 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuMapInPandasExecMeta.scala @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala index c27f4824c4a..a08211f3795 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala @@ -17,6 +17,7 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala new file mode 100644 index 00000000000..2c12de00727 --- /dev/null +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/SchemaMetadataShims.scala @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.shims + +import org.apache.spark.sql.catalyst.util.removeInternalMetadata +import org.apache.spark.sql.types.StructType + +object SchemaMetadataShims { + // SPARK-43123 removes the internal metadata from the schema + def getCleanedSchema(inputSchema: StructType): StructType = { + removeInternalMetadata(inputSchema) + } +} diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimServiceProvider.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..af623f7ce45 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimServiceProvider.scala @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims.spark400 + +import com.nvidia.spark.rapids.SparkShimVersion + +object SparkShimServiceProvider { + val VERSION = SparkShimVersion(4, 0, 0) + val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT") +} + +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + override def getShimVersion: SparkShimVersion = SparkShimServiceProvider.VERSION + + override def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } +} diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/spark400/RapidsShuffleManager.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/spark400/RapidsShuffleManager.scala new file mode 100644 index 00000000000..e299d4a0343 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/spark400/RapidsShuffleManager.scala @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.spark400 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.ProxyRapidsShuffleInternalManagerBase + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean +) extends ProxyRapidsShuffleInternalManagerBase(conf, isDriver) diff --git a/sql-plugin/src/test/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimsSuite.scala b/sql-plugin/src/test/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimsSuite.scala new file mode 100644 index 00000000000..0b8b3e0fee1 --- /dev/null +++ b/sql-plugin/src/test/spark343/scala/com/nvidia/spark/rapids/shims/spark343/SparkShimsSuite.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "343"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims.spark343 + +import com.nvidia.spark.rapids._ +import org.scalatest.funsuite.AnyFunSuite + +class SparkShimsSuite extends AnyFunSuite with FQSuiteName { + test("spark shims version") { + assert(ShimLoader.getShimVersion === SparkShimVersion(3, 4, 3)) + } + + test("shuffle manager class") { + assert(ShimLoader.getRapidsShuffleManagerClass === + classOf[com.nvidia.spark.rapids.spark343.RapidsShuffleManager].getCanonicalName) + } + +} diff --git a/sql-plugin/src/test/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimsSuite.scala b/sql-plugin/src/test/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimsSuite.scala new file mode 100644 index 00000000000..f052b283476 --- /dev/null +++ b/sql-plugin/src/test/spark400/scala/com/nvidia/spark/rapids/shims/spark400/SparkShimsSuite.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims.spark400 + +import com.nvidia.spark.rapids._ +import org.scalatest.funsuite.AnyFunSuite + +class SparkShimsSuite extends AnyFunSuite with FQSuiteName { + test("spark shims version") { + assert(ShimLoader.getShimVersion === SparkShimVersion(4, 0, 0)) + } + + test("shuffle manager class") { + assert(ShimLoader.getRapidsShuffleManagerClass === + classOf[com.nvidia.spark.rapids.spark400.RapidsShuffleManager].getCanonicalName) + } + +} diff --git a/tests/pom.xml b/tests/pom.xml index 46b1cd9637b..96c2a051143 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -21,13 +21,13 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT ../shim-deps/pom.xml rapids-4-spark-tests_2.12 RAPIDS Accelerator for Apache Spark Tests RAPIDS plugin for Apache Spark integration tests - 24.04.0-SNAPSHOT + 24.06.0-SNAPSHOT tests @@ -103,6 +103,27 @@ org.apache.spark spark-avro_${scala.binary.version} + + org.apache.spark + spark-core_${scala.binary.version} + test-jar + + + org.apache.spark + spark-sql_${scala.binary.version} + test-jar + + + org.apache.spark + spark-catalyst_${scala.binary.version} + test-jar + + + org.scalatestplus + scalatestplus-scalacheck_${scala.binary.version} + 3.1.0.0-RC2 + test + diff --git a/tests/src/test/java/com/nvidia/spark/rapids/TestStats.java b/tests/src/test/java/com/nvidia/spark/rapids/TestStats.java new file mode 100644 index 00000000000..3f367529bcd --- /dev/null +++ b/tests/src/test/java/com/nvidia/spark/rapids/TestStats.java @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +/** Only use in UT Env. It's not thread safe. 
*/ +public class TestStats { + private static final String HEADER_FORMAT = "<tr><th>%s</th><th>%s</th></tr>"; + private static final String ROW_FORMAT = + "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>"; + + private static boolean UT_ENV = false; + private static final Map<String, CaseInfo> caseInfos = new HashMap<>(); + private static String currentCase; + public static int offloadRapidsUnitNumber = 0; + public static int testUnitNumber = 0; + + // use the rapids backend to execute the query + public static boolean offloadRapids = true; + public static int suiteTestNumber = 0; + public static int offloadRapidsTestNumber = 0; + + public static void beginStatistic() { + UT_ENV = true; + } + + public static void reset() { + offloadRapids = false; + suiteTestNumber = 0; + offloadRapidsTestNumber = 0; + testUnitNumber = 0; + offloadRapidsUnitNumber = 0; + resetCase(); + caseInfos.clear(); + } + + private static int totalSuiteTestNumber = 0; + public static int totalOffloadRapidsTestNumber = 0; + + public static int totalTestUnitNumber = 0; + public static int totalOffloadRapidsCaseNumber = 0; + + public static void printMarkdown(String suitName) { + if (!UT_ENV) { + return; + } + + String title = "print_markdown_" + suitName; + + String info = + "Case Count: %d, OffloadRapids Case Count: %d, " + + "Unit Count %d, OffloadRapids Unit Count %d"; + + System.out.println( + String.format( + HEADER_FORMAT, + title, + String.format( + info, + TestStats.suiteTestNumber, + TestStats.offloadRapidsTestNumber, + TestStats.testUnitNumber, + TestStats.offloadRapidsUnitNumber))); + + caseInfos.forEach( + (key, value) -> + System.out.println( + String.format( + ROW_FORMAT, + title, + key, + value.status, + value.type, + String.join("<br/>
", value.fallbackExpressionName), + String.join("
", value.fallbackClassName)))); + + totalSuiteTestNumber += suiteTestNumber; + totalOffloadRapidsTestNumber += offloadRapidsTestNumber; + totalTestUnitNumber += testUnitNumber; + totalOffloadRapidsCaseNumber += offloadRapidsUnitNumber; + System.out.println( + "total_markdown_ totalCaseNum:" + + totalSuiteTestNumber + + " offloadRapids: " + + totalOffloadRapidsTestNumber + + " total unit: " + + totalTestUnitNumber + + " offload unit: " + + totalOffloadRapidsCaseNumber); + } + + public static void addFallBackClassName(String className) { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(currentCase) && !caseInfos.get(currentCase).stack.isEmpty()) { + CaseInfo info = caseInfos.get(currentCase); + caseInfos.get(currentCase).fallbackExpressionName.add(info.stack.pop()); + caseInfos.get(currentCase).fallbackClassName.add(className); + } + } + + public static void addFallBackCase() { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(currentCase)) { + caseInfos.get(currentCase).type = "fallback"; + } + } + + public static void addExpressionClassName(String className) { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(currentCase)) { + CaseInfo info = caseInfos.get(currentCase); + info.stack.add(className); + } + } + + public static Set getFallBackClassName() { + if (!UT_ENV) { + return Collections.emptySet(); + } + + if (caseInfos.containsKey(currentCase)) { + return Collections.unmodifiableSet(caseInfos.get(currentCase).fallbackExpressionName); + } + + return Collections.emptySet(); + } + + public static void addIgnoreCaseName(String caseName) { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(caseName)) { + caseInfos.get(caseName).type = "fatal"; + } + } + + public static void resetCase() { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(currentCase)) { + caseInfos.get(currentCase).stack.clear(); + } + currentCase = ""; + } + + public static void startCase(String caseName) { + if (!UT_ENV) { + return; + } + + caseInfos.putIfAbsent(caseName, new CaseInfo()); + currentCase = caseName; + } + + public static void endCase(boolean status) { + if (!UT_ENV) { + return; + } + + if (caseInfos.containsKey(currentCase)) { + caseInfos.get(currentCase).status = status ? 
"success" : "error"; + } + + resetCase(); + } +} + +class CaseInfo { + final Stack stack = new Stack<>(); + Set fallbackExpressionName = new HashSet<>(); + Set fallbackClassName = new HashSet<>(); + String type = ""; + String status = ""; +} diff --git a/tests/src/test/spark320/scala/com/nvidia/spark/rapids/ConcurrentWriterMetricsSuite.scala b/tests/src/test/spark320/scala/com/nvidia/spark/rapids/ConcurrentWriterMetricsSuite.scala index fe40b3eb736..cad9da2d33a 100644 --- a/tests/src/test/spark320/scala/com/nvidia/spark/rapids/ConcurrentWriterMetricsSuite.scala +++ b/tests/src/test/spark320/scala/com/nvidia/spark/rapids/ConcurrentWriterMetricsSuite.scala @@ -34,6 +34,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark320/scala/com/nvidia/spark/rapids/shims/OrcStatisticShim.scala b/tests/src/test/spark320/scala/com/nvidia/spark/rapids/shims/OrcStatisticShim.scala index b8895cc5a0d..554867d3d55 100644 --- a/tests/src/test/spark320/scala/com/nvidia/spark/rapids/shims/OrcStatisticShim.scala +++ b/tests/src/test/spark320/scala/com/nvidia/spark/rapids/shims/OrcStatisticShim.scala @@ -31,8 +31,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/tests/src/test/spark321/scala/com/nvidia/spark/rapids/DynamicPruningSuite.scala b/tests/src/test/spark321/scala/com/nvidia/spark/rapids/DynamicPruningSuite.scala index 47b77df1537..2d4156d1b3b 100644 --- a/tests/src/test/spark321/scala/com/nvidia/spark/rapids/DynamicPruningSuite.scala +++ b/tests/src/test/spark321/scala/com/nvidia/spark/rapids/DynamicPruningSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark321/scala/com/nvidia/spark/rapids/OrcEncryptionSuite.scala b/tests/src/test/spark321/scala/com/nvidia/spark/rapids/OrcEncryptionSuite.scala index e0f84b206e1..2fd72a5ac0f 100644 --- a/tests/src/test/spark321/scala/com/nvidia/spark/rapids/OrcEncryptionSuite.scala +++ b/tests/src/test/spark321/scala/com/nvidia/spark/rapids/OrcEncryptionSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedReaderSuite.scala b/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedReaderSuite.scala index b00e268d949..ccdd4362dee 100644 --- a/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedReaderSuite.scala +++ b/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedReaderSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids @@ -47,6 +48,15 @@ import org.apache.spark.serializer.SerializerManager import org.apache.spark.sql.rapids.shims.RapidsShuffleThreadedReader import org.apache.spark.storage.{BlockManager, BlockManagerId, ShuffleBlockId} +class InjectedShuffleErrorInTests extends Exception { +} + +class ErrorInputStream(wrapped: InputStream) extends InputStream { + override def read(): Int = { + throw new InjectedShuffleErrorInTests + } +} + /** * * Code ported over from `BlockStoreShuffleReaderSuite` in Apache Spark. @@ -56,15 +66,23 @@ import org.apache.spark.storage.{BlockManager, BlockManagerId, ShuffleBlockId} * We need to define this class ourselves instead of using a spy because the NioManagedBuffer class * is final (final classes cannot be spied on). */ -class RecordingManagedBuffer(underlyingBuffer: NioManagedBuffer) extends ManagedBuffer { +class RecordingManagedBuffer( + underlyingBuffer: NioManagedBuffer, + injectError: Boolean) extends ManagedBuffer { var callsToRetain = 0 var callsToRelease = 0 override def size(): Long = underlyingBuffer.size() override def nioByteBuffer(): ByteBuffer = underlyingBuffer.nioByteBuffer() - override def createInputStream(): InputStream = underlyingBuffer.createInputStream() + override def createInputStream(): InputStream = { + val is = underlyingBuffer.createInputStream() + if (injectError) { + new ErrorInputStream(is) + } else { + is + } + } override def convertToNetty(): AnyRef = underlyingBuffer.convertToNetty() - override def retain(): ManagedBuffer = { callsToRetain += 1 underlyingBuffer.retain() @@ -82,110 +100,133 @@ class RapidsShuffleThreadedReaderSuite RapidsShuffleInternalManagerBase.stopThreadPool() } - /** - * This test makes sure that, when data is read from a HashShuffleReader, the underlying - * ManagedBuffers that contain the data are eventually released. - */ - Seq(1, 2).foreach { numReaderThreads => - test(s"read() releases resources on completion - numThreads=$numReaderThreads") { - val testConf = new SparkConf(false) - // this sets the session and the SparkEnv - SparkSessionHolder.withSparkSession(testConf, _ => { - if (numReaderThreads > 1) { - RapidsShuffleInternalManagerBase.startThreadPoolIfNeeded(0, numReaderThreads) - } - - val reduceId = 15 - val shuffleId = 22 - val numMaps = 6 - val keyValuePairsPerMap = 10 - val serializer = new GpuColumnarBatchSerializer(NoopMetric) - - // Make a mock BlockManager that will return RecordingManagedByteBuffers of data, so that we - // can ensure retain() and release() are properly called. - val blockManager = mock(classOf[BlockManager]) - - // Create a buffer with some randomly generated key-value pairs to use as the shuffle data - // from each mappers (all mappers return the same shuffle data). 
- val byteOutputStream = new ByteArrayOutputStream() - val serializationStream = serializer.newInstance().serializeStream(byteOutputStream) - withResource(GpuColumnVector.emptyBatchFromTypes(Array.empty)) { emptyBatch => - (0 until keyValuePairsPerMap).foreach { i => - serializationStream.writeKey(i) - serializationStream.writeValue(GpuColumnVector.incRefCounts(emptyBatch)) - } + def runShuffleRead(numReaderThreads: Int, injectError: Boolean = false): Unit = { + val testConf = new SparkConf(false) + // this sets the session and the SparkEnv + SparkSessionHolder.withSparkSession(testConf, _ => { + if (numReaderThreads > 1) { + RapidsShuffleInternalManagerBase.startThreadPoolIfNeeded(0, numReaderThreads) + } + + val reduceId = 15 + val shuffleId = 22 + val numMaps = 6 + val keyValuePairsPerMap = 10 + val serializer = new GpuColumnarBatchSerializer(NoopMetric) + + // Make a mock BlockManager that will return RecordingManagedByteBuffers of data, so that we + // can ensure retain() and release() are properly called. + val blockManager = mock(classOf[BlockManager]) + + // Create a buffer with some randomly generated key-value pairs to use as the shuffle data + // from each mappers (all mappers return the same shuffle data). + val byteOutputStream = new ByteArrayOutputStream() + val serializationStream = serializer.newInstance().serializeStream(byteOutputStream) + withResource(GpuColumnVector.emptyBatchFromTypes(Array.empty)) { emptyBatch => + (0 until keyValuePairsPerMap).foreach { i => + serializationStream.writeKey(i) + serializationStream.writeValue(GpuColumnVector.incRefCounts(emptyBatch)) } - - // Setup the mocked BlockManager to return RecordingManagedBuffers. - val localBlockManagerId = BlockManagerId("test-client", "test-client", 1) - when(blockManager.blockManagerId).thenReturn(localBlockManagerId) - val buffers = (0 until numMaps).map { mapId => - // Create a ManagedBuffer with the shuffle data. - val nioBuffer = new NioManagedBuffer(ByteBuffer.wrap(byteOutputStream.toByteArray)) - val managedBuffer = new RecordingManagedBuffer(nioBuffer) - - // Setup the blockManager mock so the buffer gets returned when the shuffle code tries to - // fetch shuffle data. + } + + // Setup the mocked BlockManager to return RecordingManagedBuffers. + val localBlockManagerId = BlockManagerId("test-client", "test-client", 1) + when(blockManager.blockManagerId).thenReturn(localBlockManagerId) + val buffers = (0 until numMaps).map { mapId => + // Create a ManagedBuffer with the shuffle data. + val nioBuffer = new NioManagedBuffer(ByteBuffer.wrap(byteOutputStream.toByteArray)) + val managedBuffer = new RecordingManagedBuffer(nioBuffer, injectError) + + // Setup the blockManager mock so the buffer gets returned when the shuffle code tries to + // fetch shuffle data. + val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) + when(blockManager.getLocalBlockData(meq(shuffleBlockId))).thenReturn(managedBuffer) + managedBuffer + } + + // Make a mocked MapOutputTracker for the shuffle reader to use to determine what + // shuffle data to read. + val mapOutputTracker = mock(classOf[MapOutputTracker]) + when(mapOutputTracker.getMapSizesByExecutorId( + shuffleId, 0, numMaps, reduceId, reduceId + 1)).thenReturn { + // Test a scenario where all data is local, to avoid creating a bunch of additional mocks + // for the code to read data over the network. 
+ val shuffleBlockIdsAndSizes = (0 until numMaps).map { mapId => val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) - when(blockManager.getLocalBlockData(meq(shuffleBlockId))).thenReturn(managedBuffer) - managedBuffer + (shuffleBlockId, byteOutputStream.size().toLong, mapId) } - - // Make a mocked MapOutputTracker for the shuffle reader to use to determine what - // shuffle data to read. - val mapOutputTracker = mock(classOf[MapOutputTracker]) - when(mapOutputTracker.getMapSizesByExecutorId( - shuffleId, 0, numMaps, reduceId, reduceId + 1)).thenReturn { - // Test a scenario where all data is local, to avoid creating a bunch of additional mocks - // for the code to read data over the network. - val shuffleBlockIdsAndSizes = (0 until numMaps).map { mapId => - val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) - (shuffleBlockId, byteOutputStream.size().toLong, mapId) + Seq((localBlockManagerId, shuffleBlockIdsAndSizes)).iterator + } + + // Create a mocked shuffle handle to pass into HashShuffleReader. + val shuffleHandle = { + val dependency = mock(classOf[GpuShuffleDependency[Int, Int, Int]]) + when(dependency.serializer).thenReturn(serializer) + when(dependency.aggregator).thenReturn(None) + when(dependency.keyOrdering).thenReturn(None) + new ShuffleHandleWithMetrics[Int, Int, Int]( + shuffleId, Map.empty, dependency) + } + + val serializerManager = new SerializerManager( + serializer, + new SparkConf() + .set(config.SHUFFLE_COMPRESS, false) + .set(config.SHUFFLE_SPILL_COMPRESS, false)) + + val taskContext = TaskContext.empty() + val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val shuffleReader = new RapidsShuffleThreadedReader[Int, Int]( + 0, + numMaps, + reduceId, + reduceId + 1, + shuffleHandle, + taskContext, + metrics, + 1024 * 1024, + serializerManager, + blockManager, + mapOutputTracker = mapOutputTracker, + numReaderThreads = numReaderThreads) + + if (injectError) { + var e: Throwable = null + assertThrows[InjectedShuffleErrorInTests] { + try { + shuffleReader.read().length + } catch { + case t: Throwable => + e = t + throw t } - Seq((localBlockManagerId, shuffleBlockIdsAndSizes)).iterator - } - - // Create a mocked shuffle handle to pass into HashShuffleReader. - val shuffleHandle = { - val dependency = mock(classOf[GpuShuffleDependency[Int, Int, Int]]) - when(dependency.serializer).thenReturn(serializer) - when(dependency.aggregator).thenReturn(None) - when(dependency.keyOrdering).thenReturn(None) - new ShuffleHandleWithMetrics[Int, Int, Int]( - shuffleId, Map.empty, dependency) } - - val serializerManager = new SerializerManager( - serializer, - new SparkConf() - .set(config.SHUFFLE_COMPRESS, false) - .set(config.SHUFFLE_SPILL_COMPRESS, false)) - - val taskContext = TaskContext.empty() - val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() - val shuffleReader = new RapidsShuffleThreadedReader[Int, Int]( - 0, - numMaps, - reduceId, - reduceId + 1, - shuffleHandle, - taskContext, - metrics, - 1024 * 1024, - serializerManager, - blockManager, - mapOutputTracker = mapOutputTracker, - numReaderThreads = numReaderThreads) - + taskContext.markTaskCompleted(Some(e)) + } else { assert(shuffleReader.read().length === keyValuePairsPerMap * numMaps) + taskContext.markTaskCompleted(None) + } + + // Calling .length above will have exhausted the iterator; make sure that exhausting the + // iterator caused retain and release to be called on each buffer. 
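Aside (illustrative, not part of the patch): the try/catch nested inside assertThrows in the error path above exists only to capture the thrown error so it can then be handed to taskContext.markTaskCompleted(Some(e)), letting task-completion callbacks observe the failure much as they would on a real failed task. A minimal standalone sketch of that capture-and-rethrow pattern, assuming plain ScalaTest and purely illustrative names:

import org.scalatest.funsuite.AnyFunSuite

class CaptureWhileAssertingSuite extends AnyFunSuite {

  class InjectedError extends Exception

  test("capture the error that assertThrows verifies") {
    var seen: Throwable = null
    assertThrows[InjectedError] {
      try {
        throw new InjectedError // stands in for shuffleReader.read() failing mid-iteration
      } catch {
        case t: Throwable =>
          seen = t              // remember the error for later reporting
          throw t               // rethrow so assertThrows still sees and verifies it
      }
    }
    assert(seen.isInstanceOf[InjectedError]) // the captured error remains usable after the block
  }
}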
+ buffers.foreach { buffer => + assert(buffer.callsToRetain === 1) + assert(buffer.callsToRelease === 1) + } + }) + } - // Calling .length above will have exhausted the iterator; make sure that exhausting the - // iterator caused retain and release to be called on each buffer. - buffers.foreach { buffer => - assert(buffer.callsToRetain === 1) - assert(buffer.callsToRelease === 1) - } - }) + /** + * This test makes sure that, when data is read from a HashShuffleReader, the underlying + * ManagedBuffers that contain the data are eventually released. + */ + Seq(1, 2).foreach { numReaderThreads => + test(s"read() releases resources on completion - numThreads=$numReaderThreads") { + runShuffleRead(numReaderThreads) + } + + test(s"read() releases resources on error - numThreads=$numReaderThreads") { + runShuffleRead(numReaderThreads, injectError = true) } } } diff --git a/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedWriterSuite.scala b/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedWriterSuite.scala index 37c8856e626..818aa84bb3b 100644 --- a/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedWriterSuite.scala +++ b/tests/src/test/spark321/scala/org/apache/spark/sql/rapids/RapidsShuffleThreadedWriterSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/BloomFilterAggregateQuerySuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/BloomFilterAggregateQuerySuite.scala index be5928e23ac..7fc26e76005 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/BloomFilterAggregateQuerySuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/BloomFilterAggregateQuerySuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/CsvScanForIntervalSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/CsvScanForIntervalSuite.scala index b995d773ca4..e812fca9449 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/CsvScanForIntervalSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/CsvScanForIntervalSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
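Aside (context, not part of the patch): the many one-line +{"spark": "343"} hunks in this change each extend a file's spark-rapids-shim-json-lines header, the JSON-lines block that records which Spark shim versions the file is built against. After such a hunk applies, the tail of the block reads roughly as follows (earlier version entries elided):

/*** spark-rapids-shim-json-lines
...
{"spark": "340"}
{"spark": "341"}
{"spark": "342"}
{"spark": "343"}
spark-rapids-shim-json-lines ***/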
@@ -23,6 +23,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/GpuIntervalUtilsTest.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/GpuIntervalUtilsTest.scala index 57de81737bd..f63666c6fee 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/GpuIntervalUtilsTest.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/GpuIntervalUtilsTest.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalArithmeticSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalArithmeticSuite.scala index 9aeefc87b2e..0d4fcea4d05 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalArithmeticSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalArithmeticSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalCastSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalCastSuite.scala index c559ff9070f..28654db3036 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalCastSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalCastSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalDivisionSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalDivisionSuite.scala index af3087682f7..4a8ad0778ed 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalDivisionSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalDivisionSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalMultiplySuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalMultiplySuite.scala index eed8631c6b7..b560ec87ecf 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalMultiplySuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalMultiplySuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalSuite.scala index dc34562f00d..b4e1b9d3e67 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/IntervalSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/ParquetUDTSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/ParquetUDTSuite.scala index d9253489939..448e0260029 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/ParquetUDTSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/ParquetUDTSuite.scala @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/SampleSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/SampleSuite.scala index 4ce9faa44cd..bf19779c8a0 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/SampleSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/SampleSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/TimestampSuite.scala b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/TimestampSuite.scala index 3779c1fbd71..8b4c73a92d8 100644 --- a/tests/src/test/spark330/scala/com/nvidia/spark/rapids/TimestampSuite.scala +++ b/tests/src/test/spark330/scala/com/nvidia/spark/rapids/TimestampSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala index 22d9ea63146..82ce1073e13 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,8 +25,10 @@ {"spark": "340"} {"spark": "341"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsCastSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsCastSuite.scala new file mode 100644 index 00000000000..f3fec27f7f6 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsCastSuite.scala @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.catalyst.expressions.{Cast, CastBase, CastSuite, Expression, Literal} +import org.apache.spark.sql.rapids.utils.RapidsTestsTrait +import org.apache.spark.sql.types._ + +class RapidsCastSuite extends CastSuite with RapidsTestsTrait { + // example to enhance logging for base suite + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): CastBase = { + v match { + case lit: Expression => + logDebug(s"Cast from: ${lit.dataType.typeName}, to: ${targetType.typeName}") + Cast(lit, targetType, timeZoneId) + case _ => + val lit = Literal(v) + logDebug(s"Cast from: ${lit.dataType.typeName}, to: ${targetType.typeName}") + Cast(lit, targetType, timeZoneId) + } + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala new file mode 100644 index 00000000000..5a394a5b0e8 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.DataFrameAggregateSuite +import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait + +class RapidsDataFrameAggregateSuite extends DataFrameAggregateSuite with RapidsSQLTestsTrait { + // example to show how to replace the logic of an excluded test case in Vanilla Spark + testRapids("collect functions" ) { // "collect functions" was excluded at RapidsTestSettings + // println("...") + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala new file mode 100644 index 00000000000..43150c0df4b --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonFunctionsSuite.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.JsonFunctionsSuite +import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait + +class RapidsJsonFunctionsSuite extends JsonFunctionsSuite with RapidsSQLTestsTrait {} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala new file mode 100644 index 00000000000..6d244c67ad0 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.datasources.{InMemoryFileIndex, NoopCache} +import org.apache.spark.sql.execution.datasources.json.JsonSuite +import org.apache.spark.sql.execution.datasources.v2.json.JsonScanBuilder +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.utils.RapidsSQLTestsBaseTrait +import org.apache.spark.sql.sources +import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class RapidsJsonSuite extends JsonSuite with RapidsSQLTestsBaseTrait { + + /** Returns full path to the given file in the resource folder */ + override protected def testFile(fileName: String): String = { + getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName + } +} + +class RapidsJsonV1Suite extends RapidsJsonSuite with RapidsSQLTestsBaseTrait { + override def sparkConf: SparkConf = + super.sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "json") +} + +class RapidsJsonV2Suite extends RapidsJsonSuite with RapidsSQLTestsBaseTrait { + override def sparkConf: SparkConf = + super.sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") + + test("get pushed filters") { + val attr = "col" + def getBuilder(path: String): JsonScanBuilder = { + val fileIndex = new InMemoryFileIndex( + spark, + Seq(new org.apache.hadoop.fs.Path(path, "file.json")), + Map.empty, + None, + NoopCache) + val schema = new StructType().add(attr, IntegerType) + val options = CaseInsensitiveStringMap.empty() + new JsonScanBuilder(spark, fileIndex, schema, schema, options) + } + val filters: Array[sources.Filter] = Array(sources.IsNotNull(attr)) + withSQLConf(SQLConf.JSON_FILTER_PUSHDOWN_ENABLED.key -> "true") { + withTempPath { + file => + val scanBuilder = getBuilder(file.getCanonicalPath) + assert(scanBuilder.pushDataFilters(filters) === filters) + } + } + + withSQLConf(SQLConf.JSON_FILTER_PUSHDOWN_ENABLED.key -> "false") { + withTempPath { + file => + val scanBuilder = getBuilder(file.getCanonicalPath) + 
assert(scanBuilder.pushDataFilters(filters) === Array.empty[sources.Filter]) + } + } + } +} + +class RapidsJsonLegacyTimeParserSuite extends RapidsJsonSuite with RapidsSQLTestsBaseTrait { + override def sparkConf: SparkConf = + super.sparkConf + .set(SQLConf.LEGACY_TIME_PARSER_POLICY, "legacy") +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsMathFunctionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsMathFunctionsSuite.scala new file mode 100644 index 00000000000..55b4b00f680 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsMathFunctionsSuite.scala @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.MathFunctionsSuite +import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait + +class RapidsMathFunctionsSuite extends MathFunctionsSuite with RapidsSQLTestsTrait { +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsRegexpExpressionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsRegexpExpressionsSuite.scala new file mode 100644 index 00000000000..95b54240dbe --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsRegexpExpressionsSuite.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.catalyst.expressions.RegexpExpressionsSuite +import org.apache.spark.sql.rapids.utils.RapidsTestsTrait + +class RapidsRegexpExpressionsSuite extends RegexpExpressionsSuite with RapidsTestsTrait {} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringExpressionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringExpressionsSuite.scala new file mode 100644 index 00000000000..164406fdf83 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringExpressionsSuite.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.catalyst.expressions.StringExpressionsSuite +import org.apache.spark.sql.rapids.utils.RapidsTestsTrait + +class RapidsStringExpressionsSuite extends StringExpressionsSuite with RapidsTestsTrait {} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringFunctionsSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringFunctionsSuite.scala new file mode 100644 index 00000000000..7b4a8ac6d7d --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsStringFunctionsSuite.scala @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.suites + +import org.apache.spark.sql.StringFunctionsSuite +import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait + +class RapidsStringFunctionsSuite + extends StringFunctionsSuite + with RapidsSQLTestsTrait { +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala new file mode 100644 index 00000000000..83396e977fa --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import java.util + +import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import com.nvidia.spark.rapids.TestStats + +import org.apache.spark.sql.rapids.utils.RapidsTestConstants.RAPIDS_TEST + +abstract class BackendTestSettings { + + private val enabledSuites: java.util.Map[String, SuiteSettings] = new util.HashMap() + + protected def enableSuite[T: ClassTag]: SuiteSettings = { + val suiteName = implicitly[ClassTag[T]].runtimeClass.getCanonicalName + if (enabledSuites.containsKey(suiteName)) { + throw new IllegalArgumentException("Duplicated suite name: " + suiteName) + } + val suiteSettings = new SuiteSettings + enabledSuites.put(suiteName, suiteSettings) + suiteSettings + } + + private[utils] def shouldRun(suiteName: String, testName: String): Boolean = { + if (!enabledSuites.containsKey(suiteName)) { + return false + } + + val suiteSettings = enabledSuites.get(suiteName) + + val inclusion = suiteSettings.inclusion.asScala + val exclusion = suiteSettings.exclusion.asScala + + if (inclusion.isEmpty && exclusion.isEmpty) { + // default to run all cases under this suite + return true + } + + if (inclusion.nonEmpty && exclusion.nonEmpty) { + // error + throw new IllegalStateException( + s"Do not use include and exclude conditions on the same test case: $suiteName:$testName") + } + + if (inclusion.nonEmpty) { + // include mode + val isIncluded = inclusion.exists(_.isIncluded(testName)) + return isIncluded + } + + if (exclusion.nonEmpty) { + // exclude mode + val isExcluded = exclusion.exists(_.isExcluded(testName)) + return !isExcluded + } + + throw new IllegalStateException("Unreachable code") + } + + sealed trait ExcludeReason + // The reason should most likely to be a issue link, + // or a description like "This simply can't work on GPU". 
+ // It should never be "unknown" or "need investigation" + case class KNOWN_ISSUE(reason: String) extends ExcludeReason + case class WONT_FIX_ISSUE(reason: String) extends ExcludeReason + + + final protected class SuiteSettings { + private[utils] val inclusion: util.List[IncludeBase] = new util.ArrayList() + private[utils] val exclusion: util.List[ExcludeBase] = new util.ArrayList() + private[utils] val excludeReasons: util.List[ExcludeReason] = new util.ArrayList() + + def include(testNames: String*): SuiteSettings = { + inclusion.add(Include(testNames: _*)) + this + } + def exclude(testNames: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(Exclude(testNames)) + excludeReasons.add(reason) + this + } + def includeRapidsTest(testName: String*): SuiteSettings = { + inclusion.add(IncludeRapidsTest(testName: _*)) + this + } + def excludeRapidsTest(testName: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeRapidsTest(testName)) + excludeReasons.add(reason) + this + } + def includeByPrefix(prefixes: String*): SuiteSettings = { + inclusion.add(IncludeByPrefix(prefixes: _*)) + this + } + def excludeByPrefix(prefixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeByPrefix(prefixes)) + excludeReasons.add(reason) + this + } + def includeRapidsTestsByPrefix(prefixes: String*): SuiteSettings = { + inclusion.add(IncludeRapidsTestByPrefix(prefixes: _*)) + this + } + def excludeRapidsTestsByPrefix(prefixes: String, reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeRadpisTestByPrefix(prefixes)) + excludeReasons.add(reason) + this + } + def includeAllRapidsTests(): SuiteSettings = { + inclusion.add(IncludeByPrefix(RAPIDS_TEST)) + this + } + def excludeAllRapidsTests(reason: ExcludeReason): SuiteSettings = { + exclusion.add(ExcludeByPrefix(RAPIDS_TEST)) + excludeReasons.add(reason) + this + } + } + + protected trait IncludeBase { + def isIncluded(testName: String): Boolean + } + protected trait ExcludeBase { + def isExcluded(testName: String): Boolean + } + private case class Include(testNames: String*) extends IncludeBase { + val nameSet: Set[String] = Set(testNames: _*) + override def isIncluded(testName: String): Boolean = nameSet.contains(testName) + } + private case class Exclude(testNames: String*) extends ExcludeBase { + val nameSet: Set[String] = Set(testNames: _*) + override def isExcluded(testName: String): Boolean = nameSet.contains(testName) + } + private case class IncludeRapidsTest(testNames: String*) extends IncludeBase { + val nameSet: Set[String] = testNames.map(name => RAPIDS_TEST + name).toSet + override def isIncluded(testName: String): Boolean = nameSet.contains(testName) + } + private case class ExcludeRapidsTest(testNames: String*) extends ExcludeBase { + val nameSet: Set[String] = testNames.map(name => RAPIDS_TEST + name).toSet + override def isExcluded(testName: String): Boolean = nameSet.contains(testName) + } + private case class IncludeByPrefix(prefixes: String*) extends IncludeBase { + override def isIncluded(testName: String): Boolean = { + if (prefixes.exists(prefix => testName.startsWith(prefix))) { + return true + } + false + } + } + private case class ExcludeByPrefix(prefixes: String*) extends ExcludeBase { + override def isExcluded(testName: String): Boolean = { + if (prefixes.exists(prefix => testName.startsWith(prefix))) { + return true + } + false + } + } + private case class IncludeRapidsTestByPrefix(prefixes: String*) extends IncludeBase { + override def isIncluded(testName: String): 
Boolean = { + if (prefixes.exists(prefix => testName.startsWith(RAPIDS_TEST + prefix))) { + return true + } + false + } + } + private case class ExcludeRadpisTestByPrefix(prefixes: String*) extends ExcludeBase { + override def isExcluded(testName: String): Boolean = { + if (prefixes.exists(prefix => testName.startsWith(RAPIDS_TEST + prefix))) { + return true + } + false + } + } +} + +object BackendTestSettings { + val instance: BackendTestSettings = { + Class + .forName("org.apache.spark.sql.rapids.utils.RapidsTestSettings") + .getDeclaredConstructor() + .newInstance() + .asInstanceOf[BackendTestSettings] + } + + def shouldRun(suiteName: String, testName: String): Boolean = { + val v = instance.shouldRun(suiteName, testName: String) + + if (!v) { + TestStats.addIgnoreCaseName(testName) + } + + v + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala new file mode 100644 index 00000000000..540c70a2ee1 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import java.util.{Locale, TimeZone} + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.Tests.IS_TESTING +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, ShuffleQueryStageExec} +import org.apache.spark.sql.rapids.utils.RapidsTestConstants.RAPIDS_TEST +import org.apache.spark.sql.test.SharedSparkSession + + +/** Basic trait for Rapids SQL test cases. */ +trait RapidsSQLTestsBaseTrait extends SharedSparkSession with RapidsTestsBaseTrait { + + protected override def afterAll(): Unit = { + // SparkFunSuite will set this to true, and forget to reset to false + System.clearProperty(IS_TESTING.key) + super.afterAll() + } + + protected def testRapids(testName: String, testTag: Tag*)(testFun: => Any)(implicit + pos: Position): Unit = { + test(RAPIDS_TEST + testName, testTag: _*)(testFun) + } + + override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit + pos: Position): Unit = { + if (shouldRun(testName)) { + super.test(testName, testTags: _*)(testFun) + } else { + super.ignore(testName, testTags: _*)(testFun) + } + } + + override def sparkConf: SparkConf = { + RapidsSQLTestsBaseTrait.nativeSparkConf(super.sparkConf, warehouse) + } + + /** + * Get all the children plan of plans. + * + * @param plans + * : the input plans. 
+ * @return + */ + private def getChildrenPlan(plans: Seq[SparkPlan]): Seq[SparkPlan] = { + if (plans.isEmpty) { + return Seq() + } + + val inputPlans: Seq[SparkPlan] = plans.map { + case stage: ShuffleQueryStageExec => stage.plan + case plan => plan + } + + var newChildren: Seq[SparkPlan] = Seq() + inputPlans.foreach { + plan => + newChildren = newChildren ++ getChildrenPlan(plan.children) + // To avoid duplication of WholeStageCodegenXXX and its children. + if (!plan.nodeName.startsWith("WholeStageCodegen")) { + newChildren = newChildren :+ plan + } + } + newChildren + } + + /** + * Get the executed plan of a data frame. + * + * @param df + * : dataframe. + * @return + * A sequence of executed plans. + */ + def getExecutedPlan(df: DataFrame): Seq[SparkPlan] = { + df.queryExecution.executedPlan match { + case exec: AdaptiveSparkPlanExec => + getChildrenPlan(Seq(exec.executedPlan)) + case plan => + getChildrenPlan(Seq(plan)) + } + } +} + +object RapidsSQLTestsBaseTrait { + def nativeSparkConf(origin: SparkConf, warehouse: String): SparkConf = { + // Timezone is fixed to UTC to allow timestamps to work by default + TimeZone.setDefault(TimeZone.getTimeZone("UTC")) + // Add Locale setting + Locale.setDefault(Locale.US) + + val conf = origin + .set("spark.rapids.sql.enabled", "true") + .set("spark.plugins", "com.nvidia.spark.SQLPlugin") + .set("spark.sql.queryExecutionListeners", + "org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback") + .set("spark.sql.warehouse.dir", warehouse) + .set("spark.sql.cache.serializer", "com.nvidia.spark.ParquetCachedBatchSerializer") + .setAppName("rapids spark plugin running Vanilla Spark UT") + + conf + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsTrait.scala new file mode 100644 index 00000000000..4358e29630c --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsTrait.scala @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import java.io.File +import java.util.TimeZone + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.{FileUtils => fu} +import org.apache.commons.math3.util.Precision +import org.scalatest.Assertions + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical +import org.apache.spark.sql.catalyst.util.{sideBySide, stackTraceToString} +import org.apache.spark.sql.execution.SQLExecution + + +/** Basic trait for Rapids SQL test cases. 
*/ +trait RapidsSQLTestsTrait extends QueryTest with RapidsSQLTestsBaseTrait { + + def prepareWorkDir(): Unit = { + // prepare working paths + val basePathDir = new File(basePath) + if (basePathDir.exists()) { + fu.forceDelete(basePathDir) + } + fu.forceMkdir(basePathDir) + fu.forceMkdir(new File(warehouse)) + fu.forceMkdir(new File(metaStorePathAbsolute)) + } + + override def beforeAll(): Unit = { + prepareWorkDir() + super.beforeAll() + spark.sparkContext.setLogLevel("WARN") + } + + override def afterAll(): Unit = { + super.afterAll() + } + + override protected def checkAnswer(df: => DataFrame, expectedAnswer: Seq[Row]): Unit = { + val analyzedDF = + try df + catch { + case ae: AnalysisException => + val plan = ae.plan + if (plan.isDefined) { + fail(s""" + |Failed to analyze query: $ae + |${plan.get} + | + |${stackTraceToString(ae)} + |""".stripMargin) + } else { + throw ae + } + } + + assertEmptyMissingInput(analyzedDF) + + RapidsQueryTestUtil.checkAnswer(analyzedDF, expectedAnswer) + } +} + +object RapidsQueryTestUtil extends Assertions { + + /** + * Runs the plan and makes sure the answer matches the expected result. + * + * @param df + * the DataFrame to be executed + * @param expectedAnswer + * the expected result in a Seq of Rows. + * @param checkToRDD + * whether to verify deserialization to an RDD. This runs the query twice. + */ + def checkAnswer(df: DataFrame, expectedAnswer: Seq[Row], checkToRDD: Boolean = true): Unit = { + getErrorMessageInCheckAnswer(df, expectedAnswer, checkToRDD) match { + case Some(errorMessage) => fail(errorMessage) + case None => + } + } + + /** + * Runs the plan and makes sure the answer matches the expected result. If there was exception + * during the execution or the contents of the DataFrame does not match the expected result, an + * error message will be returned. Otherwise, a None will be returned. + * + * @param df + * the DataFrame to be executed + * @param expectedAnswer + * the expected result in a Seq of Rows. + * @param checkToRDD + * whether to verify deserialization to an RDD. This runs the query twice. + */ + def getErrorMessageInCheckAnswer( + df: DataFrame, + expectedAnswer: Seq[Row], + checkToRDD: Boolean = true): Option[String] = { + val isSorted = df.logicalPlan.collect { case s: logical.Sort => s }.nonEmpty + if (checkToRDD) { + SQLExecution.withSQLConfPropagated(df.sparkSession) { + df.rdd.count() // Also attempt to deserialize as an RDD [SPARK-15791] + } + } + + val sparkAnswer = + try df.collect().toSeq + catch { + case e: Exception => + val errorMessage = + s""" + |Exception thrown while executing query: + |${df.queryExecution} + |== Exception == + |$e + |${org.apache.spark.sql.catalyst.util.stackTraceToString(e)} + """.stripMargin + return Some(errorMessage) + } + + sameRows(expectedAnswer, sparkAnswer, isSorted).map { + results => + s""" + |Results do not match for query: + |Timezone: ${TimeZone.getDefault} + |Timezone Env: ${sys.env.getOrElse("TZ", "")} + | + |${df.queryExecution} + |== Results == + |$results + """.stripMargin + } + } + + def prepareAnswer(answer: Seq[Row], isSorted: Boolean): Seq[Row] = { + // Converts data to types that we can do equality comparison using Scala collections. + // For BigDecimal type, the Scala type has a better definition of equality test (similar to + // Java's java.math.BigDecimal.compareTo). + // For binary arrays, we convert it to Seq to avoid of calling java.util.Arrays.equals for + // equality test. 
+ val converted: Seq[Row] = answer.map(prepareRow) + if (!isSorted) converted.sortBy(_.toString()) else converted + } + + // We need to call prepareRow recursively to handle schemas with struct types. + def prepareRow(row: Row): Row = { + Row.fromSeq(row.toSeq.map { + case null => null + case bd: java.math.BigDecimal => BigDecimal(bd) + // Equality of WrappedArray differs for AnyVal and AnyRef in Scala 2.12.2+ + case seq: Seq[_] => + seq.map { + case b: java.lang.Byte => b.byteValue + case s: java.lang.Short => s.shortValue + case i: java.lang.Integer => i.intValue + case l: java.lang.Long => l.longValue + case f: java.lang.Float => f.floatValue + case d: java.lang.Double => d.doubleValue + case x => x + } + // Convert array to Seq for easy equality check. + case b: Array[_] => b.toSeq + case r: Row => prepareRow(r) + case o => o + }) + } + + private def genError( + expectedAnswer: Seq[Row], + sparkAnswer: Seq[Row], + isSorted: Boolean): String = { + val getRowType: Option[Row] => String = row => + row + .map( + row => + if (row.schema == null) { + "struct<>" + } else { + s"${row.schema.catalogString}" + }) + .getOrElse("struct<>") + + s""" + |== Results == + |${sideBySide( + s"== Correct Answer - ${expectedAnswer.size} ==" +: + getRowType(expectedAnswer.headOption) +: + prepareAnswer(expectedAnswer, isSorted).map(_.toString()), + s"== RAPIDS Answer - ${sparkAnswer.size} ==" +: + getRowType(sparkAnswer.headOption) +: + prepareAnswer(sparkAnswer, isSorted).map(_.toString()) + ).mkString("\n")} + """.stripMargin + } + + def includesRows(expectedRows: Seq[Row], sparkAnswer: Seq[Row]): Option[String] = { + if (!prepareAnswer(expectedRows, true).toSet.subsetOf(prepareAnswer(sparkAnswer, true).toSet)) { + return Some(genError(expectedRows, sparkAnswer, true)) + } + None + } + + private def compare(obj1: Any, obj2: Any): Boolean = (obj1, obj2) match { + case (null, null) => true + case (null, _) => false + case (_, null) => false + case (a: Array[_], b: Array[_]) => + a.length == b.length && a.zip(b).forall { case (l, r) => compare(l, r) } + case (a: Map[_, _], b: Map[_, _]) => + a.size == b.size && a.keys.forall { + aKey => b.keys.find(bKey => compare(aKey, bKey)).exists(bKey => compare(a(aKey), b(bKey))) + } + case (a: Iterable[_], b: Iterable[_]) => + a.size == b.size && a.zip(b).forall { case (l, r) => compare(l, r) } + case (a: Product, b: Product) => + compare(a.productIterator.toSeq, b.productIterator.toSeq) + case (a: Row, b: Row) => + compare(a.toSeq, b.toSeq) + // 0.0 == -0.0, turn float/double to bits before comparison, to distinguish 0.0 and -0.0. 
+ case (a: Double, b: Double) => + if ((isNaNOrInf(a) || isNaNOrInf(b)) || (a == -0.0) || (b == -0.0)) { + java.lang.Double.doubleToRawLongBits(a) == java.lang.Double.doubleToRawLongBits(b) + } else { + Precision.equalsWithRelativeTolerance(a, b, 0.00001d) + } + case (a: Float, b: Float) => + java.lang.Float.floatToRawIntBits(a) == java.lang.Float.floatToRawIntBits(b) + case (a, b) => a == b + } + + def isNaNOrInf(num: Double): Boolean = { + num.isNaN || num.isInfinite || num.isNegInfinity || num.isPosInfinity + } + + def sameRows( + expectedAnswer: Seq[Row], + sparkAnswer: Seq[Row], + isSorted: Boolean = false): Option[String] = { + // modify method 'compare' + if (!compare(prepareAnswer(expectedAnswer, isSorted), prepareAnswer(sparkAnswer, isSorted))) { + return Some(genError(expectedAnswer, sparkAnswer, isSorted)) + } + None + } + + def checkAnswer(df: DataFrame, expectedAnswer: java.util.List[Row]): Unit = { + getErrorMessageInCheckAnswer(df, expectedAnswer.asScala.toSeq) match { + case Some(errorMessage) => fail(errorMessage) + case None => + } + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestConstants.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestConstants.scala new file mode 100644 index 00000000000..772becaa5f9 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestConstants.scala @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import org.apache.spark.sql.types._ + +object RapidsTestConstants { + + val RAPIDS_TEST: String = "Rapids - " + + val IGNORE_ALL: String = "IGNORE_ALL" + + val SUPPORTED_DATA_TYPE = TypeCollection( + BooleanType, + ByteType, + ShortType, + IntegerType, + LongType, + FloatType, + DoubleType, + DecimalType, + StringType, + BinaryType, + DateType, + TimestampType, + ArrayType, + StructType, + MapType + ) +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala new file mode 100644 index 00000000000..4981c385219 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import org.apache.spark.sql.rapids.suites.{RapidsCastSuite, RapidsDataFrameAggregateSuite, RapidsJsonFunctionsSuite, RapidsJsonSuite, RapidsMathFunctionsSuite, RapidsRegexpExpressionsSuite, RapidsStringExpressionsSuite, RapidsStringFunctionsSuite} + +// Some settings' line length exceeds 100 +// scalastyle:off line.size.limit + +class RapidsTestSettings extends BackendTestSettings { + + enableSuite[RapidsCastSuite] + .exclude("Process Infinity, -Infinity, NaN in case insensitive manner", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("SPARK-35711: cast timestamp without time zone to timestamp with local time zone", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("SPARK-35719: cast timestamp with local time zone to timestamp without timezone", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("SPARK-35112: Cast string to day-time interval", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("SPARK-35735: Take into account day-time interval fields in cast", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("casting to fixed-precision decimals", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("SPARK-32828: cast from a derived user-defined type to a base type", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + enableSuite[RapidsDataFrameAggregateSuite] + .exclude("collect functions", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + .exclude("collect functions structs", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + .exclude("collect functions should be able to cast to array type with no null values", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + .exclude("SPARK-17641: collect functions should not collect null values", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + enableSuite[RapidsJsonFunctionsSuite] + enableSuite[RapidsJsonSuite] + .exclude("Casting long as timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("Write timestamps correctly with timestampFormat option and timeZone option", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: json in UTF-16 with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: multi-line json in UTF-32BE with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: Use user's encoding in reading of multi-line json in UTF-16LE", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: Unsupported encoding name", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: checking that the encoding option is case agnostic", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23723: specified encoding is not matched to actual encoding", 
KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-23724: lineSep should be set if encoding if different from UTF-8", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-31716: inferring should handle malformed input", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-24190: restrictions for JSONOptions in read", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("exception mode for parsing date/timestamp string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + .exclude("SPARK-37360: Timestamp type inference for a mix of TIMESTAMP_NTZ and TIMESTAMP_LTZ", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) + enableSuite[RapidsMathFunctionsSuite] + enableSuite[RapidsRegexpExpressionsSuite] + .exclude("RegexReplace", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) + .exclude("RegexExtract", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) + .exclude("RegexExtractAll", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) + .exclude("SPLIT", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10774")) + enableSuite[RapidsStringExpressionsSuite] + .exclude("SPARK-22498: Concat should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("SPARK-22549: ConcatWs should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("SPARK-22550: Elt should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("StringComparison", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("Substring", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("ascii for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("base64/unbase64 for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("encode/decode for string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("SPARK-22603: FormatString should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("LOCATE", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("LPAD/RPAD", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("REPEAT", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("length for string / binary", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + .exclude("ParseUrl", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) + enableSuite[RapidsStringFunctionsSuite] +} +// scalastyle:on line.size.limit diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsBaseTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsBaseTrait.scala new file mode 100644 index 00000000000..a3039077d90 --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsBaseTrait.scala @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +trait RapidsTestsBaseTrait { + + protected val rootPath: String = getClass.getResource("/").getPath + protected val basePath: String = rootPath + "unit-tests-working-home" + + protected val warehouse: String = basePath + "/spark-warehouse" + protected val metaStorePathAbsolute: String = basePath + "/meta" + + def shouldRun(testName: String): Boolean = { + BackendTestSettings.shouldRun(getClass.getCanonicalName, testName) + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsCommonTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsCommonTrait.scala new file mode 100644 index 00000000000..1b39073fdcf --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsCommonTrait.scala @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import com.nvidia.spark.rapids.TestStats +import org.scalactic.source.Position +import org.scalatest.{Args, Status, Tag} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config.Tests.IS_TESTING +import org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper +import org.apache.spark.sql.rapids.utils.RapidsTestConstants.RAPIDS_TEST + +trait RapidsTestsCommonTrait + extends SparkFunSuite + with ExpressionEvalHelper + with RapidsTestsBaseTrait { + + protected override def afterAll(): Unit = { + // SparkFunSuite will set this to true, and forget to reset to false + System.clearProperty(IS_TESTING.key) + super.afterAll() + } + + override def runTest(testName: String, args: Args): Status = { + TestStats.suiteTestNumber += 1 + TestStats.offloadRapids = true + TestStats.startCase(testName) + val status = super.runTest(testName, args) + if (TestStats.offloadRapids) { + TestStats.offloadRapidsTestNumber += 1 + print("'" + testName + "'" + " offload to RAPIDS\n") + } else { + // you can find the keyword 'Validation failed for' in function doValidate() in log + // to get the fallback reason + print("'" + testName + "'" + " NOT use RAPIDS\n") + TestStats.addFallBackCase() + } + + TestStats.endCase(status.succeeds()); + status + } + + protected def testRapids(testName: String, testTag: Tag*)(testFun: => Any)(implicit + pos: Position): Unit = { + test(RAPIDS_TEST + testName, testTag: _*)(testFun) + } + override protected def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit + pos: Position): Unit = { + if (shouldRun(testName)) { + super.test(testName, testTags: _*)(testFun) + } else { + super.ignore(testName, testTags: _*)(testFun) + } + } +} diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala new file mode 100644 index 00000000000..08e10f4d4fd --- /dev/null +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "330"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.utils + +import java.io.File + +import com.nvidia.spark.rapids.{GpuProjectExec, TestStats} +import org.apache.commons.io.{FileUtils => fu} +import org.apache.commons.math3.util.Precision +import org.scalactic.TripleEqualsSupport.Spread +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.{Column, DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ConvertToLocalRelation, NullPropagation} +import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, TypeUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.utils.RapidsQueryTestUtil.isNaNOrInf +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +trait RapidsTestsTrait extends RapidsTestsCommonTrait { + + override def beforeAll(): Unit = { + // prepare working paths + val basePathDir = new File(basePath) + if (basePathDir.exists()) { + fu.forceDelete(basePathDir) + } + fu.forceMkdir(basePathDir) + fu.forceMkdir(new File(warehouse)) + fu.forceMkdir(new File(metaStorePathAbsolute)) + super.beforeAll() + initializeSession() + _spark.sparkContext.setLogLevel("WARN") + } + + override def afterAll(): Unit = { + try { + super.afterAll() + } finally { + try { + if (_spark != null) { + try { + _spark.sessionState.catalog.reset() + } finally { + _spark.stop() + _spark = null + } + } + } finally { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + } + } + logInfo( + "Test suite: " + this.getClass.getSimpleName + + "; Suite test number: " + TestStats.suiteTestNumber + + "; OffloadRapids number: " + TestStats.offloadRapidsTestNumber + "\n") + TestStats.printMarkdown(this.getClass.getSimpleName) + TestStats.reset() + } + + protected def initializeSession(): Unit = { + if (_spark == null) { + val sparkBuilder = SparkSession + .builder() + .master(s"local[2]") + // Avoid static evaluation for literal input by spark catalyst. 
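+        // (A descriptive note on the line above: excluding ConvertToLocalRelation,
+        // ConstantFolding and NullPropagation below keeps literal-only expressions in the
+        // plan so they are evaluated at execution time by the plugin under test rather than
+        // being folded away by Catalyst on the driver.)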
+ .config( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key, + ConvertToLocalRelation.ruleName + + "," + ConstantFolding.ruleName + "," + NullPropagation.ruleName) + .config("spark.rapids.sql.enabled", "true") + .config("spark.plugins", "com.nvidia.spark.SQLPlugin") + .config("spark.sql.queryExecutionListeners", + "org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback") + .config("spark.sql.warehouse.dir", warehouse) + .appName("rapids spark plugin running Vanilla Spark UT") + + _spark = sparkBuilder + .config("spark.unsafe.exceptionOnMemoryLeak", "true") + .getOrCreate() + } + } + + protected var _spark: SparkSession = null + + override protected def checkEvaluation( + expression: => Expression, + expected: Any, + inputRow: InternalRow = EmptyRow): Unit = { + val resolver = ResolveTimeZone + val expr = resolver.resolveTimeZones(expression) + assert(expr.resolved) + + if (canConvertToDataFrame(inputRow)) { + rapidsCheckExpression(expr, expected, inputRow) + } else { + logWarning(s"The status of this unit test is not guaranteed.") + val catalystValue = CatalystTypeConverters.convertToCatalyst(expected) + checkEvaluationWithoutCodegen(expr, catalystValue, inputRow) + checkEvaluationWithMutableProjection(expr, catalystValue, inputRow) + if (GenerateUnsafeProjection.canSupport(expr.dataType)) { + checkEvaluationWithUnsafeProjection(expr, catalystValue, inputRow) + } + checkEvaluationWithOptimization(expr, catalystValue, inputRow) + } + } + + /** + * Sort map data by key and return the sorted key array and value array. + * + * @param input + * : input map data. + * @param kt + * : key type. + * @param vt + * : value type. + * @return + * the sorted key array and value array. + */ + private def getSortedArrays( + input: MapData, + kt: DataType, + vt: DataType): (ArrayData, ArrayData) = { + val keyArray = input.keyArray().toArray[Any](kt) + val valueArray = input.valueArray().toArray[Any](vt) + val newMap = (keyArray.zip(valueArray)).toMap + val sortedMap = mutable.SortedMap(newMap.toSeq: _*)(TypeUtils.getInterpretedOrdering(kt)) + (new GenericArrayData(sortedMap.keys.toArray), new GenericArrayData(sortedMap.values.toArray)) + } + + override protected def checkResult( + result: Any, + expected: Any, + exprDataType: DataType, + exprNullable: Boolean): Boolean = { + val dataType = UserDefinedType.sqlType(exprDataType) + + // The result is null for a non-nullable expression + assert(result != null || exprNullable, "exprNullable should be true if result is null") + (result, expected) match { + case (result: Array[Byte], expected: Array[Byte]) => + java.util.Arrays.equals(result, expected) + case (result: Double, expected: Spread[Double @unchecked]) => + expected.asInstanceOf[Spread[Double]].isWithin(result) + case (result: InternalRow, expected: InternalRow) => + val st = dataType.asInstanceOf[StructType] + assert(result.numFields == st.length && expected.numFields == st.length) + st.zipWithIndex.forall { + case (f, i) => + checkResult( + result.get(i, f.dataType), + expected.get(i, f.dataType), + f.dataType, + f.nullable) + } + case (result: ArrayData, expected: ArrayData) => + result.numElements == expected.numElements && { + val ArrayType(et, cn) = dataType.asInstanceOf[ArrayType] + var isSame = true + var i = 0 + while (isSame && i < result.numElements) { + isSame = checkResult(result.get(i, et), expected.get(i, et), et, cn) + i += 1 + } + isSame + } + case (result: MapData, expected: MapData) => + val MapType(kt, vt, vcn) = dataType.asInstanceOf[MapType] + checkResult( + getSortedArrays(result, kt, 
vt)._1, + getSortedArrays(expected, kt, vt)._1, + ArrayType(kt, containsNull = false), + exprNullable = false) && checkResult( + getSortedArrays(result, kt, vt)._2, + getSortedArrays(expected, kt, vt)._2, + ArrayType(vt, vcn), + exprNullable = false) + case (result: Double, expected: Double) => + if ( + (isNaNOrInf(result) || isNaNOrInf(expected)) + || (result == -0.0) || (expected == -0.0) + ) { + java.lang.Double.doubleToRawLongBits(result) == + java.lang.Double.doubleToRawLongBits(expected) + } else { + Precision.equalsWithRelativeTolerance(result, expected, 0.00001d) + } + case (result: Float, expected: Float) => + if (expected.isNaN) result.isNaN else expected == result + case (result: Row, expected: InternalRow) => result.toSeq == expected.toSeq(result.schema) + case _ => + result == expected + } + } + + def checkDataTypeSupported(expr: Expression): Boolean = { + RapidsTestConstants.SUPPORTED_DATA_TYPE.acceptsType(expr.dataType) + } + + def rapidsCheckExpression(expression: Expression, expected: Any, inputRow: InternalRow): Unit = { + val df = if (inputRow != EmptyRow && inputRow != InternalRow.empty) { + convertInternalRowToDataFrame(inputRow) + } else { + val schema = StructType(StructField("a", IntegerType, nullable = true) :: Nil) + val empData = Seq(Row(1)) + _spark.createDataFrame(_spark.sparkContext.parallelize(empData), schema) + } + val resultDF = df.select(Column(expression)) + val result = resultDF.collect() + TestStats.testUnitNumber = TestStats.testUnitNumber + 1 + if ( + checkDataTypeSupported(expression) && + expression.children.forall(checkDataTypeSupported) + ) { + val projectTransformer = resultDF.queryExecution.executedPlan.collect { + case p: GpuProjectExec => p + } + if (projectTransformer.size == 1) { + TestStats.offloadRapidsUnitNumber += 1 + logInfo("Offload to native backend in the test.\n") + } else { + logInfo("Not supported in native backend, fall back to vanilla spark in the test.\n") + shouldNotFallback() + } + } else { + logInfo("Has unsupported data type, fall back to vanilla spark.\n") + shouldNotFallback() + } + + if ( + !(checkResult(result.head.get(0), expected, expression.dataType, expression.nullable) + || checkResult( + CatalystTypeConverters.createToCatalystConverter(expression.dataType)( + result.head.get(0) + ), // decimal precision is wrong from value + CatalystTypeConverters.convertToCatalyst(expected), + expression.dataType, + expression.nullable + )) + ) { + val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" + fail( + s"Incorrect evaluation: $expression, " + + s"actual: ${result.head.get(0)}, " + + s"expected: $expected$input") + } + } + + def shouldNotFallback(): Unit = { + TestStats.offloadRapids = false + } + + def canConvertToDataFrame(inputRow: InternalRow): Boolean = { + if (inputRow == EmptyRow || inputRow == InternalRow.empty) { + return true + } + if (!inputRow.isInstanceOf[GenericInternalRow]) { + return false + } + val values = inputRow.asInstanceOf[GenericInternalRow].values + for (value <- values) { + value match { + case _: MapData => return false + case _: ArrayData => return false + case _: InternalRow => return false + case _ => + } + } + true + } + + def convertInternalRowToDataFrame(inputRow: InternalRow): DataFrame = { + val structFileSeq = new ArrayBuffer[StructField]() + val values = inputRow match { + case genericInternalRow: GenericInternalRow => + genericInternalRow.values + case _ => throw new UnsupportedOperationException("Unsupported InternalRow.") + } + values.foreach { + case boolean: 
java.lang.Boolean => + structFileSeq.append(StructField("bool", BooleanType, boolean == null)) + case short: java.lang.Short => + structFileSeq.append(StructField("i16", ShortType, short == null)) + case byte: java.lang.Byte => + structFileSeq.append(StructField("i8", ByteType, byte == null)) + case integer: java.lang.Integer => + structFileSeq.append(StructField("i32", IntegerType, integer == null)) + case long: java.lang.Long => + structFileSeq.append(StructField("i64", LongType, long == null)) + case float: java.lang.Float => + structFileSeq.append(StructField("fp32", FloatType, float == null)) + case double: java.lang.Double => + structFileSeq.append(StructField("fp64", DoubleType, double == null)) + case utf8String: UTF8String => + structFileSeq.append(StructField("str", StringType, utf8String == null)) + case byteArr: Array[Byte] => + structFileSeq.append(StructField("vbin", BinaryType, byteArr == null)) + case decimal: Decimal => + structFileSeq.append( + StructField("dec", DecimalType(decimal.precision, decimal.scale), decimal == null)) + case null => + structFileSeq.append(StructField("null", IntegerType, nullable = true)) + case unsupported @ _ => + throw new UnsupportedOperationException(s"Unsupported type: ${unsupported.getClass}") + } + val fields = structFileSeq.toSeq + _spark.internalCreateDataFrame( + _spark.sparkContext.parallelize(Seq(inputRow)), + StructType(fields)) + } +} diff --git a/tests/src/test/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleTestHelper.scala b/tests/src/test/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleTestHelper.scala index f0f88508449..6250e5a41d1 100644 --- a/tests/src/test/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleTestHelper.scala +++ b/tests/src/test/spark340/scala/com/nvidia/spark/rapids/shuffle/RapidsShuffleTestHelper.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,10 @@ {"spark": "341"} {"spark": "341db"} {"spark": "342"} +{"spark": "343"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shuffle diff --git a/tests/src/test/spark341db/scala/com/nvidia/spark/rapids/ToPrettyStringSuite.scala b/tests/src/test/spark341db/scala/com/nvidia/spark/rapids/ToPrettyStringSuite.scala index c53ed57b8a5..e5fc1a03ce7 100644 --- a/tests/src/test/spark341db/scala/com/nvidia/spark/rapids/ToPrettyStringSuite.scala +++ b/tests/src/test/spark341db/scala/com/nvidia/spark/rapids/ToPrettyStringSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ {"spark": "341db"} {"spark": "350"} {"spark": "351"} +{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids diff --git a/tests/src/test/spark350/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommandSuite.scala b/tests/src/test/spark350/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommandSuite.scala new file mode 100644 index 00000000000..997ee283eb8 --- /dev/null +++ b/tests/src/test/spark350/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommandSuite.scala @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids + +import com.nvidia.spark.rapids.FunSuiteWithTempDir +import com.nvidia.spark.rapids.SparkQueryCompareTestSuite + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.FileSourceMetadataAttribute.FILE_SOURCE_METADATA_COL_ATTR_KEY +import org.apache.spark.sql.connector.catalog.{Column, Identifier} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + +class GpuCreateDataSourceTableAsSelectCommandSuite + extends SparkQueryCompareTestSuite + with FunSuiteWithTempDir { + + // Fails with Spark < 3.5.0 - see https://github.com/NVIDIA/spark-rapids/issues/8844 + test("Metadata column related field metadata should not be leaked to catalogs") { + val inputDf = "inputDf" + val targetTable = "targetTable" + val columnName = "dataColumn" + // Create a metadata having an internal metadata field as its key + val newMetadata = Metadata.fromJson(s"""{"$FILE_SOURCE_METADATA_COL_ATTR_KEY": "dummy"}""") + withGpuSparkSession { spark => + withTable(spark, targetTable) { + // Create an Dataframe having a column with the above metadata + val schema = StructType(Array( + StructField(columnName, StringType, nullable = true, newMetadata) + )) + val emptyRDD = spark.sparkContext.emptyRDD[Row] + spark.createDataFrame(emptyRDD, schema).createOrReplaceTempView(inputDf) + + // Create the target table from the Dataframe (CTAS) + spark.sql(s""" + |CREATE TABLE $targetTable USING PARQUET + |AS SELECT $columnName FROM $inputDf + |""".stripMargin) + + // Fetch the created table's columns to verify metadata leakage + val tableColumns = getColumns(spark, targetTable) + assert(tableColumns.length == 1, "Table should only contain one column.") + val firstColumn = tableColumns.head + assert(firstColumn.name == columnName, s"Column name should be '$columnName'.") + assert(firstColumn.dataType == StringType, "Column type should be StringType.") + assert(firstColumn.metadataInJSON() == null, "Column metadata should be empty.") + } + } + } + + private def withTable(spark: SparkSession, tableNames: String*)(f: => Unit): Unit = { + Utils.tryWithSafeFinally(f) { 
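+      // Cleanup runs via Utils.tryWithSafeFinally, so the tables are dropped even if the
+      // test body `f` throws.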
+ tableNames.foreach { name => + spark.sql(s"DROP TABLE IF EXISTS $name") + } + } + } + + /** + * This method accesses the current catalog of the Spark session to + * fetch the schema of the input table. It then returns the columns of the table + * as an array of Column objects. + */ + private def getColumns(spark: SparkSession, tableName: String): Array[Column] = { + val catalogManager = spark.sessionState.catalogManager + val currentCatalog = catalogManager.currentCatalog.asTableCatalog + val identifier = Identifier.of(catalogManager.currentNamespace, tableName) + currentCatalog.loadTable(identifier).columns() + } +} diff --git a/tools/generated_files/311/operatorsScore.csv b/tools/generated_files/311/operatorsScore.csv index 10ceb96336d..65f76bd1c36 100644 --- a/tools/generated_files/311/operatorsScore.csv +++ b/tools/generated_files/311/operatorsScore.csv @@ -48,6 +48,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/311/supportedExprs.csv b/tools/generated_files/311/supportedExprs.csv index 52cd9957729..af0c5f1edd6 100644 --- a/tools/generated_files/311/supportedExprs.csv +++ b/tools/generated_files/311/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS diff --git a/tools/generated_files/312/operatorsScore.csv b/tools/generated_files/312/operatorsScore.csv index 10ceb96336d..65f76bd1c36 100644 --- a/tools/generated_files/312/operatorsScore.csv +++ b/tools/generated_files/312/operatorsScore.csv @@ -48,6 +48,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/312/supportedExprs.csv b/tools/generated_files/312/supportedExprs.csv index 52cd9957729..af0c5f1edd6 100644 --- a/tools/generated_files/312/supportedExprs.csv +++ b/tools/generated_files/312/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because 
it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS diff --git a/tools/generated_files/313/operatorsScore.csv b/tools/generated_files/313/operatorsScore.csv index 10ceb96336d..65f76bd1c36 100644 --- a/tools/generated_files/313/operatorsScore.csv +++ b/tools/generated_files/313/operatorsScore.csv @@ -48,6 +48,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/313/supportedExprs.csv b/tools/generated_files/313/supportedExprs.csv index 52cd9957729..af0c5f1edd6 100644 --- a/tools/generated_files/313/supportedExprs.csv +++ b/tools/generated_files/313/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% 
compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, 
,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/320/operatorsScore.csv +++ b/tools/generated_files/320/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/320/supportedExprs.csv +++ b/tools/generated_files/320/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance 
issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/321/operatorsScore.csv b/tools/generated_files/321/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/321/operatorsScore.csv +++ b/tools/generated_files/321/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/321/supportedExprs.csv b/tools/generated_files/321/supportedExprs.csv 
index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/321/supportedExprs.csv +++ b/tools/generated_files/321/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/321cdh/operatorsScore.csv b/tools/generated_files/321cdh/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/321cdh/operatorsScore.csv +++ b/tools/generated_files/321cdh/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/321cdh/supportedExprs.csv b/tools/generated_files/321cdh/supportedExprs.csv index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/321cdh/supportedExprs.csv +++ b/tools/generated_files/321cdh/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/322/operatorsScore.csv b/tools/generated_files/322/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/322/operatorsScore.csv +++ b/tools/generated_files/322/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/322/supportedExprs.csv b/tools/generated_files/322/supportedExprs.csv index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/322/supportedExprs.csv +++ b/tools/generated_files/322/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that 
could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/323/operatorsScore.csv b/tools/generated_files/323/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/323/operatorsScore.csv +++ b/tools/generated_files/323/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/323/supportedExprs.csv b/tools/generated_files/323/supportedExprs.csv index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/323/supportedExprs.csv +++ b/tools/generated_files/323/supportedExprs.csv @@ -36,6 +36,9 @@ 
ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/324/operatorsScore.csv b/tools/generated_files/324/operatorsScore.csv index c600071cb30..354875afa92 100644 --- a/tools/generated_files/324/operatorsScore.csv +++ b/tools/generated_files/324/operatorsScore.csv @@ -52,6 +52,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/324/supportedExprs.csv b/tools/generated_files/324/supportedExprs.csv index 4b1d8e23f17..43256eeb9b5 100644 --- a/tools/generated_files/324/supportedExprs.csv +++ b/tools/generated_files/324/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,N 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv index b47c1919ac6..035c60229c7 100644 --- a/tools/generated_files/330/operatorsScore.csv +++ b/tools/generated_files/330/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv index f86e913a309..8e12f247870 100644 --- a/tools/generated_files/330/supportedExprs.csv +++ b/tools/generated_files/330/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -233,9 +236,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that 
could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -288,9 +291,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/330cdh/operatorsScore.csv b/tools/generated_files/330cdh/operatorsScore.csv index b47c1919ac6..035c60229c7 100644 --- a/tools/generated_files/330cdh/operatorsScore.csv +++ b/tools/generated_files/330cdh/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/330cdh/supportedExprs.csv b/tools/generated_files/330cdh/supportedExprs.csv index f86e913a309..8e12f247870 100644 --- a/tools/generated_files/330cdh/supportedExprs.csv +++ b/tools/generated_files/330cdh/supportedExprs.csv @@ -36,6 +36,9 @@ 
ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -233,9 +236,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -288,9 +291,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/331/operatorsScore.csv b/tools/generated_files/331/operatorsScore.csv index 234986322a0..9bbb7badf4c 100644 --- a/tools/generated_files/331/operatorsScore.csv +++ b/tools/generated_files/331/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/331/supportedExprs.csv b/tools/generated_files/331/supportedExprs.csv index b0fa92db757..35d6333476a 100644 --- a/tools/generated_files/331/supportedExprs.csv +++ b/tools/generated_files/331/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/332/operatorsScore.csv b/tools/generated_files/332/operatorsScore.csv index 234986322a0..9bbb7badf4c 100644 --- a/tools/generated_files/332/operatorsScore.csv +++ b/tools/generated_files/332/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/332/supportedExprs.csv b/tools/generated_files/332/supportedExprs.csv index b0fa92db757..35d6333476a 100644 --- a/tools/generated_files/332/supportedExprs.csv +++ b/tools/generated_files/332/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that 
could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/332cdh/operatorsScore.csv b/tools/generated_files/332cdh/operatorsScore.csv index 234986322a0..9bbb7badf4c 100644 --- a/tools/generated_files/332cdh/operatorsScore.csv +++ b/tools/generated_files/332cdh/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/332cdh/supportedExprs.csv b/tools/generated_files/332cdh/supportedExprs.csv index b0fa92db757..35d6333476a 100644 --- a/tools/generated_files/332cdh/supportedExprs.csv +++ b/tools/generated_files/332cdh/supportedExprs.csv @@ -36,6 +36,9 @@ 
ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/333/operatorsScore.csv b/tools/generated_files/333/operatorsScore.csv index 234986322a0..9bbb7badf4c 100644 --- a/tools/generated_files/333/operatorsScore.csv +++ b/tools/generated_files/333/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/333/supportedExprs.csv b/tools/generated_files/333/supportedExprs.csv index b0fa92db757..35d6333476a 100644 --- a/tools/generated_files/333/supportedExprs.csv +++ b/tools/generated_files/333/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/334/operatorsScore.csv b/tools/generated_files/334/operatorsScore.csv index 234986322a0..9bbb7badf4c 100644 --- a/tools/generated_files/334/operatorsScore.csv +++ b/tools/generated_files/334/operatorsScore.csv @@ -53,6 +53,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/334/supportedExprs.csv b/tools/generated_files/334/supportedExprs.csv index b0fa92db757..35d6333476a 100644 --- a/tools/generated_files/334/supportedExprs.csv +++ b/tools/generated_files/334/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that 
could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/340/operatorsScore.csv b/tools/generated_files/340/operatorsScore.csv index 692a4694e98..c7d246f3fe9 100644 --- a/tools/generated_files/340/operatorsScore.csv +++ b/tools/generated_files/340/operatorsScore.csv @@ -54,6 +54,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/340/supportedExprs.csv b/tools/generated_files/340/supportedExprs.csv index 519fa951e3e..1f5d3237d2a 100644 --- a/tools/generated_files/340/supportedExprs.csv +++ b/tools/generated_files/340/supportedExprs.csv @@ -36,6 +36,9 @@ 
ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/341/operatorsScore.csv b/tools/generated_files/341/operatorsScore.csv index 692a4694e98..c7d246f3fe9 100644 --- a/tools/generated_files/341/operatorsScore.csv +++ b/tools/generated_files/341/operatorsScore.csv @@ -54,6 +54,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/341/supportedExprs.csv b/tools/generated_files/341/supportedExprs.csv index 519fa951e3e..1f5d3237d2a 100644 --- a/tools/generated_files/341/supportedExprs.csv +++ b/tools/generated_files/341/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/342/operatorsScore.csv b/tools/generated_files/342/operatorsScore.csv index 692a4694e98..c7d246f3fe9 100644 --- a/tools/generated_files/342/operatorsScore.csv +++ b/tools/generated_files/342/operatorsScore.csv @@ -54,6 +54,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/342/supportedExprs.csv b/tools/generated_files/342/supportedExprs.csv index 519fa951e3e..1f5d3237d2a 100644 --- a/tools/generated_files/342/supportedExprs.csv +++ b/tools/generated_files/342/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that 
could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/343/operatorsScore.csv b/tools/generated_files/343/operatorsScore.csv new file mode 100644 index 00000000000..c7d246f3fe9 --- /dev/null +++ b/tools/generated_files/343/operatorsScore.csv @@ -0,0 +1,286 @@ +CPUOperator,Score +CoalesceExec,3.0 +CollectLimitExec,3.0 +ExpandExec,3.0 +FileSourceScanExec,3.0 +FilterExec,2.8 +GenerateExec,3.0 +GlobalLimitExec,3.0 +LocalLimitExec,3.0 +ProjectExec,3.0 +RangeExec,3.0 +SampleExec,3.0 +SortExec,8.0 +SubqueryBroadcastExec,3.0 +TakeOrderedAndProjectExec,3.0 +UnionExec,3.0 +AQEShuffleReadExec,3.0 +HashAggregateExec,4.5 +ObjectHashAggregateExec,3.0 +SortAggregateExec,3.0 
+InMemoryTableScanExec,3.0 +DataWritingCommandExec,3.0 +ExecutedCommandExec,3.0 +WriteFilesExec,3.0 +AppendDataExecV1,3.0 +AtomicCreateTableAsSelectExec,3.0 +AtomicReplaceTableAsSelectExec,3.0 +BatchScanExec,3.0 +OverwriteByExpressionExecV1,3.0 +BroadcastExchangeExec,3.0 +ShuffleExchangeExec,4.2 +BroadcastHashJoinExec,5.1 +BroadcastNestedLoopJoinExec,3.0 +CartesianProductExec,3.0 +ShuffledHashJoinExec,3.0 +SortMergeJoinExec,22.7 +AggregateInPandasExec,1.2 +ArrowEvalPythonExec,1.2 +FlatMapCoGroupsInPandasExec,3.0 +FlatMapGroupsInPandasExec,1.2 +MapInPandasExec,1.2 +PythonMapInArrowExec,3.0 +WindowInPandasExec,1.2 +WindowExec,3.0 +HiveTableScanExec,3.0 +Abs,4 +Acos,4 +Acosh,4 +Add,4 +AggregateExpression,4 +Alias,4 +And,4 +ApproximatePercentile,4 +ArrayContains,4 +ArrayExcept,4 +ArrayExists,4 +ArrayFilter,4 +ArrayIntersect,4 +ArrayMax,4 +ArrayMin,4 +ArrayRemove,4 +ArrayRepeat,4 +ArrayTransform,4 +ArrayUnion,4 +ArraysOverlap,4 +ArraysZip,4 +Ascii,4 +Asin,4 +Asinh,4 +AtLeastNNonNulls,4 +Atan,4 +Atanh,4 +AttributeReference,4 +Average,4 +BRound,4 +BitLength,4 +BitwiseAnd,4 +BitwiseNot,4 +BitwiseOr,4 +BitwiseXor,4 +BloomFilterAggregate,4 +BloomFilterMightContain,4 +CaseWhen,4 +Cbrt,4 +Ceil,4 +CheckOverflowInTableInsert,4 +Coalesce,4 +CollectList,4 +CollectSet,4 +Concat,4 +ConcatWs,4 +Contains,4 +Conv,4 +Cos,4 +Cosh,4 +Cot,4 +Count,4 +CreateArray,4 +CreateMap,4 +CreateNamedStruct,4 +CurrentRow$,4 +DateAdd,4 +DateAddInterval,4 +DateDiff,4 +DateFormatClass,4 +DateSub,4 +DayOfMonth,4 +DayOfWeek,4 +DayOfYear,4 +DenseRank,4 +Divide,4 +DivideDTInterval,4 +DivideYMInterval,4 +DynamicPruningExpression,4 +ElementAt,4 +Empty2Null,4 +EndsWith,4 +EqualNullSafe,4 +EqualTo,4 +Exp,4 +Explode,4 +Expm1,4 +First,4 +Flatten,4 +Floor,4 +FormatNumber,4 +FromUTCTimestamp,4 +FromUnixTime,4 +GetArrayItem,4 +GetArrayStructFields,4 +GetJsonObject,4 +GetMapValue,4 +GetStructField,4 +GetTimestamp,4 +GreaterThan,4 +GreaterThanOrEqual,4 +Greatest,4 +HiveGenericUDF,4 +HiveSimpleUDF,4 +Hour,4 +Hypot,4 +If,4 +In,4 +InSet,4 +InSubqueryExec,4 +InitCap,4 +InputFileBlockLength,4 +InputFileBlockStart,4 +InputFileName,4 +IntegralDivide,4 +IsNaN,4 +IsNotNull,4 +IsNull,4 +JsonToStructs,4 +JsonTuple,4 +KnownFloatingPointNormalized,4 +KnownNotNull,4 +KnownNullable,4 +Lag,4 +LambdaFunction,4 +Last,4 +LastDay,4 +Lead,4 +Least,4 +Length,4 +LessThan,4 +LessThanOrEqual,4 +Like,4 +Literal,4 +Log,4 +Log10,4 +Log1p,4 +Log2,4 +Logarithm,4 +Lower,4 +MakeDecimal,4 +MapConcat,4 +MapEntries,4 +MapFilter,4 +MapKeys,4 +MapValues,4 +Max,4 +Md5,4 +MicrosToTimestamp,4 +MillisToTimestamp,4 +Min,4 +Minute,4 +MonotonicallyIncreasingID,4 +Month,4 +Multiply,4 +MultiplyDTInterval,4 +MultiplyYMInterval,4 +Murmur3Hash,4 +NaNvl,4 +NamedLambdaVariable,4 +NormalizeNaNAndZero,4 +Not,4 +NthValue,4 +OctetLength,4 +Or,4 +ParseUrl,4 +PercentRank,4 +Percentile,4 +PivotFirst,4 +Pmod,4 +PosExplode,4 +Pow,4 +PreciseTimestampConversion,4 +PythonUDF,4 +Quarter,4 +RLike,4 +RaiseError,4 +Rand,4 +Rank,4 +RegExpExtract,4 +RegExpExtractAll,4 +RegExpReplace,4 +Remainder,4 +ReplicateRows,4 +Reverse,4 +Rint,4 +Round,4 +RoundCeil,4 +RoundFloor,4 +RowNumber,4 +ScalaUDF,4 +ScalarSubquery,4 +Second,4 +SecondsToTimestamp,4 +Sequence,4 +ShiftLeft,4 +ShiftRight,4 +ShiftRightUnsigned,4 +Signum,4 +Sin,4 +Sinh,4 +Size,4 +SortArray,4 +SortOrder,4 +SparkPartitionID,4 +SpecifiedWindowFrame,4 +Sqrt,4 +Stack,4 +StartsWith,4 +StddevPop,4 +StddevSamp,4 +StringInstr,4 +StringLPad,4 +StringLocate,4 +StringRPad,4 +StringRepeat,4 +StringReplace,4 +StringSplit,4 +StringToMap,4 +StringTranslate,4 +StringTrim,4 
+StringTrimLeft,4 +StringTrimRight,4 +StructsToJson,4 +Substring,4 +SubstringIndex,4 +Subtract,4 +Sum,4 +Tan,4 +Tanh,4 +TimeAdd,4 +ToDegrees,4 +ToRadians,4 +ToUTCTimestamp,4 +ToUnixTimestamp,4 +TransformKeys,4 +TransformValues,4 +UnaryMinus,4 +UnaryPositive,4 +UnboundedFollowing$,4 +UnboundedPreceding$,4 +UnixTimestamp,4 +UnscaledValue,4 +Upper,4 +VariancePop,4 +VarianceSamp,4 +WeekDay,4 +WindowExpression,4 +WindowSpecDefinition,4 +XxHash64,4 +Year,4 diff --git a/tools/generated_files/343/supportedDataSource.csv b/tools/generated_files/343/supportedDataSource.csv new file mode 100644 index 00000000000..77f30cbe1de --- /dev/null +++ b/tools/generated_files/343/supportedDataSource.csv @@ -0,0 +1,13 @@ +Format,Direction,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +Avro,read,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO +CSV,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,NA,NA,NA,NA,NA,NA +Delta,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +Delta,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +HiveText,read,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS +HiveText,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Iceberg,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +JSON,read,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO +ORC,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA +ORC,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Parquet,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +Parquet,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/343/supportedExecs.csv b/tools/generated_files/343/supportedExecs.csv new file mode 100644 index 00000000000..9cd10316f8f --- /dev/null +++ b/tools/generated_files/343/supportedExecs.csv @@ -0,0 +1,56 @@ +Exec,Supported,Notes,Params,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +CoalesceExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +CollectLimitExec,NS,This is disabled by default because Collect Limit replacement can be slower on the GPU; if huge number of rows in a batch it could help by limiting the number of rows transferred from GPU to CPU,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ExpandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +FileSourceScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +FilterExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +GenerateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GlobalLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LocalLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ProjectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +RangeExec,S,None,Input/Output,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SampleExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,S,S +SortExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +SubqueryBroadcastExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +TakeOrderedAndProjectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +UnionExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS 
+AQEShuffleReadExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +HashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +ObjectHashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +SortAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +InMemoryTableScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,PS,PS,PS,NS,S,S +DataWritingCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,PS,NS,S,NS,PS,PS,PS,NS,S,S +ExecutedCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WriteFilesExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AppendDataExecV1,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +AtomicCreateTableAsSelectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +AtomicReplaceTableAsSelectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +BatchScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +OverwriteByExpressionExecV1,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +BroadcastExchangeExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ShuffleExchangeExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +BroadcastHashJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +BroadcastHashJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +BroadcastHashJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BroadcastHashJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +BroadcastNestedLoopJoinExec,S,None,condition(A non-inner join only is supported if the condition expression can be converted to a GPU AST expression),S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BroadcastNestedLoopJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CartesianProductExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShuffledHashJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +SortMergeJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +SortMergeJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +SortMergeJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortMergeJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +AggregateInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ArrowEvalPythonExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +FlatMapCoGroupsInPandasExec,NS,This is disabled by default because Performance is not ideal with many small groups,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +FlatMapGroupsInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +MapInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonMapInArrowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +WindowInPandasExec,NS,This is disabled by default because it only supports row based 
frame for now,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,NS,NS,NS,NS +WindowExec,S,None,partitionSpec,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +HiveTableScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS diff --git a/tools/generated_files/343/supportedExprs.csv b/tools/generated_files/343/supportedExprs.csv new file mode 100644 index 00000000000..1f5d3237d2a --- /dev/null +++ b/tools/generated_files/343/supportedExprs.csv @@ -0,0 +1,767 @@ +Expression,Supported,SQL Func,Notes,Context,Params,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +Abs,S,`abs`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,S +Abs,S,`abs`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,S +Abs,S,`abs`,None,AST,input,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NS,NS +Abs,S,`abs`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NS,NS +Acos,S,`acos`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Add,S,`+`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Add,S,`+`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Add,S,`+`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Alias,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Alias,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Alias,S, ,None,AST,input,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +Alias,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +And,S,`and`,None,project,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,project,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayContains,S,`array_contains`,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayContains,S,`array_contains`,None,project,key,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ArrayContains,S,`array_contains`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version 
because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMax,S,`array_max`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMax,S,`array_max`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +ArrayMin,S,`array_min`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMin,S,`array_min`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +ArrayRemove,S,`array_remove`,None,project,array,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,PS,NS,NS,NS,NS,NS +ArrayRemove,S,`array_remove`,None,project,element,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayRemove,S,`array_remove`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayRepeat,S,`array_repeat`,None,project,left,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayRepeat,S,`array_repeat`,None,project,right,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayRepeat,S,`array_repeat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayTransform,S,`transform`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayTransform,S,`transform`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayTransform,S,`transform`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArraysZip,S,`arrays_zip`,None,project,children,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysZip,S,`arrays_zip`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Ascii,NS,`ascii`,This is disabled by default because it only supports strings starting with ASCII or Latin-1 characters after Spark 3.2.3; 3.3.1 and 3.4.0. Otherwise the results will not match the CPU.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ascii,NS,`ascii`,This is disabled by default because it only supports strings starting with ASCII or Latin-1 characters after Spark 3.2.3; 3.3.1 and 3.4.0. 
Otherwise the results will not match the CPU.,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AtLeastNNonNulls,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +AtLeastNNonNulls,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AttributeReference,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitLength,S,`bit_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +BitLength,S,`bit_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,AST,input,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+BitwiseOr,S,`\|`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,rhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CaseWhen,S,`when`,None,project,predicate,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CaseWhen,S,`when`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CaseWhen,S,`when`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Cbrt,S,`cbrt`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ceil,S, ,None,project,input,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ceil,S, ,None,project,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +CheckOverflowInTableInsert,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +CheckOverflowInTableInsert,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Coalesce,S,`coalesce`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Coalesce,S,`coalesce`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Concat,S,`concat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,PS,NA,NA,NA,NA,NA +Concat,S,`concat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,PS,NA,NA,NA,NA,NA +ConcatWs,S,`concat_ws`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ConcatWs,S,`concat_ws`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. 
GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,num,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,from_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,to_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. 
,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CreateArray,S,`array`,None,project,arg,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,NS,PS,NS,NS,NS +CreateArray,S,`array`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CreateMap,S,`map`,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,S,NA,NA,PS,NA,PS,NA,NA,NA +CreateMap,S,`map`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NA,NA,PS,PS,PS,NA,NA,NA +CreateNamedStruct,S,`named_struct`; `struct`,None,project,name,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CreateNamedStruct,S,`named_struct`; `struct`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CreateNamedStruct,S,`named_struct`; `struct`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +CurrentRow$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,startDate,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,days,NA,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,start,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,interval,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,rhs,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,strfmt,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateSub,S,`date_sub`,None,project,startDate,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateSub,S,`date_sub`,None,project,days,NA,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+DateSub,S,`date_sub`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfMonth,S,`day`; `dayofmonth`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfMonth,S,`day`; `dayofmonth`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfWeek,S,`dayofweek`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfWeek,S,`dayofweek`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfYear,S,`dayofyear`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfYear,S,`dayofyear`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DenseRank,S,`dense_rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +DenseRank,S,`dense_rank`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideDTInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +DivideDTInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideDTInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +DivideYMInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +DivideYMInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideYMInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +DynamicPruningExpression,S, ,None,project,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DynamicPruningExpression,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +ElementAt,S,`element_at`,None,project,array/map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +ElementAt,S,`element_at`,None,project,index/key,PS,PS,PS,S,PS,PS,PS,PS,PS,PS,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ElementAt,S,`element_at`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Empty2Null,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Empty2Null,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualNullSafe,S,`<=>`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualNullSafe,S,`<=>`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualNullSafe,S,`<=>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualTo,S,`==`; `=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualTo,S,`==`; `=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualTo,S,`==`; `=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualTo,S,`==`; `=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +EqualTo,S,`==`; `=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +EqualTo,S,`==`; 
`=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Explode,S,`explode_outer`; `explode`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +Explode,S,`explode_outer`; `explode`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Flatten,S,`flatten`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Flatten,S,`flatten`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Floor,S, ,None,project,input,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Floor,S, ,None,project,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,x,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,d,NA,NA,NA,PS,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timezone,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,sec,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance 
issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS +GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetStructField,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +GetStructField,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetTimestamp,S, ,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetTimestamp,S, ,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetTimestamp,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThan,S,`>`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThan,S,`>`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThan,S,`>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThan,S,`>`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThan,S,`>`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThan,S,`>`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +If,S,`if`,None,project,predicate,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +If,S,`if`,None,project,trueValue,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +If,S,`if`,None,project,falseValue,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +If,S,`if`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +In,S,`in`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +In,S,`in`,None,project,list,PS,PS,PS,PS,PS,PS,PS,PS,PS,PS,PS,NS,NS,NS,NS,NA,NS,NS,NA,NA +In,S,`in`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InSet,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +InSet,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InitCap,S,`initcap`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case 
correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InitCap,S,`initcap`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileBlockLength,S,`input_file_block_length`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileBlockStart,S,`input_file_block_start`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileName,S,`input_file_name`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,lhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,rhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNaN,S,`isnan`,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNaN,S,`isnan`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNotNull,S,`isnotnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,NS +IsNotNull,S,`isnotnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,NS +IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS +KnownNotNull,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS +KnownNullable,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownNullable,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Lag,S,`lag`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lag,S,`lag`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lag,S,`lag`,None,window,default,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lag,S,`lag`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,arguments,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LastDay,S,`last_day`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LastDay,S,`last_day`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lead,S,`lead`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lead,S,`lead`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lead,S,`lead`,None,window,default,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lead,S,`lead`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Least,S,`least`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Least,S,`least`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Length,S,`char_length`; `character_length`; `len`; `length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +Length,S,`char_length`; `character_length`; `len`; `length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThan,S,`<`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThan,S,`<`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThan,S,`<`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThan,S,`<`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThan,S,`<`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThan,S,`<`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+LessThanOrEqual,S,`<=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThanOrEqual,S,`<=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Literal,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,S,S +Literal,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +Log,S,`ln`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log,S,`ln`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log10,S,`log10`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log10,S,`log10`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log1p,S,`log1p`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log1p,S,`log1p`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log2,S,`log2`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log2,S,`log2`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,value,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,base,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lower,S,`lcase`; `lower`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lower,S,`lcase`; `lower`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MakeDecimal,S, ,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MakeDecimal,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MapConcat,S,`map_concat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapConcat,S,`map_concat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapEntries,S,`map_entries`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapEntries,S,`map_entries`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA 
+MapKeys,S,`map_keys`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapKeys,S,`map_keys`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +MapValues,S,`map_values`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapValues,S,`map_values`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Md5,S,`md5`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA +Md5,S,`md5`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MicrosToTimestamp,S,`timestamp_micros`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MicrosToTimestamp,S,`timestamp_micros`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MillisToTimestamp,S,`timestamp_millis`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MillisToTimestamp,S,`timestamp_millis`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Minute,S,`minute`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Minute,S,`minute`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MonotonicallyIncreasingID,S,`monotonically_increasing_id`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Month,S,`month`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Month,S,`month`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyDTInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +MultiplyDTInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyDTInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +MultiplyYMInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +MultiplyYMInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyYMInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +Murmur3Hash,S,`hash`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Murmur3Hash,S,`hash`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,lhs,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,rhs,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NamedLambdaVariable,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +Not,S,`!`; `not`,None,project,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; `not`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; 
`not`,None,AST,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; `not`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NthValue,S,`nth_value`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +NthValue,S,`nth_value`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NthValue,S,`nth_value`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +OctetLength,S,`octet_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +OctetLength,S,`octet_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,url,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,partToExtract,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,key,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PercentRank,S,`percent_rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PercentRank,S,`percent_rank`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +PosExplode,S,`posexplode_outer`; `posexplode`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +PosExplode,S,`posexplode_outer`; `posexplode`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PreciseTimestampConversion,S, ,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PreciseTimestampConversion,S, ,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PythonUDF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, 
,None,window,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +Quarter,S,`quarter`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Quarter,S,`quarter`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RaiseError,S,`raise_error`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RaiseError,S,`raise_error`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +Rand,S,`rand`; `random`,None,project,seed,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rand,S,`rand`; `random`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rank,S,`rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +Rank,S,`rank`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,idx,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,idx,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,regex,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,pos,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,rep,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ReplicateRows,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +ReplicateRows,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Reverse,S,`reverse`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA 
+Reverse,S,`reverse`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Rint,S,`rint`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RowNumber,S,`row_number`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ScalaUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +Second,S,`second`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Second,S,`second`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SecondsToTimestamp,S,`timestamp_seconds`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +SecondsToTimestamp,S,`timestamp_seconds`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,start,NA,S,S,S,S,NA,NA,NS,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,stop,NA,S,S,S,S,NA,NA,NS,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,step,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRight,S,`shiftright`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRight,S,`shiftright`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRight,S,`shiftright`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Signum,S,`sign`; `signum`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Signum,S,`sign`; 
`signum`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Size,S,`cardinality`; `size`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +Size,S,`cardinality`; `size`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,ascendingOrder,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +SortOrder,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +SortOrder,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +SparkPartitionID,S,`spark_partition_id`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SpecifiedWindowFrame,S, ,None,project,lower,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,S,NA,NA,NA,NA,S,NS +SpecifiedWindowFrame,S, ,None,project,upper,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,S,NA,NA,NA,NA,S,NS +SpecifiedWindowFrame,S, ,None,project,result,NA,S,S,S,S,NS,NS,NA,NA,NA,NS,NA,NA,S,NA,NA,NA,NA,S,NS +Sqrt,S,`sqrt`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Stack,S,`stack`,None,project,n,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Stack,S,`stack`,None,project,expr,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +Stack,S,`stack`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringInstr,S,`instr`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringInstr,S,`instr`,None,project,substr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringInstr,S,`instr`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,len,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,pad,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+StringLocate,S,`locate`; `position`,None,project,substr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,start,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,len,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,pad,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,repeatTimes,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,replace,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,limit,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,pairDelim,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,keyValueDelim,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,from,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. 
In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,to,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StructsToJson,NS,`to_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,struct,S,S,S,S,S,S,S,S,PS,S,S,NA,NA,NA,PS,PS,PS,NA,NA,NA +StructsToJson,NS,`to_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,pos,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,len,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,delim,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,count,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Subtract,S,`-`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Subtract,S,`-`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Subtract,S,`-`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Tan,S,`tan`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TimeAdd,S, ,None,project,start,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TimeAdd,S, ,None,project,interval,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,S,NA +TimeAdd,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToDegrees,S,`degrees`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToDegrees,S,`degrees`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToRadians,S,`radians`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToRadians,S,`radians`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,timezone,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUnixTimestamp,S,`to_unix_timestamp`,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ToUnixTimestamp,S,`to_unix_timestamp`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUnixTimestamp,S,`to_unix_timestamp`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TransformKeys,S,`transform_keys`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformKeys,S,`transform_keys`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NS,NS +TransformKeys,S,`transform_keys`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformValues,S,`transform_values`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformValues,S,`transform_values`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +TransformValues,S,`transform_values`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +UnaryMinus,S,`negative`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryMinus,S,`negative`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryMinus,S,`negative`,None,AST,input,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +UnaryMinus,S,`negative`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +UnaryPositive,S,`positive`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,AST,input,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,AST,result,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,S,S +UnboundedFollowing$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +UnboundedPreceding$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnscaledValue,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnscaledValue,S, ,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Upper,S,`ucase`; `upper`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Upper,S,`ucase`; `upper`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WeekDay,S,`weekday`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WeekDay,S,`weekday`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WindowExpression,S, ,None,window,windowFunction,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WindowExpression,S, ,None,window,windowSpec,NA,S,S,S,S,NS,NS,NA,NA,NA,PS,NA,NA,S,NA,NA,NA,NA,S,NS +WindowExpression,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WindowSpecDefinition,S, 
,None,project,partition,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowSpecDefinition,S, ,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowSpecDefinition,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +XxHash64,S,`xxhash64`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +XxHash64,S,`xxhash64`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Year,S,`year`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Year,S,`year`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,aggregation,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,aggregation,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,reduction,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,reduction,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,window,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,window,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,input,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,percentage,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,accuracy,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,result,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,input,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,percentage,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,accuracy,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,result,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,window,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,child,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,estimatedItems,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,numBits,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,aggregation,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,window,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,aggregation,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,window,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Count,S,`count`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,aggregation,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Count,S,`count`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,reduction,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Count,S,`count`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,window,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +First,S,`first_value`; `first`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; 
`first`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Max,S,`max`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Max,S,`max`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Min,S,`min`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +PivotFirst,S, ,None,aggregation,pivotColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,aggregation,valueColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,pivotColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,valueColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,NS,NS,NS,NS +StddevPop,S,`stddev_pop`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+StddevPop,S,`stddev_pop`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,reduction,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,window,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,aggregation,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,reduction,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,window,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,window,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS 
+HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/350/operatorsScore.csv b/tools/generated_files/350/operatorsScore.csv index 50b7084026c..e9c010515e2 100644 --- a/tools/generated_files/350/operatorsScore.csv +++ b/tools/generated_files/350/operatorsScore.csv @@ -55,6 +55,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/350/supportedExecs.csv b/tools/generated_files/350/supportedExecs.csv index 949a482a551..409fa3e45aa 100644 --- a/tools/generated_files/350/supportedExecs.csv +++ b/tools/generated_files/350/supportedExecs.csv @@ -18,7 +18,7 @@ AQEShuffleReadExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS HashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS ObjectHashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS SortAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS -InMemoryTableScanExec,NS,This is disabled by default because there could be complications when using it with AQE. For more details please check https://github.com/NVIDIA/spark-rapids/issues/10603,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,PS,PS,PS,NS,S,S +InMemoryTableScanExec,NS,This is disabled by default because there could be complications when using it with AQE with Spark-3.5.0 and Spark-3.5.1. For more details please check https://github.com/NVIDIA/spark-rapids/issues/10603,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,PS,PS,PS,NS,S,S DataWritingCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,PS,NS,S,NS,PS,PS,PS,NS,S,S ExecutedCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S WriteFilesExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S diff --git a/tools/generated_files/350/supportedExprs.csv b/tools/generated_files/350/supportedExprs.csv index fb5c9e0bba7..9d9451a0a0d 100644 --- a/tools/generated_files/350/supportedExprs.csv +++ b/tools/generated_files/350/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA @@ -235,9 +238,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS @@ -290,9 +293,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S 
IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA -JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/351/operatorsScore.csv b/tools/generated_files/351/operatorsScore.csv new file mode 100644 index 00000000000..e9c010515e2 --- /dev/null +++ b/tools/generated_files/351/operatorsScore.csv @@ -0,0 +1,288 @@ +CPUOperator,Score +CoalesceExec,3.0 +CollectLimitExec,3.0 +ExpandExec,3.0 +FileSourceScanExec,3.0 +FilterExec,2.8 +GenerateExec,3.0 +GlobalLimitExec,3.0 +LocalLimitExec,3.0 +ProjectExec,3.0 +RangeExec,3.0 +SampleExec,3.0 +SortExec,8.0 +SubqueryBroadcastExec,3.0 +TakeOrderedAndProjectExec,3.0 +UnionExec,3.0 +AQEShuffleReadExec,3.0 +HashAggregateExec,4.5 +ObjectHashAggregateExec,3.0 +SortAggregateExec,3.0 +InMemoryTableScanExec,3.0 +DataWritingCommandExec,3.0 +ExecutedCommandExec,3.0 +WriteFilesExec,3.0 +AppendDataExecV1,3.0 +AtomicCreateTableAsSelectExec,3.0 +AtomicReplaceTableAsSelectExec,3.0 +BatchScanExec,3.0 +OverwriteByExpressionExecV1,3.0 +BroadcastExchangeExec,3.0 +ShuffleExchangeExec,4.2 +BroadcastHashJoinExec,5.1 +BroadcastNestedLoopJoinExec,3.0 +CartesianProductExec,3.0 +ShuffledHashJoinExec,3.0 +SortMergeJoinExec,22.7 +AggregateInPandasExec,1.2 +ArrowEvalPythonExec,1.2 
+FlatMapCoGroupsInPandasExec,3.0 +FlatMapGroupsInPandasExec,1.2 +MapInPandasExec,1.2 +PythonMapInArrowExec,3.0 +WindowInPandasExec,1.2 +WindowExec,3.0 +WindowGroupLimitExec,3.0 +HiveTableScanExec,3.0 +Abs,4 +Acos,4 +Acosh,4 +Add,4 +AggregateExpression,4 +Alias,4 +And,4 +ApproximatePercentile,4 +ArrayContains,4 +ArrayExcept,4 +ArrayExists,4 +ArrayFilter,4 +ArrayIntersect,4 +ArrayMax,4 +ArrayMin,4 +ArrayRemove,4 +ArrayRepeat,4 +ArrayTransform,4 +ArrayUnion,4 +ArraysOverlap,4 +ArraysZip,4 +Ascii,4 +Asin,4 +Asinh,4 +AtLeastNNonNulls,4 +Atan,4 +Atanh,4 +AttributeReference,4 +Average,4 +BRound,4 +BitLength,4 +BitwiseAnd,4 +BitwiseNot,4 +BitwiseOr,4 +BitwiseXor,4 +BloomFilterAggregate,4 +BloomFilterMightContain,4 +CaseWhen,4 +Cbrt,4 +Ceil,4 +CheckOverflowInTableInsert,4 +Coalesce,4 +CollectList,4 +CollectSet,4 +Concat,4 +ConcatWs,4 +Contains,4 +Conv,4 +Cos,4 +Cosh,4 +Cot,4 +Count,4 +CreateArray,4 +CreateMap,4 +CreateNamedStruct,4 +CurrentRow$,4 +DateAdd,4 +DateAddInterval,4 +DateDiff,4 +DateFormatClass,4 +DateSub,4 +DayOfMonth,4 +DayOfWeek,4 +DayOfYear,4 +DenseRank,4 +Divide,4 +DivideDTInterval,4 +DivideYMInterval,4 +DynamicPruningExpression,4 +ElementAt,4 +Empty2Null,4 +EndsWith,4 +EqualNullSafe,4 +EqualTo,4 +Exp,4 +Explode,4 +Expm1,4 +First,4 +Flatten,4 +Floor,4 +FormatNumber,4 +FromUTCTimestamp,4 +FromUnixTime,4 +GetArrayItem,4 +GetArrayStructFields,4 +GetJsonObject,4 +GetMapValue,4 +GetStructField,4 +GetTimestamp,4 +GreaterThan,4 +GreaterThanOrEqual,4 +Greatest,4 +HiveGenericUDF,4 +HiveSimpleUDF,4 +Hour,4 +Hypot,4 +If,4 +In,4 +InSet,4 +InSubqueryExec,4 +InitCap,4 +InputFileBlockLength,4 +InputFileBlockStart,4 +InputFileName,4 +IntegralDivide,4 +IsNaN,4 +IsNotNull,4 +IsNull,4 +JsonToStructs,4 +JsonTuple,4 +KnownFloatingPointNormalized,4 +KnownNotNull,4 +KnownNullable,4 +Lag,4 +LambdaFunction,4 +Last,4 +LastDay,4 +Lead,4 +Least,4 +Length,4 +LessThan,4 +LessThanOrEqual,4 +Like,4 +Literal,4 +Log,4 +Log10,4 +Log1p,4 +Log2,4 +Logarithm,4 +Lower,4 +MakeDecimal,4 +MapConcat,4 +MapEntries,4 +MapFilter,4 +MapKeys,4 +MapValues,4 +Max,4 +Md5,4 +MicrosToTimestamp,4 +MillisToTimestamp,4 +Min,4 +Minute,4 +MonotonicallyIncreasingID,4 +Month,4 +Multiply,4 +MultiplyDTInterval,4 +MultiplyYMInterval,4 +Murmur3Hash,4 +NaNvl,4 +NamedLambdaVariable,4 +NormalizeNaNAndZero,4 +Not,4 +NthValue,4 +OctetLength,4 +Or,4 +ParseUrl,4 +PercentRank,4 +Percentile,4 +PivotFirst,4 +Pmod,4 +PosExplode,4 +Pow,4 +PreciseTimestampConversion,4 +PythonUDAF,4 +PythonUDF,4 +Quarter,4 +RLike,4 +RaiseError,4 +Rand,4 +Rank,4 +RegExpExtract,4 +RegExpExtractAll,4 +RegExpReplace,4 +Remainder,4 +ReplicateRows,4 +Reverse,4 +Rint,4 +Round,4 +RoundCeil,4 +RoundFloor,4 +RowNumber,4 +ScalaUDF,4 +ScalarSubquery,4 +Second,4 +SecondsToTimestamp,4 +Sequence,4 +ShiftLeft,4 +ShiftRight,4 +ShiftRightUnsigned,4 +Signum,4 +Sin,4 +Sinh,4 +Size,4 +SortArray,4 +SortOrder,4 +SparkPartitionID,4 +SpecifiedWindowFrame,4 +Sqrt,4 +Stack,4 +StartsWith,4 +StddevPop,4 +StddevSamp,4 +StringInstr,4 +StringLPad,4 +StringLocate,4 +StringRPad,4 +StringRepeat,4 +StringReplace,4 +StringSplit,4 +StringToMap,4 +StringTranslate,4 +StringTrim,4 +StringTrimLeft,4 +StringTrimRight,4 +StructsToJson,4 +Substring,4 +SubstringIndex,4 +Subtract,4 +Sum,4 +Tan,4 +Tanh,4 +TimeAdd,4 +ToDegrees,4 +ToRadians,4 +ToUTCTimestamp,4 +ToUnixTimestamp,4 +TransformKeys,4 +TransformValues,4 +UnaryMinus,4 +UnaryPositive,4 +UnboundedFollowing$,4 +UnboundedPreceding$,4 +UnixTimestamp,4 +UnscaledValue,4 +Upper,4 +VariancePop,4 +VarianceSamp,4 +WeekDay,4 +WindowExpression,4 +WindowSpecDefinition,4 
+XxHash64,4 +Year,4 diff --git a/tools/generated_files/351/supportedDataSource.csv b/tools/generated_files/351/supportedDataSource.csv new file mode 100644 index 00000000000..77f30cbe1de --- /dev/null +++ b/tools/generated_files/351/supportedDataSource.csv @@ -0,0 +1,13 @@ +Format,Direction,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +Avro,read,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO +CSV,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,NA,NA,NA,NA,NA,NA +Delta,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +Delta,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +HiveText,read,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS +HiveText,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Iceberg,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +JSON,read,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO,CO +ORC,read,S,S,S,S,S,S,S,S,PS,S,S,NA,NS,NA,PS,PS,PS,NS,NA,NA +ORC,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Parquet,read,S,S,S,S,S,S,S,S,PS,S,S,NA,S,NA,PS,PS,PS,NS,S,S +Parquet,write,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/351/supportedExecs.csv b/tools/generated_files/351/supportedExecs.csv new file mode 100644 index 00000000000..409fa3e45aa --- /dev/null +++ b/tools/generated_files/351/supportedExecs.csv @@ -0,0 +1,57 @@ +Exec,Supported,Notes,Params,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +CoalesceExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +CollectLimitExec,NS,This is disabled by default because Collect Limit replacement can be slower on the GPU; if huge number of rows in a batch it could help by limiting the number of rows transferred from GPU to CPU,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ExpandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +FileSourceScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +FilterExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +GenerateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GlobalLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LocalLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ProjectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +RangeExec,S,None,Input/Output,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SampleExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,S,S +SortExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +SubqueryBroadcastExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +TakeOrderedAndProjectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +UnionExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +AQEShuffleReadExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +HashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +ObjectHashAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +SortAggregateExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,PS,NS,PS,PS,PS,NS,NS,NS +InMemoryTableScanExec,NS,This is disabled by default because there could be complications when using it with AQE with 
Spark-3.5.0 and Spark-3.5.1. For more details please check https://github.com/NVIDIA/spark-rapids/issues/10603,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,PS,PS,PS,NS,S,S +DataWritingCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,PS,NS,S,NS,PS,PS,PS,NS,S,S +ExecutedCommandExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WriteFilesExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AppendDataExecV1,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +AtomicCreateTableAsSelectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +AtomicReplaceTableAsSelectExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +BatchScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +OverwriteByExpressionExecV1,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,PS,PS,PS,NS,S,S +BroadcastExchangeExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ShuffleExchangeExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +BroadcastHashJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +BroadcastHashJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +BroadcastHashJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BroadcastHashJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +BroadcastNestedLoopJoinExec,S,None,condition(A non-inner join only is supported if the condition expression can be converted to a GPU AST expression),S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BroadcastNestedLoopJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CartesianProductExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +ShuffledHashJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShuffledHashJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +SortMergeJoinExec,S,None,leftKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +SortMergeJoinExec,S,None,rightKeys,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NS,NS +SortMergeJoinExec,S,None,condition,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortMergeJoinExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +AggregateInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ArrowEvalPythonExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +FlatMapCoGroupsInPandasExec,NS,This is disabled by default because Performance is not ideal with many small groups,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +FlatMapGroupsInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +MapInPandasExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonMapInArrowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +WindowInPandasExec,NS,This is disabled by default because it only supports row based frame for now,Input/Output,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,NS,NS,NS,NS +WindowExec,S,None,partitionSpec,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS 
+WindowGroupLimitExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +HiveTableScanExec,S,None,Input/Output,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS diff --git a/tools/generated_files/351/supportedExprs.csv b/tools/generated_files/351/supportedExprs.csv new file mode 100644 index 00000000000..9d9451a0a0d --- /dev/null +++ b/tools/generated_files/351/supportedExprs.csv @@ -0,0 +1,775 @@ +Expression,Supported,SQL Func,Notes,Context,Params,BOOLEAN,BYTE,SHORT,INT,LONG,FLOAT,DOUBLE,DATE,TIMESTAMP,STRING,DECIMAL,NULL,BINARY,CALENDAR,ARRAY,MAP,STRUCT,UDT,DAYTIME,YEARMONTH +Abs,S,`abs`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,S +Abs,S,`abs`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,S +Abs,S,`abs`,None,AST,input,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NS,NS +Abs,S,`abs`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NS,NS +Acos,S,`acos`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acos,S,`acos`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Acosh,S,`acosh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Add,S,`+`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Add,S,`+`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Add,S,`+`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Add,S,`+`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Alias,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Alias,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Alias,S, ,None,AST,input,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +Alias,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +And,S,`and`,None,project,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,project,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +And,S,`and`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayContains,S,`array_contains`,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayContains,S,`array_contains`,None,project,key,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ArrayContains,S,`array_contains`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). 
Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMax,S,`array_max`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMax,S,`array_max`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +ArrayMin,S,`array_min`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayMin,S,`array_min`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +ArrayRemove,S,`array_remove`,None,project,array,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS,PS,NS,NS,NS,NS,NS +ArrayRemove,S,`array_remove`,None,project,element,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayRemove,S,`array_remove`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayRepeat,S,`array_repeat`,None,project,left,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayRepeat,S,`array_repeat`,None,project,right,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayRepeat,S,`array_repeat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayTransform,S,`transform`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayTransform,S,`transform`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +ArrayTransform,S,`transform`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArrayUnion,S,`array_union`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysOverlap,S,`arrays_overlap`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArraysZip,S,`arrays_zip`,None,project,children,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ArraysZip,S,`arrays_zip`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Ascii,NS,`ascii`,This is disabled by default because it only supports strings starting with ASCII or Latin-1 characters after Spark 3.2.3; 3.3.1 and 3.4.0. Otherwise the results will not match the CPU.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ascii,NS,`ascii`,This is disabled by default because it only supports strings starting with ASCII or Latin-1 characters after Spark 3.2.3; 3.3.1 and 3.4.0. 
Otherwise the results will not match the CPU.,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asin,S,`asin`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Asinh,S,`asinh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AtLeastNNonNulls,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +AtLeastNNonNulls,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atan,S,`atan`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Atanh,S,`atanh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AttributeReference,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +AttributeReference,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,S,S +BRound,S,`bround`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BRound,S,`bround`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BRound,S,`bround`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitLength,S,`bit_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +BitLength,S,`bit_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseAnd,S,`&`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,AST,input,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseNot,S,`~`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+BitwiseOr,S,`\|`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseOr,S,`\|`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,lhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,rhs,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,project,result,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,lhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,rhs,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BitwiseXor,S,`^`,None,AST,result,NA,NS,NS,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,rhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterMightContain,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CaseWhen,S,`when`,None,project,predicate,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CaseWhen,S,`when`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CaseWhen,S,`when`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Cbrt,S,`cbrt`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cbrt,S,`cbrt`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ceil,S, ,None,project,input,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Ceil,S, ,None,project,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +CheckOverflowInTableInsert,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +CheckOverflowInTableInsert,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Coalesce,S,`coalesce`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Coalesce,S,`coalesce`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +Concat,S,`concat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,PS,NA,NA,NA,NA,NA +Concat,S,`concat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,PS,NA,NA,NA,NA,NA +ConcatWs,S,`concat_ws`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ConcatWs,S,`concat_ws`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Contains,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. 
GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,num,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,from_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,to_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. 
,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cos,S,`cos`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cosh,S,`cosh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Cot,S,`cot`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CreateArray,S,`array`,None,project,arg,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,NS,PS,NS,NS,NS +CreateArray,S,`array`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CreateMap,S,`map`,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,S,NA,NA,PS,NA,PS,NA,NA,NA +CreateMap,S,`map`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NA,NA,PS,PS,PS,NA,NA,NA +CreateNamedStruct,S,`named_struct`; `struct`,None,project,name,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +CreateNamedStruct,S,`named_struct`; `struct`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CreateNamedStruct,S,`named_struct`; `struct`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +CurrentRow$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,startDate,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,days,NA,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAdd,S,`date_add`; `dateadd`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,start,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,interval,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA +DateAddInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,rhs,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateDiff,S,`date_diff`; `datediff`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,strfmt,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateFormatClass,S,`date_format`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateSub,S,`date_sub`,None,project,startDate,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DateSub,S,`date_sub`,None,project,days,NA,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+DateSub,S,`date_sub`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfMonth,S,`day`; `dayofmonth`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfMonth,S,`day`; `dayofmonth`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfWeek,S,`dayofweek`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfWeek,S,`dayofweek`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfYear,S,`dayofyear`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DayOfYear,S,`dayofyear`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DenseRank,S,`dense_rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +DenseRank,S,`dense_rank`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Divide,S,`/`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideDTInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +DivideDTInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideDTInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +DivideYMInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +DivideYMInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +DivideYMInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +DynamicPruningExpression,S, ,None,project,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +DynamicPruningExpression,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +ElementAt,S,`element_at`,None,project,array/map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +ElementAt,S,`element_at`,None,project,index/key,PS,PS,PS,S,PS,PS,PS,PS,PS,PS,PS,NS,NS,NS,NS,NS,NS,NS,NS,NS +ElementAt,S,`element_at`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Empty2Null,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Empty2Null,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EndsWith,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualNullSafe,S,`<=>`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualNullSafe,S,`<=>`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualNullSafe,S,`<=>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualTo,S,`==`; `=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualTo,S,`==`; `=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +EqualTo,S,`==`; `=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +EqualTo,S,`==`; `=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +EqualTo,S,`==`; `=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +EqualTo,S,`==`; 
`=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Exp,S,`exp`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Explode,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +Explode,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Expm1,S,`expm1`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Flatten,S,`flatten`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Flatten,S,`flatten`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Floor,S, ,None,project,input,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Floor,S, ,None,project,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,x,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,d,NA,NA,NA,PS,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FormatNumber,S,`format_number`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,timezone,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUTCTimestamp,S,`from_utc_timestamp`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,sec,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +FromUnixTime,S,`from_unixtime`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetMapValue,S, 
,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS,NS,NS +GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetStructField,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +GetStructField,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +GetTimestamp,S, ,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetTimestamp,S, ,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GetTimestamp,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThan,S,`>`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThan,S,`>`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThan,S,`>`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThan,S,`>`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThan,S,`>`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThan,S,`>`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Hypot,S,`hypot`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +If,S,`if`,None,project,predicate,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +If,S,`if`,None,project,trueValue,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +If,S,`if`,None,project,falseValue,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +If,S,`if`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,S +In,S,`in`,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +In,S,`in`,None,project,list,PS,PS,PS,PS,PS,PS,PS,PS,PS,PS,PS,NS,NS,NS,NS,NA,NS,NS,NA,NA +In,S,`in`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InSet,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +InSet,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InitCap,S,`initcap`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+InitCap,S,`initcap`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileBlockLength,S,`input_file_block_length`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileBlockStart,S,`input_file_block_start`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InputFileName,S,`input_file_name`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,lhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,rhs,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +IntegralDivide,S,`div`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNaN,S,`isnan`,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNaN,S,`isnan`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNotNull,S,`isnotnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,NS +IsNotNull,S,`isnotnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,S,NS +IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS +KnownNotNull,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS,NS,NS +KnownNullable,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +KnownNullable,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Lag,S,`lag`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lag,S,`lag`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lag,S,`lag`,None,window,default,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lag,S,`lag`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,arguments,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LambdaFunction,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +LastDay,S,`last_day`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LastDay,S,`last_day`,None,project,result,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lead,S,`lead`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lead,S,`lead`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lead,S,`lead`,None,window,default,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Lead,S,`lead`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Least,S,`least`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Least,S,`least`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Length,S,`char_length`; `character_length`; `len`; `length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +Length,S,`char_length`; `character_length`; `len`; `length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThan,S,`<`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThan,S,`<`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThan,S,`<`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThan,S,`<`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThan,S,`<`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThan,S,`<`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+LessThanOrEqual,S,`<=`,None,project,lhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,project,rhs,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,PS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +LessThanOrEqual,S,`<=`,None,AST,lhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,NS,NS,NA,NA +LessThanOrEqual,S,`<=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Like,S,`like`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Literal,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,S,S +Literal,S, ,None,AST,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,NS,NS,NS,NS,NS,NS +Log,S,`ln`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log,S,`ln`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log10,S,`log10`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log10,S,`log10`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log1p,S,`log1p`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log1p,S,`log1p`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log2,S,`log2`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Log2,S,`log2`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,value,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,base,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Logarithm,S,`log`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lower,S,`lcase`; `lower`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Lower,S,`lcase`; `lower`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MakeDecimal,S, ,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MakeDecimal,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MapConcat,S,`map_concat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapConcat,S,`map_concat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapEntries,S,`map_entries`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapEntries,S,`map_entries`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MapFilter,S,`map_filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA 
+MapKeys,S,`map_keys`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapKeys,S,`map_keys`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +MapValues,S,`map_values`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +MapValues,S,`map_values`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Md5,S,`md5`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA +Md5,S,`md5`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MicrosToTimestamp,S,`timestamp_micros`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MicrosToTimestamp,S,`timestamp_micros`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MillisToTimestamp,S,`timestamp_millis`,None,project,input,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MillisToTimestamp,S,`timestamp_millis`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Minute,S,`minute`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Minute,S,`minute`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +MonotonicallyIncreasingID,S,`monotonically_increasing_id`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Month,S,`month`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Month,S,`month`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Multiply,S,`*`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyDTInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +MultiplyDTInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyDTInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA +MultiplyYMInterval,S, ,None,project,lhs,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +MultiplyYMInterval,S, ,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +MultiplyYMInterval,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S +Murmur3Hash,S,`hash`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +Murmur3Hash,S,`hash`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,lhs,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,rhs,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NaNvl,S,`nanvl`,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NamedLambdaVariable,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +Not,S,`!`; `not`,None,project,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; `not`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; 
`not`,None,AST,input,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Not,S,`!`; `not`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NthValue,S,`nth_value`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +NthValue,S,`nth_value`,None,window,offset,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NthValue,S,`nth_value`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +OctetLength,S,`octet_length`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +OctetLength,S,`octet_length`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,lhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,rhs,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Or,S,`or`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,url,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,partToExtract,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,key,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ParseUrl,S,`parse_url`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PercentRank,S,`percent_rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PercentRank,S,`percent_rank`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pmod,S,`pmod`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA +PosExplode,S,`posexplode_outer`; `posexplode`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +PosExplode,S,`posexplode_outer`; `posexplode`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,rhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Pow,S,`pow`; `power`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PreciseTimestampConversion,S, ,None,project,input,NA,NA,NA,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PreciseTimestampConversion,S, ,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +PythonUDAF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDAF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDAF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDAF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDAF,S, 
,None,window,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDAF,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDAF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDAF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,aggregation,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,reduction,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,window,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +PythonUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NS,PS,NS,PS,NS,NS,NS +PythonUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,NS,NS,NS,NA,PS,NS,PS,NA,NA,NA +Quarter,S,`quarter`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Quarter,S,`quarter`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RLike,S,`regexp_like`; `regexp`; `rlike`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RaiseError,S,`raise_error`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RaiseError,S,`raise_error`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +Rand,S,`rand`; `random`,None,project,seed,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rand,S,`rand`; `random`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rank,S,`rank`,None,window,ordering,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +Rank,S,`rank`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,idx,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtract,S,`regexp_extract`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,idx,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpExtractAll,S,`regexp_extract_all`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,regex,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,pos,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RegExpReplace,S,`regexp_replace`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+RegExpReplace,S,`regexp_replace`,None,project,rep,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Remainder,S,`%`; `mod`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ReplicateRows,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +ReplicateRows,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Reverse,S,`reverse`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Reverse,S,`reverse`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Rint,S,`rint`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Rint,S,`rint`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Round,S,`round`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundCeil,S, ,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,value,NA,S,S,S,S,PS,PS,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,scale,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +RoundFloor,S, ,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +RowNumber,S,`row_number`,None,window,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ScalaUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +ScalaUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +Second,S,`second`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Second,S,`second`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SecondsToTimestamp,S,`timestamp_seconds`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +SecondsToTimestamp,S,`timestamp_seconds`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,start,NA,S,S,S,S,NA,NA,NS,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,stop,NA,S,S,S,S,NA,NA,NS,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,step,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA +Sequence,S,`sequence`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftLeft,S,`shiftleft`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRight,S,`shiftright`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ShiftRight,S,`shiftright`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRight,S,`shiftright`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,value,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,amount,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ShiftRightUnsigned,S,`shiftrightunsigned`,None,project,result,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Signum,S,`sign`; `signum`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Signum,S,`sign`; `signum`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sin,S,`sin`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sinh,S,`sinh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Size,S,`cardinality`; `size`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,PS,NA,NA,NA,NA +Size,S,`cardinality`; `size`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,array,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,ascendingOrder,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SortArray,S,`sort_array`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +SortOrder,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +SortOrder,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +SparkPartitionID,S,`spark_partition_id`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SpecifiedWindowFrame,S, ,None,project,lower,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,S,NA,NA,NA,NA,S,NS +SpecifiedWindowFrame,S, ,None,project,upper,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,S,NA,NA,NA,NA,S,NS +SpecifiedWindowFrame,S, ,None,project,result,NA,S,S,S,S,NS,NS,NA,NA,NA,NS,NA,NA,S,NA,NA,NA,NA,S,NS +Sqrt,S,`sqrt`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sqrt,S,`sqrt`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Stack,S,`stack`,None,project,n,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Stack,S,`stack`,None,project,expr,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +Stack,S,`stack`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StartsWith,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+StringInstr,S,`instr`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringInstr,S,`instr`,None,project,substr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringInstr,S,`instr`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,len,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,pad,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLPad,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,substr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,start,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringLocate,S,`locate`; `position`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,len,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,pad,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRPad,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,repeatTimes,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringRepeat,S,`repeat`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,replace,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringReplace,S,`replace`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,regexp,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,limit,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringSplit,S,`split`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,pairDelim,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,keyValueDelim,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringToMap,S,`str_to_map`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. 
In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,from,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,to,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTranslate,S,`translate`,This is not 100% compatible with the Spark version because the GPU implementation supports all unicode code points. In Spark versions < 3.2.0; translate() does not support unicode characters with code point >= U+10000 (See SPARK-34094),project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrim,S,`trim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimLeft,S,`ltrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StringTrimRight,S,`rtrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StructsToJson,NS,`to_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,struct,S,S,S,S,S,S,S,S,PS,S,S,NA,NA,NA,PS,PS,PS,NA,NA,NA +StructsToJson,NS,`to_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,pos,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,len,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Substring,S,`substr`; `substring`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,delim,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,count,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +SubstringIndex,S,`substring_index`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Subtract,S,`-`,None,project,lhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,project,rhs,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +Subtract,S,`-`,None,AST,lhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Subtract,S,`-`,None,AST,rhs,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Subtract,S,`-`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +Tan,S,`tan`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tan,S,`tan`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,AST,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Tanh,S,`tanh`,None,AST,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TimeAdd,S, ,None,project,start,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TimeAdd,S, ,None,project,interval,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,S,NA +TimeAdd,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToDegrees,S,`degrees`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToDegrees,S,`degrees`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToRadians,S,`radians`,None,project,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToRadians,S,`radians`,None,project,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,timestamp,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,timezone,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUTCTimestamp,S,`to_utc_timestamp`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUnixTimestamp,S,`to_unix_timestamp`,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ToUnixTimestamp,S,`to_unix_timestamp`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ToUnixTimestamp,S,`to_unix_timestamp`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +TransformKeys,S,`transform_keys`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformKeys,S,`transform_keys`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NS,NS +TransformKeys,S,`transform_keys`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformValues,S,`transform_values`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +TransformValues,S,`transform_values`,None,project,function,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,PS,PS,NS,NS,NS +TransformValues,S,`transform_values`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA +UnaryMinus,S,`negative`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryMinus,S,`negative`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryMinus,S,`negative`,None,AST,input,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +UnaryMinus,S,`negative`,None,AST,result,NA,NS,NS,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,NS,NS +UnaryPositive,S,`positive`,None,project,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,project,result,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,AST,input,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,S,S +UnaryPositive,S,`positive`,None,AST,result,NA,S,S,S,S,S,S,NA,NA,NA,NS,NA,NA,NS,NA,NA,NA,NA,S,S +UnboundedFollowing$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +UnboundedPreceding$,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,timeExp,NA,NA,NA,NA,NA,NA,NA,S,PS,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,format,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnixTimestamp,S,`unix_timestamp`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnscaledValue,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA +UnscaledValue,S, ,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Upper,S,`ucase`; `upper`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Upper,S,`ucase`; `upper`,This is not 100% compatible with the Spark version because the Unicode version used by cuDF and the JVM may differ; resulting in some corner-case characters not changing case correctly.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WeekDay,S,`weekday`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WeekDay,S,`weekday`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +WindowExpression,S, ,None,window,windowFunction,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WindowExpression,S, ,None,window,windowSpec,NA,S,S,S,S,NS,NS,NA,NA,NA,PS,NA,NA,S,NA,NA,NA,NA,S,NS +WindowExpression,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +WindowSpecDefinition,S, 
,None,project,partition,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowSpecDefinition,S, ,None,project,value,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +WindowSpecDefinition,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,PS,NS,NS,NS +XxHash64,S,`xxhash64`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +XxHash64,S,`xxhash64`,None,project,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Year,S,`year`,None,project,input,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Year,S,`year`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,aggregation,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,aggregation,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,reduction,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,reduction,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,window,aggFunc,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +AggregateExpression,S, ,None,window,filter,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +AggregateExpression,S, ,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,input,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,percentage,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,accuracy,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,aggregation,result,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,input,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,percentage,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,accuracy,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+ApproximatePercentile,S,`approx_percentile`; `percentile_approx`,This is not 100% compatible with the Spark version because the GPU implementation of approx_percentile is not bit-for-bit compatible with Apache Spark,reduction,result,NA,S,S,S,S,S,S,NS,NS,NA,S,NA,NA,NA,PS,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Average,S,`avg`; `mean`,None,window,input,NA,S,S,S,S,S,S,NA,NA,NA,S,S,NA,NS,NA,NA,NA,NA,NS,NS +Average,S,`avg`; `mean`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,child,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,estimatedItems,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,numBits,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +BloomFilterAggregate,S, ,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,aggregation,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectList,S,`array_agg`; `collect_list`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +CollectList,S,`array_agg`; `collect_list`,None,window,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,aggregation,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,reduction,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +CollectSet,S,`collect_set`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,PS,NS,NS,NS +CollectSet,S,`collect_set`,None,window,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA +Count,S,`count`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,aggregation,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Count,S,`count`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,reduction,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Count,S,`count`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S,S,S +Count,S,`count`,None,window,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +First,S,`first_value`; `first`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; 
`first`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +First,S,`first_value`; `first`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Last,S,`last_value`; `last`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +Max,S,`max`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Max,S,`max`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Max,S,`max`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Min,S,`min`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NA,PS,NS,NA,NA +Min,S,`min`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Min,S,`min`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +Percentile,S,`percentile`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,percentage,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,frequency,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +Percentile,S,`percentile`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA +PivotFirst,S, ,None,aggregation,pivotColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,aggregation,valueColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,pivotColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,valueColumn,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NS,NS,NS,NS,NS +PivotFirst,S, ,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,PS,NS,NS,NS,NS,NS +StddevPop,S,`stddev_pop`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
+StddevPop,S,`stddev_pop`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevPop,S,`stddev_pop`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,reduction,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,reduction,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,window,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +StddevSamp,S,`std`; `stddev_samp`; `stddev`,None,window,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,aggregation,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,aggregation,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,reduction,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,reduction,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,window,input,NA,S,S,S,S,S,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +Sum,S,`sum`,None,window,result,NA,NA,NA,NA,S,NA,S,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VariancePop,S,`var_pop`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,reduction,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,reduction,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,aggregation,input,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,aggregation,result,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,window,input,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +VarianceSamp,S,`var_samp`; `variance`,None,window,result,NA,NA,NA,NA,NA,NA,NS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NormalizeNaNAndZero,S, ,None,project,input,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +NormalizeNaNAndZero,S, ,None,project,result,NA,NA,NA,NA,NA,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +InSubqueryExec,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +InSubqueryExec,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ScalarSubquery,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS 
+HiveGenericUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveGenericUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveSimpleUDF,S, ,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS +HiveSimpleUDF,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,NS,NS,NS diff --git a/tools/generated_files/operatorsScore.csv b/tools/generated_files/operatorsScore.csv index 10ceb96336d..65f76bd1c36 100644 --- a/tools/generated_files/operatorsScore.csv +++ b/tools/generated_files/operatorsScore.csv @@ -48,6 +48,7 @@ ApproximatePercentile,4 ArrayContains,4 ArrayExcept,4 ArrayExists,4 +ArrayFilter,4 ArrayIntersect,4 ArrayMax,4 ArrayMin,4 diff --git a/tools/generated_files/supportedExprs.csv b/tools/generated_files/supportedExprs.csv index 52cd9957729..af0c5f1edd6 100644 --- a/tools/generated_files/supportedExprs.csv +++ b/tools/generated_files/supportedExprs.csv @@ -36,6 +36,9 @@ ArrayExcept,S,`array_except`,This is not 100% compatible with the Spark version ArrayExists,S,`exists`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayExists,S,`exists`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA ArrayExists,S,`exists`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,argument,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA +ArrayFilter,S,`filter`,None,project,function,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +ArrayFilter,S,`filter`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array1,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,array2,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA ArrayIntersect,S,`array_intersect`,This is not 100% compatible with the Spark version because the GPU implementation treats -0.0 and 0.0 as equal; but the CPU implementation currently does not (see SPARK-39845). Also; Apache Spark 3.1.3 fixed issue SPARK-36741 where NaNs in these set like operators were not treated as being equal. 
We have chosen to break with compatibility for the older versions of Spark in this instance and handle NaNs the same as 3.1.3+,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA @@ -224,9 +227,9 @@ GetArrayItem,S, ,None,project,ordinal,NA,S,S,S,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,N GetArrayItem,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS GetArrayStructFields,S, ,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA GetArrayStructFields,S, ,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA -GetJsonObject,NS,`get_json_object`,This is disabled by default because escape sequences are not processed correctly; the input is not validated; and the output is not normalized the same as Spark,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,path,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA +GetJsonObject,NS,`get_json_object`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA GetMapValue,S, ,None,project,map,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA GetMapValue,S, ,None,project,key,S,S,S,S,S,S,S,S,PS,S,S,NS,NS,NS,NS,NS,NS,NS GetMapValue,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS @@ -279,9 +282,9 @@ IsNull,S,`isnull`,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS IsNull,S,`isnull`,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,jsonStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA JsonToStructs,NS,`from_json`,This is disabled by default because it is currently in beta and undergoes continuous enhancements. 
Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NS,PS,PS,NA
-JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
-JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
-JsonTuple,NS,`json_tuple`,This is disabled by default because JsonTuple on the GPU does not support all of the normalization that the CPU supports.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
+JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,json,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
+JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,field,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
+JsonTuple,NS,`json_tuple`,This is disabled by default because Experimental feature that could be unstable or have performance issues.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA
KnownFloatingPointNormalized,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S
KnownFloatingPointNormalized,S, ,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,S,PS,PS,PS,S
KnownNotNull,S, ,None,project,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,S,PS,PS,PS,NS
diff --git a/tools/pom.xml b/tools/pom.xml
index 30db8f0e0ff..8b9dabcc791 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -22,14 +22,14 @@
         <groupId>com.nvidia</groupId>
         <artifactId>rapids-4-spark-jdk-profiles_2.12</artifactId>
-        <version>24.04.0-SNAPSHOT</version>
+        <version>24.06.0-SNAPSHOT</version>
         <relativePath>../jdk-profiles/pom.xml</relativePath>
     </parent>
     <artifactId>rapids-4-spark-tools-support</artifactId>
     <packaging>pom</packaging>
     <name>RAPIDS Accelerator for Apache Spark Tools Support</name>
     <description>Supporting code for RAPIDS Accelerator tools</description>
-    <version>24.04.0-SNAPSHOT</version>
+    <version>24.06.0-SNAPSHOT</version>
         <groupId>com.nvidia</groupId>
diff --git a/udf-compiler/pom.xml b/udf-compiler/pom.xml
index b09262fff13..9bbd21353c5 100644
--- a/udf-compiler/pom.xml
+++ b/udf-compiler/pom.xml
@@ -21,13 +21,13 @@
         <groupId>com.nvidia</groupId>
         <artifactId>rapids-4-spark-shim-deps-parent_2.12</artifactId>
-        <version>24.04.0-SNAPSHOT</version>
+        <version>24.06.0-SNAPSHOT</version>
         <relativePath>../shim-deps/pom.xml</relativePath>
     </parent>
     <artifactId>rapids-4-spark-udf_2.12</artifactId>
     <name>RAPIDS Accelerator for Apache Spark Scala UDF Plugin</name>
     <description>The RAPIDS Scala UDF plugin for Apache Spark</description>
-    <version>24.04.0-SNAPSHOT</version>
+    <version>24.06.0-SNAPSHOT</version>
     <rapids.module>udf-compiler</rapids.module>