diff --git a/.github/workflows/license-header-check.yml b/.github/workflows/license-header-check.yml
new file mode 100644
index 00000000000..e7f62399436
--- /dev/null
+++ b/.github/workflows/license-header-check.yml
@@ -0,0 +1,58 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A workflow to check copyright/license header
+name: license header check
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  license-header-check:
+    runs-on: ubuntu-latest
+    if: "!contains(github.event.pull_request.title, '[bot]')"
+    steps:
+      - name: Get checkout depth
+        run: |
+          echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
+
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: ${{ env.PR_FETCH_DEPTH }}
+
+      - name: license-header-check
+        uses: NVIDIA/spark-rapids-common/license-header-check@main
+        with:
+          included_file_patterns: |
+            *.yml,
+            *.yaml,
+            *.sh,
+            *.xml,
+            *.properties,
+            *.scala,
+            *.py,
+            build/*,
+            *.cpp,
+            *Dockerfile*,
+            *Jenkinsfile*,
+            *.ini,
+            *.java,
+            *.fbs
+          excluded_file_patterns: |
+            *target/*,
+            thirdparty/*,
+            sql-plugin/src/main/java/com/nvidia/spark/rapids/format/*
+
\ No newline at end of file
diff --git a/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala b/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
index 2884968660d..3480718dbc7 100644
--- a/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
+++ b/datagen/src/main/spark400/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala
@@ -24,6 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.internal.ExpressionUtils.{column, expression}
 
 object DataGenExprShims {
-  def columnToExpr(c: Column): Expression = c
-  def exprToColumn(e: Expression): Column = e
+  def columnToExpr(c: Column): Expression = expression(c)
+  def exprToColumn(e: Expression): Column = column(e)
 }
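The DataGenExprShims change above is needed because Spark 4.0.0 no longer lets a `Column` stand in directly for a Catalyst `Expression`, so the spark400 shim now converts explicitly through `ExpressionUtils.expression` and `ExpressionUtils.column`. A minimal sketch of how calling code can stay shim-agnostic by always round-tripping through the shim; the `ShimRoundTrip` object and the literal value are illustrative and not part of this PR:

```scala
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.tests.datagen.DataGenExprShims

object ShimRoundTrip {
  // Wrap a Catalyst expression into a Column and unwrap it again, always going
  // through the shim so the same calling code compiles no matter which Spark
  // version's shim provides the conversion.
  def roundTrip(): Expression = {
    val lit: Expression = Literal(42)
    val asColumn: Column = DataGenExprShims.exprToColumn(lit)
    DataGenExprShims.columnToExpr(asColumn)
  }
}
```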
diff --git a/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala b/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala
index 0c212d6842a..5b26943a541 100644
--- a/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala
+++ b/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala
@@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.{CoalescedPartitionSpec, ShufflePartitionS
 import org.apache.spark.sql.execution.exchange.Exchange
 import org.apache.spark.sql.execution.metric.{SQLMetrics, SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter}
 import org.apache.spark.sql.rapids.execution.{GpuShuffleExchangeExecBase, ShuffledBatchRDD}
+import org.apache.spark.sql.rapids.execution.GpuShuffleExchangeExecBase.createAdditionalExchangeMetrics
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.ThreadUtils
 
@@ -71,22 +72,11 @@ case class GpuOptimizeWriteExchangeExec(
   private[sql] lazy val readMetrics =
     SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext)
 
-  override lazy val additionalMetrics: Map[String, GpuMetric] = Map(
-    "dataSize" -> createSizeMetric(ESSENTIAL_LEVEL, "data size"),
-    "dataReadSize" -> createSizeMetric(MODERATE_LEVEL, "data read size"),
-    "rapidsShuffleSerializationTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. serialization time"),
-    "rapidsShuffleDeserializationTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. deserialization time"),
-    "rapidsShuffleWriteTime" ->
-      createNanoTimingMetric(ESSENTIAL_LEVEL, "rs. shuffle write time"),
-    "rapidsShuffleCombineTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. shuffle combine time"),
-    "rapidsShuffleWriteIoTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. shuffle write io time"),
-    "rapidsShuffleReadTime" ->
-      createNanoTimingMetric(ESSENTIAL_LEVEL, "rs. shuffle read time")
-  ) ++ GpuMetric.wrap(readMetrics) ++ GpuMetric.wrap(writeMetrics)
+  override lazy val additionalMetrics : Map[String, GpuMetric] = {
+    createAdditionalExchangeMetrics(this) ++
+      GpuMetric.wrap(readMetrics) ++
+      GpuMetric.wrap(writeMetrics)
+  }
 
   override lazy val allMetrics: Map[String, GpuMetric] = {
     Map(
@@ -98,7 +88,7 @@ case class GpuOptimizeWriteExchangeExec(
   }
 
   private lazy val serializer: Serializer =
-    new GpuColumnarBatchSerializer(gpuLongMetric("dataSize"),
+    new GpuColumnarBatchSerializer(allMetrics,
       child.output.map(_.dataType).toArray,
       RapidsConf.SHUFFLE_KUDO_SERIALIZER_ENABLED.get(child.conf))
 
diff --git a/docs/dev/idea-code-style-settings.xml b/docs/dev/idea-code-style-settings.xml
index 165d30dde06..9f5c3c100dc 100644
--- a/docs/dev/idea-code-style-settings.xml
+++ b/docs/dev/idea-code-style-settings.xml
@@ -1,3 +1,19 @@
+
+
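The GpuOptimizeWriteExchangeExec change above swaps the hand-written metric map for the shared `GpuShuffleExchangeExecBase.createAdditionalExchangeMetrics` helper and hands the whole metric map to `GpuColumnarBatchSerializer` instead of only the `dataSize` metric. A simplified, self-contained sketch of that pattern follows; `Metric`, `ExchangeMetricsSketch`, and `BatchSerializerSketch` are illustrative stand-ins, not the plugin's real classes.

```scala
// Illustrative stand-ins only: this mirrors the shape of the refactor, not the
// plugin's actual GpuMetric or GpuColumnarBatchSerializer APIs.
final class Metric(val description: String) {
  private var value = 0L
  def add(v: Long): Unit = value += v
  def get: Long = value
}

object ExchangeMetricsSketch {
  // One shared factory defines the shuffle metrics every exchange-like node
  // needs, instead of each node spelling out the same map inline.
  def createAdditionalMetrics(nodeName: String): Map[String, Metric] = Map(
    "dataSize" -> new Metric(s"$nodeName: data size"),
    "rapidsShuffleWriteTime" -> new Metric(s"$nodeName: shuffle write time"),
    "rapidsShuffleReadTime" -> new Metric(s"$nodeName: shuffle read time")
  )
}

// The serializer receives the full map rather than one pre-selected metric, so
// it can update several counters without each being passed in separately.
class BatchSerializerSketch(metrics: Map[String, Metric]) {
  def writeBatch(serialized: Array[Byte]): Unit =
    metrics("dataSize").add(serialized.length.toLong)
}
```

Passing the whole map keeps the serializer's constructor stable as more shuffle metrics are added, at the cost of the serializer looking up the keys it cares about by name.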