From 3298da2bbdfe5f082cf78b70aab98d3d1126b5e0 Mon Sep 17 00:00:00 2001
From: "Hongbin Ma (Mahone)"
Date: Tue, 17 Dec 2024 15:27:45 +0800
Subject: [PATCH] address comments

Signed-off-by: Hongbin Ma (Mahone)
---
 .../com/nvidia/spark/rapids/GpuAggregateExec.scala | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
index 05b89cd593a..b8ee363f5aa 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuAggregateExec.scala
@@ -281,16 +281,16 @@ object AggregateUtils extends Logging {
     if (needRepartitionAgain(bucket)) {
       if (recursiveDepth >= maxRecursiveDepth) {
         // Normally this should not happen, because we are repartitioning data that has
-        // already went through first round of aggregation, so there shouldn't be too many
+        // already gone through first round of aggregation, so there shouldn't be too many
         // duplicated rows (the duplication only happens in different batches) to prevent
         // repartitioning out (considering we're changing seed each time we repartition).
-        // However for some test cases with really small batch size, this can happen. So
+        // However, for some test cases with really small batch size, this can happen. So
         // we're just logging some warnings here.
-        log.warn("The bucket is still too large after " + recursiveDepth +
-          " times of repartition. Size for each batch in " +
-          "current bucket: " + bucket.map(_.sizeInBytes).mkString(", ") + " rows: " +
-          bucket.map(_.numRows()).mkString(", ") + " targetMergeBatchSize: "
-          + targetMergeBatchSize)
+        log.warn(s"The bucket is still too large after $recursiveDepth repartitions. " +
+          s"See https://github.com/NVIDIA/spark-rapids/issues/11834. " +
+          s"Sizes for each batch in current bucket: ${bucket.map(_.sizeInBytes).mkString(", ")}" +
+          s" rows: ${bucket.map(_.numRows()).mkString(", ")}" +
+          s" targetMergeBatchSize: $targetMergeBatchSize")
         ArrayBuffer(bucket)
       } else {
         val nextLayerBuckets =
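
For context, the hunk above bails out of recursive repartitioning once maxRecursiveDepth
is reached, logging the (now reworded) warning instead of recursing further. Below is a
minimal, self-contained Scala sketch of that pattern, not the spark-rapids implementation:
Batch, RepartitionSketch, iterate, repartition, and the constants are hypothetical
stand-ins that only mirror names from the diff.

// A simplified sketch of recursive repartitioning with a depth cutoff.
// All types and names here are illustrative, not from GpuAggregateExec.
import scala.collection.mutable.ArrayBuffer
import scala.util.hashing.MurmurHash3

final case class Batch(keys: Seq[Int], sizeInBytes: Long) {
  def numRows(): Int = keys.size
}

object RepartitionSketch {
  val maxRecursiveDepth = 10
  val targetMergeBatchSize = 1024L

  // A bucket needs another pass while its total size still exceeds the merge target.
  private def needRepartitionAgain(bucket: ArrayBuffer[Batch]): Boolean =
    bucket.map(_.sizeInBytes).sum > targetMergeBatchSize

  // Split every batch into sub-buckets by hashing each key with a per-level seed,
  // so each retry uses a different partitioning (the "changing seed" in the comment).
  private def repartition(
      bucket: ArrayBuffer[Batch],
      numBuckets: Int,
      seed: Int): Seq[ArrayBuffer[Batch]] = {
    val out = Seq.fill(numBuckets)(ArrayBuffer.empty[Batch])
    for (batch <- bucket) {
      val avgRowBytes = batch.sizeInBytes / math.max(batch.numRows(), 1)
      batch.keys
        .groupBy { k =>
          val h = MurmurHash3.stringHash(k.toString, seed)
          ((h % numBuckets) + numBuckets) % numBuckets // non-negative bucket index
        }
        .foreach { case (idx, ks) => out(idx) += Batch(ks, avgRowBytes * ks.size) }
    }
    out.filter(_.nonEmpty)
  }

  // Recursively repartition over-sized buckets, giving up (with the warning the
  // patch rewrites) once maxRecursiveDepth is reached.
  def iterate(
      bucket: ArrayBuffer[Batch],
      recursiveDepth: Int): ArrayBuffer[ArrayBuffer[Batch]] = {
    if (needRepartitionAgain(bucket)) {
      if (recursiveDepth >= maxRecursiveDepth) {
        // Mirrors the patched log.warn: heavily duplicated keys (e.g. with tiny
        // test batch sizes) can make a bucket impossible to split further.
        println(s"The bucket is still too large after $recursiveDepth repartitions. " +
          s"Sizes for each batch in current bucket: " +
          s"${bucket.map(_.sizeInBytes).mkString(", ")}" +
          s" rows: ${bucket.map(_.numRows()).mkString(", ")}" +
          s" targetMergeBatchSize: $targetMergeBatchSize")
        ArrayBuffer(bucket)
      } else {
        repartition(bucket, numBuckets = 2, seed = 42 + recursiveDepth)
          .flatMap(b => iterate(b, recursiveDepth + 1))
          .to(ArrayBuffer)
      }
    } else {
      ArrayBuffer(bucket)
    }
  }

  def main(args: Array[String]): Unit = {
    // 100 rows that all share one key: no hash seed can split identical keys, so
    // the sketch recurses to maxRecursiveDepth and takes the warning path above.
    val stuck = ArrayBuffer(Batch(Seq.fill(100)(1), sizeInBytes = 2048L))
    iterate(stuck, recursiveDepth = 1)
      .foreach(b => println(s"final bucket rows: ${b.map(_.numRows()).mkString(", ")}"))
  }
}

Running main with the fully duplicated bucket shows why the depth guard exists: rows
duplicated across batches hash identically under every seed, so without the cutoff the
recursion would never terminate, which is exactly the situation the warning describes.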