NVIDIA · thirtiseven · Sep 29, 2023 · Sep 8, 2023 · Sep 8, 2023 · Sep 12, 2023
diff --git a/docs/compatibility.md b/docs/compatibility.md
@@ -663,7 +663,7 @@ types to strings. The GPU uses a lowercase `e` prefix for an exponent while Spar
 
 The `format_number` function will retain 10 digits of precision for the GPU when the input is a floating 
 point number, but Spark will retain up to 17 digits of precision, i.e. `format_number(1234567890.1234567890, 5)`
-will return `1,234,567,890.00000` on the GPU and `1,234,567,890.12346` on the CPU.
+will return `1,234,567,890.00000` on the GPU and `1,234,567,890.12346` on the CPU. Because of this mismatch, `format_number` on floating point input is disabled on the GPU by default; to enable it, set [`spark.rapids.sql.formatNumberFloat.enabled`](additional-functionality/advanced_configs.md#sql.formatNumberFloat.enabled) to `true`.
 
 Starting from 22.06 this conf is enabled by default, to disable this operation on the GPU, set
 [`spark.rapids.sql.castFloatToString.enabled`](configs.md#sql.castFloatToString.enabled) to `false`.

diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py
@@ -801,7 +801,9 @@ def test_conv_dec_to_from_hex(from_base, to_base, pattern):
 format_number_gens = integral_gens + [DecimalGen(precision=7, scale=7), DecimalGen(precision=18, scale=0), 
                                       DecimalGen(precision=18, scale=3), DecimalGen(precision=36, scale=5), 
                                       DecimalGen(precision=36, scale=-5), DecimalGen(precision=38, scale=10), 
-                                      DecimalGen(precision=38, scale=-10)]
+                                      DecimalGen(precision=38, scale=-10), 
+                                      DecimalGen(precision=38, scale=30, special_cases=[Decimal('0.000125')]),
+                                      DecimalGen(precision=38, scale=32, special_cases=[Decimal('0.000125')])]
 
 @pytest.mark.parametrize('data_gen', format_number_gens, ids=idfn)
 def test_format_number_supported(data_gen):
@@ -828,11 +830,24 @@ def test_format_number_float_limited(data_gen):
         conf = float_format_number_conf
     )
 
+# format_number for float/double is disabled on the GPU by default due to a compatibility issue:
+# the GPU generates results with less precision than the CPU, so FormatNumber should fall back
 @allow_non_gpu('ProjectExec')
-def test_format_number_float_fallback():
-    gen = DoubleGen()
+@pytest.mark.parametrize('data_gen', [float_gen, double_gen], ids=idfn)
+def test_format_number_float_fallback(data_gen):
     assert_gpu_fallback_collect(
-        lambda spark: unary_op_df(spark, gen).selectExpr(
+        lambda spark: unary_op_df(spark, data_gen).selectExpr(
+            'format_number(a, 5)'),
+        'FormatNumber'
+    )
+
+# decimals with scale > 32 fall back to the CPU, see https://github.com/NVIDIA/spark-rapids/issues/9309
+@allow_non_gpu('ProjectExec')
+@pytest.mark.parametrize('data_gen', [DecimalGen(precision=38, scale=33),
+                                      DecimalGen(precision=38, scale=37)], ids=idfn)
+def test_format_number_decimal_big_scale_fallback(data_gen):
+    assert_gpu_fallback_collect(
+        lambda spark: unary_op_df(spark, data_gen).selectExpr(
             'format_number(a, 5)'),
         'FormatNumber'
-    )
+    )
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala
@@ -3102,6 +3102,12 @@ object GpuOverrides extends Logging {
                   s" ${RapidsConf.ENABLE_FLOAT_FORMAT_NUMBER} to true.")
               }
             }
+            case dt: DecimalType => {
+              if (dt.scale > 32) {
+                willNotWorkOnGpu("format_number will generate results mismatched from Spark " +
+                  "when the scale is larger than 32.")
+              }
+            }
             case _ =>
           }
         }

diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala
@@ -2126,29 +2126,29 @@ case class GpuFormatNumber(x: Expression, d: Expression)
       ColumnVector.stringConcatenate(Array(intPart, decPart, zeros))
     }
     // split intAndDecParts to intPart and decPart with substrings, start = len(intAndDecParts) - d
-    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resource_array =>
+    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resourceArray =>
       val (intPartPosExp, decPartPosExpTemp) = withResource(intAndDecParts) { _ =>
         val (start, end) = withResource(intAndDecParts.getCharLengths) { partsLength =>
           (withResource(Scalar.fromInt(d)) { d =>
             partsLength.sub(d)
           }, partsLength.incRefCount())
         }
-        val zeroCv = withResource(Scalar.fromInt(0)) { zero =>
-          ColumnVector.fromScalar(zero, cv.getRowCount.toInt)
-        }
         withResource(start) { _ =>
-          val intPart = withResource(zeroCv) { _ =>
-            intAndDecParts.substring(zeroCv, start)
-          }
-          val decPart = closeOnExcept(intPart) { _ =>
-            withResource(end) { _ =>
+          withResource(end) { _ =>
+            val zeroIntCv = withResource(Scalar.fromInt(0)) { zero =>
+              ColumnVector.fromScalar(zero, cv.getRowCount.toInt)
+            }
+            val intPart = withResource(zeroIntCv) { _ =>
+              intAndDecParts.substring(zeroIntCv, start)
+            }
+            val decPart = closeOnExcept(intPart) { _ =>
               intAndDecParts.substring(start, end)
             }
+            (intPart, decPart)
           }
-          (intPart, decPart)
         }
       }
-      resource_array += intPartPosExp
+      resourceArray += intPartPosExp
       // if decLen - exp > d, convert to float/double, round, convert back to string
       // decLen's max value is 9, abs(expPart)'s min value is 7, so it is possible only when d < 2
       // because d is small, we can use double to do the rounding
@@ -2192,12 +2192,14 @@ case class GpuFormatNumber(x: Expression, d: Expression)
     val cond1 = withResource(Scalar.fromInt(-1 - d)) { negOneSubD =>
       exp.greaterOrEqualTo(negOneSubD)
     }
-    val decLenSubExp = withResource(decPart.getCharLengths) { decLen =>
-      decLen.sub(exp)
-    }
-    val cond2 = withResource(decLenSubExp) { _ =>
-      withResource(Scalar.fromInt(d)) { d =>
-        decLenSubExp.greaterThan(d)
+    val cond2 = closeOnExcept(cond1) { _ =>
+      val decLenSubExp = withResource(decPart.getCharLengths) { decLen =>
+        decLen.sub(exp)
+      }
+      withResource(decLenSubExp) { _ =>
+        withResource(Scalar.fromInt(d)) { d =>
+          decLenSubExp.greaterThan(d)
+        }
       }
     }
     val needRounding = withResource(cond1) { _ =>
@@ -2238,8 +2240,8 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         // To do a dataframe operation, add some zeros before 
         // (intPat + decPart) and round them to 10
         // zerosNumRounding = (10 - (d + exp + 1)) . max(0)
-        val tenSubDExpOne = withResource(Scalar.fromInt(10)) { ten =>
-          withResource(dExpOne) { _ =>
+        val tenSubDExpOne = withResource(dExpOne) { _ => 
+          withResource(Scalar.fromInt(10)) { ten =>
             ten.sub(dExpOne)
           }
         }
@@ -2255,11 +2257,11 @@ case class GpuFormatNumber(x: Expression, d: Expression)
             zeroCv.repeatStrings(zerosNumRounding)
           }
         }
-        val zeroPointCv = withResource(Scalar.fromString("0.")) { point =>
-          ColumnVector.fromScalar(point, cv.getRowCount.toInt)
-        }
-        val numberToRoundStr = withResource(zeroPointCv) { _ =>
-          withResource(leadingZeros) { _ =>
+        val numberToRoundStr = withResource(leadingZeros) { _ =>
+          val zeroPointCv = withResource(Scalar.fromString("0.")) { point =>
+            ColumnVector.fromScalar(point, cv.getRowCount.toInt)
+          }
+          withResource(zeroPointCv) { _ =>
             ColumnVector.stringConcatenate(Array(zeroPointCv, leadingZeros, intPart, decPart))
           }
         }
@@ -2287,7 +2289,9 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         val decPartNegExp = withResource(decPartStriped) { _ =>
           decPartStriped.pad(d, PadSide.LEFT, "0")
         }
-        (getZeroCv(cv.getRowCount.toInt), decPartNegExp)
+        closeOnExcept(decPartNegExp) { _ =>
+          (getZeroCv(cv.getRowCount.toInt), decPartNegExp)
+        }
     }
   }
 
@@ -2301,8 +2305,10 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         (intAndDec.getColumn(0).incRefCount(), intAndDec.getColumn(1).incRefCount())
       }
     }
-    val intPartNoNeg = withResource(intPart) { _ =>
-      removeNegSign(intPart)
+    val intPartNoNeg = closeOnExcept(decPart) { _ =>
+      withResource(intPart) { _ =>
+        removeNegSign(intPart)
+      }
     }
     val decPartPad = closeOnExcept(intPartNoNeg) { _ =>
       withResource(decPart) { _ =>
@@ -2335,12 +2341,18 @@ case class GpuFormatNumber(x: Expression, d: Expression)
       }
     }
     // sign will be handled later, use string-based solution instead abs to avoid overfolw
-    val intPart = withResource(intPartSign) { _ =>
-      removeNegSign(intPartSign)  
+    val intPart = closeOnExcept(decPart) { _ =>
+      closeOnExcept(expPart) { _ =>
+        withResource(intPartSign) { _ =>
+          removeNegSign(intPartSign)  
+        }
+      }
     }
-    val exp = closeOnExcept(intPart) { _ =>
-      withResource(expPart) { _ =>
-        expPart.castTo(DType.INT32)
+    val exp = closeOnExcept(decPart) { _ =>
+      closeOnExcept(intPart) { _ =>
+        withResource(expPart) { _ =>
+          expPart.castTo(DType.INT32)
+        }
       }
     }
     // handle positive and negative exp separately
@@ -2351,42 +2363,42 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         }
       }
     }
-    val (intPartNegExp, decPartNegExp) = withResource(intPart) { _ =>
-      withResource(decPart) { _ =>
-        closeOnExcept(exp) { _ =>
-          handleDoubleNegExp(cv, intPart, decPart, exp, d)
+    withResource(ArrayBuffer.empty[ColumnVector]) { resourceArray =>
+      val (intPartNegExp, decPartNegExp) = withResource(intPart) { _ =>
+        withResource(decPart) { _ =>
+          closeOnExcept(exp) { _ =>
+            handleDoubleNegExp(cv, intPart, decPart, exp, d)
+          }
         }
       }
-    }
-    val expPos = withResource(exp) { _ =>
-      withResource(Scalar.fromInt(0)) { zero =>
-        exp.greaterOrEqualTo(zero)
+      resourceArray += intPartNegExp
+      resourceArray += decPartNegExp
+      val expPos = withResource(exp) { _ =>
+        withResource(Scalar.fromInt(0)) { zero =>
+          exp.greaterOrEqualTo(zero)
+        }
       }
-    }
-    // combine results
-    withResource(expPos) { _ =>
-      val intPartExp = withResource(intPartPosExp) { _ =>
-        withResource(intPartNegExp) { _ =>
+      // combine results
+      withResource(expPos) { _ =>
+        val intPartExp = withResource(intPartPosExp) { _ =>
           expPos.ifElse(intPartPosExp, intPartNegExp)
         }
-      }
-      val decPartExp = closeOnExcept(intPartExp) { _ =>
-        withResource(decPartPosExp) { _ =>
-          withResource(decPartNegExp) { _ =>
+        val decPartExp = closeOnExcept(intPartExp) { _ =>
+          withResource(decPartPosExp) { _ =>
             expPos.ifElse(decPartPosExp, decPartNegExp)
           }
         }
+        (intPartExp, decPartExp)
       }
-      (intPartExp, decPartExp)
     }
   }
 
   private def getPartsFromDouble(cv: ColumnVector, d: Int): (ColumnVector, ColumnVector) = {
     // handle normal case: 1234.567
-    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resource_array =>
+    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resourceArray =>
       val (normalInt, normalDec) = normalDoubleSplit(cv, d)
-      resource_array += normalInt
-      resource_array += normalDec
+      resourceArray += normalInt
+      resourceArray += normalDec
       // first check special case
       val cvStr = withResource(cv.castTo(DType.STRING)) { cvStr =>
         cvStr.incRefCount()
@@ -2420,17 +2432,17 @@ case class GpuFormatNumber(x: Expression, d: Expression)
               expDoubleSplit(noEReplaced, d)
             }
             // combine results
-            // remove normalInt from resource_array
-            resource_array.remove(0)
+            // remove normalInt from resourceArray
+            resourceArray.remove(0)
             val intPart = closeOnExcept(expDec) { _ =>
               withResource(expInt) { _ =>
                 withResource(normalInt) { _ =>
                   containsE.ifElse(expInt, normalInt)
                 }
               }
             }
-            resource_array.clear()
-            resource_array += intPart
+            resourceArray.clear()
+            resourceArray += intPart
             val decPart = withResource(expDec) { _ =>
               withResource(normalDec) { _ =>
                 containsE.ifElse(expDec, normalDec)
@@ -2451,30 +2463,28 @@ case class GpuFormatNumber(x: Expression, d: Expression)
     // if d <= scale, no need to append zeros, if scale < 0, append d zeros
     val appendZeroNum = (d - scale).max(0).min(d)
     val (intPart, decTemp) = if (roundingScale <= 0) {
-      withResource(ArrayBuffer.empty[ColumnVector]) { resource_array =>
+      withResource(ArrayBuffer.empty[ColumnVector]) { resourceArray =>
         val intPart = withResource(cv.round(roundingScale, RoundMode.HALF_EVEN)) { rounded =>
           rounded.castTo(DType.STRING)
         }
-        resource_array += intPart
+        resourceArray += intPart
         // if intString starts with 0, it must be "00000...", replace it with "0"
         val (isZero, zeroCv) = withResource(Scalar.fromString("0")) { zero =>
           withResource(intPart.startsWith(zero)) { isZero =>
-            closeOnExcept(isZero) { _ =>
               (isZero.incRefCount(), ColumnVector.fromScalar(zero, cv.getRowCount.toInt))
-            }
           }
         }
         val intPartZeroHandled = withResource(isZero) { isZero =>
           withResource(zeroCv) { zeroCv =>
             isZero.ifElse(zeroCv, intPart)
           }
         }
-        resource_array += intPartZeroHandled
+        resourceArray += intPartZeroHandled
         // a temp decPart is empty before appending zeros
         val decPart = withResource(Scalar.fromString("")) { emptyString =>
           ColumnVector.fromScalar(emptyString, cv.getRowCount.toInt)
         }
-        resource_array += decPart
+        resourceArray += decPart
         (intPartZeroHandled.incRefCount(), decPart.incRefCount())
       }
     } else {
@@ -2486,18 +2496,15 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         }
       }
     }
-    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resource_array =>
+    closeOnExcept(ArrayBuffer.empty[ColumnVector]) { resourceArray =>
       // remove negative sign from intPart, sign will be handled later
       val intPartPos = closeOnExcept(decTemp) { _ =>
         withResource(intPart) { _ =>
-          withResource(Scalar.fromString("-")) { negativeSign =>
-            withResource(Scalar.fromString("")) { emptyString =>
-              intPart.stringReplace(negativeSign, emptyString)
-            }
-          }
+          removeNegSign(intPart)
         }
       }
-      resource_array += intPartPos
+      resourceArray += intPartPos
+      // append zeros
       val appendZeros = "0" * appendZeroNum
       val appendZerosCv = closeOnExcept(decTemp) { _ =>
         withResource(Scalar.fromString(appendZeros)) { zeroString =>
@@ -2524,12 +2531,9 @@ case class GpuFormatNumber(x: Expression, d: Expression)
       }
       case IntegerType | LongType | ShortType | ByteType => {
         val intPartPos = withResource(cv.castTo(DType.STRING)) { intPart =>
-          withResource(Scalar.fromString("-")) { negativeSign =>
-            withResource(Scalar.fromString("")) { emptyString =>
-              intPart.stringReplace(negativeSign, emptyString)
-            }
-          }
+          removeNegSign(intPart)
         }
+        // dec part is all zeros
         val dzeros = "0" * d
         val decPart = closeOnExcept(intPartPos) { _ =>
           withResource(Scalar.fromString(dzeros)) { zeroString =>
@@ -2575,23 +2579,6 @@ case class GpuFormatNumber(x: Expression, d: Expression)
         str.substring(i, i + 3).asInstanceOf[ColumnView]
       }.toArray
     }
-    // TODO: test Liangcai's solution here
-    // val substrs = closeOnExcept(sepCol) { _ =>
-    //   var curEndsCol: ColumnVector = str.getCharLengths()
-    //   withResource(curEndsCol) { _ =>
-    //     (0 until maxstrlen by 3).safeMap { _ =>
-    //       val startCol = withResource(Scalar.fromInt(3)) { scalar3 =>
-    //         curEndsCol.sub(scalar3)
-    //       }
-    //       val sub = closeOnExcept(startCol) { _ =>
-    //         str.substring(startCol, curEndsCol).asInstanceOf[ColumnView]
-    //       }
-    //       curEndsCol.safeClose()
-    //       curEndsCol = startCol
-    //       sub
-    //     }.toArray
-    //   }
-    // }
     withResource(substrs) { _ =>
       withResource(sepCol) { _ =>
         withResource(ColumnVector.stringConcatenate(substrs, sepCol)) { res =>