From d0e5cdfc4df197bfb4846a243e3d9ea9d7b87aab Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:17:51 -1000 Subject: [PATCH] Allow for binops between two differently sized DecimalDtypes (#16638) Currently cudf Python has some custom logic for determining the resulting dtype of a binop between 2 decimal dtypes since Python decimal dtype supports `precision` and libcudf doesn't. But libcudf does require that the 2 operands have the same decimal type when calculating the binop, so we must ensure the inputs are cast to the same resulting dtype. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16638 --- python/cudf/cudf/core/column/decimal.py | 17 ++++++++++++++--- python/cudf/cudf/tests/test_decimal.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 3b979ef2e97..8803ebd6791 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -135,9 +135,15 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str): # are computed outside of libcudf if op in {"__add__", "__sub__", "__mul__", "__div__"}: output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op) + lhs = lhs.astype( + type(output_type)(lhs.dtype.precision, lhs.dtype.scale) + ) + rhs = rhs.astype( + type(output_type)(rhs.dtype.precision, rhs.dtype.scale) + ) result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type) - # TODO: Why is this necessary? Why isn't the result's - # precision already set correctly based on output_type? 
+ # libcudf doesn't support precision, so result.dtype doesn't + # maintain output_type.precision result.dtype.precision = output_type.precision elif op in { "__eq__", @@ -430,7 +436,11 @@ def _with_type_metadata( return self -def _get_decimal_type(lhs_dtype, rhs_dtype, op): +def _get_decimal_type( + lhs_dtype: DecimalDtype, + rhs_dtype: DecimalDtype, + op: str, +) -> DecimalDtype: """ Returns the resulting decimal type after calculating precision & scale when performing the binary operation @@ -441,6 +451,7 @@ def _get_decimal_type(lhs_dtype, rhs_dtype, op): # This should at some point be hooked up to libcudf's # binary_operation_fixed_point_scale + # Note: libcudf decimal types don't have a concept of precision p1, p2 = lhs_dtype.precision, rhs_dtype.precision s1, s2 = lhs_dtype.scale, rhs_dtype.scale diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index b63788d20b7..048b3a656e3 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -398,3 +398,13 @@ def test_decimal_overflow(): s = cudf.Series([1, 2], dtype=cudf.Decimal128Dtype(precision=38, scale=0)) result = s * Decimal("1.0") assert_eq(cudf.Decimal128Dtype(precision=38, scale=1), result.dtype) + + +def test_decimal_binop_upcast_operands(): + ser1 = cudf.Series([0.51, 1.51, 2.51]).astype(cudf.Decimal64Dtype(18, 2)) + ser2 = cudf.Series([0.90, 0.96, 0.99]).astype(cudf.Decimal128Dtype(19, 2)) + result = ser1 + ser2 + expected = cudf.Series([1.41, 2.47, 3.50]).astype( + cudf.Decimal128Dtype(20, 2) + ) + assert_eq(result, expected)