diff --git a/python/lib.c b/python/lib.c index 8c0855e3..51c54722 100644 --- a/python/lib.c +++ b/python/lib.c @@ -3263,9 +3263,6 @@ static PyObject *api_add(PyObject *self, PyObject *const *args, Py_ssize_t const } } - printf("Effective datatypes are: a - %s, b - %s, out - %s\n", datatype_to_python_string(a_parsed.datatype), - datatype_to_python_string(b_parsed.datatype), datatype_to_python_string(ab_dtype)); - // Estimate the total number of output elements: Py_ssize_t out_total_elements = 1; for (Py_ssize_t i = 0; i < out_parsed.as_buffer_dimensions; ++i) diff --git a/scripts/test.py b/scripts/test.py index c0985b83..26356a30 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -67,16 +67,26 @@ baseline_bilinear = lambda x, y, z: x @ z @ y def _normalize_element_wise(r, dtype): + """Clips higher-resolution results to the smaller target dtype without overflow.""" if np.issubdtype(dtype, np.integer): r = np.round(r) - #! We need non-overflowing saturating addition for small integers, that NumPy lacks: - #! https://stackoverflow.com/questions/29611185/avoid-overflow-when-adding-numpy-arrays - if dtype == np.uint8: - r = np.clip(r, 0, 255, out=r) - elif dtype == np.int8: - r = np.clip(r, -128, 127, out=r) + #! We need non-overflowing saturating addition for small integers, that NumPy lacks: + #! https://stackoverflow.com/questions/29611185/avoid-overflow-when-adding-numpy-arrays + dtype_info = np.iinfo(dtype) + r = np.clip(r, dtype_info.min, dtype_info.max, out=r) return r.astype(dtype) + def upcast_preserving_inf(r, dtype): + """Upcasts the result to a higher resolution, preserving infinity values (min/max for integers).""" + dtype_old_info = np.iinfo(r.dtype) if np.issubdtype(r.dtype, np.integer) else np.finfo(r.dtype) + dtype_new_info = np.iinfo(dtype) if np.issubdtype(dtype, np.integer) else np.finfo(dtype) + is_min = r == dtype_old_info.min + is_max = r == dtype_old_info.max + r = r.astype(dtype) + r[is_min] = dtype_new_info.min + r[is_max] = dtype_new_info.max + return r + def baseline_scale(x, alpha, beta): return _normalize_element_wise(alpha * x + beta, x.dtype) @@ -94,14 +104,26 @@ def baseline_wsum(x, y, alpha, beta): def baseline_fma(x, y, z, alpha, beta): return _normalize_element_wise(np.multiply((alpha * x), y) + beta * z, x.dtype) - def baseline_add(x, y): - dtype = x.dtype if isinstance(x, np.ndarray) else y.dtype - if dtype == np.uint8: - return _normalize_element_wise(x.astype(np.uint16) + y, dtype) - elif dtype == np.int8: - return _normalize_element_wise(x.astype(np.int16) + y, dtype) + def baseline_add(x, y, out=None): + + # If the input types are identical, we want to perform addition with saturation + if isinstance(x, np.ndarray) and isinstance(y, np.ndarray) and x.dtype == y.dtype and out is None: + if x.dtype == np.uint8: + return _normalize_element_wise(x.astype(np.uint16) + y, x.dtype) + elif x.dtype == np.int8: + return _normalize_element_wise(x.astype(np.int16) + y, x.dtype) + if x.dtype == np.uint16: + return _normalize_element_wise(x.astype(np.uint32) + y, x.dtype) + elif x.dtype == np.int16: + return _normalize_element_wise(x.astype(np.int32) + y, x.dtype) + if x.dtype == np.uint32: + return _normalize_element_wise(x.astype(np.uint64) + y, x.dtype) + elif x.dtype == np.int32: + return _normalize_element_wise(x.astype(np.int64) + y, x.dtype) + else: + return np.add(x, y) else: - return x + y + return np.add(x, y, out=out, casting="unsafe") def baseline_multiply(x, y): dtype = x.dtype if isinstance(x, np.ndarray) else y.dtype @@ -451,7 +473,7 @@ def collect_warnings(message: str, stats: dict): def keep_one_capability(cap: str): - assert cap in possible_capabilities + assert cap in possible_capabilities or cap == "serial", f"Capability {cap} is not available on this platform." for c in possible_capabilities: if c != cap: simd.disable_capability(c) @@ -1568,7 +1590,7 @@ def test_cdist_hamming(ndim, out_dtype, capability): @pytest.mark.parametrize("second_dtype", ["float64", "float32", "int32", "uint32"]) @pytest.mark.parametrize("output_dtype", ["float64", "float32", "int32", "uint32"]) @pytest.mark.parametrize("kernel", ["add"]) # , "multiply"]) -@pytest.mark.parametrize("capability", possible_capabilities) +@pytest.mark.parametrize("capability", ["serial"]) def test_add(first_dtype, second_dtype, output_dtype, kernel, capability, stats_fixture): """Tests NumPy-like compatibility interfaces on all kinds of non-contiguous arrays.""" @@ -1579,29 +1601,77 @@ def test_add(first_dtype, second_dtype, output_dtype, kernel, capability, stats_ def validate(a, b, inplace_simsimd): result_numpy = baseline_kernel(a, b) result_simsimd = np.array(simd_kernel(a, b)) + assert ( + result_simsimd.size == result_numpy.size + ), f"Result sizes differ: {result_simsimd.size} vs {result_numpy.size}" + assert ( + result_simsimd.shape == result_numpy.shape + ), f"Result shapes differ: {result_simsimd.shape} vs {result_numpy.shape}" + assert ( + result_simsimd.dtype == result_numpy.dtype + ), f"Result dtypes differ: {result_simsimd.dtype} vs {result_numpy.dtype} for ({first_dtype} + {second_dtype})" + + if not np.allclose(result_simsimd, result_numpy, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL): + # ? Find the first mismatch and use it as an example in the error message + np.testing.assert_allclose( + result_simsimd, + result_numpy, + atol=SIMSIMD_ATOL, + rtol=SIMSIMD_RTOL, + err_msg=f""" + Result mismatch for ({first_dtype} + {second_dtype}) + First argument: {a} + Second argument: {b} + SimSIMD result: {result_simsimd} + NumPy result: {result_numpy} + """, + ) + + #! NumPy constantly overflows in mixed-precision operations! + inplace_numpy = np.empty_like(inplace_simsimd) simd_kernel(a, b, out=inplace_simsimd) - assert result_simsimd.size == result_numpy.size - assert result_simsimd.shape == result_numpy.shape - assert result_simsimd.dtype == result_numpy.dtype - np.testing.assert_allclose(result_simsimd, result_numpy, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL) - np.testing.assert_allclose(result_simsimd, inplace_simsimd, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL) + baseline_kernel(a, b, out=inplace_numpy) + + assert ( + inplace_simsimd.size == inplace_numpy.size + ), f"Inplace sizes differ: {inplace_simsimd.size} vs {inplace_numpy.size}" + assert ( + inplace_simsimd.shape == inplace_numpy.shape + ), f"Inplace shapes differ: {inplace_simsimd.shape} vs {inplace_numpy.shape}" + assert ( + inplace_simsimd.dtype == inplace_numpy.dtype + ), f"Inplace dtypes differ: {inplace_simsimd.dtype} vs {inplace_numpy.dtype} for ({first_dtype} + {second_dtype})" + + # Let's count the number of overflows in NumPy: + overflow_count = np.sum(np.isclose(inplace_simsimd, inplace_numpy, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL)) + if overflow_count: + collect_warnings( + f"NumPy overflow in ({first_dtype} + {second_dtype} -> {output_dtype})", + stats_fixture, + ) return result_simsimd # Vector-Vector addition + a = random_of_dtype(first_dtype, (6,)) + b = random_of_dtype(second_dtype, (6,)) + o = np.zeros(6).astype(output_dtype) + validate(a, b, o) + + # Larger Vector-Vector addition a = random_of_dtype(first_dtype, (47,)) b = random_of_dtype(second_dtype, (47,)) o = np.zeros(47).astype(output_dtype) validate(a, b, o) # Vector-Scalar addition - validate(a, -11, o) - validate(a, 11, o) - validate(a, 11.0, o) + validate(a, np.int8(-11), o) + validate(a, np.uint8(11), o) + validate(a, np.float16(11.0), o) # Scalar-Vector addition - validate(-13, b, o) - validate(13, b, o) - validate(13.0, b, o) + validate(np.int8(-13), b, o) + validate(np.uint8(13), b, o) + validate(np.float16(13.0), b, o) # Matrix-Matrix addition a = random_of_dtype(first_dtype, (10, 47))