diff --git a/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh b/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh index 2e1bd6c6d7..05ebe76208 100644 --- a/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh +++ b/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh @@ -22,10 +22,11 @@ namespace raft::distance::detail::ops { template -__device__ DataT get_clamp_precision() +__device__ constexpr DataT get_clamp_precision() { switch (sizeof(DataT)) { - case 4: return 1e-5; + case 2: return 1e-3; + case 4: return 1e-4; case 8: return 1e-14; default: return 0; } @@ -46,9 +47,8 @@ struct l2_exp_cutlass_op { * Self-neighboring points should have (aNorm == bNorm) == accVal and the dot product (accVal) * can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal instead. */ - outVal = outVal * (raft::abs(outVal) >= get_clamp_precision() && - !(aNorm == bNorm && accVal != 0.0)); - return sqrt ? raft::sqrt(outVal) : outVal; + outVal = outVal * !((outVal < get_clamp_precision()) * (aNorm == bNorm)); + return sqrt ? raft::sqrt(outVal * (outVal > 0)) : outVal; } __device__ AccT operator()(DataT aData) const noexcept { return aData; } @@ -107,8 +107,8 @@ struct l2_exp_distance_op { * (accVal) can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal * instead. */ - acc[i][j] = val * (raft::abs(val) >= get_clamp_precision() && - !(regxn[i] == regyn[j] && accVal != 0.0)); + acc[i][j] = + val * (val > 0) * !((val < get_clamp_precision()) * (regxn[i] == regyn[j])); } } if (sqrt) { diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 315e2245d8..25fdf3f0f6 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -84,8 +84,6 @@ You can see the exact versions as well in the dockerhub site: [//]: # (```) - - ## How to run the benchmarks We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. @@ -118,17 +116,6 @@ will be written at location `datasets/glove-100-inner/`. ### Step 2: Build and Search Index The script `raft-ann-bench.run` will build and search indices for a given dataset and its specified configuration. -To confirgure which algorithms are available, we use `algos.yaml`. -To configure building/searching indices for a dataset, look at [index configuration](#json-index-config). -An entry in `algos.yaml` looks like: -```yaml -raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH - requires_gpu: true -``` -`executable` : specifies the name of the binary that will build/search the index. It is assumed to be -available in `raft/cpp/build/`. -`requires_gpu` : denotes whether an algorithm requires GPU to run. The usage of the script `raft-ann-bench.run` is: ```bash @@ -294,8 +281,6 @@ options: Path to billion-scale dataset groundtruth file (default: None) ``` - - ### Running with Docker containers Two methods are provided for running the benchmarks with the Docker containers. @@ -410,14 +395,8 @@ The table below contains the possible settings for the `algo` field. Each unique | HNSWlib | `hnswlib` | | RAFT | `raft_brute_force`, `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | - - - By default, the index will be placed in `bench/ann/data//index/`. Using `sift-128-euclidean` for the dataset with the `algo` example above, the indexes would be placed in `bench/ann/data/sift-128-euclidean/index/algo_name/param1_val1-param2_val2`. - - - ## Adding a new ANN algorithm ### Implementation and Configuration @@ -490,6 +469,7 @@ How to interpret these JSON objects is totally left to the implementation and sh } ``` + ### Adding a CMake Target In `raft/cpp/bench/ann/CMakeLists.txt`, we provide a `CMake` function to configure a new Benchmark target with the following signature: ``` @@ -511,3 +491,14 @@ ConfigureAnnBench( ``` This will create an executable called `HNSWLIB_ANN_BENCH`, which can then be used to run `HNSWLIB` benchmarks. + +Add a new entry to `algos.yaml` to map the name of the algorithm to its binary executable and specify whether the algorithm requires GPU support. +```yaml +raft_ivf_pq: + executable: RAFT_IVF_PQ_ANN_BENCH + requires_gpu: true +``` + +`executable` : specifies the name of the binary that will build/search the index. It is assumed to be +available in `raft/cpp/build/`. +`requires_gpu` : denotes whether an algorithm requires GPU to run. diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py index 34ed86db01..697a21119e 100644 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ b/python/pylibraft/pylibraft/test/test_distance.py @@ -63,6 +63,8 @@ def test_distance(n_rows, n_cols, inplace, metric, order, dtype): else: expected = cdist(input1, input1, metric) + # expected[expected <= 1e-5] = 0.0 + input1_device = device_ndarray(input1) output_device = device_ndarray(output) if inplace else None @@ -77,4 +79,8 @@ def test_distance(n_rows, n_cols, inplace, metric, order, dtype): actual = output_device.copy_to_host() + # actual[actual <= 1e-5] = 0.0 + # if metric == "euclidean": + # np.fill_diagonal(actual, 0.0) + assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3)