diff --git a/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh b/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh
index 2e1bd6c6d7..05ebe76208 100644
--- a/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh
+++ b/cpp/include/raft/distance/detail/distance_ops/l2_exp.cuh
@@ -22,10 +22,11 @@
 namespace raft::distance::detail::ops {
 
 template <typename DataT>
-__device__ DataT get_clamp_precision()
+__device__ constexpr DataT get_clamp_precision()
 {
   switch (sizeof(DataT)) {
-    case 4: return 1e-5;
+    case 2: return 1e-3;
+    case 4: return 1e-4;
     case 8: return 1e-14;
     default: return 0;
   }
@@ -46,9 +47,8 @@ struct l2_exp_cutlass_op {
      * Self-neighboring points should have (aNorm == bNorm) == accVal and the dot product (accVal)
      * can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal instead.
      */
-    outVal = outVal * (raft::abs(outVal) >= get_clamp_precision<DataT>() &&
-                       !(aNorm == bNorm && accVal != 0.0));
-    return sqrt ? raft::sqrt(outVal) : outVal;
+    outVal = outVal * !((outVal < get_clamp_precision<DataT>()) * (aNorm == bNorm));
+    return sqrt ? raft::sqrt(outVal * (outVal > 0)) : outVal;
   }
 
   __device__ AccT operator()(DataT aData) const noexcept { return aData; }
@@ -107,8 +107,8 @@ struct l2_exp_distance_op {
          * (accVal) can sometimes have round-off errors, which will cause (aNorm == bNorm) ~ accVal
          * instead.
          */
-        acc[i][j] = val * (raft::abs(val) >= get_clamp_precision<DataT>() &&
-                           !(regxn[i] == regyn[j] && accVal != 0.0));
+        acc[i][j] =
+          val * (val > 0) * !((val < get_clamp_precision<DataT>()) * (regxn[i] == regyn[j]));
       }
     }
     if (sqrt) {
diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
index 315e2245d8..25fdf3f0f6 100644
--- a/docs/source/raft_ann_benchmarks.md
+++ b/docs/source/raft_ann_benchmarks.md
@@ -84,8 +84,6 @@ You can see the exact versions as well in the dockerhub site:
 
 [//]: # (```)
 
-
-
 ## How to run the benchmarks
 
 We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. 
@@ -118,17 +116,6 @@ will be written at location `datasets/glove-100-inner/`.
 ### Step 2: Build and Search Index
 The script `raft-ann-bench.run` will build and search indices for a given dataset and its
 specified configuration.
-To confirgure which algorithms are available, we use `algos.yaml`.
-To configure building/searching indices for a dataset, look at [index configuration](#json-index-config).
-An entry in `algos.yaml` looks like:
-```yaml
-raft_ivf_pq:
-  executable: RAFT_IVF_PQ_ANN_BENCH
-  requires_gpu: true
-```
-`executable` : specifies the name of the binary that will build/search the index. It is assumed to be
-available in `raft/cpp/build/`.
-`requires_gpu` : denotes whether an algorithm requires GPU to run.
 
 The usage of the script `raft-ann-bench.run` is:
 ```bash
@@ -294,8 +281,6 @@ options:
                         Path to billion-scale dataset groundtruth file (default: None)
 ```
 
-
-
 ### Running with Docker containers
 
 Two methods are provided for running the benchmarks with the Docker containers. 
@@ -410,14 +395,8 @@ The table below contains the possible settings for the `algo` field. Each unique
 | HNSWlib   | `hnswlib`                                                        |
 | RAFT      | `raft_brute_force`, `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` |
 
-
-
-
 By default, the index will be placed in `bench/ann/data/<dataset_name>/index/<name>`. Using `sift-128-euclidean` for the dataset with the `algo` example above, the indexes would be placed in `bench/ann/data/sift-128-euclidean/index/algo_name/param1_val1-param2_val2`.
 
-
-
-
 ## Adding a new ANN algorithm
 
 ### Implementation and Configuration
@@ -490,6 +469,7 @@ How to interpret these JSON objects is totally left to the implementation and sh
       }
     ```
 
+
 ### Adding a CMake Target
 In `raft/cpp/bench/ann/CMakeLists.txt`, we provide a `CMake` function to configure a new Benchmark target with the following signature:
 ```
@@ -511,3 +491,14 @@ ConfigureAnnBench(
 ```
 
 This will create an executable called `HNSWLIB_ANN_BENCH`, which can then be used to run `HNSWLIB` benchmarks.
+
+Add a new entry to `algos.yaml` to map the name of the algorithm to its binary executable and specify whether the algorithm requires GPU support.
+```yaml
+raft_ivf_pq:
+  executable: RAFT_IVF_PQ_ANN_BENCH
+  requires_gpu: true
+```
+
+`executable` : specifies the name of the binary that will build/search the index. It is assumed to be
+available in `raft/cpp/build/`.
+`requires_gpu` : denotes whether an algorithm requires a GPU to run.
diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py
index 34ed86db01..697a21119e 100644
--- a/python/pylibraft/pylibraft/test/test_distance.py
+++ b/python/pylibraft/pylibraft/test/test_distance.py
@@ -63,6 +63,8 @@ def test_distance(n_rows, n_cols, inplace, metric, order, dtype):
     else:
         expected = cdist(input1, input1, metric)
 
+    # expected[expected <= 1e-5] = 0.0
+
     input1_device = device_ndarray(input1)
     output_device = device_ndarray(output) if inplace else None
 
@@ -77,4 +79,8 @@ def test_distance(n_rows, n_cols, inplace, metric, order, dtype):
 
     actual = output_device.copy_to_host()
 
+    # actual[actual <= 1e-5] = 0.0
+    # if metric == "euclidean":
+    #     np.fill_diagonal(actual, 0.0)
+
     assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3)