From d6c7a5536d0d5edf6b069c867db8ba21939b142a Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 20 Oct 2024 15:17:44 +0200
Subject: [PATCH 01/13] add KNNImputer

---
 lib/scholar/impute/knn_imputer.ex | 259 ++++++++++++++++++++++++++++++
 1 file changed, 259 insertions(+)
 create mode 100644 lib/scholar/impute/knn_imputer.ex

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputer.ex
new file mode 100644
index 00000000..6db2bc90
--- /dev/null
+++ b/lib/scholar/impute/knn_imputer.ex
@@ -0,0 +1,259 @@
+defmodule Scholar.Impute.KNNImputer do
+  @moduledoc """
+  Imputer for completing missing values using k-Nearest Neighbors.
+
+  Each sample's missing values are imputed using the mean value from
+    `n_neighbors` nearest neighbors found in the training set. Two samples are
+    close if the features that neither is missing are close.
+  """
+  import Nx.Defn
+  import Scholar.Metrics.Distance
+
+  @derive {Nx.Container, keep: [:missing_values], containers: [:statistics]}
+  defstruct [:statistics, :missing_values]
+
+  opts_schema = [
+    missing_values: [
+      type: {:or, [:float, :integer, {:in, [:nan]}]},
+      default: :nan,
+      doc: ~S"""
+      The placeholder for the missing values. All occurrences of `:missing_values` will be imputed.
+      """
+    ],
+    number_of_neighbors: [
+      type: :pos_integer,
+      default: 2,
+      doc: "The number of nearest neighbors."
+    ]
+  ]
+
+  @opts_schema NimbleOptions.new!(opts_schema)
+
+  @doc """
+  Imputer for completing missing values using k-Nearest Neighbors.
+
+  ## Options
+
+  #{NimbleOptions.docs(@opts_schema)}
+
+  ## Return Values
+
+    The function returns a struct with the following parameters:
+
+    * `:missing_values` - the same value as in `:missing_values`
+
+    * `:statistics` - The imputation fill value for each feature. Computing statistics can result in
+    [`Nx.Constant.nan/0`](https://hexdocs.pm/nx/Nx.Constants.html#nan/0) values.
+
+  ## Examples
+
+      iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
+      iex> Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
+      %Scholar.Impute.KNNImputer{
+      statistics: #Nx.Tensor<
+        f32[5][2]
+        [
+            [NaN, NaN],
+            [NaN, NaN],
+            [NaN, 8.0],
+            [7.5, NaN],
+            [NaN, NaN]
+          ]
+      >,
+      missing_values: :nan
+      }
+
+  """
+
+  deftransform fit(x, opts \\ []) do
+    opts = NimbleOptions.validate!(opts, @opts_schema)
+
+    input_rank = Nx.rank(x)
+
+    if input_rank != 2 do
+      raise ArgumentError, "Wrong input rank. Expected: 2, got: #{inspect(input_rank)}"
+    end
+
+    if opts[:missing_values] != :nan and
+       Nx.any(Nx.is_nan(x)) == Nx.tensor(1, type: :u8) do
+      raise ArgumentError,
+            ":missing_values other than :nan possible only if there is no Nx.Constant.nan() in the array"
+    end
+
+    x =
+      if opts[:missing_values] != :nan,
+         do: Nx.select(Nx.equal(x, opts[:missing_values]), Nx.Constants.nan(), x),
+         else: x
+
+    num_neighbors = opts[:number_of_neighbors]
+
+    if num_neighbors < 1 do
+      raise ArgumentError, "Number of neighbors must be greater than 0"
+    end
+
+    {rows, cols} = Nx.shape(x)
+
+    # TODO calculate all nan rows
+    row_nan_count = Nx.sum(Nx.is_nan(x), axes: [1])
+    # row with only 1 non nan value is also considered as all nan row
+    all_nan_rows =
+      Nx.select(Nx.greater_equal(row_nan_count, cols - 1), Nx.tensor(1), Nx.tensor(0))
+
+    all_nan_rows_count = Nx.sum(all_nan_rows)
+
+    if num_neighbors > rows - 1 - Nx.to_number(all_nan_rows_count) do
+      raise ArgumentError,
+            "Number of neighbors rows must be less than number valid of rows - 1 (valid row is row with more than 1 non nan value)"
+    end
+
+    statistics = knn_impute(x, num_neighbors: num_neighbors)
+    #     statistics = all_nan_rows_count
+    missing_values = opts[:missing_values]
+    %__MODULE__{statistics: statistics, missing_values: missing_values}
+  end
+
+  @doc """
+  Impute all missing values in `x` using fitted imputer.
+
+  ## Return Values
+
+  The function returns input tensor with NaN replaced with values saved in fitted imputer.
+
+  ## Examples
+
+      iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
+      iex> imputer = Scholar.Impute.KNNImputer.fit(x, strategy: :mean)
+      iex> Scholar.Impute.KNNImputer.transform(imputer, x)
+      Nx.tensor(
+         f32[5][2]
+        [
+          [40.0, 2.0],
+          [4.0, 5.0],
+          [7.0, 8.0],
+          [7.5, 8.0],
+          [11.0, 11.0]
+        ]
+      )
+  """
+  deftransform transform(%__MODULE__{statistics: statistics, missing_values: missing_values}, x) do
+    mask = if missing_values == :nan, do: Nx.is_nan(x), else: Nx.equal(x, missing_values)
+    Nx.select(mask, statistics, x)
+  end
+
+  defn knn_impute(x, opts \\ []) do
+    mask = Nx.is_nan(x)
+    {num_rows, num_cols} = Nx.shape(x)
+    num_neighbors = opts[:num_neighbors]
+
+    placeholder_value = Nx.Constants.nan()
+    values_to_impute = Nx.broadcast(Nx.tensor(placeholder_value), x)
+
+    {_, values_to_impute} =
+      while {{row = 0, mask, num_neighbors, num_rows, x}, values_to_impute},
+            Nx.less(row, num_rows) do
+        {_, values_to_impute} =
+          while {{col = 0, mask, num_neighbors, num_cols, row, x}, values_to_impute},
+                Nx.less(col, num_cols) do
+            if mask[row][col] > 0 do
+              {rows, cols} = Nx.shape(x)
+
+              neighbor_avg =
+                calculate_knn(x, row, col, rows: rows, num_neighbors: opts[:num_neighbors])
+
+              indices =
+                [Nx.stack(row), Nx.stack(col)]
+                |> Nx.concatenate()
+                |> Nx.stack()
+
+              values_to_impute = Nx.indexed_put(values_to_impute, indices, Nx.stack(neighbor_avg))
+              {{col + 1, mask, num_neighbors, cols, row, x}, values_to_impute}
+            else
+              {{col + 1, mask, num_neighbors, num_cols, row, x}, values_to_impute}
+            end
+          end
+
+        {{row + 1, mask, num_neighbors, num_rows, x}, values_to_impute}
+      end
+
+    values_to_impute
+  end
+
+  defnp calculate_knn(x, nan_row, nan_col, opts \\ []) do
+    opts = keyword!(opts, rows: 1, num_neighbors: 2)
+    rows = opts[:rows]
+    num_neighbors = opts[:num_neighbors]
+
+    row_distances = Nx.iota({rows}, type: {:f, 32})
+
+    row_with_value_to_fill = x[nan_row]
+
+    # compute the distance from the row being imputed to every other row;
+    # row_distances[i] holds the distance to row i
+    {_, row_distances} =
+      while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
+            Nx.less(i, rows) do
+        potential_donor = x[i]
+
+        if i == nan_row do
+          distance = Nx.Constants.infinity({:f, 32})
+          row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
+          {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
+        else
+          distance = nan_euclidian(row_with_value_to_fill, nan_col, potential_donor)
+          row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
+          {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
+        end
+      end
+
+    {_, indices} = Nx.top_k(-row_distances, k: num_neighbors)
+
+    gather_indices = Nx.stack([indices, Nx.broadcast(nan_col, indices)], axis: 1)
+    values = Nx.gather(x, gather_indices)
+    Nx.sum(values) / num_neighbors
+  end
+
+  # nan_col is the column of the value to impute
+  defnp nan_euclidian(row, nan_col, potential_neighbor) do
+    {coordinates} = Nx.shape(row)
+
+    # minus nan column
+    coordinates = coordinates - 1
+
+    # puts zeros in nan_col to calculate distance with squared_euclidean
+    new_row = Nx.indexed_put(row, Nx.new_axis(nan_col, 0), Nx.tensor(0))
+
+    # if potential neighbor has nan in nan_col, we don't want to calculate distance and the case if potential_neighbour is the row to impute
+    {potential_neighbor} =
+      if potential_neighbor[nan_col] == Nx.Constants.nan() do
+        potential_neighbor = Nx.broadcast(Nx.Constants.infinity({:f, 32}), potential_neighbor)
+        {potential_neighbor}
+      else
+        # inputes zeros in nan_col to calculate distance with squared_euclidean - distance will be 0 so no change to the distance value
+        potential_neighbor =
+          Nx.indexed_put(potential_neighbor, Nx.new_axis(nan_col, 0), Nx.tensor(0))
+
+        {potential_neighbor}
+      end
+
+    # calculates how many values are present in the row without nan_col to calculate weight for the distance
+    present_coordinates = Nx.sum(Nx.logical_not(Nx.is_nan(potential_neighbor))) - 1
+
+    # if row has all nans we skip it
+    {weight, potential_neighbor} =
+      if present_coordinates == 0 do
+        potential_neighbor = Nx.broadcast(Nx.Constants.infinity({:f, 32}), potential_neighbor)
+        weight = 0
+        {weight, potential_neighbor}
+      else
+        potential_neighbor = Nx.select(Nx.is_nan(potential_neighbor), new_row, potential_neighbor)
+        weight = coordinates / present_coordinates
+        {weight, potential_neighbor}
+      end
+
+    # calculating weighted euclidian distance
+    distance = Nx.sqrt(weight * squared_euclidean(new_row, potential_neighbor))
+
+    # return inf if potential_row is row to impute
+    Nx.select(Nx.is_nan(distance), Nx.Constants.infinity({:f, 32}), distance)
+  end
+end

From 47b4a653106272e8c6f6fdf3a2ef3d547136d913 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 20 Oct 2024 15:21:25 +0200
Subject: [PATCH 02/13] fix doctests

---
 lib/scholar/impute/knn_imputer.ex | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputer.ex
index 6db2bc90..0ad71747 100644
--- a/lib/scholar/impute/knn_imputer.ex
+++ b/lib/scholar/impute/knn_imputer.ex
@@ -46,11 +46,10 @@ defmodule Scholar.Impute.KNNImputer do
     [`Nx.Constant.nan/0`](https://hexdocs.pm/nx/Nx.Constants.html#nan/0) values.
 
   ## Examples
-
-      iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-      iex> Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
-      %Scholar.Impute.KNNImputer{
-      statistics: #Nx.Tensor<
+  iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
+  iex> Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
+  %Scholar.Impute.KNNImputer{
+  statistics: #Nx.Tensor<
         f32[5][2]
         [
             [NaN, NaN],
@@ -59,10 +58,9 @@ defmodule Scholar.Impute.KNNImputer do
             [7.5, NaN],
             [NaN, NaN]
           ]
-      >,
-      missing_values: :nan
-      }
-
+  >,
+  missing_values: :nan
+  }
   """
 
   deftransform fit(x, opts \\ []) do
@@ -93,7 +91,6 @@ defmodule Scholar.Impute.KNNImputer do
 
     {rows, cols} = Nx.shape(x)
 
-    # TODO calculate all nan rows
     row_nan_count = Nx.sum(Nx.is_nan(x), axes: [1])
     # row with only 1 non nan value is also considered as all nan row
     all_nan_rows =
@@ -125,7 +122,6 @@ defmodule Scholar.Impute.KNNImputer do
       iex> imputer = Scholar.Impute.KNNImputer.fit(x, strategy: :mean)
       iex> Scholar.Impute.KNNImputer.transform(imputer, x)
       Nx.tensor(
-         f32[5][2]
         [
           [40.0, 2.0],
           [4.0, 5.0],

From eb8f245f075c46b79b17c6658b32cdcf3bbd3135 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 20 Oct 2024 15:30:13 +0200
Subject: [PATCH 03/13] mix format

---
 lib/scholar/impute/knn_imputer.ex | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputer.ex
index 0ad71747..bcdf5d0b 100644
--- a/lib/scholar/impute/knn_imputer.ex
+++ b/lib/scholar/impute/knn_imputer.ex
@@ -73,15 +73,15 @@ defmodule Scholar.Impute.KNNImputer do
     end
 
     if opts[:missing_values] != :nan and
-       Nx.any(Nx.is_nan(x)) == Nx.tensor(1, type: :u8) do
+         Nx.any(Nx.is_nan(x)) == Nx.tensor(1, type: :u8) do
       raise ArgumentError,
             ":missing_values other than :nan possible only if there is no Nx.Constant.nan() in the array"
     end
 
     x =
       if opts[:missing_values] != :nan,
-         do: Nx.select(Nx.equal(x, opts[:missing_values]), Nx.Constants.nan(), x),
-         else: x
+        do: Nx.select(Nx.equal(x, opts[:missing_values]), Nx.Constants.nan(), x),
+        else: x
 
     num_neighbors = opts[:number_of_neighbors]
 

From 642b15e29cc6932adca4f194bb8f7b17135a4c85 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 20 Oct 2024 15:45:48 +0200
Subject: [PATCH 04/13] change placeholder_value to tensor

---
 lib/scholar/impute/knn_imputer.ex | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputer.ex
index bcdf5d0b..dcc716bf 100644
--- a/lib/scholar/impute/knn_imputer.ex
+++ b/lib/scholar/impute/knn_imputer.ex
@@ -103,7 +103,9 @@ defmodule Scholar.Impute.KNNImputer do
             "Number of neighbors rows must be less than number valid of rows - 1 (valid row is row with more than 1 non nan value)"
     end
 
-    statistics = knn_impute(x, num_neighbors: num_neighbors)
+    placeholder_value = Nx.Constants.nan() |> Nx.tensor()
+
+    statistics = knn_impute(x, placeholder_value, num_neighbors: num_neighbors)
     #     statistics = all_nan_rows_count
     missing_values = opts[:missing_values]
     %__MODULE__{statistics: statistics, missing_values: missing_values}
@@ -136,13 +138,12 @@ defmodule Scholar.Impute.KNNImputer do
     Nx.select(mask, statistics, x)
   end
 
-  defn knn_impute(x, opts \\ []) do
+  defnp knn_impute(x, placeholder_value, opts \\ []) do
     mask = Nx.is_nan(x)
     {num_rows, num_cols} = Nx.shape(x)
     num_neighbors = opts[:num_neighbors]
 
-    placeholder_value = Nx.Constants.nan()
-    values_to_impute = Nx.broadcast(Nx.tensor(placeholder_value), x)
+    values_to_impute = Nx.broadcast(placeholder_value, x)
 
     {_, values_to_impute} =
       while {{row = 0, mask, num_neighbors, num_rows, x}, values_to_impute},

From 520633ae8c0ab6d02266fbcbc3b1e6004f63f68d Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 20 Oct 2024 15:58:05 +0200
Subject: [PATCH 05/13] fix doctest

---
 lib/scholar/impute/knn_imputer.ex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputer.ex
index dcc716bf..0d602dc5 100644
--- a/lib/scholar/impute/knn_imputer.ex
+++ b/lib/scholar/impute/knn_imputer.ex
@@ -121,7 +121,7 @@ defmodule Scholar.Impute.KNNImputer do
   ## Examples
 
       iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-      iex> imputer = Scholar.Impute.KNNImputer.fit(x, strategy: :mean)
+      iex> imputer = Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
       iex> Scholar.Impute.KNNImputer.transform(imputer, x)
       Nx.tensor(
         [

From 926a1c70bb56d54dbcd3aea93aa87e071927cd0a Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 27 Oct 2024 11:20:37 +0100
Subject: [PATCH 06/13] apply suggested changes

---
 .../{knn_imputer.ex => knn_imputter.ex}       | 57 ++++++++++---------
 1 file changed, 29 insertions(+), 28 deletions(-)
 rename lib/scholar/impute/{knn_imputer.ex => knn_imputter.ex} (86%)

diff --git a/lib/scholar/impute/knn_imputer.ex b/lib/scholar/impute/knn_imputter.ex
similarity index 86%
rename from lib/scholar/impute/knn_imputer.ex
rename to lib/scholar/impute/knn_imputter.ex
index 0d602dc5..8d4cdef7 100644
--- a/lib/scholar/impute/knn_imputer.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -1,4 +1,4 @@
-defmodule Scholar.Impute.KNNImputer do
+defmodule Scholar.Impute.KNNImputter do
   @moduledoc """
   Imputer for completing missing values using k-Nearest Neighbors.
 
@@ -46,21 +46,22 @@ defmodule Scholar.Impute.KNNImputer do
     [`Nx.Constant.nan/0`](https://hexdocs.pm/nx/Nx.Constants.html#nan/0) values.
 
   ## Examples
-  iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-  iex> Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
-  %Scholar.Impute.KNNImputer{
-  statistics: #Nx.Tensor<
-        f32[5][2]
-        [
-            [NaN, NaN],
-            [NaN, NaN],
-            [NaN, 8.0],
-            [7.5, NaN],
-            [NaN, NaN]
-          ]
-  >,
-  missing_values: :nan
-  }
+
+      iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
+      iex> Scholar.Impute.KNNImputter.fit(x, number_of_neighbors: 2)
+      %Scholar.Impute.KNNImputter{
+        statistics: Nx.tensor(
+          [
+                  [NaN, NaN],
+                  [NaN, NaN],
+                  [NaN, 8.0],
+                  [7.5, NaN],
+                  [NaN, NaN]
+                ]
+        ),
+        missing_values: :nan
+      }
+
   """
 
   deftransform fit(x, opts \\ []) do
@@ -121,8 +122,8 @@ defmodule Scholar.Impute.KNNImputer do
   ## Examples
 
       iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-      iex> imputer = Scholar.Impute.KNNImputer.fit(x, number_of_neighbors: 2)
-      iex> Scholar.Impute.KNNImputer.transform(imputer, x)
+      iex> imputer = Scholar.Impute.KNNImputter.fit(x, number_of_neighbors: 2)
+      iex> Scholar.Impute.KNNImputter.transform(imputer, x)
       Nx.tensor(
         [
           [40.0, 2.0],
@@ -191,15 +192,15 @@ defmodule Scholar.Impute.KNNImputer do
             Nx.less(i, rows) do
         potential_donor = x[i]
 
-        if i == nan_row do
-          distance = Nx.Constants.infinity({:f, 32})
-          row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
-          {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
-        else
-          distance = nan_euclidian(row_with_value_to_fill, nan_col, potential_donor)
-          row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
-          {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
-        end
+        distance =
+          if i == nan_row do
+            Nx.Constants.infinity({:f, 32})
+          else
+            nan_euclidian(row_with_value_to_fill, nan_col, potential_donor)
+          end
+
+        row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
+        {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
       end
 
     {_, indices} = Nx.top_k(-row_distances, k: num_neighbors)
@@ -221,7 +222,7 @@ defmodule Scholar.Impute.KNNImputer do
 
     # if potential neighbor has nan in nan_col, we don't want to calculate distance and the case if potential_neighbour is the row to impute
     {potential_neighbor} =
-      if potential_neighbor[nan_col] == Nx.Constants.nan() do
+      if Nx.is_nan(potential_neighbor[nan_col]) do
         potential_neighbor = Nx.broadcast(Nx.Constants.infinity({:f, 32}), potential_neighbor)
         {potential_neighbor}
       else

From a3e0eba690d487f54263f0572f3c6fb04b0e26a2 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Sun, 27 Oct 2024 11:37:23 +0100
Subject: [PATCH 07/13] added type tensors

---
 lib/scholar/impute/knn_imputter.ex | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index 8d4cdef7..1b2a4984 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -194,7 +194,7 @@ defmodule Scholar.Impute.KNNImputter do
 
         distance =
           if i == nan_row do
-            Nx.Constants.infinity({:f, 32})
+            Nx.Constants.infinity(Nx.type(row_with_value_to_fill))
           else
             nan_euclidian(row_with_value_to_fill, nan_col, potential_donor)
           end
@@ -223,12 +223,18 @@ defmodule Scholar.Impute.KNNImputter do
     # if potential neighbor has nan in nan_col, we don't want to calculate distance and the case if potential_neighbour is the row to impute
     {potential_neighbor} =
       if Nx.is_nan(potential_neighbor[nan_col]) do
-        potential_neighbor = Nx.broadcast(Nx.Constants.infinity({:f, 32}), potential_neighbor)
+        potential_neighbor =
+          Nx.broadcast(Nx.Constants.infinity(Nx.type(potential_neighbor)), potential_neighbor)
+
         {potential_neighbor}
       else
         # inputes zeros in nan_col to calculate distance with squared_euclidean - distance will be 0 so no change to the distance value
         potential_neighbor =
-          Nx.indexed_put(potential_neighbor, Nx.new_axis(nan_col, 0), Nx.tensor(0))
+          Nx.indexed_put(
+            potential_neighbor,
+            Nx.new_axis(nan_col, 0),
+            Nx.tensor(0, type: Nx.type(row))
+          )
 
         {potential_neighbor}
       end
@@ -239,7 +245,9 @@ defmodule Scholar.Impute.KNNImputter do
     # if row has all nans we skip it
     {weight, potential_neighbor} =
       if present_coordinates == 0 do
-        potential_neighbor = Nx.broadcast(Nx.Constants.infinity({:f, 32}), potential_neighbor)
+        potential_neighbor =
+          Nx.broadcast(Nx.Constants.infinity(Nx.type(potential_neighbor)), potential_neighbor)
+
         weight = 0
         {weight, potential_neighbor}
       else
@@ -252,6 +260,6 @@ defmodule Scholar.Impute.KNNImputter do
     distance = Nx.sqrt(weight * squared_euclidean(new_row, potential_neighbor))
 
     # return inf if potential_row is row to impute
-    Nx.select(Nx.is_nan(distance), Nx.Constants.infinity({:f, 32}), distance)
+    Nx.select(Nx.is_nan(distance), Nx.Constants.infinity(Nx.type(distance)), distance)
   end
 end

From 108475d0d6d12b0f7232336088fafdd2c67ed9f1 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Mon, 28 Oct 2024 14:38:47 +0100
Subject: [PATCH 08/13] added tests and remove not working checks

---
 lib/scholar/impute/knn_imputter.ex        |  44 +++-----
 test/scholar/impute/knn_imputter_test.exs | 128 ++++++++++++++++++++++
 2 files changed, 141 insertions(+), 31 deletions(-)
 create mode 100644 test/scholar/impute/knn_imputter_test.exs

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index 1b2a4984..4a0901ed 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -18,6 +18,8 @@ defmodule Scholar.Impute.KNNImputter do
       default: :nan,
       doc: ~S"""
       The placeholder for the missing values. All occurrences of `:missing_values` will be imputed.
+
+      Any value other than the default `:nan` expects there are no NaNs in the input tensor.
       """
     ],
     number_of_neighbors: [
@@ -30,7 +32,12 @@ defmodule Scholar.Impute.KNNImputter do
   @opts_schema NimbleOptions.new!(opts_schema)
 
   @doc """
-  Imputer for completing missing values using k-Nearest Neighbors.
+  Imputter for completing missing values using k-Nearest Neighbors.
+
+  Preconditions:
+    * `number_of_neighbors` is a positive integer.
+    *  number of neighbors must be less than number valid of rows - 1 (valid row is row with more than 1 non nan value) otherwise it is better to use simple imputter
+    *  when you set a value different than :nan in `missing_values` there should be no NaNs in the input tensor
 
   ## Options
 
@@ -52,11 +59,11 @@ defmodule Scholar.Impute.KNNImputter do
       %Scholar.Impute.KNNImputter{
         statistics: Nx.tensor(
           [
-                  [NaN, NaN],
-                  [NaN, NaN],
-                  [NaN, 8.0],
-                  [7.5, NaN],
-                  [NaN, NaN]
+                  [:nan, :nan],
+                  [:nan, :nan],
+                  [:nan, 8.0],
+                  [7.5, :nan],
+                  [:nan, :nan]
                 ]
         ),
         missing_values: :nan
@@ -73,12 +80,6 @@ defmodule Scholar.Impute.KNNImputter do
       raise ArgumentError, "Wrong input rank. Expected: 2, got: #{inspect(input_rank)}"
     end
 
-    if opts[:missing_values] != :nan and
-         Nx.any(Nx.is_nan(x)) == Nx.tensor(1, type: :u8) do
-      raise ArgumentError,
-            ":missing_values other than :nan possible only if there is no Nx.Constant.nan() in the array"
-    end
-
     x =
       if opts[:missing_values] != :nan,
         do: Nx.select(Nx.equal(x, opts[:missing_values]), Nx.Constants.nan(), x),
@@ -86,28 +87,9 @@ defmodule Scholar.Impute.KNNImputter do
 
     num_neighbors = opts[:number_of_neighbors]
 
-    if num_neighbors < 1 do
-      raise ArgumentError, "Number of neighbors must be greater than 0"
-    end
-
-    {rows, cols} = Nx.shape(x)
-
-    row_nan_count = Nx.sum(Nx.is_nan(x), axes: [1])
-    # row with only 1 non nan value is also considered as all nan row
-    all_nan_rows =
-      Nx.select(Nx.greater_equal(row_nan_count, cols - 1), Nx.tensor(1), Nx.tensor(0))
-
-    all_nan_rows_count = Nx.sum(all_nan_rows)
-
-    if num_neighbors > rows - 1 - Nx.to_number(all_nan_rows_count) do
-      raise ArgumentError,
-            "Number of neighbors rows must be less than number valid of rows - 1 (valid row is row with more than 1 non nan value)"
-    end
-
     placeholder_value = Nx.Constants.nan() |> Nx.tensor()
 
     statistics = knn_impute(x, placeholder_value, num_neighbors: num_neighbors)
-    #     statistics = all_nan_rows_count
     missing_values = opts[:missing_values]
     %__MODULE__{statistics: statistics, missing_values: missing_values}
   end
diff --git a/test/scholar/impute/knn_imputter_test.exs b/test/scholar/impute/knn_imputter_test.exs
new file mode 100644
index 00000000..677a039f
--- /dev/null
+++ b/test/scholar/impute/knn_imputter_test.exs
@@ -0,0 +1,128 @@
+defmodule KNNImputterTest do
+  use Scholar.Case, async: true
+  alias Scholar.Impute.KNNImputter
+  doctest KNNImputter
+
+  describe "general cases" do
+    def generate_data() do
+      x = Nx.iota({5, 4})
+      x = Nx.select(Nx.equal(Nx.quotient(x, 5), 2), Nx.Constants.nan(), x)
+      Nx.indexed_put(x, Nx.tensor([[4, 2]]), Nx.tensor([6.0]))
+    end
+
+    test "general KNN imputer" do
+      x = generate_data()
+      jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
+      jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
+
+      knn_imputer =
+        %KNNImputter{statistics: statistics, missing_values: missing_values} =
+        jit_fit.(x, missing_values: :nan, number_of_neighbors: 2)
+
+      assert missing_values == :nan
+
+      assert statistics ==
+               Nx.tensor([
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, 4.0, 5.0],
+                 [2.0, 3.0, 4.0, :nan],
+                 [:nan, :nan, :nan, :nan]
+               ])
+
+      assert jit_transform.(knn_imputer, x) ==
+               Nx.tensor([
+                 [0.0, 1.0, 2.0, 3.0],
+                 [4.0, 5.0, 6.0, 7.0],
+                 [8.0, 9.0, 4.0, 5.0],
+                 [2.0, 3.0, 4.0, 15.0],
+                 [16.0, 17.0, 6.0, 19.0]
+               ])
+    end
+
+    test "general KNN imputer with different number of neighbors" do
+      x = generate_data()
+      jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
+      jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
+
+      knn_imputter =
+        %KNNImputter{statistics: statistics, missing_values: missing_values} =
+        jit_fit.(x, missing_values: :nan, number_of_neighbors: 1)
+
+      assert missing_values == :nan
+
+      assert statistics ==
+               Nx.tensor([
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, 2.0, 3.0],
+                 [0.0, 1.0, 2.0, :nan],
+                 [:nan, :nan, :nan, :nan]
+               ])
+
+      assert jit_transform.(knn_imputter, x) ==
+               Nx.tensor([
+                 [0.0, 1.0, 2.0, 3.0],
+                 [4.0, 5.0, 6.0, 7.0],
+                 [8.0, 9.0, 2.0, 3.0],
+                 [0.0, 1.0, 2.0, 15.0],
+                 [16.0, 17.0, 6.0, 19.0]
+               ])
+    end
+
+    test "missing values different than :nan" do
+      x = generate_data()
+      x = Nx.select(Nx.is_nan(x), Nx.tensor(19.0), x)
+      jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
+      jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
+
+      knn_imputter =
+        %KNNImputter{statistics: statistics, missing_values: missing_values} =
+        jit_fit.(x, missing_values: 19.0, number_of_neighbors: 2)
+
+      assert missing_values == 19.0
+
+      assert statistics ==
+               Nx.tensor([
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, :nan, :nan],
+                 [:nan, :nan, 4.0, 5.0],
+                 [2.0, 3.0, 4.0, :nan],
+                 [:nan, :nan, :nan, 5.0]
+               ])
+
+      assert jit_transform.(knn_imputter, x) ==
+               Nx.tensor([
+                 [0.0, 1.0, 2.0, 3.0],
+                 [4.0, 5.0, 6.0, 7.0],
+                 [8.0, 9.0, 4.0, 5.0],
+                 [2.0, 3.0, 4.0, 15.0],
+                 [16.0, 17.0, 6.0, 5.0]
+               ])
+    end
+  end
+
+  describe "errors" do
+    test "Wrong impute rank" do
+      x = Nx.tensor([1, 2, 2, 3])
+
+      assert_raise ArgumentError,
+                   "Wrong input rank. Expected: 2, got: 1",
+                   fn ->
+                     KNNImputter.fit(x, missing_values: 1, number_of_neighbors: 2)
+                   end
+    end
+
+    test "Invalid n_neighbors value" do
+      x = generate_data()
+
+      jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
+
+      assert_raise NimbleOptions.ValidationError,
+                   "invalid value for :number_of_neighbors option: expected positive integer, got: -1",
+                   fn ->
+                     jit_fit.(x, missing_values: 1.0, number_of_neighbors: -1)
+                   end
+    end
+  end
+end

From 1a3aae7adc331772205a10431e2bf78801726ca5 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Wed, 30 Oct 2024 11:01:48 +0100
Subject: [PATCH 09/13] change errors and refactor

---
 lib/scholar/impute/knn_imputter.ex        | 27 ++++++++++++-----------
 test/scholar/impute/knn_imputter_test.exs |  9 ++++----
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index 4a0901ed..b9d220f8 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -3,8 +3,8 @@ defmodule Scholar.Impute.KNNImputter do
   Imputer for completing missing values using k-Nearest Neighbors.
 
   Each sample's missing values are imputed using the mean value from
-    `n_neighbors` nearest neighbors found in the training set. Two samples are
-    close if the features that neither is missing are close.
+  `n_neighbors` nearest neighbors found in the training set. Two samples are
+  close if the features that neither is missing are close.
   """
   import Nx.Defn
   import Scholar.Metrics.Distance
@@ -36,7 +36,8 @@ defmodule Scholar.Impute.KNNImputter do
 
   Preconditions:
     * `number_of_neighbors` is a positive integer.
-    *  number of neighbors must be less than number valid of rows - 1 (valid row is row with more than 1 non nan value) otherwise it is better to use simple imputter
+    *  number of neighbors must be less than the number of valid rows - 1
+  (a valid row is a row with more than 1 non-NaN value); otherwise it is better to use a simple imputer
     *  when you set a value different than :nan in `missing_values` there should be no NaNs in the input tensor
 
   ## Options
@@ -77,20 +78,18 @@ defmodule Scholar.Impute.KNNImputter do
     input_rank = Nx.rank(x)
 
     if input_rank != 2 do
-      raise ArgumentError, "Wrong input rank. Expected: 2, got: #{inspect(input_rank)}"
+      raise ArgumentError, "wrong input rank. Expected: 2, got: #{inspect(input_rank)}"
     end
 
-    x =
-      if opts[:missing_values] != :nan,
-        do: Nx.select(Nx.equal(x, opts[:missing_values]), Nx.Constants.nan(), x),
-        else: x
+    missing_values = opts[:missing_values]
 
-    num_neighbors = opts[:number_of_neighbors]
+    x =
+      if missing_values != :nan,
+         do: Nx.select(Nx.equal(x, missing_values), :nan, x),
+         else: x
 
-    placeholder_value = Nx.Constants.nan() |> Nx.tensor()
 
-    statistics = knn_impute(x, placeholder_value, num_neighbors: num_neighbors)
-    missing_values = opts[:missing_values]
+    statistics = knn_impute(x, num_neighbors: opts[:number_of_neighbors], missing_values: missing_values)
     %__MODULE__{statistics: statistics, missing_values: missing_values}
   end
 
@@ -121,11 +120,13 @@ defmodule Scholar.Impute.KNNImputter do
     Nx.select(mask, statistics, x)
   end
 
-  defnp knn_impute(x, placeholder_value, opts \\ []) do
+  defnp knn_impute(x, opts \\ []) do
     mask = Nx.is_nan(x)
     {num_rows, num_cols} = Nx.shape(x)
     num_neighbors = opts[:num_neighbors]
 
+    placeholder_value = Nx.tensor(:nan)
+
     values_to_impute = Nx.broadcast(placeholder_value, x)
 
     {_, values_to_impute} =
diff --git a/test/scholar/impute/knn_imputter_test.exs b/test/scholar/impute/knn_imputter_test.exs
index 677a039f..87b297cb 100644
--- a/test/scholar/impute/knn_imputter_test.exs
+++ b/test/scholar/impute/knn_imputter_test.exs
@@ -72,7 +72,8 @@ defmodule KNNImputterTest do
 
     test "missing values different than :nan" do
       x = generate_data()
-      x = Nx.select(Nx.is_nan(x), Nx.tensor(19.0), x)
+      x = Nx.select(Nx.is_nan(x), 19.0, x)
+#      x = Nx.select(Nx.equal(x,19), :nan, x)
       jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
       jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)
 
@@ -103,17 +104,17 @@ defmodule KNNImputterTest do
   end
 
   describe "errors" do
-    test "Wrong impute rank" do
+    test "invalid impute rank" do
       x = Nx.tensor([1, 2, 2, 3])
 
       assert_raise ArgumentError,
-                   "Wrong input rank. Expected: 2, got: 1",
+                   "wrong input rank. Expected: 2, got: 1",
                    fn ->
                      KNNImputter.fit(x, missing_values: 1, number_of_neighbors: 2)
                    end
     end
 
-    test "Invalid n_neighbors value" do
+    test "invalid n_neighbors value" do
       x = generate_data()
 
       jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)

From 366584ea96a10d77bc7467723d12936dbaa0b8fd Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Wed, 27 Nov 2024 13:26:29 +0100
Subject: [PATCH 10/13] apply suggestions

---
 lib/scholar/impute/knn_imputter.ex        | 38 +++++++++++------------
 test/scholar/impute/knn_imputter_test.exs | 12 +++----
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index b9d220f8..4b0aa019 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -22,7 +22,7 @@ defmodule Scholar.Impute.KNNImputter do
       The default value expects there are no NaNs in the input tensor.
       """
     ],
-    number_of_neighbors: [
+    num_neighbors: [
       type: :pos_integer,
       default: 2,
       doc: "The number of nearest neighbors."
@@ -35,7 +35,7 @@ defmodule Scholar.Impute.KNNImputter do
   Imputter for completing missing values using k-Nearest Neighbors.
 
   Preconditions:
-    * `number_of_neighbors` is a positive integer.
+    * `num_neighbors` is a positive integer.
     *  number of neighbors must be less than number valid of rows - 1
   (valid row is row with more than 1 non nan value) otherwise it is better to use simple imputter
     *  when you set a value different than :nan in `missing_values` there should be no NaNs in the input tensor
@@ -50,13 +50,12 @@ defmodule Scholar.Impute.KNNImputter do
 
     * `:missing_values` - the same value as in `:missing_values`
 
-    * `:statistics` - The imputation fill value for each feature. Computing statistics can result in
-    [`Nx.Constant.nan/0`](https://hexdocs.pm/nx/Nx.Constants.html#nan/0) values.
+    * `:statistics` - The imputation fill value for each feature. Computing statistics can result in values.
 
   ## Examples
 
       iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-      iex> Scholar.Impute.KNNImputter.fit(x, number_of_neighbors: 2)
+      iex> Scholar.Impute.KNNImputter.fit(x, num_neighbors: 2)
       %Scholar.Impute.KNNImputter{
         statistics: Nx.tensor(
           [
@@ -89,7 +88,7 @@ defmodule Scholar.Impute.KNNImputter do
          else: x
 
 
-    statistics = knn_impute(x, num_neighbors: opts[:number_of_neighbors], missing_values: missing_values)
+    statistics = knn_impute(x, num_neighbors: opts[:num_neighbors], missing_values: missing_values)
     %__MODULE__{statistics: statistics, missing_values: missing_values}
   end
 
@@ -103,7 +102,7 @@ defmodule Scholar.Impute.KNNImputter do
   ## Examples
 
       iex> x = Nx.tensor([[40.0, 2.0],[4.0, 5.0],[7.0, :nan],[:nan, 8.0],[11.0, 11.0]])
-      iex> imputer = Scholar.Impute.KNNImputter.fit(x, number_of_neighbors: 2)
+      iex> imputer = Scholar.Impute.KNNImputter.fit(x, num_neighbors: 2)
       iex> Scholar.Impute.KNNImputter.transform(imputer, x)
       Nx.tensor(
         [
@@ -131,22 +130,17 @@ defmodule Scholar.Impute.KNNImputter do
 
     {_, values_to_impute} =
       while {{row = 0, mask, num_neighbors, num_rows, x}, values_to_impute},
-            Nx.less(row, num_rows) do
+            row < num_rows do
         {_, values_to_impute} =
           while {{col = 0, mask, num_neighbors, num_cols, row, x}, values_to_impute},
-                Nx.less(col, num_cols) do
-            if mask[row][col] > 0 do
+                col < num_cols do
+            if mask[row][col] do
               {rows, cols} = Nx.shape(x)
 
               neighbor_avg =
                 calculate_knn(x, row, col, rows: rows, num_neighbors: opts[:num_neighbors])
 
-              indices =
-                [Nx.stack(row), Nx.stack(col)]
-                |> Nx.concatenate()
-                |> Nx.stack()
-
-              values_to_impute = Nx.indexed_put(values_to_impute, indices, Nx.stack(neighbor_avg))
+              values_to_impute = Nx.put_slice(values_to_impute, [row, col], Nx.reshape(neighbor_avg, {1, 1}))
               {{col + 1, mask, num_neighbors, cols, row, x}, values_to_impute}
             else
               {{col + 1, mask, num_neighbors, num_cols, row, x}, values_to_impute}
@@ -172,19 +166,25 @@ defmodule Scholar.Impute.KNNImputter do
     # to the row is under its index in the tensor
     {_, row_distances} =
       while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
-            Nx.less(i, rows) do
+            i < rows do
+#        potential_donors = Nx.vectorize(x, :rows)
+#        distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
+#        row_distances = Nx.indexed_put(distances, Nx.new_axis(i, 0), Nx.Constants.infinity())
         potential_donor = x[i]
 
         distance =
           if i == nan_row do
             Nx.Constants.infinity(Nx.type(row_with_value_to_fill))
           else
-            nan_euclidian(row_with_value_to_fill, nan_col, potential_donor)
+            nan_euclidean(row_with_value_to_fill, nan_col, potential_donor)
           end
 
         row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
         {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
       end
+#    potential_donors = Nx.vectorize(x, :rows)
+#    distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
+#    row_distances = Nx.indexed_put(distances, [i], Nx.Constants.infinity())
 
     {_, indices} = Nx.top_k(-row_distances, k: num_neighbors)
 
@@ -194,7 +194,7 @@ defmodule Scholar.Impute.KNNImputter do
   end
 
   # nan_col is the column of the value to impute
-  defnp nan_euclidian(row, nan_col, potential_neighbor) do
+  defnp nan_euclidean(row, nan_col, potential_neighbor) do
     {coordinates} = Nx.shape(row)
 
     # minus nan column
diff --git a/test/scholar/impute/knn_imputter_test.exs b/test/scholar/impute/knn_imputter_test.exs
index 87b297cb..c76becb4 100644
--- a/test/scholar/impute/knn_imputter_test.exs
+++ b/test/scholar/impute/knn_imputter_test.exs
@@ -17,7 +17,7 @@ defmodule KNNImputterTest do
 
       knn_imputer =
         %KNNImputter{statistics: statistics, missing_values: missing_values} =
-        jit_fit.(x, missing_values: :nan, number_of_neighbors: 2)
+        jit_fit.(x, missing_values: :nan, num_neighbors: 2)
 
       assert missing_values == :nan
 
@@ -47,7 +47,7 @@ defmodule KNNImputterTest do
 
       knn_imputter =
         %KNNImputter{statistics: statistics, missing_values: missing_values} =
-        jit_fit.(x, missing_values: :nan, number_of_neighbors: 1)
+        jit_fit.(x, missing_values: :nan, num_neighbors: 1)
 
       assert missing_values == :nan
 
@@ -79,7 +79,7 @@ defmodule KNNImputterTest do
 
       knn_imputter =
         %KNNImputter{statistics: statistics, missing_values: missing_values} =
-        jit_fit.(x, missing_values: 19.0, number_of_neighbors: 2)
+        jit_fit.(x, missing_values: 19.0, num_neighbors: 2)
 
       assert missing_values == 19.0
 
@@ -110,7 +110,7 @@ defmodule KNNImputterTest do
       assert_raise ArgumentError,
                    "wrong input rank. Expected: 2, got: 1",
                    fn ->
-                     KNNImputter.fit(x, missing_values: 1, number_of_neighbors: 2)
+                     KNNImputter.fit(x, missing_values: 1, num_neighbors: 2)
                    end
     end
 
@@ -120,9 +120,9 @@ defmodule KNNImputterTest do
       jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
 
       assert_raise NimbleOptions.ValidationError,
-                   "invalid value for :number_of_neighbors option: expected positive integer, got: -1",
+                   "invalid value for :num_neighbors option: expected positive integer, got: -1",
                    fn ->
-                     jit_fit.(x, missing_values: 1.0, number_of_neighbors: -1)
+                     jit_fit.(x, missing_values: 1.0, num_neighbors: -1)
                    end
     end
   end

From f4b6c3987f32286e77fd97fe83924fb8895bb2a9 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Wed, 27 Nov 2024 14:59:58 +0100
Subject: [PATCH 11/13] apply suggestions

---
 lib/scholar/impute/knn_imputter.ex | 51 ++++++++++++++----------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index 4b0aa019..d62bfaf2 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -14,7 +14,7 @@ defmodule Scholar.Impute.KNNImputter do
 
   opts_schema = [
     missing_values: [
-      type: {:or, [:float, :integer, {:in, [:nan]}]},
+      type: {:or, [:float, :integer, {:in, [:infinity, :neg_infinity, :nan]}]},
       default: :nan,
       doc: ~S"""
       The placeholder for the missing values. All occurrences of `:missing_values` will be imputed.
@@ -35,10 +35,9 @@ defmodule Scholar.Impute.KNNImputter do
   Imputter for completing missing values using k-Nearest Neighbors.
 
   Preconditions:
-    * `num_neighbors` is a positive integer.
-    *  number of neighbors must be less than number valid of rows - 1
-  (valid row is row with more than 1 non nan value) otherwise it is better to use simple imputter
-    *  when you set a value different than :nan in `missing_values` there should be no NaNs in the input tensor
+    *  The number of neighbors must be less than the number of valid rows - 1.
+    *  A valid row is a row with more than one non-NaN value. Otherwise it is better to use a simpler imputter.
+    *  When you set a value different than `:nan` in `missing_values`, there should be no NaNs in the input tensor.
 
   ## Options
 
@@ -48,7 +47,7 @@ defmodule Scholar.Impute.KNNImputter do
 
     The function returns a struct with the following parameters:
 
-    * `:missing_values` - the same value as in `:missing_values`
+    * `:missing_values` - the same value as in the `:missing_values` option
 
     * `:statistics` - The imputation fill value for each feature. Computing statistics can result in values.
 
@@ -164,27 +163,25 @@ defmodule Scholar.Impute.KNNImputter do
 
     # calculate distance between row with nan to fill and all other rows where distance
     # to the row is under its index in the tensor
-    {_, row_distances} =
-      while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
-            i < rows do
-#        potential_donors = Nx.vectorize(x, :rows)
-#        distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
-#        row_distances = Nx.indexed_put(distances, Nx.new_axis(i, 0), Nx.Constants.infinity())
-        potential_donor = x[i]
-
-        distance =
-          if i == nan_row do
-            Nx.Constants.infinity(Nx.type(row_with_value_to_fill))
-          else
-            nan_euclidean(row_with_value_to_fill, nan_col, potential_donor)
-          end
-
-        row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
-        {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
-      end
-#    potential_donors = Nx.vectorize(x, :rows)
-#    distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
-#    row_distances = Nx.indexed_put(distances, [i], Nx.Constants.infinity())
+#    {_, row_distances} =
+#      while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
+#            i < rows do
+#
+#        potential_donor = x[i]
+#
+#        distance =
+#          if i == nan_row do
+#            Nx.Constants.infinity(Nx.type(row_with_value_to_fill))
+#          else
+#            nan_euclidean(row_with_value_to_fill, nan_col, potential_donor)
+#          end
+#
+#        row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
+#        {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
+#      end
+    potential_donors = Nx.vectorize(x, :rows)
+    distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
+    row_distances = Nx.indexed_put(distances, Nx.tensor(nan_row), Nx.Constants.infinity())
 
     {_, indices} = Nx.top_k(-row_distances, k: num_neighbors)
 

From e23a9dd6c524c8397d77093159360b25af7f6bc9 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Thu, 28 Nov 2024 12:45:51 +0100
Subject: [PATCH 12/13] apply suggested changes

---
 lib/scholar/impute/knn_imputter.ex | 38 +++++++++++++++---------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index d62bfaf2..3107eca2 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -163,25 +163,18 @@ defmodule Scholar.Impute.KNNImputter do
 
     # calculate distance between row with nan to fill and all other rows where distance
     # to the row is under its index in the tensor
-#    {_, row_distances} =
-#      while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
-#            i < rows do
-#
-#        potential_donor = x[i]
-#
-#        distance =
-#          if i == nan_row do
-#            Nx.Constants.infinity(Nx.type(row_with_value_to_fill))
-#          else
-#            nan_euclidean(row_with_value_to_fill, nan_col, potential_donor)
-#          end
-#
-#        row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
-#        {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
-#      end
-    potential_donors = Nx.vectorize(x, :rows)
-    distances = nan_euclidean(row_with_value_to_fill, nan_col, potential_donors) |> Nx.devectorize()
-    row_distances = Nx.indexed_put(distances, Nx.tensor(nan_row), Nx.Constants.infinity())
+    {_, row_distances} =
+      while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
+            i < rows do
+
+        potential_donor = x[i]
+
+        distance =
+          calculate_distance(row_with_value_to_fill, nan_col, potential_donor,nan_row)
+
+        row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
+        {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
+      end
 
     {_, indices} = Nx.top_k(-row_distances, k: num_neighbors)
 
@@ -190,6 +183,13 @@ defmodule Scholar.Impute.KNNImputter do
     Nx.sum(values) / num_neighbors
   end
 
+  defnp calculate_distance(row,nan_col,potential_donor,nan_row) do
+    case row do
+      ^nan_row -> Nx.Constants.infinity(Nx.type(row))
+      _ -> nan_euclidean(row, nan_col, potential_donor)
+    end
+  end
+
   # nan_col is the column of the value to impute
   defnp nan_euclidean(row, nan_col, potential_neighbor) do
     {coordinates} = Nx.shape(row)

From d5913eb4d4b76d38a7ccb51aa1c423f6a8512966 Mon Sep 17 00:00:00 2001
From: srzeszut <szczepan@fedora.com>
Date: Thu, 28 Nov 2024 12:55:25 +0100
Subject: [PATCH 13/13] mix format

---
 lib/scholar/impute/knn_imputter.ex        | 16 +++++++++-------
 test/scholar/impute/knn_imputter_test.exs |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/lib/scholar/impute/knn_imputter.ex b/lib/scholar/impute/knn_imputter.ex
index 3107eca2..6b5a602e 100644
--- a/lib/scholar/impute/knn_imputter.ex
+++ b/lib/scholar/impute/knn_imputter.ex
@@ -83,11 +83,12 @@ defmodule Scholar.Impute.KNNImputter do
 
     x =
       if missing_values != :nan,
-         do: Nx.select(Nx.equal(x, missing_values), :nan, x),
-         else: x
+        do: Nx.select(Nx.equal(x, missing_values), :nan, x),
+        else: x
 
+    statistics =
+      knn_impute(x, num_neighbors: opts[:num_neighbors], missing_values: missing_values)
 
-    statistics = knn_impute(x, num_neighbors: opts[:num_neighbors], missing_values: missing_values)
     %__MODULE__{statistics: statistics, missing_values: missing_values}
   end
 
@@ -139,7 +140,9 @@ defmodule Scholar.Impute.KNNImputter do
               neighbor_avg =
                 calculate_knn(x, row, col, rows: rows, num_neighbors: opts[:num_neighbors])
 
-              values_to_impute = Nx.put_slice(values_to_impute, [row, col], Nx.reshape(neighbor_avg, {1, 1}))
+              values_to_impute =
+                Nx.put_slice(values_to_impute, [row, col], Nx.reshape(neighbor_avg, {1, 1}))
+
               {{col + 1, mask, num_neighbors, cols, row, x}, values_to_impute}
             else
               {{col + 1, mask, num_neighbors, num_cols, row, x}, values_to_impute}
@@ -166,11 +169,10 @@ defmodule Scholar.Impute.KNNImputter do
     {_, row_distances} =
       while {{i = 0, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances},
             i < rows do
-
         potential_donor = x[i]
 
         distance =
-          calculate_distance(row_with_value_to_fill, nan_col, potential_donor,nan_row)
+          calculate_distance(row_with_value_to_fill, nan_col, potential_donor, nan_row)
 
         row_distances = Nx.indexed_put(row_distances, Nx.new_axis(i, 0), distance)
         {{i + 1, x, row_with_value_to_fill, rows, nan_row, nan_col}, row_distances}
@@ -183,7 +185,7 @@ defmodule Scholar.Impute.KNNImputter do
     Nx.sum(values) / num_neighbors
   end
 
-  defnp calculate_distance(row,nan_col,potential_donor,nan_row) do
+  defnp calculate_distance(row, nan_col, potential_donor, nan_row) do
     case row do
       ^nan_row -> Nx.Constants.infinity(Nx.type(row))
       _ -> nan_euclidean(row, nan_col, potential_donor)
diff --git a/test/scholar/impute/knn_imputter_test.exs b/test/scholar/impute/knn_imputter_test.exs
index c76becb4..8937e7c9 100644
--- a/test/scholar/impute/knn_imputter_test.exs
+++ b/test/scholar/impute/knn_imputter_test.exs
@@ -73,7 +73,7 @@ defmodule KNNImputterTest do
     test "missing values different than :nan" do
       x = generate_data()
       x = Nx.select(Nx.is_nan(x), 19.0, x)
-#      x = Nx.select(Nx.equal(x,19), :nan, x)
+      #      x = Nx.select(Nx.equal(x,19), :nan, x)
       jit_fit = Nx.Defn.jit(&KNNImputter.fit/2)
       jit_transform = Nx.Defn.jit(&KNNImputter.transform/2)