Skip to content

Commit

Permalink
Add tests, change default param, add new preprocessing to isotonic regression
Browse files Browse the repository at this point in the history
  • Loading branch information
msluszniak committed Nov 7, 2023
1 parent b231377 commit b3d8f0a
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 67 deletions.
96 changes: 58 additions & 38 deletions lib/scholar/linear/isotonic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ defmodule Scholar.Linear.IsotonicRegression do
observations by solving a convex optimization problem. It is a form of
regression analysis that can be used as an alternative to polynomial
regression to fit nonlinear data.
Time complexity of isotonic regression is $O(N^2)$ where $N$ is the
number of points.
"""
require Nx
import Nx.Defn, except: [transform: 2]
Expand Down Expand Up @@ -38,7 +41,7 @@ defmodule Scholar.Linear.IsotonicRegression do
y_thresholds: Nx.Tensor.t(),
increasing: Nx.Tensor.t(),
cutoff_index: Nx.Tensor.t(),
preprocess: Tuple.t() | Scholar.Interpolation.Linear.t()
preprocess: tuple() | Scholar.Interpolation.Linear.t()
}

opts = [
Expand Down Expand Up @@ -174,8 +177,6 @@ defmodule Scholar.Linear.IsotonicRegression do
Nx.u8(0)
end

# increasing = Nx.u8(1)

fit_n(x, y, sample_weights, increasing, opts)
end

Expand Down Expand Up @@ -206,12 +207,12 @@ defmodule Scholar.Linear.IsotonicRegression do
iex> Scholar.Linear.IsotonicRegression.predict(model, to_predict)
#Nx.Tensor<
f32[10]
[1.0, 1.6666667461395264, 2.3333334922790527, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
[1.0, 1.6666667461395264, 2.3333332538604736, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
>
"""
defn predict(model, x) do
check_input_shape(x)
# check_preprocess(model)
check_preprocess(model)

x = Nx.flatten(x)
x = Nx.clip(x, model.x_min, model.x_max)
Expand Down Expand Up @@ -261,43 +262,61 @@ defmodule Scholar.Linear.IsotonicRegression do
]
),
x: Nx.tensor(
[1.0, 4.0, 7.0, 9.0, 10.0]
[1.0, 4.0, 7.0, 9.0, 10.0, 11.0]
)
}
}
"""
defn preprocess(model) do
# cutoff = Nx.to_number(model.cutoff_index)
# x = model.x_thresholds[0..cutoff]
# y = model.y_thresholds[0..cutoff]

# {x, y} =
# if trim_duplicates do
# keep_mask =
# Nx.logical_or(
# Nx.not_equal(y[1..-2//1], y[0..-3//1]),
# Nx.not_equal(y[1..-2//1], y[2..-1//1])
# )

# keep_mask = Nx.concatenate([Nx.tensor([1]), keep_mask, Nx.tensor([1])])

# indices =
# Nx.iota({Nx.axis_size(y, 0)})
# |> Nx.add(1)
# |> Nx.multiply(keep_mask)
# |> Nx.to_flat_list()

# indices = Enum.filter(indices, fn x -> x != 0 end) |> Nx.tensor() |> Nx.subtract(1)
# x = Nx.take(x, indices)
# y = Nx.take(y, indices)
# {x, y}
# else
# {x, y}
# end

# model = %__MODULE__{model | x_thresholds: x}
# model = %__MODULE__{model | y_thresholds: y}
# Prepares a fitted isotonic-regression `model` for prediction: drops the
# unused tail of the threshold tensors (everything past `cutoff_index`),
# optionally removes redundant interior thresholds, and stores a fitted
# `Scholar.Interpolation.Linear` model under the `:preprocess` field.
#
# This is a plain `def` (not `defn`) because it calls `Nx.to_number/1` and
# uses `Enum.filter/2`, which cannot run inside defn-compiled code.
def preprocess(model, trim_duplicates \\ true) do
# Only the first `cutoff_index + 1` entries of the threshold tensors are valid.
cutoff = Nx.to_number(model.cutoff_index)
x = model.x_thresholds[0..cutoff]
y = model.y_thresholds[0..cutoff]

{x, y} =
if trim_duplicates do
# Keep an interior point only if its y differs from at least one
# neighbor; a point equal to both neighbors adds no information to a
# piecewise-linear interpolation.
# NOTE(review): the slices below assume y has at least 3 elements —
# confirm callers guarantee that.
keep_mask =
Nx.logical_or(
Nx.not_equal(y[1..-2//1], y[0..-3//1]),
Nx.not_equal(y[1..-2//1], y[2..-1//1])
)

# The first and last thresholds are always kept.
keep_mask = Nx.concatenate([Nx.tensor([1]), keep_mask, Nx.tensor([1])])

# Build 1-based positions and zero out the masked-off ones...
indices =
Nx.iota({Nx.axis_size(y, 0)})
|> Nx.add(1)
|> Nx.multiply(keep_mask)
|> Nx.to_flat_list()

# ...then drop the zeros and shift back to 0-based indices for the gather.
indices = Enum.filter(indices, fn x -> x != 0 end) |> Nx.tensor() |> Nx.subtract(1)
x = Nx.take(x, indices)
y = Nx.take(y, indices)
{x, y}
else
{x, y}
end

model = %__MODULE__{model | x_thresholds: x}
model = %__MODULE__{model | y_thresholds: y}

# Fit the linear interpolator once so `predict/2` can reuse it.
%__MODULE__{
model
| preprocess:
Scholar.Interpolation.Linear.fit(
model.x_thresholds,
model.y_thresholds
)
}
end

@doc """
Preprocesses the `model` for prediction.
Returns an updated `model`. This is a special version of `preprocess/1` that
does not trim duplicates so it can be used in defns. It is not recommended
to use this function directly.
"""
defn special_preprocess(model) do
%__MODULE__{
model
| preprocess:
Expand Down Expand Up @@ -517,7 +536,8 @@ defmodule Scholar.Linear.IsotonicRegression do

defnp check_increasing(x, y) do
x = Nx.new_axis(x, -1)
y = Nx.new_axis(y, -1)
model = Scholar.Linear.LinearRegression.fit(x, y)
Nx.squeeze(model.coefficients[0] >= 0)
model.coefficients[0][0] >= 0
end
end
59 changes: 30 additions & 29 deletions lib/scholar/manifold/mds.ex
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ defmodule Scholar.Manifold.MDS do
],
metric: [
type: :boolean,
default: false,
default: true,
doc: ~S"""
If `true`, use dissimilarities as metric distances in the embedding space.
"""
Expand All @@ -33,6 +33,7 @@ defmodule Scholar.Manifold.MDS do
default: false,
doc: ~S"""
If `true`, normalize the stress by the sum of squared dissimilarities.
Only valid if `metric` is `false`.
"""
],
eps: [
Expand Down Expand Up @@ -78,6 +79,7 @@ defmodule Scholar.Manifold.MDS do
metric = if opts[:metric], do: 1, else: 0
normalized_stress = if opts[:normalized_stress], do: 1, else: 0
eps = opts[:eps]
n = Nx.axis_size(dissimilarities, 0)

{{x, stress, i}, _} =
while {{x, _stress = Nx.Constants.infinity(Nx.type(dissimilarities)), i = 0},
Expand All @@ -86,7 +88,6 @@ defmodule Scholar.Manifold.MDS do
metric, normalized_stress, eps, stop_value = 0}},
i < max_iter and not stop_value do
dis = Distance.pairwise_euclidean(x)
n = Nx.axis_size(dissimilarities, 0)

disparities =
if metric do
Expand All @@ -96,14 +97,14 @@ defmodule Scholar.Manifold.MDS do

dis_flat_indices = lower_triangle_indices(dis)

n = Nx.axis_size(dis, 0)

dis_flat_w = Nx.take(dis_flat, dis_flat_indices)

disparities_flat_model =
Scholar.Linear.IsotonicRegression.fit(similarities_flat_w, dis_flat_w)
Scholar.Linear.IsotonicRegression.fit(similarities_flat_w, dis_flat_w,
increasing: true
)

model = Scholar.Linear.IsotonicRegression.preprocess(disparities_flat_model)
model = Scholar.Linear.IsotonicRegression.special_preprocess(disparities_flat_model)

disparities_flat =
Scholar.Linear.IsotonicRegression.predict(model, similarities_flat_w)
Expand Down Expand Up @@ -133,7 +134,7 @@ defmodule Scholar.Manifold.MDS do
ratio = disparities / dis
b = -ratio
b = Nx.put_diagonal(b, Nx.take_diagonal(b) + Nx.sum(ratio, axes: [1]))
x = 1.0 / n * Nx.dot(b, x)
x = Nx.dot(b, x) * (1.0 / n)

dis = Nx.sum(Nx.sqrt(Nx.sum(x ** 2, axes: [1])))

Expand Down Expand Up @@ -209,7 +210,7 @@ defmodule Scholar.Manifold.MDS do
{best, best_stress, best_iter}
end

defn lower_triangle_indices(tensor) do
defnp lower_triangle_indices(tensor) do
n = Nx.axis_size(tensor, 0)

temp = Nx.broadcast(Nx.s64(0), {div(n * (n - 1), 2)})
Expand Down Expand Up @@ -249,17 +250,17 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.040477119386196136, -0.4997042417526245],
[-0.35801631212234497, -0.09504470974206924],
[-0.08517580479383469, 0.35293734073638916],
[0.42080432176589966, 0.23617777228355408]
[16.3013916015625, -3.444634437561035],
[5.866805553436279, 1.6378790140151978],
[-5.487184524536133, 0.5837264657020569],
[-16.681013107299805, 1.2230290174484253]
]
),
stress: Nx.tensor(
0.0016479993937537074
0.3993147909641266
),
n_iter: Nx.tensor(
19
23
)
}
"""
Expand Down Expand Up @@ -288,17 +289,17 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.040477119386196136, -0.4997042417526245],
[-0.35801631212234497, -0.09504470974206924],
[-0.08517580479383469, 0.35293734073638916],
[0.42080432176589966, 0.23617777228355408]
[16.3013916015625, -3.444634437561035],
[5.866805553436279, 1.6378790140151978],
[-5.487184524536133, 0.5837264657020569],
[-16.681013107299805, 1.2230290174484253]
]
),
stress: Nx.tensor(
0.0016479993937537074
0.3993147909641266
),
n_iter: Nx.tensor(
19
23
)
}
"""
Expand Down Expand Up @@ -333,10 +334,10 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.41079193353652954, 0.41079193353652954],
[0.1369306445121765, 0.1369306445121765],
[-0.1369306445121765, -0.1369306445121765],
[-0.41079193353652954, -0.41079193353652954]
[11.858541488647461, 11.858541488647461],
[3.9528470039367676, 3.9528470039367676],
[-3.9528470039367676, -3.9528470039367676],
[-11.858541488647461, -11.858541488647461]
]
),
stress: Nx.tensor(
Expand Down Expand Up @@ -373,14 +374,14 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.3354101777076721, 0.3354101777076721, 0.3354101777076721],
[0.11180339753627777, 0.11180339753627777, 0.11180339753627777],
[-0.11180339753627777, -0.11180340498685837, -0.11180339753627777],
[-0.3354102075099945, -0.3354102075099945, -0.3354102075099945]
[9.682458877563477, 9.682458877563477, 9.682458877563477],
[3.2274858951568604, 3.2274858951568604, 3.2274858951568604],
[-3.2274863719940186, -3.2274863719940186, -3.2274863719940186],
[-9.682458877563477, -9.682458877563477, -9.682458877563477]
]
),
stress: Nx.tensor(
2.6645352591003757e-15
9.094947017729282e-12
),
n_iter: Nx.tensor(
3
Expand Down
Loading

0 comments on commit b3d8f0a

Please sign in to comment.