From d769bf543cf24e0e5073451880eba9297a304821 Mon Sep 17 00:00:00 2001 From: norm4nn Date: Mon, 28 Oct 2024 18:22:14 +0100 Subject: [PATCH 1/9] pls_svd module created --- lib/scholar/cross_decomposition/pls_svd.ex | 117 +++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 lib/scholar/cross_decomposition/pls_svd.ex diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex new file mode 100644 index 00000000..81fc79e2 --- /dev/null +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -0,0 +1,117 @@ +defmodule Scholar.CrossDecomposition.PLSSVD do + @moduledoc """ + + """ + import Nx.Defn + import Scholar.Shared + + opts_schema = [ + num_components: [ + default: 2, + type: :pos_integer, + doc: "The number of components to keep. Should be in `[1, + min(n_samples, n_features, n_targets)]`." + ], + scale: [ + default: true, + type: :boolean, + doc: "Whether to scale `x` and `x`." + ], + ] + + @opts_schema NimbleOptions.new!(opts_schema) + + @doc """ + + ## Options + + #{NimbleOptions.docs(@opts_schema)} + + ## Return Values + + + ## Examples + + """ + + deftransform fit(x, y opts \\ []) do + fit_n(x, y, NimbleOptions.validate!(opts, @opts_schema)) + end + + defnp fit_n(x, y, opts) do + {x, y} = check_x_y(x, y, opts) + num_components = opts[:num_components] + {x, y, x_mean, y_mean, x_std, y_std} = center_scale_xy(x, y, opts) + end + + defnp check_x_y(x, y, opts) do + y = + case Nx.shape(y) do + {n} -> Nx.reshape(y, {n, 1}) + _ -> y + end + + num_components = opts[:num_components] + {num_samples, num_features} = Nx.shape(x) + {num_samples_y, num_targets} = Nx.axis_shape(y, 0) + + cond do + num_samples != num_samples_y -> + raise ArgumentError, + """ + num_samples must be the same for x and y \ + x num_samples = #{num_samples}, y num_samples = #{num_samples_y} + """ + + num_components > num_features -> + raise ArgumentError, + """ + num_components must be less than or equal to \ + num_features = #{num_features}, 
got #{num_components} + """ + + num_components > num_samples -> + raise ArgumentError, + """ + num_components must be less than or equal to \ + num_samples = #{num_samples}, got #{num_components} + """ + + num_components > num_targets -> + raise ArgumentError, + """ + num_components must be less than or equal to \ + num_targets = #{num_targets}, got #{num_components} + """ + + true -> + nil + end + {x, y} + end + + defnp center_scale_x_y(x, y, opts) do + scale = opts[:scale] + x_mean = Nx.mean(x, axis: 0) + x = x - x_mean + + y_mean = Nx.mean(y, axis: 0) + y = y - y_mean + + if scale do + x_std = Nx.standard_deviation(x, axes: [0], ddof: 1) + x_std = Nx.select(x_std == 0.0, 1.0, x_std) + x = x / Nx.broadcast(x_std, Nx.shape(x)) + + y_std = Nx.standard_deviation(y, axes: [0], ddof: 1) + y_std = Nx.select(y_std == 0.0, 1.0, y_std) + y = y / Nx.broadcast(y_std, Nx.shape(y)) + + {x, y, x_mean, y_mean, x_std, y_std} + else + x_std = Nx.broadcast(1, {Nx.axis_size(x, 1)}) + y_std = Nx.broadcast(1, {Nx.axis_size(y, 1)}) + {x, y, x_mean, y_mean, x_std, y_std} + end + end +end From 745b852785ae5a47935cfc05262a445996895432 Mon Sep 17 00:00:00 2001 From: norm4nn Date: Wed, 30 Oct 2024 16:45:34 +0100 Subject: [PATCH 2/9] added docs --- lib/scholar/cross_decomposition/pls_svd.ex | 260 ++++++++++++++++++++- 1 file changed, 250 insertions(+), 10 deletions(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index 81fc79e2..b4c2ec1a 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -1,9 +1,33 @@ defmodule Scholar.CrossDecomposition.PLSSVD do @moduledoc """ + Partial Least Square SVD. This transformer simply performs a SVD on the cross-covariance matrix. + + It is able to project both the training data `x` and the targets + `y`. The training data `x` is projected on the left singular vectors, while + the targets are projected on the right singular vectors. 
+ + """ import Nx.Defn - import Scholar.Shared + + @derive {Nx.Container, + containers: [ + :x_mean, + :y_mean, + :x_std, + :y_std, + :x_weights, + :y_weights + ]} + defstruct [ + :x_mean, + :y_mean, + :x_std, + :y_std, + :x_weights, + :y_weights + ] opts_schema = [ num_components: [ @@ -15,13 +39,19 @@ defmodule Scholar.CrossDecomposition.PLSSVD do scale: [ default: true, type: :boolean, - doc: "Whether to scale `x` and `x`." - ], + doc: "Whether to scale `x` and `y`." + ] ] @opts_schema NimbleOptions.new!(opts_schema) @doc """ + Fit model to data. + Takes as arguments: + + * `x` - training samples, `{num_samples, num_features}` shaped tensor + + * `y` - targets, `{num_samples, num_targets}` shaped `y` tensor ## Options @@ -29,19 +59,228 @@ defmodule Scholar.CrossDecomposition.PLSSVD do ## Return Values + The function returns fitted estimator represented by struct with the following parameters: + + * `:x_mean` - tensor of shape `{num_features}` with `x` tensor mean values aggregated by axis 0. + + * `:y_mean` - tensor of shape `{num_features}` with `x` tensor mean values aggregated by axis 0. + + * `:x_std` - tensor of shape `{num_components}` + Percentage of variance explained by each of the selected components. + + * `:y_std` - ndarray of shape `{num_components}` + The singular values corresponding to each of the selected components. + + * `:x_weights` - ndarray of shape `{num_components}` + The singular values corresponding to each of the selected components. + + * `:y_weights` - ndarray of shape `{num_components}` + The singular values corresponding to each of the selected components. 
## Examples + iex> x = Nx.tensor([[0.0, 0.0, 1.0], + [1.0, 0.0, 0.0], + [2.0, 2.0, 2.0], + [2.0, 5.0, 4.0]]) + iex> y = Nx.tensor([[0.1, -0.2], + [0.9, 1.1], + [6.2, 5.9], + [11.9, 12.3]]) + iex> model = Scholar.CrossDecomposition.PLSSVD.fit(x, y) + iex> model.x_mean + #Nx.Tensor< + f32[3] + [1.25, 1.75, 1.75] + > + iex> model.y_std + #Nx.Tensor< + f32[2] + [5.467098712921143, 5.661198616027832] + > + iex> model.x_weights + #Nx.Tensor< + f32[3][2] + [ + [0.521888256072998, -0.11256571859121323], + [0.6170258522033691, 0.7342619299888611], + [0.5889922380447388, -0.6694686412811279] + ] + > """ - deftransform fit(x, y opts \\ []) do - fit_n(x, y, NimbleOptions.validate!(opts, @opts_schema)) + deftransform fit(x, y, opts \\ []) do + fit_n(x, y, NimbleOptions.validate!(opts, @opts_schema)) end defnp fit_n(x, y, opts) do {x, y} = check_x_y(x, y, opts) num_components = opts[:num_components] - {x, y, x_mean, y_mean, x_std, y_std} = center_scale_xy(x, y, opts) + {x, y, x_mean, y_mean, x_std, y_std} = center_scale_x_y(x, y, opts) + + c = + Nx.transpose(x) + |> Nx.dot(y) + + {u, _s, vt} = Nx.LinAlg.svd(c, full_matrices?: false) + u = Nx.slice_along_axis(u, 0, num_components, axis: 1) + vt = Nx.slice_along_axis(vt, 0, num_components, axis: 0) + {u, vt} = Scholar.Decomposition.Utils.flip_svd(u, vt) + v = Nx.transpose(vt) + + x_weights = u + y_weights = v + + %__MODULE__{ + x_mean: x_mean, + y_mean: y_mean, + x_std: x_std, + y_std: y_std, + x_weights: x_weights, + y_weights: y_weights + } + end + + @doc """ + Apply the dimensionality reduction. 
+ Takes as arguments: + + * fitted estimator struct which is return value of `fit/3` function from this module + + * `x` - training samples, `{num_samples, num_features}` shaped tensor + + * `y` - targets, `{num_samples, num_targets}` shaped `y` tensor + + ## Options + + #{NimbleOptions.docs(@opts_schema)} + + ## Return Values + + Returns tuple with transformed data `{x_transformed, y_transformed}` where: + + * `x_transformed` is `{num_samples, num_components}` shaped tensor. + + * `y_transformed` is `{num_samples, num_components}` shaped tensor. + + ## Examples + + iex> x = Nx.tensor([[0.0, 0.0, 1.0], + [1.0, 0.0, 0.0], + [2.0, 2.0, 2.0], + [2.0, 5.0, 4.0]]) + iex> y = Nx.tensor([[0.1, -0.2], + [0.9, 1.1], + [6.2, 5.9], + [11.9, 12.3]]) + iex> model = Scholar.CrossDecomposition.PLSSVD.fit(x, y) + iex> Scholar.CrossDecomposition.PLSSVD.transform(model, x, y) + {#Nx.Tensor< + f32[4][2] + [ + [-1.397004246711731, -0.10283949971199036], + [-1.1967883110046387, 0.17159013450145721], + [0.5603229403495789, -0.10849219560623169], + [2.0334696769714355, 0.039741579443216324] + ] + >, + #Nx.Tensor< + f32[4][2] + [ + [-1.2260178327560425, -0.019306711852550507], + [-0.9602956175804138, 0.04015407711267471], + [0.3249155580997467, -0.04311027377843857], + [1.8613981008529663, 0.022262824699282646] + ] + >} + + """ + deftransform transform(model, x, y, opts \\ []) do + transform_n(model, x, y, NimbleOptions.validate!(opts, @opts_schema)) + end + + defnp transform_n( + %__MODULE__{ + x_mean: x_mean, + y_mean: y_mean, + x_std: x_std, + y_std: y_std, + x_weights: x_weights, + y_weights: y_weights + } = _model, + x, + y, + opts + ) do + {x, y} = check_x_y(x, y, opts) + + xr = (x - x_mean) / x_std + x_scores = Nx.dot(xr, x_weights) + + yr = (y - y_mean) / y_std + y_scores = Nx.dot(yr, y_weights) + {x_scores, y_scores} + end + + @doc """ + Learn and apply the dimensionality reduction. 
+ Takes as arguments: + + * `x` - training samples, `{num_samples, num_features}` shaped tensor + + * `y` - targets, `{num_samples, num_targets}` shaped `y` tensor + + ## Options + + #{NimbleOptions.docs(@opts_schema)} + + ## Return Values + + Returns tuple with transformed data `{x_transformed, y_transformed}` where: + + * `x_transformed` is `{num_samples, num_features}` shaped tensor. + + * `y_transformed` is `{num_samples, num_features}` shaped tensor. + + ## Examples + + iex> x = Nx.tensor([[0.0, 0.0, 1.0], + [1.0, 0.0, 0.0], + [2.0, 2.0, 2.0], + [2.0, 5.0, 4.0]]) + iex> y = Nx.tensor([[0.1, -0.2], + [0.9, 1.1], + [6.2, 5.9], + [11.9, 12.3]]) + iex> Scholar.CrossDecomposition.PLSSVD.fit_transform(x, y) + {#Nx.Tensor< + f32[4][2] + [ + [-1.397004246711731, -0.10283949971199036], + [-1.1967883110046387, 0.17159013450145721], + [0.5603229403495789, -0.10849219560623169], + [2.0334696769714355, 0.039741579443216324] + ] + >, + #Nx.Tensor< + f32[4][2] + [ + [-1.2260178327560425, -0.019306711852550507], + [-0.9602956175804138, 0.04015407711267471], + [0.3249155580997467, -0.04311027377843857], + [1.8613981008529663, 0.022262824699282646] + ] + >} + + """ + + deftransform fit_transform(x, y, opts \\ []) do + fit_transform_n(x, y, NimbleOptions.validate!(opts, @opts_schema)) + end + + defnp fit_transform_n(x, y, opts) do + fit(x, y, opts) + |> transform(x, y, opts) end defnp check_x_y(x, y, opts) do @@ -53,7 +292,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do num_components = opts[:num_components] {num_samples, num_features} = Nx.shape(x) - {num_samples_y, num_targets} = Nx.axis_shape(y, 0) + {num_samples_y, num_targets} = Nx.shape(y) cond do num_samples != num_samples_y -> @@ -87,18 +326,19 @@ defmodule Scholar.CrossDecomposition.PLSSVD do true -> nil end + {x, y} end defnp center_scale_x_y(x, y, opts) do scale = opts[:scale] - x_mean = Nx.mean(x, axis: 0) + x_mean = Nx.mean(x, axes: [0]) x = x - x_mean - y_mean = Nx.mean(y, axis: 0) + y_mean = Nx.mean(y, axes: 
[0]) y = y - y_mean - if scale do + if scale do x_std = Nx.standard_deviation(x, axes: [0], ddof: 1) x_std = Nx.select(x_std == 0.0, 1.0, x_std) x = x / Nx.broadcast(x_std, Nx.shape(x)) From 7eee166db1371d9552d4c3bfab92f9ac36d13f55 Mon Sep 17 00:00:00 2001 From: norm4nn Date: Wed, 30 Oct 2024 17:31:37 +0100 Subject: [PATCH 3/9] added tests --- lib/scholar/cross_decomposition/pls_svd.ex | 72 +++--- .../cross_decomposition/pls_svd_test.exs | 222 ++++++++++++++++++ 2 files changed, 258 insertions(+), 36 deletions(-) create mode 100644 test/scholar/cross_decomposition/pls_svd_test.exs diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index b4c2ec1a..a761146c 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -61,32 +61,28 @@ defmodule Scholar.CrossDecomposition.PLSSVD do The function returns fitted estimator represented by struct with the following parameters: - * `:x_mean` - tensor of shape `{num_features}` with `x` tensor mean values aggregated by axis 0. + * `:x_mean` - tensor of shape `{num_features}` which represents `x` tensor mean values calculated along axis 0. - * `:y_mean` - tensor of shape `{num_features}` with `x` tensor mean values aggregated by axis 0. + * `:y_mean` - tensor of shape `{num_targets}` which represents `y` tensor mean values calculated along axis 0. - * `:x_std` - tensor of shape `{num_components}` - Percentage of variance explained by each of the selected components. + * `:x_std` - tensor of shape `{num_features}` which represents `x` tensor standard deviation values calculated along axis 0. - * `:y_std` - ndarray of shape `{num_components}` - The singular values corresponding to each of the selected components. + * `:y_std` - tensor of shape `{num_targets}` which represents `y` tensor standard deviation values calculated along axis 0. 
- * `:x_weights` - ndarray of shape `{num_components}` - The singular values corresponding to each of the selected components. + * `:x_weights` - tensor of shape `{num_features, num_components}` the left singular vectors of the SVD of the cross-covariance matrix. - * `:y_weights` - ndarray of shape `{num_components}` - The singular values corresponding to each of the selected components. + * `:y_weights` - tensor of shape `{num_targets, num_components}` the right singular vectors of the SVD of the cross-covariance matrix. ## Examples iex> x = Nx.tensor([[0.0, 0.0, 1.0], - [1.0, 0.0, 0.0], - [2.0, 2.0, 2.0], - [2.0, 5.0, 4.0]]) + ...> [1.0, 0.0, 0.0], + ...> [2.0, 2.0, 2.0], + ...> [2.0, 5.0, 4.0]]) iex> y = Nx.tensor([[0.1, -0.2], - [0.9, 1.1], - [6.2, 5.9], - [11.9, 12.3]]) + ...> [0.9, 1.1], + ...> [6.2, 5.9], + ...> [11.9, 12.3]]) iex> model = Scholar.CrossDecomposition.PLSSVD.fit(x, y) iex> model.x_mean #Nx.Tensor< @@ -166,16 +162,17 @@ defmodule Scholar.CrossDecomposition.PLSSVD do ## Examples iex> x = Nx.tensor([[0.0, 0.0, 1.0], - [1.0, 0.0, 0.0], - [2.0, 2.0, 2.0], - [2.0, 5.0, 4.0]]) + ...> [1.0, 0.0, 0.0], + ...> [2.0, 2.0, 2.0], + ...> [2.0, 5.0, 4.0]]) iex> y = Nx.tensor([[0.1, -0.2], - [0.9, 1.1], - [6.2, 5.9], - [11.9, 12.3]]) + ...> [0.9, 1.1], + ...> [6.2, 5.9], + ...> [11.9, 12.3]]) iex> model = Scholar.CrossDecomposition.PLSSVD.fit(x, y) - iex> Scholar.CrossDecomposition.PLSSVD.transform(model, x, y) - {#Nx.Tensor< + iex> {x, y} = Scholar.CrossDecomposition.PLSSVD.transform(model, x, y) + iex> x + #Nx.Tensor< f32[4][2] [ [-1.397004246711731, -0.10283949971199036], @@ -183,7 +180,8 @@ defmodule Scholar.CrossDecomposition.PLSSVD do [0.5603229403495789, -0.10849219560623169], [2.0334696769714355, 0.039741579443216324] ] - >, + > + iex> y #Nx.Tensor< f32[4][2] [ @@ -192,7 +190,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do [0.3249155580997467, -0.04311027377843857], [1.8613981008529663, 0.022262824699282646] ] - >} + > """ deftransform 
transform(model, x, y, opts \\ []) do @@ -245,15 +243,16 @@ defmodule Scholar.CrossDecomposition.PLSSVD do ## Examples iex> x = Nx.tensor([[0.0, 0.0, 1.0], - [1.0, 0.0, 0.0], - [2.0, 2.0, 2.0], - [2.0, 5.0, 4.0]]) + ...> [1.0, 0.0, 0.0], + ...> [2.0, 2.0, 2.0], + ...> [2.0, 5.0, 4.0]]) iex> y = Nx.tensor([[0.1, -0.2], - [0.9, 1.1], - [6.2, 5.9], - [11.9, 12.3]]) - iex> Scholar.CrossDecomposition.PLSSVD.fit_transform(x, y) - {#Nx.Tensor< + ...> [0.9, 1.1], + ...> [6.2, 5.9], + ...> [11.9, 12.3]]) + iex> {x, y} = Scholar.CrossDecomposition.PLSSVD.fit_transform(x, y) + iex> x + #Nx.Tensor< f32[4][2] [ [-1.397004246711731, -0.10283949971199036], @@ -261,7 +260,8 @@ defmodule Scholar.CrossDecomposition.PLSSVD do [0.5603229403495789, -0.10849219560623169], [2.0334696769714355, 0.039741579443216324] ] - >, + > + iex> y #Nx.Tensor< f32[4][2] [ @@ -270,7 +270,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do [0.3249155580997467, -0.04311027377843857], [1.8613981008529663, 0.022262824699282646] ] - >} + > """ diff --git a/test/scholar/cross_decomposition/pls_svd_test.exs b/test/scholar/cross_decomposition/pls_svd_test.exs new file mode 100644 index 00000000..9afb5803 --- /dev/null +++ b/test/scholar/cross_decomposition/pls_svd_test.exs @@ -0,0 +1,222 @@ +defmodule Scholar.CrossDecomposition.PLSSVDTest do + use Scholar.Case, async: true + alias Scholar.CrossDecomposition.PLSSVD + doctest PLSSVD + + defp x do + Nx.tensor([ + [0.0, 0.0, 1.0, 16.0], + [1.0, 0.0, 0.0, 25.2], + [2.0, 2.0, 2.0, -2.3], + [2.0, 5.0, 4.0, 4.5], + [5.0, -2.0, 3.3, 4.5] + ]) + end + + defp y do + Nx.tensor([ + [0.1, -0.2, 3.0], + [0.9, 1.1, 5.1], + [6.2, 5.9, 2.5], + [11.9, 12.3, -6.0], + [7.6, 1.8, 4.9] + ]) + end + + defp y_1d do + Nx.tensor([0.1, -0.2, 3.0, 6.9, 3]) + end + + test "fit test" do + model = Scholar.CrossDecomposition.PLSSVD.fit(x(), y()) + + assert_all_close( + model.x_mean, + Nx.tensor([2.0, 1.0, 2.059999942779541, 9.579999923706055]), + atol: 1.0e-3 + ) + + assert_all_close( + 
model.y_mean, + Nx.tensor([5.339999675750732, 4.179999828338623, 1.899999976158142]), + atol: 1.0e-3 + ) + + assert_all_close( + model.x_std, + Nx.tensor([1.8708287477493286, 2.6457512378692627, 1.6334013938903809, 10.931011199951172]), + atol: 1.0e-3 + ) + + assert_all_close( + model.y_std, + Nx.tensor([4.90030574798584, 5.08005952835083, 4.561249732971191]), + atol: 1.0e-3 + ) + + assert_all_close( + model.x_weights, + Nx.tensor([ + [0.17879533767700195, 0.7447080016136169], + [0.6228733062744141, -0.5843358635902405], + [0.6137028336524963, 0.1790202558040619], + [-0.4510321617126465, -0.26816627383232117] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + model.y_weights, + Nx.tensor([ + [0.6292941570281982, 0.7398861646652222], + [0.5848351716995239, -0.2493150532245636], + [-0.5118170976638794, 0.6248283386230469] + ]), + atol: 1.0e-3 + ) + end + + test "transform test" do + model = Scholar.CrossDecomposition.PLSSVD.fit(x(), y()) + {x_transformed, y_transformed} = Scholar.CrossDecomposition.PLSSVD.transform(model, x(), y()) + + assert_all_close( + x_transformed, + Nx.tensor([ + [-1.0897283554077148, -0.8489431142807007], + [-1.7494868040084839, -0.7861797213554382], + [0.703069806098938, 0.06401326507329941], + [1.8802037239074707, -0.5461838245391846], + [0.25594159960746765, 2.117293357849121] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + y_transformed, + Nx.tensor([ + [-1.3005900382995605, -0.42553290724754333], + [-1.2838343381881714, -0.08087197691202164], + [0.24112752079963684, 0.12762844562530518], + [2.6636931896209717, -0.49021831154823303], + [-0.3203960657119751, 0.8689947128295898] + ]), + atol: 1.0e-3 + ) + end + + test "fit_transform test - all options are default" do + {x_transformed, y_transformed} = Scholar.CrossDecomposition.PLSSVD.fit_transform(x(), y()) + + assert_all_close( + x_transformed, + Nx.tensor([ + [-1.0897283554077148, -0.8489431142807007], + [-1.7494868040084839, -0.7861797213554382], + [0.703069806098938, 
0.06401326507329941], + [1.8802037239074707, -0.5461838245391846], + [0.25594159960746765, 2.117293357849121] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + y_transformed, + Nx.tensor([ + [-1.3005900382995605, -0.42553290724754333], + [-1.2838343381881714, -0.08087197691202164], + [0.24112752079963684, 0.12762844562530518], + [2.6636931896209717, -0.49021831154823303], + [-0.3203960657119751, 0.8689947128295898] + ]), + atol: 1.0e-3 + ) + end + + test "fit_transform test - :num_components set to 1" do + {x_transformed, y_transformed} = + Scholar.CrossDecomposition.PLSSVD.fit_transform(x(), y(), num_components: 1) + + assert_all_close( + x_transformed, + Nx.tensor([ + [-1.0897283554077148], + [-1.7494868040084839], + [0.703069806098938], + [1.8802037239074707], + [0.25594159960746765] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + y_transformed, + Nx.tensor([ + [-1.3005900382995605], + [-1.2838343381881714], + [0.24112752079963684], + [2.6636931896209717], + [-0.3203960657119751] + ]), + atol: 1.0e-3 + ) + end + + test "fit_transform test - y is has only one dimension" do + {x_transformed, y_transformed} = + Scholar.CrossDecomposition.PLSSVD.fit_transform(x(), y_1d(), num_components: 1) + + assert_all_close( + x_transformed, + Nx.tensor([ + [-1.2138643264770508], + [-1.868216872215271], + [0.703800618648529], + [1.7553009986877441], + [0.6229796409606934] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + y_transformed, + Nx.tensor([ + [-0.8578669428825378], + [-0.9624848365783691], + [0.15343964099884033], + [1.5134726762771606], + [0.15343964099884033] + ]), + atol: 1.0e-3 + ) + end + + test "fit_transform test - :scale is set to :false" do + {x_transformed, y_transformed} = + Scholar.CrossDecomposition.PLSSVD.fit_transform(x(), y(), scale: false) + + assert_all_close( + x_transformed, + Nx.tensor([ + [6.641565322875977, 1.5491820573806763], + [15.36169719696045, 3.2503585815429688], + [-11.394588470458984, -2.017521619796753], + [-6.2775702476501465, 
2.303945779800415], + [-4.3311028480529785, -5.085964679718018] + ]), + atol: 1.0e-3 + ) + + assert_all_close( + y_transformed, + Nx.tensor([ + [6.744043827056885, 1.1535897254943848], + [6.1893134117126465, -0.3978065252304077], + [-1.4090275764465332, -0.40731552243232727], + [-12.453459739685059, 3.961534023284912], + [0.9291285872459412, -4.310001850128174] + ]), + atol: 1.0e-3 + ) + end +end From 32ba6ee0c5eb358616e1d2c4864055804ea0d859 Mon Sep 17 00:00:00 2001 From: Szymon Date: Thu, 31 Oct 2024 06:26:48 +0100 Subject: [PATCH 4/9] Update lib/scholar/cross_decomposition/pls_svd.ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/scholar/cross_decomposition/pls_svd.ex | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index a761146c..2dc1ecf6 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -1,13 +1,11 @@ defmodule Scholar.CrossDecomposition.PLSSVD do @moduledoc """ - Partial Least Square SVD. This transformer simply performs a SVD on the cross-covariance matrix. + Partial Least Square SVD. + This transformer simply performs a SVD on the cross-covariance matrix. It is able to project both the training data `x` and the targets `y`. The training data `x` is projected on the left singular vectors, while the targets are projected on the right singular vectors. 
- - - """ import Nx.Defn From fd1b4a75cf6003713d378ca9907507e339c0c7d6 Mon Sep 17 00:00:00 2001 From: Szymon Date: Thu, 31 Oct 2024 06:26:55 +0100 Subject: [PATCH 5/9] Update lib/scholar/cross_decomposition/pls_svd.ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/scholar/cross_decomposition/pls_svd.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index 2dc1ecf6..5f0a767b 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -232,7 +232,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do ## Return Values - Returns tuple with transformed data `{x_transformed, y_transformed}` where: + Returns tuple with transformed data `{x_transformed, y_transformed}` where: * `x_transformed` is `{num_samples, num_features}` shaped tensor. From 7cab840cb979795817055adfbfc0e911493d1d1c Mon Sep 17 00:00:00 2001 From: Szymon Date: Thu, 31 Oct 2024 06:29:49 +0100 Subject: [PATCH 6/9] Update lib/scholar/cross_decomposition/pls_svd.ex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Valim --- lib/scholar/cross_decomposition/pls_svd.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index 5f0a767b..9aa2b439 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -219,8 +219,9 @@ defmodule Scholar.CrossDecomposition.PLSSVD do end @doc """ - Learn and apply the dimensionality reduction. - Takes as arguments: + Learn and apply the dimensionality reduction. 
+ + The arguments are: * `x` - training samples, `{num_samples, num_features}` shaped tensor From d9572990a6ff232f37234e27d00cd40773337959 Mon Sep 17 00:00:00 2001 From: norm4nn Date: Thu, 31 Oct 2024 06:54:27 +0100 Subject: [PATCH 7/9] mix format --- lib/scholar/cross_decomposition/pls_svd.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index 9aa2b439..2fe9224e 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -220,7 +220,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do @doc """ Learn and apply the dimensionality reduction. - + The arguments are: * `x` - training samples, `{num_samples, num_features}` shaped tensor From 8440c38dc9440654678b2e754a28c332b131a2c8 Mon Sep 17 00:00:00 2001 From: norm4nn Date: Thu, 31 Oct 2024 21:21:29 +0100 Subject: [PATCH 8/9] Addressing comments --- lib/scholar/cross_decomposition/pls_svd.ex | 11 ++++------- test/scholar/cross_decomposition/pls_svd_test.exs | 5 ++--- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index 2fe9224e..f62e0490 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -69,7 +69,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do * `:x_weights` - tensor of shape `{num_features, num_components}` the left singular vectors of the SVD of the cross-covariance matrix. - * `:y_weights` - tensor of shape `{num_targets, num_components}` the right singular vectors of the SVD of the cross-covariance matrix. + * `:y_weights` - tensor of shape `{num_components, num_targets}` the transposed right singular vectors of the SVD of the cross-covariance matrix. 
## Examples @@ -112,18 +112,15 @@ defmodule Scholar.CrossDecomposition.PLSSVD do num_components = opts[:num_components] {x, y, x_mean, y_mean, x_std, y_std} = center_scale_x_y(x, y, opts) - c = - Nx.transpose(x) - |> Nx.dot(y) + c = Nx.dot(x, [0], y, [0]) {u, _s, vt} = Nx.LinAlg.svd(c, full_matrices?: false) u = Nx.slice_along_axis(u, 0, num_components, axis: 1) vt = Nx.slice_along_axis(vt, 0, num_components, axis: 0) {u, vt} = Scholar.Decomposition.Utils.flip_svd(u, vt) - v = Nx.transpose(vt) x_weights = u - y_weights = v + y_weights = vt %__MODULE__{ x_mean: x_mean, @@ -214,7 +211,7 @@ defmodule Scholar.CrossDecomposition.PLSSVD do x_scores = Nx.dot(xr, x_weights) yr = (y - y_mean) / y_std - y_scores = Nx.dot(yr, y_weights) + y_scores = Nx.dot(yr, [1], y_weights, [1]) {x_scores, y_scores} end diff --git a/test/scholar/cross_decomposition/pls_svd_test.exs b/test/scholar/cross_decomposition/pls_svd_test.exs index 9afb5803..5662bfc4 100644 --- a/test/scholar/cross_decomposition/pls_svd_test.exs +++ b/test/scholar/cross_decomposition/pls_svd_test.exs @@ -68,9 +68,8 @@ defmodule Scholar.CrossDecomposition.PLSSVDTest do assert_all_close( model.y_weights, Nx.tensor([ - [0.6292941570281982, 0.7398861646652222], - [0.5848351716995239, -0.2493150532245636], - [-0.5118170976638794, 0.6248283386230469] + [0.6292941570281982, 0.5848351716995239, -0.5118170976638794], + [0.7398861646652222, -0.2493150532245636, 0.6248283386230469] ]), atol: 1.0e-3 ) From 4b34c96c9e75c2ea23ed809e142fa05600d8347a Mon Sep 17 00:00:00 2001 From: norm4nn Date: Fri, 8 Nov 2024 16:50:01 +0100 Subject: [PATCH 9/9] simplified center_scale function --- lib/scholar/cross_decomposition/pls_svd.ex | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/lib/scholar/cross_decomposition/pls_svd.ex b/lib/scholar/cross_decomposition/pls_svd.ex index f62e0490..656e384f 100644 --- a/lib/scholar/cross_decomposition/pls_svd.ex +++ b/lib/scholar/cross_decomposition/pls_svd.ex @@ -110,7 
+110,8 @@ defmodule Scholar.CrossDecomposition.PLSSVD do defnp fit_n(x, y, opts) do {x, y} = check_x_y(x, y, opts) num_components = opts[:num_components] - {x, y, x_mean, y_mean, x_std, y_std} = center_scale_x_y(x, y, opts) + {x, x_mean, x_std} = center_scale(x, opts) + {y, y_mean, y_std} = center_scale(y, opts) c = Nx.dot(x, [0], y, [0]) @@ -326,28 +327,21 @@ defmodule Scholar.CrossDecomposition.PLSSVD do {x, y} end - defnp center_scale_x_y(x, y, opts) do + defnp center_scale(x, opts) do scale = opts[:scale] x_mean = Nx.mean(x, axes: [0]) x = x - x_mean - y_mean = Nx.mean(y, axes: [0]) - y = y - y_mean - if scale do x_std = Nx.standard_deviation(x, axes: [0], ddof: 1) x_std = Nx.select(x_std == 0.0, 1.0, x_std) x = x / Nx.broadcast(x_std, Nx.shape(x)) - y_std = Nx.standard_deviation(y, axes: [0], ddof: 1) - y_std = Nx.select(y_std == 0.0, 1.0, y_std) - y = y / Nx.broadcast(y_std, Nx.shape(y)) - - {x, y, x_mean, y_mean, x_std, y_std} + {x, x_mean, x_std} else x_std = Nx.broadcast(1, {Nx.axis_size(x, 1)}) - y_std = Nx.broadcast(1, {Nx.axis_size(y, 1)}) - {x, y, x_mean, y_mean, x_std, y_std} + + {x, x_mean, x_std} end end end