Commit

Draft of implementation

msluszniak committed Oct 3, 2023
1 parent f69d4af commit d96b5a2
Showing 1 changed file with 287 additions and 24 deletions.
311 changes: 287 additions & 24 deletions lib/scholar/manifold/mds.ex
@@ -7,38 +7,99 @@ defmodule Scholar.Manifold.MDS do
* [t-SNE: t-Distributed Stochastic Neighbor Embedding](http://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf)
"""
import Nx.Defn
# import Scholar.Shared
alias Scholar.Metrics.Distance

@derive {Nx.Container, containers: [:embedding, :stress, :n_iter]}
defstruct [:embedding, :stress, :n_iter]

opts_schema = [
num_components: [
type: :pos_integer,
default: 2,
doc: ~S"""
Dimension of the embedded space.
"""
],
metric: [
type: :boolean,
default: false,
doc: ~S"""
If `true`, use dissimilarities as metric distances in the embedding space.
"""
],
normalized_stress: [
type: :boolean,
default: false,
doc: ~S"""
If `true`, normalize the stress by the sum of squared dissimilarities.
"""
],
eps: [
type: :float,
default: 1.0e-3,
doc: ~S"""
Tolerance for stopping criterion.
"""
],
max_iter: [
type: :pos_integer,
default: 300,
doc: ~S"""
Maximum number of iterations for the optimization.
"""
],
key: [
type: {:custom, Scholar.Options, :key, []},
doc: """
Determines random number generation for the random initialization of the embedding.
If the key is not provided, it is set to `Nx.Random.key(System.system_time())`.
"""
],
n_init: [
type: :pos_integer,
default: 4,
doc: ~S"""
Number of times the embedding will be computed with different random initializations.
The final embedding is the one with the lowest stress.
"""
]
]

@opts_schema NimbleOptions.new!(opts_schema)
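
# Illustrative note (not from the original commit): `NimbleOptions.validate!/2`
# merges the defaults declared above into whatever the caller passes, so a call
# such as
#
#     NimbleOptions.validate!([num_components: 3], @opts_schema)
#
# is expected to return a keyword list containing `num_components: 3` together
# with `metric: false`, `normalized_stress: false`, `eps: 1.0e-3`,
# `max_iter: 300` and `n_init: 4`, and to raise on unknown or invalid options.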

# initialize x randomly or pass the init x earlier
defnp smacof(dissimilarities, x, max_iter, opts) do
# n = Nx.axis_size(dissimilarities, 0)
similarities_flat = Nx.flatten(dissimilarities)
# pass the square matrix here: lower_triangle_indices/1 reads Nx.axis_size(tensor, 0),
# which would be n * n for the flattened copy
similarities_flat_indices = lower_triangle_indices(dissimilarities)

similarities_flat_w = Nx.take(similarities_flat, similarities_flat_indices)

metric = if opts[:metric], do: 1, else: 0
normalized_stress = if opts[:normalized_stress], do: 1, else: 0
eps = opts[:eps]

# only the {x, stress, i} triple is needed after the loop; the other ten
# carried values are discarded
{{x, stress, i}, _, _, _, _, _, _, _, _, _, _} =
while {{x, _stress = Nx.Constants.infinity(), i = 0}, dissimilarities, max_iter,
similarities_flat_indices, similarities_flat, similarities_flat_w,
old_stress = Nx.Constants.infinity(), metric, normalized_stress, eps,
stop_value = 0},
i < max_iter and not stop_value do
dis = Distance.pairwise_euclidean(x)

disparities =
if metric do
dissimilarities
else
dis_flat = Nx.flatten(dis)

dis_flat_indices = lower_triangle_indices(dis)

dis_flat_w = Nx.take(dis_flat, dis_flat_indices)
# dis_flat_w = Nx.flatten(remove_main_diag(dis))

disparities_flat =
@@ -58,7 +119,7 @@ defmodule Scholar.Manifold.MDS do
stress = Nx.sum((Nx.flatten(dis) - Nx.flatten(disparities)) ** 2) / 2

stress =
if normalized_stress do
Nx.sqrt(stress / (Nx.sum(Nx.flatten(disparities) ** 2) / 2))
else
stress
@@ -72,25 +133,227 @@ defmodule Scholar.Manifold.MDS do

dis = Nx.sum(Nx.sqrt(Nx.sum(x ** 2, axes: [1])))

stop_value = if old_stress - stress / dis < eps, do: 1, else: 0

old_stress = stress / dis

{{x, stress, i + 1}, dissimilarities, max_iter, similarities_flat_indices,
similarities_flat, similarities_flat_w, old_stress, metric, normalized_stress, eps,
stop_value}
end

{x, stress, i}
end
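
# Reference for the quantity optimised above (comment added for clarity; the
# formulas restate the code, they are not from the original commit): with
# pairwise embedding distances d_ij and disparities dhat_ij, the loop computes
# the raw stress
#
#     stress = sum over i < j of (d_ij - dhat_ij)^2
#
# (the `/ 2` in the code removes the double counting of the symmetric matrix),
# and when `normalized_stress` is set it reports
#
#     sqrt(stress / (sum over i < j of dhat_ij^2))
#
# matching the two `stress` expressions inside the while loop.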

defnp mds_main_loop(dissimilarities, x, key, opts) do
n_init = opts[:n_init]

{{best, best_stress, best_iter}, _} =
while {{best = x, best_stress = Nx.Constants.infinity(), best_iter = 0},
{n_init, dissimilarities, x, i = 0}},
i < n_init do
{temp, stress, iter} = smacof(dissimilarities, x, opts[:max_iter], opts)

{best, best_stress, best_iter} =
if stress < best_stress, do: {temp, stress, iter}, else: {best, best_stress, best_iter}

{{best, best_stress, best_iter}, {n_init, dissimilarities, x, i + 1}}
end

{best, best_stress, best_iter}
end

defnp mds_main_loop(dissimilarities, key, opts) do
# key = opts[:key]
n_init = opts[:n_init]
max_iter = opts[:max_iter]
num_samples = Nx.axis_size(dissimilarities, 0)
{dummy, new_key} = Nx.Random.uniform(key, shape: {num_samples, opts[:num_components]})

{{best, best_stress, best_iter}, _} =
while {{best = dummy, best_stress = Nx.Constants.infinity(), best_iter = 0},
{n_init, new_key, max_iter, dissimilarities, i = 0}},
i < n_init do
num_samples = Nx.axis_size(dissimilarities, 0)
{x, new_key} = Nx.Random.uniform(new_key, shape: {num_samples, opts[:num_components]})
{temp, stress, iter} = smacof(dissimilarities, x, max_iter, opts)

{best, best_stress, best_iter} =
if stress < best_stress, do: {temp, stress, iter}, else: {best, best_stress, best_iter}

{{best, best_stress, best_iter}, {n_init, new_key, max_iter, dissimilarities, i + 1}}
end

{best, best_stress, best_iter}
end
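
# Comment added for clarity (not in the original commit): this clause performs
# the `n_init` random restarts described in the option docs. Each pass draws a
# fresh {num_samples, num_components} starting configuration with
# Nx.Random.uniform/2, runs SMACOF on it, and keeps the run with the lowest
# stress; the 4-arity clause above does the same with a caller-supplied
# starting configuration instead of a random one.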

defnp lower_triangle_indices(tensor) do
n = Nx.axis_size(tensor, 0)

temp = Nx.broadcast(Nx.s64(0), {div(n * (n - 1), 2)})

{temp, _} =
while {temp, {i = 0, j = 0}}, i < n ** 2 do
{temp, j} =
if Nx.remainder(i, n) < Nx.quotient(i, n) do
temp = Nx.indexed_put(temp, Nx.new_axis(j, -1), i)
{temp, j + 1}
else
{temp, j}
end

{temp, {i + 1, j}}
end

temp
end
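
# Worked example (illustrative, not part of the original commit): for a 3x3
# input the loop scans n ** 2 = 9 flat positions and the condition
# rem(i, n) < div(i, n) (column < row) holds for i = 3, 6 and 7, so the result
# is the tensor [3, 6, 7], the flat indices of the strictly lower triangle,
# i.e. entries {1, 0}, {2, 0} and {2, 1} of the matrix.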

@doc """
Fits MDS for sample inputs `x`. It is a simplified version of `fit/3`.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return Values
Returns struct with embedded data, stress value, and number of iterations for best run.
## Examples
iex> x = Nx.iota({4,5})
iex> Scholar.Manifold.MDS.fit(x)
#Nx.Tensor<
f32[4][2]
[
[-2197.154296875, 0.0],
[-1055.148681640625, 0.0],
[1055.148681640625, 0.0],
[2197.154296875, 0.0]
]
>
"""
deftransform fit(x) do
opts = NimbleOptions.validate!([], @opts_schema)
key = Keyword.get_lazy(opts, :key, fn -> Nx.Random.key(System.system_time()) end)
fit_n(x, key, opts)
end

@doc """
Fits MDS for sample inputs `x`. It is a simplified version of `fit/3`.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return Values
Returns struct with embedded data, stress value, and number of iterations for best run.
## Examples
iex> x = Nx.iota({4,5})
iex> Scholar.Manifold.MDS.fit(x, num_components: 2)
#Nx.Tensor<
f32[4][2]
[
[-2197.154296875, 0.0],
[-1055.148681640625, 0.0],
[1055.148681640625, 0.0],
[2197.154296875, 0.0]
]
>
"""
deftransform fit(x, opts) when is_list(opts) do
opts = NimbleOptions.validate!(opts, @opts_schema)
key = Keyword.get_lazy(opts, :key, fn -> Nx.Random.key(System.system_time()) end)
fit_n(x, key, opts)
end

defnp fit_n(x, key, opts) do
{best, best_stress, best_iter} = mds_main_loop(x, key, opts)
%__MODULE__{embedding: best, stress: best_stress, n_iter: best_iter}
end

@doc """
Fits MDS for sample inputs `x` with initial embedding `init`. It is a simplified version of `fit/3`.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return Values
Returns struct with embedded data, stress value, and number of iterations for best run.
## Examples
iex> x = Nx.iota({4,5})
iex> init = Nx.reverse(Nx.iota({4,5}))
iex> Scholar.Manifold.MDS.fit(x, init)
#Nx.Tensor<
f32[4][2]
[
[-2197.154296875, 0.0],
[-1055.148681640625, 0.0],
[1055.148681640625, 0.0],
[2197.154296875, 0.0]
]
>
"""
deftransform fit(x, init) do
opts = NimbleOptions.validate!([], @opts_schema)
key = Keyword.get_lazy(opts, :key, fn -> Nx.Random.key(System.system_time()) end)
fit_n(x, init, key, opts)
end

@doc """
Fits MDS for sample inputs `x` with initial embedding `init` and configurable options.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return Values
Returns struct with embedded data, stress value, and number of iterations for best run.
## Examples
iex> x = Nx.iota({4,5})
iex> init = Nx.reverse(Nx.iota({4,5}))
iex> Scholar.Manifold.MDS.fit(x, init, num_components: 3)
#Nx.Tensor<
f32[4][3]
[
[-2197.154296875, 0.0, 0.0],
[-1055.148681640625, 0.0, 0.0],
[1055.148681640625, 0.0, 0.0],
[2197.154296875, 0.0, 0.0]
]
>
"""
deftransform fit(x, init, opts) when is_list(opts) do
opts = NimbleOptions.validate!(opts, @opts_schema)
key = Keyword.get_lazy(opts, :key, fn -> Nx.Random.key(System.system_time()) end)
fit_n(x, init, key, opts)
end

defnp fit_n(x, init, key, opts) do
{best, best_stress, best_iter} = mds_main_loop(x, init, key, opts)
%__MODULE__{embedding: best, stress: best_stress, n_iter: best_iter}
end
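
# Usage sketch (added for illustration; shapes follow from the code above, the
# exact numeric results are not verified):
#
#     x = Nx.iota({10, 4}, type: :f32)
#     key = Nx.Random.key(42)
#     model = Scholar.Manifold.MDS.fit(x, num_components: 2, key: key)
#     model.embedding  # {10, 2} tensor with the embedded points
#     model.stress     # stress of the best of the `n_init` runs
#     model.n_iter     # number of iterations used by that run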

# defn remove_main_diag_indices(tensor) do
# n = Nx.axis_size(tensor, 0)

# temp =
# Nx.broadcast(Nx.s64(0), {n})
# |> Nx.indexed_put(Nx.new_axis(0, -1), Nx.s64(1))
# |> Nx.tile([n - 1])

# Nx.iota({n * (n - 1)}) + Nx.cumulative_sum(temp)
# # indices = Nx.iota({n * (n - 1)}) + Nx.cumulative_sum(temp)
# # Nx.take(Nx.flatten(tensor), indices) |> Nx.reshape({n, n - 1})
# end
end
