Skip to content

Commit

Permalink
Add tests, change default param, add new preprocessing to isotonic regression
Browse files Browse the repository at this point in the history
  • Loading branch information
msluszniak committed Nov 7, 2023
1 parent b231377 commit b3d8f0a
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 67 deletions.
96 changes: 58 additions & 38 deletions lib/scholar/linear/isotonic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ defmodule Scholar.Linear.IsotonicRegression do
observations by solving a convex optimization problem. It is a form of
regression analysis that can be used as an alternative to polynomial
regression to fit nonlinear data.
Time complexity of isotonic regression is $O(N^2)$ where $N$ is the
number of points.
"""
require Nx
import Nx.Defn, except: [transform: 2]
Expand Down Expand Up @@ -38,7 +41,7 @@ defmodule Scholar.Linear.IsotonicRegression do
y_thresholds: Nx.Tensor.t(),
increasing: Nx.Tensor.t(),
cutoff_index: Nx.Tensor.t(),
preprocess: Tuple.t() | Scholar.Interpolation.Linear.t()
preprocess: tuple() | Scholar.Interpolation.Linear.t()
}

opts = [
Expand Down Expand Up @@ -174,8 +177,6 @@ defmodule Scholar.Linear.IsotonicRegression do
Nx.u8(0)
end

# increasing = Nx.u8(1)

fit_n(x, y, sample_weights, increasing, opts)
end

Expand Down Expand Up @@ -206,12 +207,12 @@ defmodule Scholar.Linear.IsotonicRegression do
iex> Scholar.Linear.IsotonicRegression.predict(model, to_predict)
#Nx.Tensor<
f32[10]
[1.0, 1.6666667461395264, 2.3333334922790527, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
[1.0, 1.6666667461395264, 2.3333332538604736, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
>
"""
defn predict(model, x) do
check_input_shape(x)
# check_preprocess(model)
check_preprocess(model)

x = Nx.flatten(x)
x = Nx.clip(x, model.x_min, model.x_max)
Expand Down Expand Up @@ -261,43 +262,61 @@ defmodule Scholar.Linear.IsotonicRegression do
]
),
x: Nx.tensor(
[1.0, 4.0, 7.0, 9.0, 10.0]
[1.0, 4.0, 7.0, 9.0, 10.0, 11.0]
)
}
}
"""
defn preprocess(model) do
# cutoff = Nx.to_number(model.cutoff_index)
# x = model.x_thresholds[0..cutoff]
# y = model.y_thresholds[0..cutoff]

# {x, y} =
# if trim_duplicates do
# keep_mask =
# Nx.logical_or(
# Nx.not_equal(y[1..-2//1], y[0..-3//1]),
# Nx.not_equal(y[1..-2//1], y[2..-1//1])
# )

# keep_mask = Nx.concatenate([Nx.tensor([1]), keep_mask, Nx.tensor([1])])

# indices =
# Nx.iota({Nx.axis_size(y, 0)})
# |> Nx.add(1)
# |> Nx.multiply(keep_mask)
# |> Nx.to_flat_list()

# indices = Enum.filter(indices, fn x -> x != 0 end) |> Nx.tensor() |> Nx.subtract(1)
# x = Nx.take(x, indices)
# y = Nx.take(y, indices)
# {x, y}
# else
# {x, y}
# end

# model = %__MODULE__{model | x_thresholds: x}
# model = %__MODULE__{model | y_thresholds: y}
# Prepares a fitted isotonic-regression `model` for prediction: drops the
# unused tail of the threshold tensors (everything past `cutoff_index`),
# optionally removes redundant interior thresholds, and stores a fitted
# `Scholar.Interpolation.Linear` model under the `:preprocess` field.
#
# This is a plain `def` (not `defn`) because it calls `Nx.to_number/1` and
# uses `Enum.filter/2`, which cannot run inside defn-compiled code.
def preprocess(model, trim_duplicates \\ true) do
# Only the first `cutoff_index + 1` entries of the threshold tensors are valid.
cutoff = Nx.to_number(model.cutoff_index)
x = model.x_thresholds[0..cutoff]
y = model.y_thresholds[0..cutoff]

{x, y} =
if trim_duplicates do
# Keep an interior point only if its y differs from at least one
# neighbor; a point equal to both neighbors adds no information to a
# piecewise-linear interpolation.
# NOTE(review): the slices below assume y has at least 3 elements —
# confirm callers guarantee that.
keep_mask =
Nx.logical_or(
Nx.not_equal(y[1..-2//1], y[0..-3//1]),
Nx.not_equal(y[1..-2//1], y[2..-1//1])
)

# The first and last thresholds are always kept.
keep_mask = Nx.concatenate([Nx.tensor([1]), keep_mask, Nx.tensor([1])])

# Build 1-based positions and zero out the masked-off ones...
indices =
Nx.iota({Nx.axis_size(y, 0)})
|> Nx.add(1)
|> Nx.multiply(keep_mask)
|> Nx.to_flat_list()

# ...then drop the zeros and shift back to 0-based indices for the gather.
indices = Enum.filter(indices, fn x -> x != 0 end) |> Nx.tensor() |> Nx.subtract(1)
x = Nx.take(x, indices)
y = Nx.take(y, indices)
{x, y}
else
{x, y}
end

model = %__MODULE__{model | x_thresholds: x}
model = %__MODULE__{model | y_thresholds: y}

# Fit the linear interpolator once so `predict/2` can reuse it.
%__MODULE__{
model
| preprocess:
Scholar.Interpolation.Linear.fit(
model.x_thresholds,
model.y_thresholds
)
}
end

@doc """
Preprocesses the `model` for prediction.
Returns an updated `model`. This is a special version of `preprocess/1` that
does not trim duplicates so it can be used in defns. It is not recommended
to use this function directly.
"""
defn special_preprocess(model) do
%__MODULE__{
model
| preprocess:
Expand Down Expand Up @@ -517,7 +536,8 @@ defmodule Scholar.Linear.IsotonicRegression do

defnp check_increasing(x, y) do
x = Nx.new_axis(x, -1)
y = Nx.new_axis(y, -1)
model = Scholar.Linear.LinearRegression.fit(x, y)
Nx.squeeze(model.coefficients[0] >= 0)
model.coefficients[0][0] >= 0
end
end
59 changes: 30 additions & 29 deletions lib/scholar/manifold/mds.ex
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ defmodule Scholar.Manifold.MDS do
],
metric: [
type: :boolean,
default: false,
default: true,
doc: ~S"""
If `true`, use dissimilarities as metric distances in the embedding space.
"""
Expand All @@ -33,6 +33,7 @@ defmodule Scholar.Manifold.MDS do
default: false,
doc: ~S"""
If `true`, normalize the stress by the sum of squared dissimilarities.
Only valid if `metric` is `false`.
"""
],
eps: [
Expand Down Expand Up @@ -78,6 +79,7 @@ defmodule Scholar.Manifold.MDS do
metric = if opts[:metric], do: 1, else: 0
normalized_stress = if opts[:normalized_stress], do: 1, else: 0
eps = opts[:eps]
n = Nx.axis_size(dissimilarities, 0)

{{x, stress, i}, _} =
while {{x, _stress = Nx.Constants.infinity(Nx.type(dissimilarities)), i = 0},
Expand All @@ -86,7 +88,6 @@ defmodule Scholar.Manifold.MDS do
metric, normalized_stress, eps, stop_value = 0}},
i < max_iter and not stop_value do
dis = Distance.pairwise_euclidean(x)
n = Nx.axis_size(dissimilarities, 0)

disparities =
if metric do
Expand All @@ -96,14 +97,14 @@ defmodule Scholar.Manifold.MDS do

dis_flat_indices = lower_triangle_indices(dis)

n = Nx.axis_size(dis, 0)

dis_flat_w = Nx.take(dis_flat, dis_flat_indices)

disparities_flat_model =
Scholar.Linear.IsotonicRegression.fit(similarities_flat_w, dis_flat_w)
Scholar.Linear.IsotonicRegression.fit(similarities_flat_w, dis_flat_w,
increasing: true
)

model = Scholar.Linear.IsotonicRegression.preprocess(disparities_flat_model)
model = Scholar.Linear.IsotonicRegression.special_preprocess(disparities_flat_model)

disparities_flat =
Scholar.Linear.IsotonicRegression.predict(model, similarities_flat_w)
Expand Down Expand Up @@ -133,7 +134,7 @@ defmodule Scholar.Manifold.MDS do
ratio = disparities / dis
b = -ratio
b = Nx.put_diagonal(b, Nx.take_diagonal(b) + Nx.sum(ratio, axes: [1]))
x = 1.0 / n * Nx.dot(b, x)
x = Nx.dot(b, x) * (1.0 / n)

dis = Nx.sum(Nx.sqrt(Nx.sum(x ** 2, axes: [1])))

Expand Down Expand Up @@ -209,7 +210,7 @@ defmodule Scholar.Manifold.MDS do
{best, best_stress, best_iter}
end

defn lower_triangle_indices(tensor) do
defnp lower_triangle_indices(tensor) do
n = Nx.axis_size(tensor, 0)

temp = Nx.broadcast(Nx.s64(0), {div(n * (n - 1), 2)})
Expand Down Expand Up @@ -249,17 +250,17 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.040477119386196136, -0.4997042417526245],
[-0.35801631212234497, -0.09504470974206924],
[-0.08517580479383469, 0.35293734073638916],
[0.42080432176589966, 0.23617777228355408]
[16.3013916015625, -3.444634437561035],
[5.866805553436279, 1.6378790140151978],
[-5.487184524536133, 0.5837264657020569],
[-16.681013107299805, 1.2230290174484253]
]
),
stress: Nx.tensor(
0.0016479993937537074
0.3993147909641266
),
n_iter: Nx.tensor(
19
23
)
}
"""
Expand Down Expand Up @@ -288,17 +289,17 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.040477119386196136, -0.4997042417526245],
[-0.35801631212234497, -0.09504470974206924],
[-0.08517580479383469, 0.35293734073638916],
[0.42080432176589966, 0.23617777228355408]
[16.3013916015625, -3.444634437561035],
[5.866805553436279, 1.6378790140151978],
[-5.487184524536133, 0.5837264657020569],
[-16.681013107299805, 1.2230290174484253]
]
),
stress: Nx.tensor(
0.0016479993937537074
0.3993147909641266
),
n_iter: Nx.tensor(
19
23
)
}
"""
Expand Down Expand Up @@ -333,10 +334,10 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.41079193353652954, 0.41079193353652954],
[0.1369306445121765, 0.1369306445121765],
[-0.1369306445121765, -0.1369306445121765],
[-0.41079193353652954, -0.41079193353652954]
[11.858541488647461, 11.858541488647461],
[3.9528470039367676, 3.9528470039367676],
[-3.9528470039367676, -3.9528470039367676],
[-11.858541488647461, -11.858541488647461]
]
),
stress: Nx.tensor(
Expand Down Expand Up @@ -373,14 +374,14 @@ defmodule Scholar.Manifold.MDS do
%Scholar.Manifold.MDS{
embedding: Nx.tensor(
[
[0.3354101777076721, 0.3354101777076721, 0.3354101777076721],
[0.11180339753627777, 0.11180339753627777, 0.11180339753627777],
[-0.11180339753627777, -0.11180340498685837, -0.11180339753627777],
[-0.3354102075099945, -0.3354102075099945, -0.3354102075099945]
[9.682458877563477, 9.682458877563477, 9.682458877563477],
[3.2274858951568604, 3.2274858951568604, 3.2274858951568604],
[-3.2274863719940186, -3.2274863719940186, -3.2274863719940186],
[-9.682458877563477, -9.682458877563477, -9.682458877563477]
]
),
stress: Nx.tensor(
2.6645352591003757e-15
9.094947017729282e-12
),
n_iter: Nx.tensor(
3
Expand Down
Loading

0 comments on commit b3d8f0a

Please sign in to comment.