Added Ledoit Wolf shrinkage covariance estimator #304

Merged: 14 commits, Nov 7, 2024

lib/scholar/covariance/ledoit_wolf.ex (348 additions, 0 deletions, new file)
defmodule Scholar.Covariance.LedoitWolf do
  @moduledoc """
  Ledoit-Wolf is a particular form of shrinkage covariance estimator, where the
  shrinkage coefficient is computed using O. Ledoit and M. Wolf's formula as
  described in "A Well-Conditioned Estimator for Large-Dimensional
  Covariance Matrices", Ledoit and Wolf, Journal of Multivariate
  Analysis, Volume 88, Issue 2, February 2004, pages 365-411.
  """

Contributor:
"computed using 0" feels like an incomplete sentence.

And I'm a bit confused. As someone who's not familiar with the algorithm, this phrase seems to contradict the :shrinkage option provided below.
  import Nx.Defn

  @derive {Nx.Container, containers: [:covariance, :shrinkage, :location]}
  defstruct [:covariance, :shrinkage, :location]

  opts_schema = [
    block_size: [
      default: 1000,
      type: {:custom, Scholar.Options, :positive_number, []},
      doc: "Size of blocks into which the covariance matrix will be split."
    ],
    assume_centered: [
Member:
I would rename it to assume_centered? (add ?).

      default: false,
      type: :boolean,
      doc: """
      If `true`, data will not be centered before computation.
      Useful when working with data whose mean is almost, but not exactly,
      zero. If `false`, data will be centered before computation.
      """
    ]
  ]

  @opts_schema NimbleOptions.new!(opts_schema)

  @doc """
  Estimate the shrunk Ledoit-Wolf covariance matrix.

  ## Options

  #{NimbleOptions.docs(@opts_schema)}

  ## Return Values

  The function returns a struct with the following parameters:

    * `:covariance` - Tensor of shape `{n_features, n_features}`. Estimated covariance matrix.

    * `:shrinkage` - Coefficient in the convex combination used for the computation of the shrunk estimate. Range is `[0, 1]`.

    * `:location` - Tensor of shape `{n_features}`.
      Estimated location, i.e. the estimated mean.

  ## Examples

      iex> key = Nx.Random.key(0)
      iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0]), Nx.tensor([[0.4, 0.2], [0.2, 0.8]]), shape: {50}, type: :f32)
      iex> Scholar.Covariance.LedoitWolf.fit(x)
      %Scholar.Covariance.LedoitWolf{
        covariance: #Nx.Tensor<
          f32[2][2]
          [
            [0.355768620967865, 0.17340737581253052],
            [0.17340737581253052, 1.0300586223602295]
          ]
        >,
        shrinkage: #Nx.Tensor<
          f32
          0.15034136176109314
        >,
        location: #Nx.Tensor<
          f32[2]
          [0.17184630036354065, 0.3276958167552948]
        >
      }

      iex> key = Nx.Random.key(0)
      iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
      iex> Scholar.Covariance.LedoitWolf.fit(x)
      %Scholar.Covariance.LedoitWolf{
        covariance: #Nx.Tensor<
          f32[3][3]
          [
            [2.5945029258728027, 1.507835865020752, 1.1623677015304565],
            [1.507835865020752, 2.106797218322754, 1.181215524673462],
            [1.1623677015304565, 1.181215524673462, 1.460626482963562]
          ]
        >,
        shrinkage: #Nx.Tensor<
          f32
          0.1908363550901413
        >,
        location: #Nx.Tensor<
          f32[3]
          [1.1228725910186768, 0.5419300198554993, 0.8678852319717407]
        >
      }

      iex> key = Nx.Random.key(0)
      iex> {x, _new_key} = Nx.Random.multivariate_normal(key, Nx.tensor([0.0, 0.0, 0.0]), Nx.tensor([[3.0, 2.0, 1.0], [1.0, 2.0, 3.0], [1.3, 1.0, 2.2]]), shape: {10}, type: :f32)
      iex> cov = Scholar.Covariance.LedoitWolf.fit(x, assume_centered: true)
      iex> cov.covariance
      #Nx.Tensor<
        f32[3][3]
        [
          [3.8574986457824707, 2.2048025131225586, 2.1504499912261963],
          [2.2048025131225586, 2.4572863578796387, 1.7215262651443481],
          [2.1504499912261963, 1.7215262651443481, 2.154898166656494]
        ]
      >
  """

  deftransform fit(x, opts \\ []) do
    fit_n(x, NimbleOptions.validate!(opts, @opts_schema))
  end

  defnp fit_n(x, opts) do
    {x, location} = center(x, opts)

    {covariance, shrinkage} = ledoit_wolf(x, opts)

    %__MODULE__{
      covariance: covariance,
      shrinkage: shrinkage,
      location: location
    }
  end

  defnp center(x, opts) do
    location =
      if opts[:assume_centered] do
        Nx.broadcast(0, {Nx.axis_size(x, 1)})
      else
        Nx.mean(x, axes: [0])
      end

    {x - Nx.broadcast(location, x), location}
  end

  defnp ledoit_wolf(x, opts) do
    case Nx.shape(x) do
      # with a single feature the covariance is 1x1 and no shrinkage is needed
      {_n, 1} ->
        {
          Nx.pow(x, 2)
          |> Nx.mean()
          |> Nx.broadcast({1, 1}),
          0.0
        }

      _ ->
        ledoit_wolf_complex(x, opts)
    end
  end

  defnp ledoit_wolf_complex(x, opts) do
    n_features = Nx.axis_size(x, 1)
    shrinkage = ledoit_wolf_shrinkage(x, opts)
    emp_cov = empirical_covariance(x, opts)
    mu = trace(emp_cov) / n_features
    shrunk_cov = (1.0 - shrinkage) * emp_cov
    # flat indices divisible by n_features + 1 are exactly the diagonal entries
    mask = Nx.iota(Nx.shape(shrunk_cov))
    selector = Nx.remainder(mask, n_features + 1) == 0
    shrunk_cov = Nx.select(selector, shrunk_cov + shrinkage * mu, shrunk_cov)
    {shrunk_cov, shrinkage}
  end
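The iota/remainder selector above updates only the diagonal, so just those entries receive the shrinkage * mu term. A minimal equivalent sketch (a hypothetical alternative, not the PR's code):

    shrunk_cov = (1.0 - shrinkage) * emp_cov + shrinkage * mu * Nx.eye(n_features)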

  defnp empirical_covariance(x, _opts) do
    x =
      case Nx.shape(x) do
        # a vector cannot be broadcast into {n, 1}; reshape it into a single-column matrix
        {n} -> Nx.reshape(x, {n, 1})
        _ -> x
      end

    n = Nx.axis_size(x, 0)

    covariance =
      Nx.transpose(x)
      |> Nx.dot(x)
      |> Nx.divide(n)

    case Nx.shape(covariance) do
      {} -> Nx.reshape(covariance, {1, 1})
      _ -> covariance
    end
  end
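For intuition: after centering, this is X^T X / n, the maximum-likelihood covariance estimate (divisor n, not n - 1). A quick check with plain Nx calls, using made-up data:

    x = Nx.tensor([[1.0, -1.0], [-1.0, 1.0]])
    Nx.transpose(x) |> Nx.dot(x) |> Nx.divide(Nx.axis_size(x, 0))
    #=> [[1.0, -1.0], [-1.0, 1.0]]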

  defnp trace(x) do
    n = Nx.axis_size(x, 0)

    Nx.eye(n)
    |> Nx.multiply(x)
    |> Nx.sum()
  end
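Multiplying elementwise by the identity and summing keeps only the diagonal, i.e. the trace. An equivalent one-liner, assuming the Nx version in use provides Nx.take_diagonal/1:

    x |> Nx.take_diagonal() |> Nx.sum()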

  defnp ledoit_wolf_shrinkage(x, opts) do
    case Nx.shape(x) do
      {_, 1} ->
        0

      {n} ->
        Nx.broadcast(x, {1, n})
        |> ledoit_wolf_shrinkage_complex(opts)

      _ ->
        ledoit_wolf_shrinkage_complex(x, opts)
    end
  end

  defnp ledoit_wolf_shrinkage_complex(x, opts) do
    {n_samples, n_features} = Nx.shape(x)
    {_, size} = Nx.type(x)
    block_size = opts[:block_size]
    n_splits = (n_features / block_size) |> Nx.floor() |> Nx.as_type({:s, size})

    x2 = Nx.pow(x, 2)
    emp_cov_trace = Nx.sum(x2, axes: [0]) / n_samples
    mu = Nx.sum(emp_cov_trace) / n_features
    beta = Nx.tensor(0.0, type: {:f, size})
    delta = Nx.tensor(0.0, type: {:f, size})
    i = Nx.tensor(0)
    block = Nx.iota({block_size})

    # rebindings inside `if` do not escape it in defn, so capture its result
    {beta, delta} =
      if n_splits > 0 do
        {beta, delta, _} =
          while {beta, delta, {x, x2, block, i, n_splits}}, i < n_splits do
            {block_size} = Nx.shape(block)
            j = Nx.tensor(0)

            {beta, delta, _} =
              while {beta, delta, {x, x2, block, i, j, n_splits}}, j < n_splits do
                {block_size} = Nx.shape(block)
                rows_from = block_size * i
                cols_from = block_size * j
                x2_t = Nx.transpose(x2)
                x_t = Nx.transpose(x)

                to_beta =
                  Nx.slice_along_axis(x2_t, rows_from, block_size, axis: 0)
                  |> Nx.dot(Nx.slice_along_axis(x2, cols_from, block_size, axis: 1))
                  |> Nx.sum()

                beta = beta + to_beta

                to_delta =
                  Nx.slice_along_axis(x_t, rows_from, block_size, axis: 0)
                  |> Nx.dot(Nx.slice_along_axis(x, cols_from, block_size, axis: 1))
                  |> Nx.pow(2)
                  |> Nx.sum()
Contributor (suggested change):

    - Nx.slice_along_axis(x_t, rows_from, block_size, axis: 0)
    - |> Nx.dot(Nx.slice_along_axis(x, cols_from, block_size, axis: 1))
    - |> Nx.pow(2)
    - |> Nx.sum()
    + Nx.slice_along_axis(x, rows_from, block_size, axis: 1)
    + |> Nx.dot([0], Nx.slice_along_axis(x, cols_from, block_size, axis: 1), [0])
    + |> Nx.pow(2)
    + |> Nx.sum()

Contributor:
This Nx.pow(2) here seems odd, mathematically speaking. Is it correct?

Contributor (author):
I'll double-check that, but at first glance it looks correct.

Contributor (author, @norm4nn, Nov 1, 2024):
@polvalente So, if we take the scikit-learn implementation as a reference, then it is correct. But I have to agree with you - this looks weird. I don't see a reason to implement it that way - I will simplify it in the next commit.

Contributor (author):
I will simplify this whole section significantly in the next commit. After a longer investigation of the scikit-learn code, I found it highly overcomplicated for Scholar. Maybe in Python there is a reason to calculate beta and delta in the loop, but I don't believe this applies here.
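For context on the question above (my reading of the math, not a statement from the thread): the squared blocks accumulate the squared entries of X^T X, so across all blocks

    delta_total = sum_ij ((X^T X)_ij)^2 = ||X^T X||_F^2

and after the later division by n_samples^2 this becomes ||S||_F^2 for S = X^T X / n_samples, which is the ingredient needed for the d^2 = ||S - mu*I||_F^2 term of the Ledoit-Wolf formula. So the Nx.pow(2) is mathematically sound, just spread blockwise.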


                delta = delta + to_delta

                {beta, delta, {x, x2, block, i, j + 1, n_splits}}
              end

            # columns to the right of the full blocks in this block-row
            rows_from = block_size * i
            x2_t = Nx.transpose(x2)
            x_t = Nx.transpose(x)
            {m, n} = Nx.shape(x2)
            mask = Nx.iota({1, n}) |> Nx.tile([m]) |> Nx.reshape({m, n})
            mask = Nx.select(mask >= block_size * n_splits, 1, 0)

            to_beta =
              Nx.slice_along_axis(x2_t, rows_from, block_size, axis: 0)
              |> Nx.dot(Nx.multiply(x2, mask))
              |> Nx.sum()

            beta = beta + to_beta

            to_delta =
              Nx.slice_along_axis(x_t, rows_from, block_size, axis: 0)
              |> Nx.dot(Nx.multiply(x, mask))
              |> Nx.pow(2)
              |> Nx.sum()

            delta = delta + to_delta

            {beta, delta, {x, x2, block, i + 1, n_splits}}
          end

        j = Nx.tensor(0)

        {beta, delta, _} =
          while {beta, delta, {x, x2, block, j, n_splits}}, j < n_splits do
            {block_size} = Nx.shape(block)
            cols_from = block_size * j
            x2_t = Nx.transpose(x2)
            x_t = Nx.transpose(x)
            {rows, cols} = Nx.shape(x)

            # rows below the full blocks in this block-column
            mask =
              Nx.iota({1, cols}) |> Nx.tile([rows]) |> Nx.reshape({rows, cols}) |> Nx.transpose()

            mask = Nx.select(mask >= block_size * n_splits, 1, 0)

            to_beta =
              Nx.multiply(x2_t, mask)
              |> Nx.dot(Nx.slice_along_axis(x2, cols_from, block_size, axis: 1))
              |> Nx.sum()

            beta = beta + to_beta

            to_delta =
              Nx.multiply(x_t, mask)
              |> Nx.dot(Nx.slice_along_axis(x, cols_from, block_size, axis: 1))
              |> Nx.pow(2)
              |> Nx.sum()

            delta = delta + to_delta
            {beta, delta, {x, x2, block, j + 1, n_splits}}
          end

        {beta, delta}
      else
        {beta, delta}
      end

    # finally, the bottom-right remainder that no full block covered
    x2_t = Nx.transpose(x2)
    x_t = Nx.transpose(x)
    {rows, cols} = Nx.shape(x)
    mask = Nx.iota({1, cols}) |> Nx.tile([rows]) |> Nx.reshape({rows, cols})
    mask = Nx.select(mask >= block_size * n_splits, 1, 0)
    mask_t = Nx.transpose(mask)

    to_delta =
      Nx.multiply(x_t, mask_t)
      |> Nx.dot(Nx.multiply(x, mask))
      |> Nx.pow(2)
      |> Nx.sum()

    delta = delta + to_delta
    delta = delta / Nx.pow(n_samples, 2)

    to_beta =
      Nx.multiply(x2_t, mask_t)
      |> Nx.dot(Nx.multiply(x2, mask))
      |> Nx.sum()

    beta = beta + to_beta
    beta = 1.0 / (n_features * n_samples) * (beta / n_samples - delta)
    delta = delta - 2.0 * mu * Nx.sum(emp_cov_trace) + n_features * Nx.pow(mu, 2)
    delta = delta / n_features
    beta = Nx.min(beta, delta)

    # `case` cannot match on a tensor's value inside defn, so branch with `if`,
    # which compiles to a tensor-aware select
    if beta == 0 do
      0.0
    else
      beta / delta
    end
  end
end
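To connect the last lines back to the paper (my summary, not text from the PR): the returned value is the shrinkage intensity

    shrinkage = min(beta, delta) / delta

where delta estimates d^2 = ||S - mu*I||_F^2 / n_features, the dispersion of the sample covariance around the shrinkage target, and beta estimates the expected sampling error of S. The Nx.min/2 clamp keeps the coefficient in [0, 1], and the zero branch returns 0 rather than dividing in the degenerate case.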