Added d2_pinball_score and d2_absolute_error_score (#277)
* Added d2_absolute_error_score and d2_pinball_score

* Updated multioutput description

* run mix format

* run mix format

* Update lib/scholar/metrics/regression.ex

Co-authored-by: Mateusz Sluszniak <[email protected]>

* Removed repeating code

---------

Co-authored-by: Mateusz Sluszniak <[email protected]>
norm4nn and msluszniak authored Jun 7, 2024
1 parent 32a5b56 commit 341301b
Showing 1 changed file with 237 additions and 0 deletions.
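
For context, a minimal usage sketch of the two functions this commit adds (inputs and results taken from the doctests in the diff below; assumes Nx and Scholar are available):

    y_true = Nx.tensor([1, 2, 3])
    y_pred = Nx.tensor([1, 3, 3])

    # Fraction of absolute error explained vs. always predicting the median.
    Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
    #=> 0.5

    # Fraction of pinball loss explained vs. always predicting the 0.9-quantile.
    Scholar.Metrics.Regression.d2_pinball_score(y_true, y_pred, alpha: 0.9)
    #=> 0.7727271914482117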
lib/scholar/metrics/regression.ex (+237, −0)
@@ -642,4 +642,241 @@ defmodule Scholar.Metrics.Regression do
assert_rank!(y_true, 1)
assert_same_shape!(y_true, y_pred)
end

d2_absolute_error_score_opts = [
multioutput: [
type: {:in, [:raw_values, :uniform_average]},
default: :uniform_average,
doc: """
Defines how to aggregate scores over multiple outputs.
Defaults to `:uniform_average`.
`:raw_values` :
Returns a full set of scores in case of multioutput input.
`:uniform_average` :
Scores of all outputs are averaged with uniform weight.
"""
]
]

@d2_absolute_error_score_schema NimbleOptions.new!(d2_absolute_error_score_opts)

@doc ~S"""
`D^2` regression score function, fraction of absolute error explained.
The best possible score is 1.0 and it can be negative (because the model
can be arbitrarily worse). A model that always uses the empirical median of
`y_true` as its constant prediction, disregarding the input features,
gets a `D^2` score of 0.0.
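Formally, with $$\hat{y}$$ the predictions and $$\tilde{y}$$ the empirical median of `y_true`:
$$D^2 = 1 - \frac{\sum_i |y_i - \hat{y}_i|}{\sum_i |y_i - \tilde{y}|}$$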
## Options
#{NimbleOptions.docs(@d2_absolute_error_score_opts)}
## Return Values
Returns a float or a tensor of floats.
## Examples
iex> y_true = Nx.tensor([1, 2, 3])
iex> y_pred = Nx.tensor([1, 2, 3])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
#Nx.Tensor<
f32
1.0
>
iex> y_true = Nx.tensor([1, 2, 3])
iex> y_pred = Nx.tensor([2, 2, 2])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
#Nx.Tensor<
f32
0.0
>
iex> y_true = Nx.tensor([1, 2, 3])
iex> y_pred = Nx.tensor([3, 2, 1])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
#Nx.Tensor<
f32
-1.0
>
iex> y_true = Nx.tensor([3, -0.5, 2, 7])
iex> y_pred = Nx.tensor([2.5, 0.0, 2, 8])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
#Nx.Tensor<
f32
0.7647058963775635
>
iex> y_true = Nx.tensor([[0.5, 1], [-1, 1], [7, -6]])
iex> y_pred = Nx.tensor([[0, 2], [-1, 2], [8, -5]])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred)
#Nx.Tensor<
f32
0.6919642686843872
>
iex> y_true = Nx.tensor([[0.5, 1], [-1, 1], [7, -6]])
iex> y_pred = Nx.tensor([[0, 2], [-1, 2], [8, -5]])
iex> Scholar.Metrics.Regression.d2_absolute_error_score(y_true, y_pred, multioutput: :raw_values)
#Nx.Tensor<
f32[2]
[0.8125, 0.5714285373687744]
>
"""

deftransform d2_absolute_error_score(y_true, y_pred, opts \\ []) do
d2_absolute_error_score_n(
y_true,
y_pred,
NimbleOptions.validate!(opts, @d2_absolute_error_score_schema)
)
end

defnp d2_absolute_error_score_n(y_true, y_pred, opts \\ []) do
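# The pinball loss at alpha = 0.5 is half the absolute error, and the
# constant factor cancels in the D^2 ratio, so delegating with alpha: 0.5
# yields exactly the absolute-error score.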
d2_pinball_score(y_true, y_pred, alpha: 0.5, multioutput: opts[:multioutput])
end

d2_pinball_score_opts = [
alpha: [
type: :float,
default: 0.5,
doc: """
The slope of the pinball loss, default=0.5,
This loss is equivalent to $$mean_absolute_error$$ when $$\alpha$$ is 0.5,
$$\alpha = 0.95$$ is minimized by estimators of the 95th percentile.
"""
],
multioutput: [
type: {:in, [:raw_values, :uniform_average]},
default: :uniform_average,
doc: """
Defines how to aggregate scores over multiple outputs.
Defaults to `:uniform_average`.
`:raw_values` :
Returns a full set of scores in case of multioutput input.
`:uniform_average` :
Scores of all outputs are averaged with uniform weight.
"""
]
]

@d2_pinball_score_schema NimbleOptions.new!(d2_pinball_score_opts)

@doc ~S"""
`D^2` regression score function, fraction of pinball loss explained.
The best possible score is 1.0 and it can be negative (because the model
can be arbitrarily worse). A model that always uses the empirical
alpha-quantile of `y_true` as its constant prediction, disregarding the
input features, gets a `D^2` score of 0.0.
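Formally, with $$\hat{y}$$ the predictions, $$q_\alpha$$ the empirical $$\alpha$$-quantile of `y_true`, and $$L_\alpha$$ the mean pinball loss:
$$D^2 = 1 - \frac{L_\alpha(y, \hat{y})}{L_\alpha(y, q_\alpha)}$$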
## Options
#{NimbleOptions.docs(@d2_pinball_score_opts)}
## Return Values
Returns a float or a tensor of floats.
## Examples
iex> y_true = Nx.tensor([1, 2, 3])
iex> y_pred = Nx.tensor([1, 3, 3])
iex> Scholar.Metrics.Regression.d2_pinball_score(y_true, y_pred)
#Nx.Tensor<
f32
0.5
>
iex> Scholar.Metrics.Regression.d2_pinball_score(y_true, y_pred, alpha: 0.9)
#Nx.Tensor<
f32
0.7727271914482117
>
iex> Scholar.Metrics.Regression.d2_pinball_score(y_true, y_true, alpha: 0.1)
#Nx.Tensor<
f32
1.0
>
"""

deftransform d2_pinball_score(y_true, y_pred, opts \\ []) do
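# The score is not well defined for fewer than two samples, so return NaN.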
if Nx.size(y_pred) < 2 do
Nx.Constants.nan()
else
d2_pinball_score_n(y_true, y_pred, NimbleOptions.validate!(opts, @d2_pinball_score_schema))
end
end

defnp d2_pinball_score_n(y_true, y_pred, opts \\ []) do
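# Computes D^2 = 1 - L_alpha(y_true, y_pred) / L_alpha(y_true, q_alpha),
# where q_alpha is the empirical alpha-quantile of y_true and L_alpha is
# the mean pinball loss, with the degenerate zero-loss cases handled below.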
alpha = opts[:alpha]
shape = Nx.shape(y_true)

# Length of the per-output score vector: the output count for 2-D input,
# the sample count for rank-1 input.
m =
if Nx.rank(shape) == 1 do
shape |> elem(0)
else
shape |> elem(1)
end

# Mean pinball loss of the predictions (numerator) and of the constant
# empirical alpha-quantile baseline (denominator), kept per output.
numerator = mean_pinball_loss(y_true, y_pred, alpha: alpha, multioutput: :raw_values)

y_quantile = Nx.broadcast(quantile(y_true, alpha), shape)
denominator = mean_pinball_loss(y_true, y_quantile, alpha: alpha, multioutput: :raw_values)

nonzero_numerator = numerator != 0
nonzero_denominator = denominator != 0

# A nonzero denominator makes the ratio well defined; a zero denominator
# with a nonzero numerator means the quantile baseline itself has zero
# loss, and the score is forced to 0 below.
valid_score = nonzero_numerator and nonzero_denominator
invalid_score = nonzero_numerator and not nonzero_denominator

# The default score of 1 covers the perfect-prediction (zero numerator) case.
output_scores = Nx.broadcast(1, {m})

output_scores =
Nx.select(valid_score, Nx.subtract(1, Nx.divide(numerator, denominator)), output_scores)

output_scores = Nx.select(invalid_score, 0.0, output_scores)

case opts[:multioutput] do
:uniform_average -> Nx.mean(output_scores)
:raw_values -> output_scores
end
end

defnp quantile(tensor, q) do
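# Quantile by linear interpolation between the two nearest order
# statistics (the same scheme as NumPy's default). E.g. for sorted
# [1, 2, 3] and q = 0.9: the fractional index is 0.9 * 2 = 1.8, so the
# result is 0.2 * 2 + 0.8 * 3 = 2.8.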
rank = Nx.rank(tensor)
sorted_tensor = Nx.sort(tensor, axis: 0)
n = Nx.shape(sorted_tensor) |> elem(0)
float_indices = q * (n - 1)
indices = Nx.floor(float_indices) |> Nx.as_type(:s64)
next_indices = Nx.min(indices + 1, n - 1)

{values_at_indices, values_at_next_indices} =
if rank == 1 do
{
Nx.take(sorted_tensor, indices),
Nx.take(sorted_tensor, next_indices)
}
else
m = Nx.shape(sorted_tensor) |> elem(1)
indices_tensor = Nx.broadcast(indices, {n, m})
next_indices_tensor = Nx.broadcast(next_indices, {n, m})

{
Nx.take_along_axis(sorted_tensor, indices_tensor, axis: 0),
Nx.take_along_axis(sorted_tensor, next_indices_tensor, axis: 0)
}
end

# The fractional part of the index is the interpolation weight.
weights = float_indices - Nx.floor(float_indices)

quantiles =
Nx.add(
Nx.multiply(values_at_indices, 1 - weights),
Nx.multiply(values_at_next_indices, weights)
)

Nx.squeeze(quantiles)
end
end
