Skip to content

Commit

Permalink
Merge branch 'improve_linear_interpolation' of github.com:msluszniak/…
Browse files Browse the repository at this point in the history
…scholar into improve_linear_interpolation
  • Loading branch information
msluszniak committed Oct 17, 2023
2 parents 03f4707 + cff206b commit 384d415
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 50 deletions.
28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
<h1><img src="./images/scholar.png" alt="Scholar" width="400"></h1>
<p align="center">
<img src="https://github.com/elixir-nx/scholar/raw/main/images/scholar.png" alt="Scholar" width="400">
<br />
<a href="https://hexdocs.pm/scholar"><img src="http://img.shields.io/badge/hex.pm-docs-green.svg?style=flat" title="Documentation" /></a>
<a href="https://hex.pm/packages/scholar"><img src="https://img.shields.io/hexpm/v/scholar.svg" title="Package" /></a>
</p>

[![Documentation](http://img.shields.io/badge/hex.pm-docs-green.svg?style=flat)](https://hexdocs.pm/scholar)
[![Package](https://img.shields.io/hexpm/v/scholar.svg)](https://hex.pm/packages/scholar)
<br />

Traditional machine learning tools built on top of Nx. Scholar implements
several algorithms for classification, regression, clustering, dimensionality
Expand Down Expand Up @@ -45,6 +49,15 @@ config :nx, :default_backend, EXLA.Backend
config :nx, :default_defn_options, [compiler: EXLA, client: :host]
```

> #### JIT required! {: .warning}
>
> It is important you set the `default_defn_options` as shown in the snippet above,
> as many algorithms in Scholar use loops which are much more memory efficient when
> JIT compiled.
>
> If for some reason you cannot set a default `defn` compiler, you can explicitly
> JIT any function, for example: `EXLA.jit(&Scholar.Cluster.AffinityPropagation.fit/1)`.
### Notebooks

To use Scholar inside code notebooks, run:
Expand All @@ -60,6 +73,15 @@ Nx.global_default_backend(EXLA.Backend)
Nx.Defn.global_default_options(compiler: EXLA, client: :host)
```

> #### JIT required! {: .warning}
>
> It is important you set the `Nx.Defn.global_default_options/1` as shown in the snippet
> above, as many algorithms in Scholar use loops which are much more memory efficient
> when JIT compiled.
>
> If for some reason you cannot set a default `defn` compiler, you can explicitly
> JIT any function, for example: `EXLA.jit(&Scholar.Cluster.AffinityPropagation.fit/1)`.
## License

Copyright (c) 2022 The Machine Learning Working Group of the Erlang Ecosystem Foundation
Expand Down
11 changes: 0 additions & 11 deletions lib/scholar.ex

This file was deleted.

1 change: 0 additions & 1 deletion lib/scholar/interpolation/linear.ex
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ defmodule Scholar.Interpolation.Linear do
]

@opts_schema NimbleOptions.new!(opts_schema)

@doc """
Fits a linear interpolation of the given `(x, y)` points
Expand Down
2 changes: 1 addition & 1 deletion lib/scholar/linear/isotonic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ defmodule Scholar.Linear.IsotonicRegression do
y_thresholds: Nx.Tensor.t(),
increasing: Nx.Tensor.t(),
cutoff_index: Nx.Tensor.t(),
preprocess: Tuple.t() | Scholar.Interpolation.Linear.t()
preprocess: tuple() | Scholar.Interpolation.Linear.t()
}

opts = [
Expand Down
163 changes: 131 additions & 32 deletions lib/scholar/metrics/classification.ex
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ defmodule Scholar.Metrics.Classification do
* `:micro` - Calculate metrics globally by counting the total true positives,
false negatives and false positives.
* `:none` - The f1 scores for each class are returned.
* `:none` - The F-score values for each class are returned.
"""
]
]

fbeta_score_schema = f1_score_schema

confusion_matrix_schema =
general_schema ++
[
Expand Down Expand Up @@ -163,6 +165,7 @@ defmodule Scholar.Metrics.Classification do
@confusion_matrix_schema NimbleOptions.new!(confusion_matrix_schema)
@balanced_accuracy_schema NimbleOptions.new!(balanced_accuracy_schema)
@cohen_kappa_schema NimbleOptions.new!(cohen_kappa_schema)
@fbeta_score_schema NimbleOptions.new!(fbeta_score_schema)
@f1_score_schema NimbleOptions.new!(f1_score_schema)
@brier_score_loss_schema NimbleOptions.new!(brier_score_loss_schema)
@accuracy_schema NimbleOptions.new!(accuracy_schema)
Expand Down Expand Up @@ -586,86 +589,182 @@ defmodule Scholar.Metrics.Classification do
end

@doc """
Calculates F1 score given rank-1 tensors which represent
Calculates F-beta score given rank-1 tensors which represent
the expected (`y_true`) and predicted (`y_pred`) classes.
If all examples are true negatives, then the result is 0 to
avoid zero division.
## Options
#{NimbleOptions.docs(@f1_score_schema)}
#{NimbleOptions.docs(@fbeta_score_schema)}
## Examples
iex> y_true = Nx.tensor([0, 1, 1, 1, 1, 0, 2, 1, 0, 1], type: :u32)
iex> y_pred = Nx.tensor([0, 2, 1, 1, 2, 2, 2, 0, 0, 1], type: :u32)
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3)
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.u32(1), num_classes: 3)
#Nx.Tensor<
f32[3]
[0.6666666865348816, 0.6666666865348816, 0.4000000059604645]
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :macro)
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.u32(2), num_classes: 3)
#Nx.Tensor<
f32[3]
[0.6666666865348816, 0.5555555820465088, 0.625]
>
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.f32(0.5), num_classes: 3)
#Nx.Tensor<
f32[3]
[0.6666666865348816, 0.8333333134651184, 0.29411765933036804]
>
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.u32(2), num_classes: 3, average: :macro)
#Nx.Tensor<
f32
0.5777778029441833
0.6157407760620117
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :weighted)
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.u32(2), num_classes: 3, average: :weighted)
#Nx.Tensor<
f32
0.6399999856948853
0.5958333611488342
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :micro)
iex> Scholar.Metrics.Classification.fbeta_score(y_true, y_pred, Nx.f32(0.5), num_classes: 3, average: :micro)
#Nx.Tensor<
f32
0.6000000238418579
>
iex> Scholar.Metrics.Classification.f1_score(Nx.tensor([1,0,1,0]), Nx.tensor([0, 1, 0, 1]), num_classes: 2, average: :none)
iex> Scholar.Metrics.Classification.fbeta_score(Nx.tensor([1, 0, 1, 0]), Nx.tensor([0, 1, 0, 1]), Nx.tensor(0.5), num_classes: 2, average: :none)
#Nx.Tensor<
f32[2]
[0.0, 0.0]
>
iex> Scholar.Metrics.Classification.fbeta_score(Nx.tensor([1, 0, 1, 0]), Nx.tensor([0, 1, 0, 1]), 0.5, num_classes: 2, average: :none)
#Nx.Tensor<
f32[2]
[0.0, 0.0]
>
"""
deftransform f1_score(y_true, y_pred, opts \\ []) do
f1_score_n(y_true, y_pred, NimbleOptions.validate!(opts, @f1_score_schema))
deftransform fbeta_score(y_true, y_pred, beta, opts \\ []) do
# Validate `opts` against the F-beta option schema here, then delegate to the
# private numerical implementation with the validated options.
fbeta_score_n(y_true, y_pred, beta, NimbleOptions.validate!(opts, @fbeta_score_schema))
end

defnp f1_score_n(y_true, y_pred, opts) do
defnp fbeta_score_n(y_true, y_pred, beta, opts) do
check_shape(y_pred, y_true)
num_classes = check_num_classes(opts[:num_classes])
average = opts[:average]

case opts[:average] do
{_precision, _recall, per_class_fscore} =
precision_recall_fscore_n(y_true, y_pred, beta, num_classes, average)

per_class_fscore
end

defnp fbeta_score_v(confusion_matrix, average) do
true_positive = Nx.take_diagonal(confusion_matrix)
false_positive = Nx.sum(confusion_matrix, axes: [0]) - true_positive
false_negative = Nx.sum(confusion_matrix, axes: [1]) - true_positive

case average do
:micro ->
accuracy(y_true, y_pred)
true_positive = Nx.sum(true_positive)
false_positive = Nx.sum(false_positive)
false_negative = Nx.sum(false_negative)

{true_positive, false_positive, false_negative}

_ ->
cm = confusion_matrix(y_true, y_pred, num_classes: num_classes)
true_positive = Nx.take_diagonal(cm)
false_positive = Nx.sum(cm, axes: [0]) - true_positive
false_negative = Nx.sum(cm, axes: [1]) - true_positive
{true_positive, false_positive, false_negative}
end
end

precision = safe_division(true_positive, true_positive + false_positive)
defnp precision_recall_fscore_n(y_true, y_pred, beta, num_classes, average) do
confusion_matrix = confusion_matrix(y_true, y_pred, num_classes: num_classes)
{true_positive, false_positive, false_negative} = fbeta_score_v(confusion_matrix, average)

recall = safe_division(true_positive, true_positive + false_negative)
precision = safe_division(true_positive, true_positive + false_positive)
recall = safe_division(true_positive, true_positive + false_negative)

per_class_f1 = safe_division(2 * precision * recall, precision + recall)
per_class_fscore =
cond do
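# Nx.is_infinity/1 matches both +Inf and -Inf; only +Inf is expected for beta, in which case the F-beta score reduces to recall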
Nx.is_infinity(beta) ->
recall

case opts[:average] do
:none ->
per_class_f1
beta == 0 ->
precision

:macro ->
Nx.mean(per_class_f1)
true ->
beta2 = Nx.pow(beta, 2)
safe_division((1 + beta2) * precision * recall, beta2 * precision + recall)
end

:weighted ->
support = (y_true == Nx.iota({num_classes, 1})) |> Nx.sum(axes: [1])
case average do
:none ->
{precision, recall, per_class_fscore}

safe_division(per_class_f1 * support, Nx.sum(support))
|> Nx.sum()
end
:micro ->
{precision, recall, per_class_fscore}

:macro ->
{precision, recall, Nx.mean(per_class_fscore)}

:weighted ->
support = (y_true == Nx.iota({num_classes, 1})) |> Nx.sum(axes: [1])

per_class_fscore =
(per_class_fscore * support)
|> safe_division(Nx.sum(support))
|> Nx.sum()

{precision, recall, per_class_fscore}
end
end

@doc """
Calculates F1 score given rank-1 tensors which represent
the expected (`y_true`) and predicted (`y_pred`) classes.
If all examples are true negatives, then the result is 0 to
avoid zero division.
## Options
#{NimbleOptions.docs(@f1_score_schema)}
## Examples
iex> y_true = Nx.tensor([0, 1, 1, 1, 1, 0, 2, 1, 0, 1], type: :u32)
iex> y_pred = Nx.tensor([0, 2, 1, 1, 2, 2, 2, 0, 0, 1], type: :u32)
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3)
#Nx.Tensor<
f32[3]
[0.6666666865348816, 0.6666666865348816, 0.4000000059604645]
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :macro)
#Nx.Tensor<
f32
0.5777778029441833
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :weighted)
#Nx.Tensor<
f32
0.6399999856948853
>
iex> Scholar.Metrics.Classification.f1_score(y_true, y_pred, num_classes: 3, average: :micro)
#Nx.Tensor<
f32
0.6000000238418579
>
iex> Scholar.Metrics.Classification.f1_score(Nx.tensor([1, 0, 1, 0]), Nx.tensor([0, 1, 0, 1]), num_classes: 2, average: :none)
#Nx.Tensor<
f32[2]
[0.0, 0.0]
>
"""
deftransform f1_score(y_true, y_pred, opts \\ []) do
# F1 is computed by the shared F-beta implementation with beta fixed to 1;
# options are validated against the F1 schema before delegating.
fbeta_score_n(y_true, y_pred, 1, NimbleOptions.validate!(opts, @f1_score_schema))
end

@doc """
Zero-one classification loss.
Expand Down
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ defmodule Scholar.MixProject do

defp docs do
[
main: "Scholar",
main: "readme",
source_url: @source_url,
logo: "images/scholar_simplified.png",
extra_section: "Guides",
extras: [
"README.md",
"notebooks/linear_regression.livemd",
"notebooks/k_means.livemd",
"notebooks/k_nearest_neighbors.livemd",
Expand Down
2 changes: 1 addition & 1 deletion mix.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%{
"complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"},
"earmark_parser": {:hex, :earmark_parser, "1.4.33", "3c3fd9673bb5dcc9edc28dd90f50c87ce506d1f71b70e3de69aa8154bc695d44", [:mix], [], "hexpm", "2d526833729b59b9fdb85785078697c72ac5e5066350663e5be6a1182da61b8f"},
"earmark_parser": {:hex, :earmark_parser, "1.4.37", "2ad73550e27c8946648b06905a57e4d454e4d7229c2dafa72a0348c99d8be5f7", [:mix], [], "hexpm", "6b19783f2802f039806f375610faa22da130b8edc21209d0bff47918bb48360e"},
"elixir_make": {:hex, :elixir_make, "0.7.7", "7128c60c2476019ed978210c245badf08b03dbec4f24d05790ef791da11aa17c", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "5bc19fff950fad52bbe5f211b12db9ec82c6b34a9647da0c2224b8b8464c7e6c"},
"ex_doc": {:hex, :ex_doc, "0.30.6", "5f8b54854b240a2b55c9734c4b1d0dd7bdd41f71a095d42a70445c03cf05a281", [:mix], [{:earmark_parser, "~> 1.4.31", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "bd48f2ddacf4e482c727f9293d9498e0881597eae6ddc3d9562bd7923375109f"},
"exla": {:hex, :exla, "0.6.0", "af63e45ce41ad25630967923147d14292a0cc48e507b8a3cf3bf3d5483099a28", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nx, "~> 0.6.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.5.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "5f6a4a105ea9ab207b9aa4de5a294730e2bfe9639f4b8d37a7c00da131090d7a"},
Expand Down
20 changes: 20 additions & 0 deletions test/scholar/metrics/classification_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,24 @@ defmodule Scholar.Metrics.ClassificationTest do
assert_all_close(tpr, Nx.tensor([0.0, 0.5, 1.0, 1.0]))
assert_all_close(thresholds, Nx.tensor([1.3, 0.3, 0.2, 0.1]))
end

# Limit cases of the F-beta score: with an infinite beta the result is compared
# against recall, and with beta equal to 0 it is compared against precision.
describe "fbeta_score" do
test "equals recall when beta is infinity" do
# Infinite beta passed as a tensor.
beta = Nx.tensor(:infinity)
# True labels are split evenly between classes 0 and 1, while every sample is
# predicted as class 1.
y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u32)
fbeta_scores = Classification.fbeta_score(y_true, y_pred, beta, num_classes: 2)

# The per-class F-beta scores must match the per-class recall.
assert_all_close(fbeta_scores, Classification.recall(y_true, y_pred, num_classes: 2))
end

test "equals precision when beta is 0" do
# Beta passed as a plain integer rather than a tensor.
beta = 0
# Same fixtures as above: balanced true labels, all predictions are class 1.
y_true = Nx.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], type: :u32)
y_pred = Nx.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], type: :u32)
fbeta_scores = Classification.fbeta_score(y_true, y_pred, beta, num_classes: 2)

# The per-class F-beta scores must match the per-class precision.
assert_all_close(fbeta_scores, Classification.precision(y_true, y_pred, num_classes: 2))
end
end
end

0 comments on commit 384d415

Please sign in to comment.