Skip to content

Commit

Permalink
Improvements to OrdinalEncoder, OneHotEncoder, NaiveBayes, LogisticRe…
Browse files Browse the repository at this point in the history
…gression (elixir-nx#293)

* Update

* mix format

* Remove Scholar.Preprocessing import from Scholar.Metrics.Classification

* mix format

* Remove commented-out code from NaiveBayes.Complement

* update docstrings

---------

Co-authored-by: Krsto Proroković <[email protected]>
  • Loading branch information
krstopro and Krsto Proroković authored Aug 1, 2024
1 parent e8a45a3 commit 7050d32
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 152 deletions.
15 changes: 11 additions & 4 deletions lib/scholar/linear/logistic_regression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,23 @@ defmodule Scholar.Linear.LogisticRegression do
# Logistic Regression training loop

defnp fit_n(x, y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts) do
num_samples = Nx.axis_size(x, 0)
iterations = opts[:iterations]
num_classes = opts[:num_classes]
optimizer_update_fn = opts[:optimizer_update_fn]
y = Scholar.Preprocessing.one_hot_encode(y, num_classes: num_classes)

y_one_hot =
y
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))

{{final_coef, final_bias}, _} =
while {{coef, bias},
{x, iterations, y, coef_optimizer_state, bias_optimizer_state,
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state,
has_converged = Nx.u8(0), iter = 0}},
iter < iterations and not has_converged do
{loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y)
{loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y_one_hot)

{coef_updates, coef_optimizer_state} =
optimizer_update_fn.(coef_grad, coef_optimizer_state, coef)
Expand All @@ -167,7 +173,8 @@ defmodule Scholar.Linear.LogisticRegression do
has_converged = Nx.sum(Nx.abs(loss)) < Nx.size(x) * opts[:eps]

{{coef, bias},
{x, iterations, y, coef_optimizer_state, bias_optimizer_state, has_converged, iter + 1}}
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state, has_converged,
iter + 1}}
end

%__MODULE__{
Expand Down
18 changes: 11 additions & 7 deletions lib/scholar/metrics/classification.ex
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ defmodule Scholar.Metrics.Classification do

import Nx.Defn, except: [assert_shape: 2, assert_shape_pattern: 2]
import Scholar.Shared
import Scholar.Preprocessing
alias Scholar.Integrate

general_schema = [
Expand Down Expand Up @@ -1263,8 +1262,10 @@ defmodule Scholar.Metrics.Classification do
each class, from which the log loss is computed by averaging the negative log
of the probability forecasted for the true class over a number of samples.
`y_true` should contain `num_classes` unique values, and the sum of `y_prob`
along axis 1 should be 1 to respect the law of total probability.
`y_true` should be a tensor of shape {num_samples} containing values
between 0 and num_classes - 1 (inclusive).
`y_prob` should be a tensor of shape {num_samples, num_classes} containing
predicted probability distributions over classes for each sample.
## Options
Expand Down Expand Up @@ -1307,6 +1308,7 @@ defmodule Scholar.Metrics.Classification do
raise ArgumentError, "y_true and y_prob must have the same size along axis 0"
end

num_samples = Nx.size(y_true)
num_classes = opts[:num_classes]

if Nx.axis_size(y_prob, 1) != num_classes do
Expand All @@ -1320,14 +1322,16 @@ defmodule Scholar.Metrics.Classification do
type: to_float_type(y_prob)
)

y_true_onehot =
ordinal_encode(y_true, num_classes: num_classes)
|> one_hot_encode(num_classes: num_classes)
y_one_hot =
y_true
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))

y_prob = Nx.clip(y_prob, 0, 1)

sample_loss =
Nx.multiply(y_true_onehot, y_prob)
Nx.multiply(y_one_hot, y_prob)
|> Nx.sum(axes: [-1])
|> Nx.log()
|> Nx.negate()
Expand Down
44 changes: 15 additions & 29 deletions lib/scholar/naive_bayes/complement.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ defmodule Scholar.NaiveBayes.Complement do
Reference:
* [1] - [Paper about Complement Naive Bayes Algorithm](https://cdn.aaai.org/ICML/2003/ICML03-081.pdf)
* [1] [Tackling the Poor Assumptions of Naive Bayes Text Classifiers](https://cdn.aaai.org/ICML/2003/ICML03-081.pdf)
"""
import Nx.Defn
import Scholar.Shared
Expand Down Expand Up @@ -93,8 +93,9 @@ defmodule Scholar.NaiveBayes.Complement do
@opts_schema NimbleOptions.new!(opts_schema)

@doc """
The multinomial Naive Bayes classifier is suitable for classification with
discrete features (e.g., word counts for text classification)
Fits a complement naive Bayes classifier. The function assumes that the targets `y` are integers
between 0 and `num_classes` - 1 (inclusive). Otherwise, those samples will not
contribute to `class_count`.
## Options
Expand Down Expand Up @@ -340,35 +341,20 @@ defmodule Scholar.NaiveBayes.Complement do

classes_encoded = Nx.iota({num_classes})

classes =
y_one_hot =
y
|> Scholar.Preprocessing.ordinal_encode(num_classes: num_classes)
|> Scholar.Preprocessing.one_hot_encode(num_classes: num_classes)
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))
|> Nx.as_type(x_type)

{_, classes_features} = classes_shape = Nx.shape(classes)

classes =
cond do
classes_features == 1 and num_classes == 2 ->
Nx.concatenate([1 - classes, classes], axis: 1)

classes_features == 1 and num_classes != 2 ->
Nx.broadcast(1.0, classes_shape)

true ->
classes
end

classes =
y_weighted =
if opts[:sample_weights_flag],
do: classes * Nx.reshape(sample_weights, {:auto, 1}),
else: classes

{_, n_classes} = Nx.shape(classes)
class_count = Nx.broadcast(Nx.tensor(0.0, type: x_type), {n_classes})
feature_count = Nx.broadcast(Nx.tensor(0.0, type: x_type), {n_classes, num_features})
feature_count = feature_count + Nx.dot(classes, [0], x, [0])
class_count = class_count + Nx.sum(classes, axes: [0])
do: Nx.reshape(sample_weights, {num_samples, 1}) * y_one_hot,
else: y_one_hot

class_count = Nx.sum(y_weighted, axes: [0])
feature_count = Nx.dot(y_weighted, [0], x, [0])
feature_all = Nx.sum(feature_count, axes: [0])
alpha = check_alpha(alpha, opts[:force_alpha], num_features)
complement_count = feature_all + alpha - feature_count
Expand Down
3 changes: 2 additions & 1 deletion lib/scholar/naive_bayes/multinomial.ex
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ defmodule Scholar.NaiveBayes.Multinomial do
@opts_schema NimbleOptions.new!(opts_schema)

@doc """
Fits a naive Bayes model. The function assumes that targets `y` are integers
Fits a naive Bayes model. The function assumes that the targets `y` are integers
between 0 and `num_classes` - 1 (inclusive). Otherwise, those samples will not
contribute to `class_count`.
Expand Down Expand Up @@ -239,6 +239,7 @@ defmodule Scholar.NaiveBayes.Multinomial do
y_one_hot =
y
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))
|> Nx.as_type(type)

Expand Down
2 changes: 1 addition & 1 deletion lib/scholar/neighbors/large_vis.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule Scholar.Neighbors.LargeVis do
],
num_iters: [
type: :non_neg_integer,
default: 1,
default: 3,
doc: "The number of times to perform neighborhood expansion."
],
key: [
Expand Down
12 changes: 6 additions & 6 deletions lib/scholar/preprocessing.ex
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,14 @@ defmodule Scholar.Preprocessing do
## Examples
iex> Scholar.Preprocessing.ordinal_encode(Nx.tensor([3, 2, 4, 56, 2, 4, 2]), num_classes: 4)
iex> Scholar.Preprocessing.ordinal_encode(Nx.tensor([3, 2, 4, 56, 2, 4, 2]))
#Nx.Tensor<
s64[7]
u64[7]
[1, 0, 2, 3, 0, 2, 0]
>
"""
defn ordinal_encode(tensor, opts \\ []) do
Scholar.Preprocessing.OrdinalEncoder.fit_transform(tensor, opts)
defn ordinal_encode(tensor) do
Scholar.Preprocessing.OrdinalEncoder.fit_transform(tensor)
end

@doc """
Expand All @@ -161,7 +161,7 @@ defmodule Scholar.Preprocessing do
## Examples
iex> Scholar.Preprocessing.one_hot_encode(Nx.tensor([2, 0, 3, 2, 1, 1, 0]), num_classes: 4)
iex> Scholar.Preprocessing.one_hot_encode(Nx.tensor([2, 0, 3, 2, 1, 1, 0]), num_categories: 4)
#Nx.Tensor<
u8[7][4]
[
Expand All @@ -175,7 +175,7 @@ defmodule Scholar.Preprocessing do
]
>
"""
defn one_hot_encode(tensor, opts \\ []) do
defn one_hot_encode(tensor, opts) do
Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, opts)
end

Expand Down
110 changes: 67 additions & 43 deletions lib/scholar/preprocessing/one_hot_encoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
"""
import Nx.Defn

@derive {Nx.Container, containers: [:encoder, :one_hot]}
defstruct [:encoder, :one_hot]
@derive {Nx.Container, containers: [:ordinal_encoder]}
defstruct [:ordinal_encoder]

encode_schema = [
num_classes: [
num_categories: [
required: true,
type: :pos_integer,
doc: """
Number of classes to be encoded.
The number of categories to be encoded.
"""
]
]
Expand All @@ -31,37 +31,32 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
## Examples
iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
%Scholar.Preprocessing.OneHotEncoder{
encoder: %Scholar.Preprocessing.OrdinalEncoder{
encoding_tensor: Nx.tensor(
[
[0, 2],
[1, 3],
[2, 4],
[3, 56]
]
ordinal_encoder: %Scholar.Preprocessing.OrdinalEncoder{
categories: Nx.tensor([2, 3, 4, 56]
)
},
one_hot: Nx.tensor(
[
[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
], type: :u8
)
}
}
"""
deftransform fit(tensor, opts \\ []) do
fit_n(tensor, NimbleOptions.validate!(opts, @encode_schema))
deftransform fit(tensor, opts) do
if Nx.rank(tensor) != 1 do
raise ArgumentError,
"""
expected input tensor to have shape {num_samples}, \
got tensor with shape: #{inspect(Nx.shape(tensor))}
"""
end

opts = NimbleOptions.validate!(opts, @encode_schema)

fit_n(tensor, opts)
end

defnp fit_n(tensor, opts) do
encoder = Scholar.Preprocessing.OrdinalEncoder.fit(tensor, opts)
one_hot = Nx.iota({opts[:num_classes]}) == Nx.iota({opts[:num_classes], 1})
%__MODULE__{encoder: encoder, one_hot: one_hot}
ordinal_encoder = Scholar.Preprocessing.OrdinalEncoder.fit(tensor, opts)
%__MODULE__{ordinal_encoder: ordinal_encoder}
end

@doc """
Expand All @@ -70,9 +65,9 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
## Examples
iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, t)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, tensor)
#Nx.Tensor<
u8[7][4]
[
Expand All @@ -86,8 +81,8 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
]
>
iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
iex> new_tensor = Nx.tensor([2, 3, 4, 3, 4, 56, 2])
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, new_tensor)
#Nx.Tensor<
Expand All @@ -103,18 +98,27 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
]
>
"""
defn transform(%__MODULE__{encoder: encoder, one_hot: one_hot}, tensor) do
decoded = Scholar.Preprocessing.OrdinalEncoder.transform(encoder, tensor)
Nx.take(one_hot, decoded)
defn transform(%__MODULE__{ordinal_encoder: ordinal_encoder}, tensor) do
num_categories = Nx.size(ordinal_encoder.categories)
num_samples = Nx.size(tensor)

encoded =
ordinal_encoder
|> Scholar.Preprocessing.OrdinalEncoder.transform(tensor)
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_categories})

encoded == Nx.iota({num_samples, num_categories}, axis: 1)
end

@doc """
Apply encoding on the provided tensor directly. It's equivalent to `fit/2` and then `transform/2` on the same data.
Apply
encoding on the provided tensor directly. It's equivalent to `fit/2` and then `transform/2` on the same data.
## Examples
iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit_transform(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, num_categories: 4)
#Nx.Tensor<
u8[7][4]
[
Expand All @@ -128,9 +132,29 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
]
>
"""
defn fit_transform(tensor, opts \\ []) do
tensor
|> fit(opts)
|> transform(tensor)
deftransform fit_transform(tensor, opts) do
if Nx.rank(tensor) != 1 do
raise ArgumentError,
"""
expected input tensor to have shape {num_samples}, \
got tensor with shape: #{inspect(Nx.shape(tensor))}
"""
end

opts = NimbleOptions.validate!(opts, @encode_schema)
fit_transform_n(tensor, opts)
end

defnp fit_transform_n(tensor, opts) do
num_samples = Nx.size(tensor)
num_categories = opts[:num_categories]

encoded =
tensor
|> Scholar.Preprocessing.OrdinalEncoder.fit_transform()
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_categories})

encoded == Nx.iota({num_samples, num_categories}, axis: 1)
end
end
Loading

0 comments on commit 7050d32

Please sign in to comment.