Improvements to OrdinalEncoder, OneHotEncoder, NaiveBayes, LogisticRegression #293

Merged 7 commits on Aug 1, 2024

15 changes: 11 additions & 4 deletions lib/scholar/linear/logistic_regression.ex
@@ -142,17 +142,23 @@ defmodule Scholar.Linear.LogisticRegression do
# Logistic Regression training loop

defnp fit_n(x, y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts) do
num_samples = Nx.axis_size(x, 0)
iterations = opts[:iterations]
num_classes = opts[:num_classes]
optimizer_update_fn = opts[:optimizer_update_fn]
y = Scholar.Preprocessing.one_hot_encode(y, num_classes: num_classes)

y_one_hot =
y
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))

{{final_coef, final_bias}, _} =
while {{coef, bias},
{x, iterations, y, coef_optimizer_state, bias_optimizer_state,
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state,
has_converged = Nx.u8(0), iter = 0}},
iter < iterations and not has_converged do
{loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y)
{loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y_one_hot)

{coef_updates, coef_optimizer_state} =
optimizer_update_fn.(coef_grad, coef_optimizer_state, coef)
@@ -167,7 +173,8 @@ defmodule Scholar.Linear.LogisticRegression do
has_converged = Nx.sum(Nx.abs(loss)) < Nx.size(x) * opts[:eps]

{{coef, bias},
{x, iterations, y, coef_optimizer_state, bias_optimizer_state, has_converged, iter + 1}}
{x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state, has_converged,
iter + 1}}
end

%__MODULE__{
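Note: the hunk above replaces the call to `Scholar.Preprocessing.one_hot_encode/2` with an inline one-hot construction. A minimal sketch of that pattern in plain Nx (the tensor values below are made up for illustration):

```elixir
y = Nx.tensor([0, 2, 1])
num_samples = Nx.size(y)
num_classes = 3

y
|> Nx.new_axis(1)                                          # {num_samples, 1}
|> Nx.broadcast({num_samples, num_classes})                # repeat each label across its row
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))  # compare against column indices
#=> u8 one-hot matrix [[1, 0, 0], [0, 0, 1], [0, 1, 0]]
```
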
18 changes: 11 additions & 7 deletions lib/scholar/metrics/classification.ex
@@ -13,7 +13,6 @@ defmodule Scholar.Metrics.Classification do

import Nx.Defn, except: [assert_shape: 2, assert_shape_pattern: 2]
import Scholar.Shared
import Scholar.Preprocessing
alias Scholar.Integrate

general_schema = [
@@ -1263,8 +1262,10 @@ defmodule Scholar.Metrics.Classification do
each class, from which the log loss is computed by averaging the negative log
of the probability forecasted for the true class over a number of samples.

`y_true` should contain `num_classes` unique values, and the sum of `y_prob`
along axis 1 should be 1 to respect the law of total probability.
`y_true` should be a tensor of shape {num_samples} containing values
between 0 and num_classes - 1 (inclusive).
`y_prob` should be a tensor of shape {num_samples, num_classes} containing
predicted probability distributions over classes for each sample.

## Options

@@ -1307,6 +1308,7 @@
raise ArgumentError, "y_true and y_prob must have the same size along axis 0"
end

num_samples = Nx.size(y_true)
num_classes = opts[:num_classes]

if Nx.axis_size(y_prob, 1) != num_classes do
@@ -1320,14 +1322,16 @@
type: to_float_type(y_prob)
)

y_true_onehot =
ordinal_encode(y_true, num_classes: num_classes)
|> one_hot_encode(num_classes: num_classes)
y_one_hot =
y_true
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))

y_prob = Nx.clip(y_prob, 0, 1)

sample_loss =
Nx.multiply(y_true_onehot, y_prob)
Nx.multiply(y_one_hot, y_prob)
|> Nx.sum(axes: [-1])
|> Nx.log()
|> Nx.negate()
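A hedged usage sketch of the metric documented above, assuming it is `Scholar.Metrics.Classification.log_loss/3` as suggested by the surrounding code (the tensors are illustrative):

```elixir
y_true = Nx.tensor([0, 1, 1])
y_prob = Nx.tensor([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])

# Mean of -log(probability assigned to the true class):
# -(log(0.9) + log(0.8) + log(0.6)) / 3 ≈ 0.2798
Scholar.Metrics.Classification.log_loss(y_true, y_prob, num_classes: 2)
```
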
44 changes: 15 additions & 29 deletions lib/scholar/naive_bayes/complement.ex
@@ -11,7 +11,7 @@ defmodule Scholar.NaiveBayes.Complement do

Reference:

* [1] - [Paper about Complement Naive Bayes Algorithm](https://cdn.aaai.org/ICML/2003/ICML03-081.pdf)
* [1] [Tackling the Poor Assumptions of Naive Bayes Text Classifiers](https://cdn.aaai.org/ICML/2003/ICML03-081.pdf)
"""
import Nx.Defn
import Scholar.Shared
@@ -93,8 +93,9 @@ defmodule Scholar.NaiveBayes.Complement do
@opts_schema NimbleOptions.new!(opts_schema)

@doc """
The multinomial Naive Bayes classifier is suitable for classification with
discrete features (e.g., word counts for text classification)
Fits a complement naive Bayes classifier. The function assumes that the targets `y` are integers
between 0 and `num_classes` - 1 (inclusive). Otherwise, those samples will not
contribute to `class_count`.

## Options

@@ -340,35 +341,20 @@ defmodule Scholar.NaiveBayes.Complement do

classes_encoded = Nx.iota({num_classes})

classes =
y_one_hot =
y
|> Scholar.Preprocessing.ordinal_encode(num_classes: num_classes)
|> Scholar.Preprocessing.one_hot_encode(num_classes: num_classes)
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))
|> Nx.as_type(x_type)

{_, classes_features} = classes_shape = Nx.shape(classes)

classes =
cond do
classes_features == 1 and num_classes == 2 ->
Nx.concatenate([1 - classes, classes], axis: 1)

classes_features == 1 and num_classes != 2 ->
Nx.broadcast(1.0, classes_shape)

true ->
classes
end

classes =
y_weighted =
if opts[:sample_weights_flag],
do: classes * Nx.reshape(sample_weights, {:auto, 1}),
else: classes

{_, n_classes} = Nx.shape(classes)
class_count = Nx.broadcast(Nx.tensor(0.0, type: x_type), {n_classes})
feature_count = Nx.broadcast(Nx.tensor(0.0, type: x_type), {n_classes, num_features})
feature_count = feature_count + Nx.dot(classes, [0], x, [0])
class_count = class_count + Nx.sum(classes, axes: [0])
do: Nx.reshape(sample_weights, {num_samples, 1}) * y_one_hot,
else: y_one_hot

class_count = Nx.sum(y_weighted, axes: [0])
feature_count = Nx.dot(y_weighted, [0], x, [0])
feature_all = Nx.sum(feature_count, axes: [0])
alpha = check_alpha(alpha, opts[:force_alpha], num_features)
complement_count = feature_all + alpha - feature_count
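The rewritten counting step reduces to contracting the (optionally sample-weighted) one-hot matrix against `x` over the sample axis. A small sketch with made-up tensors, assuming the shapes stated in the docs:

```elixir
x = Nx.tensor([[1.0, 2.0], [0.0, 1.0], [3.0, 0.0]])          # {num_samples, num_features}
y_one_hot = Nx.tensor([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])  # {num_samples, num_classes}

class_count = Nx.sum(y_one_hot, axes: [0])      # samples per class: [2.0, 1.0]
feature_count = Nx.dot(y_one_hot, [0], x, [0])  # per-class feature sums: [[4.0, 2.0], [0.0, 1.0]]
```
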
3 changes: 2 additions & 1 deletion lib/scholar/naive_bayes/multinomial.ex
@@ -72,7 +72,7 @@ defmodule Scholar.NaiveBayes.Multinomial do
@opts_schema NimbleOptions.new!(opts_schema)

@doc """
Fits a naive Bayes model. The function assumes that targets `y` are integers
Fits a naive Bayes model. The function assumes that the targets `y` are integers
between 0 and `num_classes` - 1 (inclusive). Otherwise, those samples will not
contribute to `class_count`.

@@ -239,6 +239,7 @@ defmodule Scholar.NaiveBayes.Multinomial do
y_one_hot =
y
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))
|> Nx.as_type(type)

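As a quick check of the docstring's claim that out-of-range targets do not contribute to `class_count`: with the same one-hot construction shown above, such a label compares unequal to every column index and yields an all-zero row (the labels below are made up):

```elixir
y = Nx.tensor([0, 2, 7])   # 7 lies outside 0..num_classes - 1 for num_classes = 3
num_samples = Nx.size(y)
num_classes = 3

y
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_classes})
|> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1))
#=> [[1, 0, 0], [0, 0, 1], [0, 0, 0]]; the last row adds nothing to the class counts
```
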
2 changes: 1 addition & 1 deletion lib/scholar/neighbors/large_vis.ex
@@ -45,7 +45,7 @@ defmodule Scholar.Neighbors.LargeVis do
],
num_iters: [
type: :non_neg_integer,
default: 1,
default: 3,
doc: "The number of times to perform neighborhood expansion."
],
key: [
12 changes: 6 additions & 6 deletions lib/scholar/preprocessing.ex
@@ -145,14 +145,14 @@ defmodule Scholar.Preprocessing do

## Examples

iex> Scholar.Preprocessing.ordinal_encode(Nx.tensor([3, 2, 4, 56, 2, 4, 2]), num_classes: 4)
iex> Scholar.Preprocessing.ordinal_encode(Nx.tensor([3, 2, 4, 56, 2, 4, 2]))
#Nx.Tensor<
s64[7]
u64[7]
[1, 0, 2, 3, 0, 2, 0]
>
"""
defn ordinal_encode(tensor, opts \\ []) do
Scholar.Preprocessing.OrdinalEncoder.fit_transform(tensor, opts)
defn ordinal_encode(tensor) do
Scholar.Preprocessing.OrdinalEncoder.fit_transform(tensor)
end

@doc """
@@ -161,7 +161,7 @@

## Examples

iex> Scholar.Preprocessing.one_hot_encode(Nx.tensor([2, 0, 3, 2, 1, 1, 0]), num_classes: 4)
iex> Scholar.Preprocessing.one_hot_encode(Nx.tensor([2, 0, 3, 2, 1, 1, 0]), num_categories: 4)
#Nx.Tensor<
u8[7][4]
[
Expand All @@ -175,7 +175,7 @@ defmodule Scholar.Preprocessing do
]
>
"""
defn one_hot_encode(tensor, opts \\ []) do
defn one_hot_encode(tensor, opts) do
Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, opts)
end

110 changes: 67 additions & 43 deletions lib/scholar/preprocessing/one_hot_encoder.ex
@@ -7,15 +7,15 @@ defmodule Scholar.Preprocessing.OneHotEncoder do
"""
import Nx.Defn

@derive {Nx.Container, containers: [:encoder, :one_hot]}
defstruct [:encoder, :one_hot]
@derive {Nx.Container, containers: [:ordinal_encoder]}
defstruct [:ordinal_encoder]

encode_schema = [
num_classes: [
num_categories: [
required: true,
type: :pos_integer,
doc: """
Number of classes to be encoded.
The number of categories to be encoded.
"""
]
]
@@ -31,37 +31,32 @@

## Examples

iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
%Scholar.Preprocessing.OneHotEncoder{
encoder: %Scholar.Preprocessing.OrdinalEncoder{
encoding_tensor: Nx.tensor(
[
[0, 2],
[1, 3],
[2, 4],
[3, 56]
]
ordinal_encoder: %Scholar.Preprocessing.OrdinalEncoder{
categories: Nx.tensor([2, 3, 4, 56]
)
},
one_hot: Nx.tensor(
[
[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
], type: :u8
)
}
}
"""
deftransform fit(tensor, opts \\ []) do
fit_n(tensor, NimbleOptions.validate!(opts, @encode_schema))
deftransform fit(tensor, opts) do
if Nx.rank(tensor) != 1 do
raise ArgumentError,
"""
expected input tensor to have shape {num_samples}, \
got tensor with shape: #{inspect(Nx.shape(tensor))}
"""
end

opts = NimbleOptions.validate!(opts, @encode_schema)

fit_n(tensor, opts)
end

defnp fit_n(tensor, opts) do
encoder = Scholar.Preprocessing.OrdinalEncoder.fit(tensor, opts)
one_hot = Nx.iota({opts[:num_classes]}) == Nx.iota({opts[:num_classes], 1})
%__MODULE__{encoder: encoder, one_hot: one_hot}
ordinal_encoder = Scholar.Preprocessing.OrdinalEncoder.fit(tensor, opts)
%__MODULE__{ordinal_encoder: ordinal_encoder}
end

@doc """
@@ -70,9 +65,9 @@

## Examples

iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, t)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, tensor)
#Nx.Tensor<
u8[7][4]
[
@@ -86,8 +81,8 @@
]
>

iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> encoder = Scholar.Preprocessing.OneHotEncoder.fit(tensor, num_categories: 4)
iex> new_tensor = Nx.tensor([2, 3, 4, 3, 4, 56, 2])
iex> Scholar.Preprocessing.OneHotEncoder.transform(encoder, new_tensor)
#Nx.Tensor<
@@ -103,18 +98,27 @@
]
>
"""
defn transform(%__MODULE__{encoder: encoder, one_hot: one_hot}, tensor) do
decoded = Scholar.Preprocessing.OrdinalEncoder.transform(encoder, tensor)
Nx.take(one_hot, decoded)
defn transform(%__MODULE__{ordinal_encoder: ordinal_encoder}, tensor) do
num_categories = Nx.size(ordinal_encoder.categories)
num_samples = Nx.size(tensor)

encoded =
ordinal_encoder
|> Scholar.Preprocessing.OrdinalEncoder.transform(tensor)
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_categories})

encoded == Nx.iota({num_samples, num_categories}, axis: 1)
end

@doc """
Apply encoding on the provided tensor directly. It's equivalent to `fit/2` and then `transform/2` on the same data.
Applies encoding on the provided tensor directly. It's equivalent to `fit/2` and then `transform/2` on the same data.

## Examples

iex> t = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit_transform(t, num_classes: 4)
iex> tensor = Nx.tensor([3, 2, 4, 56, 2, 4, 2])
iex> Scholar.Preprocessing.OneHotEncoder.fit_transform(tensor, num_categories: 4)
#Nx.Tensor<
u8[7][4]
[
@@ -128,9 +132,29 @@
]
>
"""
defn fit_transform(tensor, opts \\ []) do
tensor
|> fit(opts)
|> transform(tensor)
deftransform fit_transform(tensor, opts) do
if Nx.rank(tensor) != 1 do
raise ArgumentError,
"""
expected input tensor to have shape {num_samples}, \
got tensor with shape: #{inspect(Nx.shape(tensor))}
"""
end

opts = NimbleOptions.validate!(opts, @encode_schema)
fit_transform_n(tensor, opts)
end

defnp fit_transform_n(tensor, opts) do
num_samples = Nx.size(tensor)
num_categories = opts[:num_categories]

encoded =
tensor
|> Scholar.Preprocessing.OrdinalEncoder.fit_transform()
|> Nx.new_axis(1)
|> Nx.broadcast({num_samples, num_categories})

encoded == Nx.iota({num_samples, num_categories}, axis: 1)
end
end