Add other non-encoding preprocessing utilities as separate modules (elixir-nx#222)

* Add other non-encoding preprocessing utilities as separate modules

* Delete test/scholar/preprocessing/normalizer_test.exs

* Delete lib/scholar/preprocessing/normalizer.ex

* Format

* Remove redundant module attribute
msluszniak authored Dec 20, 2023
1 parent 6c60968 commit 00f3de3
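
The refactor keeps the existing one-shot helpers in `Scholar.Preprocessing` but delegates them to dedicated scaler modules, so a scaler can also be fitted once and reused on new data. A minimal sketch of both call styles, based on the diff below (outputs in the comments are approximate, not verified doctest output):

iex> t = Nx.tensor([1, 2, 3])
iex> Scholar.Preprocessing.max_abs_scale(t)                      # shortcut, same call site as before
# => roughly [0.33, 0.67, 1.0]
iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t)          # fit once on training data
iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, Nx.tensor([4, 5, 6]))
# => roughly [1.33, 1.67, 2.0], scaled with the max_abs stored from `t`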
Showing 5 changed files with 367 additions and 73 deletions.
79 changes: 6 additions & 73 deletions lib/scholar/preprocessing.ex
@@ -26,25 +26,6 @@ defmodule Scholar.Preprocessing do
]
]

min_max_schema =
general_schema ++
[
min: [
type: {:or, [:integer, :float]},
default: 0,
doc: """
The lower boundary of the desired range of transformed data.
"""
],
max: [
type: {:or, [:integer, :float]},
default: 1,
doc: """
The upper boundary of the desired range of transformed data.
"""
]
]

normalize_schema =
general_schema ++
[
@@ -75,8 +56,6 @@ defmodule Scholar.Preprocessing do
]
]

@general_schema NimbleOptions.new!(general_schema)
@min_max_schema NimbleOptions.new!(min_max_schema)
@normalize_schema NimbleOptions.new!(normalize_schema)
@binarize_schema NimbleOptions.new!(binarize_schema)
@encode_schema NimbleOptions.new!(encode_schema)
@@ -101,11 +80,9 @@ defmodule Scholar.Preprocessing do
end

@doc """
Scales a tensor by dividing each sample in the batch by the maximum absolute value in the batch.
It is a shortcut for `Scholar.Preprocessing.MaxAbsScaler.fit_transform/2`.
See `Scholar.Preprocessing.MaxAbsScaler` for more information.
## Options
#{NimbleOptions.docs(@general_schema)}
## Examples
@@ -133,20 +110,12 @@ defmodule Scholar.Preprocessing do
>
"""
deftransform max_abs_scale(tensor, opts \\ []) do
max_abs_scale_n(tensor, NimbleOptions.validate!(opts, @general_schema))
end

defnp max_abs_scale_n(tensor, opts) do
max_abs = Nx.abs(tensor) |> Nx.reduce_max(axes: opts[:axes], keep_axes: true)
tensor / Nx.select(max_abs == 0, 1, max_abs)
Scholar.Preprocessing.MaxAbsScaler.fit_transform(tensor, opts)
end

@doc """
Transforms a tensor by scaling each batch to the given range.
## Options
#{NimbleOptions.docs(@min_max_schema)}
It is a shortcut for `Scholar.Preprocessing.MinMaxScaler.fit_transform/2`.
See `Scholar.Preprocessing.MinMaxScaler` for more information.
## Examples
@@ -156,50 +125,14 @@ defmodule Scholar.Preprocessing do
[0.0, 0.5, 1.0]
>
iex> Scholar.Preprocessing.min_max_scale(Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]]), axes: [0])
#Nx.Tensor<
f32[4][3]
[
[0.3333333432674408, 0.0, 1.0],
[1.0, 0.25, 0.3333333432674408],
[0.0, 0.5, 0.0],
[0.6666666865348816, 1.0, 0.6666666865348816]
]
>
iex> Scholar.Preprocessing.min_max_scale(Nx.tensor([[1, -1, 2], [3, 0, 0], [0, 1, -1], [2, 3, 1]]), axes: [0], min: 1, max: 3)
#Nx.Tensor<
f32[4][3]
[
[1.6666667461395264, 1.0, 3.0],
[3.0, 1.5, 1.6666667461395264],
[1.0, 2.0, 1.0],
[2.3333334922790527, 3.0, 2.3333334922790527]
]
>
iex> Scholar.Preprocessing.min_max_scale(42)
#Nx.Tensor<
f32
0.0
>
"""
deftransform min_max_scale(tensor, opts \\ []) do
min_max_scale_n(tensor, NimbleOptions.validate!(opts, @min_max_schema))
end

defnp min_max_scale_n(tensor, opts) do
if opts[:max] <= opts[:min] do
raise ArgumentError,
"expected :max to be greater than :min"
else
reduced_max = Nx.reduce_max(tensor, axes: opts[:axes], keep_axes: true)
reduced_min = Nx.reduce_min(tensor, axes: opts[:axes], keep_axes: true)
denominator = reduced_max - reduced_min
denominator = Nx.select(denominator == 0, 1, denominator)
x_std = (tensor - reduced_min) / denominator
x_std * (opts[:max] - opts[:min]) + opts[:min]
end
Scholar.Preprocessing.MinMaxScaler.fit_transform(tensor, opts)
end

@doc """
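With the delegation above, the `:min`/`:max` options that were removed from this file presumably move into the new module. A hedged sketch, assuming `Scholar.Preprocessing.MinMaxScaler.fit_transform/2` accepts the same `:min` and `:max` options as the old shortcut (the MinMaxScaler source itself is not part of the excerpt shown here):

iex> t = Nx.tensor([1, 2, 3])
iex> Scholar.Preprocessing.MinMaxScaler.fit_transform(t, min: 1, max: 3)  # option names assumed, not shown in this excerpt
# expected to match the old min_max_scale/2 result: [1.0, 2.0, 3.0]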
119 changes: 119 additions & 0 deletions lib/scholar/preprocessing/max_abs_scaler.ex
@@ -0,0 +1,119 @@
defmodule Scholar.Preprocessing.MaxAbsScaler do
@moduledoc """
Scales a tensor by dividing each sample in the batch by the maximum absolute value in the batch.
Scaling happens independently on each feature by computing the relevant
statistics on the samples in the training set. The maximum absolute value is then
stored to be used on new samples.
"""

import Nx.Defn

@derive {Nx.Container, containers: [:max_abs]}
defstruct [:max_abs]

opts_schema = [
axes: [
type: {:custom, Scholar.Options, :axes, []},
doc: """
Axes to calculate the max absolute value over. By default the absolute values
are calculated between the whole tensors.
"""
]
]

@opts_schema NimbleOptions.new!(opts_schema)

@doc """
Compute the maximum absolute value of samples to be used for later scaling.
## Options
#{NimbleOptions.docs(@opts_schema)}
## Return values
Returns a struct with the following parameters:
* `max_abs`: the calculated maximum absolute value of samples.
## Examples
iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
iex> Scholar.Preprocessing.MaxAbsScaler.fit(t)
%Scholar.Preprocessing.MaxAbsScaler{
max_abs: Nx.tensor(
[
[2]
]
)
}
"""
deftransform fit(tensor, opts \\ []) do
fit_n(tensor, NimbleOptions.validate!(opts, @opts_schema))
end

defnp fit_n(tensor, opts) do
max_abs =
Nx.abs(tensor)
|> Nx.reduce_max(axes: opts[:axes], keep_axes: true)

max_abs = Nx.select(max_abs == 0, 1, max_abs)

%__MODULE__{max_abs: max_abs}
end

@doc """
Performs scaling of the tensor using a fitted scaler.
## Examples
iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t)
iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, t)
#Nx.Tensor<
f32[3][3]
[
[0.5, -0.5, 1.0],
[1.0, 0.0, 0.0],
[0.0, 0.5, -0.5]
]
>
iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t)
iex> new_tensor = Nx.tensor([[0.5, 1, -1], [0.3, 0.8, -1.6]])
iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, new_tensor)
#Nx.Tensor<
f32[2][3]
[
[0.25, 0.5, -0.5],
[0.15000000596046448, 0.4000000059604645, -0.800000011920929]
]
>
"""
defn transform(%__MODULE__{max_abs: max_abs}, tensor) do
tensor / max_abs
end

@doc """
Computes the maximum absolute value of the samples and scales the tensor in a single step.
## Examples
iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
iex> Scholar.Preprocessing.MaxAbsScaler.fit_transform(t)
#Nx.Tensor<
f32[3][3]
[
[0.5, -0.5, 1.0],
[1.0, 0.0, 0.0],
[0.0, 0.5, -0.5]
]
>
"""
defn fit_transform(tensor, opts \\ []) do
tensor
|> fit(opts)
|> transform(tensor)
end
end
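
None of the doctests above exercise the `:axes` option. A small sketch of per-feature scaling follows; the values in the comments are computed by hand from the `fit_n`/`transform` logic in this file, so treat them as expected rather than verified output:

iex> t = Nx.tensor([[1, -1, 2], [2, 0, 0], [0, 1, -1]])
iex> scaler = Scholar.Preprocessing.MaxAbsScaler.fit(t, axes: [0])   # per-column maximum absolute values: [[2, 1, 2]]
iex> Scholar.Preprocessing.MaxAbsScaler.transform(scaler, t)
# expected: [[0.5, -1.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, -0.5]]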