From e5a87eedecb51e64b4490887370b1fcb6a904324 Mon Sep 17 00:00:00 2001 From: NeTT Date: Mon, 2 Sep 2024 20:10:48 +0530 Subject: [PATCH 01/15] add RMSProp optimizer --- crates/core-gpu/src/types.rs | 8 ++++ crates/core/src/cpu/optimizers/mod.rs | 11 ++++- crates/core/src/cpu/optimizers/rmsprop.rs | 55 +++++++++++++++++++++++ crates/core/src/types.rs | 16 +++++++ examples/classification/iris.ts | 18 ++++---- src/core/api/optimizer.ts | 14 +++++- src/core/types.ts | 1 + 7 files changed, 112 insertions(+), 11 deletions(-) create mode 100644 crates/core/src/cpu/optimizers/rmsprop.rs diff --git a/crates/core-gpu/src/types.rs b/crates/core-gpu/src/types.rs index 86eb87a..d2dd6a6 100644 --- a/crates/core-gpu/src/types.rs +++ b/crates/core-gpu/src/types.rs @@ -133,12 +133,20 @@ pub struct AdamOptimizer { pub epsilon: f32, } +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "lowercase")] +pub struct RMSPropOptimizer { + pub decay_rate: f32, + pub epsilon: f32, +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(tag = "type", content = "config")] #[serde(rename_all = "lowercase")] pub enum Optimizer { SGD, Adam(AdamOptimizer), + RMSProp(RMSPropOptimizer), } #[derive(Serialize, Deserialize, Debug, Clone)] diff --git a/crates/core/src/cpu/optimizers/mod.rs b/crates/core/src/cpu/optimizers/mod.rs index de0dbe4..24aec72 100644 --- a/crates/core/src/cpu/optimizers/mod.rs +++ b/crates/core/src/cpu/optimizers/mod.rs @@ -1,8 +1,10 @@ mod adam; mod sgd; +mod rmsprop; -pub use adam::*; use ndarray::{ArrayViewD, ArrayViewMutD}; +pub use adam::*; +pub use rmsprop::*; pub use sgd::*; use crate::{CPULayer, CPUScheduler, Optimizer}; @@ -10,6 +12,7 @@ use crate::{CPULayer, CPUScheduler, Optimizer}; pub enum CPUOptimizer { SGD(CPUSGDOptimizer), Adam(CPUAdamOptimizer), + RMSProp(CPURMSPropOptimizer), } impl CPUOptimizer { @@ -24,6 +27,9 @@ impl CPUOptimizer { Optimizer::SGD => CPUOptimizer::SGD(CPUSGDOptimizer::new()), Optimizer::Adam(config) => { CPUOptimizer::Adam(CPUAdamOptimizer::new(config, all_params)) + }, + Optimizer::RMSProp(config) => { + CPUOptimizer::RMSProp(CPURMSPropOptimizer::new(config, all_params)) } } } @@ -49,6 +55,9 @@ impl CPUOptimizer { CPUOptimizer::Adam(adam) => { adam.update_grads(params, grads, idx, scheduler, rate) } + CPUOptimizer::RMSProp(rmsprop) => { + rmsprop.update_grads(params, grads, idx, scheduler, rate, epoch) + } } idx += 1; } diff --git a/crates/core/src/cpu/optimizers/rmsprop.rs b/crates/core/src/cpu/optimizers/rmsprop.rs new file mode 100644 index 0000000..3b11cb6 --- /dev/null +++ b/crates/core/src/cpu/optimizers/rmsprop.rs @@ -0,0 +1,55 @@ +use std::ops::{Add, Div, Mul, SubAssign}; + +use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; + +use crate::{CPUScheduler, RMSPropOptimizer}; + +pub struct CPURMSPropOptimizer { + decay_rate: f32, + epsilon: f32, + acc_sg: Vec>>, +} + +impl CPURMSPropOptimizer { + pub fn new(config: RMSPropOptimizer, params: Vec>>) -> Self { + let mut acc_sg = Vec::new(); + for params in params { + acc_sg.push( + params + .iter() + .map(|param| ArrayD::zeros(param.dim())) + .collect(), + ); + } + Self { + acc_sg, + decay_rate: config.decay_rate, + epsilon: config.epsilon, + } + } + + pub fn update_grads( + &mut self, + mut params: Vec>, + grads: Vec>, + idx: usize, + scheduler: &CPUScheduler, + rate: f32, + epoch: usize, + ) { + for (j, (param, grad)) in params.iter_mut().zip(grads).enumerate() { + self.acc_sg[idx][j] = self + .decay_rate + .mul(&self.acc_sg[idx][j]) + .add((1.0 - self.decay_rate).mul(&grad.map(|x| 
x.powi(2)))); + + let rate = scheduler.eta(rate, epoch); + + param.sub_assign( + &rate + .mul(&grad) + .div(self.acc_sg[idx][j].map(|x| x.sqrt()).add(self.epsilon)) + ) + } + } +} diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index 278f280..1edc436 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -133,12 +133,20 @@ pub struct AdamOptimizer { pub epsilon: f32, } +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct RMSPropOptimizer { + pub decay_rate: f32, + pub epsilon: f32, +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(tag = "type", content = "config")] #[serde(rename_all = "lowercase")] pub enum Optimizer { SGD, Adam(AdamOptimizer), + RMSProp(RMSPropOptimizer), } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -183,3 +191,11 @@ pub struct PredictOptions { pub output_shape: Vec, pub layers: Option>, } + +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct RegularizeOptions { + pub c: f32, + pub l1_ratio: f32, +} + diff --git a/examples/classification/iris.ts b/examples/classification/iris.ts index c9e262d..a2e0036 100644 --- a/examples/classification/iris.ts +++ b/examples/classification/iris.ts @@ -5,9 +5,11 @@ import { DenseLayer, OneCycle, ReluLayer, + RMSPropOptimizer, Sequential, setupBackend, SoftmaxLayer, + Tensor, tensor, tensor2D, } from "../../mod.ts"; @@ -22,7 +24,7 @@ import { Matrix, // Split the dataset useSplit, -} from "https://deno.land/x/vectorizer@v0.3.7/mod.ts"; +} from "jsr:@lala/appraisal@0.7.3"; // Read the training dataset const _data = Deno.readTextFileSync("examples/classification/iris.csv"); @@ -40,7 +42,7 @@ const y = encoder.fit(y_pre).transform(y_pre, "f32"); // @ts-ignore Matrices can be split const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ [typeof x, typeof y], - [typeof x, typeof y], + [typeof x, typeof y] ]; // Setup the CPU backend for Netsaur @@ -68,7 +70,7 @@ const net = new Sequential({ // A Softmax activation layer SoftmaxLayer(), ], - optimizer: AdamOptimizer(), + optimizer: RMSPropOptimizer(), // We are using CrossEntropy for finding cost cost: Cost.CrossEntropy, scheduler: OneCycle({ max_rate: 0.05, step_size: 50 }), @@ -81,13 +83,13 @@ net.train( [ { inputs: tensor2D(train[0]), - outputs: tensor(train[1].data as Float32Array, train[1].shape), + outputs: tensor(train[1]), }, ], // Train for 300 epochs 400, 1, - 0.02, + 0.02 ); console.log(`training time: ${performance.now() - time}ms`); @@ -95,9 +97,7 @@ console.log(`training time: ${performance.now() - time}ms`); // Calculate metrics const res = await net.predict(tensor2D(test[0])); const y1 = encoder.untransform( - CategoricalEncoder.fromSoftmax( - new Matrix(res.data, [res.shape[0], res.shape[1]]), - ), + CategoricalEncoder.fromSoftmax(res as Tensor<2>) ); const y0 = encoder.untransform(test[1]); @@ -106,5 +106,5 @@ const cMatrix = new ClassificationReport(y0, y1); console.log(cMatrix); console.log( "Total Accuracy: ", - y1.filter((x, i) => x === y0[i]).length / y1.length, + y1.filter((x, i) => x === y0[i]).length / y1.length ); diff --git a/src/core/api/optimizer.ts b/src/core/api/optimizer.ts index dd6efd0..1b36f47 100644 --- a/src/core/api/optimizer.ts +++ b/src/core/api/optimizer.ts @@ -2,7 +2,8 @@ import { OptimizerType } from "../types.ts"; export type Optimizer = | { type: OptimizerType.SGD } - | { type: OptimizerType.Adam; config: AdamOptimizerConfig }; + | { type: OptimizerType.Adam; config: AdamOptimizerConfig } 
+ | { type: OptimizerType.RMSProp; config: RMSPropOptimizerConfig }; export type AdamOptimizerConfig = { beta1?: number; @@ -10,6 +11,11 @@ export type AdamOptimizerConfig = { epsilon?: number; }; +export type RMSPropOptimizerConfig = { + decayRate?: number; + epsilon?: number; +}; + export function SGDOptimizer(): Optimizer { return { type: OptimizerType.SGD }; } @@ -20,3 +26,9 @@ export function AdamOptimizer(config: AdamOptimizerConfig = {}): Optimizer { config.epsilon = config.epsilon || 1e-8; return { type: OptimizerType.Adam, config }; } + +export function RMSPropOptimizer(config: RMSPropOptimizerConfig = {}): Optimizer { + config.decayRate = config.decayRate || 0.9; + config.epsilon = config.epsilon || 1e-8; + return { type: OptimizerType.RMSProp, config }; +} diff --git a/src/core/types.ts b/src/core/types.ts index 6a1fb2f..df8f85a 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -149,6 +149,7 @@ export enum Cost { export enum OptimizerType { SGD = "sgd", Adam = "adam", + RMSProp = "rmsprop" } export enum SchedulerType { From ffd601fb5526e20dd16c2786702b979370c77346 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 08:25:55 +0530 Subject: [PATCH 02/15] add regularizer to dense layer --- crates/core/src/cpu/layers/dense.rs | 14 +++++- crates/core/src/cpu/mod.rs | 2 + crates/core/src/cpu/optimizers/adam.rs | 8 ++-- crates/core/src/cpu/optimizers/mod.rs | 27 ++++++++++-- crates/core/src/cpu/optimizers/rmsprop.rs | 6 ++- crates/core/src/cpu/optimizers/sgd.rs | 5 ++- crates/core/src/cpu/regularizer.rs | 53 +++++++++++++++++++++++ crates/core/src/types.rs | 12 +++++ 8 files changed, 116 insertions(+), 11 deletions(-) create mode 100644 crates/core/src/cpu/regularizer.rs diff --git a/crates/core/src/cpu/layers/dense.rs b/crates/core/src/cpu/layers/dense.rs index 1813b7e..d908c7f 100644 --- a/crates/core/src/cpu/layers/dense.rs +++ b/crates/core/src/cpu/layers/dense.rs @@ -1,7 +1,7 @@ use ndarray::{Array1, Array2, ArrayD, Axis, Dimension, Ix1, Ix2, IxDyn}; use std::ops::Add; -use crate::{CPUInit, DenseLayer, Init, Tensors}; +use crate::{CPUInit, CPURegularizer, DenseLayer, Init, Tensors}; pub struct DenseCPULayer { // cache @@ -15,6 +15,12 @@ pub struct DenseCPULayer { // gradients pub d_weights: Array2, pub d_biases: Array1, + + // regularization + pub l_weights: Array2, + pub l_biases: Array1, + + pub regularizer: CPURegularizer, } impl DenseCPULayer { @@ -39,6 +45,9 @@ impl DenseCPULayer { biases: biases.into_dimensionality::().unwrap(), d_weights: Array2::zeros(weight_size), d_biases: Array1::zeros(config.size[0]), + l_weights: Array2::zeros(weight_size), + l_biases: Array1::zeros(config.size[0]), + regularizer: CPURegularizer::from(config.c, config.l1_ratio) } } @@ -66,6 +75,9 @@ impl DenseCPULayer { inputs_t.swap_axes(0, 1); self.d_weights = inputs_t.dot(&d_outputs); self.d_biases = d_outputs.sum_axis(Axis(0)); + + self.l_weights = self.regularizer.coeff(&self.weights.into_dyn()).into_dimensionality::().unwrap(); + self.l_biases = self.regularizer.coeff(&self.biases.into_dyn()).into_dimensionality::().unwrap(); d_inputs.into_dyn() } } diff --git a/crates/core/src/cpu/mod.rs b/crates/core/src/cpu/mod.rs index e12191b..462bcb8 100644 --- a/crates/core/src/cpu/mod.rs +++ b/crates/core/src/cpu/mod.rs @@ -5,6 +5,7 @@ mod init; mod layers; mod optimizers; mod schedulers; +mod regularizer; pub use activation::*; pub use backend::*; @@ -13,3 +14,4 @@ pub use init::*; pub use layers::*; pub use optimizers::*; pub use schedulers::*; +pub use regularizer::*; \ No newline at end 
of file diff --git a/crates/core/src/cpu/optimizers/adam.rs b/crates/core/src/cpu/optimizers/adam.rs index 26958b0..f115b79 100644 --- a/crates/core/src/cpu/optimizers/adam.rs +++ b/crates/core/src/cpu/optimizers/adam.rs @@ -1,4 +1,4 @@ -use std::ops::{Add, Div, Mul, SubAssign}; +use std::ops::{Add, Div, Mul, SubAssign, Sub}; use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; @@ -48,8 +48,9 @@ impl CPUAdamOptimizer { idx: usize, scheduler: &CPUScheduler, rate: f32, + l: Vec>, ) { - for (j, (param, grad)) in params.iter_mut().zip(grads).enumerate() { + for (j, ((param, grad), li)) in params.iter_mut().zip(grads).zip(l).enumerate() { self.m[idx][j] = self .beta1 .mul(&self.m[idx][j]) @@ -67,7 +68,8 @@ impl CPUAdamOptimizer { param.sub_assign( &rate .mul(m_hat) - .div(v_hat.map(|x| x.sqrt()).add(self.epsilon)), + .div(v_hat.map(|x| x.sqrt()).add(self.epsilon)) + .sub(&li), ) } } diff --git a/crates/core/src/cpu/optimizers/mod.rs b/crates/core/src/cpu/optimizers/mod.rs index 24aec72..56b8de3 100644 --- a/crates/core/src/cpu/optimizers/mod.rs +++ b/crates/core/src/cpu/optimizers/mod.rs @@ -2,7 +2,7 @@ mod adam; mod sgd; mod rmsprop; -use ndarray::{ArrayViewD, ArrayViewMutD}; +use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; pub use adam::*; pub use rmsprop::*; pub use sgd::*; @@ -40,6 +40,7 @@ impl CPUOptimizer { scheduler: &CPUScheduler, rate: f32, epoch: usize, + l: ArrayD, ) { match self { CPUOptimizer::Adam(adam) => adam.t += 1.0, @@ -50,7 +51,7 @@ impl CPUOptimizer { if let Some((params, grads)) = CPUOptimizer::get_params(layer) { match self { CPUOptimizer::SGD(sgd) => { - sgd.update_grads(params, grads, scheduler, rate, epoch) + sgd.update_grads(params, grads, scheduler, rate, epoch, l) } CPUOptimizer::Adam(adam) => { adam.update_grads(params, grads, idx, scheduler, rate) @@ -66,7 +67,7 @@ impl CPUOptimizer { pub fn get_params<'a>( layer: &'a mut CPULayer, - ) -> Option<(Vec>, Vec>)> { + ) -> Option<(Vec>, Vec>, Vec>)> { match layer { CPULayer::Dense(layer) => Some(( vec![ @@ -77,6 +78,10 @@ impl CPUOptimizer { layer.d_weights.view().into_dyn(), layer.d_biases.view().into_dyn(), ], + vec![ + layer.l_weights.view().into_dyn(), + layer.l_biases.view().into_dyn(), + ] )), CPULayer::Conv2D(layer) => Some(( vec![ @@ -87,6 +92,10 @@ impl CPUOptimizer { layer.d_weights.view().into_dyn(), layer.d_biases.view().into_dyn(), ], + vec![ + ArrayD::zeros(layer.d_weights.shape()).view(), + ArrayD::zeros(layer.d_biases.shape()).view(), + ] )), CPULayer::ConvTranspose2D(layer) => Some(( vec![ @@ -97,6 +106,10 @@ impl CPUOptimizer { layer.d_weights.view().into_dyn(), layer.d_biases.view().into_dyn(), ], + vec![ + ArrayD::zeros(layer.d_weights.shape()).view(), + ArrayD::zeros(layer.d_biases.shape()).view(), + ] )), CPULayer::BatchNorm1D(layer) => Some(( vec![ @@ -107,6 +120,10 @@ impl CPUOptimizer { layer.d_gamma.view().into_dyn(), layer.d_beta.view().into_dyn(), ], + vec![ + ArrayD::zeros(layer.d_gamma.shape()).view(), + ArrayD::zeros(layer.d_gamma.shape()).view(), + ] )), CPULayer::BatchNorm2D(layer) => Some(( vec![ @@ -117,6 +134,10 @@ impl CPUOptimizer { layer.d_gamma.view().into_dyn(), layer.d_beta.view().into_dyn(), ], + vec![ + ArrayD::zeros(layer.d_gamma.shape()).view(), + ArrayD::zeros(layer.d_gamma.shape()).view(), + ] )), _ => return None, } diff --git a/crates/core/src/cpu/optimizers/rmsprop.rs b/crates/core/src/cpu/optimizers/rmsprop.rs index 3b11cb6..615516e 100644 --- a/crates/core/src/cpu/optimizers/rmsprop.rs +++ b/crates/core/src/cpu/optimizers/rmsprop.rs @@ -1,4 +1,4 @@ -use 
std::ops::{Add, Div, Mul, SubAssign}; +use std::ops::{Add, Div, Mul, SubAssign, Sub}; use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; @@ -36,8 +36,9 @@ impl CPURMSPropOptimizer { scheduler: &CPUScheduler, rate: f32, epoch: usize, + l: Vec>, ) { - for (j, (param, grad)) in params.iter_mut().zip(grads).enumerate() { + for (j, ((param, grad), li)) in params.iter_mut().zip(grads).zip(l).enumerate() { self.acc_sg[idx][j] = self .decay_rate .mul(&self.acc_sg[idx][j]) @@ -49,6 +50,7 @@ impl CPURMSPropOptimizer { &rate .mul(&grad) .div(self.acc_sg[idx][j].map(|x| x.sqrt()).add(self.epsilon)) + .sub(&li), ) } } diff --git a/crates/core/src/cpu/optimizers/sgd.rs b/crates/core/src/cpu/optimizers/sgd.rs index 8903be9..dfca9e6 100644 --- a/crates/core/src/cpu/optimizers/sgd.rs +++ b/crates/core/src/cpu/optimizers/sgd.rs @@ -18,10 +18,11 @@ impl CPUSGDOptimizer { scheduler: &CPUScheduler, rate: f32, epoch: usize, + l: Vec>, ) { let eta = scheduler.eta(rate, epoch); - for (param, grad) in params.iter_mut().zip(grads) { - param.sub_assign(&grad.mul(eta)); + for ((param, grad), li) in params.iter_mut().zip(grads).zip(l) { + param.sub_assign(&(&grad - &li).mul(eta)); } } } diff --git a/crates/core/src/cpu/regularizer.rs b/crates/core/src/cpu/regularizer.rs new file mode 100644 index 0000000..d0a2e97 --- /dev/null +++ b/crates/core/src/cpu/regularizer.rs @@ -0,0 +1,53 @@ +use ndarray::ArrayD; + +pub struct CPURegularizer { + l1_strength: f32, + l2_strength: f32, +} + +impl CPURegularizer { + pub fn from(c: f32, l1_ratio: f32) -> Self { + if c == 0.0 { + return CPURegularizer { + l1_strength: 0.0, + l2_strength: 0.0 + } + } + let strength = 1.0 / c; + if l1_ratio == 1.0 { + CPURegularizer { + l1_strength: strength, + l2_strength: 0.0, + } + } else if l1_ratio == 0.0 { + CPURegularizer { + l1_strength: 0.0, + l2_strength: strength, + } + } else { + let l1_strength = strength * l1_ratio; + let l2_strength = strength - l1_strength; + CPURegularizer { + l1_strength, + l2_strength, + } + } + } + pub fn l1_coeff(&self, x: &ArrayD) -> ArrayD { + if self.l1_strength == 0.0 { + ArrayD::zeros(x.shape()) + } else { + self.l1_strength * x.map(|w| w.abs()) + } + } + pub fn l2_coeff(&self, x: &ArrayD) -> ArrayD { + if self.l2_strength == 0.0 { + ArrayD::zeros(x.shape()) + } else { + self.l2_strength * x.map(|w| w * w) + } + } + pub fn coeff(&self, x: &ArrayD) -> ArrayD { + self.l1_coeff(x) + self.l2_coeff(x) + } +} diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index 1edc436..0d53322 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -54,9 +54,12 @@ pub struct JSTensor { } #[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] pub struct DenseLayer { pub size: Vec, pub init: Option, + pub c: f32, + pub l1_ratio: f32, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -133,6 +136,14 @@ pub struct AdamOptimizer { pub epsilon: f32, } +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "lowercase")] +pub struct NadamOptimizer { + pub beta1: f32, + pub beta2: f32, + pub epsilon: f32, +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct RMSPropOptimizer { @@ -146,6 +157,7 @@ pub struct RMSPropOptimizer { pub enum Optimizer { SGD, Adam(AdamOptimizer), + Nadam(NadamOptimizer), RMSProp(RMSPropOptimizer), } From 64d6eba31f9ea4dd4534630ae2d26be81ed4db51 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:08:49 +0530 Subject: [PATCH 03/15] add nadam --- crates/core/src/cpu/optimizers/mod.rs 
| 36 +++++++----- crates/core/src/cpu/optimizers/nadam.rs | 78 +++++++++++++++++++++++++ crates/core/src/types.rs | 4 ++ 3 files changed, 104 insertions(+), 14 deletions(-) create mode 100644 crates/core/src/cpu/optimizers/nadam.rs diff --git a/crates/core/src/cpu/optimizers/mod.rs b/crates/core/src/cpu/optimizers/mod.rs index 56b8de3..d463174 100644 --- a/crates/core/src/cpu/optimizers/mod.rs +++ b/crates/core/src/cpu/optimizers/mod.rs @@ -1,9 +1,11 @@ mod adam; +mod nadam; mod sgd; mod rmsprop; -use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; +use ndarray::{ArrayViewD, ArrayViewMutD}; pub use adam::*; +pub use nadam::*; pub use rmsprop::*; pub use sgd::*; @@ -12,6 +14,7 @@ use crate::{CPULayer, CPUScheduler, Optimizer}; pub enum CPUOptimizer { SGD(CPUSGDOptimizer), Adam(CPUAdamOptimizer), + Nadam(CPUNadamOptimizer), RMSProp(CPURMSPropOptimizer), } @@ -19,7 +22,7 @@ impl CPUOptimizer { pub fn from(optimizer: Optimizer, layers: &mut Vec) -> Self { let mut all_params = Vec::new(); for layer in layers { - if let Some((params, _)) = CPUOptimizer::get_params(layer) { + if let Some((params, _, _)) = CPUOptimizer::get_params(layer) { all_params.push(params) } } @@ -28,6 +31,9 @@ impl CPUOptimizer { Optimizer::Adam(config) => { CPUOptimizer::Adam(CPUAdamOptimizer::new(config, all_params)) }, + Optimizer::Nadam(config) => { + CPUOptimizer::Nadam(CPUNadamOptimizer::new(config, all_params)) + }, Optimizer::RMSProp(config) => { CPUOptimizer::RMSProp(CPURMSPropOptimizer::new(config, all_params)) } @@ -40,7 +46,6 @@ impl CPUOptimizer { scheduler: &CPUScheduler, rate: f32, epoch: usize, - l: ArrayD, ) { match self { CPUOptimizer::Adam(adam) => adam.t += 1.0, @@ -48,16 +53,19 @@ impl CPUOptimizer { } let mut idx = 0; for layer in layers.iter_mut() { - if let Some((params, grads)) = CPUOptimizer::get_params(layer) { + if let Some((params, grads, l)) = CPUOptimizer::get_params(layer) { match self { CPUOptimizer::SGD(sgd) => { sgd.update_grads(params, grads, scheduler, rate, epoch, l) } CPUOptimizer::Adam(adam) => { - adam.update_grads(params, grads, idx, scheduler, rate) + adam.update_grads(params, grads, idx, scheduler, rate, l) + } + CPUOptimizer::Nadam(nadam) => { + nadam.update_grads(params, grads, idx, scheduler, rate, l) } CPUOptimizer::RMSProp(rmsprop) => { - rmsprop.update_grads(params, grads, idx, scheduler, rate, epoch) + rmsprop.update_grads(params, grads, idx, scheduler, rate, epoch, l) } } idx += 1; @@ -93,8 +101,8 @@ impl CPUOptimizer { layer.d_biases.view().into_dyn(), ], vec![ - ArrayD::zeros(layer.d_weights.shape()).view(), - ArrayD::zeros(layer.d_biases.shape()).view(), + layer.l_weights.view().into_dyn(), + layer.l_biases.view().into_dyn(), ] )), CPULayer::ConvTranspose2D(layer) => Some(( @@ -107,8 +115,8 @@ impl CPUOptimizer { layer.d_biases.view().into_dyn(), ], vec![ - ArrayD::zeros(layer.d_weights.shape()).view(), - ArrayD::zeros(layer.d_biases.shape()).view(), + layer.l_weights.view().into_dyn(), + layer.l_biases.view().into_dyn(), ] )), CPULayer::BatchNorm1D(layer) => Some(( @@ -121,8 +129,8 @@ impl CPUOptimizer { layer.d_beta.view().into_dyn(), ], vec![ - ArrayD::zeros(layer.d_gamma.shape()).view(), - ArrayD::zeros(layer.d_gamma.shape()).view(), + layer.l_gamma.view().into_dyn(), + layer.l_beta.view().into_dyn(), ] )), CPULayer::BatchNorm2D(layer) => Some(( @@ -135,8 +143,8 @@ impl CPUOptimizer { layer.d_beta.view().into_dyn(), ], vec![ - ArrayD::zeros(layer.d_gamma.shape()).view(), - ArrayD::zeros(layer.d_gamma.shape()).view(), + layer.l_gamma.view().into_dyn(), + 
layer.l_beta.view().into_dyn(), ] )), _ => return None, diff --git a/crates/core/src/cpu/optimizers/nadam.rs b/crates/core/src/cpu/optimizers/nadam.rs new file mode 100644 index 0000000..9cf1782 --- /dev/null +++ b/crates/core/src/cpu/optimizers/nadam.rs @@ -0,0 +1,78 @@ +use std::ops::{Add, Div, Mul, SubAssign, Sub}; + +use ndarray::{ArrayD, ArrayViewD, ArrayViewMutD}; + +use crate::{NadamOptimizer, CPUScheduler}; + +pub struct CPUNadamOptimizer { + pub beta1: f32, + pub beta2: f32, + pub epsilon: f32, + pub m: Vec>>, + pub n: Vec>>, + pub t: f32, +} + +impl CPUNadamOptimizer { + pub fn new(config: NadamOptimizer, params: Vec>>) -> Self { + let mut m = Vec::new(); + let mut n = Vec::new(); + for params in params { + m.push( + params + .iter() + .map(|param| ArrayD::zeros(param.dim())) + .collect(), + ); + n.push( + params + .iter() + .map(|param| ArrayD::zeros(param.dim())) + .collect(), + ); + } + Self { + beta1: config.beta1, + beta2: config.beta2, + epsilon: config.epsilon, + m, + n, + t: 0.0, + } + } + + pub fn update_grads( + &mut self, + mut params: Vec>, + grads: Vec>, + idx: usize, + scheduler: &CPUScheduler, + rate: f32, + l: Vec>, + ) { + for (j, ((param, grad), li)) in params.iter_mut().zip(grads).zip(l).enumerate() { + self.m[idx][j] = self + .beta1 + .mul(&self.m[idx][j]) + .add((1.0 - self.beta1).mul(&grad)); + self.n[idx][j] = self + .beta2 + .mul(&self.n[idx][j]) + .add((1.0 - self.beta2).mul(&grad.map(|x| x.powi(2)))); + + let m_hat = self.m[idx][j].view(); + let n_hat = self.n[idx][j].view().div(1.0 - self.beta2.powf(self.t)); + + let nestrov_m_hat = self.beta1.mul(&m_hat).add((1.0 - self.beta1).mul(&grad)).div(1.0 - self.beta1.powf(self.t)); + + let rate = scheduler.eta(rate, self.t as usize); + + param.sub_assign( + &rate + .mul(nestrov_m_hat) + .div(n_hat.map(|x| x.sqrt()).add(self.epsilon)) + .sub(&li), + ) + } + } +} diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index 0d53322..adc833d 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -70,6 +70,8 @@ pub struct Conv2DLayer { pub kernel_size: Vec, pub padding: Option>, pub strides: Option>, + pub c: f32, + pub l1_ratio: f32, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -80,6 +82,8 @@ pub struct ConvTranspose2DLayer { pub kernel_size: Vec, pub padding: Option>, pub strides: Option>, + pub c: f32, + pub l1_ratio: f32, } #[derive(Serialize, Deserialize, Debug, Clone)] From 60aca489c8fda42ca1b1e9cf0c19d62280b5d5f4 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:10:58 +0530 Subject: [PATCH 04/15] fix regularizer --- crates/core/src/cpu/layers/batchnorm1d.rs | 7 ++++++ crates/core/src/cpu/layers/batchnorm2d.rs | 7 ++++++ crates/core/src/cpu/layers/conv2d.rs | 18 +++++++++++--- crates/core/src/cpu/layers/convtrans2d.rs | 30 +++++++++++++++++++---- crates/core/src/cpu/layers/dense.rs | 4 +-- 5 files changed, 56 insertions(+), 10 deletions(-) diff --git a/crates/core/src/cpu/layers/batchnorm1d.rs b/crates/core/src/cpu/layers/batchnorm1d.rs index 75be811..9208342 100644 --- a/crates/core/src/cpu/layers/batchnorm1d.rs +++ b/crates/core/src/cpu/layers/batchnorm1d.rs @@ -35,6 +35,10 @@ pub struct BatchNorm1DCPULayer { // gradients pub d_gamma: Array2, pub d_beta: Array2, + + // gradients + pub l_gamma: Array2, + pub l_beta: Array2, } impl BatchNorm1DCPULayer { @@ -74,6 +78,9 @@ impl BatchNorm1DCPULayer { d_gamma: Array2::zeros((1, size[1])), d_beta: Array2::zeros((1, size[1])), + + l_gamma: Array2::zeros((1, size[1])), + l_beta: Array2::zeros((1, size[1])), } } diff 
--git a/crates/core/src/cpu/layers/batchnorm2d.rs b/crates/core/src/cpu/layers/batchnorm2d.rs index da15e6a..4f08797 100644 --- a/crates/core/src/cpu/layers/batchnorm2d.rs +++ b/crates/core/src/cpu/layers/batchnorm2d.rs @@ -32,6 +32,10 @@ pub struct BatchNorm2DCPULayer { // gradients pub d_gamma: Array4, pub d_beta: Array4, + + // regularization + pub l_gamma: Array4, + pub l_beta: Array4, } impl BatchNorm2DCPULayer { @@ -71,6 +75,9 @@ impl BatchNorm2DCPULayer { d_gamma: Array4::zeros((1, size[1], 1, 1)), d_beta: Array4::zeros((1, size[1], 1, 1)), + + l_gamma: Array4::zeros((1, size[1], 1, 1)), + l_beta: Array4::zeros((1, size[1], 1, 1)), } } diff --git a/crates/core/src/cpu/layers/conv2d.rs b/crates/core/src/cpu/layers/conv2d.rs index 9fd8cf8..7e0d41e 100644 --- a/crates/core/src/cpu/layers/conv2d.rs +++ b/crates/core/src/cpu/layers/conv2d.rs @@ -1,7 +1,7 @@ use ndarray::{s, Array1, Array4, ArrayD, Dimension, Ix1, Ix4, IxDyn}; use std::ops::{Add, AddAssign, Mul}; -use crate::{CPUInit, Conv2DLayer, Init, Tensors}; +use crate::{CPUInit, CPURegularizer, Conv2DLayer, Init, Tensors}; pub struct Conv2DCPULayer { // cache @@ -17,6 +17,12 @@ pub struct Conv2DCPULayer { // gradients pub d_weights: Array4, pub d_biases: Array1, + + // regulatization + pub l_weights: Array4, + pub l_biases: Array1, + + pub regularizer: CPURegularizer, } impl Conv2DCPULayer { @@ -30,7 +36,6 @@ impl Conv2DCPULayer { let input_size = Ix4(size[0], size[1], input_y, input_x); let weight_size = IxDyn(config.kernel_size.as_slice()); let output_size = Ix4(size[0], weight_size[0], output_y, output_x); - let (weights, biases) = if let Some(Tensors::Conv(tensors)) = tensors { (tensors.weights, tensors.biases) } else { @@ -54,10 +59,15 @@ impl Conv2DCPULayer { inputs: Array4::zeros(input_size), weights: weights.into_dimensionality::().unwrap(), biases: biases.into_dimensionality::().unwrap(), - d_weights: ArrayD::zeros(weight_size) + d_weights: ArrayD::zeros(weight_size.clone()) .into_dimensionality::() .unwrap(), d_biases: Array1::zeros(config.kernel_size[0]), + l_weights: ArrayD::zeros(weight_size) + .into_dimensionality::() + .unwrap(), + l_biases: Array1::zeros(config.kernel_size[0]), + regularizer: CPURegularizer::from(config.c, config.l1_ratio), } } @@ -138,6 +148,8 @@ impl Conv2DCPULayer { } } } + self.l_weights = self.regularizer.coeff(&self.weights.clone().into_dyn()).into_dimensionality::().unwrap(); + self.l_biases = self.regularizer.coeff(&self.biases.clone().into_dyn()).into_dimensionality::().unwrap(); d_inputs.into_dyn() } diff --git a/crates/core/src/cpu/layers/convtrans2d.rs b/crates/core/src/cpu/layers/convtrans2d.rs index 1992af3..f78aea4 100644 --- a/crates/core/src/cpu/layers/convtrans2d.rs +++ b/crates/core/src/cpu/layers/convtrans2d.rs @@ -1,7 +1,7 @@ use ndarray::{s, Array1, Array4, ArrayD, Dimension, Ix1, Ix4, IxDyn}; use std::ops::{Add, AddAssign, Mul}; -use crate::{CPUInit, ConvTranspose2DLayer, Init, Tensors}; +use crate::{CPUInit, CPURegularizer, ConvTranspose2DLayer, Init, Tensors}; pub struct ConvTranspose2DCPULayer { // cache @@ -17,6 +17,12 @@ pub struct ConvTranspose2DCPULayer { // gradients pub d_weights: Array4, pub d_biases: Array1, + + // regulatization + pub l_weights: Array4, + pub l_biases: Array1, + + pub regularizer: CPURegularizer, } impl ConvTranspose2DCPULayer { @@ -25,12 +31,11 @@ impl ConvTranspose2DCPULayer { let padding = config.padding.unwrap_or(vec![0, 0]); let input_y = size[2] + 2 * padding[0]; let input_x = size[3] + 2 * padding[1]; - let output_y = (input_y + 
config.kernel_size[2]) / strides[0] - 1; - let output_x = (input_x + config.kernel_size[3]) / strides[1] - 1; + let output_y = (input_y - 1) * strides[0] - config.kernel_size[2] + 2; + let output_x = (input_x - 1) * strides[1] - config.kernel_size[3] + 2; let input_size = Ix4(size[0], size[1], input_y, input_x); let weight_size = IxDyn(config.kernel_size.as_slice()); let output_size = Ix4(size[0], weight_size[0], output_y, output_x); - let (weights, biases) = if let Some(Tensors::Conv(tensors)) = tensors { (tensors.weights, tensors.biases) } else { @@ -54,10 +59,15 @@ impl ConvTranspose2DCPULayer { inputs: Array4::zeros(input_size), weights: weights.into_dimensionality::().unwrap(), biases: biases.into_dimensionality::().unwrap(), - d_weights: ArrayD::zeros(weight_size) + d_weights: ArrayD::zeros(weight_size.clone()) .into_dimensionality::() .unwrap(), d_biases: Array1::zeros(config.kernel_size[0]), + l_weights: ArrayD::zeros(weight_size) + .into_dimensionality::() + .unwrap(), + l_biases: Array1::zeros(config.kernel_size[0]), + regularizer: CPURegularizer::from(config.c, config.l1_ratio), } } @@ -141,6 +151,16 @@ impl ConvTranspose2DCPULayer { } } + self.l_weights = self + .regularizer + .coeff(&self.weights.clone().into_dyn()) + .into_dimensionality::() + .unwrap(); + self.l_biases = self + .regularizer + .coeff(&self.biases.clone().into_dyn()) + .into_dimensionality::() + .unwrap(); d_inputs.into_dyn() } } diff --git a/crates/core/src/cpu/layers/dense.rs b/crates/core/src/cpu/layers/dense.rs index d908c7f..ae73c7c 100644 --- a/crates/core/src/cpu/layers/dense.rs +++ b/crates/core/src/cpu/layers/dense.rs @@ -76,8 +76,8 @@ impl DenseCPULayer { self.d_weights = inputs_t.dot(&d_outputs); self.d_biases = d_outputs.sum_axis(Axis(0)); - self.l_weights = self.regularizer.coeff(&self.weights.into_dyn()).into_dimensionality::().unwrap(); - self.l_biases = self.regularizer.coeff(&self.biases.into_dyn()).into_dimensionality::().unwrap(); + self.l_weights = self.regularizer.coeff(&self.weights.clone().into_dyn()).into_dimensionality::().unwrap(); + self.l_biases = self.regularizer.coeff(&self.biases.clone().into_dyn()).into_dimensionality::().unwrap(); d_inputs.into_dyn() } } From 890f9de84c25d586524ff86cfd6725da172dd6b6 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:48:33 +0530 Subject: [PATCH 05/15] fix reg --- crates/core/src/cpu/layers/conv2d.rs | 2 +- crates/core/src/cpu/layers/convtrans2d.rs | 2 +- crates/core/src/cpu/layers/dense.rs | 2 +- crates/core/src/types.rs | 12 ++++++------ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/core/src/cpu/layers/conv2d.rs b/crates/core/src/cpu/layers/conv2d.rs index 7e0d41e..a56ac68 100644 --- a/crates/core/src/cpu/layers/conv2d.rs +++ b/crates/core/src/cpu/layers/conv2d.rs @@ -67,7 +67,7 @@ impl Conv2DCPULayer { .into_dimensionality::() .unwrap(), l_biases: Array1::zeros(config.kernel_size[0]), - regularizer: CPURegularizer::from(config.c, config.l1_ratio), + regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0)) } } diff --git a/crates/core/src/cpu/layers/convtrans2d.rs b/crates/core/src/cpu/layers/convtrans2d.rs index f78aea4..c10bc49 100644 --- a/crates/core/src/cpu/layers/convtrans2d.rs +++ b/crates/core/src/cpu/layers/convtrans2d.rs @@ -67,7 +67,7 @@ impl ConvTranspose2DCPULayer { .into_dimensionality::() .unwrap(), l_biases: Array1::zeros(config.kernel_size[0]), - regularizer: CPURegularizer::from(config.c, config.l1_ratio), + regularizer: 
CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0)) } } diff --git a/crates/core/src/cpu/layers/dense.rs b/crates/core/src/cpu/layers/dense.rs index ae73c7c..6f80c28 100644 --- a/crates/core/src/cpu/layers/dense.rs +++ b/crates/core/src/cpu/layers/dense.rs @@ -47,7 +47,7 @@ impl DenseCPULayer { d_biases: Array1::zeros(config.size[0]), l_weights: Array2::zeros(weight_size), l_biases: Array1::zeros(config.size[0]), - regularizer: CPURegularizer::from(config.c, config.l1_ratio) + regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0)) } } diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index adc833d..66349c6 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -58,8 +58,8 @@ pub struct JSTensor { pub struct DenseLayer { pub size: Vec, pub init: Option, - pub c: f32, - pub l1_ratio: f32, + pub c: Option, + pub l1_ratio: Option, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -70,8 +70,8 @@ pub struct Conv2DLayer { pub kernel_size: Vec, pub padding: Option>, pub strides: Option>, - pub c: f32, - pub l1_ratio: f32, + pub c: Option, + pub l1_ratio: Option, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -82,8 +82,8 @@ pub struct ConvTranspose2DLayer { pub kernel_size: Vec, pub padding: Option>, pub strides: Option>, - pub c: f32, - pub l1_ratio: f32, + pub c: Option, + pub l1_ratio: Option, } #[derive(Serialize, Deserialize, Debug, Clone)] From 9f88aba64762efc691051d4e7d3c8e15ce51511c Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:49:32 +0530 Subject: [PATCH 06/15] fix nadam --- crates/core/src/cpu/optimizers/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core/src/cpu/optimizers/mod.rs b/crates/core/src/cpu/optimizers/mod.rs index d463174..e0df0cf 100644 --- a/crates/core/src/cpu/optimizers/mod.rs +++ b/crates/core/src/cpu/optimizers/mod.rs @@ -49,6 +49,7 @@ impl CPUOptimizer { ) { match self { CPUOptimizer::Adam(adam) => adam.t += 1.0, + CPUOptimizer::Nadam(nadam) => nadam.t += 1.0, _ => {} } let mut idx = 0; From a201646d77fa35dd9a5c53894d46b1d32c112805 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:52:42 +0530 Subject: [PATCH 07/15] finalize nadam --- src/core/api/optimizer.ts | 8 ++++++++ src/core/types.ts | 1 + 2 files changed, 9 insertions(+) diff --git a/src/core/api/optimizer.ts b/src/core/api/optimizer.ts index 1b36f47..739cfc9 100644 --- a/src/core/api/optimizer.ts +++ b/src/core/api/optimizer.ts @@ -3,6 +3,7 @@ import { OptimizerType } from "../types.ts"; export type Optimizer = | { type: OptimizerType.SGD } | { type: OptimizerType.Adam; config: AdamOptimizerConfig } + | { type: OptimizerType.Nadam; config: AdamOptimizerConfig } | { type: OptimizerType.RMSProp; config: RMSPropOptimizerConfig }; export type AdamOptimizerConfig = { @@ -27,6 +28,13 @@ export function AdamOptimizer(config: AdamOptimizerConfig = {}): Optimizer { return { type: OptimizerType.Adam, config }; } +export function NadamOptimizer(config: AdamOptimizerConfig = {}): Optimizer { + config.beta1 = config.beta1 || 0.9; + config.beta2 = config.beta2 || 0.999; + config.epsilon = config.epsilon || 1e-8; + return { type: OptimizerType.Nadam, config }; +} + export function RMSPropOptimizer(config: RMSPropOptimizerConfig = {}): Optimizer { config.decayRate = config.decayRate || 0.9; config.epsilon = config.epsilon || 1e-8; diff --git a/src/core/types.ts b/src/core/types.ts index df8f85a..d0ef9cb 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -149,6 +149,7 @@ export 
enum Cost { export enum OptimizerType { SGD = "sgd", Adam = "adam", + Nadam = "nadam", RMSProp = "rmsprop" } From 14093db92061e66faa40e74913bd92752c6d20a2 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 09:52:56 +0530 Subject: [PATCH 08/15] add regularization params to ts --- src/core/api/layer.ts | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/core/api/layer.ts b/src/core/api/layer.ts index 0dca5c5..a950a62 100644 --- a/src/core/api/layer.ts +++ b/src/core/api/layer.ts @@ -31,6 +31,16 @@ export type DenseLayerConfig = { * The size of the layer. */ size: Shape1D; + + /** + * Inverse of regularization strength. + */ + c?: number; + + /** + * Ratio of l1:l2. + */ + l1Ratio?: number; }; /** @@ -86,6 +96,16 @@ export type Conv2DLayerConfig = { * The optional strides to use. */ strides?: Shape2D; + + /** + * Inverse of regularization strength. + */ + c?: number; + + /** + * Ratio of l1:l2. + */ + l1Ratio?: number; }; /** @@ -111,6 +131,16 @@ export type ConvTranspose2DLayerConfig = { * The optional strides to use. */ strides?: Shape2D; + + /** + * Inverse of regularization strength. + */ + c?: number; + + /** + * Ratio of l1:l2. + */ + l1Ratio?: number; }; export enum PoolMode { From 03c2e8f109eb607e03ff8fc84b82a6f84d15960d Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 11:42:44 +0530 Subject: [PATCH 09/15] add tolerance and patience --- crates/core/src/cpu/backend.rs | 35 +++++++++++++++++++++++++++++++++- crates/core/src/types.rs | 2 ++ src/core/types.ts | 16 +++++++++++++--- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/crates/core/src/cpu/backend.rs b/crates/core/src/cpu/backend.rs index 655fcf7..71f9c29 100644 --- a/crates/core/src/cpu/backend.rs +++ b/crates/core/src/cpu/backend.rs @@ -14,6 +14,8 @@ use crate::{ pub struct Backend { pub silent: bool, pub config: BackendConfig, + pub tolerance: f32, + pub patience: usize, pub layers: Vec, pub size: Vec, pub cost: CPUCost, @@ -83,10 +85,14 @@ impl Backend { let scheduler = CPUScheduler::from(&config.scheduler); let cost = CPUCost::from(config.cost.clone()); let silent = config.silent.is_some_and(|x| x == true); + let tolerance = config.tolerance.unwrap_or(0.0); + let patience = config.patience.unwrap_or(0); Self { logger, silent, config, + tolerance, + patience, layers, cost, optimizer, @@ -131,6 +137,10 @@ impl Backend { pub fn train(&mut self, datasets: Vec, epochs: usize, batches: usize, rate: f32) { let mut epoch = 0; + let mut best_cost = -1f32; + let mut disappointments = 0; + let mut best_net = self.save(); + let mut cost = 0f32; while epoch < epochs { let mut total = 0.0; for (i, dataset) in datasets.iter().enumerate() { @@ -141,12 +151,35 @@ impl Backend { total += (self.cost.cost)(outputs.view(), dataset.outputs.view()); let minibatch = outputs.dim()[0]; if !self.silent && ((i + 1) * minibatch) % batches == 0 { - let cost = total / (batches) as f32; + cost = total / (batches) as f32; let msg = format!("Epoch={}, Dataset={}, Cost={}", epoch, i * minibatch, cost); (self.logger.log)(msg); total = 0.0; } } + if self.patience != 0 { + if best_cost < 0.0 { + best_cost = cost; + } + if cost < best_cost - self.tolerance { + disappointments = 0; + best_cost = cost; + best_net = self.save(); + } else { + disappointments += 1; + if !self.silent { + println!("Patience counter: {} disappointing epochs out of {}.", disappointments, self.patience); + } + } + if disappointments >= self.patience { + if !self.silent { + println!("No improvement for {} epochs. 
Stopping early at cost={}", disappointments, best_cost); + } + let net = Self::load(&best_net, Logger { log: |x| println!("{}", x) }); + self.layers = net.layers; + break; + } + } epoch += 1 } } diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index 66349c6..875aacb 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -9,6 +9,8 @@ pub struct BackendConfig { pub cost: Cost, pub optimizer: Optimizer, pub scheduler: Scheduler, + pub tolerance: Option, + pub patience: Option, } #[derive(Debug)] diff --git a/src/core/types.ts b/src/core/types.ts index d0ef9cb..30a22b0 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -17,7 +17,7 @@ export interface Backend { datasets: DataSet[], epochs: number, batches: number, - rate: number, + rate: number ): void; /** @@ -28,7 +28,7 @@ export interface Backend { predict( input: Tensor, layers?: number[], - outputShape?: Shape, + outputShape?: Shape ): Promise>; /** @@ -77,6 +77,16 @@ export type NetworkConfig = { * Whether or not to silence the verbose messages. */ silent?: boolean; + + /** + * Minimum threshold for weight updates in each epoch. + */ + tolerance?: number; + + /** + * Number of disappointing iterations to allow before early stopping + */ + patience?: number; }; /** @@ -150,7 +160,7 @@ export enum OptimizerType { SGD = "sgd", Adam = "adam", Nadam = "nadam", - RMSProp = "rmsprop" + RMSProp = "rmsprop", } export enum SchedulerType { From 799d048bee9eef6923b2d7fa846b41cd5b3663de Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 15:50:21 +0530 Subject: [PATCH 10/15] add huber, tukey, smooth hinge --- crates/core/src/cpu/cost.rs | 138 ++++++++++++++++++++++++++++++++++-- crates/core/src/types.rs | 6 +- src/core/types.ts | 24 ++++++- 3 files changed, 159 insertions(+), 9 deletions(-) diff --git a/crates/core/src/cpu/cost.rs b/crates/core/src/cpu/cost.rs index b81b076..440b0ac 100644 --- a/crates/core/src/cpu/cost.rs +++ b/crates/core/src/cpu/cost.rs @@ -3,10 +3,13 @@ use std::{ ops::{Mul, Sub}, }; -use ndarray::{ArrayD, ArrayViewD}; +use ndarray::{Array1, ArrayD, ArrayViewD}; use crate::Cost; +const HUBER_DELTA: f32 = 1.5; +const TUKEY_C: f32 = 4.685; + pub struct CPUCost { pub cost: for<'a> fn(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32, pub prime: for<'a> fn(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD, @@ -19,6 +22,10 @@ impl CPUCost { cost: mse, prime: mse_prime, }, + Cost::MAE => CPUCost { + cost: mae, + prime: mae_prime, + }, Cost::CrossEntropy => CPUCost { cost: cross_entropy, prime: cross_entropy_prime, @@ -31,27 +38,48 @@ impl CPUCost { cost: hinge, prime: hinge_prime, }, + Cost::Huber => CPUCost { + cost: huber, + prime: huber_prime, + }, + Cost::SmoothHinge => CPUCost { + cost: smooth_hinge, + prime: smooth_hinge_prime, + }, + Cost::Tukey => CPUCost { + cost: tukey, + prime: tukey_prime, + }, } } } fn mse<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { - let sub = y.sub(&y_hat); + let sub = y_hat.sub(&y); return sub.clone().mul(sub).sum() / y.len() as f32; } fn mse_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { - return y.sub(&y_hat); + return y_hat.sub(&y); +} + +fn mae<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { + let sub = y_hat.sub(&y); + return sub.map(|x| x.abs()).sum() / y.len() as f32; +} + +fn mae_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { + return y_hat.sub(&y).map(|x| x.signum()); } fn cross_entropy<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, 
f32>) -> f32 { let batches = y_hat.dim()[0]; - let total = (-&y_hat * (y.map(|x| x.max(EPSILON).min(1f32 - EPSILON).ln()))).sum(); + let total = (-&y * (y_hat.map(|x| x.max(EPSILON).min(1f32 - EPSILON).ln()))).sum(); return total / batches as f32; } fn cross_entropy_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { - return -&y_hat / (&y + EPSILON); + return -&y / (&y_hat + EPSILON); } fn bin_cross_entropy<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { @@ -63,7 +91,7 @@ fn bin_cross_entropy<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> } fn bin_cross_entropy_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { - return (-&y_hat / (&y + EPSILON)) + (1.0 - &y_hat) / (1.0 - &y + EPSILON); + return (-&y / (&y_hat + EPSILON)) + (1.0 - &y) / (1.0 - &y_hat + EPSILON); } fn hinge<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { @@ -85,5 +113,101 @@ fn hinge_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD *result_i = -y_i; } } - return result; + return result +} + +pub fn smooth_hinge<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { + y_hat + .iter() + .zip(y.iter()) + .map(|(y_hat_i, y_i)| { + let margin = y_i * y_hat_i; + if margin > -1f32 { + (1.0 - margin).max(0.0) + } else { + -4f32 * margin + } + }) + .collect::>() + .to_shape(y.shape()) + .unwrap() + .to_owned() + .sum() + / y.len() as f32 +} + +pub fn smooth_hinge_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { + y_hat + .iter() + .zip(y.iter()) + .map(|(y_hat_i, y_i)| { + let margin = y_i * y_hat_i; + if margin > -1f32 { + -y_i + } else { + -4f32 * y_i + } + }) + .collect::>() + .to_shape(y.shape()) + .unwrap() + .to_owned() +} + +pub fn tukey<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { + let c_squared = TUKEY_C * TUKEY_C / 6.0; + y.sub(&y_hat) + .map(|el| { + let r = el.abs(); + if r <= TUKEY_C { + c_squared * (1.0 - (1.0 - (r / TUKEY_C).powi(2)).powi(3)) + } else { + c_squared + } + }) + .sum() + / y.len() as f32 +} + +pub fn tukey_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { + y.sub(&y_hat).map(|el| { + let r = el.abs(); + if r <= TUKEY_C { + r * (1.0 - ((r / TUKEY_C).powi(2))).powi(2) + } else { + 0f32 + } + }) +} + +pub fn huber<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> f32 { + let loss: Array1 = y_hat + .iter() + .zip(y.iter()) + .map(|(y_hat_i, y_i)| { + let residual = y_i - y_hat_i; + if residual.abs() <= HUBER_DELTA { + 0.5 * residual.powi(2) + } else { + HUBER_DELTA * (residual.abs() - 0.5 * HUBER_DELTA) + } + }) + .collect(); + loss.to_shape(y.shape()).unwrap().sum() / y.len() as f32 +} + +pub fn huber_prime<'a>(y_hat: ArrayViewD<'a, f32>, y: ArrayViewD<'a, f32>) -> ArrayD { + let gradient: Array1 = y_hat + .iter() + .zip(y.iter()) + .map(|(y_hat_i, y_i)| { + let residual = y_i - y_hat_i; + if residual.abs() <= HUBER_DELTA { + -residual + } else { + -HUBER_DELTA * residual.signum() + } + }) + .collect(); + gradient.to_shape(y.shape()).unwrap().to_owned() } diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs index 875aacb..d54f4e8 100644 --- a/crates/core/src/types.rs +++ b/crates/core/src/types.rs @@ -119,10 +119,14 @@ pub struct ActivationLayer { #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "lowercase")] pub enum Cost { + BinCrossEntropy, CrossEntropy, Hinge, + Huber, + MAE, MSE, - BinCrossEntropy, + SmoothHinge, + Tukey, } #[derive(Serialize, Deserialize, Debug, Clone)] 
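For reference, the Huber and Tukey losses implemented in cost.rs above follow the standard forms below, using the constants hard-coded there (HUBER_DELTA = 1.5, TUKEY_C = 4.685) and averaging over all elements of the residual r = y − ŷ:

\[
L_{\delta}(r) =
\begin{cases}
  \tfrac{1}{2} r^{2}, & |r| \le \delta \\
  \delta\bigl(|r| - \tfrac{\delta}{2}\bigr), & |r| > \delta
\end{cases}
\qquad
L_{c}(r) =
\begin{cases}
  \tfrac{c^{2}}{6}\Bigl[1 - \bigl(1 - (r/c)^{2}\bigr)^{3}\Bigr], & |r| \le c \\
  \tfrac{c^{2}}{6}, & |r| > c
\end{cases}
\]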
diff --git a/src/core/types.ts b/src/core/types.ts index 30a22b0..607f34e 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -77,7 +77,7 @@ export type NetworkConfig = { * Whether or not to silence the verbose messages. */ silent?: boolean; - + /** * Minimum threshold for weight updates in each epoch. */ @@ -154,6 +154,28 @@ export enum Cost { * Mean squared error cost function is the standard cost function for regression. */ MSE = "mse", + + /** + * Mean absolute error cost function is a popular cost function for regression. + */ + MAE = "mae", + + /** + * Huber is a cost function for regression and is less sensitive to outliers than the + * squared error loss + */ + Huber = "huber", + + /** + * Smoothed hinge is a variant of the Huber cost function used for binary classification. + * It is a smoothed version of hinge and is more robust to outliers. + */ + SmoothHinge = "smoothhinge", + + /** + * Tukey's biweight loss is a robust cost function for regression problems. + */ + Tukey = "tukey", } export enum OptimizerType { From b49b3875499d6c03f6f3a08dea49abf57e690feb Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 15:50:44 +0530 Subject: [PATCH 11/15] fix variable naming for cost functions --- crates/core/src/cpu/backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core/src/cpu/backend.rs b/crates/core/src/cpu/backend.rs index 71f9c29..1273ff8 100644 --- a/crates/core/src/cpu/backend.rs +++ b/crates/core/src/cpu/backend.rs @@ -128,7 +128,7 @@ impl Backend { outputs: ArrayViewD<'b, f32>, data: ArrayViewD<'b, f32>, ) -> ArrayD { - let mut d_outputs = (self.cost.prime)(data, outputs); + let mut d_outputs = (self.cost.prime)(outputs, data); for layer in self.layers.iter_mut().rev() { d_outputs = layer.backward_propagate(d_outputs); } From 12b5e1bd176ae015da881a373c25d516ab19d0bb Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 16:01:38 +0530 Subject: [PATCH 12/15] fix multi-linreg example --- examples/multiple-linear/student.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/multiple-linear/student.ts b/examples/multiple-linear/student.ts index a3816c7..12e88b7 100644 --- a/examples/multiple-linear/student.ts +++ b/examples/multiple-linear/student.ts @@ -36,6 +36,8 @@ const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ // Setup the CPU backend for Netsaur await setupBackend(CPU); +console.log(train) + // Create a sequential neural network const net = new Sequential({ // Set number of minibatches to 4 @@ -81,9 +83,9 @@ console.log(`training time: ${performance.now() - time}ms`); // Compute RMSE let err = 0; +const y_test = await net.predict(tensor2D(test[0])); for (const i in test[0]) { - const y_test = await net.predict(tensor1D(test[0][i])); - err += (test[1][i] - y_test.data[0]) ** 2; - console.log(`\nOutput: ${y_test.data[0]}\nExpected: ${test[1][i]}`); + err += (test[1][i] - y_test.data[i]) ** 2; + console.log(`\nOutput: ${y_test.data[i]}\nExpected: ${test[1][i]}`); } console.log("RMSE:", Math.sqrt(err / test[0].length)); From 74a31f5920729d69eda80d0907e159e4837efdeb Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 16:05:12 +0530 Subject: [PATCH 13/15] fix text classifier --- examples/classification/spam.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/classification/spam.ts b/examples/classification/spam.ts index 15ce5b6..04dacf8 100644 --- a/examples/classification/spam.ts +++ b/examples/classification/spam.ts @@ -3,6 +3,7 @@ 
import { Cost, CPU, DenseLayer, + NadamOptimizer, ReluLayer, Sequential, setupBackend, @@ -30,7 +31,7 @@ const data = parse(_data); const x = data.map((msg) => msg[1]); // Get the classes -const y = data.map((msg) => ymap.indexOf(msg[0])); +const y = data.map((msg) => ymap.indexOf(msg[0]) === 0 ? -1 : 1); // Split the dataset for training and testing const [train, test] = useSplit({ ratio: [7, 3], shuffle: true }, x, y) as [ @@ -67,11 +68,11 @@ const net = new Sequential({ // A dense layer with 1 neuron DenseLayer({ size: [1] }), // A sigmoid activation layer - SigmoidLayer(), ], // We are using Log Loss for finding cost - cost: Cost.BinCrossEntropy, + cost: Cost.Hinge, + optimizer: NadamOptimizer() }); const inputs = tensor(x_vec.data, x_vec.shape); @@ -97,6 +98,6 @@ const x_vec_test = vec.transform(test[0]); // Calculate metrics const res = await net.predict(tensor(x_vec_test.data, x_vec_test.shape)); -const y1 = res.data.map((i) => i < 0.5 ? 0 : 1); +const y1 = res.data.map((i) => i < 0 ? -1 : 1); const cMatrix = new ClassificationReport(test[1], y1); console.log("Confusion Matrix: ", cMatrix); From badf9a39e72053835075b0e8478b880cde4f0854 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 16:08:36 +0530 Subject: [PATCH 14/15] fix filters example --- examples/filters/conv.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/filters/conv.ts b/examples/filters/conv.ts index d883317..47e11d2 100644 --- a/examples/filters/conv.ts +++ b/examples/filters/conv.ts @@ -199,7 +199,7 @@ async function feedForward(layers: Layer[]) { cost: Cost.MSE, }); - const data = new Tensor(buffer, [1, dim, dim]); + const data = new Tensor(buffer, [1, 1, dim, dim]); return (await net.predict(data)) as Tensor; } From 279be0cdd23960e64c409498c07adefaa84d0557 Mon Sep 17 00:00:00 2001 From: NeTT Date: Wed, 4 Sep 2024 17:12:41 +0530 Subject: [PATCH 15/15] update autoencoder example --- deno.json | 1 + examples/autoencoders/decoded.html | 2 +- examples/autoencoders/encoded.html | 2 +- examples/autoencoders/example.ts | 79 ++++++++++++++++++++++++++++++ examples/autoencoders/output.html | 2 +- 5 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 examples/autoencoders/example.ts diff --git a/deno.json b/deno.json index 30b23bf..69f7422 100644 --- a/deno.json +++ b/deno.json @@ -14,6 +14,7 @@ "example:xor-cpu": "deno run -A --unstable-ffi ./examples/xor_cpu.ts", "example:xor-gpu": "deno run -A --unstable-ffi ./examples/xor_gpu.ts", "example:xor-wasm": "deno run -A ./examples/xor_wasm.ts", + "example:autoencoder": "deno run -A --unstable-ffi ./examples/autoencoders/example.ts", "example:linear": "deno run -A --unstable-ffi ./examples/linear.ts", "example:multiple-linear": "deno run -A --unstable-ffi ./examples/multiple-linear/student.ts", "example:binary": "deno run -A --unstable-ffi ./examples/classification/binary_iris.ts", diff --git a/examples/autoencoders/decoded.html b/examples/autoencoders/decoded.html index bd664db..a25a3d1 100644 --- a/examples/autoencoders/decoded.html +++ b/examples/autoencoders/decoded.html @@ -1,2 +1,2 @@ -
[old decoded.html table: 20 rows × 11 feature columns of reconstructed values (numeric data omitted)]
\ No newline at end of file
+[new decoded.html table: 20 rows × 11 feature columns of reconstructed values (numeric data omitted)]
\ No newline at end of file
diff --git a/examples/autoencoders/encoded.html b/examples/autoencoders/encoded.html
index 2716588..ca7b752 100644
--- a/examples/autoencoders/encoded.html
+++ b/examples/autoencoders/encoded.html
@@ -1,2 +1,2 @@
-[previous encoded.html: flattened HTML table, 20 rows × 6 columns of encoder activations]
\ No newline at end of file
+[regenerated encoded.html: flattened HTML table, 20 rows × 2 columns from the new 2-unit encoder]
\ No newline at end of file
diff --git a/examples/autoencoders/example.ts b/examples/autoencoders/example.ts
new file mode 100644
index 0000000..ea8ddb0
--- /dev/null
+++ b/examples/autoencoders/example.ts
@@ -0,0 +1,79 @@
+import { Matrix } from "https://deno.land/x/vectorizer@v0.3.6/mod.ts";
+import {
+  Sequential,
+  setupBackend,
+  CPU,
+  DenseLayer,
+  AdamOptimizer,
+  Shape2D,
+  ReluLayer,
+  tensor,
+  Cost,
+  OneCycle
+} from "../../mod.ts";
+
+import { parse } from "https://deno.land/std@0.188.0/csv/parse.ts";
+
+const data = parse(Deno.readTextFileSync("examples/autoencoders/winequality-red.csv"))
+data.shift()
+
+const x_data = data.slice(0, 20).map((fl, i) => fl.slice(0, 11).map(Number));
+const X = new Matrix<"f32">(Float32Array.from(x_data.flat()), [x_data.length])
+
+await setupBackend(CPU);
+
+const net = new Sequential({
+  size: [4, X.nCols],
+  silent: false,
+  layers: [
+    // Encoder
+    DenseLayer({ size: [8] }),
+    ReluLayer(),
+    DenseLayer({ size: [4] }),
+    ReluLayer(),
+    DenseLayer({ size: [2] }),
+    // Decoder
+    DenseLayer({ size: [4] }),
+    ReluLayer(),
+    DenseLayer({ size: [8] }),
+    ReluLayer(),
+    DenseLayer({ size: [X.nCols] }),
+  ],
+  cost: Cost.MSE,
+  patience: 50,
+  optimizer: AdamOptimizer(),
+  // scheduler: OneCycle()
+});
+
+const input = tensor(X.data, X.shape)
+
+const timeStart = performance.now()
+net.train([{ inputs: input, outputs: input }], 10000, 1, 0.01)
+console.log(`Trained in ${performance.now() - timeStart}ms`)
+
+function saveTable(name: string, data: Matrix<"f32">) {
+  Deno.writeTextFileSync(`examples/autoencoders/${name}.html`, data.html)
+}
+
+saveTable("input", X)
+
+console.log("Running Whole Net")
+const output = await net.predict(input)
+
+const output_mat = new Matrix<"f32">(output.data, output.shape as Shape2D)
+
+saveTable("output", output_mat)
+
+console.log("Running Encoder")
+const encoded = await net.predict(input, [0, 5])
+
+const encoded_mat = new Matrix<"f32">(encoded.data, encoded.shape as Shape2D)
+
+saveTable("encoded", encoded_mat)
+
+console.log("Running Decoder")
+const decoded = await net.predict(tensor(encoded_mat.data, encoded_mat.shape), [5, 10])
+
+const decoded_mat = new Matrix<"f32">(decoded.data, decoded.shape as Shape2D)
+
+saveTable("decoded", decoded_mat)
\ No newline at end of file
diff --git a/examples/autoencoders/output.html b/examples/autoencoders/output.html
index bd664db..a25a3d1 100644
--- a/examples/autoencoders/output.html
+++ b/examples/autoencoders/output.html
@@ -1,2 +1,2 @@
-[previous output.html: flattened HTML table, 20 rows × 11 columns of reconstructed wine-quality features]
\ No newline at end of file
+[regenerated output.html: flattened HTML table, 20 rows × 11 columns of reconstructed wine-quality features]
\ No newline at end of file