feat: add embeddings, lstm, gelu (#68)
* add embeddings

* change import

* update flatten backprop

* feat: Add postprocessing (and fix WASM) (#65) (#66)

* add postprocessing for sign and step

* fix wasm

* update flatten

* allow custom mapping

* update matrix types

* add lstm

* try to fix lstm

* allow other activation

* add config for activation

* add GELU

* build wasm

* wasm

* last lstm test attempt
retraigo authored Sep 30, 2024
1 parent a811fd1 commit 8e31438
Showing 36 changed files with 1,194 additions and 272 deletions.
14 changes: 14 additions & 0 deletions crates/core/src/cpu/activation.rs
@@ -7,6 +7,9 @@ pub struct CPUActivation {

type ActivationFn = fn(x: &f32) -> f32;

const ROOT_2_BY_PI: f32 = 0.7978845608028654;
const GELU_APPROX: f32 = 0.044715;

impl CPUActivation {
pub fn from(activation: Activation) -> Self {
let (activate, prime): (ActivationFn, ActivationFn) = match activation {
@@ -15,6 +18,7 @@ impl CPUActivation {
Activation::Linear => (linear, linear_prime),
Activation::Relu => (relu, relu_prime),
Activation::Relu6 => (relu6, relu6_prime),
Activation::Gelu => (gelu, gelu_prime),
Activation::Selu => (selu, selu_prime),
Activation::Sigmoid => (sigmoid, sigmoid_prime),
Activation::Tanh => (tanh, tanh_prime),
@@ -75,6 +79,16 @@ fn relu_prime(x: &f32) -> f32 {
return if *x > 0.0 { 1.0 } else { 0.0 };
}

fn gelu(x: &f32) -> f32 {
return (0.5 * x) * (1.0 + (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh());
}

fn gelu_prime(x: &f32) -> f32 {
let tanned = (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh();
return (0.5 * (1.0 + tanned))
+ (0.5 * x * (1.0 - tanned.powi(2))) * ROOT_2_BY_PI * (1.0 + 3.0 * GELU_APPROX * x.powi(2));
}

fn relu6(x: &f32) -> f32 {
return x.max(0.0).min(6.0);
}
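The new GELU pair implements the tanh approximation gelu(x) ≈ 0.5·x·(1 + tanh(√(2/π)·(x + 0.044715·x³))). A standalone sanity check (not part of this commit) that compares the analytic derivative above against a central finite difference:

```rust
// Standalone sketch: verify the tanh-based GELU derivative numerically.
// Constants mirror the diff above.
const ROOT_2_BY_PI: f32 = 0.7978845608028654; // sqrt(2/pi)
const GELU_APPROX: f32 = 0.044715;

fn gelu(x: f32) -> f32 {
    0.5 * x * (1.0 + (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh())
}

fn gelu_prime(x: f32) -> f32 {
    let t = (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh();
    0.5 * (1.0 + t) + 0.5 * x * (1.0 - t * t) * ROOT_2_BY_PI * (1.0 + 3.0 * GELU_APPROX * x * x)
}

fn main() {
    let h = 1e-3f32;
    for &x in &[-2.0f32, -0.5, 0.0, 0.5, 2.0] {
        // Central difference approximates d(gelu)/dx and should match gelu_prime closely.
        let numeric = (gelu(x + h) - gelu(x - h)) / (2.0 * h);
        println!("x = {x:5.2}  analytic = {:8.5}  numeric = {:8.5}", gelu_prime(x), numeric);
    }
}
```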
28 changes: 24 additions & 4 deletions crates/core/src/cpu/backend.rs
@@ -11,6 +11,8 @@ use crate::{
Pool2DCPULayer, PostProcessor, SoftmaxCPULayer, Tensor, Tensors, Timer,
};

use super::{EmbeddingCPULayer, LSTMCPULayer};

pub struct Backend {
pub silent: bool,
pub config: BackendConfig,
@@ -71,18 +73,28 @@ impl Backend {
size = layer.output_size().to_vec();
layers.push(CPULayer::Dense(layer));
}
Layer::Flatten(config) => {
let layer = FlattenCPULayer::new(config, IxDyn(&size));
Layer::Embedding(config) => {
let layer = EmbeddingCPULayer::new(config, IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Embedding(layer));
}
Layer::Flatten => {
let layer = FlattenCPULayer::new(IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Flatten(layer));
}
Layer::LSTM(config) => {
let layer = LSTMCPULayer::new(config, IxDyn(&size), None);
size = layer.output_size().to_vec();
layers.push(CPULayer::LSTM(layer));
}
Layer::Pool2D(config) => {
let layer = Pool2DCPULayer::new(config, IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Pool2D(layer));
}
Layer::Softmax => {
let layer = SoftmaxCPULayer::new(IxDyn(&size));
Layer::Softmax(config) => {
let layer = SoftmaxCPULayer::new(config, IxDyn(&size));
layers.push(CPULayer::Softmax(layer));
}
}
@@ -125,7 +137,10 @@ impl Backend {
}
}
None => {
// let mut i = 0;
for layer in &mut self.layers {
// i += 1;
// println!("\n\nLayer +{}: {:?}", i, &inputs);
inputs = layer.forward_propagate(inputs, training);
}
}
@@ -138,9 +153,14 @@ impl Backend {
outputs: ArrayViewD<'b, f32>,
data: ArrayViewD<'b, f32>,
) -> ArrayD<f32> {
// println!("\n\nOutput: {:?}", &outputs);
let mut d_outputs = (self.cost.prime)(outputs, data);
// println!("\n\nD Output: {:?}", &d_outputs);
// let mut i = 0;
for layer in self.layers.iter_mut().rev() {
// i += 1;
d_outputs = layer.backward_propagate(d_outputs);
// println!("\n\nLayer -{}: {:?}", i, &d_outputs);
}
d_outputs
}
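Backend::new threads the running `size` through each constructor, so every layer (including the new Embedding and LSTM arms) is built against the previous layer's output shape, and backward_propagate walks the same list in reverse. A minimal standalone sketch of that pattern with placeholder layer kinds, not the crate's real types:

```rust
// Standalone sketch of the shape-threading pattern in Backend::new: each layer is
// constructed from the running `size` (the previous layer's output shape) and then
// updates it for the next layer. Variants here are illustrative placeholders.
#[derive(Debug)]
enum LayerConfig {
    Dense { units: usize },
    Flatten,
}

fn build(mut size: Vec<usize>, configs: &[LayerConfig]) -> Vec<Vec<usize>> {
    let mut shapes = vec![size.clone()];
    for config in configs {
        size = match config {
            // A dense layer keeps the batch dimension and replaces the rest with `units`.
            LayerConfig::Dense { units } => vec![size[0], *units],
            // Flatten keeps the batch dimension and collapses everything else.
            LayerConfig::Flatten => vec![size[0], size[1..].iter().product()],
        };
        shapes.push(size.clone());
    }
    shapes
}

fn main() {
    let shapes = build(
        vec![32, 10, 8], // batch of 32, sequence length 10, 8 features
        &[LayerConfig::Flatten, LayerConfig::Dense { units: 4 }],
    );
    println!("{shapes:?}"); // [[32, 10, 8], [32, 80], [32, 4]]
}
```

The real Backend does the same with CPULayer variants and IxDyn shapes, and its backward pass feeds each layer's gradient into the previous layer via `self.layers.iter_mut().rev()`.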
45 changes: 45 additions & 0 deletions crates/core/src/cpu/layer_norm.rs
@@ -0,0 +1,45 @@
extern crate ndarray;
use ndarray::{Array1, ArrayD, Axis};

pub struct LayerNorm {
pub gamma: Array1<f32>,
pub beta: Array1<f32>,
pub epsilon: f32,
}

impl LayerNorm {
pub fn new(hidden_size: usize, epsilon: f32) -> Self {
LayerNorm {
gamma: Array1::ones(hidden_size),
beta: Array1::zeros(hidden_size),
epsilon,
}
}

pub fn forward(&self, input: ArrayD<f32>) -> ArrayD<f32> {
let shape = input.shape();
let last_axis = shape.len() - 1;

let mean = input.mean_axis(Axis(last_axis)).unwrap();
let variance = input.var_axis(Axis(last_axis), 0.0);

let mut normalized_input = input.clone();
normalized_input
.axis_iter_mut(Axis(last_axis))
.enumerate()
.for_each(|(i, mut row)| {
let mean_i = mean[i];
let var_i = variance[i].sqrt() + self.epsilon;
row -= mean_i;
row /= var_i;
});

normalized_input
.axis_iter_mut(Axis(last_axis))
.for_each(|mut item| {
let new = &item * &self.gamma + &self.beta;
item.assign(&new);
});
normalized_input
}
}
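Layer normalization rescales each feature vector over the last axis to zero mean and unit variance, then applies the learned gamma and beta. A standalone ndarray sketch of the standard formulation, y = γ·(x − μ)/√(σ² + ε) + β, independent of the struct above:

```rust
// Standalone sketch of layer normalization over the last axis, computed row by row.
use ndarray::{Array1, Array2, Axis};

fn layer_norm(x: &Array2<f32>, gamma: &Array1<f32>, beta: &Array1<f32>, eps: f32) -> Array2<f32> {
    let mut out = x.clone();
    for mut row in out.axis_iter_mut(Axis(0)) {
        let mean = row.mean().unwrap();
        let var = row.mapv(|v| (v - mean).powi(2)).mean().unwrap();
        // Normalize, then scale by gamma and shift by beta.
        row.mapv_inplace(|v| (v - mean) / (var + eps).sqrt());
        row.zip_mut_with(gamma, |v, g| *v *= g);
        row.zip_mut_with(beta, |v, b| *v += b);
    }
    out
}

fn main() {
    let x = Array2::from_shape_vec((2, 4), vec![1., 2., 3., 4., 10., 20., 30., 40.]).unwrap();
    let out = layer_norm(&x, &Array1::ones(4), &Array1::zeros(4), 1e-5);
    println!("{out:?}");
}
```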
15 changes: 9 additions & 6 deletions crates/core/src/cpu/layers/activation.rs
@@ -1,7 +1,7 @@
use ndarray::{s, ArrayD, Dimension, IxDyn};
use std::ops::{Div, Mul, Sub};
use std::{f32::EPSILON, ops::{Div, Mul, Sub}};

use crate::{ActivationLayer, CPUActivation};
use crate::{ActivationLayer, CPUActivation, SoftmaxLayer};

pub struct ActivationCPULayer {
pub outputs: ArrayD<f32>,
@@ -45,11 +45,13 @@ impl ActivationCPULayer {

pub struct SoftmaxCPULayer {
pub outputs: ArrayD<f32>,
pub temperature: f32,
}

impl SoftmaxCPULayer {
pub fn new(size: IxDyn) -> Self {
pub fn new(config: SoftmaxLayer, size: IxDyn) -> Self {
Self {
temperature: config.temperature.unwrap_or(1f32),
outputs: ArrayD::zeros(size),
}
}
@@ -68,18 +70,19 @@ impl SoftmaxCPULayer {
self.outputs = inputs.clone();
let batches = self.outputs.dim()[0];
for b in 0..batches {
let exp = inputs.slice(s![b, ..]).map(|x| x.exp());
let current_input = inputs.slice(s![b, ..]).map(|x| x / self.temperature);
let max = current_input.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let exp = current_input.map(|x| (x - max).exp());
self.outputs
.slice_mut(s![b, ..])
.assign(&exp.clone().div(exp.sum()));
.assign(&exp.clone().div(exp.sum() + EPSILON));
}
self.outputs.clone().into_dyn()
}

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
let batches = self.outputs.dim()[0];
let array_size = self.outputs.dim().size() / batches;

let mut d_inputs = ArrayD::zeros(self.outputs.dim());
for b in 0..batches {
for y in 0..array_size {
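The reworked softmax forward pass divides the logits by a temperature, subtracts the per-row maximum before exponentiating (the usual overflow guard), and adds EPSILON to the denominator. A standalone sketch of the same idea on a plain slice, not the crate's API:

```rust
// Standalone sketch of temperature-scaled softmax with the max-subtraction trick.
// Higher temperature flattens the distribution; lower temperature sharpens it.
fn softmax(logits: &[f32], temperature: f32) -> Vec<f32> {
    let scaled: Vec<f32> = logits.iter().map(|x| x / temperature).collect();
    let max = scaled.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
    let exp: Vec<f32> = scaled.iter().map(|x| (x - max).exp()).collect();
    let sum: f32 = exp.iter().sum::<f32>() + f32::EPSILON;
    exp.iter().map(|x| x / sum).collect()
}

fn main() {
    let logits = [2.0f32, 1.0, 0.1];
    println!("T = 1.0 -> {:?}", softmax(&logits, 1.0));
    println!("T = 5.0 -> {:?}", softmax(&logits, 5.0)); // closer to uniform
    println!("T = 0.5 -> {:?}", softmax(&logits, 0.5)); // more peaked
}
```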
2 changes: 1 addition & 1 deletion crates/core/src/cpu/layers/dropout.rs
@@ -30,7 +30,7 @@ impl Dropout1DCPULayer {

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>, training: bool) -> ArrayD<f32> {
if training {
self.mask = ArrayD::random(self.mask.dim(), Uniform::new(0.0, 1.0))
self.mask = ArrayD::random(inputs.dim(), Uniform::new(0.0, 1.0))
.map(|x| (if x > &self.probability { 1.0 } else { 0.0 }));
inputs.mul(&self.mask).mul(1.0 / 1.0 - self.probability)
} else {
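The fix above sizes the dropout mask from the incoming batch rather than from the shape stored at construction time. For reference, a standalone sketch of inverted dropout, which keeps each unit with probability 1 − p and scales survivors by 1/(1 − p) so the expected activation matches the input; this is the standard technique, not a copy of the layer above:

```rust
// Standalone sketch of inverted dropout on a plain slice (uses the `rand` crate).
use rand::Rng;

fn dropout(inputs: &[f32], p: f32, training: bool) -> Vec<f32> {
    if !training {
        return inputs.to_vec(); // no masking or scaling at inference time
    }
    let mut rng = rand::thread_rng();
    inputs
        .iter()
        .map(|&x| {
            if rng.gen::<f32>() > p {
                x / (1.0 - p) // kept and rescaled
            } else {
                0.0 // dropped
            }
        })
        .collect()
}

fn main() {
    let out = dropout(&[1.0, 2.0, 3.0, 4.0], 0.5, true);
    println!("{out:?}");
}
```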
68 changes: 68 additions & 0 deletions crates/core/src/cpu/layers/embedding.rs
@@ -0,0 +1,68 @@
use std::ops::AddAssign;
use ndarray::{Array2, ArrayD, Axis, Ix2, IxDyn};

use crate::{CPUInit, CPURegularizer, EmbeddingLayer, Init};

pub struct EmbeddingCPULayer {
pub input_size: IxDyn,
pub input_indices: Vec<usize>,
pub output_size: Vec<usize>,
pub vocab_size: usize,
pub embedding_size: usize,
pub embeddings: Array2<f32>,
pub d_embeddings: Array2<f32>,
// regularization
pub l_embeddings: Array2<f32>,

pub regularizer: CPURegularizer,
}

impl EmbeddingCPULayer {
pub fn new(config: EmbeddingLayer, size: IxDyn) -> Self {
let init = CPUInit::from(Init::Uniform);
let output_size = vec![size[0], size[1], config.embedding_size];
let embeddings = init.init(IxDyn(&[config.vocab_size, config.embedding_size]), 0, 0).into_dimensionality::<Ix2>().unwrap();
let d_embeddings = Array2::zeros((config.vocab_size, config.embedding_size));
Self {
input_size: size,
input_indices: vec![],
output_size,
vocab_size: config.vocab_size,
embedding_size: config.embedding_size,
embeddings,
d_embeddings,
l_embeddings: Array2::zeros((config.vocab_size, config.embedding_size)),
regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0))
}
}

pub fn output_size(&self) -> Vec<usize> {
self.output_size.clone()
}

pub fn reset(&mut self, batches: usize) {
self.output_size[0] = batches
}

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
let input_indices: Vec<usize> = inputs.iter().map(|&x| x as usize).collect();
self.input_indices = input_indices.clone();
let embeddings = self.embeddings.select(Axis(0), input_indices.as_slice());
// let output_size = IxDyn(&self.output_size);
embeddings.into_shape_with_order(IxDyn(&[inputs.shape()[0], inputs.shape()[1], self.embedding_size])).unwrap()
}

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
let indices = Array2::from_shape_vec(Ix2(d_outputs.shape()[0], self.input_size[1]), self.input_indices.clone());
self.d_embeddings = Array2::zeros((self.d_embeddings.shape()[0], self.d_embeddings.shape()[1]));
d_outputs.axis_iter(Axis(0)).zip(indices).for_each(|(rec, i)| {
rec.axis_iter(Axis(0)).zip(i).for_each(|(grad, idx)| {
self.d_embeddings.index_axis_mut(Axis(0), idx).add_assign(&grad);
});
});
self.l_embeddings = self.regularizer.coeff(&self.embeddings.clone().into_dyn()).into_dimensionality::<Ix2>().unwrap();
let mut input_size = self.input_size.clone();
input_size[0] = d_outputs.shape()[0];
ArrayD::from_shape_vec(input_size, self.input_indices.iter().map(|x| *x as f32).collect()).unwrap()
}
}
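EmbeddingCPULayer treats each input value as a row index into a vocab_size × embedding_size table: the forward pass gathers rows with `select`, and the backward pass scatter-adds the incoming gradients into `d_embeddings` at the same indices. A standalone sketch of that gather/scatter-add pair, with placeholder data rather than the layer's real initializer:

```rust
// Standalone sketch of the embedding gather (forward) and scatter-add (backward)
// pattern used above, for a single sequence of token indices.
use std::ops::AddAssign;
use ndarray::{Array2, Axis};

fn main() {
    let vocab_size = 5;
    let embedding_size = 3;
    // A small embedding table; the real layer fills this with CPUInit.
    let table = Array2::from_shape_fn((vocab_size, embedding_size), |(i, j)| (i * 10 + j) as f32);

    // Forward: gather one row per token index.
    let tokens = [1usize, 3, 3];
    let gathered = table.select(Axis(0), &tokens); // shape (3, embedding_size)
    println!("gathered:\n{gathered:?}");

    // Backward: scatter-add the per-position gradients into d_table.
    let d_outputs = Array2::<f32>::ones((tokens.len(), embedding_size));
    let mut d_table = Array2::<f32>::zeros((vocab_size, embedding_size));
    for (grad, &idx) in d_outputs.axis_iter(Axis(0)).zip(tokens.iter()) {
        d_table.index_axis_mut(Axis(0), idx).add_assign(&grad);
    }
    // Row 3 accumulates two gradients because that token appears twice.
    println!("d_table:\n{d_table:?}");
}
```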
24 changes: 8 additions & 16 deletions crates/core/src/cpu/layers/flatten.rs
@@ -1,26 +1,15 @@
use ndarray::{ArrayD, Dimension, IxDyn};

use crate::FlattenLayer;

pub struct FlattenCPULayer {
pub input_size: IxDyn,
pub output_size: Vec<usize>,
}

impl FlattenCPULayer {
pub fn new(config: FlattenLayer, size: IxDyn) -> Self {
let mut new_size = config.size.clone();
new_size.insert(0, size[0]);
let output_size = IxDyn(&new_size);
if output_size.size() != size.size() {
panic!(
"Shape {:#?} is incompatible with shape {:#?}",
output_size, size
)
}
pub fn new(size: IxDyn) -> Self {
Self {
input_size: size,
output_size: new_size,
input_size: size.clone(),
output_size: vec![size[0], size.size() / size[0]],
}
}

@@ -33,11 +22,14 @@ impl FlattenCPULayer {
}

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
let output_size = IxDyn(&self.output_size);
let output_size = IxDyn(&[inputs.shape()[0], self.output_size[1]]);
println!("O {:?} {:?}", inputs.shape(), self.output_size);
inputs.into_shape_with_order(output_size).unwrap()
}

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
d_outputs.into_shape_with_order(self.input_size.clone()).unwrap()
let mut current_size = self.input_size.clone();
current_size[0] = d_outputs.shape()[0];
d_outputs.to_shape(current_size).unwrap().to_owned()
}
}
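FlattenCPULayer now derives its output shape from the incoming batch, (batch, product of the remaining dims), instead of a user-supplied size, and its backward pass restores the stored input shape with the current batch size. A standalone sketch of the reshape pair:

```rust
// Standalone sketch of the two reshapes a flatten layer performs: forward collapses
// every non-batch axis, backward restores the original shape.
use ndarray::{ArrayD, IxDyn};

fn main() {
    let input = ArrayD::<f32>::zeros(IxDyn(&[2, 3, 4])); // batch of 2, 3x4 features
    let batch = input.shape()[0];
    let flat_len: usize = input.shape()[1..].iter().product();

    // Forward: (2, 3, 4) -> (2, 12)
    let flattened = input
        .clone()
        .into_shape_with_order(IxDyn(&[batch, flat_len]))
        .unwrap();
    println!("forward: {:?}", flattened.shape());

    // Backward: (2, 12) -> (2, 3, 4)
    let restored = flattened.into_shape_with_order(input.raw_dim()).unwrap();
    println!("backward: {:?}", restored.shape());
}
```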
