From 6403559f361ecbcbafe7df9dbf6696254f470f7f Mon Sep 17 00:00:00 2001
From: aminediro
Date: Fri, 10 May 2024 23:43:48 +0200
Subject: [PATCH 01/21] softmax migration

---
 CONTRIBUTING.md                        | 22 ++++++++++++++++++++++
 Cargo.toml                             | 17 +++++++++++------
 crates/ratchet-core/Cargo.toml         |  9 +++++----
 crates/ratchet-core/src/ops/softmax.rs | 16 +++++-----------
 crates/ratchet-core/src/tensor.rs      | 22 ++++++++++++++++++++++
 requirements.txt                       |  2 +-
 6 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7291737f..442692c1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -131,6 +131,8 @@ If that looks like this, you are good to go 🎉
 
 ### Step 3: Run Tests
 
+#### PyO3 tests
+
 Finally, run the tests for the package using Cargo:
 
 ```sh
@@ -143,6 +145,26 @@ To run the `PyO3` tests, add the `pyo3` flag:
 cargo test --features pyo3
 ```
 
+#### `tch` tests
+
+`tch`-based tests are run behind the `testing` feature. You first need the PyTorch library (libtorch) v2.3.0 available on your system; follow the [official `tch` instructions](https://github.com/LaurentMazare/tch-rs/tree/main?tab=readme-ov-file) for more details. We'll use the libtorch library installed in the Python environment:
+
+```sh
+export LIBTORCH_USE_PYTORCH=1
+```
+
+You can now run the tests:
+
+```sh
+cargo test --features testing
+```
+
+**NOTE**: If you're having compilation issues on macOS, you can add the `libtorch` lib to your environment:
+
+```sh
+export DYLD_LIBRARY_PATH=$PWD/venv/lib/python3.10/site-packages/torch/lib:$DYLD_LIBRARY_PATH
+```
+
 ### Step 5: Run WASM Tests
 
 To run WASM tests (e.g., the whisper test) run:

diff --git a/Cargo.toml b/Cargo.toml
index 0d5f518f..967aee7a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,8 +5,8 @@ members = [
     "crates/ratchet-web",
     "crates/ratchet-loader",
     "crates/ratchet-models",
-    "crates/ratchet-nn",
-    "crates/ratchet-hub",
+    "crates/ratchet-nn",
+    "crates/ratchet-hub",
     "crates/ratchet-cli",
 ]
 resolver = "2"
@@ -18,7 +18,7 @@ debug-assertions = true
 [profile.release]
 panic = 'abort'
 lto = "fat"
-codegen-units = 1
+codegen-units = 1
 
 [profile.profiling]
 inherits = "release"
@@ -26,18 +26,22 @@ debug = 2
 [workspace.dependencies]
 wgpu = { version = "0.20", features = ["fragile-send-sync-non-atomic-wasm"] }
-bytemuck = { version = "1.14.0", features=["wasm_simd", "aarch64_simd", "extern_crate_alloc"] }
+bytemuck = { version = "1.14.0", features = [
+    "wasm_simd",
+    "aarch64_simd",
+    "extern_crate_alloc",
+] }
 num-traits = "0.2.17"
 half = { version = "2.3.1", features = ["num-traits", "bytemuck"] }
 derive-new = "0.6.0"
 log = "0.4.20"
 thiserror = "1.0.56"
 byteorder = "1.5.0"
-npyz = { version = "0.8.3"}
+npyz = { version = "0.8.3" }
 hf-hub = "0.3.2"
 serde = "1.0"
 anyhow = "1.0.79"
-tokenizers = "0.19.1"
+tokenizers = "0.19.1"
 
 js-sys = "0.3.64"
 wasm-bindgen = "0.2.91"
@@ -90,3 +94,4 @@ wasm-bindgen-futures = "0.4.41"
 web-sys = "0.3.64"
 web-time = "1.0.0"
 futures-intrusive = "0.5.0"
+tch = "0.16.0"

diff --git a/crates/ratchet-core/Cargo.toml b/crates/ratchet-core/Cargo.toml
index 1426c0c6..db3ab55d 100644
--- a/crates/ratchet-core/Cargo.toml
+++ b/crates/ratchet-core/Cargo.toml
@@ -9,7 +9,7 @@ default = ["rand", "testing"]
 gpu-profiling = ["dep:tabled", "dep:itertools"]
 rand = ["dep:rand", "dep:rand_distr"]
 plotting = ["dep:dot3", "dep:tempfile"]
-testing = ["dep:npyz", "dep:ndarray"]
+testing = ["dep:npyz", "dep:ndarray", "dep:tch"]
 pyo3 = ["dep:pyo3", "dep:numpy", "dep:regex"]
 
 [build-dependencies]
@@ -31,7 +31,7 @@ num-traits = { workspace = true }
 log = { workspace = true }
 thiserror = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
-anyhow.workspace = true
+anyhow.workspace = true
 rustc-hash = { workspace = true }
 slotmap = { workspace = true }
 
@@ -55,12 +55,13 @@ tempfile = { workspace = true, optional = true }
 tabled = { workspace = true, optional = true }
 itertools = { workspace = true, optional = true }
 
-pyo3 = { workspace = true, features = ["auto-initialize"], optional = true }
+pyo3 = { workspace = true, features = ["auto-initialize"], optional = true }
 regex = { workspace = true, optional = true }
 numpy = { workspace = true, optional = true }
+tch = { workspace = true, optional = true }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
-wasm-bindgen.workspace = true
+wasm-bindgen.workspace = true
 futures-intrusive.workspace = true
 async-trait = "0.1.77"

diff --git a/crates/ratchet-core/src/ops/softmax.rs b/crates/ratchet-core/src/ops/softmax.rs
index f481d8f4..1addc7ad 100644
--- a/crates/ratchet-core/src/ops/softmax.rs
+++ b/crates/ratchet-core/src/ops/softmax.rs
@@ -104,25 +104,19 @@ impl MetaOperation for Softmax {
     }
 }
 
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
-    use test_strategy::{proptest, Arbitrary};
-
-    use crate::test_util::run_py_prg;
     use crate::{shape, Device, DeviceRequest, Tensor};
+    use tch;
+    use test_strategy::{proptest, Arbitrary};
 
     thread_local! {
         static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
     }
 
     fn ground_truth(a: &Tensor) -> anyhow::Result<Tensor> {
-        let prg = r#"
-import torch
-import torch.nn.functional as F
-def softmax(a):
-    return F.softmax(torch.from_numpy(a), dim=-1).numpy()
-"#;
-        run_py_prg(prg.to_string(), &[a], &[])
+        let t = a.to_tch::<f32>()?;
+        Tensor::try_from(&t.softmax(-1, Some(tch::kind::Kind::Float)))
     }
 
     fn run_softmax_trial(problem: SoftmaxProblem) {

diff --git a/crates/ratchet-core/src/tensor.rs b/crates/ratchet-core/src/tensor.rs
index c0ed6ecd..7bea37d0 100644
--- a/crates/ratchet-core/src/tensor.rs
+++ b/crates/ratchet-core/src/tensor.rs
@@ -5,6 +5,7 @@ use crate::{
     Storage, Strides, TensorDType, TensorId,
 };
 use derive_new::new;
+use ndarray::OwnedRepr;
 use parking_lot::{RwLock, RwLockReadGuard};
 use std::collections::HashSet;
 use std::io::{BufRead, Seek};
@@ -15,6 +16,7 @@ use std::sync::Arc;
 #[cfg(feature = "rand")]
 use {rand::prelude::*, rand_distr::StandardNormal};
 
+use ndarray::ArrayBase;
 #[cfg(feature = "testing")]
 use ndarray::{ArrayD, ArrayViewD, Dimension};
 
@@ -838,6 +840,17 @@ impl Tensor {
         ))
     }
 
+    #[cfg(feature = "testing")]
+    pub fn to_tch<T: TensorDType + tch::kind::Element>(&self) -> anyhow::Result<tch::Tensor> {
+        assert!(
+            self.device().is_cpu(),
+            "Cannot convert non-CPU tensor to tch tensor"
+        );
+        Ok(tch::Tensor::try_from(
+            &self.deep_clone().into_ndarray::<T>(),
+        )?)
+    }
+
     #[cfg(feature = "pyo3")]
     pub fn to_py<'s, 'p: 's, T: TensorDType + numpy::Element>(
         &'s self,
@@ -852,6 +865,15 @@ impl Tensor {
     }
 }
 
+#[cfg(feature = "testing")]
+impl TryFrom<&tch::Tensor> for Tensor {
+    type Error = anyhow::Error;
+    fn try_from(array: &tch::Tensor) -> anyhow::Result<Self> {
+        let base: ArrayBase<OwnedRepr<f32>, _> = array.try_into()?;
+        Ok(Self::from(base))
+    }
+}
+
 #[cfg(feature = "pyo3")]
 impl From<&PyArrayDyn> for Tensor {
     fn from(array: &PyArrayDyn) -> Self {

diff --git a/requirements.txt b/requirements.txt
index f431ed23..21c0a13e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 numpy==1.24.3
-torch==2.0.1
+torch==2.3.0
 requests==2.26.0
 mlx==0.9.0; sys_platform == 'darwin'
 git+https://github.com/FL33TW00D/whisper.git@feature/reference#egg=openai-whisper

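The `to_tch` / `TryFrom` pair above is the bridge every migrated test goes through: build the reference result in libtorch instead of spawning a Python interpreter. A minimal sketch of the round trip, outside any test harness — crate paths and the `testing` feature follow the diff, and this is an illustration rather than part of the patch:

```rust
// Sketch of the conversion round trip introduced in PATCH 01/21.
// Assumes ratchet-core built with the `testing` feature and libtorch available.
use ratchet_core::{shape, Device, Tensor};

fn main() -> anyhow::Result<()> {
    let a = Tensor::randn::<f32>(shape![4, 128], Device::CPU);
    let t = a.to_tch::<f32>()?;                       // ratchet (CPU) -> libtorch
    let soft = t.softmax(-1, Some(tch::Kind::Float)); // reference op runs in libtorch
    let ground = Tensor::try_from(&soft)?;            // libtorch -> ratchet
    // A real test then compares a GPU result against `ground`:
    // ground.all_close(&gpu_result, 1e-5, 1e-5)?;
    println!("{:?}", ground.dt());
    Ok(())
}
```
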
From 7047532d5adef65f8f39480e2d187a1a117f0912 Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 00:44:19 +0200
Subject: [PATCH 02/21] support tch dtype conversion

---
 crates/ratchet-core/src/tensor.rs | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/crates/ratchet-core/src/tensor.rs b/crates/ratchet-core/src/tensor.rs
index 7bea37d0..b1ec9762 100644
--- a/crates/ratchet-core/src/tensor.rs
+++ b/crates/ratchet-core/src/tensor.rs
@@ -4,6 +4,7 @@ use crate::{
     InvariantError, LazyOp, MetaOperation, Operation, OperationError, RVec, RawCPUBuffer, Shape,
     Storage, Strides, TensorDType, TensorId,
 };
+use anyhow::bail;
 use derive_new::new;
 use ndarray::OwnedRepr;
 use parking_lot::{RwLock, RwLockReadGuard};
@@ -869,8 +870,18 @@ impl Tensor {
 impl TryFrom<&tch::Tensor> for Tensor {
     type Error = anyhow::Error;
     fn try_from(array: &tch::Tensor) -> anyhow::Result<Self> {
-        let base: ArrayBase<OwnedRepr<f32>, _> = array.try_into()?;
-        Ok(Self::from(base))
+        let kind = array.kind();
+        match kind {
+            tch::Kind::Float => {
+                let base: ArrayD<f32> = array.try_into()?;
+                Ok(Self::from(base))
+            }
+            tch::Kind::QInt8 => todo!(),
+            tch::Kind::Half => todo!(),
+            tch::Kind::BFloat16 => todo!(),
+            tch::Kind::Int => todo!(),
+            _ => bail!("unsupported tch dtype"),
+        }
     }
 }

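The `todo!()` arms mark kinds the tests don't exercise yet. Filling one in mirrors the `Float` arm; a hypothetical `Int` path, assuming tch's ndarray bridge is generic over element types the way it is for `f32` (unverified here, so treat it as a sketch):

```rust
// Hypothetical only -- not in the patch. Pulls a Kind::Int tensor through the
// same ndarray route the Float arm uses above.
use ndarray::ArrayD;

fn int_to_ndarray(array: &tch::Tensor) -> anyhow::Result<ArrayD<i32>> {
    anyhow::ensure!(array.kind() == tch::Kind::Int, "expected a Kind::Int tensor");
    Ok(array.try_into()?) // assumes ArrayD<i32>: TryFrom<&tch::Tensor>
}

fn main() -> anyhow::Result<()> {
    let t = tch::Tensor::from_slice(&[1i32, 2, 3]);
    assert_eq!(int_to_ndarray(&t)?.len(), 3);
    Ok(())
}
```
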
From 35af80e995abac6f2b6f578f71a72f4993f39163 Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 10:39:20 +0200
Subject: [PATCH 03/21] added binary tests tch

---
 crates/ratchet-core/src/ops/binary.rs | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/crates/ratchet-core/src/ops/binary.rs b/crates/ratchet-core/src/ops/binary.rs
index 89193f37..0e1fd0a7 100644
--- a/crates/ratchet-core/src/ops/binary.rs
+++ b/crates/ratchet-core/src/ops/binary.rs
@@ -151,9 +151,9 @@ impl MetaOperation for Binary {
     }
 }
 
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
-    use crate::{test_util::run_py_prg, BinaryOp, Device, DeviceRequest, Shape, Tensor};
+    use crate::{BinaryOp, Device, DeviceRequest, Shape, Tensor};
     use test_strategy::{proptest, Arbitrary};
 
     thread_local! {
@@ -168,16 +168,15 @@ mod tests {
     }
 
     fn ground_truth(a: &Tensor, b: &Tensor, op: &BinaryOp) -> anyhow::Result<Tensor> {
-        let kn = op.kernel_name();
-        let prg = format!(
-            r#"
-import torch
-def {}(a, b):
-    return torch.{}(torch.from_numpy(a), torch.from_numpy(b)).numpy()
-"#,
-            kn, kn
-        );
-        run_py_prg(prg.to_string(), &[a, b], &[])
+        let a = a.to_tch::<f32>()?;
+        let b = b.to_tch::<f32>()?;
+        let result = match op {
+            BinaryOp::Add => a.f_add(&b)?,
+            BinaryOp::Sub => a.f_sub(&b)?,
+            BinaryOp::Mul => a.f_mul(&b)?,
+            BinaryOp::Div => a.f_div(&b)?,
+        };
+        Tensor::try_from(&result)
     }
 
     fn run_binary_trial(prob: BinaryProblem) -> anyhow::Result<()> {

From 8aad89f75d6b7c365f1c454db76fd37e061ff76c Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 11:54:22 +0200
Subject: [PATCH 04/21] cleanup imports

---
 crates/ratchet-core/src/tensor.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/crates/ratchet-core/src/tensor.rs b/crates/ratchet-core/src/tensor.rs
index b1ec9762..86add21b 100644
--- a/crates/ratchet-core/src/tensor.rs
+++ b/crates/ratchet-core/src/tensor.rs
@@ -6,7 +6,6 @@ use crate::{
 };
 use anyhow::bail;
 use derive_new::new;
-use ndarray::OwnedRepr;
 use parking_lot::{RwLock, RwLockReadGuard};
 use std::collections::HashSet;
 use std::io::{BufRead, Seek};
@@ -17,7 +16,6 @@ use std::sync::Arc;
 #[cfg(feature = "rand")]
 use {rand::prelude::*, rand_distr::StandardNormal};
 
-use ndarray::ArrayBase;
 #[cfg(feature = "testing")]
 use ndarray::{ArrayD, ArrayViewD, Dimension};
 

From 3bb4f50b49b093bdb2861afaa20d96032ee7e3b4 Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 12:05:38 +0200
Subject: [PATCH 05/21] sgemm to tch tests

---
 crates/ratchet-core/src/ops/matmul.rs | 65 +++++++--------------------
 1 file changed, 16 insertions(+), 49 deletions(-)

diff --git a/crates/ratchet-core/src/ops/matmul.rs b/crates/ratchet-core/src/ops/matmul.rs
index 09a2c51f..51d7229a 100644
--- a/crates/ratchet-core/src/ops/matmul.rs
+++ b/crates/ratchet-core/src/ops/matmul.rs
@@ -594,12 +594,10 @@ impl MetaOperation for GEMM {
     }
 }
 
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
     use test_strategy::{proptest, Arbitrary};
 
-    use crate::test_util::run_py_prg;
-
     use crate::{shape, Device, DeviceRequest, Quantization, Quantizer};
 
     use super::*;
@@ -612,53 +610,22 @@ mod tests {
         trans_rhs: bool,
         trans_out: bool,
     ) -> anyhow::Result<Tensor> {
-        let a_op = if trans_lhs {
-            "torch.permute(torch.from_numpy(a), [0, 2, 1])"
-        } else {
-            "torch.from_numpy(a)"
-        };
-
-        let b_op = if trans_rhs {
-            "torch.permute(torch.from_numpy(b), [0, 2, 1])"
-        } else {
-            "torch.from_numpy(b)"
-        };
-
-        let inner = if bias.is_some() {
-            format!(
-                "torch.add(torch.matmul({}, {}), torch.from_numpy(bias))",
-                a_op, b_op
-            )
-        } else {
-            format!("torch.matmul({}, {})", a_op, b_op)
-        };
-
-        let result_op = if trans_out {
-            format!(
-                "np.ascontiguousarray(torch.permute({}, [0, 2, 1]).numpy())",
-                inner
-            )
-        } else {
-            format!("{}.numpy()", inner)
-        };
-
-        let prg = format!(
-            r#"
-import torch
-import numpy as np
-def matmul(a, b{}):
-    return {}"#,
-            if bias.is_some() { ", bias" } else { "" },
-            result_op
-        );
-
-        let args = if let Some(bias) = bias {
-            vec![a, b, bias]
-        } else {
-            vec![a, b]
-        };
-
-        run_py_prg(prg.to_string(), &args, &[])
+        let a = a.to_tch::<f32>()?;
+        let b = b.to_tch::<f32>()?;
+        let a = if trans_lhs { a.permute([0, 2, 1]) } else { a };
+        let b = if trans_rhs { b.permute([0, 2, 1]) } else { b };
+
+        let result = match bias {
+            Some(bias) => {
+                let bias = bias.to_tch::<f32>()?;
+                a.matmul(&b).f_add(&bias)?
+            }
+            None => a.matmul(&b),
+        };
+        if trans_out {
+            return Tensor::try_from(&result.permute([0, 2, 1]).contiguous());
+        }
+        Tensor::try_from(&result)
     }
 
     #[derive(Arbitrary, Clone, Debug)]

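For reference, the rewritten ground truth computes, with $T(\cdot)$ a transpose of the last two (batched) axes applied only when the corresponding flag is set,

$$
\mathrm{out} = T_{\mathrm{out}}\!\left(T_{\mathrm{lhs}}(A)\,T_{\mathrm{rhs}}(B) + \mathrm{bias}\right),
$$

the same quantity the removed string-built PyTorch program evaluated; the trailing `.contiguous()` plays the role of the old `np.ascontiguousarray`.
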
From ef60b168dbfc87f00f1886112e5e1e8e7a4155fc Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 12:32:48 +0200
Subject: [PATCH 06/21] norm to tch

---
 crates/ratchet-core/src/ops/norm/groupnorm.rs | 24 +++++-----
 crates/ratchet-core/src/ops/norm/mod.rs       | 55 +++++++++----------
 2 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/crates/ratchet-core/src/ops/norm/groupnorm.rs b/crates/ratchet-core/src/ops/norm/groupnorm.rs
index df94e0ed..e74451c9 100644
--- a/crates/ratchet-core/src/ops/norm/groupnorm.rs
+++ b/crates/ratchet-core/src/ops/norm/groupnorm.rs
@@ -30,11 +30,10 @@ impl Operation for GroupNorm {
         Ok(self.norm.input.storage_view().clone())
     }
 }
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
     use test_strategy::{proptest, Arbitrary};
 
-    use crate::test_util::run_py_prg;
     use crate::{rvec, shape, Device, DeviceRequest, Tensor};
 
     fn ground_truth(
@@ -43,20 +42,15 @@ mod tests {
         bias: Option<&Tensor>,
         num_groups: usize,
     ) -> anyhow::Result<Tensor> {
-        let prg = r#"
-import torch
-import torch.nn.functional as F
-
-def manual_group_norm(input, scale, bias, num_groups):
-    (input, scale, bias) = (torch.from_numpy(input), torch.from_numpy(scale), torch.from_numpy(bias))
-    return F.group_norm(input, num_groups, weight=scale, bias=bias).numpy()
-"#;
-
-        let inputs = match bias {
-            Some(bias) => rvec![input, scale, bias],
-            None => rvec![input, scale],
-        };
-        run_py_prg(prg.to_string(), &inputs, &[&num_groups])
+        let input = input.to_tch::<f32>()?;
+        let scale = scale.to_tch::<f32>()?;
+        let bias = match bias {
+            Some(b) => Some(b.to_tch::<f32>()?),
+            None => None,
+        };
+        let result =
+            input.f_group_norm(num_groups as i64, Some(&scale), bias.as_ref(), 1e-5, false)?;
+        Tensor::try_from(&result)
     }
 
     fn run_norm_trial(device: &Device, problem: GroupNormProblem) -> anyhow::Result<()> {

diff --git a/crates/ratchet-core/src/ops/norm/mod.rs b/crates/ratchet-core/src/ops/norm/mod.rs
index 7a2fe21f..7598cf41 100644
--- a/crates/ratchet-core/src/ops/norm/mod.rs
+++ b/crates/ratchet-core/src/ops/norm/mod.rs
@@ -180,11 +180,10 @@ impl MetaOperation for NormOp {
     }
 }
 
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
     use test_strategy::{proptest, Arbitrary};
 
-    use crate::test_util::run_py_prg;
     use crate::{rvec, shape, Device, DeviceRequest, Tensor};
 
     fn ground_truth(
@@ -193,35 +192,33 @@ mod tests {
         scale: &Tensor,
         bias: Option<&Tensor>,
     ) -> anyhow::Result<Tensor> {
-        let ln_prg = r#"
-import torch
-import torch.nn.functional as F
-
-def layer_norm(input, scale, bias):
-    (input, scale, bias) = (torch.from_numpy(input), torch.from_numpy(scale), torch.from_numpy(bias))
-    return F.layer_norm(input, (input.shape[-1],), weight=scale, bias=bias).numpy()
-"#;
-
-        let rms_prg = r#"
-import torch
-def manual_rms_norm(input, scale):
-    (input, scale) = (torch.from_numpy(input), torch.from_numpy(scale))
-    variance = input.to(torch.float32).pow(2).mean(dim=-1, keepdim=True)
-    input = input * torch.rsqrt(variance + 1e-5)
-    return (scale * input).numpy()
-"#;
-
-        let prg = match var {
-            NormVariant::LayerNorm => ln_prg,
-            NormVariant::RMSNorm => rms_prg,
-        };
-
-        let inputs = match bias {
-            Some(bias) => rvec![input, scale, bias],
-            None => rvec![input, scale],
-        };
-
-        run_py_prg(prg.to_string(), &inputs, &[])
+        let input = input.to_tch::<f32>()?;
+        let scale = scale.to_tch::<f32>()?;
+        let bias = match bias {
+            Some(b) => Some(b.to_tch::<f32>()?),
+            None => None,
+        };
+        let result = match var {
+            NormVariant::LayerNorm => input.f_layer_norm(
+                [*input.size().last().unwrap()],
+                Some(&scale),
+                bias.as_ref(),
+                1e-5,
+                false,
+            )?,
+            NormVariant::RMSNorm => {
+                // (input, scale) = (torch.from_numpy(input), torch.from_numpy(scale))
+                // variance = input.to(torch.float32).pow(2).mean(dim=-1, keepdim=True)
+                // input = input * torch.rsqrt(variance + 1e-5)
+                // return (scale * input).numpy()
+                let variance = input
+                    .f_pow_tensor_scalar(2)?
+                    .mean_dim(-1, true, input.kind());
+                let input = input.multiply(&variance.f_add_scalar(1e-5)?.rsqrt());
+                scale.multiply(&input)
+            }
+        };
+        Tensor::try_from(&result)
     }
 
     fn run_norm_trial(device: &Device, problem: NormProblem) -> anyhow::Result<()> {

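The inlined RMSNorm branch computes, per row over the last axis and matching both the tch call chain and the Python it carries as a comment (with $\varepsilon = 10^{-5}$ and $\gamma$ the scale):

$$
\mathrm{RMSNorm}(x) = \gamma \odot \frac{x}{\sqrt{\operatorname{mean}(x^{2}) + \varepsilon}}
$$
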
From 70576c2e12ba5da628075389f64417524d169922 Mon Sep 17 00:00:00 2001
From: aminediro
Date: Sat, 11 May 2024 12:43:25 +0200
Subject: [PATCH 07/21] added unary ops

---
 crates/ratchet-core/src/ops/unary.rs | 58 ++++++++++++----------------
 1 file changed, 24 insertions(+), 34 deletions(-)

diff --git a/crates/ratchet-core/src/ops/unary.rs b/crates/ratchet-core/src/ops/unary.rs
index ac258206..bed6d8b0 100644
--- a/crates/ratchet-core/src/ops/unary.rs
+++ b/crates/ratchet-core/src/ops/unary.rs
@@ -156,12 +156,11 @@ impl MetaOperation for Unary {
     }
 }
 
-#[cfg(all(test, feature = "pyo3"))]
+#[cfg(all(test, feature = "testing"))]
 mod tests {
+    use crate::{shape, Device, DeviceRequest, Tensor, UnaryOp};
     use test_strategy::{proptest, Arbitrary};
 
-    use crate::{shape, test_util::run_py_prg, Device, DeviceRequest, Tensor, UnaryOp};
-
     #[derive(Arbitrary, Debug)]
     struct UnaryProblem {
         op: UnaryOp,
@@ -173,33 +172,28 @@ mod tests {
         N: usize,
     }
 
-    fn ground_truth(a: &Tensor, op: &UnaryOp, args: &str) -> anyhow::Result<Tensor> {
-        let kn = op.kernel_name();
-        let func_prg = format!(
-            r#"
-import torch
-import torch.nn.functional as F
-def {}(a):
-    return F.{}(torch.from_numpy(a), {}).numpy()
-"#,
-            kn, kn, args,
-        );
-
-        let imp_prg = format!(
-            r#"
-import torch
-def {}(a):
-    return torch.{}(torch.from_numpy(a), {}).numpy()
-"#,
-            kn, kn, args,
-        );
-
-        let prg = match op {
-            UnaryOp::Gelu | UnaryOp::Silu | UnaryOp::Sigmoid => func_prg,
-            _ => imp_prg,
-        };
-
-        run_py_prg(prg.to_string(), &[a], &[])
+    fn ground_truth(a: &Tensor, op: &UnaryOp) -> anyhow::Result<Tensor> {
+        let a = a.to_tch::<f32>()?;
+        let result = match op {
+            UnaryOp::Gelu => {
+                // UnaryOp::Gelu => "approximate=\"tanh\"",
+                a.f_gelu("tanh")?
+            }
+            UnaryOp::Tanh => a.tanh(),
+            UnaryOp::Exp => a.exp(),
+            UnaryOp::Log => a.log(),
+            UnaryOp::Sin => a.sin(),
+            UnaryOp::Cos => a.cos(),
+            UnaryOp::Abs => a.abs(),
+            UnaryOp::Sqrt => a.sqrt(),
+            UnaryOp::Relu => a.relu(),
+            UnaryOp::Floor => a.floor(),
+            UnaryOp::Ceil => a.ceil(),
+            UnaryOp::Neg => a.neg(),
+            UnaryOp::Silu => a.silu(),
+            UnaryOp::Sigmoid => a.sigmoid(),
+        };
+        Tensor::try_from(&result)
     }
 
     thread_local! {
@@ -212,11 +206,7 @@ mod tests {
         println!("op: {:?}, B: {}, M: {}, N: {}", op, B, M, N);
         let a = Tensor::randn::<f32>(shape![B, M], Device::CPU);
 
-        let args = match op {
-            UnaryOp::Gelu => "approximate=\"tanh\"",
-            _ => "",
-        };
-        let ground = ground_truth(&a, &op, args)?;
+        let ground = ground_truth(&a, &op)?;
 
         let a_gpu = a.to(&device)?;
         let c_gpu = match op {

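`f_gelu("tanh")` carries over the removed `approximate="tanh"` argument: both select the tanh approximation

$$
\mathrm{GELU}_{\tanh}(x) = \frac{x}{2}\left(1 + \tanh\!\left(\sqrt{2/\pi}\,\left(x + 0.044715\,x^{3}\right)\right)\right)
$$

rather than the exact erf-based definition, keeping the reference aligned with kernels that use the same approximation.
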
From f62e3f0f1f65c257e88e661e68064b414eb05063 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:00:35 +0100
Subject: [PATCH 08/21] chore: migrate more ops

---
 crates/ratchet-core/src/ops/concat.rs         | 30 +++++++------------
 crates/ratchet-core/src/ops/conv.rs           | 23 ++++-----------
 crates/ratchet-core/src/ops/norm/groupnorm.rs | 18 +++++------
 crates/ratchet-core/src/ops/norm/mod.rs       |  6 +----
 crates/ratchet-core/src/ops/softmax.rs        |  3 +--
 crates/ratchet-core/src/tensor.rs             |  8 ++---
 6 files changed, 30 insertions(+), 58 deletions(-)

diff --git a/crates/ratchet-core/src/ops/concat.rs b/crates/ratchet-core/src/ops/concat.rs
index 0ab39cd0..cd190572 100644
--- a/crates/ratchet-core/src/ops/concat.rs
+++ b/crates/ratchet-core/src/ops/concat.rs
@@ -141,7 +141,8 @@ impl MetaOperation for Concat {
 
 #[cfg(all(test, feature = "pyo3"))]
 mod tests {
-    use crate::{rvec, shape, test_util::run_py_prg, Device, DeviceRequest, Tensor};
+    use crate::{rvec, shape, Device, DeviceRequest, Tensor};
+    use tch::Tensor as TchTensor;
 
     thread_local! {
         static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
@@ -157,22 +158,13 @@ mod tests {
         dim: usize,
     }
 
-    fn ground_truth(to_cat: &[&Tensor], args: &str) -> anyhow::Result<Tensor> {
-        let prg = format!(
-            r#"
-import torch
-import numpy as np
-def permute(t0, t1, t2, t3, t4):
-    t0 = torch.from_numpy(t0)
-    t1 = torch.from_numpy(t1)
-    t2 = torch.from_numpy(t2)
-    t3 = torch.from_numpy(t3)
-    t4 = torch.from_numpy(t4)
-    return np.ascontiguousarray(torch.cat((t0, t1, t2, t3, t4), dim={}).numpy())
-"#,
-            args
-        );
-        run_py_prg(prg.to_string(), to_cat, &[])
+    fn ground_truth(to_cat: &[&Tensor], dim: i64) -> anyhow::Result<Tensor> {
+        let tch_tensors = to_cat
+            .iter()
+            .map(|x| x.to_tch::<f32>())
+            .collect::<Result<Vec<_>, _>>()?;
+
+        Tensor::try_from(TchTensor::cat(&tch_tensors, dim))
     }
 
     fn run_concat_trial(prob: ConcatProblem) -> anyhow::Result<()> {
@@ -185,9 +177,7 @@ def permute(t0, t1, t2, t3, t4):
             dim,
         } = prob;
         let device = GPU_DEVICE.with(|d| d.clone());
-
-        let arg_str = format!("{}", dim);
-        let ground = ground_truth(&[&t0, &t1, &t2, &t3, &t4], arg_str.as_str())?;
+        let ground = ground_truth(&[&t0, &t1, &t2, &t3, &t4], dim as _)?;
 
         t0 = t0.to(&device)?;
         t1 = t1.to(&device)?;

diff --git a/crates/ratchet-core/src/ops/conv.rs b/crates/ratchet-core/src/ops/conv.rs
index ba2de4d4..f14e14dc 100644
--- a/crates/ratchet-core/src/ops/conv.rs
+++ b/crates/ratchet-core/src/ops/conv.rs
@@ -127,10 +127,8 @@ impl MetaOperation for Conv {
 
 #[cfg(all(test, feature = "pyo3"))]
 mod tests {
-    use test_strategy::{proptest, Arbitrary};
-
-    use crate::test_util::run_py_prg;
     use crate::{shape, Device, DeviceRequest, Tensor};
+    use test_strategy::{proptest, Arbitrary};
 
     fn ground_truth(
         input: &Tensor,
@@ -139,20 +137,11 @@ mod tests {
         stride: usize,
         padding: usize,
     ) -> anyhow::Result<Tensor> {
-        let prg = r#"
-import torch
-import torch.nn.functional as F
-def conv(input, filters, bias, stride, padding):
-    input = torch.from_numpy(input)
-    filters = torch.from_numpy(filters)
-    bias = torch.from_numpy(bias)
-    return F.conv1d(input, filters, bias, stride=stride, padding=padding).numpy()
-"#;
-        run_py_prg(
-            prg.to_string(),
-            &[input, filters, bias],
-            &[&stride, &padding],
-        )
+        let i_tch = input.to_tch::<f32>()?;
+        let f_tch = filters.to_tch::<f32>()?;
+        let b_tch = bias.to_tch::<f32>()?;
+
+        Tensor::try_from(i_tch.conv1d(&f_tch, Some(b_tch), stride as i64, padding as i64, 0, 1))
     }
 
     fn run_conv_trial(device: &Device, problem: ConvProblem) {

diff --git a/crates/ratchet-core/src/ops/norm/groupnorm.rs b/crates/ratchet-core/src/ops/norm/groupnorm.rs
index e74451c9..759d0a61 100644
--- a/crates/ratchet-core/src/ops/norm/groupnorm.rs
+++ b/crates/ratchet-core/src/ops/norm/groupnorm.rs
@@ -1,8 +1,6 @@
+use crate::{DType, Norm, OpGuards, Operation, OperationError, StorageView};
 use derive_new::new;
 
-use super::*;
-use crate::{DType, OpGuards, Operation, OperationError, StorageView, Tensor};
-
 #[derive(new, Debug, Clone)]
 pub struct GroupNorm {
     pub norm: Norm,
@@ -19,9 +17,10 @@ impl OpGuards for GroupNorm {
     fn check_dtypes(&self) {
         assert!(self.norm.input.dt() == DType::F32);
         assert!(self.norm.scale.dt() == DType::F32);
-        if self.norm.bias.is_some() {
-            assert!(self.norm.bias.as_ref().unwrap().dt() == DType::F32);
-        }
+        self.norm
+            .bias
+            .as_ref()
+            .map(|b| assert!(b.dt() == DType::F32));
     }
 }
 
@@ -32,9 +31,8 @@ impl Operation for GroupNorm {
     }
 }
 #[cfg(all(test, feature = "testing"))]
 mod tests {
-    use test_strategy::{proptest, Arbitrary};
-
     use crate::{rvec, shape, Device, DeviceRequest, Tensor};
+    use test_strategy::{proptest, Arbitrary};
 
     fn ground_truth(
         input: &Tensor,
@@ -50,7 +48,7 @@ mod tests {
         };
         let result =
             input.f_group_norm(num_groups as i64, Some(&scale), bias.as_ref(), 1e-5, false)?;
-        Tensor::try_from(&result)
+        Tensor::try_from(result)
     }
 
     fn run_norm_trial(device: &Device, problem: GroupNormProblem) -> anyhow::Result<()> {
@@ -83,7 +81,7 @@ mod tests {
 
     #[derive(Arbitrary, Debug)]
     struct GroupNormProblem {
-        #[map(|num_groups: u32| #C/2 )]
+        #[map(|_num_groups: u32| #C/2 )]
         num_groups: usize,
         #[strategy(1..=1usize)]
         B: usize,

diff --git a/crates/ratchet-core/src/ops/norm/mod.rs b/crates/ratchet-core/src/ops/norm/mod.rs
index 7598cf41..eb44195b 100644
--- a/crates/ratchet-core/src/ops/norm/mod.rs
+++ b/crates/ratchet-core/src/ops/norm/mod.rs
@@ -207,10 +207,6 @@ mod tests {
             )?,
             NormVariant::RMSNorm => {
-                // (input, scale) = (torch.from_numpy(input), torch.from_numpy(scale))
-                // variance = input.to(torch.float32).pow(2).mean(dim=-1, keepdim=True)
-                // input = input * torch.rsqrt(variance + 1e-5)
-                // return (scale * input).numpy()
                 let variance = input
                     .f_pow_tensor_scalar(2)?
                     .mean_dim(-1, true, input.kind());
@@ -218,7 +214,7 @@ mod tests {
                 scale.multiply(&input)
             }
         };
-        Tensor::try_from(&result)
+        Tensor::try_from(result)
     }
 
     fn run_norm_trial(device: &Device, problem: NormProblem) -> anyhow::Result<()> {

diff --git a/crates/ratchet-core/src/ops/softmax.rs b/crates/ratchet-core/src/ops/softmax.rs
index 1addc7ad..cdbf81af 100644
--- a/crates/ratchet-core/src/ops/softmax.rs
+++ b/crates/ratchet-core/src/ops/softmax.rs
@@ -107,7 +107,6 @@ impl MetaOperation for Softmax {
 #[cfg(all(test, feature = "testing"))]
 mod tests {
     use crate::{shape, Device, DeviceRequest, Tensor};
-    use tch;
     use test_strategy::{proptest, Arbitrary};
 
     thread_local! {
@@ -116,7 +115,7 @@ mod tests {
 
     fn ground_truth(a: &Tensor) -> anyhow::Result<Tensor> {
         let t = a.to_tch::<f32>()?;
-        Tensor::try_from(&t.softmax(-1, Some(tch::kind::Kind::Float)))
+        Tensor::try_from(t.softmax(-1, Some(tch::kind::Kind::Float)))
     }
 
     fn run_softmax_trial(problem: SoftmaxProblem) {

diff --git a/crates/ratchet-core/src/tensor.rs b/crates/ratchet-core/src/tensor.rs
index 86add21b..2baa0483 100644
--- a/crates/ratchet-core/src/tensor.rs
+++ b/crates/ratchet-core/src/tensor.rs
@@ -865,13 +865,13 @@ impl Tensor {
 }
 
 #[cfg(feature = "testing")]
-impl TryFrom<&tch::Tensor> for Tensor {
+impl TryFrom<tch::Tensor> for Tensor {
     type Error = anyhow::Error;
-    fn try_from(array: &tch::Tensor) -> anyhow::Result<Self> {
-        let kind = array.kind();
+    fn try_from(t: tch::Tensor) -> anyhow::Result<Self> {
+        let kind = t.kind();
         match kind {
             tch::Kind::Float => {
-                let base: ArrayD<f32> = array.try_into()?;
+                let base: ArrayD<f32> = (&t).try_into()?;
                 Ok(Self::from(base))
             }
             tch::Kind::QInt8 => todo!(),

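One detail worth noting in this patch: `TryFrom` now takes `tch::Tensor` by value, since tch ops return owned tensors and the call sites no longer need a `&result` binding. A standalone illustration of that ergonomic point using plain tch (the ratchet types are elided; this is a sketch, not project code):

```rust
// tch ops return owned tensors, so a by-value conversion lets the result feed
// straight into `Tensor::try_from(...)` without a temporary.
use tch::{Kind, Tensor};

fn main() -> anyhow::Result<()> {
    let t = Tensor::from_slice(&[1f32, 2.0, 3.0]);
    let soft = t.softmax(-1, Some(Kind::Float)); // owned value, ready to move
    let probs = Vec::<f32>::try_from(&soft)?;
    assert!((probs.iter().sum::<f32>() - 1.0).abs() < 1e-6);
    Ok(())
}
```
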
From 8ed7e938a6e0bcd8ebfe12fea0eb8177f3164ebe Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:17:36 +0100
Subject: [PATCH 09/21] chore: migrate more ops

---
 crates/ratchet-core/src/ops/binary.rs         |  2 +-
 crates/ratchet-core/src/ops/matmul.rs         |  4 ++--
 .../ratchet-core/src/ops/reindex/broadcast.rs | 19 ++++++-------------
 .../ratchet-core/src/ops/reindex/permute.rs   | 18 ++++++------------
 crates/ratchet-core/src/ops/unary.rs          |  2 +-
 5 files changed, 16 insertions(+), 29 deletions(-)

diff --git a/crates/ratchet-core/src/ops/binary.rs b/crates/ratchet-core/src/ops/binary.rs
index 0e1fd0a7..561c172f 100644
--- a/crates/ratchet-core/src/ops/binary.rs
+++ b/crates/ratchet-core/src/ops/binary.rs
@@ -176,7 +176,7 @@ mod tests {
             BinaryOp::Mul => a.f_mul(&b)?,
             BinaryOp::Div => a.f_div(&b)?,
         };
-        Tensor::try_from(&result)
+        Tensor::try_from(result)
     }
 
     fn run_binary_trial(prob: BinaryProblem) -> anyhow::Result<()> {

diff --git a/crates/ratchet-core/src/ops/matmul.rs b/crates/ratchet-core/src/ops/matmul.rs
index 51d7229a..b0dadbd0 100644
--- a/crates/ratchet-core/src/ops/matmul.rs
+++ b/crates/ratchet-core/src/ops/matmul.rs
@@ -623,9 +623,9 @@ mod tests {
             None => a.matmul(&b),
         };
         if trans_out {
-            return Tensor::try_from(&result.permute([0, 2, 1]).contiguous());
+            return Tensor::try_from(result.permute([0, 2, 1]).contiguous());
         }
-        Tensor::try_from(&result)
+        Tensor::try_from(result)
     }
 
     #[derive(Arbitrary, Clone, Debug)]

diff --git a/crates/ratchet-core/src/ops/reindex/broadcast.rs b/crates/ratchet-core/src/ops/reindex/broadcast.rs
index 1c92f014..14c43934 100644
--- a/crates/ratchet-core/src/ops/reindex/broadcast.rs
+++ b/crates/ratchet-core/src/ops/reindex/broadcast.rs
@@ -84,18 +84,11 @@ mod tests {
         op: Broadcast,
     }
 
-    fn ground_truth(a: &Tensor, args: &str) -> anyhow::Result<Tensor> {
-        let prg = format!(
-            r#"
-import torch
-import numpy as np
-def slice(a):
-    torch_a = torch.from_numpy(a)
-    return np.ascontiguousarray(torch_a.broadcast_to({}).numpy())
-"#,
-            args
-        );
-        run_py_prg(prg.to_string(), &[a], &[])
+    fn ground_truth(a: &Tensor, shape: &Shape) -> anyhow::Result<Tensor> {
+        let a_tch = a.to_tch::<f32>()?;
+        let dims = shape.iter().map(|&x| x as i64).collect::<Vec<_>>();
+        let broadcasted = a_tch.broadcast_to(dims).contiguous();
+        Tensor::try_from(broadcasted)
     }
 
     fn run_reindex_trial(prob: BroadcastProblem) -> anyhow::Result<()> {
@@ -105,7 +98,7 @@ mod tests {
         let device = GPU_DEVICE.with(|d| d.clone());
         let a_gpu = a.to(&device)?;
 
-        let ground = ground_truth(&a, &op.to.as_torch())?;
+        let ground = ground_truth(&a, &op.to)?;
         let ours = a_gpu.broadcast_to(op.to.clone())?.resolve()?;
         let d_gpu = ours.to(&Device::CPU)?;
         ground.all_close(&d_gpu, 1e-5, 1e-5)?;

diff --git a/crates/ratchet-core/src/ops/reindex/permute.rs b/crates/ratchet-core/src/ops/reindex/permute.rs
index 6e73fdcf..0f38451f 100644
--- a/crates/ratchet-core/src/ops/reindex/permute.rs
+++ b/crates/ratchet-core/src/ops/reindex/permute.rs
@@ -83,17 +83,11 @@ mod tests {
         op: Permute,
     }
 
-    fn ground_truth(a: &Tensor, args: &str) -> anyhow::Result<Tensor> {
-        let prg = format!(
-            r#"
-import torch
-import numpy as np
-def permute(a):
-    return np.ascontiguousarray(torch.permute(torch.from_numpy(a), {}).numpy())
-"#,
-            args
-        );
-        run_py_prg(prg.to_string(), &[a], &[])
+    fn ground_truth(a: &Tensor, dims: &[usize]) -> anyhow::Result<Tensor> {
+        let tch_dims = dims.iter().map(|&x| x as i64).collect::<Vec<_>>();
+        let a_tch = a.to_tch::<f32>()?;
+        let permuted = a_tch.permute(&tch_dims).contiguous();
+        Tensor::try_from(permuted)
     }
 
     fn run_reindex_trial(prob: PermuteProblem) -> anyhow::Result<()> {
@@ -102,7 +96,7 @@ mod tests {
         let a = op.src.clone();
 
         let a_gpu = a.to(&device)?;
-        let ground = ground_truth(&a, format!("{:?}", op.dims).as_str())?;
+        let ground = ground_truth(&a, &op.dims)?;
         let ours = a_gpu.permute(&op.dims)?.resolve()?;
         let d_gpu = ours.to(&Device::CPU)?;
         ground.all_close(&d_gpu, 1e-5, 1e-5)?;

diff --git a/crates/ratchet-core/src/ops/unary.rs b/crates/ratchet-core/src/ops/unary.rs
index bed6d8b0..af61efef 100644
--- a/crates/ratchet-core/src/ops/unary.rs
+++ b/crates/ratchet-core/src/ops/unary.rs
@@ -193,7 +193,7 @@ mod tests {
             UnaryOp::Silu => a.silu(),
             UnaryOp::Sigmoid => a.sigmoid(),
         };
-        Tensor::try_from(&result)
+        Tensor::try_from(result)
     }
 
     thread_local! {

From 9c4318f5a81e4ebb94f12eb3d8311a157e3fe9c1 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:31:55 +0100
Subject: [PATCH 10/21] chore: migrate more ops

---
 .../ratchet-core/src/ops/reindex/broadcast.rs |  2 +-
 crates/ratchet-core/src/ops/reindex/slice.rs  | 43 ++++++-------------
 2 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/crates/ratchet-core/src/ops/reindex/broadcast.rs b/crates/ratchet-core/src/ops/reindex/broadcast.rs
index 14c43934..50ffc69e 100644
--- a/crates/ratchet-core/src/ops/reindex/broadcast.rs
+++ b/crates/ratchet-core/src/ops/reindex/broadcast.rs
@@ -43,7 +43,7 @@ mod tests {
     };
     use test_strategy::proptest;
 
-    use crate::{shape, test_util::run_py_prg, Broadcast, Device, DeviceRequest, Shape, Tensor};
+    use crate::{shape, Broadcast, Device, DeviceRequest, Shape, Tensor};
 
     thread_local! {
         static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
     }

diff --git a/crates/ratchet-core/src/ops/reindex/slice.rs b/crates/ratchet-core/src/ops/reindex/slice.rs
index d0f924aa..37e7624b 100644
--- a/crates/ratchet-core/src/ops/reindex/slice.rs
+++ b/crates/ratchet-core/src/ops/reindex/slice.rs
@@ -50,29 +50,16 @@ impl Operation for Slice {
 mod tests {
     use std::ops::Range;
 
-    use crate::{test_util::run_py_prg, Device, DeviceRequest, Tensor};
-    use crate::{Shape, Slice};
+    use crate::{Device, DeviceRequest, Tensor};
+    use crate::{RVec, Shape, Slice};
     use proptest::prelude::*;
+    use tch::IndexOp;
     use test_strategy::proptest;
 
     thread_local! {
         static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
     }
 
-    impl Slice {
-        fn as_torch(&self) -> String {
-            let mut s = String::from("[");
-            for (idx, range) in self.indices.iter().enumerate() {
-                if idx > 0 {
-                    s.push_str(", ");
-                }
-                s.push_str(&format!("{}:{}", range.start, range.end));
-            }
-            s.push(']');
-            s
-        }
-    }
-
     #[derive(Debug, Clone)]
     pub struct SubSlice(pub Range<usize>);
 
@@ -126,28 +113,24 @@ mod tests {
         }
     }
 
-    fn ground_truth(a: &Tensor, args: &str) -> anyhow::Result<Tensor> {
-        let prg = format!(
-            r#"
-import torch
-import numpy as np
-def slice(a):
-    torch_a = torch.from_numpy(a)
-    return np.ascontiguousarray(torch_a{})
-"#,
-            args
-        );
-        run_py_prg(prg.to_string(), &[a], &[])
+    fn ground_truth(a: &Tensor, indices: &[Range<usize>]) -> anyhow::Result<Tensor> {
+        let a_tch = a.to_tch::<f32>()?;
+        let mut ci = indices
+            .iter()
+            .map(|range| (range.start as i64)..(range.end as i64))
+            .collect::<Vec<_>>();
+        let tch_indices = (ci.remove(0), ci.remove(0), ci.remove(0), ci.remove(0));
+        let sliced = a_tch.i(tch_indices).contiguous();
+        Tensor::try_from(sliced)
     }
 
     fn run_reindex_trial(prob: SliceProblem) -> anyhow::Result<()> {
         let SliceProblem { op } = prob;
-        println!("SLICE PROBLEM: {:?}", op);
         let device = GPU_DEVICE.with(|d| d.clone());
         let a = op.src.clone();
         let a_gpu = a.to(&device)?;
-        let ground = ground_truth(&a, &op.as_torch())?;
+        let ground = ground_truth(&a, &op.indices)?;
         let ours = a_gpu.slice(&op.indices)?.resolve()?;
         let d_gpu = ours.to(&Device::CPU)?;
         ground.all_close(&d_gpu, 1e-5, 1e-5)?;

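The tuple handed to `.i(...)` pins the input rank at four, one range per dimension, which assumes the generated slice problems are always rank-4. A self-contained illustration with plain tch — names and shapes here are made up for the example:

```rust
// Tuple indexing with tch's IndexOp: each range slices one dimension,
// mirroring `t[a:b, c:d, e:f, g:h]` in Python.
use tch::{IndexOp, Tensor};

fn main() {
    let t = Tensor::arange(16, (tch::Kind::Float, tch::Device::Cpu)).reshape([2, 2, 2, 2]);
    let s = t.i((0..1, 0..2, 1..2, 0..2)); // shape [1, 2, 1, 2]
    assert_eq!(s.size(), vec![1, 2, 1, 2]);
}
```
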
From 2bc17e17a02f3b28eed656b75496cad4a7158042 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:40:43 +0100
Subject: [PATCH 11/21] chore: migrate more ops

---
 crates/ratchet-core/src/dtype/mod.rs           | 17 ++++++++++-------
 crates/ratchet-core/src/ops/reindex/permute.rs |  2 +-
 crates/ratchet-core/src/ops/select.rs          | 13 ++++---------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/crates/ratchet-core/src/dtype/mod.rs b/crates/ratchet-core/src/dtype/mod.rs
index 6a03a2ee..dbd2b5b6 100644
--- a/crates/ratchet-core/src/dtype/mod.rs
+++ b/crates/ratchet-core/src/dtype/mod.rs
@@ -19,6 +19,7 @@ pub enum DType {
     I32,
     U32,
     GGUF(gguf::GGUFDType),
+    I64,
 }
 
 impl DType {
@@ -52,6 +53,7 @@ impl DType {
             DType::I32 => 4,
             DType::U32 => 4,
             DType::GGUF(g) => g.size_of(),
+            DType::I64 => 8,
         }
     }
 
@@ -111,7 +113,7 @@ pub trait TensorDType:
     fn one() -> Self;
 }
 
-macro_rules! map_type {
+macro_rules! tensor_dt {
     ($t:ty, $v:ident) => {
         impl TensorDType for $t {
             fn dt() -> DType {
@@ -125,7 +127,7 @@ macro_rules! map_type {
     };
 }
 
-macro_rules! map_half_type {
+macro_rules! tensor_half_dt {
     ($t:ty, $v:ident) => {
         impl TensorDType for $t {
             fn dt() -> DType {
@@ -139,11 +141,12 @@ macro_rules! map_half_type {
     };
 }
 
-map_type!(f32, F32);
-map_type!(i32, I32);
-map_type!(u32, U32);
-map_half_type!(f16, F16);
-map_half_type!(bf16, BF16);
+tensor_dt!(f32, F32);
+tensor_dt!(i32, I32);
+tensor_dt!(u32, U32);
+tensor_dt!(i64, I64);
+tensor_half_dt!(f16, F16);
+tensor_half_dt!(bf16, BF16);
 
 //Handy trait for WebGPU buffer alignment
 pub trait Align {

diff --git a/crates/ratchet-core/src/ops/reindex/permute.rs b/crates/ratchet-core/src/ops/reindex/permute.rs
index 0f38451f..6cdd0caf 100644
--- a/crates/ratchet-core/src/ops/reindex/permute.rs
+++ b/crates/ratchet-core/src/ops/reindex/permute.rs
@@ -55,7 +55,7 @@ impl OpGuards for Permute {
 
 #[cfg(all(test, feature = "pyo3"))]
 mod tests {
-    use crate::{test_util::run_py_prg, Device, DeviceRequest, Permute, Shape, Tensor};
+    use crate::{Device, DeviceRequest, Permute, Shape, Tensor};
     use proptest::prelude::*;
     use test_strategy::{proptest, Arbitrary};
 

diff --git a/crates/ratchet-core/src/ops/select.rs b/crates/ratchet-core/src/ops/select.rs
index 191f82da..2c46e820 100644
--- a/crates/ratchet-core/src/ops/select.rs
+++ b/crates/ratchet-core/src/ops/select.rs
@@ -123,6 +123,7 @@ mod tests {
 
     use crate::test_util::run_py_prg;
     use crate::{rvec, shape, Device, DeviceRequest, Quantization, Quantizer, Shape, Tensor};
+    use tch::Tensor as TchTensor;
 
     thread_local! {
         static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
@@ -148,15 +149,9 @@ mod tests {
     }
 
     fn ground_truth(input: &Tensor, indices: &Tensor, dim: usize) -> anyhow::Result<Tensor> {
-        let prg = format!(
-            r#"
-import torch
-def index_select(input, indices):
-    return torch.index_select(torch.from_numpy(input),{},torch.from_numpy(indices)).numpy()
-"#,
-            dim
-        );
-        run_py_prg(prg.to_string(), &[input, indices], &[])
+        let tch_input = input.to_tch::<f32>()?;
+        let tch_indices = indices.to_tch::<i32>()?;
+        Tensor::try_from(TchTensor::index_select(&tch_input, dim as i64, &tch_indices).contiguous())
     }
 
     fn run_index_select_trial(problem: IndexSelectProblem, quantize: bool) {

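Converting the indices with a 32-bit kind works because `torch.index_select` accepts `IntTensor` as well as `LongTensor` indices. A standalone check with plain tch, purely illustrative:

```rust
// index_select with Kind::Int (i32) indices; Int64 indices behave the same.
use tch::Tensor;

fn main() -> anyhow::Result<()> {
    let x = Tensor::from_slice(&[10f32, 20.0, 30.0, 40.0]).reshape([2, 2]);
    let idx = Tensor::from_slice(&[1i32]);
    let row = x.index_select(0, &idx); // second row: [30, 40]
    assert_eq!(Vec::<f32>::try_from(&row.flatten(0, -1))?, vec![30.0, 40.0]);
    Ok(())
}
```
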
From c4ae38b4a53b01976b58688405b6a7619dcb9641 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:43:06 +0100
Subject: [PATCH 12/21] chore: migrate more ops

---
 crates/ratchet-core/src/ops/select.rs | 1 -
 crates/ratchet-core/src/ops/unary.rs  | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/crates/ratchet-core/src/ops/select.rs b/crates/ratchet-core/src/ops/select.rs
index 2c46e820..196ca42d 100644
--- a/crates/ratchet-core/src/ops/select.rs
+++ b/crates/ratchet-core/src/ops/select.rs
@@ -121,7 +121,6 @@ mod tests {
     use proptest::strategy::{BoxedStrategy, Just, Strategy};
     use test_strategy::proptest;
 
-    use crate::test_util::run_py_prg;
     use crate::{rvec, shape, Device, DeviceRequest, Quantization, Quantizer, Shape, Tensor};
     use tch::Tensor as TchTensor;
 

diff --git a/crates/ratchet-core/src/ops/unary.rs b/crates/ratchet-core/src/ops/unary.rs
index af61efef..315bddeb 100644
--- a/crates/ratchet-core/src/ops/unary.rs
+++ b/crates/ratchet-core/src/ops/unary.rs
@@ -175,10 +175,7 @@ mod tests {
     fn ground_truth(a: &Tensor, op: &UnaryOp) -> anyhow::Result<Tensor> {
         let a = a.to_tch::<f32>()?;
         let result = match op {
-            UnaryOp::Gelu => {
-                // UnaryOp::Gelu => "approximate=\"tanh\"",
-                a.f_gelu("tanh")?
-            }
+            UnaryOp::Gelu => a.f_gelu("tanh")?,
            UnaryOp::Tanh => a.tanh(),
             UnaryOp::Exp => a.exp(),
             UnaryOp::Log => a.log(),

From 4f3b06dc801e90fa07e98a607b7925f3824ae860 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:43:34 +0100
Subject: [PATCH 13/21] chore: clip

---
 crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs   | 6 ++++++
 crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs | 6 ++++++
 crates/ratchet-core/src/gpu/pools/bind_group_pool.rs        | 6 ++++++
 crates/ratchet-core/src/gpu/pools/buffer_pool.rs            | 6 ++++++
 crates/ratchet-core/src/gpu/pools/pipeline_pool.rs          | 6 ++++++
 crates/ratchet-core/src/ops/reindex/permute.rs              | 2 +-
 crates/ratchet-core/src/ops/reindex/slice.rs                | 2 +-
 7 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs b/crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs
index 4ee9f65f..f62d3a3e 100644
--- a/crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs
+++ b/crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs
@@ -21,6 +21,12 @@ pub struct BufferAllocator {
     pool: RwLock,
 }
 
+impl Default for BufferAllocator {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl BufferAllocator {
     pub fn new() -> Self {
         Self {

diff --git a/crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs b/crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs
index 7940cfa3..056d93a7 100644
--- a/crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs
+++ b/crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs
@@ -106,6 +106,12 @@ pub struct BindGroupLayoutPool {
         StaticResourcePool,
 }
 
+impl Default for BindGroupLayoutPool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl BindGroupLayoutPool {
     pub fn new() -> Self {
         Self {

diff --git a/crates/ratchet-core/src/gpu/pools/bind_group_pool.rs b/crates/ratchet-core/src/gpu/pools/bind_group_pool.rs
index 05f6d647..850d9752 100644
--- a/crates/ratchet-core/src/gpu/pools/bind_group_pool.rs
+++ b/crates/ratchet-core/src/gpu/pools/bind_group_pool.rs
@@ -85,6 +85,12 @@ pub struct BindGroupPool {
     inner: DynamicResourcePool,
 }
 
+impl Default for BindGroupPool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl BindGroupPool {
     pub fn new() -> Self {
         Self {

diff --git a/crates/ratchet-core/src/gpu/pools/buffer_pool.rs b/crates/ratchet-core/src/gpu/pools/buffer_pool.rs
index fdfa4614..8ff79349 100644
--- a/crates/ratchet-core/src/gpu/pools/buffer_pool.rs
+++ b/crates/ratchet-core/src/gpu/pools/buffer_pool.rs
@@ -60,6 +60,12 @@ pub struct BufferPool {
     inner: DynamicResourcePool,
 }
 
+impl Default for BufferPool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl BufferPool {
     pub fn new() -> Self {
         Self {

diff --git a/crates/ratchet-core/src/gpu/pools/pipeline_pool.rs b/crates/ratchet-core/src/gpu/pools/pipeline_pool.rs
index e22580e6..c1d5b662 100644
--- a/crates/ratchet-core/src/gpu/pools/pipeline_pool.rs
+++ b/crates/ratchet-core/src/gpu/pools/pipeline_pool.rs
@@ -21,6 +21,12 @@ pub struct ComputePipelinePool {
         StaticResourcePool,
 }
 
+impl Default for ComputePipelinePool {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl ComputePipelinePool {
     pub fn new() -> Self {
         Self {

diff --git a/crates/ratchet-core/src/ops/reindex/permute.rs b/crates/ratchet-core/src/ops/reindex/permute.rs
index 6cdd0caf..38c04cd7 100644
--- a/crates/ratchet-core/src/ops/reindex/permute.rs
+++ b/crates/ratchet-core/src/ops/reindex/permute.rs
@@ -86,7 +86,7 @@ mod tests {
     fn ground_truth(a: &Tensor, dims: &[usize]) -> anyhow::Result<Tensor> {
         let tch_dims = dims.iter().map(|&x| x as i64).collect::<Vec<_>>();
         let a_tch = a.to_tch::<f32>()?;
-        let permuted = a_tch.permute(&tch_dims).contiguous();
+        let permuted = a_tch.permute(tch_dims).contiguous();
         Tensor::try_from(permuted)
     }
 

diff --git a/crates/ratchet-core/src/ops/reindex/slice.rs b/crates/ratchet-core/src/ops/reindex/slice.rs
index 37e7624b..94aca7b3 100644
--- a/crates/ratchet-core/src/ops/reindex/slice.rs
+++ b/crates/ratchet-core/src/ops/reindex/slice.rs
@@ -51,7 +51,7 @@ mod tests {
     use std::ops::Range;
 
     use crate::{Device, DeviceRequest, Tensor};
-    use crate::{RVec, Shape, Slice};
+    use crate::{Shape, Slice};
     use proptest::prelude::*;
     use tch::IndexOp;
     use test_strategy::proptest;

From 5421f202fbb2675d5d5341ca6353b82fdc9ff575 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 22:52:15 +0100
Subject: [PATCH 14/21] chore: py

---
 .github/workflows/rust.yml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 170f6e5e..48665314 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -132,6 +132,25 @@ jobs:
           fi
         shell: bash
 
+      - name: Download and install libtorch
+        run: |
+          import requests
+          import zipfile
+          import os
+
+          with open('requirements.txt') as f:
+              for line in f:
+                  if 'torch' in line:
+                      version = line.split('==')[1].strip()
+                      break
+          response = requests.get(f'https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-{version}.zip')
+          with open('libtorch.zip', 'wb') as f:
+              f.write(response.content)
+          with zipfile.ZipFile('libtorch.zip', 'r') as zip_ref:
+              zip_ref.extractall()
+          os.environ['LIBTORCH'] = os.path.join(os.getcwd(), f'libtorch')
+        shell: python
+
       - uses: FedericoCarboni/setup-ffmpeg@v3
         if: matrix.os != 'macos-14'

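A caveat on the step above, and the reason the next several commits keep adjusting it: `os.environ` only mutates the environment of the step's own Python process, so `LIBTORCH` is gone by the time `cargo` runs in a later step. The standard way to persist a variable across steps is appending `NAME=value` to the file GitHub exposes as `$GITHUB_ENV`; this series instead eventually sidesteps the problem with `LIBTORCH_USE_PYTORCH=1` plus an explicit `DYLD_LIBRARY_PATH` on the test step itself.
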
From 52b1f591f89c893a19d7a4348145adac566d9645 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 23:00:00 +0100
Subject: [PATCH 15/21] chore: py

---
 .github/workflows/rust.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 48665314..cff6617d 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -138,9 +138,10 @@ jobs:
           import zipfile
           import os
 
+          version = '2.3.0' # default version
           with open('requirements.txt') as f:
               for line in f:
-                  if 'torch' in line:
+                  if 'torch==' in line:
                       version = line.split('==')[1].strip()
                       break
           response = requests.get(f'https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-{version}.zip')

From 7c615774610b687caf01f23ec4a012182b8a139f Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 23:08:18 +0100
Subject: [PATCH 16/21] chore: try simple

---
 .github/workflows/rust.yml | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index cff6617d..9ef710de 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -14,6 +14,7 @@ env:
   DXC_RELEASE: "v1.7.2308"
   DXC_FILENAME: "dxc_2023_08_14.zip"
   WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
+  LIBTORCH_USE_PYTORCH: 1
 
 jobs:
   build:
@@ -132,26 +133,6 @@ jobs:
           fi
         shell: bash
 
-      - name: Download and install libtorch
-        run: |
-          import requests
-          import zipfile
-          import os
-
-          version = '2.3.0' # default version
-          with open('requirements.txt') as f:
-              for line in f:
-                  if 'torch==' in line:
-                      version = line.split('==')[1].strip()
-                      break
-          response = requests.get(f'https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-{version}.zip')
-          with open('libtorch.zip', 'wb') as f:
-              f.write(response.content)
-          with zipfile.ZipFile('libtorch.zip', 'r') as zip_ref:
-              zip_ref.extractall()
-          os.environ['LIBTORCH'] = os.path.join(os.getcwd(), f'libtorch')
-        shell: python
-
       - uses: FedericoCarboni/setup-ffmpeg@v3
         if: matrix.os != 'macos-14'

From b95001a077279311d19d2b631cdb683497788a12 Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Tue, 14 May 2024 23:17:12 +0100
Subject: [PATCH 17/21] chore: does it work

---
 .github/workflows/rust.yml | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 9ef710de..cff6617d 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -14,7 +14,6 @@ env:
   DXC_RELEASE: "v1.7.2308"
   DXC_FILENAME: "dxc_2023_08_14.zip"
   WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
-  LIBTORCH_USE_PYTORCH: 1
 
 jobs:
   build:
@@ -133,6 +132,26 @@ jobs:
           fi
         shell: bash
 
+      - name: Download and install libtorch
+        run: |
+          import requests
+          import zipfile
+          import os
+
+          version = '2.3.0' # default version
+          with open('requirements.txt') as f:
+              for line in f:
+                  if 'torch==' in line:
+                      version = line.split('==')[1].strip()
+                      break
+          response = requests.get(f'https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-{version}.zip')
+          with open('libtorch.zip', 'wb') as f:
+              f.write(response.content)
+          with zipfile.ZipFile('libtorch.zip', 'r') as zip_ref:
+              zip_ref.extractall()
+          os.environ['LIBTORCH'] = os.path.join(os.getcwd(), f'libtorch')
+        shell: python
+
       - uses: FedericoCarboni/setup-ffmpeg@v3
         if: matrix.os != 'macos-14'

From 997ca5617855101ef1c328a479a27cafd44c679e Mon Sep 17 00:00:00 2001
From: FL33TW00D
Date: Wed, 15 May 2024 09:51:56 +0100
Subject: [PATCH 18/21] chore: try

---
 .github/workflows/rust.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index cff6617d..496090c5 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -150,6 +150,7 @@ jobs:
           with zipfile.ZipFile('libtorch.zip', 'r') as zip_ref:
               zip_ref.extractall()
           os.environ['LIBTORCH'] = os.path.join(os.getcwd(), f'libtorch')
+          os.environ['DYLD_LIBRARY_PATH'] = os.path.join(os.getcwd(), f'libtorch')
         shell: python
 
       - uses: FedericoCarboni/setup-ffmpeg@v3

From 87e37eff69c1cf083b89465f89c2ba9e3bc33c63 Mon Sep 17 00:00:00 2001
From: AmineDiro
Date: Wed, 15 May 2024 12:06:58 +0200
Subject: [PATCH 19/21] Update rust.yml use installed torch==2.3.0 version

Added
---
 .github/workflows/rust.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 496090c5..e86225a3 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -166,9 +166,11 @@ jobs:
 
       - name: run tests
         shell: bash
+        env:
+          LIBTORCH_USE_PYTORCH: 1
         run: |
           set -e
-          cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3
+          cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3,testing
 
       - name: Set up WebDriver for Ubuntu
         if: matrix.os == 'ubuntu-22.04'

From 15a3b2c99d3d79c5ec65fbbc00fb1a919b90bc7b Mon Sep 17 00:00:00 2001
From: AmineDiro
Date: Wed, 15 May 2024 12:33:09 +0200
Subject: [PATCH 20/21] Added DyLD_LIB

---
 .github/workflows/rust.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e86225a3..e755740a 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -167,7 +167,9 @@ jobs:
       - name: run tests
         shell: bash
         env:
-          LIBTORCH_USE_PYTORCH: 1
+          LIBTORCH_USE_PYTORCH: 1
+          SITE_PACKAGES: $(python -c "import site; print(site.getsitepackages()[0])")
+          DYLD_LIBRARY_PATH: $SITE_PACKAGES/torch/lib:$DYLD_LIBRARY_PATH
         run: |
           set -e
           cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3,testing

From 88f63b55ddd84d339823b7851568e25df6b5d79d Mon Sep 17 00:00:00 2001
From: AmineDiro
Date: Wed, 15 May 2024 15:07:51 +0200
Subject: [PATCH 21/21] DYLD_LIBRARY_PATH fix

---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index e755740a..34093114 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -169,7 +169,7 @@ jobs:
         env:
           LIBTORCH_USE_PYTORCH: 1
           SITE_PACKAGES: $(python -c "import site; print(site.getsitepackages()[0])")
-          DYLD_LIBRARY_PATH: $SITE_PACKAGES/torch/lib:$DYLD_LIBRARY_PATH
+          DYLD_LIBRARY_PATH: ${{ env.SITE_PACKAGES }}/torch/lib:$DYLD_LIBRARY_PATH
         run: |
           set -e
           cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3,testing