Tests migration to tch #195

Open · wants to merge 22 commits into master
27 changes: 26 additions & 1 deletion .github/workflows/rust.yml
@@ -132,6 +132,27 @@ jobs:
fi
shell: bash

- name: Download and install libtorch
run: |
import requests
import zipfile
import os

version = '2.3.0' # default version
with open('requirements.txt') as f:
for line in f:
if 'torch==' in line:
version = line.split('==')[1].strip()
break
response = requests.get(f'https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-{version}.zip')
Author:

I think an arm64 build of torch 2.3.0 exists as a Python wheel, but if you need to download the zip, this one worked for me (x86 at least):

wget https://download.pytorch.org/libtorch/cpu/libtorch-shared-with-deps-2.3.0%2Bcpu.zip

Collaborator:

@AmineDiro Still being a pain 😡

Author (@AmineDiro, May 15, 2024):

Ok, I think I see the issue (finally got access to the runner output). Setting environment variables from Python will not persist them across steps.
I also see in the output that torch==2.3.0 is successfully installed via pip, so I think we can use the already-installed version without downloading it.

I added the env variable in 87e37ef, hope this works
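For reference, the standard Actions mechanism for persisting a variable across steps is appending `KEY=VALUE` to the file named by `$GITHUB_ENV`; a minimal sketch (the libtorch paths are illustrative):

```shell
# Variables exported inside a step's process (including via os.environ in
# a Python step) disappear when the step exits. Appending KEY=VALUE to the
# file named by $GITHUB_ENV makes them available to all subsequent steps.
GITHUB_ENV="${GITHUB_ENV:-/tmp/github_env}"   # Actions sets this in real runs
echo "LIBTORCH=$PWD/libtorch" >> "$GITHUB_ENV"
echo "DYLD_LIBRARY_PATH=$PWD/libtorch/lib" >> "$GITHUB_ENV"
```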

Collaborator:

[screenshot]

Seems tricky, I can look into it later 🫡

Author:

Ohh I see that's the DYLIB export 👍🏼

with open('libtorch.zip', 'wb') as f:
f.write(response.content)
with zipfile.ZipFile('libtorch.zip', 'r') as zip_ref:
zip_ref.extractall()
os.environ['LIBTORCH'] = os.path.join(os.getcwd(), 'libtorch')
Author (@AmineDiro, May 15, 2024):

If the runner is macOS, maybe we should add:
`export DYLD_LIBRARY_PATH=$LIBTORCH/lib:$DYLD_LIBRARY_PATH`. This fixed linking issues for me.

os.environ['DYLD_LIBRARY_PATH'] = os.path.join(os.getcwd(), 'libtorch')
shell: python

- uses: FedericoCarboni/setup-ffmpeg@v3
if: matrix.os != 'macos-14'

@@ -145,9 +166,13 @@ jobs:

- name: run tests
shell: bash
env:
LIBTORCH_USE_PYTORCH: 1
SITE_PACKAGES: $(python -c "import site; print(site.getsitepackages()[0])")
DYLD_LIBRARY_PATH: ${{ env.SITE_PACKAGES }}/torch/lib:$DYLD_LIBRARY_PATH
run: |
set -e
cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3
cargo nextest run -j 1 --no-fail-fast --features=ci,pyo3,testing

- name: Set up WebDriver for Ubuntu
if: matrix.os == 'ubuntu-22.04'
22 changes: 22 additions & 0 deletions CONTRIBUTING.md
@@ -131,6 +131,8 @@ If that looks like this, you are good to go 🎉

### Step 3: Run Tests

#### PyO3 tests

Finally, run the tests for the package using Cargo:

```sh
@@ -143,6 +145,26 @@ To run the `PyO3` tests, add the `pyo3` flag:
cargo test --features pyo3
```

#### `tch` tests

`tch`-based tests are gated behind the `testing` feature. You first need the PyTorch C++ library (libtorch) v2.3.0 available on your system; see the [official `tch` documentation for details](https://github.com/LaurentMazare/tch-rs/tree/main?tab=readme-ov-file). We'll use the libtorch library installed in the Python environment:

```sh
export LIBTORCH_USE_PYTORCH=1
```

You can now run tests:

```sh
cargo test --features testing
```

**NOTE**: If you're having compilation issues on macOS, you can add the `libtorch` lib directory to your environment:

```sh
export DYLD_LIBRARY_PATH=$PWD/venv/lib/python3.10/site-packages/torch/lib:$DYLD_LIBRARY_PATH
```
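The exact site-packages path varies by Python version and venv layout; a sketch that derives it instead of hardcoding (assumes torch is pip-installed into the active interpreter):

```shell
# Locate the active interpreter's site-packages directory and point the
# dynamic linker at the torch/lib folder inside it.
SITE_PACKAGES="$(python3 -c 'import site; print(site.getsitepackages()[0])')"
export DYLD_LIBRARY_PATH="$SITE_PACKAGES/torch/lib:$DYLD_LIBRARY_PATH"
```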

### Step 5: Run WASM Tests

To run WASM tests (e.g., the whisper test) run:
17 changes: 11 additions & 6 deletions Cargo.toml
@@ -5,8 +5,8 @@ members = [
"crates/ratchet-web",
"crates/ratchet-loader",
"crates/ratchet-models",
"crates/ratchet-nn",
"crates/ratchet-hub",
"crates/ratchet-nn",
"crates/ratchet-hub",
"crates/ratchet-cli",
]
resolver = "2"
@@ -18,26 +18,30 @@ debug-assertions = true
[profile.release]
panic = 'abort'
lto = "fat"
codegen-units = 1
codegen-units = 1

[profile.profiling]
inherits = "release"
debug = 2

[workspace.dependencies]
wgpu = { version = "0.20", features = ["fragile-send-sync-non-atomic-wasm"] }
bytemuck = { version = "1.14.0", features=["wasm_simd", "aarch64_simd", "extern_crate_alloc"] }
bytemuck = { version = "1.14.0", features = [
"wasm_simd",
"aarch64_simd",
"extern_crate_alloc",
] }
num-traits = "0.2.17"
half = { version = "2.3.1", features = ["num-traits", "bytemuck"] }
derive-new = "0.6.0"
log = "0.4.20"
thiserror = "1.0.56"
byteorder = "1.5.0"
npyz = { version = "0.8.3"}
npyz = { version = "0.8.3" }
hf-hub = "0.3.2"
serde = "1.0"
anyhow = "1.0.79"
tokenizers = "0.19.1"
tokenizers = "0.19.1"

js-sys = "0.3.64"
wasm-bindgen = "0.2.91"
@@ -90,3 +94,4 @@ wasm-bindgen-futures = "0.4.41"
web-sys = "0.3.64"
web-time = "1.0.0"
futures-intrusive = "0.5.0"
tch = "0.16.0"
9 changes: 5 additions & 4 deletions crates/ratchet-core/Cargo.toml
@@ -9,7 +9,7 @@ default = ["rand", "testing"]
gpu-profiling = ["dep:tabled", "dep:itertools"]
rand = ["dep:rand", "dep:rand_distr"]
plotting = ["dep:dot3", "dep:tempfile"]
testing = ["dep:npyz", "dep:ndarray"]
testing = ["dep:npyz", "dep:ndarray","dep:tch"]
pyo3 = ["dep:pyo3", "dep:numpy", "dep:regex"]

[build-dependencies]
@@ -31,7 +31,7 @@ num-traits = { workspace = true }
log = { workspace = true }
thiserror = { workspace = true }
serde = { workspace = true, features = ["derive"] }
anyhow.workspace = true
anyhow.workspace = true

rustc-hash = { workspace = true }
slotmap = { workspace = true }
@@ -55,12 +55,13 @@ tempfile = { workspace = true, optional = true }
tabled = { workspace = true, optional = true }
itertools = { workspace = true, optional = true }

pyo3 = { workspace = true, features = ["auto-initialize"], optional = true }
pyo3 = { workspace = true, features = ["auto-initialize"], optional = true }
regex = { workspace = true, optional = true }
numpy = { workspace = true, optional = true }
tch = { workspace = true, optional = true }

[target.'cfg(target_arch = "wasm32")'.dependencies]
wasm-bindgen.workspace = true
wasm-bindgen.workspace = true
futures-intrusive.workspace = true

async-trait = "0.1.77"
17 changes: 10 additions & 7 deletions crates/ratchet-core/src/dtype/mod.rs
@@ -19,6 +19,7 @@ pub enum DType {
I32,
U32,
GGUF(gguf::GGUFDType),
I64,
}

impl DType {
@@ -52,6 +53,7 @@ impl DType {
DType::I32 => 4,
DType::U32 => 4,
DType::GGUF(g) => g.size_of(),
DType::I64 => 8,
}
}

@@ -111,7 +113,7 @@ pub trait TensorDType:
fn one() -> Self;
}

macro_rules! map_type {
macro_rules! tensor_dt {
($t:ty, $v:ident) => {
impl TensorDType for $t {
fn dt() -> DType {
@@ -125,7 +127,7 @@ macro_rules! map_type {
};
}

macro_rules! map_half_type {
macro_rules! tensor_half_dt {
($t:ty, $v:ident) => {
impl TensorDType for $t {
fn dt() -> DType {
@@ -139,11 +141,12 @@ macro_rules! map_half_type {
};
}

map_type!(f32, F32);
map_type!(i32, I32);
map_type!(u32, U32);
map_half_type!(f16, F16);
map_half_type!(bf16, BF16);
tensor_dt!(f32, F32);
tensor_dt!(i32, I32);
tensor_dt!(u32, U32);
tensor_dt!(i64, I64);
tensor_half_dt!(f16, F16);
tensor_half_dt!(bf16, BF16);

//Handy trait for WebGPU buffer alignment
pub trait Align {
6 changes: 6 additions & 0 deletions crates/ratchet-core/src/gpu/buffer_allocator/allocator.rs
@@ -21,6 +21,12 @@ pub struct BufferAllocator {
pool: RwLock<BufferPool>,
}

impl Default for BufferAllocator {
fn default() -> Self {
Self::new()
}
}

impl BufferAllocator {
pub fn new() -> Self {
Self {
6 changes: 6 additions & 0 deletions crates/ratchet-core/src/gpu/pools/bind_group_layout_pool.rs
@@ -106,6 +106,12 @@ pub struct BindGroupLayoutPool {
StaticResourcePool<BindGroupLayoutHandle, BindGroupLayoutDescriptor, wgpu::BindGroupLayout>,
}

impl Default for BindGroupLayoutPool {
fn default() -> Self {
Self::new()
}
}

impl BindGroupLayoutPool {
pub fn new() -> Self {
Self {
6 changes: 6 additions & 0 deletions crates/ratchet-core/src/gpu/pools/bind_group_pool.rs
@@ -85,6 +85,12 @@ pub struct BindGroupPool {
inner: DynamicResourcePool<GpuBindGroupHandle, BindGroupDescriptor, wgpu::BindGroup>,
}

impl Default for BindGroupPool {
fn default() -> Self {
Self::new()
}
}

impl BindGroupPool {
pub fn new() -> Self {
Self {
6 changes: 6 additions & 0 deletions crates/ratchet-core/src/gpu/pools/buffer_pool.rs
@@ -60,6 +60,12 @@ pub struct BufferPool {
inner: DynamicResourcePool<GpuBufferHandle, BufferDescriptor, RawGPUBuffer>,
}

impl Default for BufferPool {
fn default() -> Self {
Self::new()
}
}

impl BufferPool {
pub fn new() -> Self {
Self {
6 changes: 6 additions & 0 deletions crates/ratchet-core/src/gpu/pools/pipeline_pool.rs
@@ -21,6 +21,12 @@ pub struct ComputePipelinePool {
StaticResourcePool<ComputePipelineHandle, ComputePipelineDescriptor, wgpu::ComputePipeline>,
}

impl Default for ComputePipelinePool {
fn default() -> Self {
Self::new()
}
}

impl ComputePipelinePool {
pub fn new() -> Self {
Self {
23 changes: 11 additions & 12 deletions crates/ratchet-core/src/ops/binary.rs
@@ -151,9 +151,9 @@ impl MetaOperation for Binary {
}
}

#[cfg(all(test, feature = "pyo3"))]
#[cfg(all(test, feature = "testing"))]
mod tests {
use crate::{test_util::run_py_prg, BinaryOp, Device, DeviceRequest, Shape, Tensor};
use crate::{BinaryOp, Device, DeviceRequest, Shape, Tensor};
use test_strategy::{proptest, Arbitrary};

thread_local! {
@@ -168,16 +168,15 @@ }
}

fn ground_truth(a: &Tensor, b: &Tensor, op: &BinaryOp) -> anyhow::Result<Tensor> {
let kn = op.kernel_name();
let prg = format!(
r#"
import torch
def {}(a, b):
return torch.{}(torch.from_numpy(a), torch.from_numpy(b)).numpy()
"#,
kn, kn
);
run_py_prg(prg.to_string(), &[a, b], &[])
let a = a.to_tch::<f32>()?;
let b = b.to_tch::<f32>()?;
let result = match op {
BinaryOp::Add => a.f_add(&b)?,
BinaryOp::Sub => a.f_sub(&b)?,
BinaryOp::Mul => a.f_mul(&b)?,
BinaryOp::Div => a.f_div(&b)?,
};
Tensor::try_from(result)
}

fn run_binary_trial(prob: BinaryProblem) -> anyhow::Result<()> {
30 changes: 10 additions & 20 deletions crates/ratchet-core/src/ops/concat.rs
@@ -141,7 +141,8 @@ impl MetaOperation for Concat {

#[cfg(all(test, feature = "pyo3"))]
mod tests {
use crate::{rvec, shape, test_util::run_py_prg, Device, DeviceRequest, Tensor};
use crate::{rvec, shape, Device, DeviceRequest, Tensor};
use tch::Tensor as TchTensor;

thread_local! {
static GPU_DEVICE: Device = Device::request_device(DeviceRequest::GPU).unwrap();
@@ -157,22 +158,13 @@ }
dim: usize,
}

fn ground_truth(to_cat: &[&Tensor], args: &str) -> anyhow::Result<Tensor> {
let prg = format!(
r#"
import torch
import numpy as np
def permute(t0, t1, t2, t3, t4):
t0 = torch.from_numpy(t0)
t1 = torch.from_numpy(t1)
t2 = torch.from_numpy(t2)
t3 = torch.from_numpy(t3)
t4 = torch.from_numpy(t4)
return np.ascontiguousarray(torch.cat((t0, t1, t2, t3, t4), dim={}).numpy())
"#,
args
);
run_py_prg(prg.to_string(), to_cat, &[])
fn ground_truth(to_cat: &[&Tensor], dim: i64) -> anyhow::Result<Tensor> {
let tch_tensors = to_cat
.iter()
.map(|x| x.to_tch::<f32>())
.collect::<Result<Vec<_>, _>>()?;

Tensor::try_from(TchTensor::cat(&tch_tensors, dim))
}

fn run_concat_trial(prob: ConcatProblem) -> anyhow::Result<()> {
@@ -185,9 +177,7 @@ def permute(t0, t1, t2, t3, t4):
dim,
} = prob;
let device = GPU_DEVICE.with(|d| d.clone());

let arg_str = format!("{}", dim);
let ground = ground_truth(&[&t0, &t1, &t2, &t3, &t4], arg_str.as_str())?;
let ground = ground_truth(&[&t0, &t1, &t2, &t3, &t4], dim as _)?;

t0 = t0.to(&device)?;
t1 = t1.to(&device)?;