Add init_shape API for states.

cryscan · Dec 21, 2024 · 2b57f87 · 2b57f87
1 parent b5575d4
commit 2b57f87
Show file tree

Hide file tree

Showing 6 changed files with 41 additions and 15 deletions.
diff --git a/examples/chat.rs b/examples/chat.rs
@@ -23,7 +23,7 @@ use web_rwkv::{
         loader::{Loader, Lora},
         model::{Bundle, ContextAutoLimits, ModelBuilder, ModelInfo, ModelVersion, Quant, State},
         softmax::softmax_one,
-        v4, v5, v6, v7, TokioRuntime,
+        v4, v5, v6, v7, Runtime, TokioRuntime,
     },
     tensor::{TensorCpu, TensorInit, TensorShape},
     tokenizer::Tokenizer,
@@ -258,30 +258,34 @@ async fn main() -> Result<()> {
         None => builder,
     };
 
-    let (runtime, state): (_, Box<dyn State>) = match info.version {
+    let (runtime, state): (Box<dyn Runtime>, Box<dyn State>) = match info.version {
         ModelVersion::V4 => {
             let model = builder.build_v4().await?;
             let bundle = v4::Bundle::<f16>::new(model, 1);
             let state = bundle.state();
-            (TokioRuntime::new(bundle).await, Box::new(state))
+            let runtime = TokioRuntime::new(bundle).await;
+            (Box::new(runtime), Box::new(state))
         }
         ModelVersion::V5 => {
             let model = builder.build_v5().await?;
             let bundle = v5::Bundle::<f16>::new(model, 1);
             let state = bundle.state();
-            (TokioRuntime::new(bundle).await, Box::new(state))
+            let runtime = TokioRuntime::new(bundle).await;
+            (Box::new(runtime), Box::new(state))
         }
         ModelVersion::V6 => {
             let model = builder.build_v6().await?;
             let bundle = v6::Bundle::<f16>::new(model, 1);
             let state = bundle.state();
-            (TokioRuntime::new(bundle).await, Box::new(state))
+            let runtime = TokioRuntime::new(bundle).await;
+            (Box::new(runtime), Box::new(state))
         }
         ModelVersion::V7 => {
             let model = builder.build_v7().await?;
             let bundle = v7::Bundle::<f16>::new(model, 1);
             let state = bundle.state();
-            (TokioRuntime::new(bundle).await, Box::new(state))
+            let runtime = TokioRuntime::new(bundle).await;
+            (Box::new(runtime), Box::new(state))
         }
     };
 

diff --git a/src/runtime/model.rs b/src/runtime/model.rs
@@ -15,7 +15,7 @@ use crate::{
     context::{Context, ContextBuilder},
     impl_deserialize_seed,
     num::Scalar,
-    tensor::{kind::ReadWrite, TensorCpu, TensorError, TensorGpu, TensorGpuView},
+    tensor::{kind::ReadWrite, shape::Shape, TensorCpu, TensorError, TensorGpu, TensorGpuView},
 };
 
 #[wasm_bindgen]
@@ -87,6 +87,8 @@ pub trait AsAny {
 pub trait State {
     /// Batch number of this state.
     fn num_batch(&self) -> usize;
+    /// Shape of the initialized one-batch CPU state.
+    fn init_shape(&self) -> Shape;
     /// Initialize a one-batch state on CPU.
     fn init(&self) -> TensorCpu<f32>;
     /// The part of the state that is used in an `att` layer.

diff --git a/src/runtime/v4.rs b/src/runtime/v4.rs
@@ -148,6 +148,12 @@ impl super::model::State for State {
         self.data.shape()[2]
     }
 
+    #[inline]
+    fn init_shape(&self) -> Shape {
+        let info = &self.info;
+        [info.num_emb, 5 * info.num_layer, 1, 1].into()
+    }
+
     fn init(&self) -> TensorCpu<f32> {
         let info = &self.info;
         let data = (0..info.num_layer)
@@ -163,8 +169,7 @@ impl super::model::State for State {
             })
             .collect_vec()
             .concat();
-        let shape = Shape::new(info.num_emb, 5 * info.num_layer, 1, 1);
-        TensorCpu::from_data(shape, data).unwrap()
+        TensorCpu::from_data(self.init_shape(), data).unwrap()
     }
 
     fn att(&self, layer: usize) -> Result<TensorGpuView<f32>, TensorError> {

diff --git a/src/runtime/v5.rs b/src/runtime/v5.rs
@@ -159,10 +159,15 @@ impl super::model::State for State {
         self.data[0].shape()[2]
     }
 
-    fn init(&self) -> TensorCpu<f32> {
+    #[inline]
+    fn init_shape(&self) -> Shape {
         let info = &self.info;
         let head_size = info.num_emb / info.num_head;
-        let shape = Shape::new(info.num_emb, head_size + 2, info.num_layer, 1);
+        [info.num_emb, head_size + 2, info.num_layer, 1].into()
+    }
+
+    fn init(&self) -> TensorCpu<f32> {
+        let shape = self.init_shape();
         let data = vec![0.0; shape.len()];
         TensorCpu::from_data(shape, data).unwrap()
     }

diff --git a/src/runtime/v6.rs b/src/runtime/v6.rs
@@ -171,10 +171,15 @@ impl super::model::State for State {
         self.data[0].shape()[2]
     }
 
-    fn init(&self) -> TensorCpu<f32> {
+    #[inline]
+    fn init_shape(&self) -> Shape {
         let info = &self.info;
         let head_size = info.num_emb / info.num_head;
-        let shape = Shape::new(info.num_emb, head_size + 2, info.num_layer, 1);
+        [info.num_emb, head_size + 2, info.num_layer, 1].into()
+    }
+
+    fn init(&self) -> TensorCpu<f32> {
+        let shape = self.init_shape();
         let data = vec![0.0; shape.len()];
         TensorCpu::from_data(shape, data).unwrap()
     }

diff --git a/src/runtime/v7.rs b/src/runtime/v7.rs
@@ -182,10 +182,15 @@ impl super::model::State for State {
         self.data[0].shape()[2]
     }
 
-    fn init(&self) -> TensorCpu<f32> {
+    #[inline]
+    fn init_shape(&self) -> Shape {
         let info = &self.info;
         let head_size = info.num_emb / info.num_head;
-        let shape = Shape::new(info.num_emb, head_size + 2, info.num_layer, 1);
+        [info.num_emb, head_size + 2, info.num_layer, 1].into()
+    }
+
+    fn init(&self) -> TensorCpu<f32> {
+        let shape = self.init_shape();
         let data = vec![0.0; shape.len()];
         TensorCpu::from_data(shape, data).unwrap()
     }