atoma-network · jorgeantonio21 · Apr 3, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -8,9 +8,9 @@ version = "0.1.0"
 
 [workspace.dependencies]
 async-trait = "0.1.78"
-candle = { git = "https://github.com/jorgeantonio21/candle/", package = "candle-core", branch = "main" }
-candle-nn = { git = "https://github.com/jorgeantonio21/candle/", package = "candle-nn", branch = "main" }
-candle-transformers = { git = "https://github.com/jorgeantonio21/candle/", package = "candle-transformers", branch = "main" }
+candle = { git = "https://github.com/huggingface/candle", package = "candle-core", version = "0.4.2" }
+candle-nn = { git = "https://github.com/huggingface/candle", package = "candle-nn", version = "0.4.2" }
+candle-transformers = { git = "https://github.com/huggingface/candle", package = "candle-transformers", version = "0.4.2" }
 config = "0.14.0"
 ed25519-consensus = "2.1.0"
 futures = "0.3.30"

diff --git a/atoma-inference/Cargo.toml b/atoma-inference/Cargo.toml
@@ -10,7 +10,7 @@ async-trait.workspace = true
 candle.workspace = true
 candle-nn.workspace = true
 candle-transformers.workspace = true
-config.true = true
+config.workspace = true
 ed25519-consensus.workspace = true
 futures.workspace = true
 hf-hub.workspace = true

diff --git a/atoma-inference/src/apis/hugging_face.rs b/atoma-inference/src/apis/hugging_face.rs
@@ -1,95 +1,16 @@
 use std::path::PathBuf;
 
 use async_trait::async_trait;
-use hf_hub::api::sync::{Api, ApiBuilder};
+use hf_hub::{
+    api::sync::{Api, ApiBuilder},
+    Repo, RepoType,
+};
+use tracing::error;
 
 use crate::models::ModelId;
 
 use super::{ApiError, ApiTrait};
 
-struct FilePaths {
-    file_paths: Vec<String>,
-}
-
-fn get_model_safe_tensors_from_hf(model_id: &ModelId) -> (String, FilePaths) {
-    match model_id.as_str() {
-        "Llama2_7b" => (
-            String::from("meta-llama/Llama-2-7b-hf"),
-            FilePaths {
-                file_paths: vec![
-                    "model-00001-of-00002.safetensors".to_string(),
-                    "model-00002-of-00002.safetensors".to_string(),
-                ],
-            },
-        ),
-        "Mamba3b" => (
-            String::from("state-spaces/mamba-2.8b-hf"),
-            FilePaths {
-                file_paths: vec![
-                    "model-00001-of-00003.safetensors".to_string(),
-                    "model-00002-of-00003.safetensors".to_string(),
-                    "model-00003-of-00003.safetensors".to_string(),
-                ],
-            },
-        ),
-        "Mistral7b" => (
-            String::from("mistralai/Mistral-7B-Instruct-v0.2"),
-            FilePaths {
-                file_paths: vec![
-                    "model-00001-of-00003.safetensors".to_string(),
-                    "model-00002-of-00003.safetensors".to_string(),
-                    "model-00003-of-00003.safetensors".to_string(),
-                ],
-            },
-        ),
-        "Mixtral8x7b" => (
-            String::from("mistralai/Mixtral-8x7B-Instruct-v0.1"),
-            FilePaths {
-                file_paths: vec![
-                    "model-00001-of-00019.safetensors".to_string(),
-                    "model-00002-of-00019.safetensors".to_string(),
-                    "model-00003-of-00019.safetensors".to_string(),
-                    "model-00004-of-00019.safetensors".to_string(),
-                    "model-00005-of-00019.safetensors".to_string(),
-                    "model-00006-of-00019.safetensors".to_string(),
-                    "model-00007-of-00019.safetensors".to_string(),
-                    "model-00008-of-00019.safetensors".to_string(),
-                    "model-00009-of-00019.safetensors".to_string(),
-                    "model-000010-of-00019.safetensors".to_string(),
-                    "model-000011-of-00019.safetensors".to_string(),
-                    "model-000012-of-00019.safetensors".to_string(),
-                    "model-000013-of-00019.safetensors".to_string(),
-                    "model-000014-of-00019.safetensors".to_string(),
-                    "model-000015-of-00019.safetensors".to_string(),
-                    "model-000016-of-00019.safetensors".to_string(),
-                    "model-000017-of-00019.safetensors".to_string(),
-                    "model-000018-of-00019.safetensors".to_string(),
-                    "model-000019-of-00019.safetensors".to_string(),
-                ],
-            },
-        ),
-        "StableDiffusion2" => (
-            String::from("stabilityai/stable-diffusion-2"),
-            FilePaths {
-                file_paths: vec!["768-v-ema.safetensors".to_string()],
-            },
-        ),
-        "StableDiffusionXl" => (
-            String::from("stabilityai/stable-diffusion-xl-base-1.0"),
-            FilePaths {
-                file_paths: vec![
-                    "sd_xl_base_1.0.safetensors".to_string(),
-                    "sd_xl_base_1.0_0.9vae.safetensors".to_string(),
-                    "sd_xl_offset_example-lora_1.0.safetensors".to_string(),
-                ],
-            },
-        ),
-        _ => {
-            panic!("Invalid model id")
-        }
-    }
-}
-
 #[async_trait]
 impl ApiTrait for Api {
     fn create(api_key: String, cache_dir: PathBuf) -> Result<Self, ApiError>
@@ -103,15 +24,29 @@ impl ApiTrait for Api {
             .build()?)
     }
 
-    fn fetch(&self, model_id: &ModelId) -> Result<Vec<PathBuf>, ApiError> {
-        let (model_path, files) = get_model_safe_tensors_from_hf(model_id);
-        let api_repo = self.model(model_path);
-        let mut path_bufs = Vec::with_capacity(files.file_paths.len());
-
-        for file in files.file_paths {
-            path_bufs.push(api_repo.get(&file)?);
+    fn fetch(&self, model_id: ModelId, revision: String) -> Result<Vec<PathBuf>, ApiError> { // Yields paths for config.json, tokenizer.json and model.safetensors, in that order.
+        let mut tokenizer_file = None; // Stays `None` unless `model_id` contains "mamba".
+        if model_id.contains("mamba") { // Substring match on the id; NOTE(review): presumably mamba repos ship no tokenizer.json of their own - confirm.
+            tokenizer_file = Some(
+                self.model("EleutherAI/gpt-neox-20b".to_string()) // Tokenizer is taken from this repo instead; `revision` is not applied to this lookup.
+                    .get("tokenizer.json")
+                    .map_err(|e| {
+                        error!("Failed to fetch tokenizer file: {e}"); // Log, then hand the same error back so `?` propagates it.
+                        e
+                    })?,
+            )
         }
 
-        Ok(path_bufs)
+        let repo = self.repo(Repo::with_revision(model_id, RepoType::Model, revision)); // Handle for the `model_id` model repo at `revision`.
+
+        Ok(vec![
+            repo.get("config.json")?,
+            if let Some(tkn) = tokenizer_file { // Reuse the tokenizer fetched above when there is one.
+                tkn
+            } else {
+                repo.get("tokenizer.json")? // Otherwise use the model repo's own tokenizer.
+            },
+            repo.get("model.safetensors")?, // Only this single weights file is requested; NOTE(review): sharded checkpoints would presumably fail here - confirm intended.
+        ])
     }
 }
diff --git a/atoma-inference/src/apis/mod.rs b/atoma-inference/src/apis/mod.rs
@@ -22,7 +22,7 @@ impl From<HuggingFaceError> for ApiError {
 }
 
 pub trait ApiTrait: Send {
-    fn fetch(&self, model_id: &ModelId) -> Result<Vec<PathBuf>, ApiError>;
+    fn fetch(&self, model_id: ModelId, revision: String) -> Result<Vec<PathBuf>, ApiError>;
     fn create(api_key: String, cache_dir: PathBuf) -> Result<Self, ApiError>
     where
         Self: Sized;

diff --git a/atoma-inference/src/lib.rs b/atoma-inference/src/lib.rs
@@ -1,7 +1,6 @@
 pub mod model_thread;
 pub mod service;
 pub mod specs;
-pub mod types;
 
 pub mod apis;
 pub mod models;
diff --git a/atoma-inference/src/main.rs b/atoma-inference/src/main.rs
@@ -1,16 +1,58 @@
-// use hf_hub::api::sync::Api;
-// use inference::service::ModelService;
+use std::time::Duration;
+
+use hf_hub::api::sync::Api;
+use inference::{
+    models::{
+        candle::mamba::MambaModel,
+        types::{TextRequest, TextResponse},
+    },
+    service::{ModelService, ModelServiceError},
+};
 
 #[tokio::main]
-async fn main() {
+async fn main() -> Result<(), ModelServiceError> { // Demo driver: starts the service, sends one hard-coded text request, prints the first response.
     tracing_subscriber::fmt::init();
 
-    // let (_, receiver) = tokio::sync::mpsc::channel(32);
+    let (req_sender, req_receiver) = tokio::sync::mpsc::channel::<TextRequest>(32); // Requests go to the service through this channel (capacity 32).
+    let (resp_sender, mut resp_receiver) = tokio::sync::mpsc::channel::<TextResponse>(32); // Responses come back through this one.
+
+    let mut service = ModelService::start::<MambaModel, Api>( // Model type and API client type are fixed here at compile time.
+        "../inference.toml".parse().unwrap(), // Relative path literal; NOTE(review): presumably resolved against the working directory - confirm.
+        "../private_key".parse().unwrap(),
+        req_receiver,
+        resp_sender,
+    )
+    .expect("Failed to start inference service");
+
+    let pk = service.public_key(); // Read before `service` is moved into the task below.
+
+    tokio::spawn(async move { // The JoinHandle is dropped, so the task's Result is never observed.
+        service.run().await?;
+        Ok::<(), ModelServiceError>(())
+    });
+
+    tokio::time::sleep(Duration::from_millis(5000)).await; // Fixed 5 s pause before sending; NOTE(review): presumably gives the model time to load - confirm.
+
+    req_sender
+        .send(TextRequest {
+            request_id: 0,
+            prompt: "Who was the first american president ?".to_string(),
+            model: "state-spaces/mamba-130m".to_string(),
+            max_tokens: 512,
+            temperature: Some(0.0),
+            random_seed: 42,
+            repeat_last_n: 64,
+            repeat_penalty: 1.1,
+            sampled_nodes: vec![pk], // Only this service's own public key is listed.
+            top_p: Some(1.0),
+            top_k: 10,
+        })
+        .await
+        .expect("Failed to send request");
+
+    if let Some(response) = resp_receiver.recv().await { // Waits for one message; `None` from `recv` is skipped without any output.
+        println!("Got a response: {:?}", response);
+    }
 
-    // let _ = ModelService::start::<Model, Api>(
-    //     "../inference.toml".parse().unwrap(),
-    //     "../private_key".parse().unwrap(),
-    //     receiver,
-    // )
-    // .expect("Failed to start inference service");
+    Ok(()) // Returns after that single `recv`, without awaiting the spawned task.
 }
diff --git a/atoma-inference/src/model_thread.rs b/atoma-inference/src/model_thread.rs
@@ -1,10 +1,13 @@
-use std::{collections::HashMap, sync::mpsc};
+use std::{
+    collections::HashMap,
+    sync::{mpsc, Arc},
+};
 
 use ed25519_consensus::VerificationKey as PublicKey;
 use futures::stream::FuturesUnordered;
 use thiserror::Error;
 use tokio::sync::oneshot::{self, error::RecvError};
-use tracing::{debug, error, warn};
+use tracing::{debug, error, info, warn};
 
 use crate::{
     apis::{ApiError, ApiTrait},
@@ -69,7 +72,7 @@ where
     Req: Request,
     Resp: Response,
 {
-    pub fn run(self, public_key: PublicKey) -> Result<(), ModelThreadError> {
+    pub fn run(mut self, public_key: PublicKey) -> Result<(), ModelThreadError> {
         debug!("Start Model thread");
 
         while let Ok(command) = self.receiver.recv() {
@@ -112,24 +115,29 @@ where
         public_key: PublicKey,
     ) -> Result<(Self, Vec<ModelThreadHandle<Req, Resp>>), ModelThreadError>
     where
-        F: ApiTrait,
+        F: ApiTrait + Send + Sync + 'static,
         M: ModelTrait<Input = Req::ModelInput, Output = Resp::ModelOutput> + Send + 'static,
     {
         let model_ids = config.model_ids();
         let api_key = config.api_key();
         let storage_path = config.storage_path();
-        let api = F::create(api_key, storage_path)?;
+        let api = Arc::new(F::create(api_key, storage_path)?);
 
         let mut handles = Vec::with_capacity(model_ids.len());
         let mut model_senders = HashMap::with_capacity(model_ids.len());
 
-        for model_id in model_ids {
-            let filenames = api.fetch(&model_id)?;
+        for (model_id, precision, revision) in model_ids {
+            info!("Spawning new thread for model: {model_id}");
+            let api = api.clone();
 
             let (model_sender, model_receiver) = mpsc::channel::<ModelThreadCommand<_, _>>();
+            let model_name = model_id.clone();
 
             let join_handle = std::thread::spawn(move || {
-                let model = M::load(filenames)?; // TODO: for now this piece of code cannot be shared among threads safely
+                info!("Fetching files for model: {model_name}");
+                let filenames = api.fetch(model_name, revision)?;
+
+                let model = M::load(filenames, precision)?;
                 let model_thread = ModelThread {
                     model,
                     receiver: model_receiver,