From c8e3d1e8bdff0590e634f755e247944d39d9db65 Mon Sep 17 00:00:00 2001
From: BBC-Esq
Date: Thu, 5 Sep 2024 10:22:12 -0400
Subject: [PATCH] add codeqwen 1.5 - 7b

---
 src/constants.py   | 11 +++++++++++
 src/module_chat.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/constants.py b/src/constants.py
index 68fcfde4..c84e2737 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -5,6 +5,7 @@
     'Dolphin-Qwen 2 - 1.5b': 8192,
     'Phi 3.5 Mini - 4b': 8192,
     'Internlm2_5 - 7b': 8192,
+    'CodeQwen 1.5 - 7b': 8192,
     'Dolphin-Llama 3.1 - 8b': 8192,
     'Hermes-3-Llama-3.1 - 8b': 8192,
     'Dolphin-Qwen 2 - 7b': 8192,
@@ -360,6 +361,16 @@
         'function': 'Dolphin_Qwen2_7b',
         'precision': 'bfloat16'
     },
+    'CodeQwen 1.5 - 7b': {
+        'model': 'CodeQwen 1.5 - 7b',
+        'repo_id': 'Qwen/CodeQwen1.5-7B-Chat',
+        'cache_dir': 'Qwen--CodeQwen1.5-7B-Chat',
+        'tokens_per_second': 52,
+        'context_length': 16384,
+        'avg_vram_usage': '9.2 GB',
+        'function': 'CodeQwen1_5_7b_chat',
+        'precision': 'bfloat16'
+    },
     'Dolphin-Phi 3 - Medium': {
         'model': 'Dolphin-Phi 3 - Medium',
         'repo_id': 'cognitivecomputations/dolphin-2.9.2-Phi-3-Medium',
diff --git a/src/module_chat.py b/src/module_chat.py
index 25e4d310..ed37440a 100644
--- a/src/module_chat.py
+++ b/src/module_chat.py
@@ -238,6 +238,42 @@ def create_prompt(self, augmented_query):
 <|im_start|>assistant
 """
 
+class CodeQwen1_5_7b_chat(BaseModel):
+    def __init__(self, generation_settings):
+        model_info = CHAT_MODELS['CodeQwen 1.5 - 7b']
+        super().__init__(model_info, bnb_bfloat16_settings, generation_settings)
+
+    def create_prompt(self, augmented_query):
+        return f"""<|im_start|>system
+{system_message}<|im_end|>
+<|im_start|>user
+{augmented_query}<|im_end|>
+<|im_start|>assistant
+"""
+
+    def generate_response(self, inputs):
+        """
+        Overrides the BaseModel method to handle model-specific kwargs.
+        """
+        # Drop token_type_ids if present; this model's generate() doesn't accept it.
+        inputs.pop('token_type_ids', None)
+
+        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+        eos_token_id = self.tokenizer.eos_token_id
+
+        # Combine the tokenized inputs with the generation settings.
+        all_settings = {**inputs, **self.generation_settings, 'streamer': streamer, 'eos_token_id': eos_token_id}
+
+        # model.generate() blocks, so run it in a worker thread while this thread drains the streamer.
+        generation_thread = threading.Thread(target=self.model.generate, kwargs=all_settings)
+        generation_thread.start()
+
+        for partial_response in streamer:
+            yield partial_response
+
+        generation_thread.join()
+
+
 class Dolphin_Qwen2_7b(BaseModel):
     def __init__(self, generation_settings):
         model_info = CHAT_MODELS['Dolphin-Qwen 2 - 7b']
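
Reviewer note: a minimal sketch of how the new class's streaming
generate_response() might be exercised end-to-end, not part of the patch.
It assumes BaseModel assigns self.tokenizer and self.model during
__init__ (as the other model classes in module_chat.py imply); the
generation_settings values and the prompt are placeholders, not the
project's actual defaults.

    # Hedged usage sketch; placeholder names are flagged in comments.
    from module_chat import CodeQwen1_5_7b_chat

    # Placeholder settings; the repo's real defaults may differ.
    generation_settings = {'max_new_tokens': 512, 'do_sample': False}

    model = CodeQwen1_5_7b_chat(generation_settings)

    # Build the ChatML prompt and tokenize it onto the model's device
    # (assumes BaseModel exposes self.tokenizer and self.model).
    prompt = model.create_prompt("Write a function that reverses a string.")
    inputs = model.tokenizer(prompt, return_tensors='pt').to(model.model.device)

    # generate_response() is a generator: model.generate() runs in a
    # background thread filling the TextIteratorStreamer while this
    # loop drains it chunk by chunk.
    for chunk in model.generate_response(dict(inputs)):
        print(chunk, end='', flush=True)
    print()

The worker thread is required by design: TextIteratorStreamer only yields
text while generate() is producing it concurrently on another thread.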