From c8e3d1e8bdff0590e634f755e247944d39d9db65 Mon Sep 17 00:00:00 2001
From: BBC-Esq
Date: Thu, 5 Sep 2024 10:22:12 -0400
Subject: [PATCH] add codeqwen 1.5 - 7b

---
 src/constants.py   | 11 +++++++++++
 src/module_chat.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/constants.py b/src/constants.py
index 68fcfde4..c84e2737 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -5,6 +5,7 @@
     'Dolphin-Qwen 2 - 1.5b': 8192,
     'Phi 3.5 Mini - 4b': 8192,
     'Internlm2_5 - 7b': 8192,
+    'CodeQwen 1.5 - 7b': 8192,
     'Dolphin-Llama 3.1 - 8b': 8192,
     'Hermes-3-Llama-3.1 - 8b': 8192,
     'Dolphin-Qwen 2 - 7b': 8192,
@@ -360,6 +361,16 @@
         'function': 'Dolphin_Qwen2_7b',
         'precision': 'bfloat16'
     },
+    'CodeQwen 1.5 - 7b': {
+        'model': 'CodeQwen 1.5 - 7b',
+        'repo_id': 'Qwen/CodeQwen1.5-7B-Chat',
+        'cache_dir': 'Qwen--CodeQwen1.5-7B-Chat',
+        'tokens_per_second': 52,
+        'context_length': 16384,
+        'avg_vram_usage': '9.2 GB',
+        'function': 'CodeQwen1_5_7b_chat',
+        'precision': 'bfloat16'
+    },
     'Dolphin-Phi 3 - Medium': {
         'model': 'Dolphin-Phi 3 - Medium',
         'repo_id': 'cognitivecomputations/dolphin-2.9.2-Phi-3-Medium',
diff --git a/src/module_chat.py b/src/module_chat.py
index 25e4d310..ed37440a 100644
--- a/src/module_chat.py
+++ b/src/module_chat.py
@@ -238,6 +238,42 @@ def create_prompt(self, augmented_query):
 <|im_start|>assistant
 """
 
+class CodeQwen1_5_7b_chat(BaseModel):
+    def __init__(self, generation_settings):
+        model_info = CHAT_MODELS['CodeQwen 1.5 - 7b']
+        super().__init__(model_info, bnb_bfloat16_settings, generation_settings)
+
+    def create_prompt(self, augmented_query):
+        return f"""<|im_start|>system
+{system_message}<|im_end|>
+<|im_start|>user
+{augmented_query}<|im_end|>
+<|im_start|>assistant
+"""
+
+    def generate_response(self, inputs):
+        """
+        Overrides the BaseModel method to handle model-specific kwargs.
+        """
+        # Drop token_type_ids if present; this model's generate() doesn't accept it.
+        inputs.pop('token_type_ids', None)
+
+        streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
+        eos_token_id = self.tokenizer.eos_token_id
+
+        # Combine the tokenized inputs with the generation settings.
+        all_settings = {**inputs, **self.generation_settings, 'streamer': streamer, 'eos_token_id': eos_token_id}
+
+        # model.generate() blocks, so run it in a worker thread while this thread drains the streamer.
+        generation_thread = threading.Thread(target=self.model.generate, kwargs=all_settings)
+        generation_thread.start()
+
+        for partial_response in streamer:
+            yield partial_response
+
+        generation_thread.join()
+
+
 class Dolphin_Qwen2_7b(BaseModel):
     def __init__(self, generation_settings):
         model_info = CHAT_MODELS['Dolphin-Qwen 2 - 7b']
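
Reviewer note: a minimal sketch of how the new class's streaming
generate_response() might be exercised end-to-end, not part of the patch.
It assumes BaseModel assigns self.tokenizer and self.model during
__init__ (as the other model classes in module_chat.py imply); the
generation_settings values and the prompt are placeholders, not the
project's actual defaults.

    # Hedged usage sketch; placeholder names are flagged in comments.
    from module_chat import CodeQwen1_5_7b_chat

    # Placeholder settings; the repo's real defaults may differ.
    generation_settings = {'max_new_tokens': 512, 'do_sample': False}

    model = CodeQwen1_5_7b_chat(generation_settings)

    # Build the ChatML prompt and tokenize it onto the model's device
    # (assumes BaseModel exposes self.tokenizer and self.model).
    prompt = model.create_prompt("Write a function that reverses a string.")
    inputs = model.tokenizer(prompt, return_tensors='pt').to(model.model.device)

    # generate_response() is a generator: model.generate() runs in a
    # background thread filling the TextIteratorStreamer while this
    # loop drains it chunk by chunk.
    for chunk in model.generate_response(dict(inputs)):
        print(chunk, end='', flush=True)
    print()

The worker thread is required by design: TextIteratorStreamer only yields
text while generate() is producing it concurrently on another thread.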