Added new model to Together.ai

airtai · Jul 6, 2024 · de83397
1 parent 5f32798
commit de83397
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 46 deletions.
diff --git a/fastagency/models/llms/together.py b/fastagency/models/llms/together.py
@@ -14,66 +14,66 @@
 
 # models retrieved from the API on June 26, 2024
 together_model_string = {
+    "WizardLM v1.2 (13B)": "WizardLM/WizardLM-13B-V1.2",
+    "Code Llama Instruct (34B)": "togethercomputer/CodeLlama-34b-Instruct",
+    "Upstage SOLAR Instruct v1 (11B)": "upstage/SOLAR-10.7B-Instruct-v1.0",
+    "OpenHermes-2-Mistral (7B)": "teknium/OpenHermes-2-Mistral-7B",
+    "LLaMA-2-7B-32K-Instruct (7B)": "togethercomputer/Llama-2-7B-32K-Instruct",
+    "ReMM SLERP L2 (13B)": "Undi95/ReMM-SLERP-L2-13B",
+    "Toppy M (7B)": "Undi95/Toppy-M-7B",
+    "OpenChat 3.5": "openchat/openchat-3.5-1210",
     "Chronos Hermes (13B)": "Austism/chronos-hermes-13b",
-    "MythoMax-L2 (13B)": "Gryphe/MythoMax-L2-13b",
-    "Nous Capybara v1.9 (7B)": "NousResearch/Nous-Capybara-7B-V1p9",
-    "Nous Hermes 2 - Mistral DPO (7B)": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-    "Nous Hermes 2 - Mixtral 8x7B-DPO ": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "Nous Hermes 2 - Mixtral 8x7B-SFT": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
-    "Nous Hermes-2 Yi (34B)": "NousResearch/Nous-Hermes-2-Yi-34B",
-    "Nous Hermes Llama-2 (13B)": "NousResearch/Nous-Hermes-Llama2-13b",
-    "Nous Hermes LLaMA-2 (7B)": "NousResearch/Nous-Hermes-llama-2-7b",
-    "OpenOrca Mistral (7B) 8K": "Open-Orca/Mistral-7B-OpenOrca",
-    "Qwen 1.5 Chat (0.5B)": "Qwen/Qwen1.5-0.5B-Chat",
-    "Qwen 1.5 Chat (1.8B)": "Qwen/Qwen1.5-1.8B-Chat",
-    "Qwen 1.5 Chat (110B)": "Qwen/Qwen1.5-110B-Chat",
-    "Qwen 1.5 Chat (14B)": "Qwen/Qwen1.5-14B-Chat",
-    "Qwen 1.5 Chat (32B)": "Qwen/Qwen1.5-32B-Chat",
-    "Qwen 1.5 Chat (4B)": "Qwen/Qwen1.5-4B-Chat",
-    "Qwen 1.5 Chat (72B)": "Qwen/Qwen1.5-72B-Chat",
+    "Snorkel Mistral PairRM DPO (7B)": "snorkelai/Snorkel-Mistral-PairRM-DPO",
     "Qwen 1.5 Chat (7B)": "Qwen/Qwen1.5-7B-Chat",
-    "Qwen 2 Instruct (72B)": "Qwen/Qwen2-72B-Instruct",
+    "Qwen 1.5 Chat (14B)": "Qwen/Qwen1.5-14B-Chat",
+    "Qwen 1.5 Chat (1.8B)": "Qwen/Qwen1.5-1.8B-Chat",
     "Snowflake Arctic Instruct": "Snowflake/snowflake-arctic-instruct",
-    "ReMM SLERP L2 (13B)": "Undi95/ReMM-SLERP-L2-13B",
-    "Toppy M (7B)": "Undi95/Toppy-M-7B",
-    "WizardLM v1.2 (13B)": "WizardLM/WizardLM-13B-V1.2",
-    "OLMo Instruct (7B)": "allenai/OLMo-7B-Instruct",
-    "Code Llama Instruct (13B)": "togethercomputer/CodeLlama-13b-Instruct",
-    "Code Llama Instruct (34B)": "togethercomputer/CodeLlama-34b-Instruct",
     "Code Llama Instruct (70B)": "codellama/CodeLlama-70b-Instruct-hf",
-    "Code Llama Instruct (7B)": "togethercomputer/CodeLlama-7b-Instruct",
+    "Nous Hermes 2 - Mixtral 8x7B-SFT": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
     "Dolphin 2.5 Mixtral 8x7b": "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
-    "DBRX Instruct": "databricks/dbrx-instruct",
+    "Nous Hermes 2 - Mixtral 8x7B-DPO ": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "Mixtral-8x22B Instruct v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
     "Deepseek Coder Instruct (33B)": "deepseek-ai/deepseek-coder-33b-instruct",
-    "DeepSeek LLM Chat (67B)": "deepseek-ai/deepseek-llm-67b-chat",
+    "Nous Hermes Llama-2 (13B)": "NousResearch/Nous-Hermes-Llama2-13b",
+    "Vicuna v1.5 (13B)": "lmsys/vicuna-13b-v1.5",
+    "Qwen 1.5 Chat (0.5B)": "Qwen/Qwen1.5-0.5B-Chat",
+    "Code Llama Instruct (7B)": "togethercomputer/CodeLlama-7b-Instruct",
+    "Nous Hermes-2 Yi (34B)": "NousResearch/Nous-Hermes-2-Yi-34B",
+    "Code Llama Instruct (13B)": "togethercomputer/CodeLlama-13b-Instruct",
+    "Llama3 8B Chat HF INT4": "togethercomputer/Llama-3-8b-chat-hf-int4",
+    "OpenHermes-2.5-Mistral (7B)": "teknium/OpenHermes-2p5-Mistral-7B",
+    "Nous Capybara v1.9 (7B)": "NousResearch/Nous-Capybara-7B-V1p9",
+    "Nous Hermes 2 - Mistral DPO (7B)": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
+    "StripedHyena Nous (7B)": "togethercomputer/StripedHyena-Nous-7B",
+    "Alpaca (7B)": "togethercomputer/alpaca-7b",
     "Platypus2 Instruct (70B)": "garage-bAInd/Platypus2-70B-instruct",
     "Gemma Instruct (2B)": "google/gemma-2b-it",
     "Gemma Instruct (7B)": "google/gemma-7b-it",
-    "Vicuna v1.5 (13B)": "lmsys/vicuna-13b-v1.5",
-    "Vicuna v1.5 (7B)": "lmsys/vicuna-7b-v1.5",
-    "Reserved - DBRX Instruct": "medaltv/dbrx-instruct",
-    "LLaMA-2 Chat (13B)": "togethercomputer/llama-2-13b-chat",
-    "LLaMA-2 Chat (70B)": "togethercomputer/llama-2-70b-chat",
-    "LLaMA-2 Chat (7B)": "togethercomputer/llama-2-7b-chat",
-    "Meta Llama 3 8B Chat": "meta-llama/Llama-3-8b-chat-hf",
-    "WizardLM-2 (8x22B)": "microsoft/WizardLM-2-8x22B",
+    "OLMo Instruct (7B)": "allenai/OLMo-7B-Instruct",
+    "Qwen 1.5 Chat (4B)": "Qwen/Qwen1.5-4B-Chat",
+    "MythoMax-L2 (13B)": "Gryphe/MythoMax-L2-13b",
     "Mistral (7B) Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
     "Mistral (7B) Instruct v0.2": "mistralai/Mistral-7B-Instruct-v0.2",
+    "OpenOrca Mistral (7B) 8K": "Open-Orca/Mistral-7B-OpenOrca",
+    "01-ai Yi Chat (34B)": "zero-one-ai/Yi-34B-Chat",
+    "Nous Hermes LLaMA-2 (7B)": "NousResearch/Nous-Hermes-llama-2-7b",
+    "Qwen 1.5 Chat (32B)": "Qwen/Qwen1.5-32B-Chat",
+    "DBRX Instruct": "databricks/dbrx-instruct",
+    "Qwen 2 Instruct (72B)": "Qwen/Qwen2-72B-Instruct",
+    "Qwen 1.5 Chat (72B)": "Qwen/Qwen1.5-72B-Chat",
+    "DeepSeek LLM Chat (67B)": "deepseek-ai/deepseek-llm-67b-chat",
+    "Vicuna v1.5 (7B)": "lmsys/vicuna-7b-v1.5",
+    "WizardLM-2 (8x22B)": "microsoft/WizardLM-2-8x22B",
+    "Togethercomputer Llama3 8B Instruct Int8": "togethercomputer/Llama-3-8b-chat-hf-int8",
     "Mistral (7B) Instruct v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
-    "Mixtral-8x22B Instruct v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "Qwen 1.5 Chat (110B)": "Qwen/Qwen1.5-110B-Chat",
+    "LLaMA-2 Chat (13B)": "togethercomputer/llama-2-13b-chat",
+    "Meta Llama 3 8B Chat": "meta-llama/Llama-3-8b-chat-hf",
     "Mixtral-8x7B Instruct v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "OpenChat 3.5": "openchat/openchat-3.5-1210",
-    "Snorkel Mistral PairRM DPO (7B)": "snorkelai/Snorkel-Mistral-PairRM-DPO",
-    "OpenHermes-2-Mistral (7B)": "teknium/OpenHermes-2-Mistral-7B",
-    "OpenHermes-2.5-Mistral (7B)": "teknium/OpenHermes-2p5-Mistral-7B",
-    "LLaMA-2-7B-32K-Instruct (7B)": "togethercomputer/Llama-2-7B-32K-Instruct",
-    "StripedHyena Nous (7B)": "togethercomputer/StripedHyena-Nous-7B",
-    "Alpaca (7B)": "togethercomputer/alpaca-7b",
-    "Upstage SOLAR Instruct v1 (11B)": "upstage/SOLAR-10.7B-Instruct-v1.0",
-    "01-ai Yi Chat (34B)": "zero-one-ai/Yi-34B-Chat",
+    "LLaMA-2 Chat (7B)": "togethercomputer/llama-2-7b-chat",
+    "LLaMA-2 Chat (70B)": "togethercomputer/llama-2-70b-chat",
     "Meta Llama 3 70B Chat": "meta-llama/Llama-3-70b-chat-hf",
-    "Llama3 8B Chat HF INT4": "togethercomputer/Llama-3-8b-chat-hf-int4",
-    "Togethercomputer Llama3 8B Instruct Int8": "togethercomputer/Llama-3-8b-chat-hf-int8",
+    "Reserved - DBRX Instruct": "medaltv/dbrx-instruct",
     "Koala (7B)": "togethercomputer/Koala-7B",
     "Guanaco (65B) ": "togethercomputer/guanaco-65b",
     "Vicuna v1.3 (7B)": "lmsys/vicuna-7b-v1.3",
@@ -89,6 +89,7 @@
     "Meta Llama 3 8B Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
     "Meta Llama 3 70B Instruct": "meta-llama/Meta-Llama-3-70B-Instruct",
     "Gemma-2 Instruct (9B)": "google/gemma-2-9b-it",
+    "Hermes 2 Theta Llama-3 70B": "NousResearch/Hermes-2-Theta-Llama-3-70B",
 }
 
 TogetherModels: TypeAlias = Literal[tuple(together_model_string.keys())]  # type: ignore[valid-type]

diff --git a/tests/models/llms/test_together.py b/tests/models/llms/test_together.py
@@ -54,6 +54,7 @@ def test_together_model_string(self) -> None:
             if model.type == "chat"
         }
 
+        # print(expected_together_model_string)
         assert together_model_string == expected_together_model_string
 
     @pytest.mark.db()