
Support for local models (+ Other OpenAI compatible end-points) #32

Open · wants to merge 8 commits into main
44 changes: 40 additions & 4 deletions README.md
@@ -117,12 +117,48 @@ The CLI will prompt you to input instructions interactively:
You can configure the demo by specifying the following parameters:

- `--aggregator`: The primary model used for final response generation.

Review comment: Why are we renaming this parameter?

@tijszwinkels (Author) commented on Jul 26, 2024:

The documentation was incorrect: Typer derives the CLI parameters from the `main` function's signature, where `model` was used to define the primary model.

I changed the code to use `--aggregator` instead of `--model`.

- - `--reference_models`: List of models used as references.
+ - `--reference-models`: Models used as references.
  - `--temperature`: Controls the randomness of the response generation.
- - `--max_tokens`: Maximum number of tokens in the response.
+ - `--max-tokens`: Maximum number of tokens in the response.
  - `--rounds`: Number of rounds to process the input for refinement (the number of rounds equals the number of MoA layers minus 1).
- - `--num_proc`: Number of processes to run in parallel for faster execution.
- - `--multi_turn`: Boolean to toggle multi-turn interaction capability.
+ - `--num-proc`: Number of processes to run in parallel for faster execution.
+ - `--multi-turn`: Boolean to toggle multi-turn interaction capability.

Specify `--reference-models` multiple times to use multiple models as references. For example:

```bash
# Specify multiple reference models
python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
```

## Other OpenAI-compatible API endpoints

To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` and `OPENAI_API_KEY` environment variables:

```bash
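# Leave TOGETHER_API_KEY empty: when it is set, it takes precedence (see utils.py)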
export TOGETHER_API_KEY=
export OPENAI_BASE_URL="https://your-api-provider.com/v1"
export OPENAI_API_KEY="your-api-key-here"
```

This way, any third-party OpenAI-compatible API can be used, such as OpenRouter, Groq, or a local model server.
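For instance, a minimal sketch for pointing the bot at OpenRouter (the base URL is OpenRouter's OpenAI-compatible endpoint; the key and model names are illustrative placeholders):

```bash
export TOGETHER_API_KEY=                              # keep empty so it doesn't take precedence
export OPENAI_BASE_URL="https://openrouter.ai/api/v1"
export OPENAI_API_KEY="your-openrouter-key"

# Model identifiers are illustrative; use names from the provider's catalog
python bot.py --aggregator "meta-llama/llama-3-70b-instruct" \
  --reference-models "mistralai/mixtral-8x22b-instruct"
```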

### Ollama

For example, to run the bot using Ollama:

1. Set up the environment:

```bash
export OPENAI_BASE_URL=http://localhost:11434/v1
export OPENAI_API_KEY=ollama
```

2. Run the bot command:

```bash
python bot.py --aggregator llama3 --reference-models llama3 --reference-models mistral
```
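The referenced models must be available in the local Ollama instance; assuming a default install, they can be pulled beforehand:

```bash
ollama pull llama3
ollama pull mistral
```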

## Evaluation

9 changes: 5 additions & 4 deletions bot.py
@@ -83,7 +83,7 @@ def process_fn(


def main(
-    model: str = "Qwen/Qwen2-72B-Instruct",
+    aggregator: str = "Qwen/Qwen2-72B-Instruct",
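+    # Typer derives CLI options from this signature, so the flag is now --aggregator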
    reference_models: list[str] = default_reference_models,
    temperature: float = 0.7,
    max_tokens: int = 512,
@@ -118,7 +118,7 @@ def main(

    model = Prompt.ask(
        "\n1. What main model do you want to use?",
-        default="Qwen/Qwen2-72B-Instruct",
+        default=aggregator,
    )
    console.print(f"Selected {model}.", style="yellow italic")
    temperature = float(
@@ -199,8 +199,9 @@ def main(

    for chunk in output:
        out = chunk.choices[0].delta.content
-        console.print(out, end="")
-        all_output += out
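+        # Streaming deltas can arrive with content=None (e.g. role-only chunks); guard before use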
+        if out is not None:
+            console.print(out, end="")
+            all_output += out
    print()

    if DEBUG:
28 changes: 21 additions & 7 deletions utils.py
@@ -10,6 +10,19 @@

DEBUG = int(os.environ.get("DEBUG", "0"))

TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
EVAL_API_KEY = os.environ.get("EVAL_API_KEY")

# If TOGETHER_API_KEY is set, use it for generation and keep the OpenAI key for evaluations
if TOGETHER_API_KEY:
    OPENAI_API_KEY = TOGETHER_API_KEY
    EVAL_API_KEY = os.environ.get("OPENAI_API_KEY")

OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
EVAL_BASE_URL = os.environ.get("EVAL_BASE_URL", "https://api.openai.com/v1")
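# Evaluations can thus be routed separately, e.g. (illustrative values):
#   export EVAL_BASE_URL="https://your-eval-provider.com/v1"
#   export EVAL_API_KEY="your-eval-key"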



def generate_together(
    model,
@@ -21,12 +34,12 @@

    output = None

+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"

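    # Retry with increasing backoff intervals on failed attempts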
    for sleep_time in [1, 2, 4, 8, 16, 32]:

        try:

-            endpoint = "https://api.together.xyz/v1/chat/completions"

            if DEBUG:
                logger.debug(
                    f"Sending messages ({len(messages)}) (last message: `{messages[-1]['content'][:20]}...`) to `{model}`."
@@ -41,7 +54,7 @@
"messages": messages,
},
headers={
"Authorization": f"Bearer {os.environ.get('TOGETHER_API_KEY')}",
"Authorization": f"Bearer {OPENAI_API_KEY}",
},
)
if "error" in res.json():
@@ -80,11 +93,10 @@ def generate_together_stream(
    max_tokens=2048,
    temperature=0.7,
):
endpoint = "https://api.together.xyz/v1"
client = openai.OpenAI(
api_key=os.environ.get("TOGETHER_API_KEY"), base_url=endpoint
api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL
)
endpoint = "https://api.together.xyz/v1/chat/completions"
endpoint = f"{OPENAI_BASE_URL}/chat/completions"
    response = client.chat.completions.create(
        model=model,
        messages=messages,
@@ -104,7 +116,8 @@ def generate_openai(
):

    client = openai.OpenAI(
-        api_key=os.environ.get("OPENAI_API_KEY"),
+        api_key=EVAL_API_KEY,
+        base_url=EVAL_BASE_URL,
    )

    for sleep_time in [1, 2, 4, 8, 16, 32]:
@@ -179,3 +192,4 @@ def generate_with_references(
        temperature=temperature,
        max_tokens=max_tokens,
    )