docs: add notice and example for model config (#3510)
* docs(config): add notice and example for model config

Signed-off-by: Wei Zhang <[email protected]>

* chore: emphasise do not words on example

Signed-off-by: Wei Zhang <[email protected]>

---------

Signed-off-by: Wei Zhang <[email protected]>
zwpaper authored Dec 6, 2024 · 1 parent a52ac1b · commit fc956e5
Showing 3 changed files with 42 additions and 30 deletions.
website/docs/references/models-http-api/llamafile.md (12 changes: 6 additions & 6 deletions)
@@ -17,23 +17,23 @@ Below is an example configuration:
 ```toml title="~/.tabby/config.toml"
 # Chat model
 [model.chat.http]
-kind = "openai/chat"
+kind = "openai/chat" # llamafile uses the openai/chat kind
 model_name = "your_model"
-api_endpoint = "http://localhost:8081/v1"
+api_endpoint = "http://localhost:8081/v1" # Please make sure the endpoint ends with the `/v1` suffix
 api_key = ""

 # Completion model
 [model.completion.http]
-kind = "llama.cpp/completion"
+kind = "llama.cpp/completion" # llamafile uses the llama.cpp/completion kind
 model_name = "your_model"
-api_endpoint = "http://localhost:8081"
+api_endpoint = "http://localhost:8081" # DO NOT append the `/v1` suffix
 api_key = "secret-api-key"
 prompt_template = "<|fim_prefix|>{prefix}<|fim_suffix|>{suffix}<|fim_middle|>" # Example prompt template for the Qwen2.5 Coder model series.

 # Embedding model
 [model.embedding.http]
-kind = "llama.cpp/embedding"
+kind = "llama.cpp/embedding" # llamafile uses the llama.cpp/embedding kind
 model_name = "your_model"
-api_endpoint = "http://localhost:8082"
+api_endpoint = "http://localhost:8082" # DO NOT append the `/v1` suffix
 api_key = ""
 ```
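
The `/v1` asymmetry the new comments call out is worth spelling out: the `openai/chat` kind talks to llamafile's OpenAI-compatible layer, whose routes live under the `/v1` prefix, while the `llama.cpp/completion` and `llama.cpp/embedding` kinds use the native llama.cpp routes at the server root. Below is a minimal sketch of the two request shapes, assuming a llamafile server on port 8081 as configured above (the exact payloads Tabby sends are an implementation detail not shown in this commit):

```python
import json
import urllib.request

def post_json(url: str, payload: dict) -> dict:
    """POST a JSON payload and decode the JSON response."""
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

# Chat: OpenAI-compatible route, hence the `/v1` suffix in `api_endpoint`.
chat = post_json(
    "http://localhost:8081/v1/chat/completions",
    {"model": "your_model", "messages": [{"role": "user", "content": "Hello"}]},
)

# Completion: native llama.cpp route at the server root, hence no `/v1` suffix.
completion = post_json(
    "http://localhost:8081/completion",
    {"prompt": "def add(a, b):"},
)
```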
website/docs/references/models-http-api/openai.md (22 changes: 11 additions & 11 deletions)
@@ -15,21 +15,21 @@ Tabby continues to support the OpenAI Completion API specifications due to its w
 # Chat model
 [model.chat.http]
 kind = "openai/chat"
-model_name = "gpt-3.5-turbo"
-api_endpoint = "https://api.openai.com/v1"
-api_key = "secret-api-key"
-
-# Embedding model
-[model.embedding.http]
-kind = "openai/embedding"
-model_name = "text-embedding-3-small"
-api_endpoint = "https://api.openai.com/v1"
+model_name = "gpt-3.5-turbo" # Please make sure to use a chat model, such as gpt-4o
+api_endpoint = "https://api.openai.com/v1" # DO NOT append the `/chat/completions` suffix
 api_key = "secret-api-key"

 # Completion model
 [model.completion.http]
 kind = "openai/completion"
-model_name = "your_model"
-api_endpoint = "https://url_to_your_backend_or_service"
+model_name = "gpt-3.5-turbo-instruct" # Please make sure to use a completion model
+api_endpoint = "https://api.openai.com/v1" # DO NOT append the `/completions` suffix
 api_key = "secret-api-key"

+
+# Embedding model
+[model.embedding.http]
+kind = "openai/embedding"
+model_name = "text-embedding-3-small" # Please make sure to use an embedding model
+api_endpoint = "https://api.openai.com/v1" # DO NOT append the `/embeddings` suffix
+api_key = "secret-api-key"
 ```
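
The repeated `DO NOT append` notes reflect the usual OpenAI client convention: `api_endpoint` is the bare base URL ending in `/v1`, and the per-API path (`/chat/completions`, `/completions`, `/embeddings`) is appended by the client itself. Tabby's internal HTTP client is not shown in this commit; the sketch below uses the official `openai` Python package to illustrate the same base-URL convention:

```python
from openai import OpenAI  # pip install openai

# The base URL stops at `/v1`; the client appends the per-API path itself.
client = OpenAI(
    api_key="secret-api-key",
    base_url="https://api.openai.com/v1",  # no `/chat/completions` etc. here
)

# -> POST https://api.openai.com/v1/chat/completions
chat = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)

# -> POST https://api.openai.com/v1/embeddings
embedding = client.embeddings.create(
    model="text-embedding-3-small",
    input="Hello",
)
```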
website/docs/references/models-http-api/vllm.md (38 changes: 25 additions & 13 deletions)
@@ -3,30 +3,42 @@
 [vLLM](https://docs.vllm.ai/en/stable/) is a fast and user-friendly library for LLM inference and serving.

 vLLM offers an `OpenAI Compatible Server`, enabling us to use the OpenAI kinds for chat and embedding.
-However, for completion, there are certain differences in the implementation. Therefore, we should use the `vllm/completion` kind and provide a `prompt_template` depending on the specific models.
+However, for completion, there are certain differences in the implementation.
+Therefore, we should use the `vllm/completion` kind and provide a `prompt_template` depending on the specific model.

-Below is an example
+Please note that models differ in their capabilities for completion or chat.
+You should confirm the model's capability before employing it for chat or completion tasks.
+
+Additionally, some models can serve as both chat and completion models.
+For detailed information, please refer to the [Model Registry](../../models/index.mdx).
+
+Below is an example for vLLM running at `http://localhost:8000`.
+
+Please note the following requirements for each model type:
+1. `model_name` must exactly match the one used to run vLLM.
+2. `api_endpoint` should follow the format `http://host:port/v1`.
+3. `api_key` should be identical to the one used to run vLLM.

 ```toml title="~/.tabby/config.toml"
 # Chat model
 [model.chat.http]
 kind = "openai/chat"
-model_name = "your_model"
-api_endpoint = "https://url_to_your_backend_or_service"
-api_key = "secret-api-key"
-
-# Embedding model
-[model.embedding.http]
-kind = "openai/embedding"
-model_name = "your_model"
-api_endpoint = "https://url_to_your_backend_or_service"
+model_name = "your_model" # Please make sure to use a chat model.
+api_endpoint = "http://localhost:8000/v1"
 api_key = "secret-api-key"

 # Completion model
 [model.completion.http]
 kind = "vllm/completion"
-model_name = "your_model"
-api_endpoint = "https://url_to_your_backend_or_service"
+model_name = "your_model" # Please make sure to use a completion model.
+api_endpoint = "http://localhost:8000/v1"
 api_key = "secret-api-key"
 prompt_template = "<PRE> {prefix} <SUF>{suffix} <MID>" # Example prompt template for the CodeLlama model series.
+
+# Embedding model
+[model.embedding.http]
+kind = "openai/embedding"
+model_name = "your_model"
+api_endpoint = "http://localhost:8000/v1"
+api_key = "secret-api-key"
 ```
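
The three numbered requirements can be verified against a running server before pointing Tabby at it. A small sanity-check sketch, assuming vLLM's OpenAI-compatible `/v1/models` listing and the example values above:

```python
import json
import urllib.request

API_ENDPOINT = "http://localhost:8000/v1"  # requirement 2: `http://host:port/v1`
API_KEY = "secret-api-key"                 # requirement 3: same key vLLM was started with
MODEL_NAME = "your_model"                  # requirement 1: must match the model served by vLLM

req = urllib.request.Request(
    f"{API_ENDPOINT}/models",
    headers={"Authorization": f"Bearer {API_KEY}"},
)
with urllib.request.urlopen(req) as resp:
    served = [m["id"] for m in json.load(resp)["data"]]

assert MODEL_NAME in served, f"'{MODEL_NAME}' not served; server offers: {served}"
```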
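
The `prompt_template` line is what turns a completion request into a fill-in-the-middle prompt: the code before the cursor is substituted for `{prefix}` and the code after it for `{suffix}`, and the template itself is model-specific. An illustrative expansion of the CodeLlama-style template from the example (plain string substitution, shown as an assumption about the mechanics rather than Tabby's actual code path):

```python
# CodeLlama-style fill-in-the-middle template from the example config.
PROMPT_TEMPLATE = "<PRE> {prefix} <SUF>{suffix} <MID>"

prefix = "def fib(n):\n    if n < 2:\n        return n\n    "
suffix = "\nprint(fib(10))"

# The prompt the vLLM backend would receive for this cursor position;
# the model is expected to generate the missing middle after <MID>.
prompt = PROMPT_TEMPLATE.format(prefix=prefix, suffix=suffix)
print(prompt)
```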
