diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md
index 97310fce8..b2bf55f81 100644
--- a/docs/api-inference/tasks/chat-completion.md
+++ b/docs/api-inference/tasks/chat-completion.md
@@ -24,13 +24,12 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen
- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model.
-- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model.
-- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model.
+- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model trained to follow instructions.
#### Conversational Vision-Language Models (VLMs)
- [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct): Powerful vision language model with great visual understanding and reasoning capabilities.
-- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model.
+- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model.
### API Playground
@@ -65,48 +64,139 @@ The API supports:
curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
--d '{
- "model": "google/gemma-2-2b-it",
- "messages": [{"role": "user", "content": "What is the capital of France?"}],
- "max_tokens": 500,
- "stream": false
+--data '{
+ "model": "google/gemma-2-2b-it",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 500,
+ "stream": true
}'
-
```
+
```py
from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-for message in client.chat_completion(
- model="google/gemma-2-2b-it",
- messages=[{"role": "user", "content": "What is the capital of France?"}],
+messages = [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+]
+
+stream = client.chat.completions.create(
+ model="google/gemma-2-2b-it",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
+
+
+
+```py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+]
+
+stream = client.chat.completions.create(
+ model="google/gemma-2-2b-it",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
```js
-import { HfInference } from "@huggingface/inference";
+import { HfInference } from "@huggingface/inference"
-const inference = new HfInference("hf_***");
+const client = new HfInference("hf_***")
-for await (const chunk of inference.chatCompletionStream({
+let out = "";
+
+const stream = client.chatCompletionStream({
model: "google/gemma-2-2b-it",
- messages: [{ role: "user", content: "What is the capital of France?" }],
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?"
+ }
+ ],
+ max_tokens: 500
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+}
+```
+
+
+
+```js
+import { OpenAI } from "openai"
+
+const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "hf_***"
+})
+
+let out = "";
+
+const stream = await client.chat.completions.create({
+ model: "google/gemma-2-2b-it",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?"
+ }
+ ],
max_tokens: 500,
-})) {
- process.stdout.write(chunk.choices[0]?.delta?.content || "");
+ stream: true,
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
}
```
+
To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion).
@@ -125,75 +215,194 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
curl 'https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
--d '{
- "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
- "messages": [
+--data '{
+ "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+ "messages": [
{
"role": "user",
"content": [
- {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
- {"type": "text", "text": "Describe this image in one sentence."}
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
]
}
],
- "max_tokens": 500,
- "stream": false
+ "max_tokens": 500,
+ "stream": true
}'
-
```
+
```py
from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+]
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
-for message in client.chat_completion(
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": image_url}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
- }
- ],
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+
+
+```py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+]
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
+
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+
```js
-import { HfInference } from "@huggingface/inference";
+import { HfInference } from "@huggingface/inference"
-const inference = new HfInference("hf_***");
-const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
+const client = new HfInference("hf_***")
-for await (const chunk of inference.chatCompletionStream({
+let out = "";
+
+const stream = client.chatCompletionStream({
model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
messages: [
{
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": imageUrl}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence."
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens: 500
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+}
+```
+
+
+
+```js
+import { OpenAI } from "openai"
+
+const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "hf_***"
+})
+
+let out = "";
+
+const stream = await client.chat.completions.create({
+ model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence."
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
}
],
max_tokens: 500,
-})) {
- process.stdout.write(chunk.choices[0]?.delta?.content || "");
+ stream: true,
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
}
```
+
To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion).
diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md
index ce5ad7192..31a4e68f7 100644
--- a/docs/api-inference/tasks/image-classification.md
+++ b/docs/api-inference/tasks/image-classification.md
@@ -25,7 +25,6 @@ For more details about the `image-classification` task, check out its [dedicated
### Recommended models
- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model.
-- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending).
diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md
index 437b599f8..7163f3fd0 100644
--- a/docs/api-inference/tasks/image-segmentation.md
+++ b/docs/api-inference/tasks/image-segmentation.md
@@ -24,7 +24,8 @@ For more details about the `image-segmentation` task, check out its [dedicated p
### Recommended models
-- [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k dataset.
+- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k.
+- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending).
@@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/
```bash
-curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \
+curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small \
-X POST \
--data-binary '@cats.jpg' \
-H "Authorization: Bearer hf_***"
@@ -46,7 +47,7 @@ curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-a
```py
import requests
-API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512"
+API_URL = "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
@@ -66,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu
async function query(filename) {
const data = fs.readFileSync(filename);
const response = await fetch(
- "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512",
+ "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small",
{
headers: {
Authorization: "Bearer hf_***"
diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md
index bacc08dac..3ee52e917 100644
--- a/docs/api-inference/tasks/image-text-to-text.md
+++ b/docs/api-inference/tasks/image-text-to-text.md
@@ -25,8 +25,7 @@ For more details about the `image-text-to-text` task, check out its [dedicated p
### Recommended models
- [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct): Powerful vision language model with great visual understanding and reasoning capabilities.
-- [HuggingFaceM4/idefics2-8b-chatty](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty): Cutting-edge conversational vision language model that can take multiple image inputs.
-- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model.
+- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending).
@@ -39,13 +38,14 @@ Explore all available models and find the one that suits you best [here](https:/
```bash
curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct \
-X POST \
- -d '{"inputs": No input example has been defined for this model task.}' \
+ -d '{"inputs": "Can you please let us know more details about your "}' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer hf_***"
```
+
```py
import requests
@@ -56,24 +56,47 @@ from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+messages = "\"Can you please let us know more details about your \""
-for message in client.chat_completion(
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": image_url}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
- }
- ],
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+
+
+```py
+import requests
+
+API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct"
+headers = {"Authorization": "Bearer hf_***"}
+
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = "\"Can you please let us know more details about your \""
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
+
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text-to-text).
@@ -96,7 +119,7 @@ async function query(data) {
return result;
}
-query({"inputs": No input example has been defined for this model task.}).then((response) => {
+query({"inputs": "Can you please let us know more details about your "}).then((response) => {
console.log(JSON.stringify(response));
});
```
diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md
index 7e315ddc4..f04909d4d 100644
--- a/docs/api-inference/tasks/text-generation.md
+++ b/docs/api-inference/tasks/text-generation.md
@@ -27,11 +27,9 @@ For more details about the `text-generation` task, check out its [dedicated page
### Recommended models
- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
-- [bigcode/starcoder](https://huggingface.co/bigcode/starcoder): A code generation model that can generate code in 80+ languages.
- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model.
-- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model.
-- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model.
+- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model trained to follow instructions.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending).
diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts
index 8240b6095..8af49b21b 100644
--- a/scripts/api-inference/scripts/generate.ts
+++ b/scripts/api-inference/scripts/generate.ts
@@ -99,10 +99,34 @@ const TASKS_DATA = (await response.json()) as any;
//// Snippet utils ////
///////////////////////
+const formatSnippets = (result: snippets.types.InferenceSnippet | snippets.types.InferenceSnippet[], defaultClient: string, language: string): string => {
+  // Renders snippet(s) as Markdown code fences tagged with `language`. A lone snippet (object or one-element array) becomes a bare fence; the fence is emitted here.
+  if (!Array.isArray(result) || result.length === 1) {
+    const snippet = Array.isArray(result) ? result[0] : result;
+    return `\`\`\`${language}\n${snippet.content}\n\`\`\``;
+  }
+
+  // Several snippets: wrap each fence in a matching <client>…</client> pair (falling back to defaultClient when snippet.client is unset), separated by a blank line.
+  return result
+    .map(snippet =>
+      `<${snippet.client || defaultClient}>\n\`\`\`${language}\n${snippet.content}\n\`\`\`\n</${snippet.client || defaultClient}>`
+    )
+    .join('\n\n');
+};
+
const GET_SNIPPET_FN = {
- curl: snippets.curl.getCurlInferenceSnippet,
- js: snippets.js.getJsInferenceSnippet,
- python: snippets.python.getPythonInferenceSnippet,
+ curl: (modelData: any, token: string) => { // build the cURL snippet(s), then render them to a Markdown string
+ const result = snippets.curl.getCurlInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'curl', 'bash'); // default client tag 'curl', fence language 'bash'
+ },
+ js: (modelData: any, token: string) => { // build the JavaScript snippet(s), then render them to a Markdown string
+ const result = snippets.js.getJsInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'javascript', 'js'); // default client tag 'javascript', fence language 'js'
+ },
+ python: (modelData: any, token: string) => { // build the Python snippet(s), then render them to a Markdown string
+ const result = snippets.python.getPythonInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'python', 'py'); // default client tag 'python', fence language 'py'
+ },
} as const;
const HAS_SNIPPET_FN = {
@@ -129,8 +153,7 @@ export function getInferenceSnippet(
// @ts-ignore
if (HAS_SNIPPET_FN[language](modelData)) {
// @ts-ignore
- const snippets = GET_SNIPPET_FN[language](modelData, "hf_***");
- return Array.isArray(snippets) ? snippets[0].content : snippets.content;
+ return GET_SNIPPET_FN[language](modelData, "hf_***");
}
}
@@ -483,6 +506,7 @@ function fetchChatCompletion() {
// @ts-ignore
javascript: getInferenceSnippet(mainModel.id, task.pipelineTag, "js", mainModel.config, ["conversational"]),
};
+ console.log(taskSnippets);
DATA.snippets[task.name] = SNIPPETS_TEMPLATE({
taskSnippets,
taskSnakeCase: baseName.replace("-", "_"),
diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars
index 2d0f099e2..09202f6ba 100644
--- a/scripts/api-inference/templates/common/snippets-template.handlebars
+++ b/scripts/api-inference/templates/common/snippets-template.handlebars
@@ -5,18 +5,14 @@
{{!-- cURL snippet (if exists) --}}
{{#if taskSnippets.curl}}
-```bash
{{{taskSnippets.curl}}}
-```
{{/if}}
{{!-- Python snippet (if exists) --}}
{{#if taskSnippets.python}}
-```py
{{{taskSnippets.python}}}
-```
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}).
@@ -25,9 +21,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu
{{!-- JavaScript snippet (if exists) --}}
{{#if taskSnippets.javascript}}
-```js
{{{taskSnippets.javascript}}}
-```
To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}).