diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md
index b2ff4aa49..1452756d2 100644
--- a/docs/api-inference/tasks/chat-completion.md
+++ b/docs/api-inference/tasks/chat-completion.md
@@ -64,46 +64,133 @@ The API supports:
curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
--d '{
- "model": "google/gemma-2-2b-it",
- "messages": [{"role": "user", "content": "What is the capital of France?"}],
- "max_tokens": 500,
- "stream": false
+--data '{
+ "model": "google/gemma-2-2b-it",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+ ],
+ "max_tokens": 500,
+ "stream": true
}'
-
```
+Using `huggingface_hub`:
```py
from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-for message in client.chat_completion(
- model="google/gemma-2-2b-it",
- messages=[{"role": "user", "content": "What is the capital of France?"}],
+messages = [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+]
+
+stream = client.chat.completions.create(
+ model="google/gemma-2-2b-it",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+Using `openai`:
+```py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = [
+ {
+ "role": "user",
+ "content": "What is the capital of France?"
+ }
+]
+
+stream = client.chat.completions.create(
+ model="google/gemma-2-2b-it",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+Using `huggingface_hub`:
```js
-import { HfInference } from "@huggingface/inference";
+import { HfInference } from "@huggingface/inference"
-const inference = new HfInference("hf_***");
+const client = new HfInference("hf_***")
-for await (const chunk of inference.chatCompletionStream({
+let out = "";
+
+const stream = client.chatCompletionStream({
model: "google/gemma-2-2b-it",
- messages: [{ role: "user", content: "What is the capital of France?" }],
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?"
+ }
+ ],
+ max_tokens: 500
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+}
+```
+
+Using `openai`:
+```js
+import { OpenAI } from "openai"
+
+const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "hf_***"
+});
+
+let out = "";
+
+const stream = await client.chat.completions.create({
+ model: "google/gemma-2-2b-it",
+ messages: [
+ {
+ role: "user",
+ content: "What is the capital of France?"
+ }
+ ],
max_tokens: 500,
-})) {
- process.stdout.write(chunk.choices[0]?.delta?.content || "");
+ stream: true,
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
}
```
@@ -124,73 +211,188 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/
curl 'https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \
-H "Authorization: Bearer hf_***" \
-H 'Content-Type: application/json' \
--d '{
- "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
- "messages": [
+--data '{
+ "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+ "messages": [
{
"role": "user",
"content": [
- {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}},
- {"type": "text", "text": "Describe this image in one sentence."}
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
]
}
],
- "max_tokens": 500,
- "stream": false
+ "max_tokens": 500,
+ "stream": true
}'
-
```
+Using `huggingface_hub`:
```py
from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+]
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
-for message in client.chat_completion(
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": image_url}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
- }
- ],
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+Using `openai`:
+```py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = [
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Describe this image in one sentence."
+ },
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+]
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion).
+Using `huggingface_hub`:
```js
-import { HfInference } from "@huggingface/inference";
+import { HfInference } from "@huggingface/inference"
-const inference = new HfInference("hf_***");
-const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg";
+const client = new HfInference("hf_***")
-for await (const chunk of inference.chatCompletionStream({
+let out = "";
+
+const stream = client.chatCompletionStream({
model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
messages: [
{
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": imageUrl}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence."
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
+ }
+ ],
+ max_tokens: 500
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
+}
+```
+
+Using `openai`:
+```js
+import { OpenAI } from "openai"
+
+const client = new OpenAI({
+ baseURL: "https://api-inference.huggingface.co/v1/",
+ apiKey: "hf_***"
+});
+
+let out = "";
+
+const stream = await client.chat.completions.create({
+ model: "meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "Describe this image in one sentence."
+ },
+ {
+ type: "image_url",
+ image_url: {
+ url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+ }
+ }
+ ]
}
],
max_tokens: 500,
-})) {
- process.stdout.write(chunk.choices[0]?.delta?.content || "");
+ stream: true,
+});
+
+for await (const chunk of stream) {
+ if (chunk.choices && chunk.choices.length > 0) {
+ const newContent = chunk.choices[0].delta.content;
+ out += newContent;
+ console.log(newContent);
+ }
}
```
diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md
index ce5ad7192..31a4e68f7 100644
--- a/docs/api-inference/tasks/image-classification.md
+++ b/docs/api-inference/tasks/image-classification.md
@@ -25,7 +25,6 @@ For more details about the `image-classification` task, check out its [dedicated
### Recommended models
- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model.
-- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending).
diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md
index 437b599f8..7163f3fd0 100644
--- a/docs/api-inference/tasks/image-segmentation.md
+++ b/docs/api-inference/tasks/image-segmentation.md
@@ -24,7 +24,8 @@ For more details about the `image-segmentation` task, check out its [dedicated p
### Recommended models
-- [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k dataset.
+- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k.
+- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset.
Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending).
@@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/
```bash
-curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \
+curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small \
-X POST \
--data-binary '@cats.jpg' \
-H "Authorization: Bearer hf_***"
@@ -46,7 +47,7 @@ curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-a
```py
import requests
-API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512"
+API_URL = "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small"
headers = {"Authorization": "Bearer hf_***"}
def query(filename):
@@ -66,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu
async function query(filename) {
const data = fs.readFileSync(filename);
const response = await fetch(
- "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512",
+ "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small",
{
headers: {
Authorization: "Bearer hf_***"
diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md
index 9630578c8..e1e44c1d6 100644
--- a/docs/api-inference/tasks/image-text-to-text.md
+++ b/docs/api-inference/tasks/image-text-to-text.md
@@ -38,13 +38,14 @@ Explore all available models and find the one that suits you best [here](https:/
```bash
curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct \
-X POST \
- -d '{"inputs": No input example has been defined for this model task.}' \
+ -d '{"inputs": "Can you please let us know more details about your "}' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer hf_***"
```
+Using `huggingface_hub`:
```py
import requests
@@ -55,23 +56,44 @@ from huggingface_hub import InferenceClient
client = InferenceClient(api_key="hf_***")
-image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+messages = "\"Can you please let us know more details about your \""
-for message in client.chat_completion(
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
- messages=[
- {
- "role": "user",
- "content": [
- {"type": "image_url", "image_url": {"url": image_url}},
- {"type": "text", "text": "Describe this image in one sentence."},
- ],
- }
- ],
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
+ max_tokens=500,
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
+```
+
+Using `openai`:
+```py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="https://api-inference.huggingface.co/v1/",
+ api_key="hf_***"
+)
+
+messages = "\"Can you please let us know more details about your \""
+
+stream = client.chat.completions.create(
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+ messages=messages,
max_tokens=500,
- stream=True,
-):
- print(message.choices[0].delta.content, end="")
+ stream=True
+)
+
+for chunk in stream:
+ print(chunk.choices[0].delta.content, end="")
```
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text-to-text).
@@ -95,7 +117,7 @@ async function query(data) {
return result;
}
-query({"inputs": No input example has been defined for this model task.}).then((response) => {
+query({"inputs": "Can you please let us know more details about your "}).then((response) => {
console.log(JSON.stringify(response));
});
```
diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts
index 51997f008..c6e48a58e 100644
--- a/scripts/api-inference/scripts/generate.ts
+++ b/scripts/api-inference/scripts/generate.ts
@@ -99,10 +99,36 @@ const TASKS_DATA = (await response.json()) as any;
//// Snippet utils ////
///////////////////////
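+// The snippet helpers in `@huggingface/tasks` may return either a single
+// InferenceSnippet or an array of them (one per client, e.g.
+// `huggingface_hub` and `openai`); normalize both shapes to markdown here.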
+const formatSnippets = (result: snippets.types.InferenceSnippet | snippets.types.InferenceSnippet[], defaultClient: string, language: string): string => {
+ // For single snippet, just wrap with code block
+ if (!Array.isArray(result) || result.length === 1) {
+ const snippet = Array.isArray(result) ? result[0] : result;
+ return `\`\`\`${language}\n${snippet.content}\n\`\`\``;
+ }
+
+ // For multiple snippets, add description and wrap each one
+ return result
+ .map(snippet => {
+ const client = snippet.client || defaultClient;
+ return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``;
+ })
+ .join('\n\n');
+};
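+// For example, a Python result with two clients renders as two labeled
+// fenced blocks: "Using `huggingface_hub`:" followed by "Using `openai`:".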
+
+
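+// Each getter now returns ready-to-embed markdown (label + fenced block),
+// so the handlebars template no longer adds its own code fences.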
const GET_SNIPPET_FN = {
- curl: snippets.curl.getCurlInferenceSnippet,
- js: snippets.js.getJsInferenceSnippet,
- python: snippets.python.getPythonInferenceSnippet,
+ curl: (modelData: any, token: string) => {
+ const result = snippets.curl.getCurlInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'curl', 'bash');
+ },
+ js: (modelData: any, token: string) => {
+ const result = snippets.js.getJsInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'javascript', 'js');
+ },
+ python: (modelData: any, token: string) => {
+ const result = snippets.python.getPythonInferenceSnippet(modelData, token);
+ return formatSnippets(result, 'python', 'py');
+ },
} as const;
const HAS_SNIPPET_FN = {
@@ -115,14 +141,16 @@ export function getInferenceSnippet(
id: string,
pipeline_tag: PipelineType,
language: InferenceSnippetLanguage,
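+  // Optional overrides: chat-completion tasks pass the model's config plus
+  // a "conversational" tag so the generators emit chat-style snippets.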
+ config?: JsonObject,
+ tags?: string[],
): string | undefined {
const modelData = {
id,
pipeline_tag,
mask_token: "[MASK]",
library_name: "",
- config: {},
- tags: [],
+ config: config ?? {},
+ tags: tags ?? [],
};
// @ts-ignore
if (HAS_SNIPPET_FN[language](modelData)) {
@@ -472,25 +500,14 @@ function fetchChatCompletion() {
);
const mainModel = DATA.models[task.name][0];
- const mainModelData = {
- // @ts-ignore
- id: mainModel.id,
- pipeline_tag: task.pipelineTag,
- mask_token: "",
- library_name: "",
- // @ts-ignore
- tags: ["conversational"],
- // @ts-ignore
- config: mainModel.config,
- };
const taskSnippets = {
// @ts-ignore
- curl: GET_SNIPPET_FN["curl"](mainModelData, "hf_***"),
+ curl: getInferenceSnippet(mainModel.id, task.pipelineTag, "curl", mainModel.config, ["conversational"]),
// @ts-ignore
- python: GET_SNIPPET_FN["python"](mainModelData, "hf_***"),
+ python: getInferenceSnippet(mainModel.id, task.pipelineTag, "python", mainModel.config, ["conversational"]),
// @ts-ignore
- javascript: GET_SNIPPET_FN["js"](mainModelData, "hf_***"),
+ javascript: getInferenceSnippet(mainModel.id, task.pipelineTag, "js", mainModel.config, ["conversational"]),
};
DATA.snippets[task.name] = SNIPPETS_TEMPLATE({
taskSnippets,
diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars
index 2d0f099e2..09202f6ba 100644
--- a/scripts/api-inference/templates/common/snippets-template.handlebars
+++ b/scripts/api-inference/templates/common/snippets-template.handlebars
@@ -5,18 +5,14 @@
{{!-- cURL snippet (if exists) --}}
{{#if taskSnippets.curl}}
-```bash
{{{taskSnippets.curl}}}
-```
{{/if}}
{{!-- Python snippet (if exists) --}}
{{#if taskSnippets.python}}
-```py
{{{taskSnippets.python}}}
-```
To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}).
@@ -25,9 +21,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu
{{!-- JavaScript snippet (if exists) --}}
{{#if taskSnippets.javascript}}
-```js
{{{taskSnippets.javascript}}}
-```
To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}).