diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index 97310fce8..b2bf55f81 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -24,13 +24,12 @@ This is a subtask of [`text-generation`](https://huggingface.co/docs/api-inferen - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. -- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. -- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. +- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model trained to follow instructions. #### Conversational Vision-Language Models (VLMs) - [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct): Powerful vision language model with great visual understanding and reasoning capabilities. -- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model. +- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model. 
### API Playground @@ -65,48 +64,139 @@ The API supports: curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ -H "Authorization: Bearer hf_***" \ -H 'Content-Type: application/json' \ --d '{ - "model": "google/gemma-2-2b-it", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": false +--data '{ + "model": "google/gemma-2-2b-it", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 500, + "stream": true }' - ``` + ```py from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -for message in client.chat_completion( - model="google/gemma-2-2b-it", - messages=[{"role": "user", "content": "What is the capital of France?"}], +messages = [ + { + "role": "user", + "content": "What is the capital of France?" + } +] + +stream = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` + + + +```py +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = [ + { + "role": "user", + "content": "What is the capital of France?" + } +] + +stream = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=messages, + max_tokens=500, + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). 
+ ```js -import { HfInference } from "@huggingface/inference"; +import { HfInference } from "@huggingface/inference" -const inference = new HfInference("hf_***"); +const client = new HfInference("hf_***") -for await (const chunk of inference.chatCompletionStream({ +let out = ""; + +const stream = client.chatCompletionStream({ model: "google/gemma-2-2b-it", - messages: [{ role: "user", content: "What is the capital of France?" }], + messages: [ + { + role: "user", + content: "What is the capital of France?" + } + ], + max_tokens: 500 +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } +} +``` + + + +```js +import { OpenAI } from "openai" + +const client = new OpenAI({ + baseURL: "https://api-inference.huggingface.co/v1/", + apiKey: "hf_***" +}) + +let out = ""; + +const stream = await client.chat.completions.create({ + model: "google/gemma-2-2b-it", + messages: [ + { + role: "user", + content: "What is the capital of France?" + } + ], max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); + stream: true, +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } } ``` + To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). 
@@ -125,75 +215,194 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ curl 'https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \ -H "Authorization: Bearer hf_***" \ -H 'Content-Type: application/json' \ --d '{ - "model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "messages": [ +--data '{ + "model": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "messages": [ { "role": "user", "content": [ - {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}}, - {"type": "text", "text": "Describe this image in one sentence."} + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } ] } ], - "max_tokens": 500, - "stream": false + "max_tokens": 500, + "stream": true }' - ``` + ```py from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" +messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } +] + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, + max_tokens=500, + stream=True +) -for message in client.chat_completion( - model="meta-llama/Llama-3.2-11B-Vision-Instruct", - messages=[ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], - } - ], +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + + + +```py +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } +] + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` + To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). 
+ ```js -import { HfInference } from "@huggingface/inference"; +import { HfInference } from "@huggingface/inference" -const inference = new HfInference("hf_***"); -const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"; +const client = new HfInference("hf_***") -for await (const chunk of inference.chatCompletionStream({ +let out = ""; + +const stream = client.chatCompletionStream({ model: "meta-llama/Llama-3.2-11B-Vision-Instruct", messages: [ { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": imageUrl}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence." + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens: 500 +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } +} +``` + + + +```js +import { OpenAI } from "openai" + +const client = new OpenAI({ + baseURL: "https://api-inference.huggingface.co/v1/", + apiKey: "hf_***" +}) + +let out = ""; + +const stream = await client.chat.completions.create({ + model: "meta-llama/Llama-3.2-11B-Vision-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence." 
+ }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] } ], max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); + stream: true, +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } } ``` + To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index ce5ad7192..31a4e68f7 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -25,7 +25,6 @@ For more details about the `image-classification` task, check out its [dedicated ### Recommended models - [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model. -- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index 437b599f8..7163f3fd0 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -24,7 +24,8 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models -- [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k dataset. 
+- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k. +- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). @@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \ +curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small \ -X POST \ --data-binary '@cats.jpg' \ -H "Authorization: Bearer hf_***" @@ -46,7 +47,7 @@ curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-a ```py import requests -API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512" +API_URL = "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -66,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512", + "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md index bacc08dac..3ee52e917 100644 --- a/docs/api-inference/tasks/image-text-to-text.md +++ b/docs/api-inference/tasks/image-text-to-text.md @@ -25,8 +25,7 @@ For more details about the `image-text-to-text` task, check out its 
[dedicated p ### Recommended models - [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct): Powerful vision language model with great visual understanding and reasoning capabilities. -- [HuggingFaceM4/idefics2-8b-chatty](https://huggingface.co/HuggingFaceM4/idefics2-8b-chatty): Cutting-edge conversational vision language model that can take multiple image inputs. -- [microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct): Strong image-text-to-text model. +- [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct): Strong image-text-to-text model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending). @@ -39,13 +38,14 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct \ -X POST \ - -d '{"inputs": No input example has been defined for this model task.}' \ + -d '{"inputs": "Can you please let us know more details about your "}' \ -H 'Content-Type: application/json' \ -H "Authorization: Bearer hf_***" ``` + ```py import requests @@ -56,24 +56,47 @@ from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" +messages = "\"Can you please let us know more details about your \"" -for message in client.chat_completion( - model="meta-llama/Llama-3.2-11B-Vision-Instruct", - messages=[ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], - } - ], +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, + max_tokens=500, + 
stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct" +headers = {"Authorization": "Bearer hf_***"} + +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = "\"Can you please let us know more details about your \"" + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` + To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text-to-text). @@ -96,7 +119,7 @@ async function query(data) { return result; } -query({"inputs": No input example has been defined for this model task.}).then((response) => { +query({"inputs": "Can you please let us know more details about your "}).then((response) => { console.log(JSON.stringify(response)); }); ``` diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index 7e315ddc4..f04909d4d 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -27,11 +27,9 @@ For more details about the `text-generation` task, check out its [dedicated page ### Recommended models - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. -- [bigcode/starcoder](https://huggingface.co/bigcode/starcoder): A code generation model that can generate code in 80+ languages. 
- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. -- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. -- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. +- [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct): Strong text generation model trained to follow instructions. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 8240b6095..8af49b21b 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -99,10 +99,34 @@ const TASKS_DATA = (await response.json()) as any; //// Snippet utils //// /////////////////////// +const formatSnippets = (result: snippets.types.InferenceSnippet | snippets.types.InferenceSnippet[], defaultClient: string, language: string): string => { + // For single snippet, return just the content (let the template handle the wrapping) + if (!Array.isArray(result) || result.length === 1) { + const snippet = Array.isArray(result) ? 
result[0] : result; + return `\`\`\`${language}\n${snippet.content}\n\`\`\``; + } + + // For multiple snippets, include the client tags + return result + .map(snippet => + `<${snippet.client || defaultClient}>\n\`\`\`${language}\n${snippet.content}\n\`\`\`\n` + ) + .join('\n\n'); +}; + const GET_SNIPPET_FN = { - curl: snippets.curl.getCurlInferenceSnippet, - js: snippets.js.getJsInferenceSnippet, - python: snippets.python.getPythonInferenceSnippet, + curl: (modelData: any, token: string) => { + const result = snippets.curl.getCurlInferenceSnippet(modelData, token); + return formatSnippets(result, 'curl', 'bash'); + }, + js: (modelData: any, token: string) => { + const result = snippets.js.getJsInferenceSnippet(modelData, token); + return formatSnippets(result, 'javascript', 'js'); + }, + python: (modelData: any, token: string) => { + const result = snippets.python.getPythonInferenceSnippet(modelData, token); + return formatSnippets(result, 'python', 'py'); + }, } as const; const HAS_SNIPPET_FN = { @@ -129,8 +153,7 @@ export function getInferenceSnippet( // @ts-ignore if (HAS_SNIPPET_FN[language](modelData)) { // @ts-ignore - const snippets = GET_SNIPPET_FN[language](modelData, "hf_***"); - return Array.isArray(snippets) ? 
snippets[0].content : snippets.content; + return GET_SNIPPET_FN[language](modelData, "hf_***"); } } @@ -483,6 +506,7 @@ function fetchChatCompletion() { // @ts-ignore javascript: getInferenceSnippet(mainModel.id, task.pipelineTag, "js", mainModel.config, ["conversational"]), }; + console.log(taskSnippets); DATA.snippets[task.name] = SNIPPETS_TEMPLATE({ taskSnippets, taskSnakeCase: baseName.replace("-", "_"), diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars index 2d0f099e2..09202f6ba 100644 --- a/scripts/api-inference/templates/common/snippets-template.handlebars +++ b/scripts/api-inference/templates/common/snippets-template.handlebars @@ -5,18 +5,14 @@ {{!-- cURL snippet (if exists) --}} {{#if taskSnippets.curl}} -```bash {{{taskSnippets.curl}}} -``` {{/if}} {{!-- Python snippet (if exists) --}} {{#if taskSnippets.python}} -```py {{{taskSnippets.python}}} -``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). @@ -25,9 +21,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu {{!-- JavaScript snippet (if exists) --}} {{#if taskSnippets.javascript}} -```js {{{taskSnippets.javascript}}} -``` To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}).