diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index b2ff4aa49..1452756d2 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -64,46 +64,133 @@ The API supports: curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ -H "Authorization: Bearer hf_***" \ -H 'Content-Type: application/json' \ --d '{ - "model": "google/gemma-2-2b-it", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": false +--data '{ + "model": "google/gemma-2-2b-it", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_tokens": 500, + "stream": true }' - ``` +With huggingface_hub client: ```py from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -for message in client.chat_completion( - model="google/gemma-2-2b-it", - messages=[{"role": "user", "content": "What is the capital of France?"}], +messages = [ + { + "role": "user", + "content": "What is the capital of France?" + } +] + +stream = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + +With openai client: +```py +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = [ + { + "role": "user", + "content": "What is the capital of France?" + } +] + +stream = client.chat.completions.create( + model="google/gemma-2-2b-it", + messages=messages, + max_tokens=500, + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). +With huggingface_hub client: ```js -import { HfInference } from "@huggingface/inference"; +import { HfInference } from "@huggingface/inference" -const inference = new HfInference("hf_***"); +const client = new HfInference("hf_***") -for await (const chunk of inference.chatCompletionStream({ +let out = ""; + +const stream = client.chatCompletionStream({ model: "google/gemma-2-2b-it", - messages: [{ role: "user", content: "What is the capital of France?" }], + messages: [ + { + role: "user", + content: "What is the capital of France?" + } + ], + max_tokens: 500 +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } +} +``` + +With openai client: +```js +import { OpenAI } from "openai" + +const client = new OpenAI({ + baseURL: "https://api-inference.huggingface.co/v1/", + apiKey: "hf_***" +}) + +let out = ""; + +const stream = await client.chat.completions.create({ + model: "google/gemma-2-2b-it", + messages: [ + { + role: "user", + content: "What is the capital of France?" + } + ], max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); + stream: true, +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } } ``` @@ -124,73 +211,188 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ curl 'https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1/chat/completions' \ -H "Authorization: Bearer hf_***" \ -H 'Content-Type: application/json' \ --d '{ - "model": "meta-llama/Llama-3.2-11B-Vision-Instruct", - "messages": [ +--data '{ + "model": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "messages": [ { "role": "user", "content": [ - {"type": "image_url", "image_url": {"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"}}, - {"type": "text", "text": "Describe this image in one sentence."} + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } ] } ], - "max_tokens": 500, - "stream": false + "max_tokens": 500, + "stream": true }' - ``` +With huggingface_hub client: ```py from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" +messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } +] + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, + max_tokens=500, + stream=True +) -for message in client.chat_completion( - model="meta-llama/Llama-3.2-11B-Vision-Instruct", - messages=[ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], - } - ], +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + +With openai client: +```py +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "Describe this image in one sentence." + }, + { + "type": "image_url", + "image_url": { + "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } +] + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). +With huggingface_hub client: ```js -import { HfInference } from "@huggingface/inference"; +import { HfInference } from "@huggingface/inference" -const inference = new HfInference("hf_***"); -const imageUrl = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"; +const client = new HfInference("hf_***") -for await (const chunk of inference.chatCompletionStream({ +let out = ""; + +const stream = client.chatCompletionStream({ model: "meta-llama/Llama-3.2-11B-Vision-Instruct", messages: [ { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": imageUrl}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence." + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] + } + ], + max_tokens: 500 +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } +} +``` + +With openai client: +```js +import { OpenAI } from "openai" + +const client = new OpenAI({ + baseURL: "https://api-inference.huggingface.co/v1/", + apiKey: "hf_***" +}) + +let out = ""; + +const stream = await client.chat.completions.create({ + model: "meta-llama/Llama-3.2-11B-Vision-Instruct", + messages: [ + { + role: "user", + content: [ + { + type: "text", + text: "Describe this image in one sentence." + }, + { + type: "image_url", + image_url: { + url: "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" + } + } + ] } ], max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); + stream: true, +}); + +for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const newContent = chunk.choices[0].delta.content; + out += newContent; + console.log(newContent); + } } ``` diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index ce5ad7192..31a4e68f7 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -25,7 +25,6 @@ For more details about the `image-classification` task, check out its [dedicated ### Recommended models - [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model. -- [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224): A robust image classification model. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index 437b599f8..7163f3fd0 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -24,7 +24,8 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models -- [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k dataset. +- [openmmlab/upernet-convnext-small](https://huggingface.co/openmmlab/upernet-convnext-small): Solid semantic segmentation model trained on ADE20k. +- [facebook/mask2former-swin-large-coco-panoptic](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic): Panoptic segmentation model trained on the COCO (common objects) dataset. Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). @@ -35,7 +36,7 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash -curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \ +curl https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small \ -X POST \ --data-binary '@cats.jpg' \ -H "Authorization: Bearer hf_***" @@ -46,7 +47,7 @@ curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-a ```py import requests -API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512" +API_URL = "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -66,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512", + "https://api-inference.huggingface.co/models/openmmlab/upernet-convnext-small", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/image-text-to-text.md b/docs/api-inference/tasks/image-text-to-text.md index 9630578c8..e1e44c1d6 100644 --- a/docs/api-inference/tasks/image-text-to-text.md +++ b/docs/api-inference/tasks/image-text-to-text.md @@ -38,13 +38,14 @@ Explore all available models and find the one that suits you best [here](https:/ ```bash curl https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct \ -X POST \ - -d '{"inputs": No input example has been defined for this model task.}' \ + -d '{"inputs": "Can you please let us know more details about your "}' \ -H 'Content-Type: application/json' \ -H "Authorization: Bearer hf_***" ``` +With huggingface_hub client: ```py import requests @@ -55,23 +56,44 @@ from huggingface_hub import InferenceClient client = InferenceClient(api_key="hf_***") -image_url = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" +messages = "\"Can you please let us know more details about your \"" -for message in client.chat_completion( - model="meta-llama/Llama-3.2-11B-Vision-Instruct", - messages=[ - { - "role": "user", - "content": [ - {"type": "image_url", "image_url": {"url": image_url}}, - {"type": "text", "text": "Describe this image in one sentence."}, - ], - } - ], +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, + max_tokens=500, + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") +``` + +With openai client: +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct" +headers = {"Authorization": "Bearer hf_***"} + +from openai import OpenAI + +client = OpenAI( + base_url="https://api-inference.huggingface.co/v1/", + api_key="hf_***" +) + +messages = "\"Can you please let us know more details about your \"" + +stream = client.chat.completions.create( + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + messages=messages, max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") + stream=True +) + +for chunk in stream: + print(chunk.choices[0].delta.content, end="") ``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_text-to-text). @@ -95,7 +117,7 @@ async function query(data) { return result; } -query({"inputs": No input example has been defined for this model task.}).then((response) => { +query({"inputs": "Can you please let us know more details about your "}).then((response) => { console.log(JSON.stringify(response)); }); ``` diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 51997f008..c6e48a58e 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -99,10 +99,36 @@ const TASKS_DATA = (await response.json()) as any; //// Snippet utils //// /////////////////////// +const formatSnippets = (result: snippets.types.InferenceSnippet | snippets.types.InferenceSnippet[], defaultClient: string, language: string): string => { + // For single snippet, just wrap with code block + if (!Array.isArray(result) || result.length === 1) { + const snippet = Array.isArray(result) ? result[0] : result; + return `\`\`\`${language}\n${snippet.content}\n\`\`\``; + } + + // For multiple snippets, add description and wrap each one + return result + .map(snippet => { + const client = snippet.client || defaultClient; + return `Using \`${client}\`:\n\`\`\`${language}\n${snippet.content}\n\`\`\``; + }) + .join('\n\n'); +}; + + const GET_SNIPPET_FN = { - curl: snippets.curl.getCurlInferenceSnippet, - js: snippets.js.getJsInferenceSnippet, - python: snippets.python.getPythonInferenceSnippet, + curl: (modelData: any, token: string) => { + const result = snippets.curl.getCurlInferenceSnippet(modelData, token); + return formatSnippets(result, 'curl', 'bash'); + }, + js: (modelData: any, token: string) => { + const result = snippets.js.getJsInferenceSnippet(modelData, token); + return formatSnippets(result, 'javascript', 'js'); + }, + python: (modelData: any, token: string) => { + const result = snippets.python.getPythonInferenceSnippet(modelData, token); + return formatSnippets(result, 'python', 'py'); + }, } as const; const HAS_SNIPPET_FN = { @@ -115,14 +141,16 @@ export function getInferenceSnippet( id: string, pipeline_tag: PipelineType, language: InferenceSnippetLanguage, + config?: JsonObject, + tags?: string[], ): string | undefined { const modelData = { id, pipeline_tag, mask_token: "[MASK]", library_name: "", - config: {}, - tags: [], + config: config ?? {}, + tags: tags ?? [], }; // @ts-ignore if (HAS_SNIPPET_FN[language](modelData)) { @@ -472,25 +500,14 @@ function fetchChatCompletion() { ); const mainModel = DATA.models[task.name][0]; - const mainModelData = { - // @ts-ignore - id: mainModel.id, - pipeline_tag: task.pipelineTag, - mask_token: "", - library_name: "", - // @ts-ignore - tags: ["conversational"], - // @ts-ignore - config: mainModel.config, - }; const taskSnippets = { // @ts-ignore - curl: GET_SNIPPET_FN["curl"](mainModelData, "hf_***"), + curl: getInferenceSnippet(mainModel.id, task.pipelineTag, "curl", mainModel.config, ["conversational"]), // @ts-ignore - python: GET_SNIPPET_FN["python"](mainModelData, "hf_***"), + python: getInferenceSnippet(mainModel.id, task.pipelineTag, "python", mainModel.config, ["conversational"]), // @ts-ignore - javascript: GET_SNIPPET_FN["js"](mainModelData, "hf_***"), + javascript: getInferenceSnippet(mainModel.id, task.pipelineTag, "js", mainModel.config, ["conversational"]), }; DATA.snippets[task.name] = SNIPPETS_TEMPLATE({ taskSnippets, @@ -524,4 +541,4 @@ await Promise.all( }), ); -console.log("✅ All done!"); +console.log("✅ All done!"); \ No newline at end of file diff --git a/scripts/api-inference/templates/common/snippets-template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars index 2d0f099e2..09202f6ba 100644 --- a/scripts/api-inference/templates/common/snippets-template.handlebars +++ b/scripts/api-inference/templates/common/snippets-template.handlebars @@ -5,18 +5,14 @@ {{!-- cURL snippet (if exists) --}} {{#if taskSnippets.curl}} -```bash {{{taskSnippets.curl}}} -``` {{/if}} {{!-- Python snippet (if exists) --}} {{#if taskSnippets.python}} -```py {{{taskSnippets.python}}} -``` To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). @@ -25,9 +21,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu {{!-- JavaScript snippet (if exists) --}} {{#if taskSnippets.javascript}} -```js {{{taskSnippets.javascript}}} -``` To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}).