diff --git a/docs/source/templates/gallery_generative_ai.ejs b/docs/source/templates/gallery_generative_ai.ejs index f842f6aa7434..08987a677423 100644 --- a/docs/source/templates/gallery_generative_ai.ejs +++ b/docs/source/templates/gallery_generative_ai.ejs @@ -1,5 +1,5 @@ --- -title: Generative AI +title: LLM Fine-tuning type: templates order: 1007 meta_title: Gallery of Generative AI Labeling Templates @@ -22,25 +22,10 @@ cards: image: "/images/templates/generative-pairwise-human-preference.png" url: "/templates/generative-pairwise-human-preference.html" -- title: Chatbot Model Assessment - categories: - - LLM Evaluation - - Chatbot - - Text Generation - image: "/images/templates/generative-chatbot-assessment.png" - url: "/templates/generative-chatbot-assessment.html" - -- title: LLM Ranker +- title: RAG Retrieval categories: - Ranking - Retrieval-Augmented Generation image: "/images/templates/llm-ranker.png" url: "/templates/generative-llm-ranker.html" - -- title: Visual Ranker - categories: - - Ranking - - Retrieval-Augmented Generation - image: "/images/templates/visual-ranker.png" - url: "/templates/generative-visual-ranker.html" --- \ No newline at end of file diff --git a/docs/source/templates/gallery_llm_evals.ejs b/docs/source/templates/gallery_llm_evals.ejs new file mode 100644 index 000000000000..4ff8ac592bd8 --- /dev/null +++ b/docs/source/templates/gallery_llm_evals.ejs @@ -0,0 +1,44 @@ +--- +title: LLM Evaluations +type: templates +order: 1008 +meta_title: Gallery of Labeling Templates for LLM Evaluation +meta_description: Gallery of templates available to perform data labeling and annotation tasks with Label Studio for your machine learning model and data science projects. 
+layout: templates +cards: +- title: LLM Response Moderation + categories: + - LLM Evaluation + - Text Classification + image: "/images/templates/response-moderation.png" + url: "/templates/llm_response_moderation.html" + +- title: LLM Response Grading + categories: + - LLM Evaluation + - Text Classification + image: "/images/templates/response-grading.png" + url: "/templates/llm_response_grading.html" + +- title: Side-by-Side LLM Output Comparison + categories: + - Ranking + - Retrieval-Augmented Generation + image: "/images/templates/side-by-side-comparison.png" + url: "/templates/llm_side_by_side.html" + +- title: Evaluate RAG with Human Feedback + categories: + - Response Evaluation + - Retrieval-Augmented Generation + image: "/images/templates/evaluate-rag-human-feedback.png" + url: "/templates/llm_rag_human_feedback.html" + +- title: Evaluate RAG with Ragas + categories: + - Response Evaluation + - Ragas + - Retrieval-Augmented Generation + image: "/images/templates/evaluate-rag-automated-metrics.png" + url: "/templates/llm_ragas.html" +--- \ No newline at end of file diff --git a/docs/source/templates/gallery_rns.html b/docs/source/templates/gallery_rns.html index 00d2de3dfbcb..5018130e3e9c 100644 --- a/docs/source/templates/gallery_rns.html +++ b/docs/source/templates/gallery_rns.html @@ -41,4 +41,11 @@ image: "/images/templates-misc/website-rating.png" url: "/templates/website_rating.html" +- title: Visual Ranker + categories: + - Ranking + - Retrieval-Augmented Generation + image: "/images/templates/visual-ranker.png" + url: "/templates/generative-visual-ranker.html" + --- diff --git a/docs/source/templates/gallery_videos.ejs b/docs/source/templates/gallery_videos.ejs index d074934e2474..36761a389a77 100644 --- a/docs/source/templates/gallery_videos.ejs +++ b/docs/source/templates/gallery_videos.ejs @@ -1,7 +1,7 @@ --- title: Videos type: templates -order: 1007 +order: 1009 meta_title: Gallery of Video Labeling Templates meta_description: Gallery of 
templates available to perform data labeling and annotation tasks with Label Studio for your machine learning model and data science projects. layout: templates diff --git a/docs/source/templates/generative-chatbot-assessment.md b/docs/source/templates/generative-chatbot-assessment.md deleted file mode 100644 index 65ffc2f22996..000000000000 --- a/docs/source/templates/generative-chatbot-assessment.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -title: Chatbot Assessment -type: templates -category: Generative AI -cat: generative-ai -order: 905 -is_new: t -meta_title: Create Dataset for Collection of Human Preference for RLHF -meta_description: Template for creating dataset for collection of human preference for RLHF with Label Studio for your machine learning and data science projects. ---- - -## Overview - -Looking to get started fine-tuning your own chatbot based off of your company's data? This template provides you with a workflow to get started! With this data labeling template you can collect human preference data with ease to better assess the quality of chatbot responses. This is helpful when adding context-specific details to a chatbot. - -When evaluating the quality of chatbot responses, there are a few different errors that you should tackle to ensure AI safety but also integrity of the data as well. - -Areas to look out for include: - -- hallucinations -- misinformation -- offensive language -- biased response -- personal and sensitive information disclosure -- etc. - -The template is based on the paper [Training language models to follow instructions -with human feedback](https://arxiv.org/pdf/2203.02155.pdf), which proposes a set of human evaluation metrics for the LLMs responses. - -Curious to find a Large Language Model (LLM) to fine tune? Check out [our guide on the Label Studio blog](https://labelstud.io/blog/five-large-language-models-you-can-fine-tune-today/). 
- -## How to Collect the Dataset - -The input for this template is a list of dialogues between `"user"` and `"assistant"`, packed in `"messages"` - -For example: - -```json -[{ - "messages": [ - { - "role": "user", - "content": "What's your opinion on pineapple pizza?" - }, - { - "role": "assistant", - "content": "As an AI, I don't have personal opinions." - }, - { - "role": "user", - "content": "But do people generally like it?" - } - ] -}, ...] -``` - -Collect dataset examples and store them in `dataset.json` file. - -## How to Configure the Labeling Interface - -The `Chatbot Model Assessment` template includes the following labeling interface in XML format: - -```xml - - -
InstructGPT Assessment
- - - - - -
- - - -
- - - - - - - -
- - - - - - - -
- - - - - - - -
- - - - - - - -
- - - - - - - -
- - - - - - - -
- - - - - - - - -
- - - - - - - - -
- - - - - - - - -
- - - - - - - - -
- - - - - - - - - - - -``` - -In this configuration, there are few blocks each of which represents binary choice question. Feel free add more blocks or remove some of them as your needs require. - -## Starting your labeling project - -*Need a hand getting started with Label Studio? Check out our [Zero to One Tutorial](https://labelstud.io/blog/zero-to-one-getting-started-with-label-studio/).* - -1. Create new project in Label Studio -2. Go to `Settings > Labeling Interface > Browse Templates > Generative AI > Chatbot Model Assessment` -3. Save the project - -Alternatively, you can create a new project by using our Python SDK: - -```python -import label_studio_sdk - -ls = label_studio_sdk.Client('YOUR_LABEL_STUDIO_URL', 'YOUR_API_KEY') -project = ls.create_project(title='Chatbot Model Assessment', label_config='...') -``` - -## Import the dataset - -To import your dataset, in the project settings go to `Import` and upload the dataset file `dataset.json`. - -Using the Python SDK, import the dataset with input prompts into Label Studio using the `PROJECT_ID` of the project you've just created. - -Run the following code: - -```python -from label_studio_sdk import Client - -ls = Client(url='', api_key='') - -project = ls.get_project(id=PROJECT_ID) -project.import_tasks('dataset.json') -``` - -This will allow you to start annotating the dataset by assessing the quality of the generated responses in dialogues. - -## Export the dataset - -Labeling results can be exported in JSON format. To export the dataset, go to `Export` in the project settings and download the file. - -Using the Python SDK, export the dataset with annotations from Label Studio through running the following: - -```python -annotations = project.export_tasks(format='JSON') -``` - -The exported JSON file will look like this: - -```json -[ - { - "id": 1, - "data": { - "messages": [...] 
- }, - "annotations": [ - { - "id": 1, - "created_at": "2021-03-03T14:00:00.000000Z", - "result": [ - { - "from_name": "likert_scale", - "to_name": "dialogue", - "type": "rating", - "value": { - "rating": 5 - } - }, - { - "from_name": "fails_to_follow", - "to_name": "dialogue", - "type": "choices", - "value": { - "choices": ["No"] - } - } - // other fields - ], - -``` - -## Related tags - -- [Paragraphs](/tags/paragraphs.html) -- [Choices](/tags/choices.html) -- [Rating](/tags/rating.html) diff --git a/docs/source/templates/generative-llm-ranker.md b/docs/source/templates/generative-llm-ranker.md index 7d824fe4cc67..15de8a31a39c 100644 --- a/docs/source/templates/generative-llm-ranker.md +++ b/docs/source/templates/generative-llm-ranker.md @@ -1,17 +1,17 @@ --- -title: LLM Ranker +title: RAG Retrieval type: templates -category: Generative AI -cat: generative-ai +category: LLM Fine-tuning +cat: llm-fine-tuning order: 906 is_new: t -meta_title: Create a Ranked Dataset for LLMs with Label Studio -meta_description: Create a ranked dataset for LLMs with Label Studio for your machine learning and data science projects. +meta_title: Create a ranked dataset for building a RAG system for LLMs with Label Studio +meta_description: Create a ranked dataset for building a RAG system for LLMs with Label Studio for your machine learning and data science projects. --- -## Overview + -This template provides you with a worklow to rank the quality of a large language model (LLM) responses. +This template provides you with a workflow to rank the quality of a large language model (LLM) responses. Using this template will give you the ability to compare the quality of the responses from different LLMs,and rank the dynamic set of items with a handy drag-and-drop interface. @@ -49,47 +49,70 @@ The `LLM Ranker` template includes the following labeling interface in XML forma ```xml - - -
+ + + +
+ + + + + + + - - - - -
+ + + + + + + - - - - - - - - - - + + + + + + + + + + ``` The configuration includes the following elements: @@ -103,10 +126,11 @@ Items can be styled in Style tag by using `.htx-ranker-item` class. ## Starting your labeling project -*Need a hand getting started with Label Studio? Check out our [Zero to One Tutorial](https://labelstud.io/blog/zero-to-one-getting-started-with-label-studio/).* +!!! info Tip + Need a hand getting started with Label Studio? Check out our [Zero to One Tutorial](https://labelstud.io/blog/zero-to-one-getting-started-with-label-studio/). 1. Create new project in Label Studio -2. Go to `Settings > Labeling Interface > Browse Templates > Generative AI > LLM Ranker` +2. Go to **Settings > Labeling Interface > Browse Templates > Generative AI > LLM Ranker**. 3. Save the project Alternatively, you can create project by using our Python SDK: diff --git a/docs/source/templates/generative-pairwise-human-preference.md b/docs/source/templates/generative-pairwise-human-preference.md index 322d596cf492..eda8717b9bdd 100644 --- a/docs/source/templates/generative-pairwise-human-preference.md +++ b/docs/source/templates/generative-pairwise-human-preference.md @@ -1,14 +1,15 @@ --- title: Human Preferences collection for RLHF type: templates -category: Generative AI -cat: generative-ai +category: LLM Fine-tuning +cat: llm-fine-tuning order: 904 is_new: t meta_title: Create Dataset for Human Preferences Collection for RLHF meta_description: Template for creating dataset for human preferences collection for RLHF with Label Studio for your machine learning and data science projects. --- -## Overview + + This project will help you to get up your LLM to the ChatGPT quality level through collecting comparison data to establish human preferences for the responses generated by the supervised model. 
diff --git a/docs/source/templates/generative-supervised-llm.md b/docs/source/templates/generative-supervised-llm.md index 38e0bf8384d8..2647927a5d7b 100644 --- a/docs/source/templates/generative-supervised-llm.md +++ b/docs/source/templates/generative-supervised-llm.md @@ -1,15 +1,15 @@ --- title: Supervised LLM Fine-Tuning type: templates -category: Generative AI -cat: generative-ai +category: LLM Fine-tuning +cat: llm-fine-tuning order: 903 is_new: t meta_title: Create dataset for supervised LLM fine-tuning meta_description: Template for creating dataset for supervised LLM fine-tuning with Label Studio for your machine learning and data science projects. --- -## Overview + This template is designed for you to get started with the process of supervised LLM fine-tuning. @@ -76,7 +76,7 @@ Each JSON item will be rendered as a separate task in Label Studio to complete t *Need a hand getting started with Label Studio? Check out our [Zero to One Tutorial](https://labelstud.io/blog/zero-to-one-getting-started-with-label-studio/).* 1. Create new project in Label Studio -2. Go to `Settings > Labeling Interface > Browse Templates > Generative AI > Supervised LLM Fine-tuning` +2. Go to **Settings > Labeling Interface > Browse Templates > Generative AI > Supervised LLM Fine-tuning** 3. 
Save Alternatively, you can create a new project by using our Python SDK: diff --git a/docs/source/templates/generative-visual-ranker.md b/docs/source/templates/generative-visual-ranker.md index 5b7cc190b7f4..b0f39d0e971e 100644 --- a/docs/source/templates/generative-visual-ranker.md +++ b/docs/source/templates/generative-visual-ranker.md @@ -1,17 +1,17 @@ --- title: Visual Ranker type: templates -category: Generative AI -cat: generative-ai -order: 907 +category: Ranking and Scoring +cat: ranking-and-scoring +order: 560 is_new: t meta_title: Create a ranked dataset for text-to-image models with Label Studio meta_description: Template for creating a ranked dataset for text-to-image models with Label Studio for your machine learning and data science projects. --- -## Overview + -The template provides the worklow to rank the quality of the text-to-image models responses, like Dall-E, Midjourney, Stable Diffusion etc. +The template provides the workflow to rank the quality of the text-to-image models responses, like Dall-E, Midjourney, Stable Diffusion etc. Using this template gives the ability to compare the quality of the responses from different generative AI models, and rank the dynamic set of items with handy drag-and-drop interface. 
diff --git a/docs/source/templates/index.ejs b/docs/source/templates/index.ejs index 2890141e447d..be0395d49fd6 100644 --- a/docs/source/templates/index.ejs +++ b/docs/source/templates/index.ejs @@ -45,8 +45,12 @@ cards: categories: video classification, timeline segmentation image: "/images/templates/video-timeline-segmentation.png" url: "/templates/gallery_videos.html" -- title: Generative AI +- title: LLM Fine-tuning categories: generative ai, llm image: "/images/templates/generative-pairwise-human-preference.png" url: "/templates/gallery_generative_ai.html" +- title: LLM Evaluations + categories: generative ai, llm + image: "/images/templates/generative-pairwise-human-preference.png" + url: "/templates/gallery_llm_evals.html" --- \ No newline at end of file diff --git a/docs/source/templates/llm_rag_human_feedback.md b/docs/source/templates/llm_rag_human_feedback.md new file mode 100644 index 000000000000..db4fdd74d2c9 --- /dev/null +++ b/docs/source/templates/llm_rag_human_feedback.md @@ -0,0 +1,218 @@ +--- +title: Evaluate RAG with Human Feedback +type: templates +category: LLM Evaluations +cat: llm-evaluations +order: 965 +is_new: t +meta_description: Evaluate the contextual relevancy of retrieved documents and rate the LLM response. +date: 2024-07-26 14:49:29 +--- + + + +When working with a RAG (Retrieval-Augmented Generation) pipeline, your goal is not only to evaluate a single LLM response, but also to assess the retrieved documents and the generated answer for contextual relevancy, answer relevancy, and faithfulness. + +In this example, you will create a labeling interface that aims to evaluate: + +- Contextual relevancy of the retrieved documents +- Answer relevancy +- Answer faithfulness + +For a tutorial on how to use this template with the Label Studio SDK, see [Evaluate LLM Responses](https://api.labelstud.io/tutorials/tutorials/evaluate-llm-responses). 
+ +## Configure the labeling interface + +[Create a project](/guide/setup_project) with the following labeling configuration: + +```xml + + + + +
+ + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + +``` + +This configuration includes the following elements: + +* `<View>` - All labeling configurations must include a base `View` tag. In this configuration, the `View` tag is used to configure the display of blocks, similar to the div tag in HTML. It helps in organizing the layout of the labeling interface. +* ` +
+ + + +
+ +