From 0fc35ea45ca5e900c8d9e22b7ce163ff47340e5c Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Wed, 18 Sep 2024 12:41:25 +0200
Subject: [PATCH] Add `metadata` to every example under `examples`

---
 examples/cloud-run/tgi-deployment/README.md         |  5 +++++
 examples/gke/tei-deployment/README.md               |  5 +++++
 examples/gke/tei-from-gcs-deployment/README.md      |  5 +++++
 examples/gke/tgi-deployment/README.md               |  5 +++++
 examples/gke/tgi-from-gcs-deployment/README.md      |  5 +++++
 examples/gke/trl-full-fine-tuning/README.md         |  5 +++++
 examples/gke/trl-lora-fine-tuning/README.md         |  5 +++++
 .../deploy-bert-on-vertex-ai/vertex-notebook.ipynb  |  7 +++++++
 .../vertex-notebook.ipynb                           | 10 ++++++++++
 .../deploy-flux-on-vertex-ai/vertex-notebook.ipynb  | 10 ++++++++++
 .../vertex-notebook.ipynb                           | 10 ++++++++++
 .../deploy-gemma-on-vertex-ai/vertex-notebook.ipynb | 10 ++++++++++
 .../vertex-notebook.ipynb                           | 11 +++++++++++
 .../vertex-notebook.ipynb                           | 10 ++++++++++
 .../vertex-notebook.ipynb                           | 10 ++++++++++
 15 files changed, 113 insertions(+)

diff --git a/examples/cloud-run/tgi-deployment/README.md b/examples/cloud-run/tgi-deployment/README.md
index 633193d3..3a6c3c10 100644
--- a/examples/cloud-run/tgi-deployment/README.md
+++ b/examples/cloud-run/tgi-deployment/README.md
@@ -1,3 +1,8 @@
+---
+title: Deploy Meta Llama 3.1 8B with Text Generation Inference on Cloud Run
+type: inference
+---
+
 # Deploy Meta Llama 3.1 8B with Text Generation Inference on Cloud Run
 
 Meta Llama 3.1 is the latest open LLM from Meta, released in July 2024. Meta Llama 3.1 comes in three sizes: 8B for efficient deployment and development on consumer-size GPU, 70B for large-scale AI native applications, and 405B for synthetic data, LLM as a Judge or distillation; among other use cases. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. Google Cloud Run is a serverless container platform that allows developers to deploy and manage containerized applications without managing infrastructure, enabling automatic scaling and billing only for usage.
 
 This example showcases how to deploy an LLM from the Hugging Face Hub, in this case Meta Llama 3.1 8B Instruct model quantized to INT4 using AWQ, with the Hugging Face DLC for TGI on Google Cloud Run with GPU support (on preview).

diff --git a/examples/gke/tei-deployment/README.md b/examples/gke/tei-deployment/README.md
index 162e4a37..6fc84461 100644
--- a/examples/gke/tei-deployment/README.md
+++ b/examples/gke/tei-deployment/README.md
@@ -1,3 +1,8 @@
+---
+title: Deploy Snowflake's Arctic Embed (M) with Text Embeddings Inference (TEI) on GKE
+type: inference
+---
+
 # Deploy Snowflake's Arctic Embed (M) with Text Embeddings Inference (TEI) on GKE
 
 Snowflake's Arctic Embed is a suite of text embedding models that focuses on creating high-quality retrieval models optimized for performance, achieving state-of-the-art (SOTA) performance on the MTEB/BEIR leaderboard for each of their size variants. Text Embeddings Inference (TEI) is a toolkit developed by Hugging Face for deploying and serving open source text embeddings and sequence classification models; enabling high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.
diff --git a/examples/gke/tei-from-gcs-deployment/README.md b/examples/gke/tei-from-gcs-deployment/README.md
index 01baf00f..d07f0b98 100644
--- a/examples/gke/tei-from-gcs-deployment/README.md
+++ b/examples/gke/tei-from-gcs-deployment/README.md
@@ -1,3 +1,8 @@
+---
+title: Deploy BGE Base v1.5 (English) with Text Embeddings Inference (TEI) from a GCS Bucket on GKE
+type: inference
+---
+
 # Deploy BGE Base v1.5 (English) with Text Embeddings Inference (TEI) from a GCS Bucket on GKE
 
 BGE, standing for BAAI General Embedding, is a collection of embedding models released by BAAI, which is an English base model for general embedding tasks ranked in the MTEB Leaderboard. Text Embeddings Inference (TEI) is a toolkit developed by Hugging Face for deploying and serving open source text embeddings and sequence classification models; enabling high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.

diff --git a/examples/gke/tgi-deployment/README.md b/examples/gke/tgi-deployment/README.md
index 81a92a12..9f607491 100644
--- a/examples/gke/tgi-deployment/README.md
+++ b/examples/gke/tgi-deployment/README.md
@@ -1,3 +1,8 @@
+---
+title: Deploy Meta Llama 3 8B with Text Generation Inference (TGI) on GKE
+type: inference
+---
+
 # Deploy Meta Llama 3 8B with Text Generation Inference (TGI) on GKE
 
 Meta Llama 3 is the latest LLM from the Llama family, released by Meta; coming in two sizes 8B and 70B, including both the base model and the instruction-tuned model. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.
 
 This post explains how to deploy an LLM from the Hugging Face Hub, as Llama3 8B Instruct, on a GKE Cluster running a purpose-built container to deploy LLMs in a secure and managed environment with the Hugging Face DLC for TGI.

diff --git a/examples/gke/tgi-from-gcs-deployment/README.md b/examples/gke/tgi-from-gcs-deployment/README.md
index 2e2d97fe..ffdc2268 100644
--- a/examples/gke/tgi-from-gcs-deployment/README.md
+++ b/examples/gke/tgi-from-gcs-deployment/README.md
@@ -1,3 +1,8 @@
+---
+title: Deploy Qwen2 7B Instruct with Text Generation Inference (TGI) from a GCS Bucket on GKE
+type: inference
+---
+
 # Deploy Qwen2 7B Instruct with Text Generation Inference (TGI) from a GCS Bucket on GKE
 
 Qwen2 is the new series of Qwen Large Language Models (LLMs) built by Alibaba Cloud, with both base and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model; the 7B variant sitting in the second place in the 7B size range in the Open LLM Leaderboard by Hugging Face and the 72B one in the first place amongst any size. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.
 
 This post explains how to deploy an LLM from a Google Cloud Storage (GCS) Bucket on a GKE Cluster running a purpose-built container to deploy LLMs in a secure and managed environment with the Hugging Face DLC for TGI.

diff --git a/examples/gke/trl-full-fine-tuning/README.md b/examples/gke/trl-full-fine-tuning/README.md
index 98b3522c..78eac490 100644
--- a/examples/gke/trl-full-fine-tuning/README.md
+++ b/examples/gke/trl-full-fine-tuning/README.md
@@ -1,3 +1,8 @@
+---
+title: Fine-tune Gemma 2B with TRL on GKE
+type: training
+---
+
 # Fine-tune Gemma 2B with TRL on GKE
 
 Gemma is a family of lightweight, state-of-the-art open models built from the same research and technology used to create the Gemini models, developed by Google DeepMind and other teams across Google. TRL is a full stack library to fine-tune and align Large Language Models (LLMs) developed by Hugging Face. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.
 
 This post explains how to full fine-tune Gemma 2B with TRL via Supervised Fine-Tuning (SFT) in a multi-GPU setting on a GKE Cluster.

diff --git a/examples/gke/trl-lora-fine-tuning/README.md b/examples/gke/trl-lora-fine-tuning/README.md
index 6b84570c..64dd67b0 100644
--- a/examples/gke/trl-lora-fine-tuning/README.md
+++ b/examples/gke/trl-lora-fine-tuning/README.md
@@ -1,3 +1,8 @@
+---
+title: Fine-tune Mistral 7B v0.3 with TRL on GKE
+type: training
+---
+
 # Fine-tune Mistral 7B v0.3 with TRL on GKE
 
 Mistral is a family of models with varying sizes, created by the Mistral AI team; the Mistral 7B v0.3 LLM is a Mistral 7B v0.2 with extended vocabulary. TRL is a full stack library to fine-tune and align Large Language Models (LLMs) developed by Hugging Face. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure.
 
 This post explains how to fine-tune Mistral 7B v0.3 with TRL via Supervised Fine-Tuning (SFT) and Low-Rank Adaptation (LoRA) in a single GPU on a GKE Cluster.
diff --git a/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai/vertex-notebook.ipynb
index 09a1fd5c..e18ff6dc 100644
--- a/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,12 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai/vertex-notebook.ipynb
index 8c829009..398c31e0 100644
--- a/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai/vertex-notebook.ipynb
index 4340b179..bd88d47f 100644
--- a/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai/vertex-notebook.ipynb
index c2f98ba3..c9cd4baf 100644
--- a/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai/vertex-notebook.ipynb
index 184380f3..07d182be 100644
--- a/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/vertex-notebook.ipynb
index 9c8f267e..792af645 100644
--- a/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,16 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d7c432cf-dc16-4bd8-89bd-7c1c0eb58d37",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e4e7faed-c34a-4f01-84ec-eefbfb65506d",

diff --git a/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
index d46087ed..4da6dc97 100644
--- a/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb b/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
index 4c951131..02ea7902 100644
--- a/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
+++ b/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},