diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml index f864e5c5..8f75f38a 100644 --- a/.github/workflows/doc-build.yml +++ b/.github/workflows/doc-build.yml @@ -6,7 +6,10 @@ on: - main - doc-builder* paths: - - docs/source/** + - docs/** + - examples/**/*.md + - examples/**/*.ipynb + - Makefile - .github/workflows/doc-build.yml jobs: diff --git a/.github/workflows/doc-pr-build.yml b/.github/workflows/doc-pr-build.yml index 98500342..95341b57 100644 --- a/.github/workflows/doc-pr-build.yml +++ b/.github/workflows/doc-pr-build.yml @@ -3,7 +3,10 @@ name: Build PR Documentation on: pull_request: paths: - - docs/source/** + - docs/** + - examples/**/*.md + - examples/**/*.ipynb + - Makefile - .github/workflows/doc-pr-build.yml concurrency: @@ -20,3 +23,5 @@ jobs: package_name: google-cloud additional_args: --not_python_module pre_command: cd Google-Cloud-Containers && make docs + env: + GITHUB_BRANCH: ${{ github.head_ref || github.ref_name }} diff --git a/README.md b/README.md index c637eb93..ce6e9b40 100644 --- a/README.md +++ b/README.md @@ -44,27 +44,28 @@ The [`examples`](./examples) directory contains examples for using the container | Service | Example | Title | | --------- | ------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------- | +| Vertex AI | [examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai](./examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai) | Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai](./examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai) | Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI | | GKE | [examples/gke/trl-full-fine-tuning](./examples/gke/trl-full-fine-tuning) | Fine-tune Gemma 2B with PyTorch Training DLC using SFT on GKE | | GKE | [examples/gke/trl-lora-fine-tuning](./examples/gke/trl-lora-fine-tuning) | Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT + LoRA on GKE | -| Vertex AI | [examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai](./examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai) | Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai](./examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai) | Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI | ### Inference Examples -| Service | Example | Title | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------- | -| GKE | [examples/gke/tgi-deployment](./examples/gke/tgi-deployment) | Deploy Meta Llama 3 8B with TGI DLC on GKE | -| GKE | [examples/gke/tgi-from-gcs-deployment](./examples/gke/tgi-from-gcs-deployment) | Deploy Qwen2 7B with TGI DLC from GCS on GKE | -| GKE | [examples/gke/tgi-llama-405b-deployment](./examples/gke/tgi-llama-405b-deployment) | Deploy Llama 3.1 405B with TGI DLC on GKE | -| GKE | [examples/gke/tei-deployment](./examples/gke/tei-deployment) | Deploy Snowflake's Arctic Embed with TEI DLC on GKE | -| GKE | 
[examples/gke/tei-from-gcs-deployment](./examples/gke/tei-from-gcs-deployment) | Deploy BGE Base v1.5 with TEI DLC from GCS on GKE | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai) | Deploy BERT Models with PyTorch Inference DLC on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai) | Deploy Embedding Models with TEI DLC on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai) | Deploy Gemma 7B with TGI DLC on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai) | Deploy Gemma 7B with TGI DLC from GCS on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai) | Deploy FLUX with PyTorch Inference DLC on Vertex AI | -| Vertex AI | [examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-llama-405b-on-vertex-ai/vertex-notebook.ipynb) | Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI | -| Cloud Run | [examples/cloud-run/tgi-deployment](./examples/cloud-run/tgi-deployment/README.md) | Deploy Meta Llama 3.1 with TGI DLC on Cloud Run | +| Service | Example | Title | +| --------- | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------- | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai) | Deploy BERT Models with PyTorch Inference DLC on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai) | Deploy Embedding Models with TEI DLC on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai) | Deploy FLUX with PyTorch Inference DLC on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai) | Deploy Gemma 7B with TGI DLC from GCS on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai) | Deploy Gemma 7B with TGI DLC on Vertex AI | +| Vertex AI | [examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai](./examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai) | Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI | +| GKE | [examples/gke/tei-from-gcs-deployment](./examples/gke/tei-from-gcs-deployment) | Deploy BGE Base v1.5 with TEI DLC from GCS on GKE | +| GKE | [examples/gke/tgi-multi-lora-deployment](./examples/gke/tgi-multi-lora-deployment) | Deploy Gemma2 with multiple LoRA adapters with TGI DLC on GKE | +| GKE | [examples/gke/tgi-llama-405b-deployment](./examples/gke/tgi-llama-405b-deployment) | Deploy Llama 3.1 405B with TGI DLC on GKE | +| GKE | [examples/gke/tgi-deployment](./examples/gke/tgi-deployment) | Deploy Meta Llama 3 8B with TGI DLC on GKE | +| GKE | [examples/gke/tgi-from-gcs-deployment](./examples/gke/tgi-from-gcs-deployment) | Deploy Qwen2 7B with TGI DLC from GCS on GKE | +| GKE | 
[examples/gke/tei-deployment](./examples/gke/tei-deployment) | Deploy Snowflake's Arctic Embed with TEI DLC on GKE | +| Cloud Run | [examples/cloud-run/tgi-deployment](./examples/cloud-run/tgi-deployment) | Deploy Meta Llama 3.1 8B with TGI DLC on Cloud Run | ### Evaluation diff --git a/docs/scripts/auto-generate-examples.py b/docs/scripts/auto-generate-examples.py index 8490b294..9e01c307 100644 --- a/docs/scripts/auto-generate-examples.py +++ b/docs/scripts/auto-generate-examples.py @@ -1,6 +1,8 @@ import os import re +GITHUB_BRANCH = os.getenv("GITHUB_BRANCH", "main") + def process_readme_files(): print("Processing README.md files from examples/gke and examples/cloud-run...") @@ -35,37 +37,32 @@ def process_file(root, file, dir): # Replace image and link paths content = re.sub( r"\(\./(imgs|assets)/([^)]*\.png)\)", - r"(https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/main/" + rf"(https://raw.githubusercontent.com/huggingface/Google-Cloud-Containers/{GITHUB_BRANCH}/" + root + r"/\1/\2)", content, ) content = re.sub( r"\(\.\./([^)]+)\)", - r"(https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/" + rf"(https://github.com/huggingface/Google-Cloud-Containers/tree/{GITHUB_BRANCH}/examples/" + dir + r"/\1)", content, ) content = re.sub( r"\(\.\/([^)]+)\)", - r"(https://github.com/huggingface/Google-Cloud-Containers/tree/main/" + rf"(https://github.com/huggingface/Google-Cloud-Containers/tree/{GITHUB_BRANCH}/" + root + r"/\1)", content, ) - # Regular expression to match the specified blocks - pattern = r"> \[!(NOTE|WARNING)\]\n((?:> .*\n)+)" - def replacement(match): block_type = match.group(1) content = match.group(2) - # Remove '> ' from the beginning of each line and strip whitespace - lines = [ - line.lstrip("> ").strip() for line in content.split("\n") if line.strip() - ] + # Remove '> ' from the beginning of each line + lines = [line[2:] for line in content.split("\n") if line.strip()] # Determine the Tip type tip_type = " warning" if block_type == "WARNING" else "" @@ -77,11 +74,14 @@ def replacement(match): return new_block + # Regular expression to match the specified blocks + pattern = r"> \[!(NOTE|WARNING)\]\n((?:>.*(?:\n|$))+)" + # Perform the transformation content = re.sub(pattern, replacement, content, flags=re.MULTILINE) - # Remove blockquotes - content = re.sub(r"^(>[ ]*)+", "", content, flags=re.MULTILINE) + # Remove any remaining '>' or '> ' at the beginning of lines + content = re.sub(r"^>[ ]?", "", content, flags=re.MULTILINE) # Check for remaining relative paths if re.search(r"\(\.\./|\(\./", content): diff --git a/docs/source/resources.mdx b/docs/source/resources.mdx index a0c916f4..c05c190d 100644 --- a/docs/source/resources.mdx +++ b/docs/source/resources.mdx @@ -24,45 +24,44 @@ Learn how to use Hugging Face in Google Cloud by reading our blog posts, present - [All examples](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples) -### GKE - -- Training - - - [Fine-tune Gemma 2B with PyTorch Training DLC using SFT on GKE](https://github.com/huggingface/Google-Cloud-Containers/blob/main/examples/gke/trl-full-fine-tuning) - - [Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT + LoRA on GKE](https://github.com/huggingface/Google-Cloud-Containers/blob/main/examples/gke/trl-lora-fine-tuning) +### Vertex AI - Inference - - [Deploy Meta Llama 3 8B with TGI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-deployment) - - [Deploy Llama3 8B with TGI DLC on 
GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment) - - [Deploy Llama 3.1 405B with TGI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment) - - [Deploy Snowflake's Arctic Embed with TEI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tei-deployment) - - [Deploy BGE Base v1.5 with TEI DLC from GCS on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tei-from-gcs-deployment) - -### Vertex AI + - [Deploy BERT Models with PyTorch Inference DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai) + - [Deploy Embedding Models with TEI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai) + - [Deploy FLUX with PyTorch Inference DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai) + - [Deploy Gemma 7B with TGI DLC from GCS on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai) + - [Deploy Gemma 7B with TGI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai) + - [Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai) - Training - - [Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/blob/main/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb) - - [Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/blob/main/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai/vertex-notebook.ipynb) + - [Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/trl-lora-sft-fine-tuning-on-vertex-ai) + - [Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/trl-full-sft-fine-tuning-on-vertex-ai) -- Inference +- Evaluation - - [Deploy BERT Models with PyTorch Inference DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-bert-on-vertex-ai/vertex-notebook.ipynb) - - [Deploy Embedding Models with TEI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-embedding-on-vertex-ai/vertex-notebook.ipynb) - - [Deploy Gemma 7B with TGI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-gemma-on-vertex-ai/vertex-notebook.ipynb) - - [Deploy Gemma 7B with TGI DLC from GCS on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-gemma-from-gcs-on-vertex-ai/vertex-notebook.ipynb) - - [Deploy FLUX with PyTorch Inference DLC on Vertex 
AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-flux-on-vertex-ai/vertex-notebook.ipynb) - - [Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/deploy-llama-3-1-405b-on-vertex-ai/vertex-notebook.ipynb) + - [Evaluate open LLMs with Vertex AI and Gemini](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/evaluate-llms-with-vertex-ai) +### GKE -- Evaluation +- Inference - - [Evaluate open LLMs with Vertex AI and Gemini](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/vertex-ai/notebooks/evaluate-llms-with-vertex-ai) + - [Deploy BGE Base v1.5 with TEI DLC from GCS on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tei-from-gcs-deployment) + - [Deploy Gemma2 with multiple LoRA adapters with TGI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-multi-lora-deployment) + - [Deploy Llama 3.1 405B with TGI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-llama-405b-deployment) + - [Deploy Meta Llama 3 8B with TGI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-deployment) + - [Deploy Qwen2 7B with TGI DLC from GCS on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tgi-from-gcs-deployment) + - [Deploy Snowflake's Arctic Embed with TEI DLC on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/tei-deployment) + +- Training + - [Fine-tune Gemma 2B with PyTorch Training DLC using SFT on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/trl-full-fine-tuning) + - [Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT + LoRA on GKE](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/gke/trl-lora-fine-tuning) ### (Preview) Cloud Run - Inference - - [Deploy Meta Llama 3.1 with TGI DLC on Cloud Run](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/cloud-run/tgi-deployment) + - [Deploy Meta Llama 3.1 8B with TGI DLC on Cloud Run](https://github.com/huggingface/Google-Cloud-Containers/tree/main/examples/cloud-run/tgi-deployment) diff --git a/examples/gke/README.md b/examples/gke/README.md index 0432897c..cbb52fc8 100644 --- a/examples/gke/README.md +++ b/examples/gke/README.md @@ -11,10 +11,11 @@ This directory contains usage examples of the Hugging Face Deep Learning Contain ## Inference Examples -| Example | Title | -| -------------------------------------------------------- | --------------------------------------------------- | -| [tgi-deployment](./tgi-deployment) | Deploy Meta Llama 3 8B with TGI DLC on GKE | -| [tgi-from-gcs-deployment](./tgi-from-gcs-deployment) | Deploy Qwen2 7B with TGI DLC from GCS on GKE | -| [tgi-llama-405b-deployment](./tgi-llama-405b-deployment) | Deploy Llama 3.1 405B with TGI DLC on GKE | -| [tei-deployment](./tei-deployment) | Deploy Snowflake's Arctic Embed with TEI DLC on GKE | -| [tei-from-gcs-deployment](./tei-from-gcs-deployment) | Deploy BGE Base v1.5 with TEI DLC from GCS on GKE | +| Example | Title | +| -------------------------------------------------------- | ------------------------------------------------------------- | +| [tei-deployment](./tei-deployment) | Deploy Snowflake's Arctic Embed with TEI DLC on GKE | +| 
[tei-from-gcs-deployment](./tei-from-gcs-deployment) | Deploy BGE Base v1.5 with TEI DLC from GCS on GKE | +| [tgi-deployment](./tgi-deployment) | Deploy Meta Llama 3 8B with TGI DLC on GKE | +| [tgi-from-gcs-deployment](./tgi-from-gcs-deployment) | Deploy Qwen2 7B with TGI DLC from GCS on GKE | +| [tgi-llama-405b-deployment](./tgi-llama-405b-deployment) | Deploy Llama 3.1 405B with TGI DLC on GKE | +| [tgi-multi-lora-deployment](./tgi-multi-lora-deployment) | Deploy Gemma2 with multiple LoRA adapters with TGI DLC on GKE | diff --git a/examples/gke/tgi-deployment/README.md b/examples/gke/tgi-deployment/README.md index 17549fc9..76b81748 100644 --- a/examples/gke/tgi-deployment/README.md +++ b/examples/gke/tgi-deployment/README.md @@ -154,7 +154,7 @@ kubectl apply -f config/ > Alternatively, you can just wait for the deployment to be ready with the following command: > > ```bash -> kubectl wait --for=condition=Available --timeout=700s deployment/tei-deployment +> kubectl wait --for=condition=Available --timeout=700s deployment/tgi-deployment > ``` ## Inference with TGI diff --git a/examples/gke/tgi-multi-lora-deployment/README.md b/examples/gke/tgi-multi-lora-deployment/README.md new file mode 100644 index 00000000..34a4dea4 --- /dev/null +++ b/examples/gke/tgi-multi-lora-deployment/README.md @@ -0,0 +1,314 @@ +--- +title: Deploy Gemma2 with multiple LoRA adapters with TGI DLC on GKE +type: inference +--- + +# Deploy Gemma2 with multiple LoRA adapters with TGI DLC on GKE + +Gemma 2 is an advanced, lightweight open model that enhances performance and efficiency while building on the research and technology of its predecessor and the Gemini models developed by Google DeepMind and other teams across Google. Text Generation Inference (TGI) is a toolkit developed by Hugging Face for deploying and serving LLMs, with high performance text generation. And, Google Kubernetes Engine (GKE) is a fully-managed Kubernetes service in Google Cloud that can be used to deploy and operate containerized applications at scale using GCP's infrastructure. + +This example showcases how to deploy Gemma 2 2B from the Hugging Face Hub with multiple LoRA adapters fine-tuned for different purposes such as coding, SQL, or Japanese, on a GKE Cluster running the Hugging Face DLC for TGI i.e. a purpose-built container to deploy LLMs in a secure and managed environment. + +## Setup / Configuration + +First, you need to install both `gcloud` and `kubectl` in your local machine, which are the command-line tools for Google Cloud and Kubernetes, respectively, to interact with the GCP and the GKE Cluster. + +- To install `gcloud`, follow the instructions at [Cloud SDK Documentation - Install the gcloud CLI](https://cloud.google.com/sdk/docs/install). +- To install `kubectl`, follow the instructions at [Kubernetes Documentation - Install Tools](https://kubernetes.io/docs/tasks/tools/#kubectl). + +Optionally, to ease the usage of the commands within this tutorial, you need to set the following environment variables for GCP: + +```bash +export PROJECT_ID=your-project-id +export LOCATION=your-location +export CLUSTER_NAME=your-cluster-name +``` + +Then you need to login into your GCP account and set the project ID to the one you want to use for the deployment of the GKE Cluster. + +```bash +gcloud auth login +gcloud auth application-default login # For local development +gcloud config set project $PROJECT_ID +``` + +Once you are logged in, you need to enable the necessary service APIs in GCP i.e. 
the Google Kubernetes Engine API and the Google Container Registry API, which are necessary for the deployment of the GKE Cluster and the Hugging Face DLC for TGI. + +```bash +gcloud services enable container.googleapis.com +gcloud services enable containerregistry.googleapis.com +``` + +Additionally, to use `kubectl` with the GKE Cluster credentials, you also need to install the `gke-gcloud-auth-plugin`, that can be installed with `gcloud` as follows: + +```bash +gcloud components install gke-gcloud-auth-plugin +``` + +> [!NOTE] +> Installing the `gke-gcloud-auth-plugin` does not need to be installed via `gcloud` specifically, to read more about the alternative installation methods, please visit [GKE Documentation - Install kubectl and configure cluster access](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl#install_plugin). + +## Create GKE Cluster + +Once everything's set up, you can proceed with the creation of the GKE Cluster and the node pool, which in this case will be a single GPU node, in order to use the GPU accelerator for high performance inference, also following TGI recommendations based on their internal optimizations for GPUs. + +To deploy the GKE Cluster, the "Autopilot" mode will be used as it is the recommended one for most of the workloads, since the underlying infrastructure is managed by Google. Alternatively, you can also use the "Standard" mode. + +> [!NOTE] +> Important to check before creating the GKE Autopilot Cluster the [GKE Documentation - Optimize Autopilot Pod performance by choosing a machine series](https://cloud.google.com/kubernetes-engine/docs/how-to/performance-pods), since not all the versions support GPU accelerators e.g. `nvidia-l4` is not supported in the GKE cluster versions 1.28.3 or lower. + +```bash +gcloud container clusters create-auto $CLUSTER_NAME \ + --project=$PROJECT_ID \ + --location=$LOCATION \ + --release-channel=stable \ + --cluster-version=1.29 \ + --no-autoprovisioning-enable-insecure-kubelet-readonly-port +``` + +> [!NOTE] +> To select the specific version in your location of the GKE Cluster, you can run the following command: +> +> ```bash +> gcloud container get-server-config \ +> --flatten="channels" \ +> --filter="channels.channel=STABLE" \ +> --format="yaml(channels.channel,channels.defaultVersion)" \ +> --location=$LOCATION +> ``` +> +> For more information please visit [GKE Documentation - Specifying cluster version](https://cloud.google.com/kubernetes-engine/versioning#specifying_cluster_version). + +![GKE Cluster in the GCP Console](./imgs/gke-cluster.png) + +Once the GKE Cluster is created, you can get the credentials to access it via `kubectl` with the following command: + +```bash +gcloud container clusters get-credentials $CLUSTER_NAME --location=$LOCATION +``` + +## Get Hugging Face token and set secrets in GKE + +As [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it) is a gated model, you need to set a Kubernetes secret with the Hugging Face Hub token via `kubectl`. 
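Before moving on, make sure that your Hugging Face account has actually been granted access to [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), as otherwise TGI will fail to download the gated weights when the pod starts. As an optional sanity check (a minimal sketch that assumes you have already logged in with `huggingface-cli login`, as described below), you can try to resolve the model metadata with your token:

```bash
python -c "from huggingface_hub import model_info; model_info('google/gemma-2-2b-it'); print('Access granted')"
```

If the command raises a gated-repository error instead, request access on the model page first.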
+ +To generate a custom token for the Hugging Face Hub, you can follow the instructions at [Hugging Face Hub - User access tokens](https://huggingface.co/docs/hub/en/security-tokens); and the recommended way of setting it is to install the `huggingface_hub` Python SDK as follows: + +```bash +pip install --upgrade --quiet huggingface_hub +``` + +And then login in with the generated token with read-access over the gated/private model: + +```bash +huggingface-cli login +``` + +Finally, you can create the Kubernetes secret with the generated token for the Hugging Face Hub as follows using the `huggingface_hub` Python SDK to retrieve the token: + +```bash +kubectl create secret generic hf-secret \ + --from-literal=hf_token=$(python -c "from huggingface_hub import get_token; print(get_token())") \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +Or, alternatively, you can directly set the token as follows: + +```bash +kubectl create secret generic hf-secret \ + --from-literal=hf_token=hf_*** \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +![GKE Secret in the GCP Console](./imgs/gke-secrets.png) + +More information on how to set Kubernetes secrets in a GKE Cluster at [Secret Manager Documentation - Use Secret Manager add-on with Google Kubernetes Engine](https://cloud.google.com/secret-manager/docs/secret-manager-managed-csi-component). + +## Deploy TGI + +Now you can proceed to the Kubernetes deployment of the Hugging Face DLC for TGI, serving the [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it) model and multiple LoRA adapters fine-tuned on top of it, from the Hugging Face Hub. + +> [!NOTE] +> To explore all the models that can be served via TGI, you can explore [the models tagged with `text-generation-inference` in the Hub](https://huggingface.co/models?other=text-generation-inference). + +The Hugging Face DLC for TGI will be deployed via `kubectl`, from the configuration files in the `config/` directory: + +- `deployment.yaml`: contains the deployment details of the pod including the reference to the Hugging Face DLC for TGI setting the `MODEL_ID` to [`google/gemma-2-2b-it`](https://huggingface.co/google/gemma-2-2b-it), and the `LORA_ADAPTERS` to `google-cloud-partnership/gemma-2-2b-it-lora-magicoder,google-cloud-partnership/gemma-2-2b-it-lora-sql`, being the following adapters: + + - [`google-cloud-partnership/gemma-2-2b-it-lora-sql`](https://huggingface.co/google-cloud-partnership/gemma-2-2b-it-lora-sql): fine-tuned with [`gretelai/synthetic_text_to_sql`](https://huggingface.co/datasets/gretelai/synthetic_text_to_sql) to generate SQL queries with an explanation, given an SQL context and a prompt / question about it. + - [`google-cloud-partnership/gemma-2-2b-it-lora-magicoder`](https://huggingface.co/google-cloud-partnership/gemma-2-2b-it-lora-magicoder): fine-tuned with [`ise-uiuc/Magicoder-OSS-Instruct-75K`](https://huggingface.co/datasets/ise-uiuc/Magicoder-OSS-Instruct-75K) to generate code in diverse programming languages such as Python, Rust, or C, among many others; based on an input problem. + - [`google-cloud-partnership/gemma-2-2b-it-lora-jap-en`](https://huggingface.co/google-cloud-partnership/gemma-2-2b-it-lora-jap-en): fine-tuned with [`Jofthomas/japanese-english-translation`](https://huggingface.co/datasets/Jofthomas/japanese-english-translation), a synthetically generated dataset of short Japanese sentences translated to English; to translate English to Japanese and the other way around. 
+ +- `service.yaml`: contains the service details of the pod, exposing the port 8080 for the TGI service. + +- (optional) `ingress.yaml`: contains the ingress details of the pod, exposing the service to the external world so that it can be accessed via the ingress IP. + +> [!WARNING] +> Note that the selected LoRA adapters are not intended to be used on production environments, as the fine-tuned adapters have not been tested extensively. + +```bash +kubectl apply -f config/ +``` + +> [!NOTE] +> The Kubernetes deployment may take a few minutes to be ready, so you can check the status of the deployment with the following command: +> +> ```bash +> kubectl get pods +> ``` +> +> Alternatively, you can just wait for the deployment to be ready with the following command: +> +> ```bash +> kubectl wait --for=condition=Available --timeout=700s deployment/tgi-deployment +> ``` + +![GKE Deployment in the GCP Console](./imgs/gke-deployment.png) + +![GKE Deployment Logs in the GCP Console](./imgs/gke-deployment-logs.png) + +## Inference with TGI + +To run the inference over the deployed TGI service, you need to make sure that the service is accessible first, you can do so by either: + +- Port-forwarding the deployed TGI service to the port 8080, so as to access via `localhost` with the command: + + ```bash + kubectl port-forward service/tgi-service 8080:8080 + ``` + +- Accessing the TGI service via the external IP of the ingress, which is the default scenario here since you have defined the ingress configuration in the `config/ingress.yaml` file (but it can be skipped in favour of the port-forwarding), that can be retrieved with the following command: + + ```bash + kubectl get ingress tgi-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}' + ``` + +### Via cURL + +To send a POST request to the TGI service using `cURL`, you can run the following command: + +```bash +curl http://localhost:8080/v1/chat/completions \ + -X POST \ + -d '{"messages":[{"role":"user","content":"What is Deep Learning?"}],"temperature":0.7,"top_p":0.95,"max_tokens":128}}' \ + -H 'Content-Type: application/json' +``` + +Or send a POST request to the ingress IP instead: + +```bash +curl http://$(kubectl get ingress tgi-ingress -o jsonpath='{.status.loadBalancer.ingress[0].ip}')/v1/chat/completions \ + -X POST \ + -d '{"messages":[{"role":"user","content":"What is Deep Learning?"}],"temperature":0.7,"top_p":0.95,"max_tokens":128}}' \ + -H 'Content-Type: application/json' +``` + +> [!NOTE] +> As in this case you are serving multiple LoRA adapters, to use those you will need to specify the `model` parameter when using the `/v1/chat/completions` endpoint (or the `adapter_id` parameter when using the `/generate` endpoint), so that the LoRA adapter is used. In any other case, the base model will be used instead, meaning that the adapters are only used when explicitly specified. + +For example, say that you want to generate a piece of code for a problem that you cannot solve, then you should ideally use the fine-tuned adapter [`google-cloud-partnership/gemma-2-2b-it-lora-magicoder`](https://huggingface.co/google-cloud-partnership/gemma-2-2b-it-lora-magicoder) which is specifically fine-tuned for that; alternatively you could also use the base instruction-tuned model as it may be able to tackle a wide variety of tasks, but e.g. the Japanese to English model wouldn't be a nice pick for that task. 
+ +```bash +curl http://localhost:8080/v1/chat/completions \ + -X POST \ + -d '{"messages":[{"role":"user","content":"You are given a vector of integers, A, of length n. Your task is to implement a function that finds the maximum product of any two distinct elements in the vector. Write a function in Rust to return this maximum product. Function Signature: rust fn max_product(a: Vec) -> i32 Input: - A vector a of length n (2 <= n <= 10^5), where each element is an integer (-10^4 <= a[i] <= 10^4). Output: - Return the maximum product of two distinct elements. Example: Input: a = vec![1, 5, 3, 9] Output: max_product(a) -> 45"}],"temperature":0.7,"top_p":0.95,"max_tokens":256,"model":"google-cloud-partnership/gemma-2-2b-it-lora-magicoder"}}' \ + -H 'Content-Type: application/json' +``` + +Which generates the following solution to the given prompt: + +``` +{"object":"chat.completion","id":"","created":1727378101,"model":"google/gemma-2-2b-it","system_fingerprint":"2.3.1-dev0-native","choices":[{"index":0,"message":{"role":"assistant","content":"\`\`\`rust\nfn max_product(a: Vec) -> i32 {\n let mut max1 = a[0];\n let mut max2 = a[1];\n if max2 < max1 {\n std::mem::swap(&mut max1, &mut max2);\n }\n for i in 2..a.len() {\n if a[i] > max1 {\n max2 = max1;\n max1 = a[i];\n } else if a[i] > max2 {\n "},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":163,"completion_tokens":128,"total_tokens":291}} +``` + +Translated to Rust code that would be: + +```rust +fn max_product(a: Vec) -> i32 { + if a.len() < 2 { + return 0; + } + let mut max_product = a[0] * a[1]; + for i in 1..a.len() { + for j in i + 1..a.len() { + if a[i] * a[j] > max_product { + max_product = a[i] * a[j]; + } + } + } + max_product +} +``` + +### Via Python + +To run the inference using Python, you can either use the `huggingface_hub` Python SDK (recommended) or the `openai` Python SDK. + +> [!NOTE] +> In the examples below `localhost` will be used, but if you did deploy TGI with the ingress, feel free to use the ingress IP as mentioned above (without specifying the port). + +#### `huggingface_hub` + +You can install it via `pip` as `pip install --upgrade --quiet huggingface_hub`, and then run the following snippet to mimic the `cURL` commands above i.e. sending requests to the Messages API providing the adapter identifier via the `model` parameter: + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient(base_url="http://localhost:8080", api_key="-") + +chat_completion = client.chat.completions.create( + model="google-cloud-partnership/gemma-2-2b-it-lora-magicoder", + messages=[ + {"role": "user", "content": "You are given a vector of integers, A, of length n. Your task is to implement a function that finds the maximum product of any two distinct elements in the vector. Write a function in Rust to return this maximum product. Function Signature: rust fn max_product(a: Vec) -> i32 Input: - A vector a of length n (2 <= n <= 10^5), where each element is an integer (-10^4 <= a[i] <= 10^4). Output: - Return the maximum product of two distinct elements. 
Example: Input: a = vec![1, 5, 3, 9] Output: max_product(a) -> 45"}, + ], + max_tokens=128, +) +``` + +Alternatively, you can also format the prompt yourself and send that via the Text Generation API providing the adapter identifier via the `adapter_id` argument as follows: + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient("http://localhost:8080", api_key="-") + +generation = client.text_generation( + prompt="You are given a vector of integers, A, of length n. Your task is to implement a function that finds the maximum product of any two distinct elements in the vector. Write a function in Rust to return this maximum product. Function Signature: rust fn max_product(a: Vec) -> i32 Input: - A vector a of length n (2 <= n <= 10^5), where each element is an integer (-10^4 <= a[i] <= 10^4). Output: - Return the maximum product of two distinct elements. Example: Input: a = vec![1, 5, 3, 9] Output: max_product(a) -> 45", + max_new_tokens=128, + adapter_id="google-cloud-partnership/gemma-2-2b-it-lora-magicoder", +) +``` + +#### `openai` + +Additionally, you can also use the Messages API via `openai`; you can install it via `pip` as `pip install --upgrade openai`, and then run: + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:8080/v1/", + api_key="-", +) + +chat_completion = client.chat.completions.create( + model="google-cloud-partnership/gemma-2-2b-it-lora-magicoder", + messages=[ + {"role": "user", "content": "You are given a vector of integers, A, of length n. Your task is to implement a function that finds the maximum product of any two distinct elements in the vector. Write a function in Rust to return this maximum product. Function Signature: rust fn max_product(a: Vec) -> i32 Input: - A vector a of length n (2 <= n <= 10^5), where each element is an integer (-10^4 <= a[i] <= 10^4). Output: - Return the maximum product of two distinct elements. Example: Input: a = vec![1, 5, 3, 9] Output: max_product(a) -> 45"}, + ], + max_tokens=128, +) +``` + +## Delete GKE Cluster + +Finally, once you are done using TGI on the GKE Cluster, you can safely delete the GKE Cluster to avoid incurring in unnecessary costs. + +```bash +gcloud container clusters delete $CLUSTER_NAME --location=$LOCATION +``` + +Alternatively, you can also downscale the replicas of the deployed pod to 0 in case you want to preserve the cluster, since the default GKE Cluster deployed with GKE Autopilot mode is running just a single `e2-small` instance. 
+ +```bash +kubectl scale --replicas=0 deployment/tgi-deployment +``` diff --git a/examples/gke/tgi-multi-lora-deployment/config/deployment.yaml b/examples/gke/tgi-multi-lora-deployment/config/deployment.yaml new file mode 100644 index 00000000..24ca7b94 --- /dev/null +++ b/examples/gke/tgi-multi-lora-deployment/config/deployment.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tgi-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: tgi-server + template: + metadata: + labels: + app: tgi-server + hf.co/model: google--gemma-2-2b-it + hf.co/task: text-generation + spec: + containers: + - name: tgi-container + image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311 + resources: + requests: + nvidia.com/gpu: 1 + env: + - name: MODEL_ID + value: google/gemma-2-2b-it + - name: LORA_ADAPTERS + value: google-cloud-partnership/gemma-2-2b-it-lora-magicoder,google-cloud-partnership/gemma-2-2b-it-lora-sql,google-cloud-partnership/gemma-2-2b-it-lora-jap-en + - name: NUM_SHARD + value: "1" + - name: PORT + value: "8080" + - name: HUGGING_FACE_HUB_TOKEN + valueFrom: + secretKeyRef: + name: hf-secret + key: hf_token + volumeMounts: + - mountPath: /dev/shm + name: dshm + - mountPath: /data + name: data + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: 1Gi + - name: data + emptyDir: {} + nodeSelector: + cloud.google.com/gke-accelerator: nvidia-l4 diff --git a/examples/gke/tgi-multi-lora-deployment/config/ingress.yaml b/examples/gke/tgi-multi-lora-deployment/config/ingress.yaml new file mode 100644 index 00000000..2aac93a7 --- /dev/null +++ b/examples/gke/tgi-multi-lora-deployment/config/ingress.yaml @@ -0,0 +1,17 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: tgi-ingress + annotations: + kubernetes.io/ingress.class: "gce" +spec: + rules: + - http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: tgi-service + port: + number: 8080 diff --git a/examples/gke/tgi-multi-lora-deployment/config/service.yaml b/examples/gke/tgi-multi-lora-deployment/config/service.yaml new file mode 100644 index 00000000..2e3978ac --- /dev/null +++ b/examples/gke/tgi-multi-lora-deployment/config/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: tgi-service +spec: + selector: + app: tgi-server + type: ClusterIP + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 diff --git a/examples/gke/tgi-multi-lora-deployment/imgs/gke-cluster.png b/examples/gke/tgi-multi-lora-deployment/imgs/gke-cluster.png new file mode 100644 index 00000000..863cd3d2 Binary files /dev/null and b/examples/gke/tgi-multi-lora-deployment/imgs/gke-cluster.png differ diff --git a/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment-logs.png b/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment-logs.png new file mode 100644 index 00000000..6d8c5552 Binary files /dev/null and b/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment-logs.png differ diff --git a/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment.png b/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment.png new file mode 100644 index 00000000..45cfd0e4 Binary files /dev/null and b/examples/gke/tgi-multi-lora-deployment/imgs/gke-deployment.png differ diff --git a/examples/gke/tgi-multi-lora-deployment/imgs/gke-secrets.png b/examples/gke/tgi-multi-lora-deployment/imgs/gke-secrets.png new file mode 100644 index 00000000..4399a63e Binary files /dev/null and 
b/examples/gke/tgi-multi-lora-deployment/imgs/gke-secrets.png differ diff --git a/examples/vertex-ai/README.md b/examples/vertex-ai/README.md index 9a822af8..59c29abd 100644 --- a/examples/vertex-ai/README.md +++ b/examples/vertex-ai/README.md @@ -8,28 +8,28 @@ For Google Vertex AI, we differentiate between the executable Jupyter Notebook e ### Training Examples -| Example | Title | -| ------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------- | -| [trl-full-sft-fine-tuning-on-vertex-ai](./notebooks/trl-full-sft-fine-tuning-on-vertex-ai) | Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI | -| [trl-lora-sft-fine-tuning-on-vertex-ai](./notebooks/trl-lora-sft-fine-tuning-on-vertex-ai) | Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI | +| Example | Title | +| ---------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------- | +| [notebooks/trl-lora-sft-fine-tuning-on-vertex-ai](./notebooks/trl-lora-sft-fine-tuning-on-vertex-ai) | Fine-tune Gemma 2B with PyTorch Training DLC using SFT + LoRA on Vertex AI | +| [notebooks/trl-full-sft-fine-tuning-on-vertex-ai](./notebooks/trl-full-sft-fine-tuning-on-vertex-ai) | Fine-tune Mistral 7B v0.3 with PyTorch Training DLC using SFT on Vertex AI | ### Inference Examples -| Example | Title | -| ------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------- | -| [deploy-bert-on-vertex-ai](./notebooks/deploy-bert-on-vertex-ai) | Deploy BERT Models with PyTorch Inference DLC on Vertex AI | -| [deploy-embedding-on-vertex-ai](./notebooks/deploy-embedding-on-vertex-ai) | Deploy Embedding Models with TEI DLC on Vertex AI | -| [deploy-gemma-on-vertex-ai](./notebooks/deploy-gemma-on-vertex-ai) | Deploy Gemma 7B with TGI DLC on Vertex AI | -| [deploy-gemma-from-gcs-on-vertex-ai](./notebooks/deploy-gemma-from-gcs-on-vertex-ai) | Deploy Gemma 7B with TGI DLC from GCS on Vertex AI | -| [deploy-flux-on-vertex-ai](./notebooks/deploy-flux-on-vertex-ai) | Deploy FLUX with PyTorch Inference DLC on Vertex AI | -| [deploy-llama-3-1-405b-on-vertex-ai](./notebooks/deploy-llama-405b-on-vertex-ai/vertex-notebook.ipynb) | Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI | +| Example | Title | +| ---------------------------------------------------------------------------------------------- | ---------------------------------------------------------- | +| [notebooks/deploy-bert-on-vertex-ai](./notebooks/deploy-bert-on-vertex-ai) | Deploy BERT Models with PyTorch Inference DLC on Vertex AI | +| [notebooks/deploy-embedding-on-vertex-ai](./notebooks/deploy-embedding-on-vertex-ai) | Deploy Embedding Models with TEI DLC on Vertex AI | +| [notebooks/deploy-flux-on-vertex-ai](./notebooks/deploy-flux-on-vertex-ai) | Deploy FLUX with PyTorch Inference DLC on Vertex AI | +| [notebooks/deploy-gemma-from-gcs-on-vertex-ai](./notebooks/deploy-gemma-from-gcs-on-vertex-ai) | Deploy Gemma 7B with TGI DLC from GCS on Vertex AI | +| [notebooks/deploy-gemma-on-vertex-ai](./notebooks/deploy-gemma-on-vertex-ai) | Deploy Gemma 7B with TGI DLC on Vertex AI | +| [notebooks/deploy-llama-3-1-405b-on-vertex-ai](./notebooks/deploy-llama-3-1-405b-on-vertex-ai) | Deploy Meta Llama 3.1 405B with TGI DLC on Vertex AI | ### Evaluation Examples 
-| Example | Description | -| ------------------------------------------------------------------------ | ----------------------------------------------- | -| [evaluate-llms-with-vertex-ai](./notebooks/evaluate-llms-with-vertex-ai) | Evaluating open LLMs with Vertex AI and Gemini. | +| Example | Title | +| ---------------------------------------------------------------------------------- | -------------------------------------------- | +| [notebooks/evaluate-llms-with-vertex-ai](./notebooks/evaluate-llms-with-vertex-ai) | Evaluate open LLMs with Vertex AI and Gemini | ## Pipelines -Coming soon! +Coming soon! \ No newline at end of file diff --git a/scripts/internal/update_example_tables.py b/scripts/internal/update_example_tables.py new file mode 100644 index 00000000..88a4fb8e --- /dev/null +++ b/scripts/internal/update_example_tables.py @@ -0,0 +1,285 @@ +import os +import re +import json +from collections import defaultdict +import subprocess + + +def get_tracked_files(): + result = subprocess.run(["git", "ls-files"], capture_output=True, text=True) + return set(result.stdout.splitlines()) + + +def extract_info_from_md(file_path): + with open(file_path, "r") as f: + content = f.read() + + match = re.search(r"---\s*title:\s*(.*?)\s*type:\s*(.*?)\s*---", content, re.DOTALL) + if match: + return match.group(1).strip(), match.group(2).strip() + return None, None + + +def extract_info_from_ipynb(file_path): + with open(file_path, "r") as f: + notebook = json.load(f) + + first_cell = notebook["cells"][0] + if first_cell["cell_type"] == "markdown": + content = "".join(first_cell["source"]) + match = re.search( + r"", + content, + re.DOTALL, + ) + if match: + return match.group(1).strip(), match.group(2).strip() + return None, None + + +def get_service(path): + if "gke" in path: + return "GKE" + elif "vertex-ai" in path: + return "Vertex AI" + elif "cloud-run" in path: + return "Cloud Run" + return None + + +def generate_tables(): + examples = defaultdict(lambda: defaultdict(list)) + root_dir = "examples" + tracked_files = get_tracked_files() + + for dirpath, _, filenames in os.walk(root_dir): + for filename in filenames: + if filename in ["README.md", "vertex-notebook.ipynb"]: + file_path = os.path.join(dirpath, filename) + relative_path = os.path.relpath(file_path, start=".") + + if relative_path not in tracked_files: + continue + + dir_path = os.path.dirname(relative_path) + + if filename.endswith(".md"): + title, example_type = extract_info_from_md(file_path) + elif filename.endswith(".ipynb"): + title, example_type = extract_info_from_ipynb(file_path) + + if title and example_type: # type: ignore + service = get_service(relative_path) + if service: + examples[service][example_type].append((dir_path, title)) + + return examples + + +def update_readme(examples): + with open("README.md", "r") as f: + content = f.read() + + ordered_services = ["Vertex AI", "GKE", "Cloud Run"] + + for example_type in ["training", "inference", "evaluation"]: + table_rows = [] + for service in ordered_services: + if examples[service].get(example_type): + for path, title in sorted( + examples[service][example_type], key=lambda x: x[1] + ): + # Format the path to include 'examples/' + table_rows.append( + ( + service, + f"[{path}](./{path})", + title, + ) + ) + + if table_rows: + table = format_table(["Service", "Example", "Title"], table_rows) + pattern = ( + rf"(### {example_type.capitalize()} Examples\n\n)[\s\S]*?(\n\n###|\Z)" + ) + replacement = rf"\1{table}\2" + content = re.sub(pattern, replacement, content, 
flags=re.DOTALL) + + with open("README.md", "w") as f: + f.write(content.rstrip() + "\n") + + +def update_docs(examples): + with open("docs/source/resources.mdx", "r") as f: + content = f.read() + + new_content = [] + ordered_services = ["Vertex AI", "GKE", "Cloud Run"] + ordered_types = ["inference", "training", "evaluation"] + + for service in ordered_services: + service_name = f"(Preview) {service}" if service == "Cloud Run" else service + new_content.append(f"\n### {service_name}\n") + + for example_type in ordered_types: + if examples[service].get(example_type): + new_content.append(f"\n- {example_type.capitalize()}\n\n") + for path, title in sorted( + examples[service][example_type], key=lambda x: x[1] + ): + github_url = f"https://github.com/huggingface/Google-Cloud-Containers/tree/main/{path}" + new_content.append(f" - [{title}]({github_url})\n") + + new_examples_content = "".join(new_content) + + # Replace the Examples section in the original content + pattern = r"(## Examples\n\n- \[All examples\].*?\n)[\s\S]*" + updated_content = re.sub( + pattern, rf"\1{new_examples_content}", content, flags=re.DOTALL + ) + + with open("docs/source/resources.mdx", "w") as f: + f.write(updated_content) + + +def update_cloud_run_examples(examples): + file_path = "examples/cloud-run/README.md" + + with open(file_path, "r") as f: + content = f.read() + + # Update Inference Examples + inference_examples = examples.get("Cloud Run", {}).get("inference", []) + inference_table = format_table( + ["Example", "Title"], + [ + (f"[{os.path.basename(path)}](./{os.path.basename(path)})", title) + for path, title in sorted(inference_examples, key=lambda x: x[1]) + ], + ) + + inference_pattern = r"(## Inference Examples\n\n)[\s\S]*?(\n\n## Training Examples)" + inference_replacement = rf"\1{inference_table}\2" + content = re.sub(inference_pattern, inference_replacement, content, flags=re.DOTALL) + + # Update Training Examples + training_pattern = r"(## Training Examples\n\n)[\s\S]*" + training_replacement = r"\1Coming soon!" 
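+    # Everything below the heading is reset to the "Coming soon!" placeholder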
+ content = re.sub(training_pattern, training_replacement, content, flags=re.DOTALL) + + with open(file_path, "w") as f: + f.write(content) + + +def update_gke_examples(examples): + file_path = "examples/gke/README.md" + + with open(file_path, "r") as f: + content = f.read() + + for example_type in ["Training", "Inference"]: + examples_list = examples.get("GKE", {}).get(example_type.lower(), []) + pattern = rf"(## {example_type} Examples\n\n)[\s\S]*?(\n\n##|\Z)" + + if examples_list: + # Sort examples alphabetically by their basename + sorted_examples = sorted( + examples_list, key=lambda x: os.path.basename(x[0]) + ) + table = format_table( + ["Example", "Title"], + [ + (f"[{os.path.basename(path)}](./{os.path.basename(path)})", title) + for path, title in sorted_examples + ], + ) + replacement = rf"\1{table}\2" + else: + replacement = rf"\1No {example_type.lower()} examples available yet.\2" + + content = re.sub(pattern, replacement, content, flags=re.DOTALL) + + with open(file_path, "w") as f: + f.write(content.rstrip() + "\n") + + +def update_vertex_ai_examples(examples): + file_path = "examples/vertex-ai/README.md" + + with open(file_path, "r") as f: + content = f.read() + + new_content = [] + for line in content.split("\n"): + if line.startswith("## Notebooks"): + new_content.append(line) + break + new_content.append(line) + + for example_type in ["Training", "Inference", "Evaluation"]: + examples_list = examples.get("Vertex AI", {}).get(example_type.lower(), []) + new_content.append(f"\n### {example_type} Examples\n") + if examples_list: + table = format_table( + ["Example", "Title"], + [ + ( + f"[notebooks/{os.path.basename(path)}](./notebooks/{os.path.basename(path)})", + title, + ) + for path, title in sorted(examples_list, key=lambda x: x[1]) + ], + ) + new_content.append(table) + else: + new_content.append("Coming soon!") + + # Handle Pipelines section + new_content.append("\n## Pipelines\n") + pipeline_examples = examples.get("Vertex AI", {}).get("pipeline", []) + if pipeline_examples: + table = format_table( + ["Example", "Title"], + [ + ( + f"[pipelines/{os.path.basename(path)}](./pipelines/{os.path.basename(path)})", + title, + ) + for path, title in sorted(pipeline_examples, key=lambda x: x[1]) + ], + ) + new_content.append(table) + else: + new_content.append("Coming soon!") + + with open(file_path, "w") as f: + f.write("\n".join(new_content).strip()) + + +def format_table(headers, rows): + col_widths = [len(h) for h in headers] + for row in rows: + for i, cell in enumerate(row): + col_widths[i] = max(col_widths[i], len(cell)) + + header = "| " + " | ".join(f"{h:<{w}}" for h, w in zip(headers, col_widths)) + " |" + separator = "| " + " | ".join("-" * w for w in col_widths) + " |" + body = [ + "| " + " | ".join(f"{cell:<{w}}" for cell, w in zip(row, col_widths)) + " |" + for row in rows + ] + + return "\n".join([header, separator] + body) + + +if __name__ == "__main__": + examples = generate_tables() + update_readme(examples) + update_docs(examples) + update_cloud_run_examples(examples) + update_gke_examples(examples) + update_vertex_ai_examples(examples) + print( + "README.md, docs/source/resources.mdx, and example README files have been updated." + )
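+
+# Note: the script relies on repository-relative paths ("examples/", "README.md",
+# "docs/source/resources.mdx"), so it is meant to be run from the repository root:
+#   python scripts/internal/update_example_tables.py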