From d37a43e58189e5556b9ed73e067db4a8d03191ef Mon Sep 17 00:00:00 2001
From: Ruida Zeng <31152346+ruidazeng@users.noreply.github.com>
Date: Thu, 9 Jan 2025 03:09:23 -0600
Subject: [PATCH] chore: fixed some typos and attribute issues in README
 (#2891)

* chore: fixed html repeated attribute in README

* chore: fix minor grammar/capitalization

* chore: fixed spelling mistakes in README
---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 6d3a9b12474..31966ddbd3d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 <div align="center">
 
 <a href="https://www.youtube.com/watch?v=jlMAX2Oaht0">
-  <img width=560 width=315 alt="Making TGI deployment optimal" src="https://huggingface.co/datasets/Narsil/tgi_assets/resolve/main/thumbnail.png">
+  <img width=560 alt="Making TGI deployment optimal" src="https://huggingface.co/datasets/Narsil/tgi_assets/resolve/main/thumbnail.png">
 </a>
 
 # Text Generation Inference
@@ -141,8 +141,8 @@ You have the option to utilize the `HF_TOKEN` environment variable for configuri
 For example, if you want to serve the gated Llama V2 model variants:
 
 1. Go to https://huggingface.co/settings/tokens
-2. Copy your cli READ token
-3. Export `HF_TOKEN=<your cli READ token>`
+2. Copy your CLI READ token
+3. Export `HF_TOKEN=<your CLI READ token>`
 
 or with Docker:
 
@@ -157,7 +157,7 @@ docker run --gpus all --shm-size 1g -e HF_TOKEN=$token -p 8080:80 -v $volume:/da
 ### A note on Shared Memory (shm)
 
 [`NCCL`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/index.html) is a communication framework used by
-`PyTorch` to do distributed training/inference. `text-generation-inference` make
+`PyTorch` to do distributed training/inference. `text-generation-inference` makes
 use of `NCCL` to enable Tensor Parallelism to dramatically speed up inference for large language models.
 
 In order to share data between the different devices of a `NCCL` group, `NCCL` might fall back to using the host memory if
@@ -196,7 +196,7 @@ Detailed blogpost by Adyen on TGI inner workings: [LLM inference at scale with T
 
 You can also opt to install `text-generation-inference` locally.
 
-First clone the repository and change directoy into it:
+First clone the repository and change directory into it:
 
 ```shell
 git clone https://github.com/huggingface/text-generation-inference
@@ -213,7 +213,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
 conda create -n text-generation-inference python=3.11
 conda activate text-generation-inference
 
-#using pyton venv
+#using python venv
 python3 -m venv .venv
 source .venv/bin/activate
 ```