fix: lint backend and doc files #2850

Merged 1 commit on Dec 16, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile_trtllm
@@ -72,4 +72,4 @@ RUN cargo install cargo-chef
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

-ENV MPI_HOME=/usr/local/mpi
+ENV MPI_HOME=/usr/local/mpi
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/backend.hpp
@@ -228,4 +228,4 @@ struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t> : f
}
};

-#endif
+#endif
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/ffi.hpp
@@ -159,4 +159,4 @@ namespace huggingface::tgi::backends::trtllm {
);
}
}
-#endif
+#endif
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/hardware.hpp
@@ -78,4 +78,4 @@ namespace huggingface::tgi::hardware::cuda {
[[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
};
}
-#endif
+#endif
2 changes: 1 addition & 1 deletion backends/trtllm/tests/test_backend.cpp
@@ -149,4 +149,4 @@ TEST_CASE("sampling_params_t to tle::SamplingConfig", "[backend_t]")

REQUIRE(config.getTemperature().has_value());
REQUIRE_THAT(*config.getTemperature(), Catch::Matchers::WithinAbs(params.temperature, 1e-6f));
-}
+}
2 changes: 1 addition & 1 deletion backends/trtllm/tests/test_hardware.cpp
@@ -79,4 +79,4 @@ TEST_CASE("is_at_least") {
REQUIRE(HOPPER_CAPABILITIES.is_at_least(AMPERE));
REQUIRE(HOPPER_CAPABILITIES.is_at_least(ADA_LOVELACE));
REQUIRE(HOPPER_CAPABILITIES.is_at_least(HOPPER));
-}
+}
8 changes: 4 additions & 4 deletions docs/source/backends/trtllm.md
@@ -17,7 +17,7 @@ supported.
You can use [Optimum-NVIDIA](https://github.com/huggingface/optimum-nvidia) to compile engines for the models you
want to use.

-```bash
+```bash
MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct"

# Install huggingface_cli
@@ -32,7 +32,7 @@ mkdir -p /tmp/models/$MODEL_NAME
# Create a directory to store the compiled engine
mkdir -p /tmp/engines/$MODEL_NAME

-# Download the model
+# Download the model
HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download --local-dir /tmp/models/$MODEL_NAME $MODEL_NAME

# Compile the engine using Optimum-NVIDIA
@@ -69,7 +69,7 @@ docker run \
-e MODEL=$MODEL_NAME \
-e PORT=3000 \
-e HF_TOKEN='hf_XXX' \
--v /tmp/engines/$MODEL_NAME:/data \
+-v /tmp/engines/$MODEL_NAME:/data \
ghcr.io/huggingface/text-generation-inference:latest-trtllm \
--executor-worker executorWorker \
--model-id /data/$MODEL_NAME
@@ -78,4 +78,4 @@ docker run \
## Development

To develop TRTLLM backend, you can use [dev containers](https://containers.dev/) located in
-`.devcontainer` directory.
+`.devcontainer` directory.
12 changes: 6 additions & 6 deletions docs/source/multi_backend_support.md
@@ -1,13 +1,13 @@
# Multi-backend support

TGI (Text Generation Inference) offers flexibility by supporting multiple backends for serving large language models (LLMs).
-With multi-backend support, you can choose the backend that best suits your needs,
-whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with
+With multi-backend support, you can choose the backend that best suits your needs,
+whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with
TGI remains consistent across backends, allowing you to switch between them seamlessly.

**Supported backends:**
-* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option
+* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option
within TGI. Developed in-house, it boasts numerous optimizations and is used in production by various projects, including those by Hugging Face.
-* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference.
-It utilizes specialized optimizations and custom kernels for enhanced performance.
-However, it requires a model-specific compilation step for each GPU architecture.
+* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference.
+It utilizes specialized optimizations and custom kernels for enhanced performance.
+However, it requires a model-specific compilation step for each GPU architecture.