From 562bf3c7ea28f0416b8b361f2b7226dcd6d0bcd9 Mon Sep 17 00:00:00 2001
From: drbh
Date: Mon, 16 Dec 2024 21:09:48 +0000
Subject: [PATCH] fix: lint backend and doc files

---
 .devcontainer/Dockerfile_trtllm         |  2 +-
 backends/trtllm/csrc/backend.hpp        |  2 +-
 backends/trtllm/csrc/ffi.hpp            |  2 +-
 backends/trtllm/csrc/hardware.hpp       |  2 +-
 backends/trtllm/tests/test_backend.cpp  |  2 +-
 backends/trtllm/tests/test_hardware.cpp |  2 +-
 docs/source/backends/trtllm.md          |  8 ++++----
 docs/source/multi_backend_support.md    | 12 ++++++------
 8 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/.devcontainer/Dockerfile_trtllm b/.devcontainer/Dockerfile_trtllm
index 21b7114ce03..239a7bf8c2c 100644
--- a/.devcontainer/Dockerfile_trtllm
+++ b/.devcontainer/Dockerfile_trtllm
@@ -72,4 +72,4 @@ RUN cargo install cargo-chef
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 
-ENV MPI_HOME=/usr/local/mpi
\ No newline at end of file
+ENV MPI_HOME=/usr/local/mpi
diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp
index f49c437a2b2..40b44a842b3 100644
--- a/backends/trtllm/csrc/backend.hpp
+++ b/backends/trtllm/csrc/backend.hpp
@@ -228,4 +228,4 @@ struct fmt::formatter : f
     }
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/backends/trtllm/csrc/ffi.hpp b/backends/trtllm/csrc/ffi.hpp
index de2333afe37..d0342d4bb38 100644
--- a/backends/trtllm/csrc/ffi.hpp
+++ b/backends/trtllm/csrc/ffi.hpp
@@ -159,4 +159,4 @@ namespace huggingface::tgi::backends::trtllm {
         );
     }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/backends/trtllm/csrc/hardware.hpp b/backends/trtllm/csrc/hardware.hpp
index 8e5fa696dbb..abfb4afd51d 100644
--- a/backends/trtllm/csrc/hardware.hpp
+++ b/backends/trtllm/csrc/hardware.hpp
@@ -78,4 +78,4 @@ namespace huggingface::tgi::hardware::cuda {
         [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
     };
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/backends/trtllm/tests/test_backend.cpp b/backends/trtllm/tests/test_backend.cpp
index ae097405bc4..14d92b75434 100644
--- a/backends/trtllm/tests/test_backend.cpp
+++ b/backends/trtllm/tests/test_backend.cpp
@@ -149,4 +149,4 @@ TEST_CASE("sampling_params_t to tle::SamplingConfig", "[backend_t]")
 
     REQUIRE(config.getTemperature().has_value());
     REQUIRE_THAT(*config.getTemperature(), Catch::Matchers::WithinAbs(params.temperature, 1e-6f));
-}
\ No newline at end of file
+}
diff --git a/backends/trtllm/tests/test_hardware.cpp b/backends/trtllm/tests/test_hardware.cpp
index 4cb7b562087..e14f1f357f4 100644
--- a/backends/trtllm/tests/test_hardware.cpp
+++ b/backends/trtllm/tests/test_hardware.cpp
@@ -79,4 +79,4 @@ TEST_CASE("is_at_least") {
     REQUIRE(HOPPER_CAPABILITIES.is_at_least(AMPERE));
     REQUIRE(HOPPER_CAPABILITIES.is_at_least(ADA_LOVELACE));
     REQUIRE(HOPPER_CAPABILITIES.is_at_least(HOPPER));
-}
\ No newline at end of file
+}
diff --git a/docs/source/backends/trtllm.md b/docs/source/backends/trtllm.md
index 8eb37180c7d..be6416b15e5 100644
--- a/docs/source/backends/trtllm.md
+++ b/docs/source/backends/trtllm.md
@@ -17,7 +17,7 @@
 supported. You can use [Optimum-NVIDIA](https://github.com/huggingface/optimum-nvidia) to compile engines for the
 models you want to use.
 
-```bash
+```bash
 MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct"
 
 # Install huggingface_cli
@@ -32,7 +32,7 @@ mkdir -p /tmp/models/$MODEL_NAME
 # Create a directory to store the compiled engine
 mkdir -p /tmp/engines/$MODEL_NAME
 
-# Download the model
+# Download the model
 HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download --local-dir /tmp/models/$MODEL_NAME $MODEL_NAME
 
 # Compile the engine using Optimum-NVIDIA
@@ -69,7 +69,7 @@ docker run \
     -e MODEL=$MODEL_NAME \
     -e PORT=3000 \
     -e HF_TOKEN='hf_XXX' \
-    -v /tmp/engines/$MODEL_NAME:/data \
+    -v /tmp/engines/$MODEL_NAME:/data \
     ghcr.io/huggingface/text-generation-inference:latest-trtllm \
     --executor-worker executorWorker \
     --model-id /data/$MODEL_NAME
@@ -78,4 +78,4 @@ docker run \
 ## Development
 
 To develop TRTLLM backend, you can use [dev containers](https://containers.dev/) located in
-`.devcontainer` directory.
\ No newline at end of file
+`.devcontainer` directory.
diff --git a/docs/source/multi_backend_support.md b/docs/source/multi_backend_support.md
index 5899e4b77d4..c4df15bc2ca 100644
--- a/docs/source/multi_backend_support.md
+++ b/docs/source/multi_backend_support.md
@@ -1,13 +1,13 @@
 # Multi-backend support
 
 TGI (Text Generation Inference) offers flexibility by supporting multiple backends for serving large language models (LLMs).
-With multi-backend support, you can choose the backend that best suits your needs,
-whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with
+With multi-backend support, you can choose the backend that best suits your needs,
+whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with
 TGI remains consistent across backends, allowing you to switch between them seamlessly.
 
 **Supported backends:**
-* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option
+* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option
   within TGI. Developed in-house, it boasts numerous optimizations and is used in production by various projects, including those by Hugging Face.
-* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference.
-  It utilizes specialized optimizations and custom kernels for enhanced performance.
-  However, it requires a model-specific compilation step for each GPU architecture.
\ No newline at end of file
+* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference.
+  It utilizes specialized optimizations and custom kernels for enhanced performance.
+  However, it requires a model-specific compilation step for each GPU architecture.