Skip to content

Commit

Permalink
fix: update lints after rebase
Browse files: browse the repository at this point in the history
  • Loading branch information
drbh committed Dec 13, 2024
1 parent 58e24a3 commit 3aa8263
Show file tree
Hide file tree
Showing 12 changed files with 38 additions and 381 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile_trtllm
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ RUN cargo install cargo-chef
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi

ENV MPI_HOME=/usr/local/mpi
ENV MPI_HOME=/usr/local/mpi
13 changes: 7 additions & 6 deletions backends/trtllm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
}

let mut config = cmake::Config::new(".");
config.uses_cxx11()
config
.uses_cxx11()
.generator("Ninja")
.profile(match is_debug {
true => "Debug",
Expand All @@ -57,12 +58,12 @@ fn build_backend(is_debug: bool, opt_level: &str, out_dir: &PathBuf) -> (PathBuf
.define("TGI_TRTLLM_BACKEND_TARGET_CUDA_ARCH_LIST", cuda_arch_list)
.define("TGI_TRTLLM_BACKEND_TRT_ROOT", tensorrt_path);

// Allow to override which Python to use ...
if let Some(python3) = option_env!("Python3_EXECUTABLE") {
config.define("Python3_EXECUTABLE", python3);
}
// Allow to override which Python to use ...
if let Some(python3) = option_env!("Python3_EXECUTABLE") {
config.define("Python3_EXECUTABLE", python3);
}

config.build();
config.build();

// Additional transitive CMake dependencies
let deps_folder = out_dir.join("build").join("_deps");
Expand Down
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,4 @@ struct fmt::formatter<huggingface::tgi::backends::trtllm::sampling_params_t> : f
}
};

#endif
#endif
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/ffi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ namespace huggingface::tgi::backends::trtllm {
);
}
}
#endif
#endif
2 changes: 1 addition & 1 deletion backends/trtllm/csrc/hardware.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ namespace huggingface::tgi::hardware::cuda {
[[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); }
};
}
#endif
#endif
26 changes: 13 additions & 13 deletions backends/trtllm/src/looper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,16 @@ fn executor_status_looper(
Ok(decoded_token) => {
post_process_decoded_token(&tokenizer, ctx, decoded_token)
}
Err(err) => Err(err)
Err(err) => Err(err),
};

// Attempt to send back the response to the client
if let Err(_) = ctx.streamer.send(response) {
// Client has dropped, remove from tracked requests
debug!("Client dropped - removing request {} from tracked requests", step.request_id);
debug!(
"Client dropped - removing request {} from tracked requests",
step.request_id
);
backend.as_mut().cancel(step.request_id);
let _ = in_flights.remove(&step.request_id);
}
Expand All @@ -160,11 +163,14 @@ fn executor_status_looper(
}
}

fn post_process_decoded_token(tokenizer: &Tokenizer, ctx: &mut GenerationContext, decoded_token: DecodedToken) -> InferResult<InferStreamResponse> {
fn post_process_decoded_token(
tokenizer: &Tokenizer,
ctx: &mut GenerationContext,
decoded_token: DecodedToken,
) -> InferResult<InferStreamResponse> {
match tokenizer.decode(&[decoded_token.id], false) {
Ok(text) => {
let is_special =
tokenizer.get_added_vocabulary().is_special_token(&text);
let is_special = tokenizer.get_added_vocabulary().is_special_token(&text);
let token = Token {
id: decoded_token.id,
text,
Expand All @@ -186,7 +192,7 @@ fn post_process_decoded_token(tokenizer: &Tokenizer, ctx: &mut GenerationContext
let generated_text = GeneratedText {
text: text.unwrap(),
generated_tokens: ctx.tokens.len() as u32,
finish_reason: FinishReason::EndOfSequenceToken, // TODO : Map FinishReason
finish_reason: FinishReason::EndOfSequenceToken, // TODO : Map FinishReason
seed: None,
};

Expand Down Expand Up @@ -248,7 +254,6 @@ unsafe impl Send for TensorRtLlmBackendImpl {}

pub struct TensorRtLlmBackendV2(UnboundedSender<GenerationContext>);


impl TensorRtLlmBackendV2 {
pub fn new<P: AsRef<Path> + Send, PP: AsRef<Path> + Send>(
tokenizer: Tokenizer,
Expand All @@ -268,12 +273,7 @@ impl TensorRtLlmBackendV2 {

// Executor looper is responsible for scheduling and pulling requests state at regular interval
spawn_blocking(move || {
executor_status_looper(
max_inflight_requests,
tokenizer,
backend,
executor_receiver,
)
executor_status_looper(max_inflight_requests, tokenizer, backend, executor_receiver)
});

Ok(TensorRtLlmBackendV2(executor_sender))
Expand Down
Loading

0 comments on commit 3aa8263

Please sign in to comment.