Skip to content

Commit

Permalink
fix: better error handing (#65)
Browse files Browse the repository at this point in the history
Co-authored-by: sangjanai <[email protected]>
  • Loading branch information
vansangpfiev and sangjanai authored Jul 29, 2024
1 parent 9d83224 commit 9311ae8
Showing 1 changed file with 28 additions and 1 deletion.
29 changes: 28 additions & 1 deletion cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,34 @@ void TensorrtllmEngine::LoadModel(std::shared_ptr<Json::Value> json_body, std::f

// Init gpt_session
auto model_path = model_dir / json.engineFilename(world_config, model_id_);
gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
try {
gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
} catch(const std::exception& e) {
LOG_ERROR << "Failed to load model: " << e.what();
LOG_INFO << "Retry once with smaller maxSequenceLength";
gpt_session.reset();
// Retry again with smaller maxSequenceLength once
session_config_.maxSequenceLength /= 2;
try {
gpt_session = std::make_unique<GptSession>(session_config_, *model_config_, world_config, model_path.string(), logger_);
} catch(const std::exception& e) {
LOG_ERROR << "Failed to load model: " << e.what();
gpt_session.reset();
cortex_tokenizer.reset();
q_.reset();
model_config_.reset();
logger_.reset();
Json::Value json_resp;
json_resp["message"] = "Failed to load model";
Json::Value status;
status["is_done"] = false;
status["has_error"] = true;
status["is_stream"] = false;
status["status_code"] = k500InternalServerError;
callback(std::move(status), std::move(json_resp));
return;
}
}

model_loaded_ = true;
if (q_ == nullptr) {
Expand Down

0 comments on commit 9311ae8

Please sign in to comment.