diff --git a/serving/docker/partition/sm_neo_shard.py b/serving/docker/partition/sm_neo_shard.py index 86b6dc0b8..d499022bf 100644 --- a/serving/docker/partition/sm_neo_shard.py +++ b/serving/docker/partition/sm_neo_shard.py @@ -30,7 +30,6 @@ from lmi_dist.arg_utils import VllmEngineArgs from lmi_dist.comms import comms -MODEL_DIR_NAME = "sagemaker-fast-model-loader" CHUNK_MB = 8 @@ -107,7 +106,7 @@ def shard_lmi_dist_model(self, input_dir: str, output_dir: str, ) engine = engine_from_args(engine_args) - model_dir = os.path.join(output_dir, MODEL_DIR_NAME) + model_dir = os.path.join(output_dir, sm_fml.MODEL_DIR_NAME) os.makedirs(model_dir, exist_ok=True) config_for_current_rank = engine.model_runner.vllm_worker.save_chunked_shard(