Merge pull request #14 from aws-samples/mllm
add mllm support
xiehust authored Sep 20, 2024
2 parents 29a5bcc + 10af922 commit b404712
Showing 34 changed files with 1,060 additions and 338 deletions.
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "backend/LLaMA-Factory"]
path = backend/LLaMA-Factory
url = https://github.com/xiehust/LLaMA-Factory.git
branch = main
56 changes: 56 additions & 0 deletions backend/0.setup-cn.sh
@@ -0,0 +1,56 @@

# The line to prepend to each requirements file
MIRROR_LINE="-i https://pypi.tuna.tsinghua.edu.cn/simple"

# Process backend/requirements.txt
BACKEND_REQ="/home/ubuntu/llm_model_hub/backend/requirements.txt"
if [ -f "$BACKEND_REQ" ]; then
sed -i "1i$MIRROR_LINE" "$BACKEND_REQ"
echo "Added mirror line to $BACKEND_REQ"
else
echo "File $BACKEND_REQ not found"
fi

# Process backend/byoc/requirements.txt
BACKEND2_REQ="/home/ubuntu/llm_model_hub/backend/byoc/requirements.txt"
if [ -f "$BACKEND2_REQ" ]; then
sed -i "1i$MIRROR_LINE" "$BACKEND2_REQ"
echo "Added mirror line to $BACKEND2_REQ"
sed -i 's|https://github.com/|https://gitclone.com/github.com/|' "$BACKEND2_REQ"
else
echo "File $BACKEND2_REQ not found"
fi



# Process backend/LLaMA-Factory/requirements.txt
LLAMA_REQ="/home/ubuntu/llm_model_hub/backend/LLaMA-Factory/requirements.txt"
if [ -f "$LLAMA_REQ" ]; then
sed -i "1i$MIRROR_LINE" "$LLAMA_REQ"
sed -i 's|https://github.com/|https://gitclone.com/github.com/|' "$LLAMA_REQ"
echo "Modified $LLAMA_REQ"
else
echo "File $LLAMA_REQ not found"
fi

# Add the Docker registry-mirror configuration
DOCKER_CONFIG="/etc/docker/daemon.json"
sudo mkdir -p /etc/docker
sudo tee "$DOCKER_CONFIG" > /dev/null <<EOT
{
"registry-mirrors" :
[
"https://docker.m.daocloud.io",
"https://noohub.ru",
"https://huecker.io",
"https://dockerhub.timeweb.cloud"
]
}
EOT
echo "Docker configuration added to $DOCKER_CONFIG"

# Restart the Docker service
sudo systemctl restart docker
echo "Docker service restarted"

echo "Script execution completed."
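The script's two transformations are a `sed "1i…"` prepend of the pip mirror flag and a `sed 's|…|…|'` rewrite of GitHub URLs through the gitclone.com proxy. A minimal Python sketch of the same logic (illustrative only, not part of the repo; the mirror URL and proxy host are the ones the script uses):

```python
def add_mirror_line(text: str,
                    mirror: str = "-i https://pypi.tuna.tsinghua.edu.cn/simple") -> str:
    # Equivalent of `sed -i "1i$MIRROR_LINE"`: prepend the pip index flag.
    return mirror + "\n" + text

def rewrite_github_urls(text: str) -> str:
    # Equivalent of the second sed call: route git+https requirements
    # through the gitclone.com mirror for faster access from China.
    return text.replace("https://github.com/", "https://gitclone.com/github.com/")

# Hypothetical requirements content, for illustration.
requirements = "torch==2.2.0\nunsloth @ git+https://github.com/unslothai/unsloth.git\n"
patched = rewrite_github_urls(add_mirror_line(requirements))
print(patched.splitlines()[0])  # → -i https://pypi.tuna.tsinghua.edu.cn/simple
```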
2 changes: 1 addition & 1 deletion backend/LLaMA-Factory
Submodule LLaMA-Factory updated 129 files
41 changes: 4 additions & 37 deletions backend/README.md
@@ -1,45 +1,10 @@
# Backend environment setup
## 0. Notes for China regions (other regions can skip this)
1. If deploying in a China region, manually change the Python package index in
- backend/requirements.txt and
- backend/LLaMA-Factory/requirements.txt
- by adding the following as the first line of each of these two requirements.txt files
```
-i https://pypi.tuna.tsinghua.edu.cn/simple
```

2. Also, in the backend/LLaMA-Factory/requirements.txt file,
replace the original
```
unsloth[cu121-torch220] @ git+https://github.com/unslothai/unsloth.git
```
with:
```
unsloth[cu121-torch220] @ git+https://gitclone.com/github.com/unslothai/unsloth.git
```

3. Configure Docker registry mirrors for China
- Use vim to create the /etc/docker/daemon.json file and add the following content
```bash
sudo vim /etc/docker/daemon.json
```
```json
{
"registry-mirrors" :
[
"https://docker.m.daocloud.io",
"https://noohub.ru",
"https://huecker.io",
"https://dockerhub.timeweb.cloud"
]
}
```
After saving the changes, restart the docker service
1. If deploying in a China region, run the following script first to switch the pip and Docker sources.
```bash
sudo systemctl restart docker
bash 0.setup-cn.sh
```


## 1. Install the backend environment
1. Enter the backend directory and copy the env.sample file to .env
```bash
@@ -67,6 +32,8 @@ db_user=llmdata
db_password=llmdata
api_keys=
HUGGING_FACE_HUB_TOKEN=
WANDB_API_KEY=
MAX_MODEL_LEN=4096
```
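The new `WANDB_API_KEY` and `MAX_MODEL_LEN` entries are plain environment variables. A hedged sketch of how the backend might read `MAX_MODEL_LEN` with the sample's default (the variable name comes from the sample above; the parsing logic is assumed, not taken from the repo):

```python
import os

def get_max_model_len(default: int = 4096) -> int:
    # Fall back to the sample default when the variable is unset or blank.
    raw = os.environ.get("MAX_MODEL_LEN", "").strip()
    return int(raw) if raw else default

os.environ["MAX_MODEL_LEN"] = "4096"
print(get_max_model_len())  # → 4096
```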

2. Still in the backend/ directory, run the following commands to install
4 changes: 4 additions & 0 deletions backend/byoc/Dockerfile
@@ -7,6 +7,10 @@ WORKDIR /app
# Copy the contents of the current directory into /app in the container
COPY app/ /app

# Install the new dependencies
COPY requirements.txt /app
RUN pip install -r requirements.txt

# Patch the restapi
RUN \
export PYTHON_SITEPACKAGES=`python3 -c "import site; print(site.getsitepackages()[0])"`; \
11 changes: 8 additions & 3 deletions backend/byoc/build_and_push.sh
@@ -12,13 +12,18 @@ TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-meta
# Get the current region and write it to the backend .env file
region=$(curl -H "X-aws-ec2-metadata-token: $TOKEN" -s http://169.254.169.254/latest/meta-data/placement/region)
# region=$(aws configure get region)
suffix="com"

if [[ "$region" == cn* ]]; then
suffix="com.cn"
fi

# Get the account number associated with the current IAM credentials
account=$(aws sts get-caller-identity --query Account --output text)

VLLM_VERSION=v0.5.5
VLLM_VERSION=v0.6.1.post2
inference_image=sagemaker_endpoint/vllm
inference_fullname=${account}.dkr.ecr.${region}.amazonaws.com/${inference_image}:${VLLM_VERSION}
inference_fullname=${account}.dkr.ecr.${region}.amazonaws.${suffix}/${inference_image}:${VLLM_VERSION}

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${inference_image}" --region ${region} || aws ecr create-repository --repository-name "${inference_image}" --region ${region}
@@ -29,7 +34,7 @@ then
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $account.dkr.ecr.$region.amazonaws.com
aws ecr get-login-password --region $region | docker login --username AWS --password-stdin $account.dkr.ecr.$region.amazonaws.${suffix}

aws ecr set-repository-policy \
--repository-name "${inference_image}" \
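The `suffix` logic exists because ECR registry endpoints in AWS China regions end in `.amazonaws.com.cn` rather than `.amazonaws.com`. The same URI construction as a Python sketch (account number and tags are placeholders):

```python
def ecr_image_uri(account: str, region: str, image: str, tag: str) -> str:
    # China regions (cn-north-1, cn-northwest-1) use the .com.cn domain,
    # mirroring the `if [[ "$region" == cn* ]]` branch in the script.
    suffix = "com.cn" if region.startswith("cn") else "com"
    return f"{account}.dkr.ecr.{region}.amazonaws.{suffix}/{image}:{tag}"

print(ecr_image_uri("111122223333", "cn-north-1",
                    "sagemaker_endpoint/vllm", "v0.6.1.post2"))
# → 111122223333.dkr.ecr.cn-north-1.amazonaws.com.cn/sagemaker_endpoint/vllm:v0.6.1.post2
```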
1 change: 1 addition & 0 deletions backend/byoc/requirements.txt
@@ -0,0 +1 @@
transformers @ git+https://github.com/huggingface/transformers.git@21fac7abba2a37fae86106f87fcf9974fd1e3830
2 changes: 1 addition & 1 deletion backend/db_management/database.py
@@ -145,14 +145,14 @@ def create_endpoint(self,
model_s3_path:str,
endpoint_name:str,
instance_type:str,
instance_count:int,
endpoint_create_time:str,
endpoint_delete_time:str,
extra_config:str,
engine:str,
enable_lora:bool,
endpoint_status:EndpointStatus):
ret = True
instance_count = 1
try:
with self.connection_pool.get_connection() as connection:
with connection.cursor() as cursor:
4 changes: 1 addition & 3 deletions backend/env.sample
@@ -10,6 +10,4 @@ db_password=llmdata
api_keys=123456
HUGGING_FACE_HUB_TOKEN=
WANDB_API_KEY=
vllm_image=434444145045.dkr.ecr.us-east-1.amazonaws.com/sagemaker_endpoint/vllm:v0.5.5
model_artifact=s3://sagemaker-us-east-1-434444145045/sagemaker_endpoint/vllm//model.tar.gz
MAX_MODEL_LEN=12288
MAX_MODEL_LEN=4096
17 changes: 11 additions & 6 deletions backend/inference/endpoint_management.py
@@ -134,8 +134,10 @@ def register_cust_model(cust_repo_type:DownloadSource,cust_repo_addr:str):
#register_model_group mutates the following 2 objects; persist them so they can still be loaded after a server restart
with open(SUPPORTED_MODELS_FILE, 'wb') as f:
pickle.dump(SUPPORTED_MODELS, f)
with open(DEFAULT_TEMPLATE_FILE, 'wb') as f:
pickle.dump(DEFAULT_TEMPLATE, f)

# Only useful when a template is provided; this code is not needed for now
# with open(DEFAULT_TEMPLATE_FILE, 'wb') as f:
# pickle.dump(DEFAULT_TEMPLATE, f)

def get_auto_tensor_parallel_size(instance_type:str) -> int:
return instance_gpus_map.get(instance_type, 1)
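`get_auto_tensor_parallel_size` is a plain dictionary lookup with a fallback of 1. A sketch with a hypothetical `instance_gpus_map` (the real map lives elsewhere in the repo; these entries are illustrative GPU counts for common SageMaker instance types):

```python
# Hypothetical map of instance type to GPU count; the repo's actual
# instance_gpus_map may contain different or additional entries.
instance_gpus_map = {
    "ml.g5.xlarge": 1,
    "ml.g5.12xlarge": 4,
    "ml.p4d.24xlarge": 8,
}

def get_auto_tensor_parallel_size(instance_type: str) -> int:
    # Unknown instance types default to a tensor parallel size of 1,
    # matching the fallback in the source.
    return instance_gpus_map.get(instance_type, 1)

print(get_auto_tensor_parallel_size("ml.g5.12xlarge"))  # → 4
```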
@@ -184,6 +186,7 @@ def deploy_endpoint_byoc(job_id:str,engine:str,instance_type:str,quantize:str,en
create_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

endpoint_name = sagemaker.utils.name_from_base(pure_model_name).replace('.','-').replace('_','-')
instance_count = int(extra_params.get("instance_count",1))

# Create the SageMaker Model object. In this example we let LMI configure the deployment settings based on the model architecture
model = Model(
@@ -197,7 +200,7 @@ try:
try:
model.deploy(
instance_type= instance_type,
initial_instance_count=1,
initial_instance_count= instance_count,
endpoint_name=endpoint_name,
wait=False,
accept_eula=True,
@@ -207,6 +210,7 @@ def deploy_endpoint_byoc(job_id:str,engine:str,instance_type:str,quantize:str,en
model_name= model_name,
model_s3_path= model_path,
instance_type= instance_type,
instance_count = instance_count,
endpoint_name= endpoint_name,
endpoint_create_time= create_time,
endpoint_delete_time= None,
@@ -223,7 +227,7 @@ def deploy_endpoint_byoc(job_id:str,engine:str,instance_type:str,quantize:str,en
return True,endpoint_name

# If job_id is "", use the original model given by model_name
def deploy_endpoint(job_id:str,engine:str,instance_type:str,quantize:str,enable_lora:bool,model_name:str,cust_repo_type:str,cust_repo_addr:str) -> Dict[bool,str]:
def deploy_endpoint(job_id:str,engine:str,instance_type:str,quantize:str,enable_lora:bool,model_name:str,cust_repo_type:str,cust_repo_addr:str,extra_params:Dict[str,Any]) -> Dict[bool,str]:
#Normalize into repo/modelname format
repo_type = DownloadSource.MODELSCOPE if DEFAULT_REGION.startswith('cn') else DownloadSource.DEFAULT
model_name=get_model_path_by_name(model_name,repo_type) if model_name and len(model_name.split('/')) < 2 else model_name
@@ -302,7 +306,7 @@ def deploy_endpoint(job_id:str,engine:str,instance_type:str,quantize:str,enable_
create_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
pure_model_name = model_name.split('/')[1]
endpoint_name = sagemaker.utils.name_from_base(pure_model_name).replace('.','-').replace('_','-')

instance_count = int(extra_params.get("instance_count",1))
# Create the SageMaker Model object. In this example we let LMI configure the deployment settings based on the model architecture
model = Model(
image_uri=lmi_image_uri,
@@ -314,7 +318,7 @@ def deploy_endpoint(job_id:str,engine:str,instance_type:str,quantize:str,enable_
try:
model.deploy(
instance_type= instance_type,
initial_instance_count=1,
initial_instance_count=instance_count,
endpoint_name=endpoint_name,
wait=False,
accept_eula=True,
@@ -324,6 +328,7 @@ def deploy_endpoint(job_id:str,engine:str,instance_type:str,quantize:str,enable_
model_name= model_name,
model_s3_path= model_path,
instance_type= instance_type,
instance_count = instance_count,
endpoint_name= endpoint_name,
endpoint_create_time= create_time,
endpoint_delete_time= None,
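The core of this change is that both deploy paths now read `instance_count` from `extra_params` instead of hardcoding `1`, and pass it to `model.deploy(initial_instance_count=...)` and `create_endpoint`. The parsing step can be sketched as follows (the `extra_params` shape is taken from the diff; the guard against bad input is an added assumption, not in the source):

```python
from typing import Any, Dict

def resolve_instance_count(extra_params: Dict[str, Any]) -> int:
    # Mirrors the diff's int(extra_params.get("instance_count", 1)),
    # with a defensive fallback for non-numeric values (assumption).
    try:
        count = int(extra_params.get("instance_count", 1))
    except (TypeError, ValueError):
        count = 1
    return max(count, 1)  # an endpoint needs at least one instance

print(resolve_instance_count({"instance_count": "2"}))  # → 2
print(resolve_instance_count({}))                       # → 1
```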
