Skip to content

Commit

Permalink
Merge pull request #5 from apecloud/feature/llm-env
Browse files Browse the repository at this point in the history
feat: llm uses env vars instead of per-model ClusterVersion (cv) entries
  • Loading branch information
lynnleelhl authored Nov 9, 2023
2 parents 49360a0 + 674180a commit a3618aa
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 131 deletions.
15 changes: 14 additions & 1 deletion addons/llm-cluster/templates/cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
{{- include "kblib.clusterCommon" . }}
apiVersion: apps.kubeblocks.io/v1alpha1
kind: Cluster
metadata:
name: {{ include "kblib.clusterName" . }}
namespace: {{ .Release.Namespace }}
labels: {{ include "kblib.clusterLabels" . | nindent 4 }}
{{ if not .Values.cpuMode }}
annotations:
"kubeblocks.io/extra-env": "{\"MODEL_NAME\":\"{{ .Values.model }}\", \"EXTRA_ARGS\":\"{{ .Values.extraArgs }}\"}"
{{ end }}
spec:
clusterVersionRef: {{ .Values.version }}
terminationPolicy: {{ .Values.extra.terminationPolicy }}
{{- include "kblib.affinity" . | indent 2 }}
{{ if .Values.cpuMode }}
clusterDefinitionRef: ggml # ref clusterDefinition.name
componentSpecs:
Expand Down
16 changes: 16 additions & 0 deletions addons/llm-cluster/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,22 @@
"minimum": 0,
"maximum": 64,
"multipleOf": 1
},
"model": {
"title": "Model",
"description": "Model name",
"type": [
"string"
],
"default": "facebook/opt-125m"
},
"extraArgs": {
"title": "extra arguments",
"description": "extra arguments that will be passed to run model",
"type": [
"string"
],
"default": "--trust-remote-code"
}
}
}
6 changes: 5 additions & 1 deletion addons/llm-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ memory: 6
##
gpu: 1

cpuMode: false
cpuMode: false

model: "facebook/opt-125m"

extraArgs: "--trust-remote-code"
155 changes: 29 additions & 126 deletions addons/llm/templates/clusterversion.yaml
Original file line number Diff line number Diff line change
@@ -1,27 +1,7 @@
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-baichuan-13b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: baichuan-inc/Baichuan-13B-Chat
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-baichuan2-13b
name: vllm-latest
annotations:
kubeblocks.io/is-default-cluster-version: "true"
labels:
Expand All @@ -34,111 +14,6 @@ spec:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: baichuan-inc/Baichuan2-13B-Chat
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-baichuan-7b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: baichuan-inc/Baichuan-7B
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-baichuan2-7b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: baichuan-inc/Baichuan2-7B-Chat
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-vicuna-13b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: lmsys/vicuna-13b-v1.3
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-vicuna-7b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: lmsys/vicuna-7b-v1.5
- name: EXTRA_ARGS
value: "--trust-remote-code"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
name: vllm-llama-70b
labels:
{{- include "llm.labels" . | nindent 4 }}
spec:
clusterDefinitionRef: vllm
componentVersions:
- componentDefRef: vllm
versionsContext:
containers:
- name: vllm
image: {{ .Values.image.registry | default "docker.io" }}/{{ .Values.image.repository}}:{{ default .Chart.AppVersion .Values.image.tag }}
env:
- name: MODEL_NAME
value: upstage/SOLAR-0-70b-16bit
- name: EXTRA_ARGS
value: "--trust-remote-code --swap-space 1"
---
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
Expand Down Expand Up @@ -281,3 +156,31 @@ spec:
volumeMounts:
- name: models
mountPath: /models
---
# ClusterVersion providing the Zephyr-7B-beta model in 4-bit GGUF quantization
# (q4_0) for the "ggml" cluster definition. The ggml definition is the one the
# llm-cluster chart selects when cpuMode is enabled — presumably CPU-only
# inference; confirm against the ggml ClusterDefinition.
apiVersion: apps.kubeblocks.io/v1alpha1
kind: ClusterVersion
metadata:
  name: ggml-zephyr-beta-7b-q4
  labels:
    {{- include "llm.labels" . | nindent 4 }}
spec:
  # Must match the name of the ggml ClusterDefinition declared elsewhere
  # in this chart.
  clusterDefinitionRef: ggml
  componentVersions:
  - componentDefRef: ggml
    versionsContext:
      # Init container: the model weights are baked into a dedicated image;
      # copy the .gguf file into the shared "models" volume so the runtime
      # container below can read it at the same path.
      initContainers:
      - name: download
        image: {{ .Values.imageDev.registry | default "docker.io" }}/apecloud/zephyr-7b-beta-gguf:q4_0
        command: ["sh", "-c", "cp /models/ggml-model-q4.gguf /models-target/"]
        volumeMounts:
        - name: models
          # Mounted at /models-target here (write side); the runtime container
          # mounts the same volume at /models (read side).
          mountPath: /models-target
      # Runtime container: serves the model file staged by the init container.
      containers:
      - name: ggml
        # NOTE(review): uses .Values.imageDev.tagNew rather than a plain
        # .tag — verify this key exists in values.yaml.
        image: {{ .Values.imageDev.registry | default "docker.io" }}/{{ .Values.imageDev.repository}}:{{ default .Chart.AppVersion .Values.imageDev.tagNew }}
        env:
        # Path of the staged model file inside the shared volume; must match
        # the filename copied by the init container above.
        - name: MODEL
          value: /models/ggml-model-q4.gguf
        volumeMounts:
        - name: models
          mountPath: /models
10 changes: 9 additions & 1 deletion addons/llm/templates/scripts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ data:
start.sh: |
#!/bin/bash
echo "model=${MODEL_NAME}"
domain=${MODEL_NAME%%/*}
echo "domain=${domain}"
if [[ "${domain,,}" == "qwen" ]]; then
# install dependencies for qwen
pip install tiktoken
fi
country=`curl https://ifconfig.io/country_code`
if [ "$country" == "CN" ]; then
CLONE_MODEL_SCRIPT="git lfs install; git clone https://www.modelscope.cn/${MODEL_NAME}.git"
Expand Down Expand Up @@ -39,14 +45,16 @@ data:
echo "EXTRA_ARGS=${EXTRA_ARGS}"
cd vllm
echo "model=${MODEL_NAME}" > log
# wait for ray start
sleep 3
while true; do
node_num=`ray status | grep "1 node" | wc -l`
# continue waiting if ray status not ok
if [[ "$node_num" -ne "$KB_VLLM_N" ]]; then
sleep 1
continue
fi
python -m vllm.entrypoints.api_server --host 0.0.0.0 --port 8000 --model ${MODEL_NAME} --gpu-memory-utilization 0.95 --max-num-seqs 512 --max-num-batched-tokens 8192 --tensor-parallel-size ${KB_VLLM_N} ${EXTRA_ARGS} 2>&1 > log
python -m vllm.entrypoints.api_server --host 0.0.0.0 --port 8000 --model ${MODEL_NAME} --gpu-memory-utilization 0.95 --max-num-seqs 512 --tensor-parallel-size ${KB_VLLM_N} ${EXTRA_ARGS} 2>&1 > log
code=$?
if [ $code -eq 0 ]; then
break
Expand Down
4 changes: 2 additions & 2 deletions addons/xinference-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ replicas: 1

## @param cpu
##
cpu: 3
cpu: 2

## @param memory, the unit is Gi
##
memory: 7
memory: 6

## @param gpu
##
Expand Down

0 comments on commit a3618aa

Please sign in to comment.