# zarf-config.yaml
# Zarf package configuration.
# `package.create.set` holds values used when the package is built;
# `package.deploy.set` holds values used when the package is deployed.
# Every value is a quoted string; keep them quoted so scalars such as
# "True", "None", "1" and "0.90" are not retyped by the YAML parser.
package:
  create:
    set:
      # The markers below delimit the line holding the release version;
      # keep only the version line between them.
      # x-release-please-start-version
      image_version: "0.14.0"
      # x-release-please-end
      model_repo_id: "TheBloke/Synthia-7B-v2.0-GPTQ"
      model_revision: "gptq-4bit-32g-actorder_True"
      model_path: "/data/.model/"
      name_override: "vllm"
  deploy:
    set:
      # vLLM runtime configuration (usually influenced by .env in local development)
      trust_remote_code: "True"
      tensor_parallel_size: "1"
      enforce_eager: "False"
      gpu_memory_utilization: "0.90"
      worker_use_ray: "True"
      engine_use_ray: "True"
      # NOTE(review): model_repo_id names a GPTQ build while this is "None";
      # presumably the method is detected from the model itself - confirm.
      quantization: "None"
      load_format: "auto"
      # LeapfrogAI SDK runtime configuration (usually influenced by config.yaml in development)
      max_context_length: "32768"
      # Comma-separated list held in a single string.
      stop_tokens: "</s>, <|im_end|>, <|endoftext|>"
      # Double quotes are required here so that \n is read as a newline escape.
      prompt_format_chat_system: "SYSTEM: {}\n"
      prompt_format_chat_user: "USER: {}\n"
      prompt_format_chat_assistant: "ASSISTANT: {}\n"
      temperature: "0.1"
      top_p: "1.0"
      top_k: "0"
      repetition_penalty: "1.0"
      max_new_tokens: "8192"
      # Pod deployment configuration
      gpu_limit: "1"
      gpu_runtime: "nvidia"
      pvc_size: "15Gi"
      pvc_access_mode: "ReadWriteOnce"
      pvc_storage_class: "local-path"