# runtime_config.yaml (61 lines, 57 loc, 1.88 KB) -- from a repository forked from caikit/caikit-nlp.
# (Line-number gutter 1-61 from the web view of this file removed.)
# The contents of this file configure the TGIS server & caikit.
jvm_options: []

# Runtime settings. Nesting restored: each key below belongs to the mapping
# opened on the line above it.
runtime:
  library: caikit_nlp
  lazy_load_local_models: true
  batching:
    standalone-model:
      # Set to the batch size to use for batching.
      size: 0
# Model finders and initializers.
# NOTE(review): the nesting below was reconstructed from key order after the
# original indentation was lost -- verify against the caikit model_management
# and TGIS backend configuration schemas before relying on it.
model_management:
  finders:
    default:
      type: LOCAL
    remote_tgis:
      type: TGIS-AUTO
      config:
        test_connection: true
  initializers:
    default:
      type: LOCAL
      config:
        backend_priority:
          - type: TGIS
            config:
              # Settings for a locally managed TGIS instance.
              local:
                load_timeout: 120
                grpc_port: null
                http_port: null
                health_poll_delay: 1.0
              # Per-model remote endpoints, keyed by model name.
              remote_models:
                flan-t5-xl:
                  hostname: "localhost:8033"
                  prompt_dir: tgis_prompts
                llama-70b:
                  hostname: "localhost:8034"
                  prompt_dir: tgis_prompts
              # NOTE(review): presumably the fallback connection, with
              # {model_id} substituted per model -- confirm with the consumer.
              connection:
                hostname: "foo.{model_id}:1234"
                ca_cert_file: null
                client_cert_file: null
                client_key_file: null
# Config used only in EmbeddingModule.
# Set values here or use env vars like EMBEDDING_RETRIES=32.
embedding:
  # Number of times to retry on error. Most deployments should use 0 retries.
  retries: 0
  # Batch size for encode(). If <= 0 or invalid, the sentence-transformers
  # default is used.
  batch_size: 0
  # Whether implicit truncation (with truncate_input_tokens=0) throws an error
  # for truncation (true, the default) or that error is disabled (false).
  implicit_truncation_errors: true
  # Attempt to optimize with PyTorch compile().
  pt2_compile: false
  # Use IPEX optimize. Works best when used with autocast (bfloat16) below.
  ipex: false
  # Use autocast in encode with its default dtype (bfloat16).
  autocast: false
  # For testing, set device to "mps" on MacOS or "xpu" for IPEX GPU.
  # Otherwise, the default does automatic checks for cuda GPU (else cpu).
  device: ""