mcli-1b-eval.yaml
integrations:
- integration_type: git_repo
  git_repo: mosaicml/llm-foundry
  git_branch: v0.4.0
  # git_commit:  # OR use your commit hash
  pip_install: -e .[gpu]
  ssh_clone: false  # Should be true if using a private repo

command: |
  cd llm-foundry/scripts/
  composer eval/eval.py /mnt/config/parameters.yaml
image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
name: mpt-1b-eval

compute:
  gpus: 8  # Number of GPUs to use
  ## These configurations are optional
  # cluster: TODO # Name of the cluster to use for this run
  # gpu_type: a100_80gb # Type of GPU to use. We use a100_80gb in our experiments

# The below is injected as a YAML file: /mnt/config/parameters.yaml
parameters:
  run_name:  # If left blank, will be read from top YAML name for W&B logging and checkpointing
  seed: 1
  max_seq_len: 1024

  models:
  -
    model_name: mpt1b
    tokenizer:
      name: EleutherAI/gpt-neox-20b
      kwargs:
        model_max_length: ${max_seq_len}
    model:
      name: mpt_causal_lm
      init_device: mixed
      d_model: 2048
      n_heads: 16  # Modified 24->16 so that d_head == 128 to satisfy FlashAttention
      n_layers: 24
      expansion_ratio: 4
      max_seq_len: ${max_seq_len}
      vocab_size: 50368
      attn_config:
        attn_impl: triton
    load_path:  # Add your (non-optional) Composer checkpoint path here!

  device_eval_batch_size: 4
  precision: amp_fp16

  # FSDP config for model sharding
  fsdp_config:
    sharding_strategy: FULL_SHARD
    mixed_precision: FULL
    forward_prefetch: True
    limit_all_gathers: True

  icl_tasks: 'eval/yamls/tasks_v0.2.yaml'
  eval_gauntlet: 'eval/yamls/eval_gauntlet_v0.2.yaml'
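
# Usage sketch (not part of the original file; assumes the MosaicML CLI `mcli`
# is installed and configured against your cluster):
#
#   mcli run -f mcli-1b-eval.yaml
#
# The platform injects the `parameters` block above as /mnt/config/parameters.yaml,
# which the `command` section then passes to composer eval/eval.py. Fill in
# `load_path` with your Composer checkpoint before launching.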