forked from skypilot-org/skypilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaxolotl-spot.yaml
33 lines (25 loc) · 872 Bytes
/
axolotl-spot.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Usage:
#
#   Unmanaged spot (no auto-recovery; for debugging):
#     HF_TOKEN=abc BUCKET=<unique-name> sky launch -c axolotl-spot axolotl-spot.yaml --env HF_TOKEN --env BUCKET -i30 --down
#
#   Managed spot (auto-recovery; for full runs):
#     HF_TOKEN=abc BUCKET=<unique-name> sky spot launch -n axolotl-spot axolotl-spot.yaml --env HF_TOKEN --env BUCKET
# Task name.
name: axolotl

# Hardware and image requested for the task. These keys must be nested under
# `resources`; at column 0 they would parse as unrelated top-level keys.
resources:
  accelerators: A100:1
  cloud: gcp  # optional
  use_spot: true
  image_id: docker:winglian/axolotl:main-py3.10-cu118-2.0.1

# Local directory used as the task's working directory.
# NOTE(review): presumably this is where qlora-checkpoint.yaml lives — confirm.
workdir: mistral
# Storage attached to the task: the bucket named by the BUCKET env var is
# mounted at /sky-notebook in MOUNT mode. `name` and `mode` must stay nested
# under the mount path; a top-level `name` here would collide with the task
# name.
# NOTE(review): presumably checkpoints are written under /sky-notebook so a
# recovered spot job can resume — confirm against qlora-checkpoint.yaml.
file_mounts:
  /sky-notebook:
    name: ${BUCKET}
    mode: MOUNT
# Commands executed for the task: log in to Hugging Face with the token from
# the HF_TOKEN env var, then start axolotl training with qlora-checkpoint.yaml.
# The token is quoted so an empty or unusual value cannot be word-split by the
# shell.
run: |
  huggingface-cli login --token "${HF_TOKEN}"
  accelerate launch -m axolotl.cli.train qlora-checkpoint.yaml
# Environment variables referenced by `file_mounts` (${BUCKET}) and `run`
# (${HF_TOKEN}). Both are left null here and are expected to be supplied,
# e.g. with `--env HF_TOKEN --env BUCKET` as in the usage notes.
envs:
  # TODO: Fill with your own huggingface token, or use --env to pass.
  HF_TOKEN: null
  # TODO: Fill with your unique bucket name, or use --env to pass.
  BUCKET: null
4