
Commit 535d5fb
Merge branch 'main' into anna/asynceval
aspfohl authored Dec 7, 2023
2 parents ac37d09 + 3909516 commit 535d5fb
Showing 9 changed files with 1,504 additions and 1,493 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -150,3 +150,8 @@ dmypy.json

# notebooks
notebooks/

+# artifacts from training
+**/*.pt
+**/mlruns/*
+**/tokenizer-save-dir-*/**
2 changes: 2 additions & 0 deletions mcli/mcli-llama2-finetune.yaml
@@ -140,6 +140,8 @@ parameters:
memory_monitor: {}
runtime_estimator: {}

+load_weights_only: true # Only load the weights, not the optimizer state, LR schedule, etc

# loggers:
# wandb: {}

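For context, a minimal sketch of how a load_path / load_weights_only pair in these YAMLs maps onto Composer's Trainer arguments; the model, dataloader, and checkpoint path below are placeholders, not values taken from this repository:

    from composer import Trainer

    # Hypothetical objects standing in for what the training script builds from the YAML.
    trainer = Trainer(
        model=composer_model,           # placeholder ComposerModel
        train_dataloader=train_loader,  # placeholder dataloader
        max_duration='1ep',
        load_path='oci://my-bucket/my-folder/checkpoints/some_checkpoint.pt',
        load_weights_only=True,         # restore model weights only; skip optimizer state, LR schedule, timestamp
    )
    trainer.fit()
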
2,946 changes: 1,473 additions & 1,473 deletions scripts/eval/local_data/commonsense_reasoning/siqa.jsonl

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions scripts/train/yamls/finetune/1b_local_data_sft.yaml
@@ -136,3 +136,4 @@ callbacks:
# Load from remote object store
# REPLACE THE BELOW with your own checkpoint!
load_path: oci://my-bucket/my-folder/mpt-1b/checkpoints/some_checkpoint.pt
+load_weights_only: true # Only load the weights, not the optimizer state, LR schedule, etc
1 change: 1 addition & 0 deletions scripts/train/yamls/finetune/7b_dolly_sft.yaml
@@ -124,3 +124,4 @@ callbacks:
# Load from remote object store
# REPLACE THE BELOW with your own checkpoint!
load_path: oci://my-bucket/my-folder/mpt-7b/checkpoints/some_checkpoint.pt
+load_weights_only: true # Only load the weights, not the optimizer state, LR schedule, etc
7 changes: 0 additions & 7 deletions scripts/train/yamls/finetune/mpt-30b-instruct.yaml
@@ -120,13 +120,6 @@ callbacks:
# save_interval: 3ep
# save_num_checkpoints_to_keep: 1

-# need to use converted checkpoint with llm-foundry code
-# load_path:
-autoresume: false
-load_weights_only: false
-python_log_level: debug


icl_max_seq_len: 2048

# YOU MUST ADD YOUR OWN DATASET URIs
1 change: 1 addition & 0 deletions scripts/train/yamls/finetune/mpt-7b_domain_adapt.yaml
@@ -115,3 +115,4 @@ save_folder: ./{run_name}/checkpoints
# Load from local filesystem or remote object store
# load_path: ./gpt-7b/checkpoints/latest-rank{rank}.pt
# load_path: s3://my-bucket/my-folder/gpt-7b/checkpoints/latest-rank{rank}.pt
+load_weights_only: true # Only load the weights, not the optimizer state, LR schedule, etc
1 change: 1 addition & 0 deletions scripts/train/yamls/finetune/t5-small_dolly_sft.yaml
@@ -101,3 +101,4 @@ callbacks:
# Load from remote object store
# REPLACE THE BELOW with you own checkpoint!
# load_path: oci://my-bucket/my-folder/checkpoints/some_checkpoint.pt
+load_weights_only: true # Only load the weights, not the optimizer state, LR schedule, etc
33 changes: 20 additions & 13 deletions tests/a_scripts/inference/test_convert_composer_to_hf.py
@@ -382,13 +382,17 @@ def test_huggingface_conversion_callback_interval(
'hf_save_interval,save_interval,max_duration,expected_hf_checkpoints,expected_normal_checkpoints',
[('1ba', '1ba', '1ba', 1, 1)])
@patch('os.cpu_count', MagicMock(return_value=None))
-def test_huggingface_conversion_callback(model: str, tmp_path: pathlib.Path,
-                                         tie_word_embeddings: bool,
-                                         fsdp_state_dict_type: Optional[str],
-                                         hf_save_interval: str,
-                                         save_interval: str, max_duration: str,
-                                         expected_hf_checkpoints: int,
-                                         expected_normal_checkpoints: int):
+def test_huggingface_conversion_callback(
+    model: str,
+    tmp_path: pathlib.Path,
+    tie_word_embeddings: bool,
+    fsdp_state_dict_type: Optional[str],
+    hf_save_interval: str,
+    save_interval: str,
+    max_duration: str,
+    expected_hf_checkpoints: int,
+    expected_normal_checkpoints: int,
+):
delete_transformers_cache()

dist.initialize_dist(get_device('gpu'))
@@ -580,12 +584,15 @@ def test_huggingface_conversion_callback(model: str, tmp_path: pathlib.Path,
assert len(normal_checkpoints) == expected_normal_checkpoints
assert len(huggingface_checkpoints) == expected_hf_checkpoints

-    # Load the last huggingface checkpoint
-    loaded_model = transformers.AutoModelForCausalLM.from_pretrained(
-        os.path.join(tmp_path, 'checkpoints', 'huggingface',
-                     f'ba{batches_per_epoch}'),
-        trust_remote_code=True,
-    )
+    # Patch flash_attn package to be empty to simulate loading the model in
+    # an environment without flash attention installed
+    with patch.dict('sys.modules', {'flash_attn': None}):
+        # Load the last huggingface checkpoint
+        loaded_model = transformers.AutoModelForCausalLM.from_pretrained(
+            os.path.join(tmp_path, 'checkpoints', 'huggingface',
+                         f'ba{batches_per_epoch}'),
+            trust_remote_code=True,
+        )

# Check that the loaded model has the correct precision, and then set it back
# to the original for the equivalence check
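As a side note, the sys.modules patch added above works because Python treats a None entry in sys.modules as "not importable". A standalone sketch of that mechanism, using json purely as a stand-in module for the demonstration:

    from unittest.mock import patch

    # Mapping a module name to None in sys.modules makes importing it raise
    # ImportError, so code under test behaves as if the package were absent.
    with patch.dict('sys.modules', {'json': None}):
        try:
            import json  # fails while the patch is active
        except ImportError:
            print('json looks uninstalled inside the patch')

    import json  # works again once patch.dict restores sys.modules
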

0 comments on commit 535d5fb
