diff --git a/.github/workflows/regression_yamls/eval-7b-composer.yaml b/.github/workflows/regression_yamls/eval-7b-composer.yaml
deleted file mode 100644
index 0ed3ff9c26..0000000000
--- a/.github/workflows/regression_yamls/eval-7b-composer.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-integrations:
-- integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: main
-  pip_install: -e .[gpu]
-
-command: |
-  cd llm-foundry/scripts/
-  composer eval/eval.py /mnt/config/parameters.yaml
-image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
-name: mpt-7b-composer-eval-regression
-
-compute:
-  gpus: 8
-
-parameters:
-  run_name: mpt-7b-composer-eval-regression
-  seed: 1
-  max_seq_len: 1024
-
-  models:
-  -
-    model_name: mosaicml/mpt-7b
-    model:
-      name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b
-      init_device: cpu
-      pretrained: true
-    tokenizer:
-      name: mosaicml/mpt-7b
-      kwargs:
-        model_max_length: ${max_seq_len}
-    load_path: 'FILL IN'
-
-  device_eval_batch_size: 4
-  precision: amp_fp16
-
-  fsdp_config:
-    sharding_strategy: FULL_SHARD
-    mixed_precision: FULL
-    forward_prefetch: True
-    limit_all_gathers: True
-
-  icl_subset_num_batches: 20
-  icl_tasks: 'eval/yamls/tasks.yaml'
-  eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
diff --git a/.github/workflows/regression_yamls/eval-7b-hf.yaml b/.github/workflows/regression_yamls/eval-7b-hf.yaml
deleted file mode 100644
index e80d54723f..0000000000
--- a/.github/workflows/regression_yamls/eval-7b-hf.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-integrations:
-- integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: main
-  pip_install: -e .[gpu]
-
-command: |
-  cd llm-foundry/scripts/
-  composer eval/eval.py /mnt/config/parameters.yaml
-image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
-name: mpt-7b-hf-eval-regression
-
-compute:
-  gpus: 8
-
-parameters:
-  run_name: mpt-7b-hf-eval-regression
-  seed: 1
-  max_seq_len: 1024
-
-  models:
-  -
-    model_name: mosaicml/mpt-7b
-    model:
-      name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b
-      init_device: cpu
-      pretrained: true
-    tokenizer:
-      name: mosaicml/mpt-7b
-      kwargs:
-        model_max_length: ${max_seq_len}
-
-  device_eval_batch_size: 4
-  precision: amp_fp16
-
-  fsdp_config:
-    sharding_strategy: FULL_SHARD
-    mixed_precision: FULL
-    forward_prefetch: True
-    limit_all_gathers: True
-
-  icl_subset_num_batches: 20
-  icl_tasks: 'eval/yamls/tasks.yaml'
-  eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
diff --git a/.github/workflows/regression_yamls/llama2-finetune.yaml b/.github/workflows/regression_yamls/llama2-finetune.yaml
deleted file mode 100644
index 9e20a86b3e..0000000000
--- a/.github/workflows/regression_yamls/llama2-finetune.yaml
+++ /dev/null
@@ -1,121 +0,0 @@
-integrations:
-- integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: main
-  pip_install: -e .[gpu]
-
-command: |
-  cd llm-foundry/scripts
-  composer train/train.py /mnt/config/parameters.yaml
-  python inference/convert_composer_to_hf.py \
-    --composer_path ./llama2-finetune-regression/checkpoints/latest-rank0.pt \
-    --hf_output_path ./hf-checkpoints/
-image: mosaicml/llm-foundry:1.13.1_cu117-latest
-name: llama2-finetune-regression
-
-compute:
-  gpus: 8
-
-parameters:
-  tokenizer_name: meta-llama/Llama-2-13b-hf
-  max_seq_len: 4096
-  global_seed: 17
-
-  run_name: llama2-finetune-regression
-  max_split_size_mb: 512
-
-  model:
-    name: hf_causal_lm
-    init_device: mixed
-    pretrained_model_name_or_path: meta-llama/Llama-2-13b-hf
-    pretrained: true
-    use_auth_token: true
-    attention_patch_type: triton
-
-  tokenizer:
-    name: ${tokenizer_name}
-    kwargs:
-      model_max_length: ${max_seq_len}
-
-  train_loader:
-    name: finetuning
-    dataset:
-      hf_name: mosaicml/dolly_hhrlhf
-      split: train
-      max_seq_len: ${max_seq_len}
-      allow_pad_trimming: false
-      decoder_only_format: true
-      shuffle: true
-    drop_last: true
-    num_workers: 8
-    pin_memory: false
-    prefetch_factor: 2
-    persistent_workers: true
-    timeout: 0
-
-  eval_loader:
-    name: finetuning
-    dataset:
-      hf_name: mosaicml/dolly_hhrlhf
-      split: test
-      max_seq_len: ${max_seq_len}
-      allow_pad_trimming: false
-      decoder_only_format: true
-      shuffle: false
-    drop_last: true
-    num_workers: 8
-    pin_memory: false
-    prefetch_factor: 2
-    persistent_workers: true
-    timeout: 0
-
-  scheduler:
-    name: cosine_with_warmup
-    t_warmup: 100ba
-    alpha_f: 0.1
-
-  optimizer:
-    name: decoupled_lionw
-    lr: 5.0e-7
-    betas:
-    - 0.9
-    - 0.95
-    weight_decay: 0.0
-
-  algorithms:
-    gradient_clipping:
-      clipping_type: norm
-      clipping_threshold: 1.0
-
-  max_duration: 50ba
-  eval_first: false
-  eval_interval: 1ep
-  eval_subset_num_batches: -1
-  global_train_batch_size: 64
-
-  seed: ${global_seed}
-  device_eval_batch_size: 8
-  device_train_microbatch_size: 8
-  precision: amp_bf16
-
-  fsdp_config:
-    sharding_strategy: FULL_SHARD
-    mixed_precision: PURE
-    activation_checkpointing: true
-    activation_checkpointing_reentrant: false
-    activation_cpu_offload: false
-    limit_all_gathers: true
-    verbose: false
-
-  progress_bar: false
-  log_to_console: true
-  console_log_interval: 1ba
-
-  callbacks:
-    speed_monitor:
-      window_size: 10
-    lr_monitor: {}
-    memory_monitor: {}
-    runtime_estimator: {}
-
-  save_folder: ./{run_name}/checkpoints
diff --git a/.github/workflows/regression_yamls/mpt-125m-chinchilla.yaml b/.github/workflows/regression_yamls/mpt-125m-chinchilla.yaml
deleted file mode 100644
index 6db157ace8..0000000000
--- a/.github/workflows/regression_yamls/mpt-125m-chinchilla.yaml
+++ /dev/null
@@ -1,122 +0,0 @@
-integrations:
-- integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: main
-  pip_install: -e .[gpu]
-
-command: |
-  cd llm-foundry/scripts
-  python data_prep/convert_dataset_hf.py \
-    --dataset c4 --data_subset en \
-    --out_root ./my-copy-c4 --splits train val \
-    --concat_tokens 2048 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
-  composer train/train.py /mnt/config/parameters.yaml
-  python inference/convert_composer_to_hf.py \
-    --composer_path ./mpt-125m-chinchilla-regression/checkpoints/latest-rank0.pt \
-    --hf_output_path ./hf-checkpoints/
-image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
-name: mpt-125m-chinchilla-regression
-
-compute:
-  gpus: 8
-
-parameters:
-  run_name: mpt-125m-chinchilla-regression
-  data_local: ./my-copy-c4
-  data_remote:
-  max_seq_len: 2048
-  global_seed: 17
-
-  model:
-    name: mpt_causal_lm
-    init_device: meta
-    d_model: 768
-    n_heads: 12
-    n_layers: 12
-    expansion_ratio: 4
-    max_seq_len: ${max_seq_len}
-    vocab_size: 50368
-    attn_config:
-      attn_impl: triton
-
-  tokenizer:
-    name: EleutherAI/gpt-neox-20b
-    kwargs:
-      model_max_length: ${max_seq_len}
-
-  train_loader:
-    name: text
-    dataset:
-      local: ${data_local}
-      remote: ${data_remote}
-      split: train
-      shuffle: true
-      max_seq_len: ${max_seq_len}
-      shuffle_seed: ${global_seed}
-    drop_last: true
-    num_workers: 8
-
-  eval_loader:
-    name: text
-    dataset:
-      local: ${data_local}
-      remote: ${data_remote}
-      split: val
-      shuffle: false
-      max_seq_len: ${max_seq_len}
-      shuffle_seed: ${global_seed}
-    drop_last: false
-    num_workers: 8
-
-  scheduler:
-    name: cosine_with_warmup
-    t_warmup: 100ba
-    alpha_f: 0.1
-
-  optimizer:
-    name: decoupled_adamw
-    lr: 6.0e-4
-    betas:
-    - 0.9
-    - 0.95
-    eps: 1.0e-08
-    weight_decay: 0.0
-
-  algorithms:
-    gradient_clipping:
-      clipping_type: norm
-      clipping_threshold: 1.0
-
-  max_duration: 4800ba
-  eval_interval: 500ba
-  eval_first: false
-  eval_subset_num_batches: -1
-  global_train_batch_size: 256
-
-  seed: ${global_seed}
-  device_eval_batch_size: 16
-  device_train_microbatch_size: auto
-  precision: amp_bf16
-
-  fsdp_config:
-    sharding_strategy: FULL_SHARD
-    mixed_precision: PURE
-    activation_checkpointing: false
-    activation_checkpointing_reentrant: false
-    activation_cpu_offload: false
-    limit_all_gathers: true
-    verbose: false
-
-  progress_bar: false
-  log_to_console: true
-  console_log_interval: 1ba
-
-  callbacks:
-    speed_monitor:
-      window_size: 10
-    lr_monitor: {}
-    memory_monitor: {}
-    runtime_estimator: {}
-
-  save_interval: 500ba
-  save_folder: ./{run_name}/checkpoints
diff --git a/.github/workflows/regression_yamls/mpt-125m-sharded-resumption.yaml b/.github/workflows/regression_yamls/mpt-125m-sharded-resumption.yaml
deleted file mode 100644
index 96122c0218..0000000000
--- a/.github/workflows/regression_yamls/mpt-125m-sharded-resumption.yaml
+++ /dev/null
@@ -1,129 +0,0 @@
-integrations:
-- integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: main
-  pip_install: -e .[gpu]
-
-command: |
-  cd llm-foundry/scripts
-  python data_prep/convert_dataset_hf.py \
-    --dataset c4 --data_subset en \
-    --out_root ./my-copy-c4 --splits train_small val_small \
-    --concat_tokens 2048 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
-  composer train/train.py /mnt/config/parameters.yaml \
-    max_duration=10ba \
-    train_loader.dataset.split=train_small \
-    eval_loader.dataset.split=val_small
-  composer train/train.py /mnt/config/parameters.yaml \
-    max_duration=20ba \
-    autoresume=true \
-    train_loader.dataset.split=train_small \
-    eval_loader.dataset.split=val_small
-
-image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
-name: mpt-125m-sharded-regression
-
-compute:
-  gpus: 8
-
-parameters:
-  run_name: mpt-125m-sharded-regression
-  data_local: ./my-copy-c4
-  data_remote:
-  max_seq_len: 2048
-  global_seed: 17
-
-  model:
-    name: mpt_causal_lm
-    init_device: meta
-    d_model: 768
-    n_heads: 12
-    n_layers: 12
-    expansion_ratio: 4
-    max_seq_len: ${max_seq_len}
-    vocab_size: 50368
-    attn_config:
-      attn_impl: triton
-
-  tokenizer:
-    name: EleutherAI/gpt-neox-20b
-    kwargs:
-      model_max_length: ${max_seq_len}
-
-  train_loader:
-    name: text
-    dataset:
-      local: ${data_local}
-      remote: ${data_remote}
-      split: train
-      shuffle: true
-      max_seq_len: ${max_seq_len}
-      shuffle_seed: ${global_seed}
-    drop_last: true
-    num_workers: 8
-
-  eval_loader:
-    name: text
-    dataset:
-      local: ${data_local}
-      remote: ${data_remote}
-      split: val
-      shuffle: false
-      max_seq_len: ${max_seq_len}
-      shuffle_seed: ${global_seed}
-    drop_last: false
-    num_workers: 8
-
-  scheduler:
-    name: cosine_with_warmup
-    t_warmup: 100ba
-    alpha_f: 0.1
-
-  optimizer:
-    name: decoupled_adamw
-    lr: 6.0e-4
-    betas:
-    - 0.9
-    - 0.95
-    eps: 1.0e-08
-    weight_decay: 0.0
-
-  algorithms:
-    gradient_clipping:
-      clipping_type: norm
-      clipping_threshold: 1.0
-
-  max_duration: 4800ba
-  eval_interval: 500ba
-  eval_first: false
-  eval_subset_num_batches: -1
-  global_train_batch_size: 256
-
-  seed: ${global_seed}
-  device_eval_batch_size: 16
-  device_train_microbatch_size: auto
-  precision: amp_bf16
-
-  fsdp_config:
-    sharding_strategy: FULL_SHARD
-    mixed_precision: PURE
-    activation_checkpointing: false
-    activation_checkpointing_reentrant: false
-    activation_cpu_offload: false
-    limit_all_gathers: true
-    verbose: false
-    state_dict_type: sharded
-
-  progress_bar: false
-  log_to_console: true
-  console_log_interval: 1ba
-
-  callbacks:
-    speed_monitor:
-      window_size: 10
-    lr_monitor: {}
-    memory_monitor: {}
-    runtime_estimator: {}
-
-  save_interval: 500ba
-  save_folder: ./{run_name}/checkpoints
diff --git a/.github/workflows/regressions.py b/.github/workflows/regressions.py
deleted file mode 100644
index 9211df1908..0000000000
--- a/.github/workflows/regressions.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright 2022 MosaicML LLM Foundry authors
-# SPDX-License-Identifier: Apache-2.0
-
-import argparse
-import datetime
-import os
-import subprocess
-
-DIR_PATH = os.path.dirname(os.path.abspath(__file__))
-REGRESSIONS_DIR = os.path.join(DIR_PATH, 'regression_yamls')
-
-from mcli import RunConfig, create_run
-
-
-def get_configs(cluster: str, mpt_7b_ckpt_path: str, wandb_entity: str,
-                wandb_project: str, git_repo: str, git_branch: str):
-    print(f'Running regression tests on {git_repo} {git_branch}.')
-    eval_7b_hf = RunConfig.from_file(
-        os.path.join(REGRESSIONS_DIR, 'eval-7b-hf.yaml'))
-    eval_7b_composer = RunConfig.from_file(
-        os.path.join(REGRESSIONS_DIR, 'eval-7b-composer.yaml'))
-    llama2_finetune = RunConfig.from_file(
-        os.path.join(REGRESSIONS_DIR, 'llama2-finetune.yaml'))
-    mpt_125m_chinchilla = RunConfig.from_file(
-        os.path.join(REGRESSIONS_DIR, 'mpt-125m-chinchilla.yaml'))
-    mpt_125m_sharded_resumption = RunConfig.from_file(
-        os.path.join(REGRESSIONS_DIR, 'mpt-125m-sharded-resumption.yaml'))
-
-    # make specific changes
-    eval_7b_composer.parameters['models'][0]['load_path'] = mpt_7b_ckpt_path
-
-    all_configs = [
-        eval_7b_hf, eval_7b_composer, llama2_finetune, mpt_125m_chinchilla,
-        mpt_125m_sharded_resumption
-    ]
-
-    commit_hash = subprocess.check_output(['git', 'rev-parse',
-                                           'HEAD']).strip().decode('utf-8')
-    timestamp = datetime.datetime.now().strftime('%m-%d-%Y::%H:%M:%S')
-    wandb_group = f'{timestamp}::{commit_hash}'
-
-    # make general changes
-    wandb_config = {
-        'entity': wandb_entity,
-        'project': wandb_project,
-        'group': wandb_group
-    }
-    for config in all_configs:
-        config.cluster = cluster
-        config.parameters['loggers'] = config.parameters.get('loggers', {})
-        config.parameters['loggers']['wandb'] = wandb_config
-        config.integrations[0]['git_repo'] = git_repo
-        config.integrations[0]['git_branch'] = git_branch
-
-    return all_configs, []
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--cluster', type=str)
-    parser.add_argument('--mpt-7b-ckpt-path', type=str)
-    parser.add_argument('--wandb-entity', type=str)
-    parser.add_argument('--wandb-project', type=str)
-    parser.add_argument('--git-repo', type=str, default='mosaicml/llm-foundry')
-    parser.add_argument('--git-branch', type=str, default='main')
-
-    args = parser.parse_args()
-
-    run_configs, _ = get_configs(args.cluster, args.mpt_7b_ckpt_path,
-                                 args.wandb_entity, args.wandb_project,
-                                 args.git_repo, args.git_branch)
-    for run_config in run_configs:
-        run = create_run(run_config)
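
Note: the deleted regressions.py above was a standalone entrypoint driven entirely by the argparse flags shown in its __main__ block. For reference, a minimal invocation sketch follows; the angle-bracketed values are placeholders, not values taken from this diff:

  python .github/workflows/regressions.py \
    --cluster <cluster-name> \
    --mpt-7b-ckpt-path <path-to-mpt-7b-composer-checkpoint> \
    --wandb-entity <wandb-entity> \
    --wandb-project <wandb-project>

The --mpt-7b-ckpt-path value was spliced into eval-7b-composer.yaml's load_path before submission, and every run was tagged with a shared W&B group of the form <timestamp>::<commit-hash> so one sweep's results could be compared across runs.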