diff --git a/scripts/inference/benchmarking/yamls/1b.yaml b/scripts/inference/benchmarking/yamls/1b.yaml index f94aa3d806..d1cfb3c913 100644 --- a/scripts/inference/benchmarking/yamls/1b.yaml +++ b/scripts/inference/benchmarking/yamls/1b.yaml @@ -12,7 +12,6 @@ tokenizer: model: name: mpt_causal_lm init_device: cpu - tokenizer_name: ${tokenizer_name} d_model: 2048 n_heads: 16 # Modified 24->16 so that d_head == 128 to statisfy FlashAttention n_layers: 24 diff --git a/scripts/inference/benchmarking/yamls/7b.yaml b/scripts/inference/benchmarking/yamls/7b.yaml index 55e9ae8413..f57ed2657f 100644 --- a/scripts/inference/benchmarking/yamls/7b.yaml +++ b/scripts/inference/benchmarking/yamls/7b.yaml @@ -12,7 +12,6 @@ tokenizer: model: name: mpt_causal_lm init_device: cpu - tokenizer_name: ${tokenizer_name} d_model: 4096 n_heads: 32 n_layers: 32 diff --git a/scripts/train/yamls/pretrain/gpt-neo-125m.yaml b/scripts/train/yamls/pretrain/gpt-neo-125m.yaml index cfb447e2e4..12914e14bc 100644 --- a/scripts/train/yamls/pretrain/gpt-neo-125m.yaml +++ b/scripts/train/yamls/pretrain/gpt-neo-125m.yaml @@ -34,7 +34,6 @@ train_loader: remote: ${data_remote} split: train shuffle: true - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: true @@ -47,7 +46,6 @@ eval_loader: remote: ${data_remote} split: val shuffle: false - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: false diff --git a/scripts/train/yamls/pretrain/gpt-neo-125m_eval.yaml b/scripts/train/yamls/pretrain/gpt-neo-125m_eval.yaml index fc1e3b0b7f..3da239c717 100644 --- a/scripts/train/yamls/pretrain/gpt-neo-125m_eval.yaml +++ b/scripts/train/yamls/pretrain/gpt-neo-125m_eval.yaml @@ -34,7 +34,6 @@ train_loader: remote: ${data_remote} split: train shuffle: true - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: true @@ -47,7 +46,6 @@ eval_loader: remote: ${data_remote} split: val shuffle: false - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: false diff --git a/scripts/train/yamls/pretrain/gpt2-small.yaml b/scripts/train/yamls/pretrain/gpt2-small.yaml index dde59d55b1..d40cff6e9e 100644 --- a/scripts/train/yamls/pretrain/gpt2-small.yaml +++ b/scripts/train/yamls/pretrain/gpt2-small.yaml @@ -34,7 +34,6 @@ train_loader: remote: ${data_remote} split: train shuffle: true - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: true @@ -47,7 +46,6 @@ eval_loader: remote: ${data_remote} split: val shuffle: false - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: false diff --git a/scripts/train/yamls/pretrain/opt-3b.yaml b/scripts/train/yamls/pretrain/opt-3b.yaml index 3ac281f0ea..4423784b54 100644 --- a/scripts/train/yamls/pretrain/opt-3b.yaml +++ b/scripts/train/yamls/pretrain/opt-3b.yaml @@ -27,7 +27,6 @@ train_loader: remote: ${data_remote} split: train shuffle: true - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: true @@ -40,7 +39,6 @@ eval_loader: remote: ${data_remote} split: val shuffle: false - tokenizer_name: ${tokenizer_name} max_seq_len: ${max_seq_len} shuffle_seed: ${global_seed} drop_last: false