Commit a829f2c

Merge branch 'notie_embd' of https://github.com/vchiley/llm-foundry into notie_embd

vchiley committed Nov 10, 2023
2 parents 867dc7f + 7fbfc5d commit a829f2c
Showing 2 changed files with 16 additions and 9 deletions.

llmfoundry/models/mpt/modeling_mpt.py (5 changes: 3 additions & 2 deletions)

@@ -684,12 +684,13 @@ def forward(
             use_cache=use_cache,
         )
 
-        out = outputs.last_hidden_state.to(self.transformer.wte.weight.device)
         if self.transformer.lm_head is not None:
-            logits = self.transformer.lm_head(out)
+            logits = self.transformer.lm_head(outputs.last_hidden_state)
         else:
             # move outputs to same device as weights for token embedding
             # needed to support HF `device_map`
+            out = outputs.last_hidden_state
+            out = out.to(self.transformer.wte.weight.device)
             logits = self.transformer.wte(out, True)
 
         if self.logit_scale is not None:
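
For context, the branch this hunk touches chooses between two ways of producing logits: when lm_head is present (untied embeddings) the hidden states are projected directly, and only in the tied case are they first moved onto the token-embedding weight's device (needed for HF device_map) before that weight is reused as the output projection. The following is a minimal sketch of that tied/untied projection, not the llm-foundry implementation; it assumes a SharedEmbedding-style module whose forward takes an unembed flag, matching the call self.transformer.wte(out, True) above, and all names are illustrative.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SharedEmbeddingSketch(nn.Embedding):
    """Embedding that can also reuse its weight matrix as the output projection."""

    def forward(self, input: torch.Tensor, unembed: bool = False) -> torch.Tensor:
        if unembed:
            # (batch, seq, d_model) x (vocab, d_model)^T -> (batch, seq, vocab)
            return F.linear(input, self.weight)
        return super().forward(input)

# Hypothetical usage mirroring the branch in the hunk above.
wte = SharedEmbeddingSketch(50368, 2048)
lm_head = None  # would be nn.Linear(2048, 50368, bias=False) when tie_word_embeddings=False
hidden = torch.randn(1, 8, 2048)

if lm_head is not None:
    logits = lm_head(hidden)
else:
    # Move activations onto the embedding weight's device before reusing it,
    # mirroring the HF device_map handling in the diff.
    hidden = hidden.to(wte.weight.device)
    logits = wte(hidden, True)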

mcli/mcli-1b-max-seq-len-8k.yaml (20 changes: 13 additions & 7 deletions)

@@ -1,10 +1,13 @@
 integrations:
 - integration_type: git_repo
-  git_repo: mosaicml/llm-foundry
-  git_branch: v0.3.0
+  git_repo: vchiley/llm-foundry
+  git_branch: notie_embd
   # git_commit: # OR use your commit hash
   pip_install: -e .[gpu]
   ssh_clone: false # Should be true if using a private repo
+- integration_type: wandb
+  entity: mosaic-ml
+  project: notie_embd_test
 
 # We are fetching, converting, and training on the 'val' split
 # as it is small and quick to get going for this demo.
@@ -18,10 +21,12 @@ command: |
     --concat_tokens 8192 --tokenizer EleutherAI/gpt-neox-20b --eos_text '<|endoftext|>'
   composer train/train.py /mnt/config/parameters.yaml
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
-name: mpt-1b-ctx-8k-gpus-8
+
+name: mpt-1b-ctx-8k-gpus-8-notieembd
 
 compute:
   gpus: 8  # Number of GPUs to use
+  cluster: r1z1
 
 ## These configurations are optional
 # cluster: TODO # Name of the cluster to use for this run
@@ -48,6 +53,7 @@ parameters:
     expansion_ratio: 4
     max_seq_len: ${max_seq_len}
     vocab_size: 50368
+    tie_word_embeddings: false
     attn_config:
       attn_impl: triton
 
@@ -102,7 +108,7 @@ parameters:
       clipping_type: norm
       clipping_threshold: 1.0
 
-  max_duration: 24800ba  # ~ 26B tokens
+  max_duration: 500ba  # ~ 26B tokens
   eval_interval: 2000ba
   eval_first: false
   eval_subset_num_batches: -1
@@ -111,7 +117,7 @@ parameters:
   # System
   seed: 17
   device_eval_batch_size: 1
-  device_train_microbatch_size: 1
+  device_train_microbatch_size: 4
   # device_train_microbatch_size: auto
   precision: amp_bf16
 
@@ -136,8 +142,8 @@ parameters:
     lr_monitor: {}
     memory_monitor: {}
     runtime_estimator: {}
-  # loggers:
-  #   wandb: {}
+  loggers:
+    wandb: {}
 
   # Checkpoint to local filesystem or remote object store
   # save_interval: 2000ba
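
The only model-architecture change in this config is tie_word_embeddings: false, which asks for a standalone output projection instead of reusing the input embedding matrix. A rough sketch of what that flag implies on the model side, assuming the MPTConfig / MPTForCausalLM interface exposed by llmfoundry accepts the flag at this commit (field values are taken from the config above, everything else is illustrative):

from llmfoundry.models.mpt import MPTConfig, MPTForCausalLM

# Small MPT-style config with untied input/output embeddings; unspecified
# fields fall back to MPTConfig defaults.
config = MPTConfig(
    expansion_ratio=4,
    max_seq_len=8192,
    vocab_size=50368,
    tie_word_embeddings=False,  # use a separate lm_head instead of reusing wte
)
model = MPTForCausalLM(config)

# With untied embeddings the model should expose a separate lm_head, so the
# output projection and the token-embedding weight are distinct tensors.
assert model.transformer.lm_head is not None
assert model.transformer.lm_head.weight.data_ptr() != model.transformer.wte.weight.data_ptr()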
