Merge pull request #3 from mosaicml/main
Pulling from the main repository
ShashankMosaicML authored Oct 17, 2023
2 parents 8b886ba + cc238a3 commit 76a2095
Showing 35 changed files with 2,159 additions and 514 deletions.
4 changes: 2 additions & 2 deletions .github/mcp/mcp_pytest.py
@@ -93,10 +93,10 @@
export COMMON_ARGS="-v --durations=20 -m '{args.pytest_markers}' {clear_tmp_path_flag}"
make test PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS --codeblocks"
make test-dist PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS" WORLD_SIZE=2
make test PYTEST='{args.pytest_command}' EXTRA_ARGS="$COMMON_ARGS --codeblocks"
python -m coverage combine
python -m coverage report
41 changes: 36 additions & 5 deletions .github/workflows/docker.yaml
@@ -3,6 +3,12 @@ on:
push:
branches:
- main
pull_request:
branches:
- main
paths:
- ./Dockerfile
- .github/workflows/docker.yaml
workflow_dispatch: {}
jobs:
docker-build:
@@ -13,10 +19,16 @@
include:
- name: '1.13.1_cu117'
base_image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.0.1_cu118'
base_image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121_flash2'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu-flash2]'

steps:
- name: Maximize Build Space on Worker
@@ -52,13 +64,32 @@ jobs:
GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
echo "IMAGE_TAG=${GIT_SHA}" >> ${GITHUB_ENV}
if [ "${{ github.event_name }}" == "push" ]; then
echo "Triggered by push event."
PROD_REPO="mosaicml/llm-foundry"
IMAGE_TAG="${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest"
IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
elif [ "${{ github.event_name }}" == "pull_request" ]; then
echo "Triggered by pull_request event."
STAGING_REPO="mosaicml/ci-staging"
IMAGE_TAG="${STAGING_REPO}:${{matrix.name}}-${GIT_SHA}"
IMAGE_CACHE="${STAGING_REPO}:${{matrix.name}}-buildcache"
else
echo "Triggered by unknown event: ${{ github.event_name }}"
exit 1
fi
echo "IMAGE_TAG=${IMAGE_TAG}" >> ${GITHUB_ENV}
echo "IMAGE_CACHE=${IMAGE_CACHE}" >> ${GITHUB_ENV}
- name: Build and Push the Docker Image
uses: docker/build-push-action@v3
with:
context: .
tags: mosaicml/llm-foundry:${{ matrix.name }}-latest,
mosaicml/llm-foundry:${{ matrix.name }}-${{ env.IMAGE_TAG }}
tags: ${{ env.IMAGE_TAG }}
push: true
cache-from: type=registry,ref=mosaicml/llm-foundry:${{ matrix.name }}-buildcache
cache-to: type=registry,ref=mosaicml/llm-foundry:${{ matrix.name }}-buildcache,mode=max
build-args: BASE_IMAGE=${{ matrix.base_image }}
cache-from: type=registry,ref=${{ env.IMAGE_CACHE }}
cache-to: type=registry,ref=${{ env.IMAGE_CACHE }},mode=max
build-args: |
BASE_IMAGE=${{ matrix.base_image }}
DEP_GROUPS=${{ matrix.dep_groups }}
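
> Editor's note: the "Build and Push" step above now branches on the triggering event, publishing production tags on pushes to `main` and staging tags on pull requests. The sketch below is a minimal Python mirror of that derivation for illustration only; the function name and sample values are hypothetical and not part of this PR.

```python
# Illustrative mirror of the tag/cache derivation in the workflow step above.
def image_refs(event_name: str, name: str, git_sha: str):
    short_sha = git_sha[:7]  # GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
    if event_name == 'push':
        # Pushes to main publish to the production repo, with both a
        # SHA-pinned tag and a rolling "-latest" tag.
        repo = 'mosaicml/llm-foundry'
        image_tag = f'{repo}:{name}-{short_sha},{repo}:{name}-latest'
    elif event_name == 'pull_request':
        # Pull requests publish only a SHA-pinned tag, to the staging repo.
        repo = 'mosaicml/ci-staging'
        image_tag = f'{repo}:{name}-{short_sha}'
    else:
        raise ValueError(f'Triggered by unknown event: {event_name}')
    image_cache = f'{repo}:{name}-buildcache'
    return image_tag, image_cache

# For example, a PR build of the new flash2 image would yield:
#   image_refs('pull_request', '2.1.0_cu121_flash2', '<full 40-char sha>')
#   -> ('mosaicml/ci-staging:2.1.0_cu121_flash2-<sha7>',
#       'mosaicml/ci-staging:2.1.0_cu121_flash2-buildcache')
```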
5 changes: 5 additions & 0 deletions .github/workflows/pr-gpu.yaml
@@ -18,6 +18,7 @@ jobs:
uses: ./.github/workflows/pytest-gpu.yaml
strategy:
matrix:
# TODO: After the PR with the flash attention 2 images goes in, add the new unit test suite
include:
- name: 'gpu-latest'
container: mosaicml/pytorch:latest # mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
@@ -31,6 +32,10 @@ jobs:
container: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
- name: 'gpu-2.1.0-flash2'
container: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
name: ${{ matrix.name }}
if: github.repository_owner == 'mosaicml'
with:
9 changes: 5 additions & 4 deletions Dockerfile
@@ -4,9 +4,10 @@
ARG BASE_IMAGE
FROM $BASE_IMAGE

ARG DEP_GROUPS

# Install and uninstall foundry to cache foundry requirements
RUN git clone -b main https://github.com/mosaicml/llm-foundry.git && \
pip install --no-cache-dir "./llm-foundry[gpu]" && \
pip uninstall -y llm-foundry && \
rm -rf llm-foundry
RUN git clone -b main https://github.com/mosaicml/llm-foundry.git
RUN pip install --no-cache-dir "./llm-foundry${DEP_GROUPS}"
RUN pip uninstall -y llm-foundry
RUN rm -rf llm-foundry
5 changes: 5 additions & 0 deletions README.md
@@ -93,8 +93,10 @@ If you have success/failure using LLM Foundry on other systems, please let us kn
|---------------------------|------------------|--------------|-------------------------------|
| A100-40GB/80GB | 1.13.1 | 11.7 | :white_check_mark: Supported |
| A100-40GB/80GB | 2.0.1 | 11.7, 11.8 | :white_check_mark: Supported |
| A100-40GB/80GB | 2.1.0 | 11.8, 12.1 | :white_check_mark: Supported |
| H100-80GB | 1.13.1 | 11.7 | :x: Not Supported |
| H100-80GB | 2.0.1 | 11.8 | :white_check_mark: Supported |
| H100-80GB | 2.1.0 | 12.1 | :white_check_mark: Supported |
| A10-24GB | 1.13.1 | 11.7 | :construction: In Progress |
| A10-24GB | 2.0.1 | 11.7, 11.8 | :construction: In Progress |
| MI250 | 2.0.1 | ROCm 5.4 | :construction: In Progress |
@@ -113,8 +115,11 @@ You can select a specific commit hash such as `mosaicml/llm-foundry:1.13.1_cu117
|-------------------------------------------------------------|----------------|--------------|-------------------------------------|
| `mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04` | 1.13.1 | 11.7 | No |
| `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04` | 2.0.1 | 11.8 | No |
| `mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04` | 2.1.0 | 12.1 | No |
| `mosaicml/llm-foundry:1.13.1_cu117-latest` | 1.13.1 | 11.7 | Yes |
| `mosaicml/llm-foundry:2.0.1_cu118-latest` | 2.0.1 | 11.8 | Yes |
| `mosaicml/llm-foundry:2.1.0_cu121-latest` | 2.1.0 | 12.1 | Yes (flash attention v1) |
| `mosaicml/llm-foundry:2.1.0_cu121_flash2-latest` | 2.1.0 | 12.1 | Yes (flash attention v2) |
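
> Editor's note: as a quick, illustrative check (not part of this change), you can confirm which Flash Attention major version a given image ships by running Python inside the container and inspecting the installed `flash_attn` package, which the llm-foundry images above are expected to include.

```python
# Illustrative check, not part of this PR: run inside one of the
# mosaicml/llm-foundry images to see which flash-attn major version is installed.
import flash_attn

print(flash_attn.__version__)
# Expected to start with '1.' in 2.1.0_cu121-latest and with '2.' in
# 2.1.0_cu121_flash2-latest, per the rows above.
```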


# Installation
121 changes: 16 additions & 105 deletions llmfoundry/callbacks/generate_callback.py
@@ -1,119 +1,30 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0

"""Periodically log generations to wandb from a set of prompts."""
from typing import Any, List, Union, cast
"""Deprecated Generate callback.
import torch
import wandb
from composer.core import Callback, State, get_precision_context
from composer.loggers import Logger, WandBLogger
from composer.utils import dist, ensure_tuple
Please use composer.callbacks.Generate instead.
"""
import warnings
from typing import Any, List, Union

from composer.callbacks import Generate as ComposerGenerate
from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast

Tokenizer = Union[PreTrainedTokenizer, PreTrainedTokenizerFast]


class Generate(Callback):
class Generate(ComposerGenerate):

def __init__(self, prompts: List[str], batch_log_interval: int,
**kwargs: Any):
"""Periodically log generations to wandb from a set of prompts.
In the main view for a run, there will be a table that will show the _last_ logged generations.
To compare previous iterations of the generations, you need to
1. Click on the run
2. Click on "artifacts" in the menu on the left side of the screen
3. Click on one of the artifacts called "predictions"
4. Click on the "files" tab
5. Click on "predictions.table.json"
6. On the left hand side, there are different versions of the table produced throughout training. Select one of these.
7. Now, when you hover over other versions, there will be a "compare" button, which will allow you to compare the currently
selected version to the version you add via compare.
Args:
prompts (List[str]): The list of prompts you would like to produce generations for
batch_log_interval (int): The interval (in batches) at which this callback runs
kwargs: All kwargs well be passed along to the call to generate. This is for things like `do_sample`, `top_p`, etc
"""
self.prompts = prompts
self.batch_log_interval = batch_log_interval
self.generate_kwargs = kwargs
self.wandb_logger = None

def init(self, state: State, logger: Logger):
if dist.get_global_rank() == 0:
for destination in ensure_tuple(logger.destinations):
if isinstance(destination, WandBLogger):
self.wandb_logger = destination

def batch_checkpoint(self, state: State, logger: Logger) -> None:
if (state.timestamp.batch.value % self.batch_log_interval) == 0:
self.generate(state, logger)

def generate(self, state: State, logger: Logger) -> None:
model = state.model
original_mode = model.training
model.eval()
tokenizer = cast(Tokenizer, state.model.tokenizer)
device = state.device

if not hasattr(model.model, 'generate'):
raise ValueError(
f'Cannot generate from model {model.model.__class__.__name__} because it does not have a `generate` method'
)

# stash the original original value of padding_side because generation requires left padding
original_padding_side = tokenizer.padding_side
tokenizer.padding_side = 'left'
if tokenizer.pad_token_id is None:
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenized_input = tokenizer(self.prompts,
return_tensors='pt',
padding=True)

for k, v in tokenized_input.items():
tokenized_input[k] = device.tensor_to_device(v)

# dummy forward call needed for FSDP to work consistently
dummy_input = torch.tensor([[0]], dtype=torch.long)
dummy_input = device.tensor_to_device(dummy_input)
with get_precision_context(state.precision):
with torch.no_grad():
assert isinstance(model.model, torch.nn.Module)
_ = model.model(input_ids=dummy_input)

output_token_ids = model.model.generate( # type: ignore
input_ids=tokenized_input['input_ids'],
attention_mask=tokenized_input['attention_mask'],
synced_gpus=True,
**self.generate_kwargs,
)

if dist.get_global_rank() == 0:
if self.wandb_logger is not None:
assert wandb.run is not None, 'wandb should have started run'

artifact = wandb.Artifact('generate_samples_' +
str(wandb.run.id),
type='predictions')

rows = []
for i in range(len(self.prompts)):
prompt = self.prompts[i]
output_tokens = output_token_ids[i][
tokenized_input['input_ids'].shape[1]:]
output_text = tokenizer.decode(output_tokens,
skip_special_tokens=True)

rows.append([prompt, output_text])

text_table = wandb.Table(data=rows,
columns=['prompt', 'generation'])
artifact.add(text_table, 'predictions')
wandb.log_artifact(artifact)
wandb.log({'generations': text_table},
step=state.timestamp.batch.value)
warnings.warn(
('Accessing llmfoundry.callbacks.generate_callback.Generate '
'is deprecated and will be removed in a future release. '
'Please use composer.callbacks.Generate instead.'),
DeprecationWarning,
)

tokenizer.padding_side = original_padding_side
model.train(mode=original_mode)
interval = f'{batch_log_interval}ba'
super().__init__(prompts=prompts, interval=interval, **kwargs)
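
> Editor's note: the callback is now a thin deprecation shim over `composer.callbacks.Generate`, so existing llmfoundry configs keep working while new code can construct the Composer callback directly. Below is a minimal sketch of the recommended replacement; the prompts, interval, and generation kwargs are placeholder values, not taken from this PR.

```python
# Minimal sketch of the recommended replacement for the deprecated
# llmfoundry Generate callback; values shown are placeholders.
from composer.callbacks import Generate

generate_cb = Generate(
    prompts=['The quick brown fox'],  # prompts to periodically generate from
    interval='100ba',                 # every 100 batches (the shim builds f'{batch_log_interval}ba')
    do_sample=True,                   # remaining kwargs are forwarded to generate()
    max_new_tokens=64,
)
# The callback is then passed to the Composer Trainer (callbacks=[generate_cb])
# like any other callback.
```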