Skip to content

Commit

Permalink
Merge branch 'main' into output_eval_logging
Browse files Browse the repository at this point in the history
  • Loading branch information
bmosaicml committed Dec 15, 2023
2 parents 1e6e923 + f56f122 commit e386107
Show file tree
Hide file tree
Showing 159 changed files with 76,588 additions and 1,695 deletions.
8 changes: 8 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Require admin approval to modify all files in the root of the repository
# This includes setup.py, the README, and the CODEOWNERS file itself!
/* @mosaicml/composer-team-admins

# Require admin approval to change the CI build configuration
# All CI Changes should be reviewed for security
/.ci/ @mosaicml/composer-team-admins
/.github/ @mosaicml/composer-team-admins
33 changes: 24 additions & 9 deletions .github/mcp/mcp_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import argparse
import time

from mcli.sdk import (RunConfig, RunStatus, create_run, follow_run_logs,
wait_for_run_status)
from mcli import (RunConfig, RunStatus, create_run, follow_run_logs,
wait_for_run_status)

if __name__ == '__main__':

Expand Down Expand Up @@ -54,6 +54,9 @@
type=int,
default=1800,
help='Timeout for run (in seconds)')
parser.add_argument('--deps_group',
type=str,
help='Dependency group to install')
args = parser.parse_args()

name = args.name
Expand Down Expand Up @@ -89,7 +92,7 @@
clear_tmp_path_flag = '-o tmp_path_retention_policy=none'
command += f'''
pip install --upgrade --user .[all]
pip install --upgrade --user .[{args.deps_group}]
export COMMON_ARGS="-v --durations=20 -m '{args.pytest_markers}' {clear_tmp_path_flag}"
Expand All @@ -104,13 +107,25 @@

config = RunConfig(
name=name,
cluster=args.cluster,
gpu_type=args.gpu_type,
gpu_num=args.gpu_num,
compute={
'cluster': args.cluster,
'gpu_type': args.gpu_type,
'gpus': args.gpu_num
},
image=args.image,
integrations=[git_integration],
command=command,
scheduling={'max_duration': args.timeout / 60 / 60},
env_variables=[
{
'key': 'MOSAICML_PLATFORM',
'value': 'False',
},
{
'key': 'PYTHONUNBUFFERED',
'value': '1',
},
],
)

# Create run
Expand All @@ -127,7 +142,7 @@
print(line, end='')

print('[GHA] Run completed. Waiting for run to finish...')
run = wait_for_run_status(run, status='completed')
run = wait_for_run_status(run, status=RunStatus.COMPLETED)

# Fail if command exited with non-zero exit code or timed out
assert run.status == RunStatus.COMPLETED
# Fail if command exited with non-zero exit code or timed out (didn't reach COMPLETED)
assert run.status == RunStatus.COMPLETED, f'Run did not complete: {run.status} ({run.reason})'
27 changes: 12 additions & 15 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,18 @@ jobs:
strategy:
matrix:
include:
- name: '1.13.1_cu117'
base_image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.0.1_cu118'
base_image: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu]'
- name: '2.1.0_cu121_flash2'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
dep_groups: '[gpu-flash2]'

- name: '2.1.0_cu121_aws'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
dep_groups: '[gpu]'
- name: '2.1.0_cu121_flash2_aws'
base_image: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
dep_groups: '[gpu-flash2]'
steps:
- name: Maximize Build Space on Worker
uses: easimon/maximize-build-space@v4
Expand Down Expand Up @@ -64,19 +63,17 @@ jobs:
GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
echo "IMAGE_TAG=${GIT_SHA}" >> ${GITHUB_ENV}
if [ "${{ github.event_name }}" == "push" ]; then
echo "Triggered by push event."
PROD_REPO="mosaicml/llm-foundry"
IMAGE_TAG="${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest"
IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
elif [ "${{ github.event_name }}" == "pull_request" ]; then
if [ "${{ github.event_name }}" == "pull_request" ]; then
echo "Triggered by pull_request event."
STAGING_REPO="mosaicml/ci-staging"
IMAGE_TAG="${STAGING_REPO}:${{matrix.name}}-${GIT_SHA}"
IMAGE_CACHE="${STAGING_REPO}:${{matrix.name}}-buildcache"
else
echo "Triggered by unknown event: ${{ github.event_name }}"
exit 1
# Any event other than pull_request (e.g. push or workflow_dispatch) falls through to here
echo "Triggered by ${{ github.event_name }} event, releasing to prod"
PROD_REPO="mosaicml/llm-foundry"
IMAGE_TAG="${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest"
IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
fi
echo "IMAGE_TAG=${IMAGE_TAG}" >> ${GITHUB_ENV}
Expand Down
8 changes: 0 additions & 8 deletions .github/workflows/pr-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,6 @@ jobs:
strategy:
matrix:
include:
- name: 'cpu-latest'
container: mosaicml/pytorch:latest_cpu # mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
- name: 'cpu-2.0.1'
container: mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
- name: 'cpu-2.1.0'
container: mosaicml/pytorch:2.1.0_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
Expand Down
12 changes: 3 additions & 9 deletions .github/workflows/pr-gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,17 @@ jobs:
uses: ./.github/workflows/pytest-gpu.yaml
strategy:
matrix:
# TODO: After the PR with the flash attention 2 images goes in, add the new unit test suite
include:
- name: 'gpu-latest'
container: mosaicml/pytorch:latest # mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
- name: 'gpu-2.0.1'
container: mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
- name: 'gpu-2.1.0'
container: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
deps_group: 'all'
- name: 'gpu-2.1.0-flash2'
container: mosaicml/llm-foundry:2.1.0_cu121_flash2-latest
markers: 'gpu'
pytest_command: 'coverage run -m pytest'
deps_group: 'all-flash2'
name: ${{ matrix.name }}
if: github.repository_owner == 'mosaicml'
with:
Expand All @@ -45,5 +38,6 @@ jobs:
pytest-command: ${{ matrix.pytest_command }}
pytest-markers: ${{ matrix.markers }}
python-version: 3.9
deps-group: ${{ matrix.deps_group }}
secrets:
mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
6 changes: 5 additions & 1 deletion .github/workflows/pytest-gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ on:
required: false
type: string
default: 3.9
deps-group:
required: true
type: string
secrets:
mcloud-api-key:
required: true
Expand Down Expand Up @@ -77,4 +80,5 @@ jobs:
--image '${{ inputs.container }}' \
--pytest_markers '${{ inputs.pytest-markers }}' \
--pytest_command '${{ inputs.pytest-command }}' \
--timeout ${{ inputs.mcloud-timeout }} ${REF_ARGS}
--timeout ${{ inputs.mcloud-timeout }} ${REF_ARGS} \
--deps_group ${{ inputs.deps-group }}
41 changes: 41 additions & 0 deletions .github/workflows/smoketest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: Smoketest
on:
push:
branches:
- main
- release/*
pull_request:
branches:
- main
- release/*
workflow_dispatch:
# Cancel in-progress runs when a new commit is pushed to the same branch, unless the branch is main or dev
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }}
defaults:
run:
working-directory: .
jobs:
smoketest:
runs-on: ubuntu-20.04
timeout-minutes: 10
strategy:
matrix:
python_version:
- "3.9"
- "3.10"
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
- name: Setup
run: |
set -ex
python -m pip install --upgrade 'pip<23' wheel
python -m pip install --upgrade .
python -m pip install pytest==7.2.1 pytest_codeblocks==0.16.1
- name: Run checks
run: |
pytest tests/test_smoketest.py
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,8 @@ dmypy.json

# notebooks
notebooks/

# artifacts from training
**/*.pt
**/mlruns/*
**/tokenizer-save-dir-*/**
Loading

0 comments on commit e386107

Please sign in to comment.