diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 8e30554475..35c6a3cccc 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -3,6 +3,12 @@ on:
   push:
     branches:
     - main
+  pull_request:
+    branches:
+    - main
+    paths:
+    - ./Dockerfile
+    - .github/workflows/docker.yaml
   workflow_dispatch: {}
 jobs:
   docker-build:
@@ -52,13 +58,28 @@
           GIT_SHA=$(echo ${{ github.sha }} | cut -c1-7)
           echo "IMAGE_TAG=${GIT_SHA}" >> ${GITHUB_ENV}
 
+          if [ "${{ github.event_name }}" == "push" ]; then
+            echo "Triggered by push event."
+            PROD_REPO="mosaicml/llm-foundry"
+            IMAGE_TAG=${PROD_REPO}:${{matrix.name}}-${GIT_SHA},${PROD_REPO}:${{matrix.name}}-latest
+            IMAGE_CACHE="${PROD_REPO}:${{matrix.name}}-buildcache"
+          elif [ "${{ github.event_name }}" == "pull_request" ]; then
+            echo "Triggered by pull_request event."
+            STAGING_REPO="mosaicml/ci-staging"
+            IMAGE_TAG=${STAGING_REPO}:${{matrix.name}}-${GIT_SHA}
+            IMAGE_CACHE="${STAGING_REPO}:${{matrix.name}}-buildcache"
+          fi
+
+          # Export the computed tag and cache ref so the next step can read them via env.
+          echo "IMAGE_TAG=${IMAGE_TAG}" >> ${GITHUB_ENV}
+          echo "IMAGE_CACHE=${IMAGE_CACHE}" >> ${GITHUB_ENV}
+
       - name: Build and Push the Docker Image
         uses: docker/build-push-action@v3
         with:
           context: .
-          tags: mosaicml/llm-foundry:${{ matrix.name }}-latest,
-            mosaicml/llm-foundry:${{ matrix.name }}-${{ env.IMAGE_TAG }}
+          tags: ${{ env.IMAGE_TAG }}
           push: true
-          cache-from: type=registry,ref=mosaicml/llm-foundry:${{ matrix.name }}-buildcache
-          cache-to: type=registry,ref=mosaicml/llm-foundry:${{ matrix.name }}-buildcache,mode=max
+          cache-from: type=registry,ref=${{ env.IMAGE_CACHE }}
+          cache-to: type=registry,ref=${{ env.IMAGE_CACHE }},mode=max
           build-args: BASE_IMAGE=${{ matrix.base_image }}
diff --git a/setup.py b/setup.py
index be5b6708a3..6f2b77f689 100644
--- a/setup.py
+++ b/setup.py
@@ -91,6 +91,12 @@
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
     'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@v1.0.9#subdirectory=csrc/xentropy',
 ]
+extra_deps['gpu-flash2'] = [
+    'flash-attn==2.3.1',
+    'mosaicml-turbo==0.0.4',
+    # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
+    'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@v2.3.1#subdirectory=csrc/xentropy',
+]
 
 extra_deps['peft'] = [
     'loralib==0.1.1',  # lora core
@@ -107,7 +113,10 @@
 ]
 extra_deps['all-cpu'] = set(
     dep for key, deps in extra_deps.items() for dep in deps if 'gpu' not in key)
-extra_deps['all'] = set(dep for deps in extra_deps.values() for dep in deps)
+extra_deps['all'] = set(dep for key, deps in extra_deps.items() for dep in deps
+                        if key != 'gpu-flash2')
+extra_deps['all-flash2'] = set(
+    dep for key, deps in extra_deps.items() for dep in deps if key != 'gpu')
 
 setup(
     name=_PACKAGE_NAME,
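For reference, a minimal usage sketch of the extras defined in the setup.py hunks above (assuming an editable install from a local checkout; the extra names come straight from the diff, nothing else is implied):

    # Flash Attention 2 GPU stack only:
    pip install -e ".[gpu-flash2]"

    # Everything except the flash-attn 1.x 'gpu' group:
    pip install -e ".[all-flash2]"

    # Default umbrella, now everything except the new 'gpu-flash2' group:
    pip install -e ".[all]"

By construction the two umbrella extras are mutually exclusive: 'all' filters out the 'gpu-flash2' group and 'all-flash2' filters out 'gpu', so the flash-attn 1.x and 2.x pins can never land in the same install.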