From 99ee1a9e8b71af5383f857d98b9a38db8feb9520 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Wed, 28 Feb 2024 15:08:40 +0800
Subject: [PATCH 1/2] restructure workflow files

---
Review notes (not part of the commit message; ignored by `git am`):
  * Adds the reusable workflow build_and_test.yml, which calls
    build_push_image.yml and then test_model.yml (`needs: build-push-image`).
  * run_build_and_test.yml invokes it on pushes to
    `build_images_and_run_tests_*` branches with a one-entry matrix.
  * GCP_PROJECT_ID becomes a string input fed from `vars.GCP_PROJECT_ID`;
    secrets are forwarded with `secrets: inherit`.
  * test_model.yml now runs inside the image addressed by its inputs and
    clones transformers at tag v4.38.1, exported as TRANSFORMERS_DIR.
  * NOTE(review): matrix values `2.1`, `4.38.1` and `310` are unquoted;
    consider quoting them in a follow-up so they are always strings.
  * NOTE(review): `os.getenv("TRANSFORMERS_DIR")` returns None when the
    variable is unset, so `os.path.join` raises TypeError at import time.
  * NOTE(review): tag v4.38.1 is hard-coded in test_model.yml although the
    transformers version is also a matrix value in run_build_and_test.yml.
  * NOTE(review): "insensitive data" in the docker_image_tag comments
    presumably means "non-sensitive data".

 .github/workflows/build_and_test.yml          | 34 +++++++++++++++
 ...ld-push-image.yml => build_push_image.yml} | 13 ++----
 .github/workflows/run_build_and_test.yml      | 20 +++++++++
 .github/workflows/test-build-push-image.yml   | 15 -------
 .github/workflows/test_model.yml              | 41 +++++++++++++++----
 tests/models/vit/test_training_vit.py         | 10 +----
 6 files changed, 94 insertions(+), 39 deletions(-)
 create mode 100644 .github/workflows/build_and_test.yml
 rename .github/workflows/{build-push-image.yml => build_push_image.yml} (83%)
 create mode 100644 .github/workflows/run_build_and_test.yml
 delete mode 100644 .github/workflows/test-build-push-image.yml

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
new file mode 100644
index 00000000..1f39c701
--- /dev/null
+++ b/.github/workflows/build_and_test.yml
@@ -0,0 +1,34 @@
+name: Build images and run tests
+
+on:
+  workflow_call:
+    inputs:
+      dockerfile_path:
+        description: 'Path to the Dockerfile'
+        type: string
+        required: true
+      docker_image_tag: # It should be in the form as mentioned in our internal docs. Just pass insensitive data here like huggingface-text-generation-inference-${accelerator}.${version}:latest
+        description: 'Docker Image Tag'
+        type: string
+        required: true
+
+jobs:
+  build-push-image:
+    uses: ./.github/workflows/build_push_image.yml
+    secrets: inherit
+    with:
+      region: us-central1
+      dockerfile_path: ${{ inputs.dockerfile_path }}
+      docker_image_tag: ${{ inputs.docker_image_tag }}
+      gcp_artifact_registry_repository: deep-learning-images
+      GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
+
+  model_test:
+    needs: build-push-image
+    uses: ./.github/workflows/test_model.yml
+    secrets: inherit
+    with:
+      region: us-central1
+      docker_image_tag: ${{ inputs.docker_image_tag }}
+      gcp_artifact_registry_repository: deep-learning-images
+      GCP_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
diff --git a/.github/workflows/build-push-image.yml b/.github/workflows/build_push_image.yml
similarity index 83%
rename from .github/workflows/build-push-image.yml
rename to .github/workflows/build_push_image.yml
index 53f10e87..7ea17837 100644
--- a/.github/workflows/build-push-image.yml
+++ b/.github/workflows/build_push_image.yml
@@ -1,4 +1,4 @@
-name: Build and Push to GCP Artifact Registry Resuable Workflow
+name: Build and push images
 
 on:
   workflow_call:
@@ -19,16 +19,13 @@ on:
         description: 'GCP Artifact Registry Repository'
         type: string
         required: true
-    secrets:
-      GCP_SERVICE_ACCOUNT_JSON_KEY:
-        description: 'Service Account'
-        required: true
       GCP_PROJECT_ID:
         description: 'GCP Project ID'
+        type: string
         required: true
 
 jobs:
-  build-push-image-job:
+  build-push-image:
     name: Build and Push Docker Image to GCP Artifact Registry
     runs-on: [intel-cpu, 8-cpu, ci]
     steps:
@@ -55,7 +52,5 @@ jobs:
         uses: 'docker/build-push-action@v5.1.0'
         with:
           file: ${{ inputs.dockerfile_path }} # Read more about it here: https://github.com/marketplace/actions/build-and-push-docker-images#customizing
-          tags: ${{ inputs.region }}-docker.pkg.dev/${{ secrets.GCP_PROJECT_ID }}/${{ inputs.gcp_artifact_registry_repository }}/${{ inputs.docker_image_tag }}
+          tags: ${{ inputs.region }}-docker.pkg.dev/${{ inputs.GCP_PROJECT_ID }}/${{ inputs.gcp_artifact_registry_repository }}/${{ inputs.docker_image_tag }}
           push: true
-  
-  
\ No newline at end of file
diff --git a/.github/workflows/run_build_and_test.yml b/.github/workflows/run_build_and_test.yml
new file mode 100644
index 00000000..01ba948d
--- /dev/null
+++ b/.github/workflows/run_build_and_test.yml
@@ -0,0 +1,20 @@
+name: Build images and run tests
+
+on:
+  push:
+    branches:
+      - build_images_and_run_tests_*
+
+jobs:
+  build-push-image:
+    strategy:
+      fail-fast: false
+      matrix:
+        torch: [2.1]
+        transformers: [4.38.1]
+        python: [310]
+    uses: ./.github/workflows/build_and_test.yml
+    secrets: inherit
+    with:
+      dockerfile_path: ./containers/pytorch/training/gpu/${{ matrix.torch }}/transformers/${{ matrix.transformers }}/py${{ matrix.python }}/Dockerfile
+      docker_image_tag: huggingface-pytorch-training-gpu-${{ matrix.torch }}.transformers.${{ matrix.transformers }}.py${{ matrix.python }}:latest
diff --git a/.github/workflows/test-build-push-image.yml b/.github/workflows/test-build-push-image.yml
deleted file mode 100644
index 029f69e4..00000000
--- a/.github/workflows/test-build-push-image.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: Test Build and Push Image Reusable Workflow
-on:
-  workflow_dispatch:
-
-jobs:
-  test-reusable-workflow:
-    uses: ./.github/workflows/build-push-image.yml
-    with:
-      region: us-central1
-      dockerfile_path: ./containers/pytorch/training/gpu/2.1/transformers/4.37.2/py310/Dockerfile
-      docker_image_tag: huggingface-pytorch-training-gpu-2.1.transformers.4.37.2.py310:latest
-      gcp_artifact_registry_repository: deep-learning-images
-    secrets:
-      GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
-      GCP_SERVICE_ACCOUNT_JSON_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON_KEY }}
\ No newline at end of file
diff --git a/.github/workflows/test_model.yml b/.github/workflows/test_model.yml
index bdeb732b..8832d83d 100644
--- a/.github/workflows/test_model.yml
+++ b/.github/workflows/test_model.yml
@@ -1,9 +1,24 @@
-name: Model Test
+name: Model tests
 
 on:
-  push:
-    branches:
-      - run_tests_against_images_*
+  workflow_call:
+    inputs:
+      region:
+        description: 'Region where the Artifact Registry is located'
+        type: string
+        required: true
+      docker_image_tag: # It should be in the form as mentioned in our internal docs. Just pass insensitive data here like huggingface-text-generation-inference-${accelerator}.${version}:latest
+        description: 'Docker Image Tag'
+        type: string
+        required: true
+      gcp_artifact_registry_repository:
+        description: 'GCP Artifact Registry Repository'
+        type: string
+        required: true
+      GCP_PROJECT_ID:
+        description: 'GCP Project ID'
+        type: string
+        required: true
 
 env:
   HF_HOME: /mnt/cache
@@ -13,16 +28,28 @@ env:
 
 jobs:
   run_test:
-    name: github repo
+    name: Run model tests
     runs-on: [single-gpu, nvidia-gpu, a10, ci]
     container:
-      # TODO: make this $ {{ inputs.image }} to use GCP's DLC images
-      image: huggingface/transformers-all-latest-gpu
+      image: ${{ inputs.region }}-docker.pkg.dev/${{ inputs.GCP_PROJECT_ID }}/${{ inputs.gcp_artifact_registry_repository }}/${{ inputs.docker_image_tag }}
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+      credentials:
+        username: _json_key
+        password: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON_KEY }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
 
+      - name: Prepare `transformers` examples/pytorch directory
+        shell: bash
+        run: |
+          git config --global --add safe.directory /__w/Google-Cloud-Containers/Google-Cloud-Containers
+          git clone https://github.com/huggingface/transformers.git
+          cd transformers
+          git checkout tags/v4.38.1
+          git log -n 1
+
       - name: run tests
         run: |
+          export TRANSFORMERS_DIR=/__w/Google-Cloud-Containers/Google-Cloud-Containers/transformers
           python3 -m pytest -v tests/models
diff --git a/tests/models/vit/test_training_vit.py b/tests/models/vit/test_training_vit.py
index dcb7cb44..815dffa3 100644
--- a/tests/models/vit/test_training_vit.py
+++ b/tests/models/vit/test_training_vit.py
@@ -4,14 +4,8 @@ from ..test_model import ModelTrainingTestMixin, TestCasePlus
 
 
 
-# So far, the image we use has a `/transformers` directory.
-# TODO: Find a way to get the installed `transformers` directory.
-SRC_DIRS = [
-    os.path.join("/transformers/examples/pytorch", dirname)
-    for dirname in [
-        "image-classification",
-    ]
-]
+# `TRANSFORMERS_DIR` is an environment variable pointing to a `transformers` source directory (containing `examples`)
+SRC_DIRS = [os.path.join(os.getenv("TRANSFORMERS_DIR"), "examples/pytorch", dirname) for dirname in ["image-classification"]]
 sys.path.extend(SRC_DIRS)
 
 

From 4aac639c609b8e363a8ed85310448c5eb89e9d88 Mon Sep 17 00:00:00 2001
From: ydshieh
Date: Wed, 28 Feb 2024 17:23:18 +0800
Subject: [PATCH 2/2] restructure workflow files

---
Review notes (not part of the commit message; ignored by `git am`):
  * Bumps the checkout step in test_model.yml from actions/checkout@v3
    to actions/checkout@v4; nothing else changes.

 .github/workflows/test_model.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_model.yml b/.github/workflows/test_model.yml
index 8832d83d..94571104 100644
--- a/.github/workflows/test_model.yml
+++ b/.github/workflows/test_model.yml
@@ -38,7 +38,7 @@ jobs:
         password: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON_KEY }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
 
       - name: Prepare `transformers` examples/pytorch directory
         shell: bash