Adding Graviton Regression test CI (#3273)
* testing on graviton

* testing on graviton

* testing on graviton

* checking python

* rmv python

* changing back to python

* testing cpu instead

* adding torchtext

* adding torchtext

* testing torchtext

* removing two tests

* removing pytorch test

* adding numpy upgrade

* adding numpy upgrade

* testing full ci

* testing full ci

* testing full ci

* skipping grpc

* adding graviton ci

* adding graviton ci

* adding ci cpu graviton

* adding ci cpu graviton

* adding env

* skipping a test for now

* fixing env variable

* removing scripted 3&4

* small changes

* fixing lint

* fixing lint

* fixing lint

* removing torchtext

---------

Co-authored-by: Ubuntu <[email protected]>
Co-authored-by: Ankith Gunapal <[email protected]>
3 people authored Sep 10, 2024
1 parent 640b406 commit 87c9823
Showing 6 changed files with 144 additions and 9 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/ci_graviton_cpu.yml
@@ -0,0 +1,48 @@
name: CI CPU Graviton

on:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  merge_group:


concurrency:
  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  ci-cpu:
    runs-on: [self-hosted, graviton-test]
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          architecture: arm64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          python ts_scripts/install_dependencies.py --environment=dev
      - name: Torchserve Sanity
        uses: nick-fields/retry@v3
        env:
          TS_MAC_ARM64_CPU_ONLY: 'True'
        with:
          timeout_minutes: 60
          max_attempts: 3
          retry_on: error
          command: |
            python torchserve_sanity.py
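
Both Graviton workflows export TS_MAC_ARM64_CPU_ONLY before calling into the TorchServe scripts. As a rough, hypothetical illustration of how a script might consume such a flag (the actual install_dependencies.py and torchserve_sanity.py logic may differ), a boolean gate on the environment variable could look like this:

import os


def cpu_only_mode() -> bool:
    # Hypothetical helper: interpret TS_MAC_ARM64_CPU_ONLY as a boolean switch.
    # The real TorchServe scripts may parse or use this variable differently.
    return os.environ.get("TS_MAC_ARM64_CPU_ONLY", "False").lower() == "true"


if __name__ == "__main__":
    if cpu_only_mode():
        print("CPU-only mode: skipping GPU-specific dependencies and tests.")
    else:
        print("Full mode: including GPU-specific dependencies and tests.")
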
41 changes: 41 additions & 0 deletions .github/workflows/regression_tests_graviton_cpu.yml
@@ -0,0 +1,41 @@
name: Run Regression Tests on CPU for Graviton

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  merge_group:

concurrency:
  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  regression-cpu:
    runs-on: [self-hosted, graviton-test]
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          architecture: arm64
      - name: Setup Java 17
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: '17'
      - name: Checkout TorchServe
        uses: actions/checkout@v3
        with:
          submodules: recursive
      - name: Install dependencies
        run: |
          python ts_scripts/install_dependencies.py --environment=dev
      - name: Torchserve Regression Tests
        env:
          TS_MAC_ARM64_CPU_ONLY: 'True'
        run: |
          python test/regression_tests.py
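
The test changes below all add the same architecture guard so that a handful of tests are skipped on the Graviton (aarch64) runners. A minimal standalone sketch of that pytest pattern (the marker name and test body here are illustrative, not part of the TorchServe code):

import platform

import pytest

# Reusable marker mirroring the guard added in the test files below.
skip_on_aarch64 = pytest.mark.skipif(
    platform.machine() == "aarch64",
    reason="Test skipped on aarch64 architecture",
)


@skip_on_aarch64
def test_runs_everywhere_except_graviton():
    # Placeholder body; the real tests exercise TorchServe's APIs.
    assert platform.machine() != "aarch64"
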
5 changes: 5 additions & 0 deletions test/pytest/test_gRPC_inference_api.py
@@ -1,10 +1,12 @@
import json
import os
import platform
import threading
from ast import literal_eval

import inference_pb2
import management_pb2
import pytest
import test_gRPC_utils
import test_utils

@@ -50,6 +52,9 @@ def __infer(stub, model_name, model_input):
    return prediction


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_inference_apis():
    with open(os.path.join(os.path.dirname(__file__), inference_data_json), "rb") as f:
        test_data = json.loads(f.read())
5 changes: 5 additions & 0 deletions test/pytest/test_model_custom_dependencies.py
@@ -1,7 +1,9 @@
import os
import pathlib
import platform
import subprocess

import pytest
import requests
import test_utils
from model_archiver import ModelArchiver, ModelArchiverConfig
@@ -140,6 +142,9 @@ def register_model_and_make_inference_request(expect_model_load_failure=False):
resp.raise_for_status()


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_install_dependencies_to_target_directory_with_requirements():
    test_utils.torchserve_cleanup()

49 changes: 41 additions & 8 deletions test/pytest/test_pytorch_profiler.py
@@ -6,21 +6,28 @@
import json
import os
import pathlib
import platform
import shutil
import subprocess
from concurrent import futures

import pytest
import requests

import test_utils
from concurrent import futures

REPO_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
)
data_file_mnist = os.path.join(REPO_ROOT, "examples", "image_classifier", "mnist", "test_data", "1.png")
data_file_mnist = os.path.join(
REPO_ROOT, "examples", "image_classifier", "mnist", "test_data", "1.png"
)
data_file_resnet = os.path.join(
REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "kitten.jpg"
REPO_ROOT,
"examples",
"image_classifier",
"resnet_152_batch",
"images",
"kitten.jpg",
)
data_file_resnet_dog = os.path.join(
REPO_ROOT, "examples", "image_classifier", "resnet_152_batch", "images", "dog.jpg"
Expand All @@ -33,6 +40,9 @@


@pytest.fixture
@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def set_custom_handler(handler_name):
"""
This method downloads resnet serialized file, creates mar file and sets up a custom handler
Expand All @@ -48,7 +58,8 @@ def set_custom_handler(handler_name):
serialized_file = os.path.join(test_utils.MODEL_STORE, "resnet152-394f9c45.pth")
if not os.path.exists(serialized_file):
response = requests.get(
"https://download.pytorch.org/models/resnet152-394f9c45.pth", allow_redirects=True
"https://download.pytorch.org/models/resnet152-394f9c45.pth",
allow_redirects=True,
)
assert response.status_code == 200
with open(serialized_file, "wb") as f:
@@ -58,10 +69,21 @@ def set_custom_handler(handler_name):
    cmd = test_utils.model_archiver_command_builder(
        model_name="resnet-152-batch",
        version="1.0",
        model_file=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "resnet_152_batch", "model.py"),
        model_file=os.path.join(
            test_utils.CODEBUILD_WD,
            "examples",
            "image_classifier",
            "resnet_152_batch",
            "model.py",
        ),
        serialized_file=serialized_file,
        handler=handler_name,
        extra_files=os.path.join(test_utils.CODEBUILD_WD, "examples", "image_classifier", "index_to_name.json"),
        extra_files=os.path.join(
            test_utils.CODEBUILD_WD,
            "examples",
            "image_classifier",
            "index_to_name.json",
        ),
        force=True,
    )
    print(cmd)
@@ -94,6 +116,9 @@ def set_custom_handler(handler_name):
    "handler_name",
    [os.path.join(profiler_utils, "resnet_custom.py"), "image_classifier"],
)
@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_profiler_default_and_custom_handler(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration with default and custom handler
Expand All @@ -112,6 +137,9 @@ def test_profiler_default_and_custom_handler(set_custom_handler, handler_name):
"handler_name",
[os.path.join(profiler_utils, "resnet_profiler_override.py")],
)
@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_profiler_arguments_override(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration when user overrides the profiler arguments
Expand All @@ -133,6 +161,9 @@ def test_profiler_arguments_override(set_custom_handler, handler_name):
"handler_name",
[os.path.join(profiler_utils, "resnet_profiler_override.py")],
)
@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_batch_input(set_custom_handler, handler_name):
"""
Tests pytorch profiler integration with batch inference
Expand All @@ -146,7 +177,9 @@ def test_batch_input(set_custom_handler, handler_name):

    def invoke_batch_input():
        data = open(data_file_resnet, "rb")
        response = requests.post("{}/predictions/resnet152".format(TF_INFERENCE_API), data)
        response = requests.post(
            "{}/predictions/resnet152".format(TF_INFERENCE_API), data
        )
        assert response.status_code == 200
        assert "tiger_cat" in json.loads(response.content)

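The same skipif guard is repeated on each profiler test above, and one copy is also placed on the set_custom_handler fixture; pytest generally ignores marks applied to fixture functions, so that particular copy has no effect on its own. A possible consolidation, shown here only as an alternative sketch rather than what the commit does, is a module-level pytestmark:

import platform

import pytest

# Applying the marker once at module scope skips every test in the file on
# aarch64, without decorating each test (or the fixture) individually.
pytestmark = pytest.mark.skipif(
    platform.machine() == "aarch64",
    reason="Tests skipped on aarch64 architecture",
)


def test_example():
    # Placeholder test; on aarch64 this is collected but skipped.
    assert True

Because each test in the commit carries its own decorator, the tests themselves are still skipped correctly on the Graviton runners.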
5 changes: 4 additions & 1 deletion ts/torch_handler/unit_tests/test_object_detector.py
@@ -5,6 +5,7 @@
Ensures it can load and execute an example model
"""

import platform
import sys
from pathlib import Path

@@ -56,7 +57,6 @@ def model_dir(tmp_path_factory, model_name):

@pytest.fixture(scope="module")
def context(model_dir, model_name):

    context = MockContext(
        model_name="mnist",
        model_dir=model_dir.as_posix(),
@@ -73,6 +73,9 @@ def handler(context):
    return handler


@pytest.mark.skipif(
    platform.machine() == "aarch64", reason="Test skipped on aarch64 architecture"
)
def test_handle(handler, context, image_bytes):
test_data = [{"data": image_bytes}] * 2
results = handler.handle(test_data, context)
