From bb43aa2c4872c86bc836432539be6a6252680d61 Mon Sep 17 00:00:00 2001 From: Milan Kordic <78221808+milank94@users.noreply.github.com> Date: Fri, 5 Jan 2024 07:55:34 -0500 Subject: [PATCH] 2312 TT-BUDA release alignment (#3) * initial track_pybuda; 2313 rc * fix typo in ViLT tests * Modify test case paths * Modify clean up command to include onnx and tflite file formats * Fix ONNX download paths for ResNet and RetinaNet * Add NotImplemented error to Fuyu-8B model * Fix ONNX model paths * Add clean up for .h5 files * Remove .png files from clean up * Add wideresnet in model_demos * Add Xception in model_demos * Add GhostNet in model_demos * Fix model demos table * Fix WideResNet and Xception file paths * Stream image and label files * Patch Xception variant for GS silicon * Skip Fuyu-8B (WIP) * Remove commented code --- model_demos/Makefile | 4 + model_demos/README.md | 24 +- .../whisper/pytorch_whisper_generation.py | 4 +- .../beit/pytorch_beit_classify_16_224_hf.py | 57 ++++ model_demos/cv_demos/clip/pytorch_clip.py | 12 +- .../cv_demos/densenet/pytorch_densenet.py | 3 +- .../tflite_efficientnet_lite0_1x1.py | 63 ++++ .../tflite_efficientnet_lite4_1x1.py | 63 ++++ .../cv_demos/ghostnet/timm_ghostnet.py | 53 ++++ .../cv_demos/hrnet/pytorch_hrnet_osmr.py | 2 + .../cv_demos/hrnet/pytorch_hrnet_timm.py | 2 + .../landmark/hand_landmark_lite_1x1.py | 55 ++++ .../landmark/palm_detection_lite_1x1.py | 53 ++++ .../landmark/pose_landmark_lite_1x1.py | 57 ++++ .../cv_demos/mlpmixer/timm_mlpmixer.py | 59 ++++ .../tflite_mobilenet_v2_ssd_1x1.py | 60 ++++ .../openpose/pytorch_lwopenpose_2d_osmr.py | 52 ++++ .../openpose/pytorch_lwopenpose_3d_osmr.py | 53 ++++ model_demos/cv_demos/resnet/onnx_resnet.py | 116 +++++++ .../cv_demos/retinanet/onnx_retinanet_r101.py | 81 +++++ .../pytorch_stable_diffusion.py | 3 +- .../cv_demos/unet/pytorch_unet_torchhub.py | 2 +- model_demos/cv_demos/vgg/pytorch_vgg_hf.py | 3 + model_demos/cv_demos/vgg/pytorch_vgg_osmr.py | 9 +- model_demos/cv_demos/vgg/pytorch_vgg_timm.py | 9 + .../cv_demos/vgg/pytorch_vgg_torchhub.py | 3 + .../cv_demos/vilt/pytorch_vilt_maskedlm.py | 73 +++++ .../vilt/pytorch_vilt_question_answering.py | 66 ++++ model_demos/cv_demos/vilt/vilt_model.py | 83 ++++++ .../wideresnet/pytorch_wideresnet_timm.py | 62 ++++ .../wideresnet/pytorch_wideresnet_torchhub.py | 64 ++++ .../cv_demos/xception/timm_xception.py | 76 +++++ .../cv_demos/yolo_v3/holli_src/utils.py | 256 ++++++++++++++++ .../cv_demos/yolo_v3/holli_src/yolo_layer.py | 227 ++++++++++++++ .../cv_demos/yolo_v3/holli_src/yolov3.py | 128 ++++++++ .../cv_demos/yolo_v3/holli_src/yolov3_base.py | 113 +++++++ .../cv_demos/yolo_v3/holli_src/yolov3_tiny.py | 134 +++++++++ .../cv_demos/yolo_v3/pytorch_yolov3_holli.py | 63 ++++ .../yolo_v3/pytorch_yolov3_holli_1x1.py | 74 +++++ .../yolo_v3/pytorch_yolov3_tiny_holli.py | 57 ++++ .../cv_demos/yolo_v5/pytorch_yolov5_320.py | 6 +- .../cv_demos/yolo_v5/pytorch_yolov5_480.py | 45 ++- .../cv_demos/yolo_v5/pytorch_yolov5_640.py | 43 ++- .../codegen/pytorch_codegen_causal_lm.py | 1 + .../nlp_demos/falcon/pytorch_falcon.py | 2 - .../fuyu8b/pytorch_fuyu8b_past_cache.py | 282 ++++++++++++++++++ .../nlp_demos/opt/pytorch_opt_causal_lm.py | 2 + .../nlp_demos/t5/pytorch_t5_generation.py | 7 + .../nlp_demos/xglm/pytorch_xglm_causal_lm.py | 16 +- model_demos/pyproject.toml | 13 + model_demos/requirements.txt | 1 + model_demos/tests/conftest.py | 10 +- model_demos/tests/test_onnx_resnet.py | 8 + model_demos/tests/test_onnx_retinanet.py | 8 + 
model_demos/tests/test_pytorch_beit.py | 11 + model_demos/tests/test_pytorch_distilbert.py | 6 +- model_demos/tests/test_pytorch_dpr.py | 15 +- model_demos/tests/test_pytorch_fuyu8b.py | 8 + model_demos/tests/test_pytorch_ghostnet.py | 8 + model_demos/tests/test_pytorch_mlpmixer.py | 8 + model_demos/tests/test_pytorch_openpose.py | 14 + model_demos/tests/test_pytorch_vgg.py | 11 +- model_demos/tests/test_pytorch_vilt.py | 14 + model_demos/tests/test_pytorch_wideresnet.py | 18 ++ model_demos/tests/test_pytorch_xception.py | 11 + model_demos/tests/test_pytorch_yolov3.py | 20 ++ .../tests/test_tflite_efficientnet_lite.py | 14 + model_demos/tests/test_tflite_landmark.py | 20 ++ .../tests/test_tflite_mobilenet_ssd.py | 8 + 69 files changed, 2921 insertions(+), 87 deletions(-) create mode 100644 model_demos/cv_demos/beit/pytorch_beit_classify_16_224_hf.py create mode 100644 model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py create mode 100644 model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py create mode 100644 model_demos/cv_demos/ghostnet/timm_ghostnet.py create mode 100644 model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py create mode 100644 model_demos/cv_demos/landmark/palm_detection_lite_1x1.py create mode 100644 model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py create mode 100644 model_demos/cv_demos/mlpmixer/timm_mlpmixer.py create mode 100644 model_demos/cv_demos/mobilenet_ssd/tflite_mobilenet_v2_ssd_1x1.py create mode 100644 model_demos/cv_demos/openpose/pytorch_lwopenpose_2d_osmr.py create mode 100644 model_demos/cv_demos/openpose/pytorch_lwopenpose_3d_osmr.py create mode 100644 model_demos/cv_demos/resnet/onnx_resnet.py create mode 100644 model_demos/cv_demos/retinanet/onnx_retinanet_r101.py create mode 100644 model_demos/cv_demos/vilt/pytorch_vilt_maskedlm.py create mode 100644 model_demos/cv_demos/vilt/pytorch_vilt_question_answering.py create mode 100644 model_demos/cv_demos/vilt/vilt_model.py create mode 100644 model_demos/cv_demos/wideresnet/pytorch_wideresnet_timm.py create mode 100644 model_demos/cv_demos/wideresnet/pytorch_wideresnet_torchhub.py create mode 100644 model_demos/cv_demos/xception/timm_xception.py create mode 100644 model_demos/cv_demos/yolo_v3/holli_src/utils.py create mode 100644 model_demos/cv_demos/yolo_v3/holli_src/yolo_layer.py create mode 100644 model_demos/cv_demos/yolo_v3/holli_src/yolov3.py create mode 100644 model_demos/cv_demos/yolo_v3/holli_src/yolov3_base.py create mode 100644 model_demos/cv_demos/yolo_v3/holli_src/yolov3_tiny.py create mode 100644 model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli.py create mode 100644 model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli_1x1.py create mode 100644 model_demos/cv_demos/yolo_v3/pytorch_yolov3_tiny_holli.py create mode 100644 model_demos/nlp_demos/fuyu8b/pytorch_fuyu8b_past_cache.py create mode 100644 model_demos/tests/test_onnx_resnet.py create mode 100644 model_demos/tests/test_onnx_retinanet.py create mode 100644 model_demos/tests/test_pytorch_beit.py create mode 100644 model_demos/tests/test_pytorch_fuyu8b.py create mode 100644 model_demos/tests/test_pytorch_ghostnet.py create mode 100644 model_demos/tests/test_pytorch_mlpmixer.py create mode 100644 model_demos/tests/test_pytorch_openpose.py create mode 100644 model_demos/tests/test_pytorch_vilt.py create mode 100644 model_demos/tests/test_pytorch_wideresnet.py create mode 100644 model_demos/tests/test_pytorch_xception.py create mode 100644 model_demos/tests/test_pytorch_yolov3.py create mode 100644 
model_demos/tests/test_tflite_efficientnet_lite.py create mode 100644 model_demos/tests/test_tflite_landmark.py create mode 100644 model_demos/tests/test_tflite_mobilenet_ssd.py diff --git a/model_demos/Makefile b/model_demos/Makefile index facccbb1..c2e65ed7 100644 --- a/model_demos/Makefile +++ b/model_demos/Makefile @@ -41,4 +41,8 @@ clean_tt: @find . | grep -E ".pkl_memoize_py3" | xargs rm -rf @find . | grep -E "generated_modules" | xargs rm -rf @find . | grep -E "tt_build" | xargs rm -rf + @find . -type f -name "*.onnx" | xargs rm -f + @find . -type f -name "*.tflite" | xargs rm -f + @find . -type f -name "*.h5" | xargs rm -f + @find . -type f -name "*.png" | xargs rm -f @echo "All done cleaning TT files!" diff --git a/model_demos/README.md b/model_demos/README.md index 7839b9e3..d3fd0bb2 100644 --- a/model_demos/README.md +++ b/model_demos/README.md @@ -31,6 +31,7 @@ python cv_demos/resnet/pytorch_resnet.py |-------------------------------------------|:--------:| | [ALBERT](nlp_demos/albert/) | GS, WH | | [Autoencoder](cv_demos/autoencoder/) | GS, WH | +| [BeiT](cv_demos/beit/) | GS, WH | | [BERT](nlp_demos/bert/) | GS, WH | | [CLIP](cv_demos/clip/) | GS, WH | | [CodeGen](nlp_demos/codegen/) | GS, WH | @@ -38,19 +39,28 @@ python cv_demos/resnet/pytorch_resnet.py | [DenseNet](cv_demos/densenet/) | GS, WH | | [DistilBERT](nlp_demos/distilbert/) | GS, WH | | [DPR](nlp_demos/dpr/) | GS, WH | -| [Falcon](nlp_demos/falcon/) | WH | +| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | WH | +| [Falcon-7B](nlp_demos/falcon/) | WH | | [FLAN-T5](nlp_demos/flant5/) | GS, WH | +| [Fuyu-8B](nlp_demos/fuyu8b/) | | +| [GhostNet](cv_demos/ghostnet/) | GS, WH | | [GoogLeNet](cv_demos/googlenet/) | GS, WH | | [GPT-2](nlp_demos/gpt2/) | GS, WH | | [GPT Neo](nlp_demos/gptneo/) | GS, WH | +| [Hand Landmark](cv_demos/landmark/) | WH | | [HRNet](cv_demos/hrnet/) | GS, WH | | [Inception-v4](cv_demos/inceptionv4/) | GS, WH | -| [MobileNetV1](cv_demos/mobilenetv1/) | GS, WH | -| [MobileNetV2](cv_demos/mobilenetv2/) | GS, WH | -| [MobileNetV3](cv_demos/mobilenetv3/) | GS, WH | +| [MLP-Mixer](cv_demos/mlpmixer/) | GS, WH | +| [MobileNetSSD](cv_demos/mobilenet_ssd/) | WH | +| [MobileNetV1](cv_demos/mobilenet_v1/) | GS, WH | +| [MobileNetV2](cv_demos/mobilenet_v2/) | GS, WH | +| [MobileNetV3](cv_demos/mobilenet_v3/) | GS, WH | +| [OpenPose](cv_demos/openpose/) | GS, WH | | [OPT](nlp_demos/opt/) | GS, WH | +| [Pose Landmark](cv_demos/landmark/) | WH | | [ResNet](cv_demos/resnet/) | GS, WH | | [ResNeXt](cv_demos/resnext/) | GS, WH | +| [RetinaNet](cv_demos/retinanet/) | GS, WH | | [RoBERTa](nlp_demos/roberta/) | GS, WH | | [SqueezeBERT](nlp_demos/squeezebert/) | GS, WH | | [Stable Diffusion](cv_demos/stable_diffusion/) | WH | @@ -58,10 +68,14 @@ python cv_demos/resnet/pytorch_resnet.py | [U-Net](cv_demos/unet/) | GS, WH | | [VGG](cv_demos/vgg/) | GS, WH | | [ViT](cv_demos/vit/) | GS, WH | +| [ViLT](cv_demos/vilt/) | GS, WH | | [VoVNet](cv_demos/vovnet/) | GS, WH | +| [WideResNet](cv_demos/wideresnet/) | GS, WH | | [Whisper](audio_demos/whisper/) | GS, WH | +| [Xception](cv_demos/xception/) | GS, WH | | [XGLM](nlp_demos/xglm/) | GS, WH | -| [YOLOv5](cv_demos/yolov5/) | GS, WH | +| [YOLOv3](cv_demos/yolo_v3/) | GS, WH | +| [YOLOv5](cv_demos/yolo_v5/) | GS, WH | ## Contributing diff --git a/model_demos/audio_demos/whisper/pytorch_whisper_generation.py b/model_demos/audio_demos/whisper/pytorch_whisper_generation.py index e11c5685..8a7af832 100644 --- a/model_demos/audio_demos/whisper/pytorch_whisper_generation.py
+++ b/model_demos/audio_demos/whisper/pytorch_whisper_generation.py @@ -18,9 +18,11 @@ def run_whisper_generation(variant="openai/whisper-small"): compiler_cfg.amp_level = 2 compiler_cfg.enable_enumerate_u_kt = False compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" if "small" in variant: os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "35000" - + elif "medium" in variant or "large" in variant: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" available_devices = pybuda.detect_available_devices() if available_devices[0] == BackendDevice.Grayskull: softmax_ops_to_override = [ diff --git a/model_demos/cv_demos/beit/pytorch_beit_classify_16_224_hf.py b/model_demos/cv_demos/beit/pytorch_beit_classify_16_224_hf.py new file mode 100644 index 00000000..02ef48a1 --- /dev/null +++ b/model_demos/cv_demos/beit/pytorch_beit_classify_16_224_hf.py @@ -0,0 +1,57 @@ +# BeiT Model Demo + +import os + +import pybuda +import requests +from PIL import Image +from pybuda._C.backend_api import BackendDevice +from transformers import BeitForImageClassification, BeitImageProcessor + + +def run_beit_classify_224_hf_pytorch(variant="microsoft/beit-base-patch16-224"): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + available_devices = pybuda.detect_available_devices() + + compiler_cfg.enable_t_streaming = True + if variant == "microsoft/beit-base-patch16-224": + compiler_cfg.retain_tvm_python_files = True + compiler_cfg.enable_tvm_constant_prop = True + if available_devices[0] == BackendDevice.Grayskull: + os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1" + elif variant == "microsoft/beit-large-patch16-224": + if available_devices[0] == BackendDevice.Grayskull: + compiler_cfg.retain_tvm_python_files = True + compiler_cfg.enable_tvm_constant_prop = True + os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1" + else: + compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + + # Create PyBuda module from PyTorch model + image_processor = BeitImageProcessor.from_pretrained(variant) + model = BeitForImageClassification.from_pretrained(variant) + tt_model = pybuda.PyTorchModule("pt_beit_classif_16_224", model) + + # Get sample image + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + sample_image = Image.open(requests.get(url, stream=True).raw) + + # Preprocessing + img_tensor = image_processor(sample_image, return_tensors="pt").pixel_values + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([img_tensor])) + output = output_q.get()[0].value().detach().float().numpy() + + # Postprocessing + predicted_class_idx = output.argmax(-1).item() + + # Print output + print("Predicted class:", predicted_class_idx) + print(model.config.id2label[predicted_class_idx]) + + +if __name__ == "__main__": + run_beit_classify_224_hf_pytorch() diff --git a/model_demos/cv_demos/clip/pytorch_clip.py b/model_demos/cv_demos/clip/pytorch_clip.py index 03f87226..63c0c687 100644 --- a/model_demos/cv_demos/clip/pytorch_clip.py +++ b/model_demos/cv_demos/clip/pytorch_clip.py @@ -5,7 +5,9 @@ import torch from PIL import Image from transformers import CLIPModel, CLIPProcessor -from transformers.models.clip.modeling_clip import _expand_mask, _make_causal_mask + +# from transformers.models.clip.modeling_clip import _expand_mask, _make_causal_mask +from transformers.modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask class 
CLIPVisionWrapper(torch.nn.Module): @@ -32,11 +34,15 @@ def forward(self, input_ids, attention_mask): hidden_states = self.clip_model.text_model.embeddings(input_ids=input_ids, position_ids=None) bsz, seq_len = input_shape - causal_attention_mask = _make_causal_mask(input_shape, hidden_states.dtype, device=hidden_states.device) + # CLIP's text model uses causal mask, prepare it here. + # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324 + causal_attention_mask = _create_4d_causal_attention_mask( + input_shape, hidden_states.dtype, device=hidden_states.device + ) # expand attention_mask if attention_mask is not None: # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - attention_mask = _expand_mask(attention_mask, hidden_states.dtype) + attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype) encoder_outputs = self.clip_model.text_model.encoder( inputs_embeds=hidden_states, diff --git a/model_demos/cv_demos/densenet/pytorch_densenet.py b/model_demos/cv_demos/densenet/pytorch_densenet.py index de5208b3..de81c683 100644 --- a/model_demos/cv_demos/densenet/pytorch_densenet.py +++ b/model_demos/cv_demos/densenet/pytorch_densenet.py @@ -50,14 +50,13 @@ def run_densenet_pytorch(variant="densenet121"): if available_devices[0] == BackendDevice.Wormhole_B0: compiler_cfg.default_dram_parameters = False compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b - elif available_devices[0] == BackendDevice.Grayskull: - os.environ["PYBUDA_RIBBON2"] = "1" elif variant == "densenet161": compiler_cfg.balancer_policy = "CNN" compiler_cfg.enable_t_streaming = True compiler_cfg.place_on_new_epoch("concatenate_131.dc.sparse_matmul.7.lc2") os.environ["PYBUDA_DISABLE_CONSTANT_FOLDING"] = "1" + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" os.environ["PYBUDA_GRAPHSOLVER_SELF_CUT_TYPE"] = "ConsumerOperandDataEdgesFirst" # Device specific configurations available_devices = pybuda.detect_available_devices() diff --git a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py new file mode 100644 index 00000000..451af0b3 --- /dev/null +++ b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite0_1x1.py @@ -0,0 +1,63 @@ +# EfficientNet-Lite0 1x1 demo + +import os +import shutil +import tarfile + +import pybuda +import requests +import torch +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice + + +def run_efficientnet_lite0_1x1(): + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "FastCut" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16 + + # Set PyBUDA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + + # Download model weights + MODEL = "efficientnet-lite0" + url = f"https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{MODEL}.tar.gz" + extract_to = "cv_demos/efficientnet_lite" + file_name = url.split("/")[-1] + response = 
requests.get(url, stream=True) + with open(file_name, "wb") as f: + f.write(response.content) + with tarfile.open(file_name, "r:gz") as tar: + tar.extractall(path=extract_to) + os.remove(file_name) + + # Load model path + tflite_path = f"cv_demos/efficientnet_lite/{MODEL}/{MODEL}-fp32.tflite" + tt_model = TFLiteModule("tflite_efficientnet_lite0", tflite_path) + + # Run inference on Tenstorrent device + input_shape = (1, 224, 224, 3) + input_tensor = torch.rand(input_shape) + + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) + output = output_q.get()[0].value().detach().float().numpy() + print(output) + + # Remove remnant files + shutil.rmtree(extract_to + "/" + MODEL) + + +if __name__ == "__main__": + run_efficientnet_lite0_1x1() diff --git a/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py new file mode 100644 index 00000000..cfe61eec --- /dev/null +++ b/model_demos/cv_demos/efficientnet_lite/tflite_efficientnet_lite4_1x1.py @@ -0,0 +1,63 @@ +# EfficientNet-Lite4 1x1 demo + +import os +import shutil +import tarfile + +import pybuda +import requests +import torch +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice + + +def run_efficientnet_lite4_1x1(): + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "FastCut" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16 + + # Set PyBUDA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + + # Download model weights + MODEL = "efficientnet-lite4" + url = f"https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{MODEL}.tar.gz" + extract_to = "cv_demos/efficientnet_lite" + file_name = url.split("/")[-1] + response = requests.get(url, stream=True) + with open(file_name, "wb") as f: + f.write(response.content) + with tarfile.open(file_name, "r:gz") as tar: + tar.extractall(path=extract_to) + os.remove(file_name) + + # Load model path + tflite_path = f"cv_demos/efficientnet_lite/{MODEL}/{MODEL}-fp32.tflite" + tt_model = TFLiteModule("tflite_efficientnet_lite4", tflite_path) + + # Run inference on Tenstorrent device + input_shape = (1, 320, 320, 3) + input_tensor = torch.rand(input_shape) + + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) + output = output_q.get()[0].value().detach().float().numpy() + print(output) + + # Remove remnant files + shutil.rmtree(extract_to + "/" + MODEL) + + +if __name__ == "__main__": + run_efficientnet_lite4_1x1() diff --git a/model_demos/cv_demos/ghostnet/timm_ghostnet.py b/model_demos/cv_demos/ghostnet/timm_ghostnet.py new file mode 100644 index 00000000..914e5981 --- /dev/null +++ b/model_demos/cv_demos/ghostnet/timm_ghostnet.py @@ -0,0 +1,53 @@ +# GhostNet + +import os +import urllib + +import pybuda +import requests +import timm +import torch +from PIL import Image + + +def run_ghostnet_timm(): + # Set PyBuda configuration parameters + compiler_cfg = 
pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.enable_t_streaming = True + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + os.environ["PYBUDA_RIBBON2"] = "1" + + model = timm.create_model("ghostnet_100", pretrained=True) + + # Create PyBuda module from PyTorch model + tt_model = pybuda.PyTorchModule("ghostnet_100_timm_pt", model) + + data_config = timm.data.resolve_data_config({}, model=model) + transforms = timm.data.create_transform(**data_config) + + url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + img = Image.open(requests.get(url, stream=True).raw).convert("RGB") + img_tensor = transforms(img).unsqueeze(0) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([img_tensor])) + output = output_q.get()[0].value() + + top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5) + + # Get imagenet class mappings + url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + + for i in range(top5_probabilities.size(1)): + class_idx = top5_class_indices[0, i].item() + class_prob = top5_probabilities[0, i].item() + class_label = categories[class_idx] + + print(f"{class_label} : {class_prob}") + + +if __name__ == "__main__": + run_ghostnet_timm() diff --git a/model_demos/cv_demos/hrnet/pytorch_hrnet_osmr.py b/model_demos/cv_demos/hrnet/pytorch_hrnet_osmr.py index 22459fc4..e51a3f14 100644 --- a/model_demos/cv_demos/hrnet/pytorch_hrnet_osmr.py +++ b/model_demos/cv_demos/hrnet/pytorch_hrnet_osmr.py @@ -1,5 +1,6 @@ # HRNet Demo Script +import os import urllib import pybuda @@ -22,6 +23,7 @@ def run_hrnet_osmr_pytorch(variant="hrnet_w18_small_v1"): compiler_cfg.balancer_policy = "CNN" compiler_cfg.enable_t_streaming = True compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Variant specific configurations if variant in ["hrnetv2_w44", "hrnetv2_w48"]: diff --git a/model_demos/cv_demos/hrnet/pytorch_hrnet_timm.py b/model_demos/cv_demos/hrnet/pytorch_hrnet_timm.py index 3f03ef2c..3f81b447 100644 --- a/model_demos/cv_demos/hrnet/pytorch_hrnet_timm.py +++ b/model_demos/cv_demos/hrnet/pytorch_hrnet_timm.py @@ -1,5 +1,6 @@ # HRNet Demo Script +import os import urllib import pybuda @@ -23,6 +24,7 @@ def run_hrnet_timm_pytorch(variant="hrnet_w18_small"): compiler_cfg.balancer_policy = "CNN" compiler_cfg.enable_t_streaming = True compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Variant specific configurations if variant == "hrnet_w48": diff --git a/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py b/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py new file mode 100644 index 00000000..937c420b --- /dev/null +++ b/model_demos/cv_demos/landmark/hand_landmark_lite_1x1.py @@ -0,0 +1,55 @@ +# Hand Landmark lite 1x1 demo + +import os + +import pybuda +import requests +import torch +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice + + +def run_hand_landmark_lite_1x1(): + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + 
+ # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "ConsumerOperandDataEdgesFirst" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + + # Set PyBUDA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + os.environ["PYBUDA_ENABLE_SINGLE_BUFFER_FALLBACK"] = "1" + + # Download model weights + url = "https://storage.googleapis.com/mediapipe-assets/hand_landmark_lite.tflite" + tflite_path = "cv_demos/landmark/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(tflite_path, "wb") as f: + f.write(response.content) + + # Load Hand Landmark model + tt_model = TFLiteModule("tflite_hand_landmark_lite", tflite_path) + + # Run inference on Tenstorrent device + input_shape = (1, 224, 224, 3) + input_tensor = torch.rand(input_shape) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) + output = output_q.get() + print(output) + + # Remove weight file + os.remove(tflite_path) + + +if __name__ == "__main__": + run_hand_landmark_lite_1x1() diff --git a/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py b/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py new file mode 100644 index 00000000..4ab7ae39 --- /dev/null +++ b/model_demos/cv_demos/landmark/palm_detection_lite_1x1.py @@ -0,0 +1,53 @@ +# Palm Detection Lite 1x1 demo + +import os + +import pybuda +import requests +import torch +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice + + +def run_palm_detection_lite_1x1(): + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "ConsumerOperandDataEdgesFirst" + + # Set PyBUDA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + + # Download model weights + url = "https://storage.googleapis.com/mediapipe-assets/palm_detection_lite.tflite" + tflite_path = "cv_demos/landmark/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(tflite_path, "wb") as f: + f.write(response.content) + + # Load Palm Detection model + tt_model = TFLiteModule("tflite_palm_detection_lite", tflite_path) + + # Run inference on Tenstorrent device + input_shape = (1, 192, 192, 3) + input_tensor = torch.rand(input_shape) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) + output = output_q.get() + print(output) + + # Remove weight file + os.remove(tflite_path) + + +if __name__ == "__main__": + run_palm_detection_lite_1x1() diff --git a/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py b/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py new file mode 100644 index 00000000..827320e5 --- /dev/null +++ b/model_demos/cv_demos/landmark/pose_landmark_lite_1x1.py @@ -0,0 +1,57 @@ +# Pose Landmark Lite 1x1 demo + +import os + +import 
pybuda +import requests +import torch +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice + + +def run_pose_landmark_lite_1x1(): + + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "FastCut" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.enable_single_buffer_fallback = True + + # Set PyBUDA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_SPLIT_RESIZE2D"] = "128" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + os.environ["PYBUDA_MAX_CONCAT_INPUTS"] = "6" + + # Download model weights + url = "https://storage.googleapis.com/mediapipe-assets/pose_landmark_lite.tflite" + tflite_path = "cv_demos/landmark/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(tflite_path, "wb") as f: + f.write(response.content) + + # Load Pose Landmark model + tt_model = TFLiteModule("tflite_pose_landmark_light", tflite_path) + + # Run inference on Tenstorrent device + input_shape = (1, 256, 256, 3) + input_tensor = torch.rand(input_shape) + output_q = pybuda.run_inference(tt_model, inputs=([input_tensor])) + output = output_q.get() + print(output) + + # Remove weight file + os.remove(tflite_path) + + +if __name__ == "__main__": + run_pose_landmark_lite_1x1() diff --git a/model_demos/cv_demos/mlpmixer/timm_mlpmixer.py b/model_demos/cv_demos/mlpmixer/timm_mlpmixer.py new file mode 100644 index 00000000..06ce380d --- /dev/null +++ b/model_demos/cv_demos/mlpmixer/timm_mlpmixer.py @@ -0,0 +1,59 @@ +# MLP-Mixer - TIMM Demo Script + +import urllib + +import pybuda +import requests +import timm +import torch +from PIL import Image +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform + + +def run_mlpmixer_timm(): + + # Load MLP-Mixer feature extractor and model from TIMM + # "mixer_b16_224", "mixer_b16_224_in21k", "mixer_b16_224_miil", "mixer_b16_224_miil_in21k", + # "mixer_b32_224", "mixer_l16_224", "mixer_l16_224_in21k", + # "mixer_l32_224", "mixer_s16_224", "mixer_s32_224" + variant = "mixer_b16_224" + model = timm.create_model(variant, pretrained=True) + config = resolve_data_config({}, model=model) + transform = create_transform(**config) + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + + # Load data sample + url = "https://datasets-server.huggingface.co/assets/imagenet-1k/--/default/train/18/image/image.jpg" + image = Image.open(requests.get(url, stream=True).raw).convert("RGB") + label = "tiger" + + # Data preprocessing + pixel_values = transform(image).unsqueeze(0) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(pybuda.PyTorchModule(f"timm_{variant}", model), inputs=[(pixel_values,)]) + output = output_q.get() + + # Data postprocessing + probabilities = torch.nn.functional.softmax(output[0].value()[0], dim=0) + + # Get ImageNet class mappings + 
url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + + # Get top-k prediction + top1_prob, top1_catid = torch.topk(probabilities, 1) + predicted_label = categories[top1_catid] + + # Display output + print(f"True Label: {label} | Predicted Label: {predicted_label} | Predicted Probability: {top1_prob.item():.2f}") + + +if __name__ == "__main__": + run_mlpmixer_timm() diff --git a/model_demos/cv_demos/mobilenet_ssd/tflite_mobilenet_v2_ssd_1x1.py b/model_demos/cv_demos/mobilenet_ssd/tflite_mobilenet_v2_ssd_1x1.py new file mode 100644 index 00000000..42a68fcb --- /dev/null +++ b/model_demos/cv_demos/mobilenet_ssd/tflite_mobilenet_v2_ssd_1x1.py @@ -0,0 +1,60 @@ +# MobileNet SSD 1x1 Demo Script + +import os + +import pybuda +import requests +from PIL import Image +from pybuda import TFLiteModule +from pybuda._C.backend_api import BackendDevice +from torchvision import transforms + + +def run_mobilenetv2_ssd_1x1_tflite(): + + # Set PyBUDA configuration parameters + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] != BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported on Grayskull") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_tvm_constant_prop = True + compiler_cfg.graph_solver_self_cut_type = "FastCut" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.cpu_fallback_ops = set(["concatenate"]) + + # Set PyBDUA environment variable + os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + + # Download model weights + url = "https://storage.googleapis.com/mediapipe-models/object_detector/ssd_mobilenet_v2/float32/latest/ssd_mobilenet_v2.tflite" + tflite_path = "cv_demos/mobilenet_ssd/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(tflite_path, "wb") as f: + f.write(response.content) + + # Load model path + tt_model = TFLiteModule("tflite_mobilenet_ssd", tflite_path) + + # Image preprocessing + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + image = Image.open(requests.get(url, stream=True).raw) + transform = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()]) + img_tensor = transform(image).permute((1, 2, 0)).unsqueeze(0) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([img_tensor])) + output = output_q.get() + print(output) + + # Remove weight file + os.remove(tflite_path) + + +if __name__ == "__main__": + run_mobilenetv2_ssd_1x1_tflite() diff --git a/model_demos/cv_demos/openpose/pytorch_lwopenpose_2d_osmr.py b/model_demos/cv_demos/openpose/pytorch_lwopenpose_2d_osmr.py new file mode 100644 index 00000000..5631f4d6 --- /dev/null +++ b/model_demos/cv_demos/openpose/pytorch_lwopenpose_2d_osmr.py @@ -0,0 +1,52 @@ +# LW-OpenPose 2D Demo Script + +import pybuda +import requests +from PIL import Image +from pytorchcv.model_provider import get_model as ptcv_get_model +from torchvision import transforms + + +def get_image_tensor(): + # Image processing + url = "https://raw.githubusercontent.com/axinc-ai/ailia-models/master/pose_estimation_3d/blazepose-fullbody/girl-5204299_640.jpg" + input_image = 
Image.open(requests.get(url, stream=True).raw) + preprocess = transforms.Compose( + [ + transforms.Resize(224), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + return input_batch + + +def run_lwopenpose_2d_osmr_pytorch(): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "CNN" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_auto_fusing = False + compiler_cfg.default_df_override = pybuda.DataFormat.Float16 + + # Create PyBuda module from PyTorch model + model = ptcv_get_model("lwopenpose2d_mobilenet_cmupan_coco", pretrained=True) + model.eval() + tt_model = pybuda.PyTorchModule("pt_lwopenpose_2d_osmr", model) + + input_batch = get_image_tensor() + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([input_batch])) + output = output_q.get()[0].value() + + # Print output + print(output) + + +if __name__ == "__main__": + run_lwopenpose_2d_osmr_pytorch() diff --git a/model_demos/cv_demos/openpose/pytorch_lwopenpose_3d_osmr.py b/model_demos/cv_demos/openpose/pytorch_lwopenpose_3d_osmr.py new file mode 100644 index 00000000..982d5f5c --- /dev/null +++ b/model_demos/cv_demos/openpose/pytorch_lwopenpose_3d_osmr.py @@ -0,0 +1,53 @@ +# LW-OpenPose 3D Demo Script + +import pybuda +import requests +from PIL import Image +from pytorchcv.model_provider import get_model as ptcv_get_model +from torchvision import transforms + + +def get_image_tensor(): + # Image processing + url = "https://raw.githubusercontent.com/axinc-ai/ailia-models/master/pose_estimation_3d/blazepose-fullbody/girl-5204299_640.jpg" + input_image = Image.open(requests.get(url, stream=True).raw) + preprocess = transforms.Compose( + [ + transforms.Resize(224), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + return input_batch + + +def run_lwopenpose_3d_osmr_pytorch(): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "CNN" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_auto_fusing = False + compiler_cfg.default_df_override = pybuda.DataFormat.Float16 + + # Create PyBuda module from PyTorch model + model = ptcv_get_model("lwopenpose3d_mobilenet_cmupan_coco", pretrained=True) + model.eval() + tt_model = pybuda.PyTorchModule("pt_lwopenpose_3d_osmr", model) + + # Get sample input + input_batch = get_image_tensor() + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([input_batch])) + output = output_q.get()[0].value() + + # Print output + print(output) + + +if __name__ == "__main__": + run_lwopenpose_3d_osmr_pytorch() diff --git a/model_demos/cv_demos/resnet/onnx_resnet.py b/model_demos/cv_demos/resnet/onnx_resnet.py new file mode 100644 index 00000000..867c0f6c --- /dev/null +++ b/model_demos/cv_demos/resnet/onnx_resnet.py @@ -0,0 +1,116 @@ +# ResNet Demo Script - ONNX +# Uses torch and torchvision for data pre- and post-processing; +# can use other frameworks such as MXNet, TensorFlow or NumPy + +import os +import 
urllib + +import onnx +import pybuda +import requests +import torch +from PIL import Image +from torchvision import transforms + + +def preprocess(image: Image) -> torch.tensor: + """Image preprocessing for ResNet50 + + Parameters + ---------- + image : PIL.Image + PIL Image sample + + Returns + ------- + torch.tensor + Preprocessed input tensor + """ + transform_fn = transforms.Compose( + [ + transforms.Resize([256, 256]), + transforms.RandomCrop(224), + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ] + ) + pixel_values = transform_fn(image).unsqueeze(0) + + return pixel_values + + +def postprocess(predictions: torch.tensor) -> tuple: + """Model prediction postprocessing for ResNet50 + + Parameters + ---------- + predictions : torch.tensor + Model predictions + + Returns + ------- + tuple + topk probability and category ID + """ + + # Get probabilities + probabilities = torch.nn.functional.softmax(predictions, dim=0) + + # Get top-k prediction + top1_prob, top1_catid = torch.topk(probabilities, 1) + + return top1_prob, top1_catid + + +def run_resnet_onnx(): + + # Download model weights + url = "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-v1-7.onnx?download=" + load_path = "cv_demos/resnet/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(load_path, "wb") as f: + f.write(response.content) + + # Load ResNet feature extractor and model checkpoint from HuggingFace + model = onnx.load(load_path) + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.balancer_policy = "CNN" + compiler_cfg.enable_t_streaming = True + compiler_cfg.enable_auto_fusing = False + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + + # Load data sample + url = "https://datasets-server.huggingface.co/assets/imagenet-1k/--/default/train/18/image/image.jpg" + image = Image.open(requests.get(url, stream=True).raw) + label = "tiger" + + # Data preprocessing + pixel_values = preprocess(image) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference( + pybuda.OnnxModule("onnx_resnet50", model, load_path), + inputs=[(pixel_values,)], + ) + output = output_q.get() + + # Data postprocessing + top1_prob, top1_catid = postprocess(output[0].value()[0]) + + # Get ImageNet class mappings + url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + predicted_label = categories[top1_catid] + + # Results + print(f"True Label: {label} | Predicted Label: {predicted_label} | Predicted Probability: {top1_prob.item():.2f}") + + # Remove weight file + os.remove(load_path) + + +if __name__ == "__main__": + run_resnet_onnx() diff --git a/model_demos/cv_demos/retinanet/onnx_retinanet_r101.py b/model_demos/cv_demos/retinanet/onnx_retinanet_r101.py new file mode 100644 index 00000000..7e3e4945 --- /dev/null +++ b/model_demos/cv_demos/retinanet/onnx_retinanet_r101.py @@ -0,0 +1,81 @@ +# import PyBuda library + +import os + +import numpy as np +import onnx +import pybuda +import requests +import torch +from PIL import Image + + +def img_preprocess(scal_val=1): + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + pil_img = Image.open(requests.get(url, stream=True).raw) + scale = scal_val + w, h = pil_img.size + print("----", w, h) + newW, 
newH = int(scale * w), int(scale * h) + newW, newH = 640, 480 + assert newW > 0 and newH > 0, "Scale is too small, resized images would have no pixel" + pil_img = pil_img.resize((newW, newH), resample=Image.BICUBIC) + img = np.asarray(pil_img, dtype=np.float32) + if img.ndim == 2: + img = img[np.newaxis, ...] + else: + img = img.transpose((2, 0, 1)) + if (img > 1).any(): + img = img / 255.0 + img = torch.from_numpy(img) + img = img.unsqueeze(0) + return img + + +def run_retinanet_r101_640x480_onnx(): + + # Set PyBuda configuration parameters + os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" + os.environ["PYBUDA_DISABLE_CONV_MULTI_OP_FRACTURE"] = "1" + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{75*1024}" + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] == pybuda.BackendDevice.Grayskull: + os.environ["PYBUDA_RIBBON2"] = "1" + os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.graph_solver_self_cut_type = "ConsumerOperandDataEdgesFirst" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.enable_auto_fusing = False + compiler_cfg.conv_multi_op_fracture_factor_override["conv2d_356"] = 3 + + # Download model weights + url = "https://github.com/onnx/models/raw/main/validated/vision/object_detection_segmentation/retinanet/model/retinanet-9.onnx?download=" + load_path = "cv_demos/retinanet/" + url.split("/")[-1] + response = requests.get(url, stream=True) + with open(load_path, "wb") as f: + f.write(response.content) + + # Create PyBuda module from PyTorch model + model = onnx.load(load_path) + tt_model = pybuda.OnnxModule("onnx_retinanet", model, load_path) + + # Image preprocessing + img_tensor = img_preprocess() + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([img_tensor])) + output = output_q.get() + + # Print outputs + print(output) + + # Remove weight file + os.remove(load_path) + + +if __name__ == "__main__": + run_retinanet_r101_640x480_onnx() diff --git a/model_demos/cv_demos/stable_diffusion/pytorch_stable_diffusion.py b/model_demos/cv_demos/stable_diffusion/pytorch_stable_diffusion.py index 7f20b359..6f8ce01d 100644 --- a/model_demos/cv_demos/stable_diffusion/pytorch_stable_diffusion.py +++ b/model_demos/cv_demos/stable_diffusion/pytorch_stable_diffusion.py @@ -4,7 +4,6 @@ from typing import List, Optional, Union import pybuda -import pytest import torch from diffusers import StableDiffusionPipeline from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput @@ -236,8 +235,8 @@ def run_stable_diffusion_pytorch(variant="CompVis/stable-diffusion-v1-4"): available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == pybuda._C.backend_api.BackendDevice.Grayskull: - pytest.skip("Model not supported on Grayskull") raise NotImplementedError("Model not supported on Grayskull") + # Set inference steps num_inference_steps = 50 diff --git a/model_demos/cv_demos/unet/pytorch_unet_torchhub.py b/model_demos/cv_demos/unet/pytorch_unet_torchhub.py index 2375e3dd..0530e883 100644 --- a/model_demos/cv_demos/unet/pytorch_unet_torchhub.py +++ b/model_demos/cv_demos/unet/pytorch_unet_torchhub.py @@ -15,7 +15,7 @@ def run_unet_torchhub_pytorch(): # Set PyBuda configuration parameters compiler_cfg = pybuda.config._get_global_compiler_config() - 
compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.balancer_policy = "CNN" compiler_cfg.enable_t_streaming = True compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" diff --git a/model_demos/cv_demos/vgg/pytorch_vgg_hf.py b/model_demos/cv_demos/vgg/pytorch_vgg_hf.py index adf6e267..725ed201 100644 --- a/model_demos/cv_demos/vgg/pytorch_vgg_hf.py +++ b/model_demos/cv_demos/vgg/pytorch_vgg_hf.py @@ -25,6 +25,9 @@ def run_vgg_19_hf_pytorch(variant="vgg19"): if available_devices: if available_devices[0] == BackendDevice.Grayskull: os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + else: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" + os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" """ # https://pypi.org/project/vgg-pytorch/ diff --git a/model_demos/cv_demos/vgg/pytorch_vgg_osmr.py b/model_demos/cv_demos/vgg/pytorch_vgg_osmr.py index 5e879c96..d01cdf89 100644 --- a/model_demos/cv_demos/vgg/pytorch_vgg_osmr.py +++ b/model_demos/cv_demos/vgg/pytorch_vgg_osmr.py @@ -22,10 +22,13 @@ def run_vgg_osmr_pytorch(variant="vgg11"): # Device specific configurations available_devices = pybuda.detect_available_devices() - if variant in ["vgg11", "vgg13", "vgg16", "vgg19", "bn_vgg19", "bn_vgg19b"]: - if available_devices: - if available_devices[0] == BackendDevice.Grayskull: + if available_devices: + if available_devices[0] == BackendDevice.Grayskull: + if variant in ["vgg11", "vgg13", "vgg16", "vgg19", "bn_vgg19", "bn_vgg19b"]: os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + else: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" + os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" # Create PyBuda module from PyTorch model model_ckpt = variant diff --git a/model_demos/cv_demos/vgg/pytorch_vgg_timm.py b/model_demos/cv_demos/vgg/pytorch_vgg_timm.py index a64cc4ae..4ad29bb2 100644 --- a/model_demos/cv_demos/vgg/pytorch_vgg_timm.py +++ b/model_demos/cv_demos/vgg/pytorch_vgg_timm.py @@ -1,5 +1,6 @@ # VGG +import os import urllib import pybuda @@ -7,6 +8,7 @@ import timm import torch from PIL import Image +from pybuda._C.backend_api import BackendDevice from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform @@ -49,6 +51,13 @@ def run_vgg_bn19_timm_pytorch(variant="vgg19_bn"): compiler_cfg.balancer_policy = "Ribbon" compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + # Device specific configurations + available_devices = pybuda.detect_available_devices() + if available_devices: + if available_devices[0] == BackendDevice.Wormhole_B0: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" + os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" + # Create PyBuda module from PyTorch model tt_model = pybuda.PyTorchModule(model_name + "_timm_pt", model) diff --git a/model_demos/cv_demos/vgg/pytorch_vgg_torchhub.py b/model_demos/cv_demos/vgg/pytorch_vgg_torchhub.py index 9a8aeb63..82de2740 100644 --- a/model_demos/cv_demos/vgg/pytorch_vgg_torchhub.py +++ b/model_demos/cv_demos/vgg/pytorch_vgg_torchhub.py @@ -24,6 +24,9 @@ def run_vgg_bn19_torchhub_pytorch(variant="vgg19_bn"): if available_devices: if available_devices[0] == BackendDevice.Grayskull: os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + else: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" + os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" # Create PyBuda module from PyTorch model model = torch.hub.load("pytorch/vision:v0.10.0", variant, pretrained=True) diff --git 
a/model_demos/cv_demos/vilt/pytorch_vilt_maskedlm.py b/model_demos/cv_demos/vilt/pytorch_vilt_maskedlm.py new file mode 100644 index 00000000..fa8228a1 --- /dev/null +++ b/model_demos/cv_demos/vilt/pytorch_vilt_maskedlm.py @@ -0,0 +1,73 @@ +import os + +import pybuda +import requests +import torch +from PIL import Image +from transformers import ViltConfig, ViltForMaskedLM, ViltProcessor + +from .vilt_model import ViLtEmbeddingWrapper, ViltModelWrapper + + +def run_vilt_maskedlm_pytorch(variant="dandelin/vilt-b32-mlm"): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.enable_t_streaming = True + compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + compiler_cfg.balancer_policy = "Ribbon" + + os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + os.environ["PYBUDA_RIBBON2"] = "1" + + # Sample Image + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + sample_image = Image.open(requests.get(url, stream=True).raw) + + # Sample text + sample_text = "a bunch of cats laying on a [MASK]." + + model_ckpt = variant + + # Set model configurations + config = ViltConfig.from_pretrained(model_ckpt) + config_dict = config.to_dict() + config_dict["return_dict"] = False + config = ViltConfig(**config_dict) + + # Load model and processor from HuggingFace + processor = ViltProcessor.from_pretrained(model_ckpt) + model = ViltForMaskedLM.from_pretrained(model_ckpt, config=config) + model.eval() + + # prepare inputs + encoding = processor(sample_image, sample_text, return_tensors="pt") + + # Wrapper + text_vision_embedding_model = ViLtEmbeddingWrapper(model) + vilt_model = ViltModelWrapper(model=model, task="maskedlm", text_seq_len=encoding["input_ids"].shape[1]) + + embedding_output, attention_mask = text_vision_embedding_model(**encoding) + + tt0 = pybuda.TTDevice("tt0", module=pybuda.PyTorchModule("pt_vilt_maskedlm", vilt_model)) + tt0.push_to_inputs((embedding_output.detach().cpu(), attention_mask.detach().cpu().to(torch.float32))) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(_sequential=True) + mlm_logits = output_q.get()[0].value().detach().float() + + # PostProcessing + input_ids = encoding["input_ids"][0][1:-1] + mlm_logits = mlm_logits[0, 1 : encoding.input_ids.shape[1] - 1, :] + + mlm_values, mlm_ids = mlm_logits.softmax(dim=-1).max(dim=-1) + mlm_values[input_ids != 103] = 0 + select = mlm_values.argmax().item() + inferred_token = processor.decode(mlm_ids[select].item()) + + # Model Output (i.e Masked token: Couch) + print("Masked token: ", inferred_token) + + +if __name__ == "__main__": + run_vilt_maskedlm_pytorch() diff --git a/model_demos/cv_demos/vilt/pytorch_vilt_question_answering.py b/model_demos/cv_demos/vilt/pytorch_vilt_question_answering.py new file mode 100644 index 00000000..991b3a8e --- /dev/null +++ b/model_demos/cv_demos/vilt/pytorch_vilt_question_answering.py @@ -0,0 +1,66 @@ +import os + +import pybuda +import requests +import torch +from PIL import Image +from transformers import ViltConfig, ViltForQuestionAnswering, ViltProcessor + +from .vilt_model import ViLtEmbeddingWrapper, ViltModelWrapper + + +def run_vilt_for_question_answering_pytorch(variant="dandelin/vilt-b32-finetuned-vqa"): + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.enable_t_streaming = True + compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + compiler_cfg.balancer_policy = "Ribbon" + + 
os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + os.environ["PYBUDA_RIBBON2"] = "1" + + # Sample Image + url = "http://images.cocodataset.org/val2017/000000039769.jpg" + sample_image = Image.open(requests.get(url, stream=True).raw) + + # Sample text + sample_text = "How many cats are there?" + + model_ckpt = variant + + # Set model configurations + config = ViltConfig.from_pretrained(model_ckpt) # matmul_2008 + config_dict = config.to_dict() + config_dict["return_dict"] = False + config = ViltConfig(**config_dict) + + # Load model and processor from HuggingFace + processor = ViltProcessor.from_pretrained(model_ckpt) + model = ViltForQuestionAnswering.from_pretrained(model_ckpt, config=config) + model.eval() + + # Sample inputs + encoding = processor(sample_image, sample_text, return_tensors="pt") + + # Wrapper + text_vision_embedding_model = ViLtEmbeddingWrapper(model) + viltquestionanswering_model = ViltModelWrapper(model, task="qa") + + embedding_output, attention_mask = text_vision_embedding_model(**encoding) + + tt0 = pybuda.TTDevice("tt0", module=pybuda.PyTorchModule("pt_viltquestionanswering", viltquestionanswering_model)) + + tt0.push_to_inputs(embedding_output.detach().cpu(), attention_mask.detach().cpu().to(torch.float32)) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(_sequential=True) + + # Model output (i.e Predicted answer: 2) + output = output_q.get()[0].value().detach().float() + idx = output.argmax(-1).item() + print("Predicted answer: ", model.config.id2label[idx]) + + +if __name__ == "__main__": + run_vilt_for_question_answering_pytorch() diff --git a/model_demos/cv_demos/vilt/vilt_model.py b/model_demos/cv_demos/vilt/vilt_model.py new file mode 100644 index 00000000..d1ac342e --- /dev/null +++ b/model_demos/cv_demos/vilt/vilt_model.py @@ -0,0 +1,83 @@ +import torch + + +class ViLtEmbeddingWrapper(torch.nn.Module): + def __init__(self, model): + super().__init__() + self.vilt_model = model + + def forward( + self, + input_ids=None, + attention_mask=None, + token_type_ids=None, + pixel_values=None, + pixel_mask=None, + inputs_embeds=None, + image_embeds=None, + image_token_type_idx=None, + ): + + embeddings, masks = self.vilt_model.vilt.embeddings( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + pixel_values=pixel_values, + pixel_mask=pixel_mask, + inputs_embeds=inputs_embeds, + image_embeds=image_embeds, + image_token_type_idx=image_token_type_idx, + ) + return embeddings, masks + + +class ViltModelWrapper(torch.nn.Module): + def __init__(self, model, task=None, text_seq_len=None): + super().__init__() + self.vilt_model = model + self.task = task + self.text_seq_len = text_seq_len + + def forward(self, embedding_output, attention_mask, head_mask=None): + + head_mask = self.vilt_model.vilt.get_head_mask(head_mask, self.vilt_model.vilt.config.num_hidden_layers) + + extended_attention_mask = attention_mask[:, None, None, :] + extended_attention_mask = (1.0 - extended_attention_mask) * torch.finfo(torch.float32).min + + encoder_outputs = self.vilt_model.vilt.encoder( + embedding_output, + attention_mask=extended_attention_mask, + head_mask=head_mask, + return_dict=False, + ) + + sequence_output = encoder_outputs[0] + + sequence_output = self.vilt_model.vilt.layernorm(sequence_output) + pooled_output = ( + self.vilt_model.vilt.pooler(sequence_output) if self.vilt_model.vilt.pooler is not None else None + ) + + viltmodel_output = (sequence_output, pooled_output) + encoder_outputs[1:] + + 
sequence_output, pooled_output = viltmodel_output[:2] + + if self.task == "maskedlm": + + if self.text_seq_len is None: + raise ValueError("You must provide text sequence length") + + text_features, _ = (sequence_output[:, : self.text_seq_len], sequence_output[:, self.text_seq_len :]) + + mlm_logits = self.vilt_model.mlm_score(text_features) + + viltmodel_output = (mlm_logits,) + viltmodel_output[2:] + + if self.task == "qa": + + logits = self.vilt_model.classifier(pooled_output) + + viltmodel_output = (logits,) + viltmodel_output[2:] + + return viltmodel_output diff --git a/model_demos/cv_demos/wideresnet/pytorch_wideresnet_timm.py b/model_demos/cv_demos/wideresnet/pytorch_wideresnet_timm.py new file mode 100644 index 00000000..3fa3f2b4 --- /dev/null +++ b/model_demos/cv_demos/wideresnet/pytorch_wideresnet_timm.py @@ -0,0 +1,62 @@ +# WideResNet + +import os +import urllib + +import pybuda +import requests +import timm +import torch +from PIL import Image +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform + + +def run_wideresnet_timm_pytorch(variant="wide_resnet50_2"): + """ + Variants = { + 'wide_resnet50_2', + 'wide_resnet101_2' + } + """ + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.enable_t_streaming = True + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + os.environ["PYBUDA_RIBBON2"] = "1" + + model_name = variant + model = timm.create_model(model_name, pretrained=True) + + config = resolve_data_config({}, model=model) + transform = create_transform(**config) + + url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + img = Image.open(requests.get(url, stream=True).raw).convert("RGB") + tensor = transform(img).unsqueeze(0) # transform and add batch dimension + + # Get imagenet class mappings + url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + + # Create PyBuda module from PyTorch model + tt_model = pybuda.PyTorchModule(model_name + "_timm_pt", model) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([tensor])) + output = output_q.get()[0].value() + + # Postprocessing + probabilities = torch.nn.functional.softmax(output[0], dim=0) + + # Print top categories per image + top5_prob, top5_catid = torch.topk(probabilities, 5) + for i in range(top5_prob.size(0)): + print(categories[top5_catid[i]], top5_prob[i].item()) + + +if __name__ == "__main__": + run_wideresnet_timm_pytorch() diff --git a/model_demos/cv_demos/wideresnet/pytorch_wideresnet_torchhub.py b/model_demos/cv_demos/wideresnet/pytorch_wideresnet_torchhub.py new file mode 100644 index 00000000..608d1203 --- /dev/null +++ b/model_demos/cv_demos/wideresnet/pytorch_wideresnet_torchhub.py @@ -0,0 +1,64 @@ +# WideResNet Demo Script + +import os +import urllib + +import pybuda +import requests +import torch +from PIL import Image +from torchvision import transforms + + +def run_wideresnet_torchhub_pytorch(variant="wide_resnet50_2"): + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.enable_t_streaming = True + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.default_df_override = 
pybuda.DataFormat.Float16_b + os.environ["PYBUDA_RIBBON2"] = "1" + + # Create PyBuda module from PyTorch model + model = torch.hub.load("pytorch/vision:v0.10.0", variant, pretrained=True) + + model_name = f"pt_{variant}" + + tt_model = pybuda.PyTorchModule(model_name, model) + + url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + input_image = Image.open(requests.get(url, stream=True).raw).convert("RGB") + + # preprocessing + preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + input_tensor = preprocess(input_image) + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([input_batch])) + output = output_q.get()[0].value() + + # Data postprocessing + probabilities = torch.nn.functional.softmax(output[0], dim=0) + + # Get imagenet class mappings + url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + + # Print top categories per image + top5_prob, top5_catid = torch.topk(probabilities, 5) + result = {} + for i in range(top5_prob.size(0)): + result[categories[top5_catid[i]]] = top5_prob[i].item() + print(result) + + +if __name__ == "__main__": + run_wideresnet_torchhub_pytorch() diff --git a/model_demos/cv_demos/xception/timm_xception.py b/model_demos/cv_demos/xception/timm_xception.py new file mode 100644 index 00000000..956a7266 --- /dev/null +++ b/model_demos/cv_demos/xception/timm_xception.py @@ -0,0 +1,76 @@ +# Xception + +import os +import urllib + +import pybuda +import requests +import timm +import torch +from PIL import Image +from pybuda._C.backend_api import BackendDevice +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform + + +def run_xception_timm(variant="xception"): + """ + Variants = { + 'xception', + 'xception41', + 'xception65', + 'xception71' + } + """ + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object + compiler_cfg.enable_t_streaming = True + compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b + compiler_cfg.balancer_policy = "Ribbon" + os.environ["PYBUDA_RIBBON2"] = "1" + os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" + available_devices = pybuda.detect_available_devices() + + if variant == "xception": + if available_devices[0] == BackendDevice.Wormhole_B0: + compiler_cfg.balancer_policy = "CNN" + elif available_devices[0] == BackendDevice.Grayskull: + compiler_cfg.amp_level = 1 + compiler_cfg.place_on_new_epoch("relu_74") + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" + os.environ["PYBUDA_PAD_SPARSE_MM"] = "{43:48}" + + model_name = variant + model = timm.create_model(model_name, pretrained=True) + + # preprocessing + config = resolve_data_config({}, model=model) + transform = create_transform(**config) + url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + img = Image.open(requests.get(url, stream=True).raw).convert("RGB") + tensor = transform(img).unsqueeze(0) # transform and add batch dimension + + # Create PyBuda module from PyTorch model + tt_model = pybuda.PyTorchModule(f"{variant}_timm_pt", model) + + # Run inference on 
Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([tensor])) + output = output_q.get()[0].value() + + # postprocessing + probabilities = torch.nn.functional.softmax(output[0], dim=0) + + # Get imagenet class mappings + url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt" + image_classes = urllib.request.urlopen(url) + categories = [s.decode("utf-8").strip() for s in image_classes.readlines()] + + # Print top categories per image + top5_prob, top5_catid = torch.topk(probabilities, 5) + for i in range(top5_prob.size(0)): + print(categories[top5_catid[i]], top5_prob[i].item()) + + +if __name__ == "__main__": + run_xception_timm() diff --git a/model_demos/cv_demos/yolo_v3/holli_src/utils.py b/model_demos/cv_demos/yolo_v3/holli_src/utils.py new file mode 100644 index 00000000..bfbcb5a2 --- /dev/null +++ b/model_demos/cv_demos/yolo_v3/holli_src/utils.py @@ -0,0 +1,256 @@ +import math + +import numpy as np +import PIL +import torch +from matplotlib import patches, patheffects +from matplotlib import pyplot as plt +from PIL import Image + + +def nms(boxes, nms_thresh): + if len(boxes) == 0: + return boxes + + confs = [(1 - b[4]) for b in boxes] + sorted_idx = np.argsort(confs) + out_boxes = [] + + for i in range(len(boxes)): + box_i = boxes[sorted_idx[i]] + if confs[i] > -1: + out_boxes.append(box_i) + for j in range(i + 1, len(boxes)): + if confs[j] > -1: + box_j = boxes[sorted_idx[j]] + if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: + confs[j] = -1 + return out_boxes + + +def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=True): + model.eval() + img = image2torch(img) + img = img.to(torch.device("cuda" if use_cuda else "cpu")) + all_boxes = model.predict_img(img)[0] + boxes = nms(all_boxes, nms_thresh) + return boxes + + +def image2torch(img): + if isinstance(img, Image.Image): + width = img.width + height = img.height + img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) + img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous() + img = img.view(1, 3, height, width) + img = img.float().div(255.0) + elif type(img) == np.ndarray: # cv2 image + img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) + else: + print("unknown image type") + exit(-1) + return img + + +def bbox_iou(box1, box2, x1y1x2y2=True): + if x1y1x2y2: + x1_min = min(box1[0], box2[0]) + x2_max = max(box1[2], box2[2]) + y1_min = min(box1[1], box2[1]) + y2_max = max(box1[3], box2[3]) + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + else: + w1, h1 = box1[2], box1[3] + w2, h2 = box2[2], box2[3] + x1_min = min(box1[0] - w1 / 2.0, box2[0] - w2 / 2.0) + x2_max = max(box1[0] + w1 / 2.0, box2[0] + w2 / 2.0) + y1_min = min(box1[1] - h1 / 2.0, box2[1] - h2 / 2.0) + y2_max = max(box1[1] + h1 / 2.0, box2[1] + h2 / 2.0) + + w_union = x2_max - x1_min + h_union = y2_max - y1_min + w_cross = w1 + w2 - w_union + h_cross = h1 + h2 - h_union + carea = 0 + if w_cross <= 0 or h_cross <= 0: + return 0.0 + + area1 = w1 * h1 + area2 = w2 * h2 + carea = w_cross * h_cross + uarea = area1 + area2 - carea + return float(carea / uarea) + + +def multi_bbox_ious(boxes1, boxes2, x1y1x2y2=True): + if x1y1x2y2: + x1_min = torch.min(boxes1[0], boxes2[0]) + x2_max = torch.max(boxes1[2], boxes2[2]) + y1_min = torch.min(boxes1[1], boxes2[1]) + y2_max = torch.max(boxes1[3], boxes2[3]) + w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1] + w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1] 
+ else: + w1, h1 = boxes1[2], boxes1[3] + w2, h2 = boxes2[2], boxes2[3] + x1_min = torch.min(boxes1[0] - w1 / 2.0, boxes2[0] - w2 / 2.0) + x2_max = torch.max(boxes1[0] + w1 / 2.0, boxes2[0] + w2 / 2.0) + y1_min = torch.min(boxes1[1] - h1 / 2.0, boxes2[1] - h2 / 2.0) + y2_max = torch.max(boxes1[1] + h1 / 2.0, boxes2[1] + h2 / 2.0) + + w_union = x2_max - x1_min + h_union = y2_max - y1_min + w_cross = w1 + w2 - w_union + h_cross = h1 + h2 - h_union + mask = ((w_cross <= 0) + (h_cross <= 0)) > 0 + area1 = w1 * h1 + area2 = w2 * h2 + carea = w_cross * h_cross + carea[mask] = 0 + uarea = area1 + area2 - carea + return carea / uarea + + +# Plotting helpers + +# e.g. plot_multi_detections(img_tensor, model.predict_img(img_tensor)) +def plot_multi_detections(imgs, results, figsize=None, **kwargs): + if not figsize: + figsize = (12, min(math.ceil(len(imgs) / 3) * 4, 30)) + _, axes = plt.subplots(math.ceil(len(imgs) / 3), 3, figsize=figsize) + + if type(imgs) == np.ndarray and len(imgs.shape) == 4: + imgs = [imgs] + + classes = [] + boxes = [] + extras = [] + for r in results: + res = np.array([[float(b) for b in arr] for arr in r]) + if len(res) > 0: + cla = res[:, -1].astype(int) + b = res[:, 0:4] + e = ["{:.2f} ({:.2f})".format(float(y[4]), float(y[5])) for y in res] + else: + cla, b, e = [], [], [] + classes.append(cla) + boxes.append(b) + extras.append(e) + + for j, ax in enumerate(axes.flat): + if j >= len(imgs): + # break + plt.delaxes(ax) + else: + plot_img_boxes(imgs[j], boxes[j], classes[j], extras[j], plt_ax=ax, **kwargs) + + plt.tight_layout() + + +def plot_img_detections(img, result_boxes, **kwargs): + b = np.array(result_boxes) + if len(b) > 0: + classes = b[:, -1].astype(int) + boxes = b[:, 0:4] + else: + classes, boxes = [], [] + extras = ["{:.2f} ({:.2f})".format(b[4], b[5]) for b in result_boxes] + return plot_img_boxes(img, boxes, classes, extras=extras, **kwargs) + + +def plot_img_data(x, y, rows=2, figsize=(12, 8), **kwargs): + _, axes = plt.subplots(rows, 3, figsize=figsize) + + for j, ax in enumerate(axes.flat): + if j >= len(y): + break + targets = y[j] + if isinstance(targets, torch.Tensor): + targets = targets.clone().reshape(-1, 5) + classes = targets[:, 0].cpu().numpy().astype(int) + else: + classes = targets[:, 0].astype(int) + plot_img_boxes(x[j], targets[:, 1:], classes, plt_ax=ax, **kwargs) + + plt.tight_layout() + + +def plot_img_boxes( + img, + boxes, + classes, + extras=None, + plt_ax=None, + figsize=None, + class_names=None, + real_pixels=False, + box_centered=True, +): + if not plt_ax: + _, plt_ax = plt.subplots(figsize=figsize) + colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) + + if type(img) == PIL.Image.Image: + width = img.width + height = img.height + elif type(img) in [torch.Tensor, np.ndarray]: + if type(img) == torch.Tensor: + img = img.clone().cpu().numpy() + width = img.shape[2] + height = img.shape[1] + img = img.transpose(1, 2, 0) + if (img < 1.01).all() and (img >= 0).all(): + img = img.clip(0, 1) # avoid "Clipping input data to the valid range" warning after tensor roundings + else: + raise (f"Unkown type for image: {type(img)}") + + if len(boxes) > 0 and not real_pixels: + boxes[:, 0] *= width + boxes[:, 2] *= width + boxes[:, 1] *= height + boxes[:, 3] *= height + + for i in range(len(boxes)): + b, class_id = boxes[i], classes[i] + if b[0] == 0: + break + + color = colors[class_id % len(colors)] + + if box_centered: + x, y = (b[0] - b[2] / 2, b[1] - b[3] / 2) + w, h = (b[2], b[3]) + else: + x, y = b[0], b[1] + 
w, h = b[2], b[3] + + patch = plt_ax.add_patch(patches.Rectangle([x, y], w, h, fill=False, edgecolor=color, lw=2)) + patch.set_path_effects( + [ + patheffects.Stroke(linewidth=3, foreground="black", alpha=0.5), + patheffects.Normal(), + ] + ) + + s = class_names[class_id] if class_names else str(class_id) + if extras: + s += "\n" + str(extras[i]) + patch = plt_ax.text( + x + 2, + y, + s, + verticalalignment="top", + color=color, + fontsize=16, + weight="bold", + ) + patch.set_path_effects( + [ + patheffects.Stroke(linewidth=1, foreground="black", alpha=0.5), + patheffects.Normal(), + ] + ) + + _ = plt_ax.imshow(img) diff --git a/model_demos/cv_demos/yolo_v3/holli_src/yolo_layer.py b/model_demos/cv_demos/yolo_v3/holli_src/yolo_layer.py new file mode 100644 index 00000000..ddd4a268 --- /dev/null +++ b/model_demos/cv_demos/yolo_v3/holli_src/yolo_layer.py @@ -0,0 +1,227 @@ +import math + +import numpy as np +import torch +import torch.nn as nn + +from .utils import multi_bbox_ious + + +class YoloLayer(nn.Module): + def __init__(self, anchors, stride, num_classes): + super().__init__() + self.anchors, self.stride = np.array(anchors), stride + self.num_classes = num_classes + + def get_masked_anchors(self): + return self.anchors / self.stride + + def get_region_boxes(self, output, conf_thresh): + if output.dim() == 3: + output = output.unsqueeze(0) + device = output.device # torch.device(torch_device) + anchors = torch.from_numpy(self.get_masked_anchors().astype(np.float32)).to(device) + + nB = output.size(0) + nA = len(anchors) + nC = self.num_classes + nH = output.size(2) + nW = output.size(3) + cls_anchor_dim = nB * nA * nH * nW + + assert output.size(1) == (5 + nC) * nA + + output = output.view(nB * nA, 5 + nC, nH * nW).transpose(0, 1).contiguous().view(5 + nC, cls_anchor_dim) + + grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(device) + grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to(device) + ix = torch.LongTensor(range(0, 2)).to(device) + anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB, nH * nW).view(cls_anchor_dim) + anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB, nH * nW).view(cls_anchor_dim) + + xs, ys = ( + torch.sigmoid(output[0]) + grid_x, + torch.sigmoid(output[1]) + grid_y, + ) + ws, hs = ( + torch.exp(output[2]) * anchor_w.detach(), + torch.exp(output[3]) * anchor_h.detach(), + ) + det_confs = torch.sigmoid(output[4]) + + cls_confs = torch.nn.Softmax(dim=1)(output[5 : 5 + nC].transpose(0, 1)).detach() + cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) + cls_max_confs = cls_max_confs.view(-1) + cls_max_ids = cls_max_ids.view(-1) + + det_confs = det_confs.to("cpu") # , non_blocking=True for torch 4.1? + cls_max_confs = cls_max_confs.to("cpu") + cls_max_ids = cls_max_ids.to("cpu") + xs, ys = xs.to("cpu"), ys.to("cpu") + ws, hs = ws.to("cpu"), hs.to("cpu") + + all_boxes = [[] for i in range(nB)] + + inds = torch.LongTensor(range(0, len(det_confs))) + for ind in inds[det_confs > conf_thresh]: + bcx = xs[ind] + bcy = ys[ind] + bw = ws[ind] + bh = hs[ind] + box = [ + bcx / nW, + bcy / nH, + bw / nW, + bh / nH, + det_confs[ind], + cls_max_confs[ind], + cls_max_ids[ind], + ] + box = [i.item() for i in box] + + batch = math.ceil(ind / (nA * nH * nW)) + all_boxes[batch].append(box) + + return all_boxes + + def build_targets(self, pred_boxes, target, anchors, nH, nW): + self.ignore_thresh = 0.5 + self.truth_thresh = 1.0 + + # Works faster on CPU than on GPU. 
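+        # Move predictions, targets, and anchors onto the CPU before building the training masks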
+ devi = torch.device("cpu") + pred_boxes = pred_boxes.to(devi) + target = target.to(devi) + anchors = anchors.to(devi) + + nB = target.size(0) + nA = len(anchors) + + anchor_step = anchors.size(1) # anchors[nA][anchor_step] + conf_mask = torch.ones(nB, nA, nH, nW) + coord_mask = torch.zeros(nB, nA, nH, nW) + cls_mask = torch.zeros(nB, nA, nH, nW) + tcoord = torch.zeros(4, nB, nA, nH, nW) + tconf = torch.zeros(nB, nA, nH, nW) + tcls = torch.zeros(nB, nA, nH, nW) + twidth, theight = nW, nH + nAnchors = nA * nH * nW + + for b in range(nB): + cur_pred_boxes = pred_boxes[b * nAnchors : (b + 1) * nAnchors].t() + cur_ious = torch.zeros(nAnchors) + tbox = target[b].view(-1, 5) + + # If the bounding box prior is not the best but does overlap a ground truth object by + # more than some threshold we ignore the prediction (conf_mask) + for t in range(tbox.size(0)): + if tbox[t][1] == 0: + break + gx, gy = tbox[t][1] * nW, tbox[t][2] * nH + gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight + cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() + cur_ious = torch.max( + cur_ious, + multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False), + ) + ignore_ix = cur_ious > self.ignore_thresh + conf_mask[b][ignore_ix.view(nA, nH, nW)] = 0 + + for t in range(tbox.size(0)): + if tbox[t][1] == 0: + break + gx, gy = tbox[t][1] * nW, tbox[t][2] * nH + gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight + gw, gh = gw.float(), gh.float() + gi, gj = int(gx), int(gy) + + tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t() + anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors), 1).t() + _, best_n = torch.max( + multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False), + 0, + ) + + coord_mask[b][best_n][gj][gi] = 1 + cls_mask[b][best_n][gj][gi] = 1 + conf_mask[b][best_n][gj][gi] = 1 + tcoord[0][b][best_n][gj][gi] = gx - gi + tcoord[1][b][best_n][gj][gi] = gy - gj + tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0]) + tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1]) + tcls[b][best_n][gj][gi] = tbox[t][0] + tconf[b][best_n][gj][gi] = 1 # yolov1 would have used iou-value here + + return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls + + def get_loss(self, output, target, return_single_value=True): + device = output.device + + anchors = torch.from_numpy(self.get_masked_anchors().astype(np.float32)).to(device) + + nB = output.data.size(0) # batch size + nA = len(anchors) + nC = self.num_classes + nH = output.data.size(2) + nW = output.data.size(3) + cls_anchor_dim = nB * nA * nH * nW + + output = output.view(nB, nA, (5 + nC), nH, nW) + + ix = torch.LongTensor(range(0, 5)).to(device) + coord = ( + output.index_select(2, ix[0:4]) + .view(nB * nA, -1, nH * nW) + .transpose(0, 1) + .contiguous() + .view(4, cls_anchor_dim) + ) # x, y, w, h + coord[0:2] = coord[0:2].sigmoid() # x, y: bx = σ(tx) (+ cx) + conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() + + grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(device) + grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to(device) + anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB * nH * nW).view(cls_anchor_dim) + + pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(device) + pred_boxes[0] = coord[0] + grid_x # bx = σ(tx) + cx + pred_boxes[1] = coord[1] + grid_y + pred_boxes[2] = coord[2].exp() * 
anchor_w # pw*e(tw) + pred_boxes[3] = coord[3].exp() * anchor_h + pred_boxes = pred_boxes.transpose(0, 1).contiguous().view(-1, 4) + + ( + coord_mask, + conf_mask, + cls_mask, + tcoord, + tconf, + tcls, + ) = self.build_targets(pred_boxes.detach(), target.detach(), anchors.detach(), nH, nW) + + cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(device) + cls = output.index_select(2, cls_grid) + cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC) + cls_mask = cls_mask == 1 + tcls = tcls[cls_mask].long().view(-1) + cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(device) + cls = cls[cls_mask].view(-1, nC) + + tcoord = tcoord.view(4, cls_anchor_dim).to(device) + tconf, tcls = tconf.to(device), tcls.to(device) + coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(device), conf_mask.to(device) + + loss_coord = nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2 + loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) + loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 + loss = loss_coord + loss_conf + loss_cls + + if math.isnan(loss.item()): + print(conf, tconf) + raise ValueError("YoloLayer has isnan in loss") + + if return_single_value: + return loss + else: + return [loss, loss_coord, loss_conf, loss_cls] diff --git a/model_demos/cv_demos/yolo_v3/holli_src/yolov3.py b/model_demos/cv_demos/yolo_v3/holli_src/yolov3.py new file mode 100644 index 00000000..10b303c9 --- /dev/null +++ b/model_demos/cv_demos/yolo_v3/holli_src/yolov3.py @@ -0,0 +1,128 @@ +import torch +import torch.nn as nn + +from .yolo_layer import * +from .yolov3_base import * + + +class Yolov3(Yolov3Base): + def __init__(self, num_classes=80): + super().__init__() + self.backbone = Darknet([1, 2, 8, 8, 4]) + + anchors_per_region = 3 + self.yolo_0_pre = Yolov3UpsamplePrep([512, 1024], 1024, anchors_per_region * (5 + num_classes)) + self.yolo_0 = YoloLayer( + anchors=[(116.0, 90.0), (156.0, 198.0), (373.0, 326.0)], + stride=32, + num_classes=num_classes, + ) + + self.yolo_1_c = ConvBN(512, 256, 1) + self.yolo_1_prep = Yolov3UpsamplePrep([256, 512], 512 + 256, anchors_per_region * (5 + num_classes)) + self.yolo_1 = YoloLayer( + anchors=[(30.0, 61.0), (62.0, 45.0), (59.0, 119.0)], + stride=16, + num_classes=num_classes, + ) + + self.yolo_2_c = ConvBN(256, 128, 1) + self.yolo_2_prep = Yolov3UpsamplePrep([128, 256], 256 + 128, anchors_per_region * (5 + num_classes)) + self.yolo_2 = YoloLayer( + anchors=[(10.0, 13.0), (16.0, 30.0), (33.0, 23.0)], + stride=8, + num_classes=num_classes, + ) + + def get_loss_layers(self): + return [self.yolo_0, self.yolo_1, self.yolo_2] + + def forward_yolo(self, xb): + x, y0 = self.yolo_0_pre(xb[-1]) + + x = self.yolo_1_c(x) + x = nn.Upsample(scale_factor=2, mode="nearest")(x) + x = torch.cat([x, xb[-2]], 1) + x, y1 = self.yolo_1_prep(x) + + x = self.yolo_2_c(x) + x = nn.Upsample(scale_factor=2, mode="nearest")(x) + x = torch.cat([x, xb[-3]], 1) + x, y2 = self.yolo_2_prep(x) + + return [y0, y1, y2] + + +# Backbone and helper modules + + +class DarknetBlock(nn.Module): + def __init__(self, ch_in): + super().__init__() + ch_hid = ch_in // 2 + self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0) + self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + return self.conv2(self.conv1(x)) + x + + +class Darknet(nn.Module): + def __init__(self, num_blocks, start_nf=32): + super().__init__() + nf = start_nf + self.base 
= ConvBN(3, nf, kernel_size=3, stride=1) # , padding=1) + self.layers = [] + for i, nb in enumerate(num_blocks): + # dn_layer = make_group_layer(nf, nb, stride=(1 if i==-1 else 2)) + dn_layer = self.make_group_layer(nf, nb, stride=2) + self.add_module(f"darknet_{i}", dn_layer) + self.layers.append(dn_layer) + nf *= 2 + + def make_group_layer(self, ch_in, num_blocks, stride=2): + layers = [ConvBN(ch_in, ch_in * 2, stride=stride)] + for i in range(num_blocks): + layers.append(DarknetBlock(ch_in * 2)) + return nn.Sequential(*layers) + + def forward(self, x): + y = [self.base(x)] + for layer in self.layers: + y.append(layer(y[-1])) + return y + + +class Yolov3UpsamplePrep(nn.Module): + def __init__(self, filters_list, in_filters, out_filters): + super().__init__() + self.branch = nn.ModuleList( + [ + ConvBN(in_filters, filters_list[0], 1), + ConvBN(filters_list[0], filters_list[1], kernel_size=3), + ConvBN(filters_list[1], filters_list[0], kernel_size=1), + ConvBN(filters_list[0], filters_list[1], kernel_size=3), + ConvBN(filters_list[1], filters_list[0], kernel_size=1), + ] + ) + self.for_yolo = nn.ModuleList( + [ + ConvBN(filters_list[0], filters_list[1], kernel_size=3), + nn.Conv2d( + filters_list[1], + out_filters, + kernel_size=1, + stride=1, + padding=0, + bias=True, + ), + ] + ) + + def forward(self, x): + for m in self.branch: + x = m(x) + branch_out = x + for m in self.for_yolo: + x = m(x) + return branch_out, x diff --git a/model_demos/cv_demos/yolo_v3/holli_src/yolov3_base.py b/model_demos/cv_demos/yolo_v3/holli_src/yolov3_base.py new file mode 100644 index 00000000..565374ff --- /dev/null +++ b/model_demos/cv_demos/yolo_v3/holli_src/yolov3_base.py @@ -0,0 +1,113 @@ +import importlib +from abc import ABCMeta, abstractmethod +from collections import Iterable, OrderedDict, defaultdict + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .yolo_layer import * + + +class Yolov3Base(nn.Module, metaclass=ABCMeta): + def __init__(self): + super().__init__() + + @abstractmethod + def get_loss_layers(self): + return [self.yolo_0, self.yolo_1] + + def forward_backbone(self, x): + return self.backbone(x) + + def forward(self, x): + shape = x.shape + assert ( + shape[1] == 3 and shape[2] % 32 == 0 and shape[3] % 32 == 0 + ), f"Tensor shape should be [bs, 3, x*32, y*32], was {shape}" + xb = self.forward_backbone(x) + return self.forward_yolo(xb) + + def boxes_from_output(self, outputs, conf_thresh=0.25): + all_boxes = [[] for j in range(outputs[0].size(0))] + for i, layer in enumerate(self.get_loss_layers()): + layer_boxes = layer.get_region_boxes(outputs[i], conf_thresh=conf_thresh) + for j, layer_box in enumerate(layer_boxes): + all_boxes[j] += layer_box + + return all_boxes + + def predict_img(self, imgs, conf_thresh=0.25): + self.eval() + if len(imgs.shape) == 3: + imgs = imgs.unsqueeze(-1) + + outputs = self.forward(imgs) + return self.boxes_from_output(outputs, conf_thresh) + + def freeze_backbone(self, requires_grad=False): + for _, p in self.backbone.named_parameters(): + p.requires_grad = requires_grad + + def unfreeze(self): + for _, p in self.named_parameters(): + p.requires_grad = True + + def freeze_info(self, print_all=False): + d = defaultdict(set) + print("Layer: param.requires_grad") + for name, param in self.named_parameters(): + if print_all: + print(f"{name}: {param.requires_grad}") + else: + d[name.split(".")[0]].add(param.requires_grad) + if not print_all: + for k, v in d.items(): + print(k, ": ", v) + + def load_backbone(self, h5_path): + 
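+        # Copy only the checkpoint weights whose shapes match this model; mismatched layers are skipped and reported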
state_old = self.state_dict() + state_new = torch.load(h5_path) + + skipped_layers = [] + for k in list(state_new.keys()): + if state_old[k].shape != state_new[k].shape: + skipped_layers.append(k) + del state_new[k] + + return self.load_state_dict(state_new, strict=False), skipped_layers + + +# Common helper modules + + +class ConvBN(nn.Module): + "convolutional layer then batchnorm" + + def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=None): + super().__init__() + if padding is None: + padding = (kernel_size - 1) // 2 # we should never need to set padding + self.conv = nn.Conv2d( + ch_in, + ch_out, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=False, + ) + self.bn = nn.BatchNorm2d(ch_out, momentum=0.01) + self.relu = nn.LeakyReLU(0.1, inplace=True) + + def forward(self, x): + return self.relu(self.bn(self.conv(x))) + + +class Upsample(nn.Module): + def __init__(self, stride=2): + super().__init__() + self.stride = stride + + def forward(self, x): + assert x.data.dim() == 4 + return nn.Upsample(scale_factor=self.stride, mode="nearest")(x) diff --git a/model_demos/cv_demos/yolo_v3/holli_src/yolov3_tiny.py b/model_demos/cv_demos/yolo_v3/holli_src/yolov3_tiny.py new file mode 100644 index 00000000..7e87202f --- /dev/null +++ b/model_demos/cv_demos/yolo_v3/holli_src/yolov3_tiny.py @@ -0,0 +1,134 @@ +import torch +import torch.nn as nn + +from .yolo_layer import * +from .yolov3_base import * + + +class Yolov3Tiny(Yolov3Base): + def __init__(self, num_classes, use_wrong_previous_anchors=False): + super().__init__() + + self.num_classes = num_classes + self.return_out_boxes = False + self.skip_backbone = False + + self.backbone = Yolov3TinyBackbone() + + anchors_per_region = 3 + self.yolo_0_pre = nn.Sequential( + OrderedDict( + [ + ("14_convbatch", ConvBN(256, 512, 3, 1, 1)), + ( + "15_conv", + nn.Conv2d( + 512, + anchors_per_region * (5 + self.num_classes), + 1, + 1, + 0, + ), + ), + ] + ) + ) + self.yolo_0 = YoloLayer( + anchors=[(81.0, 82.0), (135.0, 169.0), (344.0, 319.0)], + stride=32, + num_classes=num_classes, + ) + + self.up_1 = nn.Sequential( + OrderedDict( + [ + ("17_convbatch", ConvBN(256, 128, 1, 1, 0)), + ("18_upsample", Upsample(2)), + ] + ) + ) + + self.yolo_1_pre = nn.Sequential( + OrderedDict( + [ + ("19_convbatch", ConvBN(128 + 256, 256, 3, 1, 1)), + ( + "20_conv", + nn.Conv2d( + 256, + anchors_per_region * (5 + self.num_classes), + 1, + 1, + 0, + ), + ), + ] + ) + ) + + # Tiny yolo weights were originally trained using wrong anchor mask + # https://github.com/pjreddie/darknet/commit/f86901f6177dfc6116360a13cc06ab680e0c86b0#diff-2b0e16f442a744897f1606ff1a0f99d3L175 + if use_wrong_previous_anchors: + yolo_1_anchors = [(23.0, 27.0), (37.0, 58.0), (81.0, 82.0)] + else: + yolo_1_anchors = [(10.0, 14.0), (23.0, 27.0), (37.0, 58.0)] + + self.yolo_1 = YoloLayer(anchors=yolo_1_anchors, stride=16.0, num_classes=num_classes) + + def get_loss_layers(self): + return [self.yolo_0, self.yolo_1] + + def forward_yolo(self, xb): + x_b_0, x_b_full = xb[0], xb[1] + y0 = self.yolo_0_pre(x_b_full) + + x_up = self.up_1(x_b_full) + x_up = torch.cat((x_up, x_b_0), 1) + y1 = self.yolo_1_pre(x_up) + + return [y0, y1] + + +# Backbone and helper modules + + +class MaxPoolStride1(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x): + x = F.max_pool2d(F.pad(x, (0, 1, 0, 1), mode="replicate"), 2, stride=1) + return x + + +class Yolov3TinyBackbone(nn.Module): + def __init__(self, input_channels=3): + super().__init__() + self.layers_list = 
OrderedDict(
+            [
+                ("0_convbatch", ConvBN(input_channels, 16, 3, 1, 1)),
+                ("1_max", nn.MaxPool2d(2, 2)),
+                ("2_convbatch", ConvBN(16, 32, 3, 1, 1)),
+                ("3_max", nn.MaxPool2d(2, 2)),
+                ("4_convbatch", ConvBN(32, 64, 3, 1, 1)),
+                ("5_max", nn.MaxPool2d(2, 2)),
+                ("6_convbatch", ConvBN(64, 128, 3, 1, 1)),
+                ("7_max", nn.MaxPool2d(2, 2)),
+                ("8_convbatch", ConvBN(128, 256, 3, 1, 1)),
+                ("9_max", nn.MaxPool2d(2, 2)),
+                ("10_convbatch", ConvBN(256, 512, 3, 1, 1)),
+                ("11_max", MaxPoolStride1()),
+                ("12_convbatch", ConvBN(512, 1024, 3, 1, 1)),
+                (
+                    "13_convbatch",
+                    ConvBN(1024, 256, 1, 1, 0),
+                ),  # padding = kernel_size-1//2
+            ]
+        )
+        self.layers = nn.Sequential(self.layers_list)
+        self.idx = 9
+
+    def forward(self, x):
+        x_b_0 = self.layers[: self.idx](x)
+        x_b_full = self.layers[self.idx :](x_b_0)
+        return x_b_0, x_b_full
diff --git a/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli.py b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli.py
new file mode 100644
index 00000000..baa6c01c
--- /dev/null
+++ b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli.py
@@ -0,0 +1,63 @@
+import os
+
+import pybuda
+import requests
+from PIL import Image
+from pybuda._C.backend_api import BackendDevice
+
+from cv_demos.yolo_v3.holli_src import utils
+from cv_demos.yolo_v3.holli_src.yolov3 import *
+
+
+def run_yolov3_holli_pytorch():
+
+    # Set PyBuda configuration parameters
+    compiler_cfg = pybuda.config._get_global_compiler_config()  # load global compiler config object
+    compiler_cfg.balancer_policy = "Ribbon"
+    compiler_cfg.enable_t_streaming = True
+    compiler_cfg.default_df_override = pybuda._C.Float16_b
+    os.environ["PYBUDA_RIBBON2"] = "1"
+
+    # Device specific configurations
+    available_devices = pybuda.detect_available_devices()
+    if available_devices:
+        if available_devices[0] == BackendDevice.Grayskull:
+            os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1"
+
+    # Download model weights
+    url = "https://www.ollihuotari.com/data/yolov3_pytorch/yolov3_coco_01.h5"
+    load_path = "cv_demos/yolo_v3/" + url.split("/")[-1]
+    response = requests.get(url, stream=True)
+    with open(load_path, "wb") as f:
+        f.write(response.content)
+
+    # Load model
+    model = Yolov3(num_classes=80)
+    model.load_state_dict(
+        torch.load(
+            load_path,
+            map_location=torch.device("cpu"),
+        )
+    )
+    model.eval()
+
+    # Create PyBuda module from PyTorch model
+    tt_model = pybuda.PyTorchModule("pytorch_yolov3_holli", model)
+
+    sz = 512
+    image_url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
+    img_org = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
+    img_resized = img_org.resize((sz, sz))
+    img_tensor = utils.image2torch(img_resized)
+
+    # Run inference on Tenstorrent device
+    output_q = pybuda.run_inference(tt_model, inputs=([img_tensor]))
+    output = output_q.get()
+    print(output)
+
+    # Remove weight file
+    os.remove(load_path)
+
+
+if __name__ == "__main__":
+    run_yolov3_holli_pytorch()
diff --git a/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli_1x1.py b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli_1x1.py
new file mode 100644
index 00000000..84928790
--- /dev/null
+++ b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_holli_1x1.py
@@ -0,0 +1,74 @@
+import os
+
+import pybuda
+import requests
+from PIL import Image
+from pybuda._C.backend_api import BackendDevice
+
+from cv_demos.yolo_v3.holli_src import utils
+from cv_demos.yolo_v3.holli_src.yolov3 import *
+
+
+def run_yolov3_holli_pytorch_1x1():
+
+    # Check device compatibility (this variant only runs on Wormhole B0)
+    available_devices = 
pybuda.detect_available_devices()
+    if available_devices:
+        if available_devices[0] != BackendDevice.Wormhole_B0:
+            raise NotImplementedError("Model not supported on Grayskull")
+
+    # Set PyBuda configuration parameters
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.balancer_policy = "Ribbon"
+    compiler_cfg.enable_t_streaming = True
+    compiler_cfg.default_df_override = pybuda._C.Float16_b
+
+    # Device specific configurations
+    available_devices = pybuda.detect_available_devices()
+    if available_devices:
+        if available_devices[0] == BackendDevice.Grayskull:
+            os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1"
+
+    # Set PyBUDA environment variables
+    os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml"
+    os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"
+    os.environ["PYBUDA_RIBBON2"] = "1"
+
+    # Download model weights
+    url = "https://www.ollihuotari.com/data/yolov3_pytorch/yolov3_coco_01.h5"
+    load_path = "cv_demos/yolo_v3/" + url.split("/")[-1]
+    response = requests.get(url, stream=True)
+    with open(load_path, "wb") as f:
+        f.write(response.content)
+
+    # Load model
+    model = Yolov3(num_classes=80)
+    model.load_state_dict(
+        torch.load(
+            load_path,
+            map_location=torch.device("cpu"),
+        )
+    )
+    model.eval()
+
+    # Create PyBuda module from PyTorch model
+    tt_model = pybuda.PyTorchModule("pytorch_yolov3_holli_1x1", model)
+
+    # Load sample image
+    sz = 512
+    image_url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
+    img_org = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
+    img_resized = img_org.resize((sz, sz))
+    img_tensor = utils.image2torch(img_resized)
+
+    # Run inference on Tenstorrent device
+    output_q = pybuda.run_inference(tt_model, inputs=([img_tensor]))
+    output = output_q.get()
+    print(output)
+
+    # Remove weight file
+    os.remove(load_path)
+
+
+if __name__ == "__main__":
+    run_yolov3_holli_pytorch_1x1()
diff --git a/model_demos/cv_demos/yolo_v3/pytorch_yolov3_tiny_holli.py b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_tiny_holli.py
new file mode 100644
index 00000000..c0330c7e
--- /dev/null
+++ b/model_demos/cv_demos/yolo_v3/pytorch_yolov3_tiny_holli.py
@@ -0,0 +1,57 @@
+import os
+
+import pybuda
+import requests
+from PIL import Image
+
+from cv_demos.yolo_v3.holli_src import utils
+from cv_demos.yolo_v3.holli_src.yolo_layer import *
+from cv_demos.yolo_v3.holli_src.yolov3_tiny import *
+
+
+def run_yolov3_tiny_holli_pytorch():
+
+    # Set PyBuda configuration parameters
+    compiler_cfg = pybuda.config._get_global_compiler_config()
+    compiler_cfg.balancer_policy = "Ribbon"
+    compiler_cfg.enable_t_streaming = True
+    compiler_cfg.enable_auto_fusing = False
+    compiler_cfg.default_df_override = pybuda._C.Float16_b
+
+    # Download model weights
+    url = "https://www.ollihuotari.com/data/yolov3_pytorch/yolov3_tiny_coco_01.h5"
+    load_path = "cv_demos/yolo_v3/" + url.split("/")[-1]
+    response = requests.get(url, stream=True)
+    with open(load_path, "wb") as f:
+        f.write(response.content)
+
+    # Load model
+    model = Yolov3Tiny(num_classes=80, use_wrong_previous_anchors=True)
+    model.load_state_dict(
+        torch.load(
+            load_path,
+            map_location=torch.device("cpu"),
+        )
+    )
+    model.eval()
+
+    # Create PyBuda module from PyTorch model
+    tt_model = pybuda.PyTorchModule("pytorch_yolov3_tiny_holli", model)
+
+    sz = 512
+    image_url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
+    img_org = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
+    img_resized = 
img_org.resize((sz, sz)) + img_tensor = utils.image2torch(img_resized) + + # Run inference on Tenstorrent device + output_q = pybuda.run_inference(tt_model, inputs=([img_tensor])) + output = output_q.get() + print(output) + + # Remove weight file + os.remove(load_path) + + +if __name__ == "__main__": + run_yolov3_tiny_holli_pytorch() diff --git a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_320.py b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_320.py index 899a0b8c..17bbc3ea 100644 --- a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_320.py +++ b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_320.py @@ -20,9 +20,7 @@ def run_pytorch_yolov5_320(variant="yolov5s"): compiler_cfg.enable_conv_prestride = True compiler_cfg.enable_tvm_constant_prop = True os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" - - if variant == "yolov5m": - os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Load YOLOv5 model # Variants: yolov5n, yolov5s, yolov5m, yolov5l, yolov5x @@ -49,7 +47,7 @@ def run_pytorch_yolov5_320(variant="yolov5s"): os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" os.environ["PYBUDA_BALANCER_PREPASS_DISABLED"] = "1" compiler_cfg.enable_auto_fusing = False - elif model_ckpt in ["yolov5n", "yolov5s"]: + elif model_ckpt in ["yolov5n", "yolov5s", "yolov5m"]: compiler_cfg.enable_auto_fusing = False else: print("not a supported device!") diff --git a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_480.py b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_480.py index aed0d84b..485574b9 100644 --- a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_480.py +++ b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_480.py @@ -20,39 +20,52 @@ def run_pytorch_yolov5_480(variant="yolov5s"): compiler_cfg.enable_t_streaming = True compiler_cfg.enable_tm_cpu_fallback = True os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Device specific configurations available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == BackendDevice.Grayskull: - compiler_cfg.default_dram_parameters = True # Set PyBUDA environment variables os.environ["PYBUDA_PAD_SPARSE_MM"] = "{113:128}" os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{16*1024}" - - if variant in ["yolov5s"]: - os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" - if variant in ["yolov5m", "yolov5l"]: - compiler_cfg.enable_auto_fusing = False - compiler_cfg.enable_enumerate_u_kt = False - - if variant in ["yolov5n", "yolov5x"]: + if variant == "yolov5m": + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" + compiler_cfg.balancer_op_override( + "concatenate_26.dc.concatenate.30.dc.concatenate.1.dc.buffer.0", "t_stream_shape", (6, 1) + ) + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{32*1024}" + if variant == "yolov5l": os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" + if variant == "yolov5x": + os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1" + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" + compiler_cfg.balancer_op_override( + "concatenate_40.dc.concatenate.30.dc.concatenate.1.dc.buffer.0", "t_stream_shape", (6, 1) + ) + compiler_cfg.balancer_op_override("conv2d_41.dc.matmul.8", "grid_shape", (5, 5)) elif available_devices[0] == BackendDevice.Wormhole_B0: # Set PyBUDA environment variables compiler_cfg.enable_auto_fusing = False compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b 
compiler_cfg.default_dram_parameters = True + os.environ["PYBUDA_RIBBON2"] = "1" os.environ["PYBUDA_PAD_SPARSE_MM"] = "{13:16, 3:4}" + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{64*1024}" if variant == "yolov5m": - os.environ["PYBUDA_RIBBON2"] = "1" - if variant == "yolov5x": + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" + compiler_cfg.balancer_op_override( + "concatenate_26.dc.concatenate.30.dc.concatenate.1.dc.buffer.0", "t_stream_shape", (6, 1) + ) + elif variant == "yolov5l": + compiler_cfg.place_on_new_epoch("concatenate_208.dc.concatenate.0") + elif variant == "yolov5x": + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" - if variant == "yolov5n" or variant == "yolov5l" or variant == "yolov5x": - if variant == "yolov5l": - compiler_cfg.place_on_new_epoch("concatenate_208.dc.concatenate.0") - elif variant == "yolov5x": - os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1" else: print("not a supported device!") sys.exit() diff --git a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_640.py b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_640.py index f021d8a9..b53b2412 100644 --- a/model_demos/cv_demos/yolo_v5/pytorch_yolov5_640.py +++ b/model_demos/cv_demos/yolo_v5/pytorch_yolov5_640.py @@ -24,24 +24,39 @@ def run_pytorch_yolov5_640(variant="yolov5s"): compiler_cfg.enable_t_streaming = True compiler_cfg.enable_auto_fusing = False os.environ["PYBUDA_DECOMPOSE_SIGMOID"] = "1" - - # Model specific configurations - if model_ckpt == "yolov5l": - compiler_cfg.enable_auto_transposing_placement = True + os.environ["PYBUDA_DISABLE_CAP_SPARSE_MM_FIDELITY"] = "1" + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Device specific configurations available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == BackendDevice.Grayskull: # Set PyBUDA environment variables - if model_ckpt in ["yolov5s", "yolov5m", "yolov5l", "yolov5x"]: - os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{65*1024}" - os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + compiler_cfg.enable_enumerate_u_kt = False + os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" compiler_cfg.enable_tm_cpu_fallback = True compiler_cfg.enable_conv_prestride = True - compiler_cfg.enable_enumerate_u_kt = False os.environ["PYBUDA_PAD_SPARSE_MM"] = "{13:16, 3:4}" - os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" + if model_ckpt in ["yolov5s", "yolov5m", "yolov5l", "yolov5x"]: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{65*1024}" + os.environ["PYBUDA_FORCE_EMULATE_HARVESTED"] = "1" + if model_ckpt in ["yolov5l", "yolov5x"]: + os.environ["PYBUDA_GRAPHSOLVER_SELF_CUT_TYPE"] = "ConsumerOperandDataEdgesFirst" + os.environ["PYBUDA_TEMP_ELT_UNARY_ESTIMATES_LEGACY"] = "1" + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" + os.environ["PYBUDA_RIBBON2"] = "1" + compiler_cfg.balancer_op_override("conv2d_41.dc.matmul.8", "grid_shape", (5, 5)) + if model_ckpt == "yolov5x": + compiler_cfg.enable_enumerate_u_kt = True + compiler_cfg.place_on_new_epoch("concatenate_40.dc.select.28") + compiler_cfg.place_on_new_epoch("conv2d_210.dc.matmul.11") + if model_ckpt in ["yolov5m"]: + os.environ["PYBUDA_RIBBON2"] = "1" + os.environ["PYBUDA_INSERT_SLICE_FOR_CONCAT"] = "1" + os.environ["PYBUDA_CONCAT_SLICE_Y"] = "10" + if variant == "yolov5n": + 
os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" elif available_devices[0] == BackendDevice.Wormhole_B0: os.environ["PYBUDA_PAD_SPARSE_MM"] = "{13:16, 3:4}" os.environ["PYBUDA_MAX_GRAPH_CUT_RETRY"] = "100" @@ -56,10 +71,16 @@ def run_pytorch_yolov5_640(variant="yolov5s"): compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b if model_ckpt in ["yolov5n", "yolov5m"]: compiler_cfg.enable_tm_cpu_fallback = False + if model_ckpt in ["yolov5s", "yolov5n", "yolov5l"]: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{64*1024}" + if model_ckpt == "yolov5n": + compiler_cfg.balancer_op_override( + "concatenate_19.dc.concatenate.30.dc.concatenate.1.dc.buffer.0", "t_stream_shape", (3, 1) + ) if model_ckpt == "yolov5m": compiler_cfg.balancer_op_override("concatenate_260.dc.concatenate.0", "grid_shape", (1, 1)) os.environ["PYBUDA_RIBBON2"] = "1" - os.environ["PYBUDA_RIBBON2_OPTIMIZATION_ITERATIONS"] = "10" + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{112*1024}" if model_ckpt == "yolov5l": compiler_cfg.enable_auto_transposing_placement = True compiler_cfg.enable_tm_cpu_fallback = True @@ -67,8 +88,10 @@ def run_pytorch_yolov5_640(variant="yolov5s"): os.environ["PYBUDA_RIBBON2"] = "1" if model_ckpt == "yolov5x": compiler_cfg.balancer_op_override("concatenate_363.dc.concatenate.0", "grid_shape", (1, 1)) + compiler_cfg.balancer_op_override("conv2d_41.dc.matmul.8", "t_stream_shape", (1, 1)) os.environ["PYBUDA_RIBBON2"] = "1" compiler_cfg.enable_tm_cpu_fallback = True + os.environ["PYBUDA_DISABLE_CAP_SPARSE_MM_FIDELITY"] = "0" else: print("not a supported device!") sys.exit() diff --git a/model_demos/nlp_demos/codegen/pytorch_codegen_causal_lm.py b/model_demos/nlp_demos/codegen/pytorch_codegen_causal_lm.py index 84c078d2..7b44a52c 100644 --- a/model_demos/nlp_demos/codegen/pytorch_codegen_causal_lm.py +++ b/model_demos/nlp_demos/codegen/pytorch_codegen_causal_lm.py @@ -24,6 +24,7 @@ def run_codegen_causal_lm(variant="Salesforce/codegen-350M-mono"): if available_devices: if available_devices[0] == BackendDevice.Grayskull: compiler_cfg.default_dram_parameters = False + compiler_cfg.balancer_policy = "Ribbon" # DRAM stream limit compiler_cfg.balancer_op_override("matmul_1829", "grid_shape", (2, 8)) diff --git a/model_demos/nlp_demos/falcon/pytorch_falcon.py b/model_demos/nlp_demos/falcon/pytorch_falcon.py index 4633a287..9831582c 100644 --- a/model_demos/nlp_demos/falcon/pytorch_falcon.py +++ b/model_demos/nlp_demos/falcon/pytorch_falcon.py @@ -1,7 +1,6 @@ # Falcon-7B Demo Script import pybuda -import pytest from pybuda._C.backend_api import BackendDevice from nlp_demos.falcon.utils.model import Falcon @@ -11,7 +10,6 @@ def run_falcon_pytorch(): available_devices = pybuda.detect_available_devices() if available_devices: if available_devices[0] == BackendDevice.Grayskull: - pytest.skip("Model not supported on Grayskull") raise NotImplementedError("Model not supported on Grayskull") # Load model from HuggingFace diff --git a/model_demos/nlp_demos/fuyu8b/pytorch_fuyu8b_past_cache.py b/model_demos/nlp_demos/fuyu8b/pytorch_fuyu8b_past_cache.py new file mode 100644 index 00000000..ea6d7054 --- /dev/null +++ b/model_demos/nlp_demos/fuyu8b/pytorch_fuyu8b_past_cache.py @@ -0,0 +1,282 @@ +# Fuyu8b Demo - Conditional Generation + +import os + +import pybuda +import requests +import torch +import torch.nn as nn +from PIL import Image +from pybuda._C.backend_api import BackendDevice, BackendType +from pybuda.pybudaglobal import TILE_DIM +from pybuda.utils import align_up_tile 
+from transformers import ( + AutoTokenizer, + FuyuConfig, + FuyuForCausalLM, + FuyuImageProcessor, + FuyuProcessor, + LogitsProcessorList, +) +from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask + + +def generate_fuyu_embedding(model, input_ids, image_patches, image_patches_indices): + inputs_embeds = model.language_model.get_input_embeddings()(input_ids) + patch_embeddings = model.vision_embed_tokens(image_patches.to(model.vision_embed_tokens.weight.dtype)) + inputs_embeds = model.gather_continuous_embeddings( + word_embeddings=inputs_embeds, + continuous_embeddings=patch_embeddings, + image_patch_input_indices=image_patches_indices, + ) + return inputs_embeds + + +class FuyuModelImgDecoderWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.fuyu_model = model + self.fuyu_config = model.config + + def forward(self, inputs_embeds, attention_mask): + batch_size, seq_length, hidden_dim = inputs_embeds.shape + position_ids = torch.arange(seq_length, dtype=torch.long) + position_ids = position_ids.unsqueeze(0).view(-1, seq_length) + hidden_states = inputs_embeds + + presents = [] + for idx, decoder_layer in enumerate(self.fuyu_model.language_model.model.layers): + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + output_attentions=False, + use_cache=True, + ) + + hidden_states = layer_outputs[0] + presents.append(layer_outputs[1]) + + hidden_states = self.fuyu_model.language_model.model.final_layernorm(hidden_states) + return hidden_states, *presents + + +class FuyuModelTxtDecoderWrapper(nn.Module): + def __init__(self, model): + super().__init__() + self.fuyu_model = model + self.fuyu_config = model.config + + def forward(self, inputs_embeds, attention_mask, position_ids, *past_key_values): + batch_size, seq_length, _ = inputs_embeds.shape + attention_mask = _prepare_4d_causal_attention_mask( + attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values[0].shape[-2] + ) + + position_ids = position_ids.unsqueeze(0).view(-1, seq_length) + hidden_states = inputs_embeds + + presents = [] + for idx, decoder_layer in enumerate(self.fuyu_model.language_model.model.layers): + pkv = tuple([past_key_values[(idx * 2) + j] for j in range(2)]) + + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=pkv, + output_attentions=False, + use_cache=True, + ) + + hidden_states = layer_outputs[0] + presents.append(layer_outputs[1]) + + hidden_states = self.fuyu_model.language_model.model.final_layernorm(hidden_states) + return hidden_states, *presents + + +def run_fuyu8b_past_cache(): + # Skip tests + available_devices = pybuda.detect_available_devices() + if available_devices[0] == BackendDevice.Grayskull or available_devices[0] == BackendDevice.Wormhole_B0: + raise NotImplementedError("Model not supported.") + + # Set PyBuda configuration parameters + compiler_cfg = pybuda.config._get_global_compiler_config() + compiler_cfg.balancer_policy = "Ribbon" + compiler_cfg.enable_t_streaming = True + compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b + compiler_cfg.enable_tvm_cpu_fallback = False + compiler_cfg.compile_subgraphs = True + compiler_cfg.convert_framework_params_to_tvm = False + compiler_cfg.enable_link_past_cache_ios = True + compiler_cfg.amp_level = 2 + compiler_cfg.default_dram_parameters = True + os.environ["PYBUDA_GRAPHSOLVER_SELF_CUT_TYPE"] = "FastCut" + os.environ["PYBUDA_RIBBON2"] 
= "1" + os.environ["PYBUDA_FORCE_SEQUENTIAL"] = "1" + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{84*1024}" + for i in range(0, 36): + compiler_cfg.balancer_op_override(f"matmul_{i*80+68}", "grid_shape", (1, 8)) + compiler_cfg.balancer_op_override( + f"pt_fuyu8b_past_cache_img.output_concatenate_{i*80+41}_stack", "grid_shape", (1, 1) + ) + compiler_cfg.balancer_op_override( + f"pt_fuyu8b_past_cache_img.output_transpose_{i*80+53}_stack", "grid_shape", (1, 1) + ) + compiler_cfg.balancer_op_override(f"transpose_{i*80+91}.dc.sparse_matmul.4.lc2", "grid_shape", (8, 1)) + compiler_cfg.balancer_op_override(f"transpose_{i*80+111}.dc.sparse_matmul.4.lc2", "grid_shape", (8, 1)) + compiler_cfg.balancer_op_override(f"transpose_{(i-1)*160+281}.dc.sparse_matmul.4.lc2", "grid_shape", (8, 1)) + for i in range(69): + compiler_cfg.balancer_op_override(f"transpose_{i*80+262}.dc.sparse_matmul.4.lc2", "grid_shape", (2, 1)) + for i in range(17): + compiler_cfg.balancer_op_override(f"transpose_{i*160+3081}.dc.sparse_matmul.4.lc2", "grid_shape", (8, 1)) + + # Setup Fuyu8b config + config = FuyuConfig.from_pretrained("adept/fuyu-8b") + config_dict = config.to_dict() + config_dict["return_dict"] = False + config_dict["use_cache"] = False + config_dict["text_config"]["max_position_embeddings"] = 448 # 512 + config_dict["text_config"][ + "pad_token_id" + ] = 0 # set '' equivalent id as pad-token-id of persimmon model (no default value is set) + config = FuyuConfig(**config_dict) + + # Load post-processing modules (run on CPU) + tokenizer = AutoTokenizer.from_pretrained("adept/fuyu-8b") + image_processor = FuyuImageProcessor() + processor = FuyuProcessor(image_processor=image_processor, tokenizer=tokenizer) + + # Create PyBuda module from PyTorch model + fuyu_model = FuyuForCausalLM.from_pretrained("adept/fuyu-8b", config=config) + + # Prepare inputs + text_prompt = "Generate a coco-style caption. 
" + url = "https://huggingface.co/adept/fuyu-8b/resolve/main/bus.png" + image_pil = Image.open(requests.get(url, stream=True).raw).convert("RGB") + model_inputs = processor(text=text_prompt, images=[image_pil], device="cpu", return_tensor="pt") + + # Retrieve config numbers and logit function + persimmon_config = fuyu_model.language_model.model.config + max_length = persimmon_config.max_position_embeddings + _, emb_seq_length = model_inputs["input_ids"].shape + + # Pad input_ids and image_patches_indices + pad_inputs = True + if pad_inputs: + tmp_padding_token = 71128 # set \n as temporary padding string (does not matter) + target_length = max_length - TILE_DIM + org_length = model_inputs["input_ids"].shape[-1] + model_inputs["input_ids"] = torch.nn.functional.pad( + model_inputs["input_ids"], (0, target_length - org_length), "constant", tmp_padding_token + ) + model_inputs["input_ids"][:, org_length - 1] = tmp_padding_token + model_inputs["input_ids"][:, -1] = 71122 + model_inputs["image_patches_indices"] = torch.nn.functional.pad( + model_inputs["image_patches_indices"], + (0, target_length + 10 - model_inputs["image_patches_indices"].shape[-1]), + "constant", + -1, + ) + + # Generate input embedding for the 1st iteration + inputs_embeds = generate_fuyu_embedding( + fuyu_model, model_inputs["input_ids"], model_inputs["image_patches"][0], model_inputs["image_patches_indices"] + ) + inputs_embeds = inputs_embeds.clone().detach() + + # Obtain logit function + logits_processor = fuyu_model._get_logits_processor( + fuyu_model.generation_config, TILE_DIM, inputs_embeds, None, LogitsProcessorList() + ) + + # Prepare compile-inputs for img-decoder + attention_mask = torch.zeros((1, max_length)) + attention_mask[0, :emb_seq_length] = 1 + img_attention_mask = torch.zeros((1, max_length - TILE_DIM), dtype=torch.bool) + img_attention_mask[0, :emb_seq_length] = 1 + img_attention_mask = _prepare_4d_causal_attention_mask( + img_attention_mask, (1, max_length - TILE_DIM), inputs_embeds, 0 + ) + img_decoder_inputs = [inputs_embeds, img_attention_mask] + + # Prepare compile-inputs for txt-decoder + input_ids = torch.zeros((1, TILE_DIM), dtype=torch.int) # 0 (corresponds to '') + inputs_embeds_dummy = torch.zeros((1, TILE_DIM, 4096)) # 4096 is hidden-state dim + position_ids = torch.arange(TILE_DIM, dtype=torch.int).reshape(1, TILE_DIM) + align_up_tile(emb_seq_length) + first_current_index = max_length - TILE_DIM + past_cache_self_shape = ( + 1, + persimmon_config.num_attention_heads, + max_length - TILE_DIM, + persimmon_config.hidden_size // persimmon_config.num_attention_heads, + ) + txt_decoder_inputs = [inputs_embeds_dummy, attention_mask, position_ids.long()] + for _ in range(len(fuyu_model.language_model.model.layers)): + txt_decoder_inputs += [ + torch.zeros(past_cache_self_shape), + torch.zeros(past_cache_self_shape), + ] + + # Instantiate modules + img_decoder = pybuda.PyTorchModule( + "pt_fuyu8b_past_cache_img", FuyuModelImgDecoderWrapper(fuyu_model) + ) # feed inputs_embeds + txt_decoder = pybuda.PyTorchModule( + "pt_fuyu8b_past_cache_txt", FuyuModelTxtDecoderWrapper(fuyu_model) + ) # feed inputs_embeds + + # Place modules + tt0 = pybuda.TTDevice("tt0", module=[img_decoder, txt_decoder]) + + output_q = pybuda.initialize_pipeline(training=False, sample_inputs=((img_decoder_inputs), (txt_decoder_inputs))) + + generated_tokens = [] + current_token_index = align_up_tile(emb_seq_length) + tokens_to_generate = 7 + for idx in range(tokens_to_generate): + if idx == 0: + tt0.set_active_subgraph(0) + 
tt0.push_to_inputs([inputs_embeds, img_attention_mask]) + pybuda.run_generate(input_count=1, write_index=0) + ans = output_q.get() + tt0.set_active_subgraph(1) + else: + tt0.push_to_inputs([inputs_embeds, attention_mask, position_ids]) + pybuda.run_generate( + input_count=1, + write_index=current_token_index // TILE_DIM, + ) + ans = output_q.get() + + hidden_states = ans[0].value().detach() + lm_head = fuyu_model.language_model.lm_head(hidden_states.float()).detach() + _input_ids = torch.cat([torch.tensor([[1]]), input_ids[:, : current_token_index % TILE_DIM]], dim=-1) + if idx == 0: + tokens_scores = logits_processor(_input_ids, lm_head[:, current_token_index - 1, :]) + else: + tokens_scores = logits_processor(_input_ids, lm_head[:, (current_token_index - 1) % TILE_DIM, :]) + next_token = torch.argmax(tokens_scores, dim=-1).item() + generated_tokens.append(next_token) + + current_token_index += 1 + if current_token_index % TILE_DIM == 0: + attention_mask[0, :current_token_index] = 1 + attention_mask[0, first_current_index:] = 0 + position_ids = position_ids + TILE_DIM + input_ids[0, :] = 0 + + input_ids[0, (current_token_index - 1) % TILE_DIM] = next_token + attention_mask[0, first_current_index + ((current_token_index - 1) % TILE_DIM)] = 1 + inputs_embeds = fuyu_model.language_model.model.embed_tokens(input_ids).detach() + + # Post-process + print("generated-tokens = ", generated_tokens) + generated_text = processor.batch_decode(torch.tensor([generated_tokens]), skip_special_tokens=True) + print("generated-text = ", generated_text) + + +if __name__ == "__main__": + run_fuyu8b_past_cache() diff --git a/model_demos/nlp_demos/opt/pytorch_opt_causal_lm.py b/model_demos/nlp_demos/opt/pytorch_opt_causal_lm.py index 621ae6f6..42a9317b 100644 --- a/model_demos/nlp_demos/opt/pytorch_opt_causal_lm.py +++ b/model_demos/nlp_demos/opt/pytorch_opt_causal_lm.py @@ -23,6 +23,8 @@ def run_opt_casual_lm(variant="facebook/opt-350m"): # Disable expanding output buffer of fork nodes - causes out of memory issue in blobgen. 
os.environ["PYBUDA_FORK_JOIN_EXPAND_FORK_OUTPUT_BUF"] = "0" + if variant == "facebook/opt-350m": + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" # Set model configurations config = OPTConfig.from_pretrained(model_ckpt) diff --git a/model_demos/nlp_demos/t5/pytorch_t5_generation.py b/model_demos/nlp_demos/t5/pytorch_t5_generation.py index 8ac77a40..ba8aac04 100644 --- a/model_demos/nlp_demos/t5/pytorch_t5_generation.py +++ b/model_demos/nlp_demos/t5/pytorch_t5_generation.py @@ -3,11 +3,13 @@ import os import pybuda +from pybuda._C.backend_api import BackendDevice from pybuda.transformers.pipeline import pipeline as pybuda_pipeline from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer def run_t5_pybuda_pipeline(variant="t5-small"): + available_devices = pybuda.detect_available_devices() # Add PyBUDA configurations os.environ["PYBUDA_DISABLE_STREAM_OUTPUT"] = "1" @@ -26,6 +28,11 @@ def run_t5_pybuda_pipeline(variant="t5-small"): compiler_cfg.enable_auto_fusing = False compiler_cfg.enable_enumerate_u_kt = False compiler_cfg.enable_amp_light() + if "large" in variant: + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" + if available_devices[0] == BackendDevice.Grayskull: + if "base" in variant: + os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1" # Variants: t5-small, t5-base, t5-large model_ckpt = variant diff --git a/model_demos/nlp_demos/xglm/pytorch_xglm_causal_lm.py b/model_demos/nlp_demos/xglm/pytorch_xglm_causal_lm.py index 3c9469b5..5278d61c 100644 --- a/model_demos/nlp_demos/xglm/pytorch_xglm_causal_lm.py +++ b/model_demos/nlp_demos/xglm/pytorch_xglm_causal_lm.py @@ -2,6 +2,7 @@ import os import pybuda +from pybuda import BackendDevice from pybuda.transformers.pipeline import pipeline as pybuda_pipeline from transformers import AutoTokenizer, XGLMConfig, XGLMForCausalLM @@ -17,9 +18,18 @@ def run_xglm_causal_lm(variant="facebook/xglm-564M"): # Variants: "facebook/xglm-564M", "facebook/xglm-1.7B" model_ckpt = variant - if model_ckpt == "facebook/xglm-1.7B": - os.environ["PYBUDA_FORK_JOIN_SKIP_EXPANDING_BUFFERS"] = "1" - compiler_cfg.amp_level = 1 + available_devices = pybuda.detect_available_devices() + if available_devices: + if model_ckpt == "facebook/xglm-1.7B": + compiler_cfg.amp_level = 1 + if available_devices[0] == BackendDevice.Grayskull: + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = f"{16*1024}" + if (available_devices[0] == BackendDevice.Grayskull and model_ckpt == "facebook/xglm-564M") or ( + available_devices[0] == BackendDevice.Wormhole_B0 + ): + os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536" + if available_devices[0] == BackendDevice.Grayskull and model_ckpt == "facebook/xglm-564M": + compiler_cfg.default_dram_parameters = True # set model configurations config = XGLMConfig.from_pretrained(model_ckpt) diff --git a/model_demos/pyproject.toml b/model_demos/pyproject.toml index f615178f..c682d7eb 100644 --- a/model_demos/pyproject.toml +++ b/model_demos/pyproject.toml @@ -67,4 +67,17 @@ markers = [ "unet: tests that involve U-Net", "falcon: tests that involve Falcon", "stablediffusion: tests that involve Stable Diffusion", + "retinanet: tests that involve RetinaNet", + "beit: tests that involve BeiT", + "fuyu8b: tests that involve Fuyu-8B", + "mlpmixer: tests that involve MLP-Mixer", + "openpose: tests that involve OpenPose", + "vilt: tests that involve ViLT", + "landmark: tests that involve Landmark", + "yolov3: tests that involve YOLOv3", + "efficientnetlite: tests that involve EfficientNet-Lite", + "mobilenetssd: 
tests that involve MobileNet-SSD", + "wideresnet: tests that involve WideResNet", + "xception: tests that involve Xception", + "ghostnet: tests that involve GhostNet", ] diff --git a/model_demos/requirements.txt b/model_demos/requirements.txt index f554d4d6..9c2942a0 100644 --- a/model_demos/requirements.txt +++ b/model_demos/requirements.txt @@ -11,3 +11,4 @@ numba==0.53.1 # For Whisper segmentation-models-pytorch==0.3.3 # For U-Net pylocron==0.2.1 # For U-Net diffusers==0.14.0 # For Stable Diffusion +transformers==4.35.2 # For Fuyu8B diff --git a/model_demos/tests/conftest.py b/model_demos/tests/conftest.py index efbaac7e..affe165b 100644 --- a/model_demos/tests/conftest.py +++ b/model_demos/tests/conftest.py @@ -65,15 +65,9 @@ def archive_files(src_directory=Path("./"), dest_directory=Path("archive")): def pytest_addoption(parser): parser.addoption( - "--silicon-only", - action="store_true", - default=False, - help="run silicon tests only, skip golden/model", + "--silicon-only", action="store_true", default=False, help="run silicon tests only, skip golden/model" ) parser.addoption("--no-silicon", action="store_true", default=False, help="skip silicon tests") parser.addoption( - "--no-skips", - action="store_true", - default=False, - help="ignore pytest.skip() calls, and continue on with test", + "--no-skips", action="store_true", default=False, help="ignore pytest.skip() calls, and continue on with test" ) diff --git a/model_demos/tests/test_onnx_resnet.py b/model_demos/tests/test_onnx_resnet.py new file mode 100644 index 00000000..9cb2fac8 --- /dev/null +++ b/model_demos/tests/test_onnx_resnet.py @@ -0,0 +1,8 @@ +import pytest + +from cv_demos.resnet.onnx_resnet import run_resnet_onnx + + +@pytest.mark.resnet +def test_resnet_onnx(clear_pybuda): + run_resnet_onnx() diff --git a/model_demos/tests/test_onnx_retinanet.py b/model_demos/tests/test_onnx_retinanet.py new file mode 100644 index 00000000..79911b7f --- /dev/null +++ b/model_demos/tests/test_onnx_retinanet.py @@ -0,0 +1,8 @@ +import pytest + +from cv_demos.retinanet.onnx_retinanet_r101 import run_retinanet_r101_640x480_onnx + + +@pytest.mark.retinanet +def test_retinanet_onnx(clear_pybuda): + run_retinanet_r101_640x480_onnx() diff --git a/model_demos/tests/test_pytorch_beit.py b/model_demos/tests/test_pytorch_beit.py new file mode 100644 index 00000000..c8c8e25b --- /dev/null +++ b/model_demos/tests/test_pytorch_beit.py @@ -0,0 +1,11 @@ +import pytest + +from cv_demos.beit.pytorch_beit_classify_16_224_hf import run_beit_classify_224_hf_pytorch + +variants = ["microsoft/beit-base-patch16-224", "microsoft/beit-large-patch16-224"] + + +@pytest.mark.parametrize("variant", variants, ids=variants) +@pytest.mark.beit +def test_beit_classify_224_hf_pytorch(clear_pybuda, variant): + run_beit_classify_224_hf_pytorch(variant) diff --git a/model_demos/tests/test_pytorch_distilbert.py b/model_demos/tests/test_pytorch_distilbert.py index 8b22fe5a..9ec60eb5 100644 --- a/model_demos/tests/test_pytorch_distilbert.py +++ b/model_demos/tests/test_pytorch_distilbert.py @@ -7,11 +7,7 @@ ) from nlp_demos.distilbert.pytorch_distilbert_token_classification import run_distilbert_token_classification_pytorch -variants = [ - "distilbert-base-uncased", - "distilbert-base-cased", - "distilbert-base-multilingual-cased", -] +variants = ["distilbert-base-uncased", "distilbert-base-cased", "distilbert-base-multilingual-cased"] @pytest.mark.parametrize("variant", variants, ids=variants) diff --git a/model_demos/tests/test_pytorch_dpr.py 
b/model_demos/tests/test_pytorch_dpr.py index 1f7e14b9..98079889 100644 --- a/model_demos/tests/test_pytorch_dpr.py +++ b/model_demos/tests/test_pytorch_dpr.py @@ -4,18 +4,9 @@ from nlp_demos.dpr.pytorch_dpr_question_encoder import run_dpr_question_encoder_pytorch from nlp_demos.dpr.pytorch_dpr_reader import run_dpr_reader_pytorch -variants_ctx = [ - "facebook/dpr-ctx_encoder-single-nq-base", - "facebook/dpr-ctx_encoder-multiset-base", -] -variants_qe = [ - "facebook/dpr-question_encoder-single-nq-base", - "facebook/dpr-question_encoder-multiset-base", -] -variants_reader = [ - "facebook/dpr-reader-single-nq-base", - "facebook/dpr-reader-multiset-base", -] +variants_ctx = ["facebook/dpr-ctx_encoder-single-nq-base", "facebook/dpr-ctx_encoder-multiset-base"] +variants_qe = ["facebook/dpr-question_encoder-single-nq-base", "facebook/dpr-question_encoder-multiset-base"] +variants_reader = ["facebook/dpr-reader-single-nq-base", "facebook/dpr-reader-multiset-base"] @pytest.mark.parametrize("variant", variants_ctx, ids=variants_ctx) diff --git a/model_demos/tests/test_pytorch_fuyu8b.py b/model_demos/tests/test_pytorch_fuyu8b.py new file mode 100644 index 00000000..0320fed0 --- /dev/null +++ b/model_demos/tests/test_pytorch_fuyu8b.py @@ -0,0 +1,8 @@ +import pytest + +from nlp_demos.fuyu8b.pytorch_fuyu8b_past_cache import run_fuyu8b_past_cache + + +@pytest.mark.fuyu8b +def test_fuyu8b_past_cache_pytorch(clear_pybuda): + run_fuyu8b_past_cache() diff --git a/model_demos/tests/test_pytorch_ghostnet.py b/model_demos/tests/test_pytorch_ghostnet.py new file mode 100644 index 00000000..aa980e32 --- /dev/null +++ b/model_demos/tests/test_pytorch_ghostnet.py @@ -0,0 +1,8 @@ +import pytest + +from cv_demos.ghostnet.timm_ghostnet import run_ghostnet_timm + + +@pytest.mark.ghostnet +def test_ghostnet_timm_pytorch(clear_pybuda): + run_ghostnet_timm() diff --git a/model_demos/tests/test_pytorch_mlpmixer.py b/model_demos/tests/test_pytorch_mlpmixer.py new file mode 100644 index 00000000..3030eebe --- /dev/null +++ b/model_demos/tests/test_pytorch_mlpmixer.py @@ -0,0 +1,8 @@ +import pytest + +from cv_demos.mlpmixer.timm_mlpmixer import run_mlpmixer_timm + + +@pytest.mark.mlpmixer +def test_mlpmixer_timm(clear_pybuda): + run_mlpmixer_timm() diff --git a/model_demos/tests/test_pytorch_openpose.py b/model_demos/tests/test_pytorch_openpose.py new file mode 100644 index 00000000..d097e014 --- /dev/null +++ b/model_demos/tests/test_pytorch_openpose.py @@ -0,0 +1,14 @@ +import pytest + +from cv_demos.openpose.pytorch_lwopenpose_2d_osmr import run_lwopenpose_2d_osmr_pytorch +from cv_demos.openpose.pytorch_lwopenpose_3d_osmr import run_lwopenpose_3d_osmr_pytorch + + +@pytest.mark.openpose +def test_openpose_2d_osmr(clear_pybuda): + run_lwopenpose_2d_osmr_pytorch() + + +@pytest.mark.openpose +def test_openpose_3d_osmr(clear_pybuda): + run_lwopenpose_3d_osmr_pytorch() diff --git a/model_demos/tests/test_pytorch_vgg.py b/model_demos/tests/test_pytorch_vgg.py index 415e123f..0442fb91 100644 --- a/model_demos/tests/test_pytorch_vgg.py +++ b/model_demos/tests/test_pytorch_vgg.py @@ -5,16 +5,7 @@ from cv_demos.vgg.pytorch_vgg_timm import run_vgg_bn19_timm_pytorch from cv_demos.vgg.pytorch_vgg_torchhub import run_vgg_bn19_torchhub_pytorch -variants1 = [ - "vgg11", - "vgg13", - "vgg16", - "vgg19", - "vgg11_bn", - "vgg13_bn", - "vgg16_bn", - "vgg19_bn", -] +variants1 = ["vgg11", "vgg13", "vgg16", "vgg19", "vgg11_bn", "vgg13_bn", "vgg16_bn", "vgg19_bn"] variants2 = ["vgg11", "vgg13", "vgg16", "vgg19", "bn_vgg19", "bn_vgg19b"] diff 
--git a/model_demos/tests/test_pytorch_vilt.py b/model_demos/tests/test_pytorch_vilt.py new file mode 100644 index 00000000..cddea4f9 --- /dev/null +++ b/model_demos/tests/test_pytorch_vilt.py @@ -0,0 +1,14 @@ +import pytest + +from cv_demos.vilt.pytorch_vilt_maskedlm import run_vilt_maskedlm_pytorch +from cv_demos.vilt.pytorch_vilt_question_answering import run_vilt_for_question_answering_pytorch + + +@pytest.mark.vilt +def test_vilt_for_question_answering_pytorch(clear_pybuda): + run_vilt_for_question_answering_pytorch() + + +@pytest.mark.vilt +def test_vilt_maskedlm_pytorch(clear_pybuda): + run_vilt_maskedlm_pytorch() diff --git a/model_demos/tests/test_pytorch_wideresnet.py b/model_demos/tests/test_pytorch_wideresnet.py new file mode 100644 index 00000000..db3251cd --- /dev/null +++ b/model_demos/tests/test_pytorch_wideresnet.py @@ -0,0 +1,18 @@ +import pytest + +from cv_demos.wideresnet.pytorch_wideresnet_timm import run_wideresnet_timm_pytorch +from cv_demos.wideresnet.pytorch_wideresnet_torchhub import run_wideresnet_torchhub_pytorch + +variants = ["wide_resnet50_2", "wide_resnet101_2"] + + +@pytest.mark.parametrize("variant", variants, ids=variants) +@pytest.mark.wideresnet +def test_wideresnet_torchhub_pytorch(clear_pybuda, variant): + run_wideresnet_torchhub_pytorch(variant) + + +@pytest.mark.parametrize("variant", variants, ids=variants) +@pytest.mark.wideresnet +def test_wideresnet_timm_pytorch(clear_pybuda, variant): + run_wideresnet_timm_pytorch(variant) diff --git a/model_demos/tests/test_pytorch_xception.py b/model_demos/tests/test_pytorch_xception.py new file mode 100644 index 00000000..d4aa3206 --- /dev/null +++ b/model_demos/tests/test_pytorch_xception.py @@ -0,0 +1,11 @@ +import pytest + +from cv_demos.xception.timm_xception import run_xception_timm + +variants = ["xception", "xception41", "xception65", "xception71"] + + +@pytest.mark.parametrize("variant", variants, ids=variants) +@pytest.mark.xception +def test_xception_timm_pytorch(clear_pybuda, variant): + run_xception_timm(variant) diff --git a/model_demos/tests/test_pytorch_yolov3.py b/model_demos/tests/test_pytorch_yolov3.py new file mode 100644 index 00000000..745183c3 --- /dev/null +++ b/model_demos/tests/test_pytorch_yolov3.py @@ -0,0 +1,20 @@ +import pytest + +from cv_demos.yolo_v3.pytorch_yolov3_holli import run_yolov3_holli_pytorch +from cv_demos.yolo_v3.pytorch_yolov3_holli_1x1 import run_yolov3_holli_pytorch_1x1 +from cv_demos.yolo_v3.pytorch_yolov3_tiny_holli import run_yolov3_tiny_holli_pytorch + + +@pytest.mark.yolov3 +def test_yolov3_holli(clear_pybuda): + run_yolov3_holli_pytorch() + + +@pytest.mark.yolov3 +def test_yolov3_holli_tiny(clear_pybuda): + run_yolov3_tiny_holli_pytorch() + + +@pytest.mark.yolov3 +def test_yolov3_holli_1x1(clear_pybuda): + run_yolov3_holli_pytorch_1x1() diff --git a/model_demos/tests/test_tflite_efficientnet_lite.py b/model_demos/tests/test_tflite_efficientnet_lite.py new file mode 100644 index 00000000..64cb61d0 --- /dev/null +++ b/model_demos/tests/test_tflite_efficientnet_lite.py @@ -0,0 +1,14 @@ +import pytest + +from cv_demos.efficientnet_lite.tflite_efficientnet_lite0_1x1 import run_efficientnet_lite0_1x1 +from cv_demos.efficientnet_lite.tflite_efficientnet_lite4_1x1 import run_efficientnet_lite4_1x1 + + +@pytest.mark.efficientnetlite +def test_efficientnet_lite0_1x1(clear_pybuda): + run_efficientnet_lite0_1x1() + + +@pytest.mark.efficientnetlite +def test_efficientnet_lite4_1x1(clear_pybuda): + run_efficientnet_lite4_1x1() diff --git 
a/model_demos/tests/test_tflite_landmark.py b/model_demos/tests/test_tflite_landmark.py new file mode 100644 index 00000000..b3f14632 --- /dev/null +++ b/model_demos/tests/test_tflite_landmark.py @@ -0,0 +1,20 @@ +import pytest + +from cv_demos.landmark.hand_landmark_lite_1x1 import run_hand_landmark_lite_1x1 +from cv_demos.landmark.palm_detection_lite_1x1 import run_palm_detection_lite_1x1 +from cv_demos.landmark.pose_landmark_lite_1x1 import run_pose_landmark_lite_1x1 + + +@pytest.mark.landmark +def test_hand_landmark_lite_1x1(): + run_hand_landmark_lite_1x1() + + +@pytest.mark.landmark +def test_palm_detection_lite_1x1(): + run_palm_detection_lite_1x1() + + +@pytest.mark.landmark +def test_pose_landmark_lite_1x1(): + run_pose_landmark_lite_1x1() diff --git a/model_demos/tests/test_tflite_mobilenet_ssd.py b/model_demos/tests/test_tflite_mobilenet_ssd.py new file mode 100644 index 00000000..baf99e5b --- /dev/null +++ b/model_demos/tests/test_tflite_mobilenet_ssd.py @@ -0,0 +1,8 @@ +import pytest + +from cv_demos.mobilenet_ssd.tflite_mobilenet_v2_ssd_1x1 import run_mobilenetv2_ssd_1x1_tflite + + +@pytest.mark.mobilenetssd +def test_mobilenetv2_ssd_1x1_tflite(clear_pybuda): + run_mobilenetv2_ssd_1x1_tflite()
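
Note on the Fuyu-8B past-cache hunk earlier in this patch: the generation loop steers pybuda.run_generate with tile-aligned indices, where write_index selects which tile-wide slice of the past cache is written and (current_token_index - 1) % TILE_DIM is the slot inside the current tile that receives the new token. The sketch below reproduces only that index arithmetic so it can be sanity-checked in isolation; TILE_DIM = 32, the align_up_tile definition, and the example prompt length are assumptions, not taken verbatim from the patch.

    TILE_DIM = 32  # assumed pybuda tile width


    def align_up_tile(n: int, tile_dim: int = TILE_DIM) -> int:
        # Assumed helper: round n up to the next multiple of the tile width.
        return ((n + tile_dim - 1) // tile_dim) * tile_dim


    def decode_step_indices(emb_seq_length: int, steps: int):
        # Mirrors the bookkeeping in run_fuyu8b_past_cache(): for each generated token,
        # yield the past-cache bank passed as write_index to run_generate, the slot in
        # input_ids that receives the new token, and whether the step just filled a tile
        # (the point where attention_mask and position_ids are rolled forward).
        # For the very first step the demo instead runs the image-decoder subgraph
        # with write_index=0; this sketch shows only the text-decoder arithmetic.
        current_token_index = align_up_tile(emb_seq_length)
        for _ in range(steps):
            write_index = current_token_index // TILE_DIM
            current_token_index += 1
            new_tile = current_token_index % TILE_DIM == 0
            slot_in_tile = (current_token_index - 1) % TILE_DIM
            yield write_index, slot_in_tile, new_tile


    if __name__ == "__main__":
        # Example only: a prompt that packs into 11 tiles (334 -> 352), generating
        # 7 tokens as in the demo.
        for step in decode_step_indices(emb_seq_length=334, steps=7):
            print(step)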
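
The markers registered in pyproject.toml and the options kept in conftest.py are what the new test modules above hook into; as a minimal usage sketch, a marker can be used to select one demo family through pytest's public API. The marker names (ghostnet, wideresnet, xception, yolov3, ...), the --no-silicon option, and the tests/ directory come from this patch; invoking this from the model_demos directory is an assumption.

    import sys

    import pytest

    if __name__ == "__main__":
        # Equivalent to `pytest -m ghostnet --no-silicon tests` run from model_demos:
        # select only the GhostNet demo tests and skip silicon tests, per the
        # --no-silicon option registered in conftest.py.
        marker = sys.argv[1] if len(sys.argv) > 1 else "ghostnet"
        sys.exit(pytest.main(["-m", marker, "--no-silicon", "tests"]))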