2312 TT-BUDA release alignment (#3)
* initial track_pybuda; 2313 rc

* fix typo in ViLT tests

* Modify test case paths

* Modify clean up command to include onnx and tflite file formats

* Fix ONNX download paths for ResNet and RetinaNet

* Add NotImplemented error to Fuyu-8B model

* Fix ONNX model paths

* Add clean up for .h5 files

* Remove .png files from clean up

* Add wideresnet in model_demos

* Add Xception in model_demos

* Add GhostNet in model_demos

* Fix model demos table

* Fix WideResNet and Xception file paths

* Stream image and label files

* Patch Xception variant for GS silicon

* Skip Fuyu-8B (WIP)

* Remove commented code
milank94 authored Jan 5, 2024
1 parent 79d6d42 commit bb43aa2
Showing 69 changed files with 2,921 additions and 87 deletions.
4 changes: 4 additions & 0 deletions model_demos/Makefile
@@ -41,4 +41,8 @@ clean_tt:
	@find . | grep -E ".pkl_memoize_py3" | xargs rm -rf
	@find . | grep -E "generated_modules" | xargs rm -rf
	@find . | grep -E "tt_build" | xargs rm -rf
	@find . -type f -name "*.onnx" | xargs rm -f
	@find . -type f -name "*.tflite" | xargs rm -f
	@find . -type f -name "*.h5" | xargs rm -f
	@find . -type f -name "*.png" | xargs rm -f
	@echo "All done cleaning TT files!"
24 changes: 19 additions & 5 deletions model_demos/README.md
@@ -31,37 +31,51 @@ python cv_demos/resnet/pytorch_resnet.py
|-------------------------------------------|:--------:|
| [ALBERT](nlp_demos/albert/) | GS, WH |
| [Autoencoder](cv_demos/autoencoder/) | GS, WH |
| [BeiT](cv_demos/beit/) | GS, WH |
| [BERT](nlp_demos/bert/) | GS, WH |
| [CLIP](cv_demos/clip/) | GS, WH |
| [CodeGen](nlp_demos/codegen/) | GS, WH |
| [DeiT](cv_demos/deit/) | GS, WH |
| [DenseNet](cv_demos/densenet/) | GS, WH |
| [DistilBERT](nlp_demos/distilbert/) | GS, WH |
| [DPR](nlp_demos/dpr/) | GS, WH |
| [Falcon](nlp_demos/falcon/) | WH |
| [EfficientNet-Lite](cv_demos/efficientnet_lite/) | WH |
| [Falcon-7B](nlp_demos/falcon/) | WH |
| [FLAN-T5](nlp_demos/flant5/) | GS, WH |
| [Fuyu-8B](nlp_demos/fuyu8b/) | |
| [GhostNet](cv_demos/ghostnet/) | GS, WH |
| [GoogLeNet](cv_demos/googlenet/) | GS, WH |
| [GPT-2](nlp_demos/gpt2/) | GS, WH |
| [GPT Neo](nlp_demos/gptneo/) | GS, WH |
| [Hand Landmark](nlp_demos/landmark/) | WH |
| [HRNet](cv_demos/hrnet/) | GS, WH |
| [Inception-v4](cv_demos/inceptionv4/) | GS, WH |
| [MobileNetV1](cv_demos/mobilenetv1/) | GS, WH |
| [MobileNetV2](cv_demos/mobilenetv2/) | GS, WH |
| [MobileNetV3](cv_demos/mobilenetv3/) | GS, WH |
| [MLP-Mixer](cv_demos/mlpmixer/) | GS, WH |
| [MobileNetSSD](cv_demos/mobilenet_ssd/) | WH |
| [MobileNetV1](cv_demos/mobilenet_v1/) | GS, WH |
| [MobileNetV2](cv_demos/mobilenet_v2/) | GS, WH |
| [MobileNetV3](cv_demos/mobilenet_v3/) | GS, WH |
| [OpenPose](nlp_demos/openpose/) | GS, WH |
| [OPT](nlp_demos/opt/) | GS, WH |
| [Pose Landmark](nlp_demos/landmark/) | WH |
| [ResNet](cv_demos/resnet/) | GS, WH |
| [ResNeXt](cv_demos/resnext/) | GS, WH |
| [RetinaNet](cv_demos/retinanet/) | GS, WH |
| [RoBERTa](nlp_demos/roberta/) | GS, WH |
| [SqueezeBERT](nlp_demos/squeezebert/) | GS, WH |
| [Stable Diffusion](cv_demos/stable_diffusion/) | WH |
| [T5](nlp_demos/t5/) | GS, WH |
| [U-Net](cv_demos/unet/) | GS, WH |
| [VGG](cv_demos/vgg/) | GS, WH |
| [ViT](cv_demos/vit/) | GS, WH |
| [ViLT](cv_demos/vilt/) | GS, WH |
| [VoVNet](cv_demos/vovnet/) | GS, WH |
| [WideResNet](cv_demos/wideresnet/) | GS, WH |
| [Whisper](audio_demos/whisper/) | GS, WH |
| [Xception](cv_demos/xception/) | GS, WH |
| [XGLM](nlp_demos/xglm/) | GS, WH |
| [YOLOv5](cv_demos/yolov5/) | GS, WH |
| [YOLOv3](cv_demos/yolo_v3/) | GS, WH |
| [YOLOv5](cv_demos/yolo_v5/) | GS, WH |

## Contributing

@@ -18,9 +18,11 @@ def run_whisper_generation(variant="openai/whisper-small"):
    compiler_cfg.amp_level = 2
    compiler_cfg.enable_enumerate_u_kt = False
    compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b
    os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1"
    if "small" in variant:
        os.environ["PYBUDA_NLP_MANUAL_TARGET"] = "35000"

    elif "medium" in variant or "large" in variant:
        os.environ["TT_BACKEND_OVERLAY_MAX_EXTRA_BLOB_SIZE"] = "65536"
        available_devices = pybuda.detect_available_devices()
        if available_devices[0] == BackendDevice.Grayskull:
            softmax_ops_to_override = [
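
As a usage note: the "medium"/"large" branch above only switches environment configuration, the entry point is unchanged. A minimal, hypothetical invocation with a larger checkpoint (the "openai/whisper-medium" string is an assumption, patterned on the default "openai/whisper-small" argument):

# Hypothetical call; assumes the module above is importable and a Tenstorrent device is attached.
run_whisper_generation(variant="openai/whisper-medium")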
57 changes: 57 additions & 0 deletions model_demos/cv_demos/beit/pytorch_beit_classify_16_224_hf.py
@@ -0,0 +1,57 @@
# BeiT Model Demo

import os

import pybuda
import requests
from PIL import Image
from pybuda._C.backend_api import BackendDevice
from transformers import BeitForImageClassification, BeitImageProcessor


def run_beit_classify_224_hf_pytorch(variant="microsoft/beit-base-patch16-224"):

    # Set PyBuda configuration parameters
    compiler_cfg = pybuda.config._get_global_compiler_config()
    available_devices = pybuda.detect_available_devices()

    compiler_cfg.enable_t_streaming = True
    if variant == "microsoft/beit-base-patch16-224":
        compiler_cfg.retain_tvm_python_files = True
        compiler_cfg.enable_tvm_constant_prop = True
        if available_devices[0] == BackendDevice.Grayskull:
            os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
    elif variant == "microsoft/beit-large-patch16-224":
        if available_devices[0] == BackendDevice.Grayskull:
            compiler_cfg.retain_tvm_python_files = True
            compiler_cfg.enable_tvm_constant_prop = True
            os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
        else:
            compiler_cfg.default_df_override = pybuda._C.DataFormat.Float16_b

    # Create PyBuda module from PyTorch model
    image_processor = BeitImageProcessor.from_pretrained(variant)
    model = BeitForImageClassification.from_pretrained(variant)
    tt_model = pybuda.PyTorchModule("pt_beit_classif_16_224", model)

    # Get sample image
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    sample_image = Image.open(requests.get(url, stream=True).raw)

    # Preprocessing
    img_tensor = image_processor(sample_image, return_tensors="pt").pixel_values

    # Run inference on Tenstorrent device
    output_q = pybuda.run_inference(tt_model, inputs=([img_tensor]))
    output = output_q.get()[0].value().detach().float().numpy()

    # Postprocessing
    predicted_class_idx = output.argmax(-1).item()

    # Print output
    print("Predicted class:", predicted_class_idx)
    print(model.config.id2label[predicted_class_idx])


if __name__ == "__main__":
    run_beit_classify_224_hf_pytorch()
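
As shown in the variant branches above, the demo also handles the large checkpoint. A hypothetical invocation selecting it explicitly (assumes the module above is importable and a device is attached):

# Exercises the "microsoft/beit-large-patch16-224" branch of the configuration logic above.
run_beit_classify_224_hf_pytorch(variant="microsoft/beit-large-patch16-224")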
12 changes: 9 additions & 3 deletions model_demos/cv_demos/clip/pytorch_clip.py
@@ -5,7 +5,9 @@
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
from transformers.models.clip.modeling_clip import _expand_mask, _make_causal_mask

# from transformers.models.clip.modeling_clip import _expand_mask, _make_causal_mask
from transformers.modeling_attn_mask_utils import _create_4d_causal_attention_mask, _prepare_4d_attention_mask


class CLIPVisionWrapper(torch.nn.Module):
@@ -32,11 +34,15 @@ def forward(self, input_ids, attention_mask):
        hidden_states = self.clip_model.text_model.embeddings(input_ids=input_ids, position_ids=None)

        bsz, seq_len = input_shape
        causal_attention_mask = _make_causal_mask(input_shape, hidden_states.dtype, device=hidden_states.device)
        # CLIP's text model uses causal mask, prepare it here.
        # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
        causal_attention_mask = _create_4d_causal_attention_mask(
            input_shape, hidden_states.dtype, device=hidden_states.device
        )
        # expand attention_mask
        if attention_mask is not None:
            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
            attention_mask = _expand_mask(attention_mask, hidden_states.dtype)
            attention_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)

        encoder_outputs = self.clip_model.text_model.encoder(
            inputs_embeds=hidden_states,
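
The change above replaces the removed _make_causal_mask/_expand_mask helpers with their counterparts from transformers.modeling_attn_mask_utils (available in recent transformers releases; the exact minimum version is an assumption, roughly 4.35+). A small standalone sketch of what the two helpers produce, with illustrative shapes not taken from the demo:

# Standalone sketch of the newer mask helpers used in the wrapper above.
import torch
from transformers.modeling_attn_mask_utils import (
    _create_4d_causal_attention_mask,
    _prepare_4d_attention_mask,
)

bsz, seq_len, hidden_dim = 2, 7, 512
hidden_states = torch.zeros(bsz, seq_len, hidden_dim)

# Additive causal mask (positions after the current token are masked): [bsz, 1, seq_len, seq_len]
causal_attention_mask = _create_4d_causal_attention_mask(
    (bsz, seq_len), hidden_states.dtype, device=hidden_states.device
)

# Padding mask expanded from [bsz, seq_len] to [bsz, 1, seq_len, seq_len]
attention_mask = torch.ones(bsz, seq_len, dtype=torch.long)
expanded_mask = _prepare_4d_attention_mask(attention_mask, hidden_states.dtype)

print(causal_attention_mask.shape, expanded_mask.shape)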
3 changes: 1 addition & 2 deletions model_demos/cv_demos/densenet/pytorch_densenet.py
@@ -50,14 +50,13 @@ def run_densenet_pytorch(variant="densenet121"):
        if available_devices[0] == BackendDevice.Wormhole_B0:
            compiler_cfg.default_dram_parameters = False
            compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
        elif available_devices[0] == BackendDevice.Grayskull:
            os.environ["PYBUDA_RIBBON2"] = "1"

    elif variant == "densenet161":
        compiler_cfg.balancer_policy = "CNN"
        compiler_cfg.enable_t_streaming = True
        compiler_cfg.place_on_new_epoch("concatenate_131.dc.sparse_matmul.7.lc2")
        os.environ["PYBUDA_DISABLE_CONSTANT_FOLDING"] = "1"
        os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1"
        os.environ["PYBUDA_GRAPHSOLVER_SELF_CUT_TYPE"] = "ConsumerOperandDataEdgesFirst"
        # Device specific configurations
        available_devices = pybuda.detect_available_devices()
@@ -0,0 +1,63 @@
# EfficientNet-Lite0 1x1 demo

import os
import shutil
import tarfile

import pybuda
import requests
import torch
from pybuda import TFLiteModule
from pybuda._C.backend_api import BackendDevice


def run_efficientnet_lite0_1x1():

    # Device specific configurations
    available_devices = pybuda.detect_available_devices()
    if available_devices:
        if available_devices[0] != BackendDevice.Wormhole_B0:
            raise NotImplementedError("Model not supported on Grayskull")

    # Set PyBuda configuration parameters
    compiler_cfg = pybuda.config._get_global_compiler_config()
    compiler_cfg.balancer_policy = "Ribbon"
    compiler_cfg.enable_t_streaming = True
    compiler_cfg.enable_tvm_constant_prop = True
    compiler_cfg.graph_solver_self_cut_type = "FastCut"
    compiler_cfg.default_df_override = pybuda.DataFormat.Float16

    # Set PyBuda environment variables
    os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml"
    os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"

    # Download model weights
    MODEL = "efficientnet-lite0"
    url = f"https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{MODEL}.tar.gz"
    extract_to = "cv_demos/efficientnet_lite"
    file_name = url.split("/")[-1]
    response = requests.get(url, stream=True)
    with open(file_name, "wb") as f:
        f.write(response.content)
    with tarfile.open(file_name, "r:gz") as tar:
        tar.extractall(path=extract_to)
    os.remove(file_name)

    # Load model path
    tflite_path = f"cv_demos/efficientnet_lite/{MODEL}/{MODEL}-fp32.tflite"
    tt_model = TFLiteModule("tflite_efficientnet_lite0", tflite_path)

    # Run inference on Tenstorrent device
    input_shape = (1, 224, 224, 3)
    input_tensor = torch.rand(input_shape)

    output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
    output = output_q.get()[0].value().detach().float().numpy()
    print(output)

    # Remove remnant files
    shutil.rmtree(extract_to + "/" + MODEL)


if __name__ == "__main__":
    run_efficientnet_lite0_1x1()
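
One possible refinement, not part of this commit: the demo re-downloads and re-extracts the archive on every run. A hypothetical caching helper that skips the download when the .tflite file is already on disk (URL and directory layout mirror run_efficientnet_lite0_1x1 above):

# Hypothetical helper; not part of the demo. Returns the path to the cached .tflite file.
import os
import tarfile

import requests


def fetch_efficientnet_lite(model="efficientnet-lite0", extract_to="cv_demos/efficientnet_lite"):
    tflite_path = os.path.join(extract_to, model, f"{model}-fp32.tflite")
    if os.path.exists(tflite_path):
        return tflite_path  # already downloaded and extracted

    url = f"https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{model}.tar.gz"
    archive = url.split("/")[-1]
    with open(archive, "wb") as f:
        f.write(requests.get(url, stream=True).content)
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall(path=extract_to)
    os.remove(archive)
    return tflite_path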
@@ -0,0 +1,63 @@
# EfficientNet-Lite4 1x1 demo

import os
import shutil
import tarfile

import pybuda
import requests
import torch
from pybuda import TFLiteModule
from pybuda._C.backend_api import BackendDevice


def run_efficientnet_lite4_1x1():

    # Device specific configurations
    available_devices = pybuda.detect_available_devices()
    if available_devices:
        if available_devices[0] != BackendDevice.Wormhole_B0:
            raise NotImplementedError("Model not supported on Grayskull")

    # Set PyBuda configuration parameters
    compiler_cfg = pybuda.config._get_global_compiler_config()
    compiler_cfg.balancer_policy = "Ribbon"
    compiler_cfg.enable_t_streaming = True
    compiler_cfg.enable_tvm_constant_prop = True
    compiler_cfg.graph_solver_self_cut_type = "FastCut"
    compiler_cfg.default_df_override = pybuda.DataFormat.Float16

    # Set PyBuda environment variables
    os.environ["PYBUDA_OVERRIDE_DEVICE_YAML"] = "wormhole_b0_1x1.yaml"
    os.environ["PYBUDA_FORCE_CONV_MULTI_OP_FRACTURE"] = "1"

    # Download model weights
    MODEL = "efficientnet-lite4"
    url = f"https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{MODEL}.tar.gz"
    extract_to = "cv_demos/efficientnet_lite"
    file_name = url.split("/")[-1]
    response = requests.get(url, stream=True)
    with open(file_name, "wb") as f:
        f.write(response.content)
    with tarfile.open(file_name, "r:gz") as tar:
        tar.extractall(path=extract_to)
    os.remove(file_name)

    # Load model path
    tflite_path = f"cv_demos/efficientnet_lite/{MODEL}/{MODEL}-fp32.tflite"
    tt_model = TFLiteModule("tflite_efficientnet_lite4", tflite_path)

    # Run inference on Tenstorrent device
    input_shape = (1, 320, 320, 3)
    input_tensor = torch.rand(input_shape)

    output_q = pybuda.run_inference(tt_model, inputs=([input_tensor]))
    output = output_q.get()[0].value().detach().float().numpy()
    print(output)

    # Remove remnant files
    shutil.rmtree(extract_to + "/" + MODEL)


if __name__ == "__main__":
    run_efficientnet_lite4_1x1()
53 changes: 53 additions & 0 deletions model_demos/cv_demos/ghostnet/timm_ghostnet.py
@@ -0,0 +1,53 @@
# Ghostnet

import os
import urllib

import pybuda
import requests
import timm
import torch
from PIL import Image


def run_ghostnet_timm():
# Set PyBuda configuration parameters
compiler_cfg = pybuda.config._get_global_compiler_config() # load global compiler config object
compiler_cfg.enable_t_streaming = True
compiler_cfg.balancer_policy = "Ribbon"
compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
os.environ["PYBUDA_RIBBON2"] = "1"

model = timm.create_model("ghostnet_100", pretrained=True)

# Create PyBuda module from PyTorch model
tt_model = pybuda.PyTorchModule("ghostnet_100_timm_pt", model)

data_config = timm.data.resolve_data_config({}, model=model)
transforms = timm.data.create_transform(**data_config)

url = "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"
img = Image.open(requests.get(url, stream=True).raw).convert("RGB")
img_tensor = transforms(img).unsqueeze(0)

# Run inference on Tenstorrent device
output_q = pybuda.run_inference(tt_model, inputs=([img_tensor]))
output = output_q.get()[0].value()

top5_probabilities, top5_class_indices = torch.topk(output.softmax(dim=1) * 100, k=5)

# Get imagenet class mappings
url = "https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt"
image_classes = urllib.request.urlopen(url)
categories = [s.decode("utf-8").strip() for s in image_classes.readlines()]

for i in range(top5_probabilities.size(1)):
class_idx = top5_class_indices[0, i].item()
class_prob = top5_probabilities[0, i].item()
class_label = categories[class_idx]

print(f"{class_label} : {class_prob}")


if __name__ == "__main__":
run_ghostnet_timm()
2 changes: 2 additions & 0 deletions model_demos/cv_demos/hrnet/pytorch_hrnet_osmr.py
@@ -1,5 +1,6 @@
# HRNet Demo Script

import os
import urllib

import pybuda
@@ -22,6 +23,7 @@ def run_hrnet_osmr_pytorch(variant="hrnet_w18_small_v1"):
    compiler_cfg.balancer_policy = "CNN"
    compiler_cfg.enable_t_streaming = True
    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
    os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1"

    # Variant specific configurations
    if variant in ["hrnetv2_w44", "hrnetv2_w48"]:
2 changes: 2 additions & 0 deletions model_demos/cv_demos/hrnet/pytorch_hrnet_timm.py
@@ -1,5 +1,6 @@
# HRNet Demo Script

import os
import urllib

import pybuda
@@ -23,6 +24,7 @@ def run_hrnet_timm_pytorch(variant="hrnet_w18_small"):
    compiler_cfg.balancer_policy = "CNN"
    compiler_cfg.enable_t_streaming = True
    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
    os.environ["PYBUDA_LEGACY_UBLOCK_SHAPE"] = "1"

    # Variant specific configurations
    if variant == "hrnet_w48":