diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb
index ce7abec4..ea07ce28 100644
--- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb
+++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/image_classification_torch.ipynb
@@ -547,7 +547,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Optional: create a unique file for the TensorRT timing cache\n",
+    "# Optional: set the filename for the TensorRT timing cache\n",
     "timestamp = time.time()\n",
     "timing_cache = f\"/tmp/timing_cache-{timestamp}.bin\"\n",
     "with open(timing_cache, \"wb\") as f:\n",
@@ -687,7 +687,6 @@
     "# Compile the traced graph to produce an optimized module\n",
     "trt_gm = trt.dynamo.compile(exp_program, \n",
     "                            inputs=inputs, \n",
-    "                            require_full_compilation=True, \n",
     "                            timing_cache_path=timing_cache)\n",
     "\n",
     "stream = torch.cuda.Stream()\n",
diff --git a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb
index d2fd9157..5ccc22ec 100644
--- a/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb
+++ b/examples/ML+DL-Examples/Spark-DL/dl_inference/pytorch/regression_torch.ipynb
@@ -539,6 +539,31 @@
    "(You may see a warning about modelopt quantization. This is safe to ignore, as [implicit quantization](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#intro-quantization) is deprecated in the latest TensorRT. See [this link](https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_fp8_ptq.html) for a guide to explicit quantization.)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ffb27fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch_tensorrt as trt\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0c10f90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: set the filename for the TensorRT timing cache\n",
+    "timestamp = time.time()\n",
+    "timing_cache = f\"/tmp/timing_cache-{timestamp}.bin\"\n",
+    "with open(timing_cache, \"wb\") as f:\n",
+    "    pass"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 20,
@@ -599,8 +624,6 @@
     }
    ],
    "source": [
-    "import torch_tensorrt as trt\n",
-    "\n",
     "inputs_bs1 = torch.randn((10, 8), dtype=torch.float).to(\"cuda\")\n",
     "# This indicates dimension 0 of inputs_bs1 is dynamic whose range of values is [1, 50]. No recompilation will happen when the batch size changes.\n",
     "torch._dynamo.mark_dynamic(inputs_bs1, 0, min=1, max=50)\n",
@@ -609,6 +632,7 @@
     "    ir=\"torch_compile\",\n",
     "    inputs=inputs_bs1,\n",
     "    enabled_precisions={torch.float},\n",
+    "    timing_cache_path=timing_cache,\n",
     ")\n",
     "\n",
     "stream = torch.cuda.Stream()\n",
@@ -719,7 +743,9 @@
     "# Produce traced graph in the ExportedProgram format\n",
     "exp_program = trt.dynamo.trace(loaded_mlp, inputs)\n",
     "# Compile the traced graph to produce an optimized module\n",
-    "trt_gm = trt.dynamo.compile(exp_program, inputs=inputs, device='cuda:0')\n",
+    "trt_gm = trt.dynamo.compile(exp_program,\n",
+    "                            inputs=inputs,\n",
+    "                            timing_cache_path=timing_cache)\n",
     "\n",
     "stream = torch.cuda.Stream()\n",
     "with torch.no_grad(), torch.cuda.stream(stream):\n",
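
After this change, both notebooks share the same pattern: pre-create an empty timing-cache file, pass its path to trt.dynamo.compile via timing_cache_path, and run the compiled module on a dedicated CUDA stream. For reference, a minimal standalone sketch of that flow, assuming torch_tensorrt is installed and a GPU is available (SimpleMLP and its input shape are hypothetical stand-ins for the notebooks' own models; the torch_tensorrt calls mirror the ones in the diff above):

import time

import torch
import torch_tensorrt as trt

# Hypothetical stand-in model; the notebooks use their own trained networks.
class SimpleMLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(8, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.net(x)

model = SimpleMLP().eval().to("cuda")
inputs = [torch.randn((10, 8), dtype=torch.float, device="cuda")]

# Pre-create an empty timing-cache file; TensorRT fills it during the first
# engine build and reuses it to speed up subsequent builds.
timing_cache = f"/tmp/timing_cache-{time.time()}.bin"
with open(timing_cache, "wb"):
    pass

# Trace to an ExportedProgram, then compile with the shared timing cache.
exp_program = trt.dynamo.trace(model, inputs)
trt_gm = trt.dynamo.compile(exp_program,
                            inputs=inputs,
                            timing_cache_path=timing_cache)

# Run inference on a dedicated CUDA stream, as the notebooks do.
stream = torch.cuda.Stream()
with torch.no_grad(), torch.cuda.stream(stream):
    out = trt_gm(*inputs)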