Fixes for CI/CD compatibility #446

Merged · 7 commits · Oct 18, 2024 · Changes from 5 commits
@@ -263,78 +263,78 @@
"text": [
"Epoch 1\n",
"-------------------------------\n",
"loss: 2.299719 [ 64/60000]\n",
"loss: 2.293332 [ 6464/60000]\n",
"loss: 2.269917 [12864/60000]\n",
"loss: 2.260744 [19264/60000]\n",
"loss: 2.247810 [25664/60000]\n",
"loss: 2.222256 [32064/60000]\n",
"loss: 2.225422 [38464/60000]\n",
"loss: 2.195026 [44864/60000]\n",
"loss: 2.194622 [51264/60000]\n",
"loss: 2.158175 [57664/60000]\n",
"loss: 2.301038 [ 64/60000]\n",
"loss: 2.289769 [ 6464/60000]\n",
"loss: 2.268618 [12864/60000]\n",
"loss: 2.264085 [19264/60000]\n",
"loss: 2.244277 [25664/60000]\n",
"loss: 2.209504 [32064/60000]\n",
"loss: 2.220515 [38464/60000]\n",
"loss: 2.185288 [44864/60000]\n",
"loss: 2.186121 [51264/60000]\n",
"loss: 2.149065 [57664/60000]\n",
"Test Error: \n",
" Accuracy: 47.1%, Avg loss: 2.153042 \n",
" Accuracy: 37.8%, Avg loss: 2.151644 \n",
"\n",
"Epoch 2\n",
"-------------------------------\n",
"loss: 2.162534 [ 64/60000]\n",
"loss: 2.154336 [ 6464/60000]\n",
"loss: 2.091042 [12864/60000]\n",
"loss: 2.104471 [19264/60000]\n",
"loss: 2.054451 [25664/60000]\n",
"loss: 2.001035 [32064/60000]\n",
"loss: 2.025180 [38464/60000]\n",
"loss: 1.949615 [44864/60000]\n",
"loss: 1.957106 [51264/60000]\n",
"loss: 1.876436 [57664/60000]\n",
"loss: 2.164946 [ 64/60000]\n",
"loss: 2.157853 [ 6464/60000]\n",
"loss: 2.100765 [12864/60000]\n",
"loss: 2.117897 [19264/60000]\n",
"loss: 2.058581 [25664/60000]\n",
"loss: 1.995217 [32064/60000]\n",
"loss: 2.026708 [38464/60000]\n",
"loss: 1.948186 [44864/60000]\n",
"loss: 1.959582 [51264/60000]\n",
"loss: 1.881658 [57664/60000]\n",
"Test Error: \n",
" Accuracy: 54.6%, Avg loss: 1.876885 \n",
" Accuracy: 52.5%, Avg loss: 1.886264 \n",
"\n",
"Epoch 3\n",
"-------------------------------\n",
"loss: 1.906243 [ 64/60000]\n",
"loss: 1.879715 [ 6464/60000]\n",
"loss: 1.758657 [12864/60000]\n",
"loss: 1.795318 [19264/60000]\n",
"loss: 1.692177 [25664/60000]\n",
"loss: 1.652430 [32064/60000]\n",
"loss: 1.669603 [38464/60000]\n",
"loss: 1.583420 [44864/60000]\n",
"loss: 1.603508 [51264/60000]\n",
"loss: 1.493881 [57664/60000]\n",
"loss: 1.922469 [ 64/60000]\n",
"loss: 1.893279 [ 6464/60000]\n",
"loss: 1.780482 [12864/60000]\n",
"loss: 1.822908 [19264/60000]\n",
"loss: 1.696129 [25664/60000]\n",
"loss: 1.653140 [32064/60000]\n",
"loss: 1.675662 [38464/60000]\n",
"loss: 1.584822 [44864/60000]\n",
"loss: 1.609127 [51264/60000]\n",
"loss: 1.500899 [57664/60000]\n",
"Test Error: \n",
" Accuracy: 61.9%, Avg loss: 1.514976 \n",
" Accuracy: 60.3%, Avg loss: 1.521902 \n",
"\n",
"Epoch 4\n",
"-------------------------------\n",
"loss: 1.573342 [ 64/60000]\n",
"loss: 1.548722 [ 6464/60000]\n",
"loss: 1.402007 [12864/60000]\n",
"loss: 1.461628 [19264/60000]\n",
"loss: 1.353920 [25664/60000]\n",
"loss: 1.358175 [32064/60000]\n",
"loss: 1.361608 [38464/60000]\n",
"loss: 1.302804 [44864/60000]\n",
"loss: 1.330850 [51264/60000]\n",
"loss: 1.224925 [57664/60000]\n",
"loss: 1.593910 [ 64/60000]\n",
"loss: 1.555975 [ 6464/60000]\n",
"loss: 1.412051 [12864/60000]\n",
"loss: 1.480928 [19264/60000]\n",
"loss: 1.348195 [25664/60000]\n",
"loss: 1.352939 [32064/60000]\n",
"loss: 1.361179 [38464/60000]\n",
"loss: 1.298819 [44864/60000]\n",
"loss: 1.325064 [51264/60000]\n",
"loss: 1.226879 [57664/60000]\n",
"Test Error: \n",
" Accuracy: 63.8%, Avg loss: 1.254037 \n",
" Accuracy: 63.2%, Avg loss: 1.254962 \n",
"\n",
"Epoch 5\n",
"-------------------------------\n",
"loss: 1.321162 [ 64/60000]\n",
"loss: 1.315946 [ 6464/60000]\n",
"loss: 1.152864 [12864/60000]\n",
"loss: 1.244943 [19264/60000]\n",
"loss: 1.130193 [25664/60000]\n",
"loss: 1.160290 [32064/60000]\n",
"loss: 1.168214 [38464/60000]\n",
"loss: 1.123758 [44864/60000]\n",
"loss: 1.158085 [51264/60000]\n",
"loss: 1.063427 [57664/60000]\n",
"loss: 1.337471 [ 64/60000]\n",
"loss: 1.314826 [ 6464/60000]\n",
"loss: 1.155245 [12864/60000]\n",
"loss: 1.257553 [19264/60000]\n",
"loss: 1.123370 [25664/60000]\n",
"loss: 1.155071 [32064/60000]\n",
"loss: 1.168100 [38464/60000]\n",
"loss: 1.119365 [44864/60000]\n",
"loss: 1.149572 [51264/60000]\n",
"loss: 1.067573 [57664/60000]\n",
"Test Error: \n",
" Accuracy: 65.1%, Avg loss: 1.089330 \n",
" Accuracy: 64.4%, Avg loss: 1.090368 \n",
"\n",
"Done!\n"
]
@@ -531,18 +531,42 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 24,
"id": "362b266b",
"metadata": {},
"outputs": [],
"source": [
"import torch_tensorrt as trt\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f0ac1362",
"metadata": {},
"outputs": [],
"source": [
"# Optional: set the filename for the TensorRT timing cache\n",
"timestamp = time.time()\n",
"timing_cache = f\"/tmp/timing_cache-{timestamp}.bin\"\n",
"with open(timing_cache, \"wb\") as f:\n",
" pass"
]
},
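The cell above is the core of the CI/CD fix: each run now creates its own empty timing-cache file and hands that path to the compile calls later in the diff, presumably so that repeated or parallel CI runs don't reuse a stale shared cache at the default `/tmp/timing_cache.bin` (visible in the old log line below). A minimal, self-contained sketch of the pattern, pieced together from the hunks in this PR — the small model here is an illustrative stand-in for the notebook's trained network and is not part of the diff:

```python
import time

import torch
import torch_tensorrt as trt

# Illustrative stand-in for the notebook's trained 784-input classifier.
model = torch.nn.Sequential(
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 10),
).to("cuda").eval()

# Per-run timing cache: a unique, empty file that TensorRT will populate.
timing_cache = f"/tmp/timing_cache-{time.time()}.bin"
with open(timing_cache, "wb"):
    pass

# Dimension 0 of the input is dynamic, so batch-size changes within the
# declared range do not trigger recompilation.
inputs_bs1 = torch.randn((1, 784), dtype=torch.float).to("cuda")
torch._dynamo.mark_dynamic(inputs_bs1, 0, min=1, max=64)

model_trt = trt.compile(
    model,
    ir="torch_compile",
    inputs=inputs_bs1,
    enabled_precisions={torch.float},
    timing_cache_path=timing_cache,
)

# Run the compiled module on a dedicated CUDA stream, as the notebook does.
stream = torch.cuda.Stream()
with torch.no_grad(), torch.cuda.stream(stream):
    out = model_trt(inputs_bs1)
```

The same `timing_cache` path is reused by the ExportedProgram compile path further down in the diff.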
{
"cell_type": "code",
"execution_count": 20,
"id": "f3e3bdc4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:torch_tensorrt.dynamo.conversion.aten_ops_converters:Unable to import quantization op. Please install modelopt library (https://github.com/NVIDIA/TensorRT-Model-Optimizer?tab=readme-ov-file#installation) to add support for compiling quantized models\n",
"INFO:torch_tensorrt.dynamo.utils:Using Default Torch-TRT Runtime (as requested by user)\n",
"INFO:torch_tensorrt.dynamo.utils:Device not specified, using Torch default current device - cuda:0. If this is incorrect, please specify an input device, via the device keyword.\n",
"INFO:torch_tensorrt.dynamo.utils:Compilation Settings: CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, refit=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/timing_cache.bin')\n",
"INFO:torch_tensorrt.dynamo.utils:Compilation Settings: CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=False, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, refit=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/timing_cache-1729187850.4862776.bin')\n",
"\n",
"WARNING:torch_tensorrt.dynamo._compiler:Node _param_constant1 of op type get_attr does not have metadata. This could sometimes lead to undefined behavior.\n",
"WARNING:torch_tensorrt.dynamo._compiler:Some nodes do not have metadata (shape and dtype information). This could lead to problems sometimes if the graph has PyTorch and TensorRT segments.\n",
@@ -552,23 +576,23 @@
"WARNING:py.warnings:/home/rishic/anaconda3/envs/spark-dl-torch/lib/python3.11/site-packages/torch_tensorrt/dynamo/conversion/impl/activation/base.py:40: DeprecationWarning: Use Deprecated in TensorRT 10.1. Superseded by explicit quantization. instead.\n",
" if input_val.dynamic_range is not None and dyn_range_fn is not None:\n",
"\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT INetwork construction elapsed time: 0:00:00.005708\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT INetwork construction elapsed time: 0:00:00.005662\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Global timing cache in use. Profiling results in this builder pass will be stored.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Detected 1 inputs and 1 output network tensors.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Host Persistent Memory: 21984\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Device Persistent Memory: 0\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Scratch Memory: 0\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Started assigning block shifts. This will take 4 steps to complete.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Algorithm ShiftNTopDown took 0.257559ms to assign 2 blocks to 4 nodes requiring 4096 bytes.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Algorithm ShiftNTopDown took 0.115746ms to assign 2 blocks to 4 nodes requiring 4096 bytes.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Activation Memory: 4096\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Weights Memory: 2678824\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Engine generation completed in 0.023755 seconds.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Engine generation completed in 1.58824 seconds.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 1 MiB, GPU 5 MiB\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage during Engine building and serialization: CPU: 3800 MiB\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Build TRT engine elapsed time: 0:00:00.027501\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage during Engine building and serialization: CPU: 3950 MiB\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Build TRT engine elapsed time: 0:00:01.591865\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT Engine uses: 2832188 bytes of Memory\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 26 bytes of code generator cache.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 176 timing cache entries\n"
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 43 timing cache entries\n"
]
},
{
@@ -580,8 +604,6 @@
}
],
"source": [
"import torch_tensorrt as trt\n",
"\n",
"inputs_bs1 = torch.randn((1, 784), dtype=torch.float).to(\"cuda\")\n",
"# This indicates dimension 0 of inputs_bs1 is dynamic whose range of values is [1, 50]. No recompilation will happen when the batch size changes.\n",
"torch._dynamo.mark_dynamic(inputs_bs1, 0, min=1, max=64)\n",
@@ -590,6 +612,7 @@
" ir=\"torch_compile\",\n",
" inputs=inputs_bs1,\n",
" enabled_precisions={torch.float},\n",
" timing_cache_path=timing_cache,\n",
")\n",
"\n",
"stream = torch.cuda.Stream()\n",
@@ -612,39 +635,39 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 21,
"id": "6b8f1b45",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:torch_tensorrt.dynamo._compiler:Compilation Settings: CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=True, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, refit=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/timing_cache.bin')\n",
"INFO:torch_tensorrt.dynamo._compiler:Compilation Settings: CompilationSettings(enabled_precisions={<dtype.f32: 7>}, debug=False, workspace_size=0, min_block_size=5, torch_executed_ops=set(), pass_through_build_failures=False, max_aux_streams=None, version_compatible=False, optimization_level=None, use_python_runtime=False, truncate_double=False, use_fast_partitioner=True, enable_experimental_decompositions=False, device=Device(type=DeviceType.GPU, gpu_id=0), require_full_compilation=True, disable_tf32=False, assume_dynamic_shape_support=False, sparse_weights=False, refit=False, engine_capability=<EngineCapability.STANDARD: 1>, num_avg_timing_iters=1, dla_sram_size=1048576, dla_local_dram_size=1073741824, dla_global_dram_size=536870912, dryrun=False, hardware_compatible=False, timing_cache_path='/tmp/timing_cache-1729187850.4862776.bin')\n",
"\n",
"INFO:torch_tensorrt.dynamo._compiler:Partitioning the graph via the fast partitioner\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 624, GPU 715 (MiB)\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageChange] Init builder kernel library: CPU +1632, GPU +286, now: CPU 2256, GPU 1001 (MiB)\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 758, GPU 715 (MiB)\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageChange] Init builder kernel library: CPU +1633, GPU +286, now: CPU 2391, GPU 1001 (MiB)\n",
"WARNING:py.warnings:/home/rishic/anaconda3/envs/spark-dl-torch/lib/python3.11/site-packages/torch_tensorrt/dynamo/conversion/impl/activation/base.py:40: DeprecationWarning: Use Deprecated in TensorRT 10.1. Superseded by explicit quantization. instead.\n",
" if input_val.dynamic_range is not None and dyn_range_fn is not None:\n",
"\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT INetwork construction elapsed time: 0:00:00.004551\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT INetwork construction elapsed time: 0:00:00.004664\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Global timing cache in use. Profiling results in this builder pass will be stored.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Detected 1 inputs and 1 output network tensors.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Host Persistent Memory: 21984\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Device Persistent Memory: 0\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Scratch Memory: 0\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Started assigning block shifts. This will take 4 steps to complete.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Algorithm ShiftNTopDown took 0.133258ms to assign 2 blocks to 4 nodes requiring 4096 bytes.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[BlockAssignment] Algorithm ShiftNTopDown took 0.113766ms to assign 2 blocks to 4 nodes requiring 4096 bytes.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Activation Memory: 4096\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Total Weights Memory: 2678824\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Engine generation completed in 0.0190609 seconds.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Engine generation completed in 0.022595 seconds.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 1 MiB, GPU 5 MiB\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage during Engine building and serialization: CPU: 3818 MiB\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Build TRT engine elapsed time: 0:00:00.021306\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:[MemUsageStats] Peak memory usage during Engine building and serialization: CPU: 3968 MiB\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:Build TRT engine elapsed time: 0:00:00.025016\n",
"INFO:torch_tensorrt.dynamo.conversion._TRTInterpreter:TRT Engine uses: 2833124 bytes of Memory\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 26 bytes of code generator cache.\n",
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 176 timing cache entries\n"
"INFO:torch_tensorrt [TensorRT Conversion Context]:Serialized 43 timing cache entries\n"
]
},
{
@@ -662,7 +685,9 @@
"# Produce traced graph in the ExportedProgram format\n",
"exp_program = trt.dynamo.trace(model_from_state, inputs)\n",
"# Compile the traced graph to produce an optimized module\n",
"trt_gm = trt.dynamo.compile(exp_program, inputs=inputs, require_full_compilation=True)\n",
"trt_gm = trt.dynamo.compile(exp_program, \n",
" inputs=inputs, \n",
" timing_cache_path=timing_cache)\n",
"\n",
"stream = torch.cuda.Stream()\n",
"with torch.no_grad(), torch.cuda.stream(stream):\n",
@@ -681,22 +706,10 @@
},
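For the ExportedProgram path changed in the hunk above, the new source threads the same per-run cache through `trt.dynamo.trace` / `trt.dynamo.compile`. A rough, self-contained sketch under the same assumptions as before (the model, input shape, and cache path are illustrative, not taken from the PR):

```python
import time

import torch
import torch_tensorrt as trt

# Illustrative stand-in for the model the notebook reloads from a state dict.
model_from_state = torch.nn.Sequential(
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 10),
).to("cuda").eval()

inputs = [torch.randn((64, 784), dtype=torch.float).to("cuda")]

# Same per-run timing-cache idea as above (assumed path for illustration).
timing_cache = f"/tmp/timing_cache-{time.time()}.bin"
with open(timing_cache, "wb"):
    pass

# Produce a traced graph in the ExportedProgram format, then compile it.
exp_program = trt.dynamo.trace(model_from_state, inputs)
trt_gm = trt.dynamo.compile(
    exp_program,
    inputs=inputs,
    timing_cache_path=timing_cache,
)

stream = torch.cuda.Stream()
with torch.no_grad(), torch.cuda.stream(stream):
    out = trt_gm(*inputs)
```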
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 23,
"id": "d87e4b20",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:py.warnings:/home/rishic/anaconda3/envs/spark-dl-torch/lib/python3.11/site-packages/torch_tensorrt/dynamo/_exporter.py:364: UserWarning: Attempted to insert a get_attr Node with no underlying reference in the owning GraphModule! Call GraphModule.add_submodule to add the necessary submodule, GraphModule.add_parameter to add the necessary Parameter, or nn.Module.register_buffer to add the necessary buffer\n",
" engine_node = gm.graph.get_attr(engine_name)\n",
"\n",
"WARNING:py.warnings:/home/rishic/anaconda3/envs/spark-dl-torch/lib/python3.11/site-packages/torch/fx/graph.py:1545: UserWarning: Node _run_on_acc_0_engine target _run_on_acc_0_engine _run_on_acc_0_engine of does not reference an nn.Module, nn.Parameter, or buffer, which is what 'get_attr' Nodes typically target\n",
" warnings.warn(f'Node {node} target {node.target} {atom} of {seen_qualname} does '\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",