From 3127725f9c769945309df5c979abe55cc145aea8 Mon Sep 17 00:00:00 2001
From: Charles Frye
Date: Fri, 22 Nov 2024 09:45:26 -0800
Subject: [PATCH] stop slandering H100s (#992)

Messed up my SI prefixes -- peta is quadrillion, not trillion
---
 06_gpu_and_ml/llm-serving/trtllm_llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/06_gpu_and_ml/llm-serving/trtllm_llama.py b/06_gpu_and_ml/llm-serving/trtllm_llama.py
index add12a5bf..571902e0f 100644
--- a/06_gpu_and_ml/llm-serving/trtllm_llama.py
+++ b/06_gpu_and_ml/llm-serving/trtllm_llama.py
@@ -151,7 +151,7 @@ def download_model():
 # NVIDIA's Ada Lovelace/Hopper chips, like the 4090, L40S, and H100,
 # are capable of native calculations in 8bit floating point numbers, so we choose that as our quantization format (`qformat`).
 # These GPUs are capable of twice as many floating point operations per second in 8bit as in 16bit --
-# about a trillion per second on an H100.
+# about two quadrillion per second on an H100 SXM.
 
 N_GPUS = 1  # Heads up: this example has not yet been tested with multiple GPUs
 GPU_CONFIG = modal.gpu.H100(count=N_GPUS)
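
For anyone double-checking the corrected figure, here is a minimal back-of-the-envelope sketch in Python. The ~1979 / ~989 TFLOPS dense Tensor Core peaks are NVIDIA's published H100 SXM datasheet numbers, not something this patch asserts, and the variable names are illustrative:

    TERA = 10**12  # SI "tera" = one trillion
    PETA = 10**15  # SI "peta" = one quadrillion

    h100_sxm_fp8_peak = 1979 * TERA   # dense FP8 Tensor Core FLOP/s (NVIDIA H100 SXM datasheet)
    h100_sxm_fp16_peak = 989 * TERA   # dense FP16 Tensor Core FLOP/s -- half the FP8 rate

    print(h100_sxm_fp8_peak / PETA)   # ~1.979, i.e. "about two quadrillion per second"
    print(h100_sxm_fp8_peak / h100_sxm_fp16_peak)  # ~2.0: twice as many ops in 8bit as in 16bit

So "peta" (quadrillion) is the right prefix for H100-class FP8 throughput; "tera" (trillion) undersold it by three orders of magnitude.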