Merge branch 'master' into patemotter_maxtext_offline
patemotter authored Dec 6, 2024
2 parents d1a1ac1 + 4afadb4 commit 911a1c3
Showing 38 changed files with 1,505 additions and 382 deletions.
12 changes: 6 additions & 6 deletions .github/CODEOWNERS
@@ -1,8 +1,8 @@
 # Default owners for everything in the repo, unless a later match takes precedence.
-* @RissyRan @allenwang28
+* @mbzomowski @RissyRan @allenwang28

-dags/solutions_team/configs/tensorflow @chandrasekhard2 @ZhaoyueCheng
-dags/solutions_team/solutionsteam_tf* @chandrasekhard2 @ZhaoyueCheng
+dags/solutions_team/configs/tensorflow @chandrasekhard2 @ZhaoyueCheng @richardsliu
+dags/solutions_team/solutionsteam_tf* @chandrasekhard2 @ZhaoyueCheng @richardsliu

 dags/pytorch_xla @JackCaoG @vanbasten23 @zpcore @ManfeiBai
 dags/legacy_test/tests/pytorch @JackCaoG @vanbasten23 @zpcore @ManfeiBai
@@ -11,8 +11,8 @@ dags/multipod @jonb377 @tonyjohnchen @raymondzouu @gobbleturk @shralex @RissyRan

 dags/mlcompass @ortibazar @sganeshb @brajiang @wlzhg

-dags/imagegen_devx @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh
-dags/imagegen_devx/project_bite* @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh @jiya-zhang
-dags/imagegen_devx/configs/project_bite* @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh @jiya-zhang
+dags/sparsity_diffusion_devx @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh
+dags/sparsity_diffusion_devx/project_bite* @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh @jiya-zhang
+dags/sparsity_diffusion_devx/configs/project_bite* @RissyRan @parambole @jiangjy1982 @aireenmei @michelle-yooh @jiya-zhang

 dags/inference @yeandy @vipannalla @morgandu @mailvijayasingh @sixiang-google @joezijunzhou @singh-mitali
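
Note: the comment at the top of this file is the key to reading it — every path gets the default owners unless a later pattern also matches, in which case the later rule wins. As a rough, hedged illustration of that last-match-wins behavior (not code from this repository, and fnmatch only approximates GitHub's CODEOWNERS glob semantics):

# Hedged sketch of CODEOWNERS resolution: the last matching pattern wins.
# Not code from this repository; fnmatch only approximates GitHub's glob rules,
# and the owner lists are trimmed-down examples.
from fnmatch import fnmatch

RULES = [
    ("*", ["@mbzomowski", "@RissyRan", "@allenwang28"]),            # default owners
    ("dags/sparsity_diffusion_devx*", ["@RissyRan", "@parambole"]),
    ("dags/inference*", ["@yeandy", "@vipannalla"]),
]

def owners_for(path: str) -> list[str]:
    owners: list[str] = []
    for pattern, rule_owners in RULES:
        if fnmatch(path, pattern):
            owners = rule_owners  # a later match overrides earlier ones
    return owners

# Inference DAGs resolve to the inference reviewers; everything else falls back
# to the repo-wide default owners.
print(owners_for("dags/inference/trt_llm_inference.py"))
print(owners_for("README.md"))
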
1 change: 1 addition & 0 deletions .github/requirements.txt
@@ -9,3 +9,4 @@ jsonlines
 tensorflow-cpu
 kubernetes
 pyarrow
+apache-airflow-providers-google
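
apache-airflow-providers-google is the distribution that ships Airflow's Google Cloud hooks and operators, which the DAGs in this repository rely on; adding it here lets the CI environment resolve those imports. A minimal hedged sketch of the kind of call it enables (the bucket name and prefix below are placeholders, not taken from this repo):

# Hedged sketch: an import that requires apache-airflow-providers-google.
# Bucket name and prefix are placeholders for illustration only.
from airflow.providers.google.cloud.hooks.gcs import GCSHook

def list_metric_reports() -> list[str]:
    hook = GCSHook(gcp_conn_id="google_cloud_default")
    return hook.list("example-results-bucket", prefix="metric_report")
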
5 changes: 5 additions & 0 deletions dags/inference/configs/trt_llm_inference_config.py
@@ -57,6 +57,7 @@ def get_trt_llm_gpu_config(
 "gsutil -m cp -r gs://tohaowu/llama_3_8B_Instruct_HF_model .",
 "gsutil -m cp -r gs://tohaowu/llama_3.1_70B_Instruct_HF_model .",
 "gsutil -m cp -r gs://tohaowu/Mixtral-8x22B-Instruct-v0.1 .",
+"gsutil -m cp -r gs://yijiaj/gemma/gemma-2-27b-it .",
 "sudo apt-get update",
 "sudo apt-get -y install git git-lfs",
 "git clone https://github.com/NVIDIA/TensorRT-LLM.git",
@@ -103,10 +104,14 @@ def get_trt_llm_gpu_config(
 "trtllm-build --checkpoint_dir /scratch/tllm_checkpoint_8gpu_tp8 --output_dir /scratch/llama/70B/trt_engines/fp16/8-gpu/ --gemm_plugin auto",
 "python ../llama/convert_checkpoint.py --model_dir /scratch/Mixtral-8x22B-Instruct-v0.1 --output_dir /scratch/tllm_checkpoint_mixtral_8gpu --dtype float16 --tp_size 8 --moe_tp_size 2 --moe_ep_size 4",
 "trtllm-build --checkpoint_dir /scratch/tllm_checkpoint_mixtral_8gpu --output_dir /scratch/trt_engines/mixtral/tp2ep4",
+"cd ../gemma",
+"python3 convert_checkpoint.py --ckpt-type hf --model-dir /scratch/gemma-2-27b-it/ --dtype bfloat16 --world-size 1 --output-model-dir /scratch/checkpoints/tmp_27b_it_tensorrt_llm/bf16/tp1/",
+"trtllm-build --checkpoint_dir /scratch/checkpoints/tmp_27b_it_tensorrt_llm/bf16/tp1/ --gemm_plugin auto --max_batch_size 8 --max_input_len 3000 --max_seq_len 3100 --output_dir /scratch/gemma2/27b/bf16/1-gpu/",
 "cd ../../benchmarks/python",
 "python benchmark.py -m dec --engine_dir /scratch/llama/8B/trt_engines/fp16/1-gpu/ --csv",
 "OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 mpirun -n 8 python benchmark.py -m dec --engine_dir /scratch/llama/70B/trt_engines/fp16/8-gpu/ --csv",
 "OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 mpirun -n 8 python benchmark.py -m dec --engine_dir /scratch/trt_engines/mixtral/tp2ep4 --csv",
+"python benchmark.py -m dec --engine_dir /scratch/gemma2/27b/bf16/1-gpu/ --dtype bfloat16 --csv",
 make_jsonl_convert_cmd,
 f"python jsonl_converter.py {jsonl_output_path}",
 )
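
The new Gemma 2 27B commands follow the usual TensorRT-LLM flow: copy the Hugging Face checkpoint from GCS, convert it to a TensorRT-LLM checkpoint with the Gemma example's convert_checkpoint.py, build a bfloat16 single-GPU engine with trtllm-build, then time it with benchmark.py. As a hedged sketch of how those steps chain together outside this DAG (the working-directory layout is assumed, not taken from the config):

# Hedged sketch: chaining the Gemma 2 27B steps added above into one shell
# invocation so the `cd` commands carry over. Paths assume the TensorRT-LLM
# repository layout; this is illustrative, not the DAG's exact sequence.
import subprocess

GEMMA_STEPS = (
    "cd TensorRT-LLM/examples/gemma",
    # HF checkpoint -> TensorRT-LLM checkpoint (bfloat16, single GPU)
    "python3 convert_checkpoint.py --ckpt-type hf --model-dir /scratch/gemma-2-27b-it/"
    " --dtype bfloat16 --world-size 1"
    " --output-model-dir /scratch/checkpoints/tmp_27b_it_tensorrt_llm/bf16/tp1/",
    # TensorRT-LLM checkpoint -> engine
    "trtllm-build --checkpoint_dir /scratch/checkpoints/tmp_27b_it_tensorrt_llm/bf16/tp1/"
    " --gemm_plugin auto --max_batch_size 8 --max_input_len 3000 --max_seq_len 3100"
    " --output_dir /scratch/gemma2/27b/bf16/1-gpu/",
    # Time decoder-only inference on the built engine
    "cd ../../benchmarks/python",
    "python benchmark.py -m dec --engine_dir /scratch/gemma2/27b/bf16/1-gpu/ --dtype bfloat16 --csv",
)

# One shell for the whole sequence so directory changes persist between steps.
subprocess.run(" && ".join(GEMMA_STEPS), shell=True, check=True)
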
28 changes: 17 additions & 11 deletions dags/inference/configs/trt_llm_mlperf_v41_config.py
@@ -36,7 +36,7 @@ def get_trt_llm_mlperf_gpu_config(
 project: Project,
 network: str,
 subnetwork: str,
-general_configs: Dict = {},
+benchmark_configs: Dict = {},
 model_parameters: Dict = {},
 parameter_positions: Dict = {},
 binary_search_steps: int = 1,
@@ -59,10 +59,11 @@ def get_trt_llm_mlperf_gpu_config(
 "sudo chmod a+w /scratch",
 "cd /scratch",
 # Prepare data
-f"gsutil -m cp -n -r gs://yijiaj/mlperf/v41/Google_GPU .",
-f"gsutil -m cp -n -r {general_configs['models']} .",
-f"gsutil -m cp -n -r {general_configs['preprocessed_data']} .",
-f"gsutil -m cp -n -r {general_configs['docker_config']} .",
+"gsutil -m cp -n -r gs://yijiaj/mlperf/v41/Google_GPU .",
+"gsutil -m cp -n -r gs://tohaowu/mlpinf-v40/mlperf_inf_dlrmv2 .",
+f"gsutil -m cp -n -r {benchmark_configs['models']} .",
+f"gsutil -m cp -n -r {benchmark_configs['preprocessed_data']} .",
+f"gsutil -m cp -n -r {benchmark_configs['docker_config']} .",
 "curl -sSL https://get.docker.com/ | sh",
 "sudo mkdir -p /home/cloud-ml-auto-solutions/.docker",
 "sudo touch ~/.docker/config.json",
@@ -78,7 +79,7 @@
 # Build and launch a docker container
 "PARTNER_DROP=1 make prebuild DOCKER_DETACH=1",
 "make docker_add_user",
-f"make launch_docker DOCKER_NAME={docker_container_name} DOCKER_ARGS='-d'",
+f"make launch_docker DOCKER_NAME={docker_container_name} DOCKER_ARGS='-v /scratch/mlperf_inf_dlrmv2:/home/mlperf_inf_dlrmv2 -d'",
 )

 jsonl_output_path = "metric_report.jsonl"
@@ -108,17 +109,18 @@ def get_trt_llm_mlperf_gpu_config(
 make_jsonl_converter_cmd = f'echo "{py_script}" > jsonl_converter.py'

 model_parameters_sweep_cmds = []
-for model_name in general_configs["model_name"].split(","):
+for model_name in benchmark_configs["model_name"].split(","):
+scenario = ",".join(model_parameters[model_name])
 if accelerator_type == GpuVersion.L4:
 model_parameters_sweep_cmds.append(
-f'CUDA_VISIBLE_DEVICES=0 make generate_engines RUN_ARGS=\'--benchmarks={model_name} --scenarios={general_configs["scenario"]}\''
+f"CUDA_VISIBLE_DEVICES=0 make generate_engines RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
 )
 else:
 model_parameters_sweep_cmds.append(
-f'make generate_engines RUN_ARGS=\'--benchmarks={model_name} --scenarios={general_configs["scenario"]}\''
+f"make generate_engines RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
 )

-for model_name in general_configs["model_name"].split(","):
+for model_name in benchmark_configs["model_name"].split(","):
 for scenario in model_parameters[model_name]:
 for parameter in model_parameters[model_name][scenario]:
 steps = 2 ** (binary_search_steps - 1) + 1
@@ -153,6 +155,8 @@ def get_trt_llm_mlperf_gpu_config(
 docker_cmds = [
 "make link_dirs",
 "make build BUILD_TRTLLM=1",
+"pip install huggingface_hub==0.24.7",
+"lscpu",
 ]
 if accelerator_type == GpuVersion.L4:
 docker_cmds.append(
@@ -180,7 +184,9 @@ def get_trt_llm_mlperf_gpu_config(
 runtime_version=RUNTIME_IMAGE,
 network=network,
 subnetwork=subnetwork,
-attach_local_ssd=True,
+attach_local_ssd=True
+if accelerator_type != GpuVersion.H100
+else False,
 disk_size_gb=1000,
 ),
 test_name=test_name,
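
A few related changes in this file: the general_configs argument is renamed benchmark_configs; each model's scenarios are now taken from model_parameters (joined into a comma-separated list) rather than from a shared benchmark_configs["scenario"] value; the DLRMv2 data pulled by the new gsutil step is mounted into the benchmark container via the extra -v argument to make launch_docker; and local SSD is attached only for non-H100 machines. A hedged sketch of how the renamed dict and the sweep commands fit together (all values below are illustrative placeholders, not this DAG's real configuration):

# Hedged sketch of the renamed benchmark_configs dict and the per-model
# scenario sweep after this change. Every value is an illustrative placeholder.
benchmark_configs = {
    "model_name": "llama2-70b,mixtral-8x7b",          # comma-separated model list
    "models": "gs://example-bucket/models",
    "preprocessed_data": "gs://example-bucket/preprocessed_data",
    "docker_config": "gs://example-bucket/docker_config",
}
model_parameters = {
    "llama2-70b": {"Offline": {}, "Server": {}},      # scenario -> tunable params
    "mixtral-8x7b": {"Offline": {}},
}

model_parameters_sweep_cmds = []
for model_name in benchmark_configs["model_name"].split(","):
    # Scenarios now come from model_parameters per model, e.g. "Offline,Server".
    scenario = ",".join(model_parameters[model_name])
    model_parameters_sweep_cmds.append(
        f"make generate_engines RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
    )

# The parameter sweep evaluates 2 ** (binary_search_steps - 1) + 1 candidate
# values per tunable parameter, so the default binary_search_steps=1 tries 2.
binary_search_steps = 1
print(2 ** (binary_search_steps - 1) + 1)  # -> 2
print(model_parameters_sweep_cmds)
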
2 changes: 1 addition & 1 deletion dags/inference/trt_llm_inference.py
@@ -36,7 +36,7 @@
 # Running on H100 GPU
 trt_llm_inference_config.get_trt_llm_gpu_config(
 machine_type=MachineVersion.A3_HIGHGPU_8G,
-image_project=ImageProject.DEEP_LEARNING_PLATFORM_RELEASE,
+image_project=ImageProject.ML_IMAGES,
 image_family=ImageFamily.COMMON_CU121_DEBIAN_11,
 accelerator_type=GpuVersion.H100,
 count=8,
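
This file and the v4.0 MLPerf DAG below make the same one-line change: the H100 boot image is now resolved from ML_IMAGES instead of DEEP_LEARNING_PLATFORM_RELEASE. As a hedged sketch of what the enum swap amounts to (the string values are assumptions about the underlying GCE image projects, not code from this repository):

# Hedged sketch of the ImageProject enum referenced above; the string values
# are assumed GCE image project IDs, not copied from this repository.
import enum

class ImageProject(enum.Enum):
    DEEP_LEARNING_PLATFORM_RELEASE = "deeplearning-platform-release"
    ML_IMAGES = "ml-images"

# The H100 DAGs now look up COMMON_CU121_DEBIAN_11 images in this project:
print(ImageProject.ML_IMAGES.value)  # -> "ml-images"
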
2 changes: 1 addition & 1 deletion dags/inference/trt_llm_mlperf_v40_inference.py
@@ -50,7 +50,7 @@
 # Running on H100 GPU
 trt_llm_mlperf_v40_config.get_trt_llm_mlperf_v40_gpu_config(
 machine_type=MachineVersion.A3_HIGHGPU_8G,
-image_project=ImageProject.DEEP_LEARNING_PLATFORM_RELEASE,
+image_project=ImageProject.ML_IMAGES,
 image_family=ImageFamily.COMMON_CU121_DEBIAN_11,
 accelerator_type=GpuVersion.H100,
 count=8,