From 057879cc3bb7c50678c3b642979c444c9b7578e8 Mon Sep 17 00:00:00 2001 From: Pate Motter Date: Thu, 21 Nov 2024 21:35:04 +0000 Subject: [PATCH] Formatting fixes. --- ..._inference_offline_benchmark_gce_config.py | 24 ---- .../maxtext_inference_offline_benchmark.py | 118 +++++++++--------- 2 files changed, 58 insertions(+), 84 deletions(-) delete mode 100644 dags/inference/configs/maxtext_inference_offline_benchmark_gce_config.py diff --git a/dags/inference/configs/maxtext_inference_offline_benchmark_gce_config.py b/dags/inference/configs/maxtext_inference_offline_benchmark_gce_config.py deleted file mode 100644 index 124d83e2..00000000 --- a/dags/inference/configs/maxtext_inference_offline_benchmark_gce_config.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""A DAG to run MaxText offline benchmarks.""" - -import datetime -import json -from typing import Dict -from xlml.apis import gcp_config, metric_config, task, test_config -from dags import test_owner -from dags.multipod.configs import common -from dags.vm_resource import TpuVersion, Project, RuntimeVersion - diff --git a/dags/inference/maxtext_inference_offline_benchmark.py b/dags/inference/maxtext_inference_offline_benchmark.py index 6dcc07d8..48475e30 100644 --- a/dags/inference/maxtext_inference_offline_benchmark.py +++ b/dags/inference/maxtext_inference_offline_benchmark.py @@ -30,7 +30,7 @@ RUNTIME_IMAGE = RuntimeVersion.V2_ALPHA_TPUV6.value GCS_SUBFOLDER_PREFIX = test_owner.Team.INFERENCE.value -create_converter_script = r'''cat > convert_logs.py << 'EOL' +create_mlperf_log_converter_script = r"""cat > convert_logs.py << 'EOL' import json import re import jsonlines @@ -74,7 +74,7 @@ def convert_mlperf_log_to_jsonlines(log_file_path: str, output_path: str): if __name__ == "__main__": convert_mlperf_log_to_jsonlines("./mlperf_log_detail.txt", "metric_report.jsonl") -EOL''' +EOL""" def maxtext_inference_offline_benchmark_config( @@ -93,73 +93,73 @@ def maxtext_inference_offline_benchmark_config( maxtext_branch: str = "", ): job_gcp_config = gcp_config.GCPConfig( - project_name=project_name, - zone=tpu_zone, - dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET, + project_name=project_name, + zone=tpu_zone, + dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET, ) git_clone_maxtext = "git clone https://github.com/google/maxtext.git" if maxtext_branch: git_clone_maxtext += f" -b {maxtext_branch}" set_up_cmds = ( - "pip install --upgrade pip", - "sudo apt-get -y update", - "sudo apt-get -y install python3.10-venv", - "sudo apt-get -y install jq", - "python -m venv .env", - "source .env/bin/activate", - # Setup MaxText - git_clone_maxtext, - f"cd maxtext && bash setup.sh MODE={test_mode.value} && cd ..", - "pip install torch --index-url https://download.pytorch.org/whl/cpu", - # Setup Loadgen - "git clone https://github.com/mlcommons/inference.git", - "cd inference/loadgen && pip install . && cd ../.." + "pip install --upgrade pip", + "sudo apt-get -y update", + "sudo apt-get -y install python3.10-venv", + "sudo apt-get -y install jq", + "python -m venv .env", + "source .env/bin/activate", + # Setup MaxText + git_clone_maxtext, + f"cd maxtext && bash setup.sh MODE={test_mode.value} && cd ..", + "pip install torch --index-url https://download.pytorch.org/whl/cpu", + # Setup Loadgen + "git clone https://github.com/mlcommons/inference.git", + "cd inference/loadgen && pip install . && cd ../..", ) run_model_cmds = ( - "source .env/bin/activate", - "cd maxtext/MaxText/inference_mlperf/trillium", - "gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl /tmp/processed-data.pkl", - "export DATA_DISK_DIR=/tmp", - "export CHECKPOINT=gs://inference-benchmarks/models/llama2-70b-chat/quant/int8_", - "export TOKENIZER_PATH=/home/ml-auto-solutions/maxtext/assets/tokenizer.llama2", - "echo $TOKENIZER_PATH", - "bash benchmarks_llama2-70b-trillium_2x4.sh -x -t -s", - "cp /tmp/logs/*/mlperf_log_detail.txt ./", - create_converter_script, - "python3 convert_logs.py", - "cat metric_report.jsonl", - f"gsutil cp metric_report.jsonl {metric_config.SshEnvVars.GCS_OUTPUT.value}", + "source .env/bin/activate", + "cd maxtext/MaxText/inference_mlperf/trillium", + "gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl /tmp/processed-data.pkl", + "export DATA_DISK_DIR=/tmp", + "export CHECKPOINT=gs://inference-benchmarks/models/llama2-70b-chat/quant/int8_", + "export TOKENIZER_PATH=/home/ml-auto-solutions/maxtext/assets/tokenizer.llama2", + "echo $TOKENIZER_PATH", + "bash benchmarks_llama2-70b-trillium_2x4.sh -x -t -s", + "cp /tmp/logs/*/mlperf_log_detail.txt ./", + create_mlperf_log_converter_script, + "python3 convert_logs.py", + "cat metric_report.jsonl", + f"gsutil cp metric_report.jsonl {metric_config.SshEnvVars.GCS_OUTPUT.value}", ) job_test_config = test_config.TpuVmTest( - test_config.Tpu( - version=tpu_version, - cores=tpu_cores, - runtime_version=runtime_version, - reserved=is_tpu_reserved, - network=network, - subnetwork=subnetwork, - ), - test_name=test_name, - set_up_cmds=set_up_cmds, - run_model_cmds=run_model_cmds, - timeout=datetime.timedelta(minutes=time_out_in_min), - task_owner=test_owner.PATE_M, - num_slices=num_slices, - gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/maxtext", + test_config.Tpu( + version=tpu_version, + cores=tpu_cores, + runtime_version=runtime_version, + reserved=is_tpu_reserved, + network=network, + subnetwork=subnetwork, + ), + test_name=test_name, + set_up_cmds=set_up_cmds, + run_model_cmds=run_model_cmds, + timeout=datetime.timedelta(minutes=time_out_in_min), + task_owner=test_owner.PATE_M, + num_slices=num_slices, + gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/maxtext", ) job_metric_config = metric_config.MetricConfig( - json_lines=metric_config.JSONLinesConfig("metric_report.jsonl"), - use_runtime_generated_gcs_folder=True, + json_lines=metric_config.JSONLinesConfig("metric_report.jsonl"), + use_runtime_generated_gcs_folder=True, ) return task.run_queued_resource_test( - task_test_config=job_test_config, - task_gcp_config=job_gcp_config, - task_metric_config=job_metric_config, + task_test_config=job_test_config, + task_gcp_config=job_gcp_config, + task_metric_config=job_metric_config, ) @@ -176,15 +176,14 @@ def maxtext_inference_offline_benchmark_config( dag_id = "maxtext_inference_offline_benchmark" with models.DAG( - dag_id=dag_id, - tags=tags, - start_date=datetime.datetime(2024, 1, 19), - schedule=None, - catchup=False, + dag_id=dag_id, + tags=tags, + start_date=datetime.datetime(2024, 1, 19), + schedule=None, + catchup=False, ) as dag: test_name_prefix = dag_id - maxtext_offline_benchmark = ( - maxtext_inference_offline_benchmark_config( + maxtext_offline_benchmark = maxtext_inference_offline_benchmark_config( tpu_version=TpuVersion.TRILLIUM, tpu_cores=8, tpu_zone=Zone.EUROPE_WEST4_A.value, @@ -196,6 +195,5 @@ def maxtext_inference_offline_benchmark_config( network=V6E_GCE_NETWORK, subnetwork=V6E_GCE_SUBNETWORK, is_tpu_reserved=True, - maxtext_branch="patemotter-offline-benchmark", - ) + maxtext_branch="", )