diff --git a/.gitignore b/.gitignore
index 40f592ce..c1cec71f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,4 @@
 tmp
 # Build artifacts
 *.whl
+venv
diff --git a/dags/inference/maxtext_inference_offline_benchmark.py b/dags/inference/maxtext_inference_offline_benchmark.py
new file mode 100644
index 00000000..a5e70b02
--- /dev/null
+++ b/dags/inference/maxtext_inference_offline_benchmark.py
@@ -0,0 +1,298 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utilities to construct configs for the MaxText inference offline benchmark DAG."""
+
+import datetime
+
+from airflow import models
+from dags import test_owner, composer_env
+from dags.vm_resource import TpuVersion, Zone, Project, RuntimeVersion, V6E_GCE_NETWORK, V6E_GCE_SUBNETWORK
+from dags.multipod.configs import common
+from dags.multipod.configs.common import SetupMode
+from xlml.apis import gcp_config, metric_config, task, test_config
+
+PROJECT_NAME = Project.CLOUD_TPU_INFERENCE_TEST.value
+RUNTIME_IMAGE = RuntimeVersion.V2_ALPHA_TPUV6.value
+GCS_SUBFOLDER_PREFIX = test_owner.Team.INFERENCE.value
+
+# Run once a day at 5 am UTC (9 pm PST)
+SCHEDULED_TIME = "0 5 * * *" if composer_env.is_prod_env() else None
+
+
+def get_mlperf_converter_script():
+  return """cat << 'EOL' > convert_logs.py
+import json
+import re
+import jsonlines
+import pkg_resources
+import os
+from typing import Optional
+
+def find_git_dir(start_path: str) -> Optional[str]:
+  current_path = os.path.abspath(start_path)
+  while current_path != "/":
+    git_path = os.path.join(current_path, ".git")
+    if os.path.exists(git_path) and os.path.isdir(git_path):
+      return current_path
+    current_path = os.path.dirname(current_path)
+  return None
+
+def get_git_commit(repo_path: Optional[str] = None) -> str:
+  try:
+    if repo_path is None:
+      repo_path = find_git_dir(os.getcwd())
+    if repo_path is None:
+      return "unknown"
+
+    head_path = os.path.join(repo_path, ".git", "HEAD")
+    with open(head_path, "r") as f:
+      head_content = f.read().strip()
+
+    if head_content.startswith("ref: "):
+      ref_path = head_content[5:]
+      ref_full_path = os.path.join(repo_path, ".git", ref_path)
+      with open(ref_full_path, "r") as f:
+        return f.read().strip()
+    return head_content
+  except Exception as e:
+    print(f"Warning: Could not get git commit: {str(e)}")
+    return "unknown"
+
+def get_package_version(package_name: str) -> str:
+  try:
+    return pkg_resources.get_distribution(package_name).version
+  except Exception:
+    return "unknown"
+
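+# convert_mlperf_log_to_jsonlines scans an MLPerf loadgen detail log for
+# ":::MLLOG {...}" entries. An illustrative example of such a line (the exact
+# field set varies by loadgen version):
+#   :::MLLOG {"key": "result_validity", "value": "VALID", "time_ms": 123.4, ...}
+# Numeric values are collected as metrics; keys listed in dimension_keys are
+# collected as dimensions.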
+def convert_mlperf_log_to_jsonlines(
+    log_file_path: str,
+    output_path: str,
+    repo_path: Optional[str] = None
+) -> dict:
+  dimension_keys = {
+      "loadgen_version", "test_datetime", "requested_scenario",
+      "requested_test_mode", "effective_scenario", "effective_test_mode",
+      "power_begin", "power_end", "result_validity",
+      "early_stopping_ttft_result", "early_stopping_tpot_result"
+  }
+
+  metrics = {}
+  dimensions = {}
+
+  with open(log_file_path, "r") as f:
+    log_content = f.read()
+
+  log_pattern = r":::MLLOG ({.*})"
+  for line in log_content.split("\\n"):
+    match = re.search(log_pattern, line)
+    if match:
+      try:
+        entry = json.loads(match.group(1))
+        key = entry.get("key", "")
+        value = entry.get("value")
+
+        if isinstance(value, (int, float)):
+          metrics[key] = value
+        elif key in dimension_keys:
+          dimensions[key] = value
+      except json.JSONDecodeError:
+        continue
+
+  dimensions["maxtext_commit_id"] = get_git_commit(repo_path)
+  dimensions["jax_version"] = get_package_version("jax")
+  dimensions["libtpu_version"] = get_package_version("libtpu")
+  dimensions["libtpu_nightly_version"] = get_package_version("libtpu-nightly")
+
+  result = {"metrics": metrics, "dimensions": dimensions}
+
+  output_dir = os.path.dirname(output_path)
+  if output_dir:
+    os.makedirs(output_dir, exist_ok=True)
+
+  with jsonlines.open(output_path, mode="w") as writer:
+    writer.write(result)
+
+  return result
+
+if __name__ == "__main__":
+  import argparse
+
+  parser = argparse.ArgumentParser(description="Convert MLPerf log to jsonlines format")
+  parser.add_argument("--log-file", type=str, required=True,
+                      help="Path to the MLPerf log file")
+  parser.add_argument("--output-file", type=str, required=True,
+                      help="Path for the output jsonlines file")
+  parser.add_argument("--repo-path", type=str, default=None,
+                      help="Path to the git repository (optional, will auto-detect if not provided)")
+
+  args = parser.parse_args()
+
+  log_file = os.path.abspath(args.log_file)
+  output_file = os.path.abspath(args.output_file)
+  repo_path = os.path.abspath(args.repo_path) if args.repo_path else None
+
+  result = convert_mlperf_log_to_jsonlines(
+      log_file,
+      output_file,
+      repo_path
+  )
+  print(f"Conversion complete. Output written to: {output_file}")
+EOL"""
+
+
+def maxtext_inference_offline_benchmark_config(
+    tpu_version: TpuVersion,
+    tpu_cores: int,
+    tpu_zone: str,
+    time_out_in_min: int,
+    test_name: str,
+    test_mode: common.SetupMode,
+    project_name: str = PROJECT_NAME,
+    runtime_version: str = RUNTIME_IMAGE,
+    network: str = "default",
+    subnetwork: str = "default",
+    is_tpu_reserved: bool = True,
+    num_slices: int = 1,
+    maxtext_branch: str = "",
+):
+  job_gcp_config = gcp_config.GCPConfig(
+      project_name=project_name,
+      zone=tpu_zone,
+      dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET,
+  )
+
+  git_clone_maxtext = "git clone https://github.com/google/maxtext.git"
+  if maxtext_branch:
+    git_clone_maxtext += f" -b {maxtext_branch}"
+
+  set_up_cmds = (
+      "pip install --upgrade pip",
+      "sudo apt-get -y update",
+      "sudo apt-get -y install python3.10-venv",
+      "sudo apt-get -y install jq",
+      "python -m venv .env",
+      "source .env/bin/activate",
+      # Set up MLPerf LoadGen
+      "git clone https://github.com/mlcommons/inference.git",
+      "cd inference/loadgen && pip install . && cd ../..",
+      # Set up MaxText
+      git_clone_maxtext,
+      f"cd maxtext && bash setup.sh MODE={test_mode.value} && cd ..",
+      "pip install -r maxtext/MaxText/inference_mlperf/requirements.txt",
+      "cd maxtext/MaxText/inference_mlperf/trillium",
+      # Copy the dataset
+      "gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl /tmp/processed-data.pkl",
+      "cp ../user100.conf ./",
+  )
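+
+  # Note on the accuracy post-processing defined below: the accuracy evaluation
+  # step prints its final summary as a Python dict repr rather than strict JSON.
+  # An illustrative (assumed) example of that raw line, with keys depending on
+  # the evaluation script:
+  #   {'rouge1': np.float64(44.39), 'rouge2': np.float64(22.02), 'gen_num': 24576}
+  # The pipeline below rewrites it into
+  #   {"metrics":{"rouge1":44.39,"rouge2":22.02,"gen_num":24576}}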
+
+  # Extract the accuracy metrics and merge them into acc_metric_report.jsonl:
+  #   tac + grep -m1   : read the log in reverse and grab the first JSON-like line
+  #   tr -d "'"        : remove all single quotes
+  #   tr -d '\000-\037': remove ASCII control characters (0-31)
+  #   first sed        : add double quotes around JSON keys that aren't quoted
+  #   second sed       : unwrap numpy calls such as np.float64(0.123) to the number
+  #   third sed        : wrap the JSON object in a "metrics" field
+  #   jq -sc           : combine the metrics objects of both reports
+  add_accuracy_to_metrics = r"""tac evaluate_offline_accuracy_log.log | grep -m1 '{.*}' | \
+    tr -d "'" | \
+    tr -d '\000-\037' | \
+    sed 's/\([a-zA-Z0-9_]*\):/"\1":/g' | \
+    sed 's/np\.[a-zA-Z0-9_]*(\([0-9.]*\))/\1/g' | \
+    sed 's/{/{"metrics":{/; s/}/}}/' | \
+    jq -sc '.[0].metrics += .[1].metrics | .[0]' acc_metric_report.jsonl - > acc_combined_output.jsonl"""
+
+  run_performance = (
+      "source .env/bin/activate",
+      "export DATA_DISK_DIR=/tmp",
+      "export CHECKPOINT=gs://inference-benchmarks/models/llama2-70b-chat/quant/int8_",
+      "export TOKENIZER_PATH=/home/ml-auto-solutions/maxtext/assets/tokenizer.llama2",
+      # INFO-level logging was too verbose and ran into logging quotas.
+      "export LOGLEVEL=WARNING",
+      "cd maxtext/MaxText/inference_mlperf/trillium",
+      "bash benchmarks_llama2-70b-trillium_2x4.sh -x -s -t -b performance",
+      'cp "$(ls -t /tmp/logs/*performance*/mlperf_log_detail.txt | head -n1)" ./perf_log.txt',
+      get_mlperf_converter_script(),
+      "python3 convert_logs.py --log-file perf_log.txt --output-file perf_metric_report.jsonl",
+  )
+
+  run_accuracy = (
+      "export FAST_EVAL=true",
+      "bash benchmarks_llama2-70b-trillium_2x4.sh -x -s -t -b accuracy",
+      'cp "$(ls -t /tmp/logs/*accuracy*/mlperf_log_detail.txt | head -n1)" ./acc_log.txt',
+      'cp "$(ls -t /tmp/logs/*accuracy*/evaluate_offline_accuracy_log.log | head -n1)" ./evaluate_offline_accuracy_log.log',
+      "python3 convert_logs.py --log-file acc_log.txt --output-file acc_metric_report.jsonl",
+      add_accuracy_to_metrics,
+      'jq -c "." perf_metric_report.jsonl > temp_perf.jsonl',
+      'jq -c "." acc_combined_output.jsonl > temp_acc.jsonl',
+      "cat temp_perf.jsonl temp_acc.jsonl > combined_results.jsonl",
+      f"gsutil cp combined_results.jsonl {metric_config.SshEnvVars.GCS_OUTPUT.value}",
+  )
+
+  run_model_cmds = run_performance + run_accuracy
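+
+  # combined_results.jsonl ends up with two JSON lines, one from the performance
+  # run and one from the accuracy run, each shaped like
+  #   {"metrics": {...}, "dimensions": {...}}
+  # which is the file the JSONLinesConfig below points at.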
+
+  job_test_config = test_config.TpuVmTest(
+      test_config.Tpu(
+          version=tpu_version,
+          cores=tpu_cores,
+          runtime_version=runtime_version,
+          reserved=is_tpu_reserved,
+          network=network,
+          subnetwork=subnetwork,
+      ),
+      test_name=test_name,
+      set_up_cmds=set_up_cmds,
+      run_model_cmds=run_model_cmds,
+      timeout=datetime.timedelta(minutes=time_out_in_min),
+      task_owner=test_owner.PATE_M,
+      num_slices=num_slices,
+      gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/maxtext",
+  )
+
+  job_metric_config = metric_config.MetricConfig(
+      json_lines=metric_config.JSONLinesConfig("combined_results.jsonl"),
+      use_runtime_generated_gcs_folder=True,
+  )
+
+  return task.run_queued_resource_test(
+      task_test_config=job_test_config,
+      task_gcp_config=job_gcp_config,
+      task_metric_config=job_metric_config,
+  )
+
+
+USER_PREFIX = ""
+gcs_subfolder_prefix = test_owner.Team.INFERENCE.value
+
+tags = ["inference_team", "maxtext", "offline", "benchmark"]
+
+if USER_PREFIX:
+  dag_id = f"{USER_PREFIX}_maxtext_inference_offline_benchmark"
+  tags.append(USER_PREFIX)
+else:
+  dag_id = "maxtext_inference_offline_benchmark"
+
+with models.DAG(
+    dag_id=dag_id,
+    tags=tags,
+    start_date=datetime.datetime(2024, 1, 19),
+    schedule=SCHEDULED_TIME,
+    catchup=False,
+) as dag:
+  test_name_prefix = dag_id
+  maxtext_offline_benchmark = maxtext_inference_offline_benchmark_config(
+      tpu_version=TpuVersion.TRILLIUM,
+      tpu_cores=8,
+      tpu_zone=Zone.EUROPE_WEST4_A.value,
+      time_out_in_min=300,
+      test_name="maxtext_inference_offline_benchmark",
+      test_mode=SetupMode.STABLE,
+      project_name=Project.CLOUD_ML_AUTO_SOLUTIONS.value,
+      runtime_version=RuntimeVersion.V2_ALPHA_TPUV6.value,
+      network=V6E_GCE_NETWORK,
+      subnetwork=V6E_GCE_SUBNETWORK,
+      is_tpu_reserved=True,
+      maxtext_branch="",
+  )
diff --git a/dags/test_owner.py b/dags/test_owner.py
index e96a8c5e..1ac995be 100644
--- a/dags/test_owner.py
+++ b/dags/test_owner.py
@@ -62,6 +62,7 @@ class Team(enum.Enum):
 XIANG_S = "Xiang S."
 MORGAN_D = "Morgan D."
 YIJIA_J = "Yijia J."
+PATE_M = "Pate M."
 
 # 3P Ecosystems
 RICHARD_L = "Richard L."