Skip to content

Commit

Permalink
Formatting fixes.
Browse files: browse the repository at this point in the history
  • Loading branch information
patemotter committed Nov 21, 2024
1 parent e971662 commit 057879c
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 84 deletions.

This file was deleted.

118 changes: 58 additions & 60 deletions dags/inference/maxtext_inference_offline_benchmark.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,7 +30,7 @@
RUNTIME_IMAGE = RuntimeVersion.V2_ALPHA_TPUV6.value
GCS_SUBFOLDER_PREFIX = test_owner.Team.INFERENCE.value

create_converter_script = r'''cat > convert_logs.py << 'EOL'
create_mlperf_log_converter_script = r"""cat > convert_logs.py << 'EOL'
import json
import re
import jsonlines
Expand Down Expand Up @@ -74,7 +74,7 @@ def convert_mlperf_log_to_jsonlines(log_file_path: str, output_path: str):
if __name__ == "__main__":
convert_mlperf_log_to_jsonlines("./mlperf_log_detail.txt", "metric_report.jsonl")
EOL'''
EOL"""


def maxtext_inference_offline_benchmark_config(
Expand All @@ -93,73 +93,73 @@ def maxtext_inference_offline_benchmark_config(
maxtext_branch: str = "",
):
job_gcp_config = gcp_config.GCPConfig(
project_name=project_name,
zone=tpu_zone,
dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET,
project_name=project_name,
zone=tpu_zone,
dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET,
)
git_clone_maxtext = "git clone https://github.com/google/maxtext.git"
if maxtext_branch:
git_clone_maxtext += f" -b {maxtext_branch}"

set_up_cmds = (
"pip install --upgrade pip",
"sudo apt-get -y update",
"sudo apt-get -y install python3.10-venv",
"sudo apt-get -y install jq",
"python -m venv .env",
"source .env/bin/activate",
# Setup MaxText
git_clone_maxtext,
f"cd maxtext && bash setup.sh MODE={test_mode.value} && cd ..",
"pip install torch --index-url https://download.pytorch.org/whl/cpu",
# Setup Loadgen
"git clone https://github.com/mlcommons/inference.git",
"cd inference/loadgen && pip install . && cd ../.."
"pip install --upgrade pip",
"sudo apt-get -y update",
"sudo apt-get -y install python3.10-venv",
"sudo apt-get -y install jq",
"python -m venv .env",
"source .env/bin/activate",
# Setup MaxText
git_clone_maxtext,
f"cd maxtext && bash setup.sh MODE={test_mode.value} && cd ..",
"pip install torch --index-url https://download.pytorch.org/whl/cpu",
# Setup Loadgen
"git clone https://github.com/mlcommons/inference.git",
"cd inference/loadgen && pip install . && cd ../..",
)

run_model_cmds = (
"source .env/bin/activate",
"cd maxtext/MaxText/inference_mlperf/trillium",
"gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl /tmp/processed-data.pkl",
"export DATA_DISK_DIR=/tmp",
"export CHECKPOINT=gs://inference-benchmarks/models/llama2-70b-chat/quant/int8_",
"export TOKENIZER_PATH=/home/ml-auto-solutions/maxtext/assets/tokenizer.llama2",
"echo $TOKENIZER_PATH",
"bash benchmarks_llama2-70b-trillium_2x4.sh -x -t -s",
"cp /tmp/logs/*/mlperf_log_detail.txt ./",
create_converter_script,
"python3 convert_logs.py",
"cat metric_report.jsonl",
f"gsutil cp metric_report.jsonl {metric_config.SshEnvVars.GCS_OUTPUT.value}",
"source .env/bin/activate",
"cd maxtext/MaxText/inference_mlperf/trillium",
"gsutil cp gs://cloud-tpu-inference-public/mlcommons/inference/language/llama2-70b/data/processed-openorca/open_orca_gpt4_tokenized_llama.sampled_24576.pkl /tmp/processed-data.pkl",
"export DATA_DISK_DIR=/tmp",
"export CHECKPOINT=gs://inference-benchmarks/models/llama2-70b-chat/quant/int8_",
"export TOKENIZER_PATH=/home/ml-auto-solutions/maxtext/assets/tokenizer.llama2",
"echo $TOKENIZER_PATH",
"bash benchmarks_llama2-70b-trillium_2x4.sh -x -t -s",
"cp /tmp/logs/*/mlperf_log_detail.txt ./",
create_mlperf_log_converter_script,
"python3 convert_logs.py",
"cat metric_report.jsonl",
f"gsutil cp metric_report.jsonl {metric_config.SshEnvVars.GCS_OUTPUT.value}",
)

job_test_config = test_config.TpuVmTest(
test_config.Tpu(
version=tpu_version,
cores=tpu_cores,
runtime_version=runtime_version,
reserved=is_tpu_reserved,
network=network,
subnetwork=subnetwork,
),
test_name=test_name,
set_up_cmds=set_up_cmds,
run_model_cmds=run_model_cmds,
timeout=datetime.timedelta(minutes=time_out_in_min),
task_owner=test_owner.PATE_M,
num_slices=num_slices,
gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/maxtext",
test_config.Tpu(
version=tpu_version,
cores=tpu_cores,
runtime_version=runtime_version,
reserved=is_tpu_reserved,
network=network,
subnetwork=subnetwork,
),
test_name=test_name,
set_up_cmds=set_up_cmds,
run_model_cmds=run_model_cmds,
timeout=datetime.timedelta(minutes=time_out_in_min),
task_owner=test_owner.PATE_M,
num_slices=num_slices,
gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/maxtext",
)

job_metric_config = metric_config.MetricConfig(
json_lines=metric_config.JSONLinesConfig("metric_report.jsonl"),
use_runtime_generated_gcs_folder=True,
json_lines=metric_config.JSONLinesConfig("metric_report.jsonl"),
use_runtime_generated_gcs_folder=True,
)

return task.run_queued_resource_test(
task_test_config=job_test_config,
task_gcp_config=job_gcp_config,
task_metric_config=job_metric_config,
task_test_config=job_test_config,
task_gcp_config=job_gcp_config,
task_metric_config=job_metric_config,
)


Expand All @@ -176,15 +176,14 @@ def maxtext_inference_offline_benchmark_config(
dag_id = "maxtext_inference_offline_benchmark"

with models.DAG(
dag_id=dag_id,
tags=tags,
start_date=datetime.datetime(2024, 1, 19),
schedule=None,
catchup=False,
dag_id=dag_id,
tags=tags,
start_date=datetime.datetime(2024, 1, 19),
schedule=None,
catchup=False,
) as dag:
test_name_prefix = dag_id
maxtext_offline_benchmark = (
maxtext_inference_offline_benchmark_config(
maxtext_offline_benchmark = maxtext_inference_offline_benchmark_config(
tpu_version=TpuVersion.TRILLIUM,
tpu_cores=8,
tpu_zone=Zone.EUROPE_WEST4_A.value,
Expand All @@ -196,6 +195,5 @@ def maxtext_inference_offline_benchmark_config(
network=V6E_GCE_NETWORK,
subnetwork=V6E_GCE_SUBNETWORK,
is_tpu_reserved=True,
maxtext_branch="patemotter-offline-benchmark",
)
maxtext_branch="",
)

0 comments on commit 057879c

Please sign in to comment.