From 9711bfbb62df4f73f18eea1dd7adc580dffaa10f Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Thu, 27 Jun 2024 02:40:10 +0000
Subject: [PATCH] fix bad merge

---
 .../nm-run-lm-eval-gsm-vllm-baseline.sh | 52 -------------------
 1 file changed, 52 deletions(-)

diff --git a/.github/scripts/nm-run-lm-eval-gsm-vllm-baseline.sh b/.github/scripts/nm-run-lm-eval-gsm-vllm-baseline.sh
index c2223635a90a3..d6b38752945ce 100644
--- a/.github/scripts/nm-run-lm-eval-gsm-vllm-baseline.sh
+++ b/.github/scripts/nm-run-lm-eval-gsm-vllm-baseline.sh
@@ -49,55 +49,3 @@ lm_eval --model vllm \
   --model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE \
   --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
   --batch_size $BATCH_SIZE
-
-#!/bin/bash
-# We can use this script to compute baseline accuracy on GSM for vllm.
-# We use this for fp8, which HF does not support.
-#
-# Make sure you have lm-eval-harness installed:
-# pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@9516087b81a61d0e220b22cc1b75be76de23bc10
-
-usage() {
-    echo``
-    echo "Runs lm eval harness on GSM8k using huggingface transformers."
-    echo "This pathway is intended to be used to create baselines for "
-    echo "our automated nm-test-accuracy workflow"
-    echo
-    echo "usage: ${0} "
-    echo
-    echo "  -m    - huggingface stub or local directory of the model"
-    echo "  -b    - batch size to run the evaluation at"
-    echo "  -l    - limit number of samples to run"
-    echo "  -f    - number of fewshot samples to use"
-    echo "  -t    - tensor parallel size to run at"
-    echo
-}
-
-while getopts "m:b:l:f:t:" OPT; do
-  case ${OPT} in
-    m )
-        MODEL="$OPTARG"
-        ;;
-    b )
-        BATCH_SIZE="$OPTARG"
-        ;;
-    l )
-        LIMIT="$OPTARG"
-        ;;
-    f )
-        FEWSHOT="$OPTARG"
-        ;;
-    t )
-        TP_SIZE="$OPTARG"
-        ;;
-    \? )
-        usage
-        exit 1
-        ;;
-  esac
-done
-
-lm_eval --model vllm \
-  --model_args pretrained=$MODEL,tensor_parallel_size=$TP_SIZE \
-  --tasks gsm8k --num_fewshot $FEWSHOT --limit $LIMIT \
-  --batch_size $BATCH_SIZE
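
After this patch the script keeps a single copy of the getopts/lm_eval logic, so it can be invoked once with the flags documented in its usage() text. The sketch below is illustrative only: the model stub and flag values are assumptions, not part of the patch.

# Minimal usage sketch of the deduplicated baseline script.
# -m model stub, -b batch size, -l sample limit, -f fewshot count, -t tensor parallel size.
# The model name and numeric values here are assumed for illustration.
bash .github/scripts/nm-run-lm-eval-gsm-vllm-baseline.sh \
  -m meta-llama/Meta-Llama-3-8B-Instruct \
  -b 32 -l 250 -f 5 -t 1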