-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(experiments): scripts to run various experiments one after another
- Loading branch information
1 parent
7f29519
commit 7307939
Showing
7 changed files
with
235 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/usr/bin/env bash
# Run the CatQA prompt benchmark across model families, one model at a time.
# Each run writes stdout AND stderr to a single per-model log under
# experiments/runs/. (The original used `> f.log 2> f.log`, which opens the
# same file on two independent descriptors so the streams truncate each
# other's output; `2>&1` duplicates stderr onto stdout's descriptor instead.)
# NOTE: no `set -e` on purpose — a failed benchmark should not abort the
# remaining runs, matching the original script's behavior.

echo "Running in $(pwd)"

# Run one model against one dataset, capturing all output in one log file.
# Arguments: $1 - model id, $2 - dataset id
run_benchmark() {
  local model=$1 dataset=$2
  python experiments/prompt_benchmarks.py -m "$model" -d "$dataset" --verbose \
    > "./experiments/runs/${model}-${dataset}.log" 2>&1
}

echo "Beginning Llama 3.1 8B on CatQA"
run_benchmark llama3.1-8b catqa
echo "Beginning Llama 3 8B on CatQA"
run_benchmark llama3-8b catqa
echo "Beginning Llama 2 7B on CatQA"
run_benchmark llama2-7b catqa
echo "Experiments on Llama Completed"

# Disabled runs below are kept for reference; uncomment to re-run.

# echo "Beginning Qwen 2 0.5B on CatQA"
# run_benchmark qwen2-0.5b catqa
# echo "Beginning Qwen 2 1.5B on CatQA"
# run_benchmark qwen2-1.5b catqa
# echo "Beginning Qwen 2 7B on CatQA"
# run_benchmark qwen2-7b catqa
# echo "Experiments on Qwen 2 Completed"

# echo "Beginning Phi 3 Mini on CatQA"
# run_benchmark phi3-mini catqa
# echo "Experiments on Phi 3 Mini Completed"

# echo "Beginning Mistral Nemo 12B on CatQA"
# run_benchmark mistral-nemo-12b catqa
# echo "Beginning Mixtral 8x7B on CatQA"
# run_benchmark mistral-8x7b catqa  # original split logs: stdout->mistral-8x7b, stderr->mixtral-8x7b; unified here
# echo "Beginning Mistral 7B on CatQA"
# run_benchmark mistral-7b catqa
# echo "Experiments on Mistral Completed"

# echo "Beginning Gemma 2 9B on CatQA"
# run_benchmark gemma2-9b catqa
# echo "Beginning Gemma 1.1 7B on CatQA"
# run_benchmark gemma-1.1-7b catqa
# echo "Beginning Gemma 7B on CatQA"
# run_benchmark gemma-7b catqa
# echo "Experiments on Gemma Completed"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
#!/usr/bin/env bash
# Run safety-judge benchmarks over the multilingual Aya datasets, one
# judge/dataset pair at a time. Each run writes stdout AND stderr to a single
# log under experiments/runs/. (The original used `> f.log 2> f.log`, which
# opens the same file twice so the two streams truncate each other's output;
# `2>&1` shares one descriptor instead.)
# NOTE: no `set -e` on purpose — one failed benchmark must not abort the rest,
# matching the original script's behavior.

echo "Running in $(pwd)"

# Run one judge against one dataset, capturing all output in one log file.
# Arguments: $1 - judge id, $2 - dataset id
run_judge() {
  local judge=$1 dataset=$2
  python experiments/judge_benchmarks.py -j "$judge" -d "$dataset" -v \
    > "./experiments/runs/${judge}-${dataset}.log" 2>&1
}

# Full judge roster, for reference / re-runs:
#   llamaguard3 llamaguard2 llamaguard walledguard lionguard promptguard
#   toxicitymodel toxic-bert multilingual-toxic-xlm-roberta unbiased-toxic-roberta
#
# Datasets xstest, aya-ar, aya-en and aya-fi were disabled (commented out) in
# the original script for all ten judges — presumably already completed;
# re-enable by looping run_judge over those datasets if a re-run is needed.

echo "Beginning Tests on aya-fr"
# For aya-fr the original left only these two judges enabled; the other eight
# were commented out (reason not recorded in the script).
for judge in lionguard promptguard; do
  run_judge "$judge" aya-fr
done

# For the remaining datasets the original enabled the six guard-style judges
# and left the four toxicity classifiers (toxicitymodel, toxic-bert,
# multilingual-toxic-xlm-roberta, unbiased-toxic-roberta) commented out.
for dataset in aya-hi aya-ru aya-se aya-sp; do
  echo "Beginning Tests on ${dataset}"
  for judge in llamaguard3 llamaguard2 llamaguard walledguard lionguard promptguard; do
    run_judge "$judge" "$dataset"
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env bash
# Run Llama 2 7B against the HarmBench and AdvBench prompt datasets.
# Each run writes stdout AND stderr to a single log under experiments/runs/.
# (The original used `> f.log 2> f.log`, opening the same file twice so the
# streams truncate each other's output; `2>&1` shares one descriptor instead.)
# NOTE: no `set -e` on purpose — a failed run should not abort the next one,
# matching the original script's behavior.

echo "Running in $(pwd)"

# Run one model against one dataset, capturing all output in one log file.
# Arguments: $1 - model id, $2 - dataset id
run_benchmark() {
  local model=$1 dataset=$2
  python experiments/prompt_benchmarks.py -m "$model" -d "$dataset" --verbose \
    > "./experiments/runs/${model}-${dataset}.log" 2>&1
}

echo "Beginning Llama 2 7B on HarmBench"
run_benchmark llama2-7b harmbench
echo "Beginning Llama 2 7B on AdvBench"
run_benchmark llama2-7b advbench
echo "Experiments on Llama 2 7B Completed"
Oops, something went wrong.