Skip to content

Commit

Permalink
tweak
Browse files: browse the repository at this point in the history
  • Loading branch information
hkiyomaru committed Dec 18, 2023
1 parent b392de1 commit 92fcc53
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
25 changes: 14 additions & 11 deletions llm_judge/show_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
}

pd.set_option("display.max_colwidth", 1000)
pd.set_option("display.float_format", "{:.1f}".format)


def calculate_average_score(results: list[dict]):
Expand All @@ -36,21 +37,23 @@ def calculate_win_rate(results: list[dict]):
Args:
results: A list of results.
"""
num_win = 0
num_win_1 = 0
num_win_2 = 0
num_tie = 0
for result in results:
if result["g1_winner"] == "tie" or result["g1_winner"] != result["g2_winner"]:
num_tie += 1
elif result["g1_winner"] == "model_1":
num_win += 1
win_rate = num_win / len(results)
adjusted_win_rate = (num_win + 0.5 * num_tie) / len(results)
num_win_1 += 1
else:
num_win_2 += 1
win_rate_1 = num_win_1 / len(results)
adjusted_win_rate_1 = (num_win_1 + 0.5 * num_tie) / len(results)
win_rate_2 = num_win_2 / len(results)
adjusted_win_rate_2 = (num_win_2 + 0.5 * num_tie) / len(results)
return {
"model_1": {"win_rate": win_rate, "adjusted_win_rate": adjusted_win_rate},
"model_2": {
"win_rate": 1 - win_rate,
"adjusted_win_rate": 1 - adjusted_win_rate,
},
"model_1": {"win_rate": win_rate_1, "adjusted_win_rate": adjusted_win_rate_1},
"model_2": {"win_rate": win_rate_2, "adjusted_win_rate": adjusted_win_rate_2},
}


Expand Down Expand Up @@ -107,8 +110,8 @@ def display_result_pairwise(
{
"model_1": model_1,
"model_2": model_2,
"win_rate": win_rate,
"adjusted_win_rate": adjusted_win_rate,
"win_rate": win_rate * 100,
"adjusted_win_rate": adjusted_win_rate * 100,
}
)

Expand Down
8 changes: 4 additions & 4 deletions llm_judge/upload_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ def upload_results(
example = results[0]
if baseline_model == example["model_2"]:
model = example["model_1"]
win_rate = win_rate_map["model_1"]["win_rate"]
adjusted_win_rate = win_rate_map["model_1"]["adjusted_win_rate"]
win_rate = win_rate_map["model_1"]["win_rate"] * 100
adjusted_win_rate = win_rate_map["model_1"]["adjusted_win_rate"] * 100
else:
model = example["model_2"]
win_rate = win_rate_map["model_2"]["win_rate"]
adjusted_win_rate = win_rate_map["model_2"]["adjusted_win_rate"]
win_rate = win_rate_map["model_2"]["win_rate"] * 100
adjusted_win_rate = win_rate_map["model_2"]["adjusted_win_rate"] * 100

leaderboard_table = wandb.Table(
columns=[
Expand Down

0 comments on commit 92fcc53

Please sign in to comment.