Skip to content

Commit

Permalink
Merge pull request #48 from ku-nlp/fix/tie-handling
Browse files Browse the repository at this point in the history
Fix tie handling
  • Loading branch information
hkiyomaru authored Dec 27, 2023
2 parents 909bd7c + 9049ba1 commit 3c60e86
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 51 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

39 changes: 21 additions & 18 deletions llm_judge/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,24 +167,27 @@ def play(answer_a, answer_b):
}
if self.ref_answer is not None:
kwargs["ref_answer_1"] = self.ref_answer["choices"][0]["turns"][0]
judgment = self.judge.judge(**kwargs)

if "[[A]]" in judgment:
winner = "A"
elif "[[B]]" in judgment:
winner = "B"
elif "[[C]]" in judgment:
winner = "tie"
else:
winner = "error"

return winner, judgment

g1_winner, g1_judgment = play(self.answer_1, self.answer_2)
g1_winner = "model_1" if g1_winner == "A" else "model_2"

g2_winner, g2_judgment = play(self.answer_2, self.answer_1)
g2_winner = "model_2" if g2_winner == "A" else "model_1"
return self.judge.judge(**kwargs)

g1_judgment = play(self.answer_1, self.answer_2)
if "[[A]]" in g1_judgment:
g1_winner = "model_1"
elif "[[B]]" in g1_judgment:
g1_winner = "model_2"
elif "[[C]]" in g1_judgment:
g1_winner = "tie"
else:
g1_winner = "error"

g2_judgment = play(self.answer_2, self.answer_1)
if "[[A]]" in g2_judgment:
g2_winner = "model_2"
elif "[[B]]" in g2_judgment:
g2_winner = "model_1"
elif "[[C]]" in g2_judgment:
g2_winner = "tie"
else:
g2_winner = "error"

result = {
"model_1": self.model_1,
Expand Down
74 changes: 74 additions & 0 deletions llm_judge/reparse_pairwise_judgement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import argparse
import json
import logging

from common import JUDGEMENT_DIR, load_judgements

# Module-level logger named after this module; its output format and level
# are set by the logging.basicConfig call in the script entry point.
logger = logging.getLogger(__name__)


def _parse_winner(judgment: str, winner_if_a: str, winner_if_b: str) -> str:
    """Translate the verdict tag found in a judgment text into a winner label.

    Args:
        judgment: Raw judgment text produced by the judge.
        winner_if_a: Label to return when the text contains ``[[A]]``.
        winner_if_b: Label to return when the text contains ``[[B]]``.

    Returns:
        ``winner_if_a``, ``winner_if_b``, ``"tie"`` (for ``[[C]]``), or
        ``"error"`` when none of the three tags is present.
    """
    # The tags are tested in a fixed order, so a text containing several tags
    # resolves to the first match: [[A]] before [[B]] before [[C]].
    if "[[A]]" in judgment:
        return winner_if_a
    if "[[B]]" in judgment:
        return winner_if_b
    if "[[C]]" in judgment:
        return "tie"
    return "error"


def reparse_result_pairwise(result: dict) -> dict:
    """Reparse the stored judgments of a pairwise result to determine the winners.

    Args:
        result: A result containing the raw judgment texts under the keys
            ``"g1_judgment"`` and ``"g2_judgment"``.

    Returns:
        A shallow copy of ``result`` whose ``"g1_winner"`` and ``"g2_winner"``
        entries are recomputed from the judgment texts. Each is one of
        ``"model_1"``, ``"model_2"``, ``"tie"`` or ``"error"``. The input
        dict itself is not modified.

    Raises:
        KeyError: If ``result`` lacks ``"g1_judgment"`` or ``"g2_judgment"``.
    """
    reparsed_result = result.copy()

    # Game 1: verdict [[A]] means model_1, [[B]] means model_2.
    reparsed_result["g1_winner"] = _parse_winner(
        result["g1_judgment"], "model_1", "model_2"
    )
    # Game 2: the mapping is mirrored, so [[A]] means model_2 and [[B]] means
    # model_1 (presumably because the answers were shown in swapped order --
    # confirm against the code that produced the judgments).
    reparsed_result["g2_winner"] = _parse_winner(
        result["g2_judgment"], "model_2", "model_1"
    )

    return reparsed_result


if __name__ == "__main__":
    # Command-line interface: the only option is a repeatable verbosity flag.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--verbose", "-v", action="count", default=0, help="Verbosity level"
    )
    args = cli.parse_args()

    # Any number of -v flags switches logging from INFO to DEBUG.
    logging.basicConfig(
        format="| %(asctime)s | %(levelname)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=logging.DEBUG if args.verbose else logging.INFO,
    )

    logger.info("Load judgements")
    pairwise_root = JUDGEMENT_DIR / "pairwise"
    for judgement_dir in pairwise_root.iterdir():
        for result_id, results in load_judgements(judgement_dir).items():
            reparsed_results = list(map(reparse_result_pairwise, results))

            # Leave the file alone when reparsing changed none of its results.
            unchanged = not any(
                before != after
                for before, after in zip(results, reparsed_results)
            )
            if unchanged:
                continue

            # Overwrite <result_id>.jsonl in the same directory, one JSON
            # object per line.
            # NOTE(review): open() is called without an explicit encoding while
            # ensure_ascii=False emits raw non-ASCII text, so the bytes written
            # depend on the platform default -- confirm this matches how
            # load_judgements reads these files.
            output_file = judgement_dir / f"{result_id}.jsonl"
            with open(output_file, "w") as f:
                f.writelines(
                    json.dumps(entry, ensure_ascii=False) + "\n"
                    for entry in reparsed_results
                )
            logger.info(f"Fixed {output_file}")
    logger.info("Done")

0 comments on commit 3c60e86

Please sign in to comment.