Merge pull request #54 from ku-nlp/support-azure
Support Azure API
hkiyomaru authored Feb 13, 2024
2 parents 724d208 + 46f3a98 commit b722b64
Showing 2 changed files with 24 additions and 13 deletions.
33 changes: 22 additions & 11 deletions llm_judge/common.py
@@ -17,6 +17,9 @@
 load_dotenv()
 openai.api_key = os.getenv("OPENAI_API_KEY")
 openai.organization = os.getenv("OPENAI_ORGANIZATION")
+openai.api_type = os.getenv("OPENAI_API_TYPE")
+openai.api_base = os.getenv("OPENAI_API_BASE")
+openai.api_version = os.getenv("OPENAI_API_VERSION")
 
 # Data paths
 JP_BENCH_DIR = Path(__file__).resolve().parent.parent / "data" / "jp_bench"
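The three new settings are read from the environment (the module calls load_dotenv() above), so Azure users only need to extend their .env file. A hypothetical sketch — the variable names come from this diff, while the values below are placeholders, and the API version must be one your Azure resource actually supports:

OPENAI_API_TYPE=azure
OPENAI_API_BASE=https://<your-resource>.openai.azure.com/
OPENAI_API_VERSION=2023-07-01-preview
OPENAI_API_KEY=<your-azure-key>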
@@ -56,12 +59,16 @@ def judge(self, **kwargs):
         ]
         for _ in range(API_MAX_RETRY):
             try:
-                response = openai.ChatCompletion.create(
-                    model=self.model,
-                    messages=messages,
-                    temperature=0,
-                    max_tokens=2048,
-                )
+                params = {
+                    "messages": messages,
+                    "temperature": 0,
+                    "max_tokens": 2048,
+                }
+                if openai.api_type == "azure":
+                    params["engine"] = self.model
+                else:
+                    params["model"] = self.model
+                response = openai.ChatCompletion.create(**params)
                 return response["choices"][0]["message"]["content"]
             except openai.error.OpenAIError as e:
                 logger.warning(f"OpenAI API error: {e}")
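The dispatch above is needed because the legacy openai<1.0 SDK addresses Azure by deployment rather than by model family: on Azure, ChatCompletion.create expects engine= set to the name of a deployment, while the public OpenAI endpoint expects model=. A minimal standalone sketch of the same pattern, with a hypothetical deployment name:

import openai

params = {
    "messages": [{"role": "user", "content": "Hello"}],
    "temperature": 0,
    "max_tokens": 2048,
}
if openai.api_type == "azure":
    # Azure routes by deployment name, which need not equal the model name.
    params["engine"] = "my-gpt4-deployment"  # hypothetical deployment name
else:
    params["model"] = "gpt-4"
response = openai.ChatCompletion.create(**params)
print(response["choices"][0]["message"]["content"])

One consequence worth noting: because the code passes self.model as the engine, the Azure deployment must be named after the judge model (e.g. a deployment literally called gpt-4) for the cost estimation below to keep working.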
@@ -121,10 +128,12 @@ def estimate_cost(self) -> float:
             enc.encode(self.ref_answer["choices"][0]["turns"][0])
         )
         num_output_tokens = 200  # Estimated from a few samples
-        if self.judge.model == "gpt-4":
+        if self.judge.model in {"gpt-4", "gpt-4-0613"}:
             return (0.03 * num_input_tokens + 0.06 * num_output_tokens) / 1_000
+        elif self.judge.model == "gpt-4-1106-preview":
+            return (0.01 * num_input_tokens + 0.03 * num_output_tokens) / 1_000
         elif self.judge.model == "gpt-3.5-turbo":
-            return (0.001 * num_input_tokens + 0.002 * num_output_tokens) / 1_000
+            return (0.0005 * num_input_tokens + 0.0015 * num_output_tokens) / 1_000
         raise AssertionError
 
     @staticmethod
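To make the pricing arithmetic concrete, a worked example under the gpt-4 branch above (rates are USD per 1K tokens), assuming a hypothetical 1,000-token input:

num_input_tokens = 1_000  # hypothetical prompt size
num_output_tokens = 200   # the constant used above
cost = (0.03 * num_input_tokens + 0.06 * num_output_tokens) / 1_000
# = (30.0 + 12.0) / 1_000 = 0.042 USD per judgment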
@@ -209,10 +218,12 @@ def estimate_cost(self) -> float:
             enc.encode(self.ref_answer["choices"][0]["turns"][0])
         )
         num_output_tokens = 200  # Estimated from a few samples
-        if self.judge.model == "gpt-4":
-            return 2 * (0.03 * num_input_tokens + 0.06 * num_output_tokens) / 1_000
+        if self.judge.model in {"gpt-4", "gpt-4-0613"}:
+            return (0.03 * num_input_tokens + 0.06 * num_output_tokens) / 1_000
+        elif self.judge.model == "gpt-4-1106-preview":
+            return (0.01 * num_input_tokens + 0.03 * num_output_tokens) / 1_000
         elif self.judge.model == "gpt-3.5-turbo":
-            return 2 * (0.001 * num_input_tokens + 0.002 * num_output_tokens) / 1_000
+            return (0.0005 * num_input_tokens + 0.0015 * num_output_tokens) / 1_000
         raise AssertionError
 
     @staticmethod
4 changes: 2 additions & 2 deletions llm_judge/gen_judgment.py
@@ -133,7 +133,7 @@ def make_match_groups_pairwise(
         "--judge-model",
         type=str,
         default="gpt-4",
-        choices=["gpt-4", "gpt-3.5-turbo"],
+        choices=["gpt-4", "gpt-4-0613", "gpt-4-1106-preview", "gpt-3.5-turbo"],
         help="The judge model.",
     )
     parser.add_argument(
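With the widened choices, the judge can now be pinned to a dated snapshot, e.g. python llm_judge/gen_judgment.py --judge-model gpt-4-1106-preview (illustrative invocation; any other flags the script requires are omitted here).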
@@ -211,7 +211,7 @@ def make_match_groups_pairwise(
 
     logger.info("Load reference answers")
     judge_model = args.judge_model
-    answers = load_model_answers(REFERENCE_DIR / judge_model)
+    answers = load_model_answers(REFERENCE_DIR / "gpt-4")
     for question in filter(lambda x: x["category"] in NEED_REF_CATS, questions):
         assert question["question_id"] in answers
     ref_answers = {judge_model: answers}
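The effect of this one-line change: reference answers are now always loaded from the fixed gpt-4 directory, so the newly supported judge models (gpt-4-0613, gpt-4-1106-preview) reuse the existing gpt-4 references rather than requiring reference directories of their own, while ref_answers remains keyed by the selected judge model.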
