Skip to content

Commit

Permalink
add litellm + bg setup
Browse files Browse the repository at this point in the history
  • Loading branch information
YanxinLu committed Jul 24, 2024
1 parent 1813032 commit 89fccc3
Show file tree
Hide file tree
Showing 291 changed files with 113 additions and 371 deletions.
130 changes: 65 additions & 65 deletions eval/data/problems_all.jsonl

Large diffs are not rendered by default.

35 changes: 25 additions & 10 deletions eval/scripts/gencode_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,27 @@

class Gencode:
def __init__(self, model: str, output_dir: Path,
prompt_dir: Path, temperature: float):
prompt_dir: Path, with_background: bool, temperature: float):
self.model = model
self.output_dir = output_dir
self.prompt_dir = prompt_dir
self.with_background = with_background
self.temperature = temperature
self.previous_llm_code = []

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int, tot_steps: int) -> None:
output_dir = Path(self.prompt_dir, self.model)
def _get_background_dir(self) -> str:
    """Return the sub-directory name matching the background setting.

    Returns:
        ``"with_background"`` when ``self.with_background`` is truthy,
        otherwise ``"without_background"``.
    """
    return "with_background" if self.with_background else "without_background"

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int) -> None:
    """Write the prompt for one step of a problem to a text file.

    The file is written to
    ``<prompt_dir>/<last component of model>/<background dir>/<problem_id>.<num_steps>.txt``;
    missing parent directories are created.

    Args:
        prob_data: Problem record; only its ``"problem_id"`` key is read here.
        prompt: Prompt text to store (written as UTF-8).
        num_steps: Step number used in the output file name.
    """
    # Only the last path component of the model name is used, so a value such as
    # "provider/model" does not introduce an extra directory level.
    output_dir = Path(self.prompt_dir, Path(self.model).parts[-1], self._get_background_dir())
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file_path = output_dir / f"{prob_data['problem_id']}.{num_steps}.txt"
    output_file_path.write_text(prompt, encoding="utf-8")

def save_response_with_steps(self, prob_data: dict, response: str, previous_code: str,
num_steps: int, model="gpt-4o",) -> None:
def save_response_with_steps(self, prob_data: dict, response: str,
previous_code: str, num_steps: int) -> None:
output_dir = (
self.output_dir / model
self.output_dir / Path(self.model).parts[-1] / self._get_background_dir()
)
output_dir.mkdir(parents=True, exist_ok=True)
prob_id = prob_data["problem_id"]
Expand Down Expand Up @@ -78,7 +82,7 @@ def generate_response_with_steps(
raise Exception(f'Generating {prob_id} step {num_steps} ahead of step {prev_step + 1}.')
prompt, previous_code = self.generate_prompt_with_steps(prob_data, num_steps, prompt_template)
if save:
self.save_prompt_with_steps(prob_data, prompt, num_steps, tot_steps)
self.save_prompt_with_steps(prob_data, prompt, num_steps)

model_kwargs = {}
if "claude" in model:
Expand All @@ -94,7 +98,7 @@ def generate_response_with_steps(
model_fct = get_model_function(model, **model_kwargs)
response_from_llm = model_fct(prompt)
self.previous_llm_code[num_steps - 1] = extract_python_script(response_from_llm)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps, model)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps)

@staticmethod
def process_problem_code(prob_data: dict, num_steps: int) -> str:
Expand All @@ -109,11 +113,16 @@ def process_problem_steps(self, problem_data: dict, num_steps: int):
next_step = []
previous_code = []
for i in range(num_steps - 1):
output_lines.append(problem_data["sub_steps"][i]["step_description_prompt"] + '\n' +
problem_data["sub_steps"][i]["step_background"] if self.with_background
else problem_data["sub_steps"][i]["step_description_prompt"])
output_lines.append(self.previous_llm_code[i])
previous_code.append(self.previous_llm_code[i])
output_lines.append("------")

next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"] + '\n' +
problem_data["sub_steps"][num_steps - 1]["step_background"] if self.with_background
else problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append(self.process_problem_code(problem_data, num_steps))
output_str = "\n\n".join(output_lines[:-1]) # Remove the last "------"
next_step_str = "\n\n".join(next_step)
Expand Down Expand Up @@ -160,6 +169,11 @@ def get_cli() -> argparse.ArgumentParser:
default=Path("eval_results", "prompt"),
help="Prompt directory",
)
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
parser.add_argument(
"--temperature",
type=float,
Expand All @@ -173,11 +187,12 @@ def main(model: str,
output_dir: Path,
input_path: Path,
prompt_dir: Path,
with_background: bool,
temperature: float
) -> None:
gcode = Gencode(
model=model, output_dir=output_dir,
prompt_dir=prompt_dir, temperature=temperature
prompt_dir=prompt_dir, with_background=with_background, temperature=temperature
)
data = read_from_jsonl(input_path)
for problem in data:
Expand Down
31 changes: 23 additions & 8 deletions eval/scripts/test_generated_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numpy as np
import argparse

from scicode.parse.parse import H5PY_FILE
from scicode.parse.parse import read_from_jsonl


Expand All @@ -15,7 +16,12 @@
DEV_STEP_NUM = 50


def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=False):
def _get_background_dir(with_background: bool) -> str:
    """Return the sub-directory name matching the background setting.

    Args:
        with_background: Whether the background variant is selected.

    Returns:
        ``"with_background"`` when ``with_background`` is truthy,
        otherwise ``"without_background"``.
    """
    return "with_background" if with_background else "without_background"


def test_code(model_name, code_dir, log_dir, output_dir,
jsonl_path, dev_set=False, with_background=False):

jsonl_data = read_from_jsonl(jsonl_path)
json_dct = {}
Expand All @@ -26,7 +32,7 @@ def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=Fal
json_idx[prob_data['problem_id']] = jsonl_data.index(prob_data)
start_time = time.time()

code_dir_ = Path(code_dir, model_name)
code_dir_ = Path(code_dir, model_name, _get_background_dir(with_background))
tmp_dir = Path(f'tmp_{start_time}')

tmp_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -82,7 +88,7 @@ def run_script(script_path):
prob_id = func_id.split('.')[0]
print(f'Testing function {func_id} ...')
tot_prob[int(prob_id) - 1] += 1
logs_dir_ = Path(log_dir, model_name)
logs_dir_ = Path(log_dir, model_name, _get_background_dir(with_background))
logs_dir_.mkdir(parents=True, exist_ok=True)
logs_file = Path(logs_dir_, f'{file_path.stem}.txt')
if logs_file.exists():
Expand Down Expand Up @@ -116,16 +122,16 @@ def run_script(script_path):
print(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}')
print(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}')

Path(f'{output_dir}/{Path(model_name)}').mkdir(parents=True, exist_ok=True)
Path(output_dir).mkdir(parents=True, exist_ok=True)

with open(f'{output_dir}/{model_name}.txt', 'w') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.txt', 'w') as f:
f.write(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}\n')
f.write(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}\n\n')
f.write(f'duration: {test_time} seconds\n')
f.write('\ncorrect problems: ')
f.write(f'\n\n{[i + 1 for i in range(PROB_NUM) if correct_prob[i] == tot_prob[i] and tot_prob[i] != 0]}\n')

with open(f'{output_dir}/{model_name}.json', 'w', encoding='utf-8') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.json', 'w', encoding='utf-8') as f:
json.dump(correct_dict, f, indent=4)

shutil.rmtree(tmp_dir)
Expand Down Expand Up @@ -166,6 +172,11 @@ def get_cli() -> argparse.ArgumentParser:
"--dev-set",
action='store_true',
help="Test dev set if enabled",
),
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
return parser

Expand All @@ -175,9 +186,13 @@ def main(model: str,
log_dir: Path,
output_dir: Path,
jsonl_path: Path,
dev_set: bool
dev_set: bool,
with_background: bool
) -> None:
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set)
if not Path(H5PY_FILE).exists():
raise FileNotFoundError("Please download the numeric test results before testing generated code.")
model = Path(model).parts[-1]
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set, with_background)


if __name__ == "__main__":
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 89fccc3

Please sign in to comment.