Merge pull request #4 from YanxinLu/main
support litellm and w/ background setup
mtian8 authored Jul 24, 2024
2 parents 8ab4166 + e5ae4fd commit a722196
Showing 7 changed files with 155 additions and 86 deletions.
2 changes: 2 additions & 0 deletions eval/data/background_comment_template.txt
@@ -21,6 +21,8 @@ RESPONSE GUIDELINES:
5. Ensure your response is formatted as a ```python``` code block and includes the necessary background as a comment at the top.

Example:
```python
# Background: [Here, insert the necessary scientific knowledge required for the next step.]

[Insert the Python code here based on the provided function header and dependencies.]
```
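For illustration, a response that follows the updated template might look like the hypothetical example below; the background text and function are invented for this note, not taken from the benchmark:

```python
# Background: The trapezoidal rule approximates a definite integral by summing
# the areas of trapezoids formed between consecutive sample points.

def trapezoid(y, dx):
    """Integrate uniformly spaced samples y with step size dx."""
    return dx * (sum(y) - 0.5 * (y[0] + y[-1]))
```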
130 changes: 65 additions & 65 deletions eval/data/problems_all.jsonl

Large diffs are not rendered by default.

35 changes: 25 additions & 10 deletions eval/scripts/gencode_json.py
@@ -14,23 +14,27 @@

class Gencode:
def __init__(self, model: str, output_dir: Path,
prompt_dir: Path, temperature: float):
prompt_dir: Path, with_background: bool, temperature: float):
self.model = model
self.output_dir = output_dir
self.prompt_dir = prompt_dir
self.with_background = with_background
self.temperature = temperature
self.previous_llm_code = []

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int, tot_steps: int) -> None:
output_dir = Path(self.prompt_dir, self.model)
def _get_background_dir(self):
return "with_background" if self.with_background else "without_background"

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int) -> None:
output_dir = Path(self.prompt_dir, Path(self.model).parts[-1], self._get_background_dir())
output_dir.mkdir(parents=True, exist_ok=True)
output_file_path = output_dir / f"{prob_data['problem_id']}.{num_steps}.txt"
output_file_path.write_text(prompt, encoding="utf-8")

def save_response_with_steps(self, prob_data: dict, response: str, previous_code: str,
num_steps: int, model="gpt-4o",) -> None:
def save_response_with_steps(self, prob_data: dict, response: str,
previous_code: str, num_steps: int) -> None:
output_dir = (
self.output_dir / model
self.output_dir / Path(self.model).parts[-1] / self._get_background_dir()
)
output_dir.mkdir(parents=True, exist_ok=True)
prob_id = prob_data["problem_id"]
@@ -78,7 +82,7 @@ def generate_response_with_steps(
raise Exception(f'Generating {prob_id} step {num_steps} ahead of step {prev_step + 1}.')
prompt, previous_code = self.generate_prompt_with_steps(prob_data, num_steps, prompt_template)
if save:
self.save_prompt_with_steps(prob_data, prompt, num_steps, tot_steps)
self.save_prompt_with_steps(prob_data, prompt, num_steps)

model_kwargs = {}
if "claude" in model:
@@ -94,7 +98,7 @@ def generate_response_with_steps(
model_fct = get_model_function(model, **model_kwargs)
response_from_llm = model_fct(prompt)
self.previous_llm_code[num_steps - 1] = extract_python_script(response_from_llm)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps, model)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps)

@staticmethod
def process_problem_code(prob_data: dict, num_steps: int) -> str:
@@ -109,11 +113,16 @@ def process_problem_steps(self, problem_data: dict, num_steps: int):
next_step = []
previous_code = []
for i in range(num_steps - 1):
output_lines.append(problem_data["sub_steps"][i]["step_description_prompt"] + '\n' +
problem_data["sub_steps"][i]["step_background"] if self.with_background
else problem_data["sub_steps"][i]["step_description_prompt"])
output_lines.append(self.previous_llm_code[i])
previous_code.append(self.previous_llm_code[i])
output_lines.append("------")

next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"] + '\n' +
problem_data["sub_steps"][num_steps - 1]["step_background"] if self.with_background
else problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append(self.process_problem_code(problem_data, num_steps))
output_str = "\n\n".join(output_lines[:-1]) # Remove the last "------"
next_step_str = "\n\n".join(next_step)
@@ -160,6 +169,11 @@ def get_cli() -> argparse.ArgumentParser:
default=Path("eval_results", "prompt"),
help="Prompt directory",
)
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
parser.add_argument(
"--temperature",
type=float,
@@ -173,11 +187,12 @@ def main(model: str,
output_dir: Path,
input_path: Path,
prompt_dir: Path,
with_background: bool,
temperature: float
) -> None:
gcode = Gencode(
model=model, output_dir=output_dir,
prompt_dir=prompt_dir, temperature=temperature
prompt_dir=prompt_dir, with_background=with_background, temperature=temperature
)
data = read_from_jsonl(input_path)
for problem in data:
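Net effect of the gencode_json.py changes: prompts and generated code are grouped per model and per background mode, and only the final path component of the model name is used, so a name such as `litellm/gpt-4o` does not create nested directories. A minimal sketch of the resulting layout (paths are illustrative, not taken from the diff):

```python
from pathlib import Path

def background_dir(with_background: bool) -> str:
    # Mirrors Gencode._get_background_dir in the diff above.
    return "with_background" if with_background else "without_background"

# A model passed as "litellm/gpt-4o" keeps only its final path component.
model = Path("litellm/gpt-4o").parts[-1]
print(Path("eval_results", "prompt") / model / background_dir(True))
# eval_results/prompt/gpt-4o/with_background
```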
31 changes: 23 additions & 8 deletions eval/scripts/test_generated_code.py
@@ -6,6 +6,7 @@
import numpy as np
import argparse

from scicode.parse.parse import H5PY_FILE
from scicode.parse.parse import read_from_jsonl


@@ -15,7 +16,12 @@
DEV_STEP_NUM = 50


def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=False):
def _get_background_dir(with_background):
return "with_background" if with_background else "without_background"


def test_code(model_name, code_dir, log_dir, output_dir,
jsonl_path, dev_set=False, with_background=False):

jsonl_data = read_from_jsonl(jsonl_path)
json_dct = {}
@@ -26,7 +32,7 @@ def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=Fal
json_idx[prob_data['problem_id']] = jsonl_data.index(prob_data)
start_time = time.time()

code_dir_ = Path(code_dir, model_name)
code_dir_ = Path(code_dir, model_name, _get_background_dir(with_background))
tmp_dir = Path(f'tmp_{start_time}')

tmp_dir.mkdir(parents=True, exist_ok=True)
@@ -82,7 +88,7 @@ def run_script(script_path):
prob_id = func_id.split('.')[0]
print(f'Testing function {func_id} ...')
tot_prob[int(prob_id) - 1] += 1
logs_dir_ = Path(log_dir, model_name)
logs_dir_ = Path(log_dir, model_name, _get_background_dir(with_background))
logs_dir_.mkdir(parents=True, exist_ok=True)
logs_file = Path(logs_dir_, f'{file_path.stem}.txt')
if logs_file.exists():
@@ -116,16 +122,16 @@ def run_script(script_path):
print(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}')
print(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}')

output_dir.mkdir(parents=True, exist_ok=True)
Path(output_dir).mkdir(parents=True, exist_ok=True)

with open(f'{output_dir}/{model_name}.txt', 'w') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.txt', 'w') as f:
f.write(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}\n')
f.write(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}\n\n')
f.write(f'duration: {test_time} seconds\n')
f.write('\ncorrect problems: ')
f.write(f'\n\n{[i + 1 for i in range(PROB_NUM) if correct_prob[i] == tot_prob[i] and tot_prob[i] != 0]}\n')

with open(f'{output_dir}/{model_name}.json', 'w', encoding='utf-8') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.json', 'w', encoding='utf-8') as f:
json.dump(correct_dict, f, indent=4)

shutil.rmtree(tmp_dir)
@@ -166,6 +172,11 @@ def get_cli() -> argparse.ArgumentParser:
"--dev-set",
action='store_true',
help="Test dev set if enabled",
)
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
return parser

@@ -175,9 +186,13 @@ def main(model: str,
log_dir: Path,
output_dir: Path,
jsonl_path: Path,
dev_set: bool
dev_set: bool,
with_background: bool
) -> None:
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set)
if not Path(H5PY_FILE).exists():
raise FileNotFoundError("Please download the numeric test results before testing generated code.")
model = Path(model).parts[-1]
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set, with_background)


if __name__ == "__main__":
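With the background suffix added to the result file names, runs with and without background no longer overwrite each other's summaries. A small illustrative sketch of the naming convention (directory and model names are hypothetical):

```python
def result_paths(output_dir: str, model_name: str, with_background: bool) -> tuple:
    # Mirrors the file naming used in test_code above.
    suffix = "with_background" if with_background else "without_background"
    return (f"{output_dir}/{model_name}_{suffix}.txt",
            f"{output_dir}/{model_name}_{suffix}.json")

print(result_paths("eval_results", "gpt-4o", with_background=False))
# ('eval_results/gpt-4o_without_background.txt', 'eval_results/gpt-4o_without_background.json')
```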
1 change: 1 addition & 0 deletions pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
"rich",
"pytest",
"pytest-cov",
"litellm",
# requirements for execution
"numpy",
"scipy",
40 changes: 38 additions & 2 deletions src/scicode/gen/models.py
@@ -4,14 +4,43 @@
import google.generativeai as genai
import config
import re
import os
import litellm
from litellm.utils import validate_environment as litellm_validate_environment

from scicode import keys_cfg_path
from scicode.utils.log import get_logger

logger = get_logger("models")


def get_config():
if not keys_cfg_path.exists():
raise FileNotFoundError(f"Config file not found: {keys_cfg_path}")
return config.Config(str(keys_cfg_path))

def generate_litellm_response(prompt: str, *, model: str, **kwargs) -> str:
"""Call the litellm api to generate a response"""
# litellm expects all keys as env variables
config = get_config()
for key, value in config.as_dict().items():
if key in os.environ and os.environ[key] != value:
logger.warning(f"Overwriting {key} from config with environment variable")
else:
os.environ[key] = value
# Let's validate that we have everything needed for this model
env_validation = litellm_validate_environment(model)
if not env_validation.get("keys_in_environment") or env_validation.get("missing_keys", []):
msg = f"Environment validation for litellm failed for model {model}: {env_validation}"
raise ValueError(msg)
response = litellm.completion(
model=model,
messages = [
{"role": "user", "content": prompt},
],
**kwargs,
)
return response.choices[0].message.content
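A minimal usage sketch for the new helper, assuming the relevant provider key is available via the key config or the environment; the prompt and model name are placeholders:

```python
from scicode.gen.models import generate_litellm_response

# Any model identifier that litellm recognizes can be passed here;
# extra keyword arguments are forwarded to litellm.completion.
answer = generate_litellm_response(
    "Explain the trapezoidal rule in one sentence.",
    model="gpt-4o",
    temperature=0,
)
print(answer)
```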

def generate_openai_response(prompt: str, *, model="gpt-4-turbo-2024-04-09",
temperature: float = 0) -> str:
@@ -87,7 +116,10 @@ def generate_google_response(prompt: str, *, model: str = "gemini-pro",

def get_model_function(model: str, **kwargs):
"""Return the appropriate function to generate a response based on the model"""
if "gpt" in model:
if model.startswith("litellm/"):
model = model.removeprefix("litellm/")
fct = generate_litellm_response
elif "gpt" in model:
fct = generate_openai_response
elif "claude" in model:
fct = generate_anthropic_response
@@ -107,7 +139,11 @@ def generate_dummy_response(prompt: str, **kwargs) -> str:

def extract_python_script(response: str):
# We will extract the python script from the response
python_script = response.split("```python")[1].split("```")[0]
if '```' in response:
python_script = response.split("```python")[1].split("```")[0] if '```python' in response else response.split('```')[1].split('```')[0]
else:
print("Fail to extract python code from specific format.")
python_script = response
python_script = re.sub(r'^\s*(import .*|from .*\s+import\s+.*)', '', python_script, flags=re.MULTILINE)
return python_script
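Taken together, the dispatch and extraction changes let callers select any litellm-backed model with a `litellm/` prefix and still recover code when the response is not fenced. A hedged sketch of the intended flow (the model name is only an example):

```python
from scicode.gen.models import get_model_function, extract_python_script

# The "litellm/" prefix routes the call through generate_litellm_response;
# the prefix is stripped before litellm sees the model name.
model_fct = get_model_function("litellm/claude-3-5-sonnet-20240620")
response = model_fct("Write a function that reverses a string.")

# extract_python_script prefers a python-tagged fence, falls back to any fence,
# and returns the raw response if no fence is found; import lines are stripped.
code = extract_python_script(response)
print(code)
```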

2 changes: 1 addition & 1 deletion tests/test_data/first_problem.jsonl

Large diffs are not rendered by default.
