support litellm and bg setup #4

Merged
merged 5 commits
Jul 24, 2024
2 changes: 2 additions & 0 deletions eval/data/background_comment_template.txt
@@ -21,6 +21,8 @@ RESPONSE GUIDELINES:
5. Ensure your response is in the format of ```python``` and includes the necessary background as a comment at the top.

Example:
```python
# Background: [Here, insert the necessary scientific knowledge required for the next step.]

[Insert the Python code here based on the provided function header and dependencies.]
```
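As a quick illustration of the updated template, a response following it might look like the snippet below. The background text and the function are invented for this example and are not part of the benchmark data.

```python
# Background: The explicit Euler method advances an ODE y' = f(t, y) by one step of
# size h via y_{n+1} = y_n + h * f(t_n, y_n); its local truncation error is O(h^2).

def euler_step(f, t, y, h):
    """Advance the solution of y' = f(t, y) by one explicit Euler step of size h."""
    return y + h * f(t, y)
```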
130 changes: 65 additions & 65 deletions eval/data/problems_all.jsonl

Large diffs are not rendered by default.

35 changes: 25 additions & 10 deletions eval/scripts/gencode_json.py
@@ -14,23 +14,27 @@

class Gencode:
def __init__(self, model: str, output_dir: Path,
prompt_dir: Path, temperature: float):
prompt_dir: Path, with_background: bool, temperature: float):
self.model = model
self.output_dir = output_dir
self.prompt_dir = prompt_dir
self.with_background = with_background
self.temperature = temperature
self.previous_llm_code = []

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int, tot_steps: int) -> None:
output_dir = Path(self.prompt_dir, self.model)
def _get_background_dir(self):
return "with_background" if self.with_background else "without_background"

def save_prompt_with_steps(self, prob_data: dict, prompt: str, num_steps: int) -> None:
output_dir = Path(self.prompt_dir, Path(self.model).parts[-1], self._get_background_dir())
output_dir.mkdir(parents=True, exist_ok=True)
output_file_path = output_dir / f"{prob_data['problem_id']}.{num_steps}.txt"
output_file_path.write_text(prompt, encoding="utf-8")

def save_response_with_steps(self, prob_data: dict, response: str, previous_code: str,
num_steps: int, model="gpt-4o",) -> None:
def save_response_with_steps(self, prob_data: dict, response: str,
previous_code: str, num_steps: int) -> None:
output_dir = (
self.output_dir / model
self.output_dir / Path(self.model).parts[-1] / self._get_background_dir()
)
output_dir.mkdir(parents=True, exist_ok=True)
prob_id = prob_data["problem_id"]
@@ -78,7 +82,7 @@ def generate_response_with_steps(
raise Exception(f'Generating {prob_id} step {num_steps} ahead of step {prev_step + 1}.')
prompt, previous_code = self.generate_prompt_with_steps(prob_data, num_steps, prompt_template)
if save:
self.save_prompt_with_steps(prob_data, prompt, num_steps, tot_steps)
self.save_prompt_with_steps(prob_data, prompt, num_steps)

model_kwargs = {}
if "claude" in model:
@@ -94,7 +98,7 @@
model_fct = get_model_function(model, **model_kwargs)
response_from_llm = model_fct(prompt)
self.previous_llm_code[num_steps - 1] = extract_python_script(response_from_llm)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps, model)
self.save_response_with_steps(prob_data, response_from_llm, previous_code, num_steps)

@staticmethod
def process_problem_code(prob_data: dict, num_steps: int) -> str:
@@ -109,11 +113,16 @@ def process_problem_steps(self, problem_data: dict, num_steps: int):
next_step = []
previous_code = []
for i in range(num_steps - 1):
output_lines.append((problem_data["sub_steps"][i]["step_description_prompt"] + '\n' +
                     problem_data["sub_steps"][i]["step_background"]) if self.with_background
                    else problem_data["sub_steps"][i]["step_description_prompt"])
output_lines.append(self.previous_llm_code[i])
previous_code.append(self.previous_llm_code[i])
output_lines.append("------")

next_step.append(problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append((problem_data["sub_steps"][num_steps - 1]["step_description_prompt"] + '\n' +
                  problem_data["sub_steps"][num_steps - 1]["step_background"]) if self.with_background
                 else problem_data["sub_steps"][num_steps - 1]["step_description_prompt"])
next_step.append(self.process_problem_code(problem_data, num_steps))
output_str = "\n\n".join(output_lines[:-1]) # Remove the last "------"
next_step_str = "\n\n".join(next_step)
@@ -160,6 +169,11 @@ def get_cli() -> argparse.ArgumentParser:
default=Path("eval_results", "prompt"),
help="Prompt directory",
)
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
parser.add_argument(
"--temperature",
type=float,
@@ -173,11 +187,12 @@ def main(model: str,
output_dir: Path,
input_path: Path,
prompt_dir: Path,
with_background: bool,
temperature: float
) -> None:
gcode = Gencode(
model=model, output_dir=output_dir,
prompt_dir=prompt_dir, temperature=temperature
prompt_dir=prompt_dir, with_background=with_background, temperature=temperature
)
data = read_from_jsonl(input_path)
for problem in data:
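To make the directory change easier to review, here is a minimal sketch of how `_get_background_dir()` and `Path(self.model).parts[-1]` combine into the new output layout. The model string and prompt directory below are hypothetical, not taken from the PR.

```python
from pathlib import Path

def background_dir(with_background: bool) -> str:
    # mirrors Gencode._get_background_dir()
    return "with_background" if with_background else "without_background"

# hypothetical inputs for illustration
model = "litellm/together_ai/meta-llama/Llama-3-70b-chat-hf"
prompt_dir = Path("eval_results", "prompt")

# Path(model).parts[-1] keeps only the last path segment, so provider-prefixed litellm
# model names containing slashes do not create nested directories.
print(prompt_dir / Path(model).parts[-1] / background_dir(True))
# eval_results/prompt/Llama-3-70b-chat-hf/with_background
```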
31 changes: 23 additions & 8 deletions eval/scripts/test_generated_code.py
@@ -6,6 +6,7 @@
import numpy as np
import argparse

from scicode.parse.parse import H5PY_FILE
from scicode.parse.parse import read_from_jsonl


@@ -15,7 +16,12 @@
DEV_STEP_NUM = 50


def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=False):
def _get_background_dir(with_background):
return "with_background" if with_background else "without_background"


def test_code(model_name, code_dir, log_dir, output_dir,
jsonl_path, dev_set=False, with_background=False):

jsonl_data = read_from_jsonl(jsonl_path)
json_dct = {}
@@ -26,7 +32,7 @@ def test_code(model_name, code_dir, log_dir, output_dir, jsonl_path, dev_set=False):
json_idx[prob_data['problem_id']] = jsonl_data.index(prob_data)
start_time = time.time()

code_dir_ = Path(code_dir, model_name)
code_dir_ = Path(code_dir, model_name, _get_background_dir(with_background))
tmp_dir = Path(f'tmp_{start_time}')

tmp_dir.mkdir(parents=True, exist_ok=True)
@@ -82,7 +88,7 @@ def run_script(script_path):
prob_id = func_id.split('.')[0]
print(f'Testing function {func_id} ...')
tot_prob[int(prob_id) - 1] += 1
logs_dir_ = Path(log_dir, model_name)
logs_dir_ = Path(log_dir, model_name, _get_background_dir(with_background))
logs_dir_.mkdir(parents=True, exist_ok=True)
logs_file = Path(logs_dir_, f'{file_path.stem}.txt')
if logs_file.exists():
@@ -116,16 +122,16 @@ def run_script(script_path):
print(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}')
print(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}')

output_dir.mkdir(parents=True, exist_ok=True)
Path(output_dir).mkdir(parents=True, exist_ok=True)

with open(f'{output_dir}/{model_name}.txt', 'w') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.txt', 'w') as f:
f.write(f'correct problems: {correct_prob_num}/{DEV_PROB_NUM if dev_set else PROB_NUM - DEV_PROB_NUM}\n')
f.write(f'correct steps: {len(correct_step)}/{DEV_STEP_NUM if dev_set else STEP_NUM}\n\n')
f.write(f'duration: {test_time} seconds\n')
f.write('\ncorrect problems: ')
f.write(f'\n\n{[i + 1 for i in range(PROB_NUM) if correct_prob[i] == tot_prob[i] and tot_prob[i] != 0]}\n')

with open(f'{output_dir}/{model_name}.json', 'w', encoding='utf-8') as f:
with open(f'{output_dir}/{model_name}_{_get_background_dir(with_background)}.json', 'w', encoding='utf-8') as f:
json.dump(correct_dict, f, indent=4)

shutil.rmtree(tmp_dir)
@@ -166,6 +172,11 @@ def get_cli() -> argparse.ArgumentParser:
"--dev-set",
action='store_true',
help="Test dev set if enabled",
)
parser.add_argument(
"--with-background",
action="store_true",
help="Include problem background if enabled",
)
return parser

@@ -175,9 +186,13 @@ def main(model: str,
log_dir: Path,
output_dir: Path,
jsonl_path: Path,
dev_set: bool
dev_set: bool,
with_background: bool
) -> None:
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set)
if not Path(H5PY_FILE).exists():
raise FileNotFoundError("Please download the numeric test results before testing generated code.")
model = Path(model).parts[-1]
test_code(model, code_dir, log_dir, output_dir, jsonl_path, dev_set, with_background)


if __name__ == "__main__":
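On the testing side, the practical effect is that result summaries are now split by background mode, so runs with and without background no longer overwrite each other. A small sketch under hypothetical names (output_dir and model are illustrative):

```python
def background_dir(with_background: bool) -> str:
    # same helper as in test_generated_code.py
    return "with_background" if with_background else "without_background"

# hypothetical values for illustration
model, output_dir, with_background = "gpt-4o", "eval_results", False

summary_txt = f"{output_dir}/{model}_{background_dir(with_background)}.txt"
summary_json = f"{output_dir}/{model}_{background_dir(with_background)}.json"
print(summary_txt)   # eval_results/gpt-4o_without_background.txt
print(summary_json)  # eval_results/gpt-4o_without_background.json
```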
1 change: 1 addition & 0 deletions pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
"rich",
"pytest",
"pytest-cov",
"litellm",
# requirements for execution
"numpy",
"scipy",
40 changes: 38 additions & 2 deletions src/scicode/gen/models.py
@@ -4,14 +4,43 @@
import google.generativeai as genai
import config
import re
import os
import litellm
from litellm.utils import validate_environment as litellm_validate_environment

from scicode import keys_cfg_path
from scicode.utils.log import get_logger

logger = get_logger("models")


def get_config():
if not keys_cfg_path.exists():
raise FileNotFoundError(f"Config file not found: {keys_cfg_path}")
return config.Config(str(keys_cfg_path))

def generate_litellm_response(prompt: str, *, model: str, **kwargs) -> str:
"""Call the litellm api to generate a response"""
# litellm expects all keys as env variables
config = get_config()
for key, value in config.as_dict().items():
if key in os.environ and os.environ[key] != value:
logger.warning(f"Overwriting {key} from config with environment variable")
else:
os.environ[key] = value
# Let's validate that we have everything for this model
env_validation = litellm_validate_environment(model)
if not env_validation.get("keys_in_environment") or env_validation.get("missing_keys", []):
msg = f"Environment validation for litellm failed for model {model}: {env_validation}"
raise ValueError(msg)
response = litellm.completion(
model=model,
messages = [
{"role": "user", "content": prompt},
],
**kwargs,
)
return response.choices[0].message.content

def generate_openai_response(prompt: str, *, model="gpt-4-turbo-2024-04-09",
temperature: float = 0) -> str:
@@ -87,7 +116,10 @@ def generate_google_response(prompt: str, *, model: str = "gemini-pro",

def get_model_function(model: str, **kwargs):
"""Return the appropriate function to generate a response based on the model"""
if "gpt" in model:
if model.startswith("litellm/"):
model = model.removeprefix("litellm/")
fct = generate_litellm_response
elif "gpt" in model:
fct = generate_openai_response
elif "claude" in model:
fct = generate_anthropic_response
@@ -107,7 +139,11 @@ def generate_dummy_response(prompt: str, **kwargs) -> str:

def extract_python_script(response: str):
# We will extract the python script from the response
python_script = response.split("```python")[1].split("```")[0]
if '```python' in response:
    python_script = response.split("```python")[1].split("```")[0]
elif '```' in response:
    python_script = response.split('```')[1].split('```')[0]
else:
    print("Failed to extract a fenced Python code block from the response; using the raw response.")
    python_script = response
python_script = re.sub(r'^\s*(import .*|from .*\s+import\s+.*)', '', python_script, flags=re.MULTILINE)
return python_script

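Finally, a sketch of how the new litellm path might be exercised directly. The provider/model string and prompt are hypothetical, and the call assumes the required API key is already present in the key config file or the environment, as checked by `litellm_validate_environment`.

```python
from scicode.gen.models import generate_litellm_response

# Hypothetical provider/model identifier; any model litellm supports should work the same way.
response = generate_litellm_response(
    "Write a Python function that returns the factorial of n.",
    model="together_ai/meta-llama/Llama-3-70b-chat-hf",
    temperature=0,
)
print(response)
```

When run through the CLI, the same route is selected by prefixing the model name with `litellm/`, which `get_model_function` strips before dispatching.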
2 changes: 1 addition & 1 deletion tests/test_data/first_problem.jsonl

Large diffs are not rendered by default.
