update openai wrapper to work with tiktoken interface and newest openai version #794

Merged · 22 commits · Dec 14, 2023
Changes from 13 commits
7 changes: 3 additions & 4 deletions llmfoundry/models/inference_api_wrapper/interface.py
@@ -39,8 +39,7 @@ def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer):

def get_metrics(self, is_train: bool = False):
if is_train:
raise NotImplementedError(
'You cannot use inference wrappers for training')
metrics = None
else:
metrics = self.eval_metrics

@@ -55,6 +54,7 @@ def rebatch(self, batch: Batch):
return batch

def eval_forward(self, batch: Batch, outputs: Optional[Any] = None):
padding_tok = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id
# If the batch mode is generate, we will generate a requested number of tokens using the underlying
# model's generate function. Extra generation kwargs can be passed in via the batch. Strings will
# be returned from eval_forward
@@ -80,8 +80,7 @@ def eval_forward(self, batch: Batch, outputs: Optional[Any] = None):
[output_logits,
next_logit_tensor.reshape(1, -1)])
padding = torch.nn.functional.one_hot(
torch.full((seqlen - output_logits.shape[0],),
self.tokenizer.pad_token_id),
torch.full((seqlen - output_logits.shape[0],), padding_tok),
num_classes=self.tokenizer.vocab_size)
output_logits = torch.cat([output_logits, padding])
output_logits_batch.append(output_logits)
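The interface change above falls back to the EOS token for padding when a tokenizer defines no pad token (tiktoken-based tokenizers typically don't). A minimal sketch of that padding logic, using GPT-2 purely as a stand-in tokenizer and hypothetical `seqlen` / `output_logits` values:

```python
import torch
from transformers import AutoTokenizer

# Stand-in tokenizer: GPT-2 has no pad token, so the EOS fallback kicks in.
tokenizer = AutoTokenizer.from_pretrained('gpt2')
padding_tok = tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id

seqlen = 8                                              # hypothetical target length
output_logits = torch.zeros((5, tokenizer.vocab_size))  # 5 positions already filled
padding = torch.nn.functional.one_hot(
    torch.full((seqlen - output_logits.shape[0],), padding_tok),
    num_classes=tokenizer.vocab_size).to(output_logits.dtype)
output_logits = torch.cat([output_logits, padding])
print(output_logits.shape)  # torch.Size([8, 50257])
```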
76 changes: 37 additions & 39 deletions llmfoundry/models/inference_api_wrapper/openai_causal_lm.py
@@ -5,6 +5,7 @@

import logging
import os
import random
from time import sleep
from typing import Any, Dict, List, Optional, Union

@@ -30,20 +31,23 @@ class OpenAIEvalInterface(InferenceAPIEvalWrapper):

def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer) -> None:
super().__init__(model_cfg, tokenizer)
assert os.getenv(
'OPENAI_API_KEY'
) is not None, 'No OpenAI API Key found. Ensure it is saved as an environmental variable called OPENAI_API_KEY.'
try:
import openai
except ImportError as e:
raise MissingConditionalImportError(
extra_deps_group='openai',
conda_package='openai',
conda_channel='conda-forge') from e
openai.api_key = os.getenv('OPENAI_API_KEY')
self.client = openai.OpenAI()
self.model_name = model_cfg['version']

def generate_completion(self, prompt: str, num_tokens: int):
raise NotImplementedError()

def process_result(self, completion: Optional[dict]):
def process_result(self, completion): # pyright: ignore
raise NotImplementedError()
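For context, the v1 OpenAI SDK replaces module-level `openai.api_key` setup with a client object, which is what the constructor above now builds. A minimal sketch of that setup, assuming `OPENAI_API_KEY` is exported in the environment:

```python
import os

import openai

# Mirror the wrapper's guard: fail fast if the key is missing.
assert os.getenv('OPENAI_API_KEY') is not None, (
    'No OpenAI API Key found. Ensure it is saved as an environmental variable '
    'called OPENAI_API_KEY.')

# openai.OpenAI() reads OPENAI_API_KEY from the environment by default;
# all calls now go through this client instead of module-level functions.
client = openai.OpenAI()
```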

def get_next_token_logit_tensor(self, prompt: str, num_tokens: int = 1):
Expand All @@ -52,26 +56,30 @@ def get_next_token_logit_tensor(self, prompt: str, num_tokens: int = 1):

def try_generate_completion(self, prompt: str, num_tokens: int):
try:
from openai.error import RateLimitError
from openai import RateLimitError
except ImportError as e:
raise MissingConditionalImportError(
extra_deps_group='openai',
conda_package='openai',
conda_channel='conda-forge') from e
tries = 0
completion = None
delay = 1
while tries < MAX_RETRIES:
tries += 1
try:

completion = self.generate_completion(prompt, num_tokens)
break
except RateLimitError as e:
if 'You exceeded your current quota' in str(e._message):
if 'You exceeded your current quota' in str(
e._message): # pyright: ignore
raise e
sleep(60)
delay *= 2 * (1 + random.random())
sleep(delay)
continue
except Exception:
except Exception as e:
print(f'Found Exception: {e}')
# TODO: Why continue on unspecified Exception?
continue
return completion
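The retry loop above replaces the old fixed 60-second sleep with exponential backoff plus jitter. A standalone sketch of the same pattern, with a hypothetical `generate_completion` callable standing in for the subclass implementations:

```python
import random
from time import sleep
from typing import Any, Callable, Optional

from openai import RateLimitError

MAX_RETRIES = 10  # assumed value for illustration; the wrapper defines its own constant


def try_generate_completion(generate_completion: Callable[[str, int], Any],
                            prompt: str, num_tokens: int) -> Optional[Any]:
    """Retry with exponential backoff and jitter on rate-limit errors."""
    tries = 0
    delay = 1
    completion = None
    while tries < MAX_RETRIES:
        tries += 1
        try:
            completion = generate_completion(prompt, num_tokens)
            break
        except RateLimitError as e:
            if 'You exceeded your current quota' in str(e):
                raise e  # a hard quota error will not go away by waiting
            delay *= 2 * (1 + random.random())  # backoff factor in [2, 4)
            sleep(delay)
            continue
    return completion
```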

@@ -80,23 +88,23 @@ class OpenAIChatAPIEvalWrapper(OpenAIEvalInterface):

def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer) -> None:
super().__init__(model_cfg, tokenizer)
try:
import openai
except ImportError as e:
raise MissingConditionalImportError(
extra_deps_group='openai',
conda_package='openai',
conda_channel='conda-forge') from e

self.generate_completion = lambda prompt, num_tokens: openai.ChatCompletion.create(
self.model_name,
self.generate_completion = lambda prompt, num_tokens: self.client.chat.completions.create(
model=self.model_name,
messages=[{
'role':
'system',
'content':
model_cfg.get('sytsem_role_prompt',
'Please complete the following text: ')
}, {
'role': 'user',
'content': prompt
}],
max_tokens=num_tokens,
temperature=0.0)

# TODO: Do we still need retokenize, rebatch, and eval_forward?
def retokenize(self, tokens: List[int], cont_idxs: List[int]):
"""Chat API will never respond with a word-initial space.

@@ -162,6 +170,7 @@ def eval_forward(self, batch: Batch, outputs: Optional[Any] = None):
# than what the continuation would expect.
# Get around this issue by retokenizing the batch to remove spacing from the continuation as well as
# decoding the whole continuation at once.
padding_tok = self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id
output_logits_batch = []
batch = self.rebatch(batch)
for tokens, cont_idxs in zip(batch['input_ids'],
@@ -182,20 +191,18 @@ def eval_forward(self, batch: Batch, outputs: Optional[Any] = None):
if next_logit_tensor is not None:
output_logits = torch.cat([output_logits, next_logit_tensor])
padding = torch.nn.functional.one_hot(
torch.full((seqlen - output_logits.shape[0],),
self.tokenizer.pad_token_id),
torch.full((seqlen - output_logits.shape[0],), padding_tok),
num_classes=self.tokenizer.vocab_size)
output_logits = torch.cat([output_logits, padding])
output_logits_batch.append(output_logits)

return torch.stack(output_logits_batch).to(batch['input_ids'].device)

def process_result(self, completion: Optional[dict]):
assert isinstance(completion, dict)
if len(completion['choices']) > 0:
def process_result(self, completion): # pyright: ignore
if len(completion.choices) > 0: # pyright: ignore
tensors = []
for t in self.tokenizer(completion['choices'][0]['message']
['content'])['input_ids']:
for t in self.tokenizer(completion.choices[0].message.content
)['input_ids']: # pyright: ignore
tensors.append(
self.tokenizer.construct_logit_tensor(
{self.tokenizer.decode([t]): 0.0}))
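Taken together, the chat wrapper's new `generate_completion` lambda and `process_result` boil down to calling `client.chat.completions.create` and reading the response as attributes rather than dict keys (v1 responses are typed objects). A rough, self-contained sketch with an illustrative model name and prompt:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Illustrative call mirroring the wrapper's lambda; the system prompt and
# model name here are examples, not the wrapper's exact configuration.
completion = client.chat.completions.create(
    model='gpt-3.5-turbo',
    messages=[{
        'role': 'system',
        'content': 'Please complete the following text: '
    }, {
        'role': 'user',
        'content': 'The quick brown fox'
    }],
    max_tokens=2,
    temperature=0.0)

# v1 responses expose fields as attributes, not dict keys:
# completion.choices[0].message.content replaces completion['choices'][0]['message']['content'].
if len(completion.choices) > 0:
    print(completion.choices[0].message.content)
```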
@@ -213,29 +220,20 @@ class OpenAICausalLMEvalWrapper(OpenAIEvalInterface):

def __init__(self, model_cfg: Dict, tokenizer: AutoTokenizer) -> None:
super().__init__(model_cfg, tokenizer)
try:
import openai
except ImportError as e:
raise MissingConditionalImportError(
extra_deps_group='openai',
conda_package='openai',
conda_channel='conda-forge') from e

self.generate_completion = lambda prompt, num_tokens: openai.Completion.create(
engine=self.model_name,
# TODO: this will be deprecated
self.generate_completion = lambda prompt, num_tokens: self.client.completions.create(
model=self.model_name,
prompt=prompt,
max_tokens=1,
max_tokens=num_tokens,
logprobs=5,
temperature=0.0)

def process_result(self, completion: Optional[dict]):
def process_result(self, completion): # pyright: ignore
if completion is None:
raise ValueError("Couldn't generate model output")

assert isinstance(completion, dict)
if len(completion['choices'][0]['logprobs']['top_logprobs']) > 0:
if len(completion.choices[0].logprobs.top_logprobs[0]) > 0:
tensor = self.tokenizer.construct_logit_tensor(
dict(completion['choices'][0]['logprobs']['top_logprobs'][0]))
dict(completion.choices[0].logprobs.top_logprobs[0]))
return tensor
else:
# the model sometimes stops early even though we are still requesting tokens!
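The causal-LM wrapper similarly moves from `openai.Completion.create(engine=...)` to `client.completions.create(model=...)`, and `process_result` now reads `logprobs.top_logprobs` as attributes. A rough sketch of that call and of pulling out the top-logprob dictionary, with an illustrative model name and prompt:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Legacy Completions endpoint with token log-probabilities, as in the wrapper;
# 'davinci' is just the example model from the eval YAML and may be unavailable.
completion = client.completions.create(model='davinci',
                                       prompt='The capital of France is',
                                       max_tokens=1,
                                       logprobs=5,
                                       temperature=0.0)

choice = completion.choices[0]
if choice.logprobs is not None and len(choice.logprobs.top_logprobs) > 0:
    # top_logprobs[0] maps each candidate token string to its log-probability
    # for the first generated position.
    print(dict(choice.logprobs.top_logprobs[0]))
```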
27 changes: 12 additions & 15 deletions scripts/eval/yamls/openai_eval.yaml
@@ -8,27 +8,24 @@ models:
name: openai_causal_lm
version: davinci
tokenizer:
name: openai
name: tiktoken
kwargs:
name: davinci
-
model_name: openai/gpt-4
model:
name: openai_chat
version: gpt-4
tokenizer:
name: openai
kwargs:
name: gpt-4
model_name: davinci
-
model_name: openai/gpt-3.5-turbo
model:
name: openai_chat
version: gpt-3.5-turbo
tokenizer:
name: openai
name: tiktoken
kwargs:
name: gpt-3.5-turbo
model_name: gpt-3.5-turbo

icl_tasks: 'eval/yamls/lm_tasks.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'
icl_tasks: # OpenAI API only works with tasks having `icl_task_type: language_modeling`
-
label: jeopardy
dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl # ADD YOUR OWN DATASET URI
num_fewshot: [10]
icl_task_type: language_modeling
continuation_delimiter: "\nAnswer: " # this separates questions from answers
has_categories: true
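The eval config above also swaps the bespoke `openai` tokenizer for `tiktoken`. As a rough illustration of the interface that points at (not the llm-foundry tokenizer wrapper itself), tiktoken resolves a model name to an encoding and tokenizes with it:

```python
import tiktoken

# Illustration only: the eval harness wraps tiktoken rather than calling it directly.
enc = tiktoken.encoding_for_model('gpt-3.5-turbo')
tokens = enc.encode('Please complete the following text: ')
print(tokens)              # list of token ids
print(enc.decode(tokens))  # round-trips back to the original string
```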
2 changes: 1 addition & 1 deletion setup.py
@@ -115,7 +115,7 @@
]

extra_deps['openai'] = [
'openai==0.27.8',
'openai==1.3.8',
'tiktoken==0.4.0',
]
extra_deps['all-cpu'] = set(