Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement - Improve SDK output format #1025

Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agenta-cli/agenta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
)
from .sdk.utils.preinit import PreInitObject
from .sdk.agenta_init import Config, init
from .sdk.utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
1 change: 1 addition & 0 deletions agenta-cli/agenta/sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@
FileInputURL,
)
from .agenta_init import Config, init
from .utils.helper.openai_cost import calculate_token_usage

config = PreInitObject("agenta.config", Config)
22 changes: 16 additions & 6 deletions agenta-cli/agenta/sdk/agenta_decorator.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
"""The code for the Agenta SDK"""
import os
import sys
import time
import inspect
import argparse
import traceback
import functools
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Callable, Dict, Optional, Tuple, List
from typing import Any, Callable, Dict, Optional, Tuple, List, Union

from fastapi import Body, FastAPI, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware

import agenta
from .context import save_context
Expand All @@ -26,6 +27,7 @@
TextParam,
MessagesInput,
FileInputURL,
FuncResponse,
)

app = FastAPI()
Expand Down Expand Up @@ -90,15 +92,15 @@ async def wrapper_deployed(*args, **kwargs) -> Any:

update_function_signature(wrapper, func_signature, config_params, ingestible_files)
route = f"/{endpoint_name}"
app.post(route)(wrapper)
app.post(route, response_model=FuncResponse)(wrapper)

update_deployed_function_signature(
wrapper_deployed,
func_signature,
ingestible_files,
)
route_deployed = f"/{endpoint_name}_deployed"
app.post(route_deployed)(wrapper_deployed)
app.post(route_deployed, response_model=FuncResponse)(wrapper_deployed)
override_schema(
openapi_schema=app.openapi(),
func_name=func.__name__,
Expand Down Expand Up @@ -148,7 +150,9 @@ def ingest_files(
func_params[name] = ingest_file(func_params[name])


async def execute_function(func: Callable[..., Any], *args, **func_params) -> Any:
async def execute_function(
func: Callable[..., Any], *args, **func_params
) -> Union[Dict[str, Any], JSONResponse]:
"""Execute the function and handle any exceptions."""

try:
Expand All @@ -158,14 +162,20 @@ async def execute_function(func: Callable[..., Any], *args, **func_params) -> An
it awaits their execution.
"""
is_coroutine_function = inspect.iscoroutinefunction(func)
start_time = time.perf_counter()
if is_coroutine_function:
result = await func(*args, **func_params)
else:
result = func(*args, **func_params)
end_time = time.perf_counter()
latency = end_time - start_time

if isinstance(result, Context):
save_context(result)
return result
if isinstance(result, Dict):
return FuncResponse(**result, latency=round(latency, 4)).dict()
if isinstance(result, str):
return FuncResponse(message=result, latency=round(latency, 4)).dict()
except Exception as e:
return handle_exception(e)

Expand Down
15 changes: 14 additions & 1 deletion agenta-cli/agenta/sdk/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Extra, HttpUrl

Expand All @@ -10,6 +10,19 @@ def __init__(self, file_name: str, file_path: str):
self.file_path = file_path


class LLMTokenUsage(BaseModel):
completion_tokens: int
prompt_tokens: int
total_tokens: int


class FuncResponse(BaseModel):
message: str
usage: Optional[LLMTokenUsage]
cost: Optional[float]
latency: float


class DictInput(dict):
def __new__(cls, default_keys=None):
instance = super().__new__(cls, default_keys)
Expand Down
165 changes: 165 additions & 0 deletions agenta-cli/agenta/sdk/utils/helper/openai_cost.py
aakrem marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
MODEL_COST_PER_1K_TOKENS = {
aakrem marked this conversation as resolved.
Show resolved Hide resolved
# GPT-4 input
"gpt-4": 0.03,
"gpt-4-0314": 0.03,
"gpt-4-0613": 0.03,
"gpt-4-32k": 0.06,
"gpt-4-32k-0314": 0.06,
"gpt-4-32k-0613": 0.06,
"gpt-4-vision-preview": 0.01,
"gpt-4-1106-preview": 0.01,
# GPT-4 output
"gpt-4-completion": 0.06,
"gpt-4-0314-completion": 0.06,
"gpt-4-0613-completion": 0.06,
"gpt-4-32k-completion": 0.12,
"gpt-4-32k-0314-completion": 0.12,
"gpt-4-32k-0613-completion": 0.12,
"gpt-4-vision-preview-completion": 0.03,
"gpt-4-1106-preview-completion": 0.03,
# GPT-3.5 input
"gpt-3.5-turbo": 0.0015,
"gpt-3.5-turbo-0301": 0.0015,
"gpt-3.5-turbo-0613": 0.0015,
"gpt-3.5-turbo-1106": 0.001,
"gpt-3.5-turbo-instruct": 0.0015,
"gpt-3.5-turbo-16k": 0.003,
"gpt-3.5-turbo-16k-0613": 0.003,
# GPT-3.5 output
"gpt-3.5-turbo-completion": 0.002,
"gpt-3.5-turbo-0301-completion": 0.002,
"gpt-3.5-turbo-0613-completion": 0.002,
"gpt-3.5-turbo-1106-completion": 0.002,
"gpt-3.5-turbo-instruct-completion": 0.002,
"gpt-3.5-turbo-16k-completion": 0.004,
"gpt-3.5-turbo-16k-0613-completion": 0.004,
# Azure GPT-35 input
"gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613": 0.0015,
"gpt-35-turbo-instruct": 0.0015,
"gpt-35-turbo-16k": 0.003,
"gpt-35-turbo-16k-0613": 0.003,
# Azure GPT-35 output
"gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT
"gpt-35-turbo-0613-completion": 0.002,
"gpt-35-turbo-instruct-completion": 0.002,
"gpt-35-turbo-16k-completion": 0.004,
"gpt-35-turbo-16k-0613-completion": 0.004,
# Others
"text-ada-001": 0.0004,
"ada": 0.0004,
"text-babbage-001": 0.0005,
"babbage": 0.0005,
"text-curie-001": 0.002,
"curie": 0.002,
"text-davinci-003": 0.02,
"text-davinci-002": 0.02,
"code-davinci-002": 0.02,
# Fine Tuned input
"babbage-002-finetuned": 0.0016,
"davinci-002-finetuned": 0.012,
"gpt-3.5-turbo-0613-finetuned": 0.012,
# Fine Tuned output
"babbage-002-finetuned-completion": 0.0016,
"davinci-002-finetuned-completion": 0.012,
"gpt-3.5-turbo-0613-finetuned-completion": 0.016,
# Azure Fine Tuned input
"babbage-002-azure-finetuned": 0.0004,
"davinci-002-azure-finetuned": 0.002,
"gpt-35-turbo-0613-azure-finetuned": 0.0015,
# Azure Fine Tuned output
"babbage-002-azure-finetuned-completion": 0.0004,
"davinci-002-azure-finetuned-completion": 0.002,
"gpt-35-turbo-0613-azure-finetuned-completion": 0.002,
# Legacy fine-tuned models
"ada-finetuned-legacy": 0.0016,
"babbage-finetuned-legacy": 0.0024,
"curie-finetuned-legacy": 0.012,
"davinci-finetuned-legacy": 0.12,
}


def standardize_model_name(
model_name: str,
is_completion: bool = False,
) -> str:
"""
Standardize the model name to a format that can be used in the OpenAI API.

Args:
model_name: Model name to standardize.
is_completion: Whether the model is used for completion or not.
Defaults to False.

Returns:
Standardized model name.
"""

model_name = model_name.lower()
if ".ft-" in model_name:
model_name = model_name.split(".ft-")[0] + "-azure-finetuned"
if ":ft-" in model_name:
model_name = model_name.split(":")[0] + "-finetuned-legacy"
if "ft:" in model_name:
model_name = model_name.split(":")[1] + "-finetuned"
if is_completion and (
model_name.startswith("gpt-4")
or model_name.startswith("gpt-3.5")
or model_name.startswith("gpt-35")
or ("finetuned" in model_name and "legacy" not in model_name)
):
return model_name + "-completion"
else:
return model_name


def get_openai_token_cost_for_model(
model_name: str, num_tokens: int, is_completion: bool = False
) -> float:
"""
Get the cost in USD for a given model and number of tokens.

Args:
model_name: Name of the model
num_tokens: Number of tokens.
is_completion: Whether the model is used for completion or not.
Defaults to False.

Returns:
Cost in USD.
"""

model_name = standardize_model_name(model_name, is_completion=is_completion)
if model_name not in MODEL_COST_PER_1K_TOKENS:
raise ValueError(
f"Unknown model: {model_name}. Please provide a valid OpenAI model name."
"Known models are: " + ", ".join(MODEL_COST_PER_1K_TOKENS.keys())
)
return MODEL_COST_PER_1K_TOKENS[model_name] * (num_tokens / 1000)


def calculate_token_usage(model_name: str, token_usage: dict) -> float:
"""Calculates the total cost of using a language model based on the model name and token
usage.

Args:
model_name: The name of the model used to determine the cost per token.
token_usage: Contains information about the usage of tokens for a particular model.

Returns:
Total cost of using a model.
"""

completion_tokens = token_usage.get("completion_tokens", 0)
prompt_tokens = token_usage.get("prompt_tokens", 0)
model_name = standardize_model_name(model_name)
if model_name in MODEL_COST_PER_1K_TOKENS:
completion_cost = get_openai_token_cost_for_model(
model_name, completion_tokens, is_completion=True
)
prompt_cost = get_openai_token_cost_for_model(model_name, prompt_tokens)
total_cost = prompt_cost + completion_cost
return total_cost
return 0
41 changes: 41 additions & 0 deletions examples/async_chat_sdk_output_format/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import agenta as ag
from agenta import FloatParam, MessagesInput, MultipleChoiceParam
from openai import AsyncOpenAI


client = AsyncOpenAI()

SYSTEM_PROMPT = "You have expertise in offering technical ideas to startups."
CHAT_LLM_GPT = [
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-0301",
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-4",
]

ag.init()
ag.config.default(
temperature=FloatParam(0.2),
model=MultipleChoiceParam("gpt-3.5-turbo", CHAT_LLM_GPT),
max_tokens=ag.IntParam(-1, -1, 4000),
prompt_system=ag.TextParam(SYSTEM_PROMPT),
)


@ag.entrypoint
async def chat(inputs: MessagesInput = MessagesInput()):
messages = [{"role": "system", "content": ag.config.prompt_system}] + inputs
max_tokens = ag.config.max_tokens if ag.config.max_tokens != -1 else None
chat_completion = await client.chat.completions.create(
model=ag.config.model,
messages=messages,
temperature=ag.config.temperature,
max_tokens=max_tokens,
)
token_usage = chat_completion.usage.dict()
return {
"message": chat_completion.choices[0].message.content,
**{"usage": token_usage},
"cost": ag.calculate_token_usage(ag.config.model, token_usage),
}
2 changes: 2 additions & 0 deletions examples/async_chat_sdk_output_format/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agenta
openai