feat(ai-monitoring): Add global config for AI model costs #70823

Merged 4 commits on May 14, 2024
2 changes: 1 addition & 1 deletion requirements-base.txt
@@ -66,7 +66,7 @@ sentry-arroyo>=2.16.5
sentry-kafka-schemas>=0.1.81
sentry-ophio==0.2.7
sentry-redis-tools>=0.1.7
-sentry-relay>=0.8.60
+sentry-relay>=0.8.64
sentry-sdk==2.1.1
snuba-sdk>=2.0.33
simplejson>=3.17.6
2 changes: 1 addition & 1 deletion requirements-dev-frozen.txt
@@ -183,7 +183,7 @@ sentry-forked-djangorestframework-stubs==3.15.0.post1
sentry-kafka-schemas==0.1.81
sentry-ophio==0.2.7
sentry-redis-tools==0.1.7
-sentry-relay==0.8.60
+sentry-relay==0.8.64
sentry-sdk==2.1.1
sentry-usage-accountant==0.0.10
simplejson==3.17.6
2 changes: 1 addition & 1 deletion requirements-frozen.txt
@@ -123,7 +123,7 @@ sentry-arroyo==2.16.5
sentry-kafka-schemas==0.1.81
sentry-ophio==0.2.7
sentry-redis-tools==0.1.7
-sentry-relay==0.8.60
+sentry-relay==0.8.64
sentry-sdk==2.1.1
sentry-usage-accountant==0.0.10
simplejson==3.17.6
151 changes: 151 additions & 0 deletions src/sentry/relay/config/ai_model_costs.py
@@ -0,0 +1,151 @@
from typing import TypedDict


class AIModelCost(TypedDict):
    modelId: str
    forCompletion: bool
    costPer1kTokens: float


class AIModelCosts(TypedDict):
    version: int
    costs: list[AIModelCost]


def ai_model_costs_config() -> AIModelCosts:
    return {
        "version": 1,
        "costs": [
            {
                "modelId": row[0],
                "forCompletion": row[1],
                "costPer1kTokens": row[2],
            }
            for row in [
                # GPT-4 input
                ("gpt-4", False, 0.03),
                ("gpt-4-0314", False, 0.03),
                ("gpt-4-0613", False, 0.03),
                ("gpt-4-32k", False, 0.06),
                ("gpt-4-32k-0314", False, 0.06),
                ("gpt-4-32k-0613", False, 0.06),
                ("gpt-4-vision-preview", False, 0.01),
                ("gpt-4-1106-preview", False, 0.01),
                ("gpt-4-0125-preview", False, 0.01),
                ("gpt-4-turbo-preview", False, 0.01),
                ("gpt-4-turbo", False, 0.01),
                ("gpt-4-turbo-2024-04-09", False, 0.01),
                # GPT-4 output
                ("gpt-4", True, 0.06),
                ("gpt-4-0314", True, 0.06),
                ("gpt-4-0613", True, 0.06),
                ("gpt-4-32k", True, 0.12),
                ("gpt-4-32k-0314", True, 0.12),
                ("gpt-4-32k-0613", True, 0.12),
                ("gpt-4-vision-preview", True, 0.03),
                ("gpt-4-1106-preview", True, 0.03),
                ("gpt-4-0125-preview", True, 0.03),
                ("gpt-4-turbo-preview", True, 0.03),
                ("gpt-4-turbo", True, 0.03),
                ("gpt-4-turbo-2024-04-09", True, 0.03),
                # GPT-3.5 input
                ("gpt-3.5-turbo", False, 0.0005),
                ("gpt-3.5-turbo-0125", False, 0.0005),
                ("gpt-3.5-turbo-0301", False, 0.0015),
                ("gpt-3.5-turbo-0613", False, 0.0015),
                ("gpt-3.5-turbo-1106", False, 0.001),
                ("gpt-3.5-turbo-instruct", False, 0.0015),
                ("gpt-3.5-turbo-16k", False, 0.003),
                ("gpt-3.5-turbo-16k-0613", False, 0.003),
                # GPT-3.5 output
                ("gpt-3.5-turbo", True, 0.0015),
                ("gpt-3.5-turbo-0125", True, 0.0015),
                ("gpt-3.5-turbo-0301", True, 0.002),
                ("gpt-3.5-turbo-0613", True, 0.002),
                ("gpt-3.5-turbo-1106", True, 0.002),
                ("gpt-3.5-turbo-instruct", True, 0.002),
                ("gpt-3.5-turbo-16k", True, 0.004),
                ("gpt-3.5-turbo-16k-0613", True, 0.004),
                # Azure GPT-35 input
                ("gpt-35-turbo", False, 0.0015),  # Azure OpenAI version of ChatGPT
                ("gpt-35-turbo-0301", False, 0.0015),  # Azure OpenAI version of ChatGPT
                ("gpt-35-turbo-0613", False, 0.0015),
                ("gpt-35-turbo-instruct", False, 0.0015),
                ("gpt-35-turbo-16k", False, 0.003),
                ("gpt-35-turbo-16k-0613", False, 0.003),
                # Azure GPT-35 output
                ("gpt-35-turbo", True, 0.002),  # Azure OpenAI version of ChatGPT
                ("gpt-35-turbo-0301", True, 0.002),  # Azure OpenAI version of ChatGPT
                ("gpt-35-turbo-0613", True, 0.002),
                ("gpt-35-turbo-instruct", True, 0.002),
                ("gpt-35-turbo-16k", True, 0.004),
                ("gpt-35-turbo-16k-0613", True, 0.004),
                # Other OpenAI models
                ("text-ada-001", True, 0.0004),
                ("text-ada-001", False, 0.0004),
                ("ada", True, 0.0004),
                ("ada", False, 0.0004),
                ("text-babbage-001", True, 0.0005),
                ("text-babbage-001", False, 0.0005),
                ("babbage", True, 0.0005),
                ("babbage", False, 0.0005),
                ("text-curie-001", True, 0.002),
                ("text-curie-001", False, 0.002),
                ("curie", True, 0.002),
                ("curie", False, 0.002),
                ("text-davinci-003", True, 0.02),
                ("text-davinci-003", False, 0.02),
                ("text-davinci-002", True, 0.02),
                ("text-davinci-002", False, 0.02),
                ("code-davinci-002", True, 0.02),
                ("code-davinci-002", False, 0.02),
                # Fine-tuned OpenAI input
                ("ft:babbage-002", False, 0.0016),
                ("ft:davinci-002", False, 0.012),
                ("ft:gpt-3.5-turbo-0613", False, 0.012),
                ("ft:gpt-3.5-turbo-1106", False, 0.012),
                # Fine-tuned OpenAI output
                ("ft:babbage-002", True, 0.0016),
                ("ft:davinci-002", True, 0.012),
                ("ft:gpt-3.5-turbo-0613", True, 0.016),
                ("ft:gpt-3.5-turbo-1106", True, 0.016),
                # Azure OpenAI Fine-tuned input
                ("babbage-002.ft-*", False, 0.0004),
                ("davinci-002.ft-*", False, 0.002),
                ("gpt-35-turbo-0613.ft-*", False, 0.0015),
                # Azure OpenAI Fine-tuned output
                ("babbage-002.ft-*", True, 0.0004),
                ("davinci-002.ft-*", True, 0.002),
                ("gpt-35-turbo-0613.ft-*", True, 0.002),
                # Legacy OpenAI Fine-tuned models input
                ("ada:ft-*", True, 0.0016),
                ("babbage:ft-*", True, 0.0024),
                ("curie:ft-*", True, 0.012),
                ("davinci:ft-*", True, 0.12),
                # Anthropic Claude 3 input
                ("claude-3-haiku", False, 0.00025),
                ("claude-3-sonnet", False, 0.003),
                ("claude-3-opus", False, 0.015),
                # Anthropic Claude 3 output
                ("claude-3-haiku", True, 0.00125),
                ("claude-3-sonnet", True, 0.015),
                ("claude-3-opus", True, 0.075),
                # Anthropic Claude 2 input
                ("claude-2.*", False, 0.008),
                ("claude-instant*", False, 0.0008),
                # Anthropic Claude 2 output
                ("claude-2.*", True, 0.024),
                ("claude-instant*", True, 0.0024),
                # Cohere command input
                ("command", False, 0.001),
                ("command-light", False, 0.0003),
                ("command-r", False, 0.0005),
                ("command-r-plus", False, 0.003),
                # Cohere command output
                ("command", True, 0.002),
                ("command-light", True, 0.0006),
                ("command-r", True, 0.0015),
                ("command-r-plus", True, 0.015),
            ]
        ],
    }
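
Each entry maps a (modelId, forCompletion) pair to a USD cost per 1,000 tokens: forCompletion=False rows price prompt/input tokens, forCompletion=True rows price completion/output tokens, and some ids are glob-style wildcards (e.g. claude-2.*). A minimal sketch of how such a table could be used to price a single call follows — the estimate_cost helper and its fnmatch-based wildcard handling are illustrative assumptions only, not part of this PR or of the matching logic in Relay, which consumes this config:

from fnmatch import fnmatch

from sentry.relay.config.ai_model_costs import AIModelCost, ai_model_costs_config


def estimate_cost(model_id: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Illustrative only: price one AI call against the cost table."""
    costs: list[AIModelCost] = ai_model_costs_config()["costs"]

    def per_1k(for_completion: bool) -> float:
        for row in costs:
            # Wildcard ids such as "claude-2.*" are matched with fnmatch in this
            # sketch; Relay implements its own matching for the real pipeline.
            if row["forCompletion"] == for_completion and fnmatch(model_id, row["modelId"]):
                return row["costPer1kTokens"]
        return 0.0  # unknown model: attribute no cost

    return (prompt_tokens / 1000) * per_1k(False) + (completion_tokens / 1000) * per_1k(True)


# 1,000 prompt tokens + 500 completion tokens on gpt-4-turbo:
# 1.0 * 0.01 + 0.5 * 0.03 ≈ 0.025 USD
print(estimate_cost("gpt-4-turbo", 1000, 500))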
3 changes: 3 additions & 0 deletions src/sentry/relay/globalconfig.py
@@ -2,6 +2,7 @@

import sentry.options
from sentry.relay.config import GenericFiltersConfig
+from sentry.relay.config.ai_model_costs import AIModelCosts, ai_model_costs_config
from sentry.relay.config.measurements import MeasurementsConfig, get_measurements_config
from sentry.utils import metrics

@@ -27,6 +28,7 @@

class GlobalConfig(TypedDict, total=False):
    measurements: MeasurementsConfig
+    aiModelCosts: AIModelCosts
    filters: GenericFiltersConfig | None
    options: dict[str, Any]

@@ -44,6 +46,7 @@ def get_global_config():

    global_config: GlobalConfig = {
        "measurements": get_measurements_config(),
+        "aiModelCosts": ai_model_costs_config(),
    }

    filters = get_global_generic_filters()
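
With these three added lines, the global configuration that Sentry serves to Relay gains an aiModelCosts section alongside measurements, filters, and options. A hypothetical sanity check of the wiring (not a test added by this PR; inside Sentry's test suite it would also need the usual Django/options test fixtures to run):

from sentry.relay.globalconfig import get_global_config


def test_global_config_contains_ai_model_costs() -> None:
    config = get_global_config()
    ai_costs = config["aiModelCosts"]

    assert ai_costs["version"] == 1
    # Every row carries a model id, an input/output flag, and a USD cost per 1k tokens.
    for row in ai_costs["costs"]:
        assert isinstance(row["modelId"], str)
        assert isinstance(row["forCompletion"], bool)
        assert row["costPer1kTokens"] > 0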