From bdac5e7575d04924d56dfc1c0ebb651ef82fc729 Mon Sep 17 00:00:00 2001 From: colin-sentry <161344340+colin-sentry@users.noreply.github.com> Date: Tue, 14 May 2024 14:21:52 -0400 Subject: [PATCH] feat(ai-monitoring): Add global config for AI model costs (#70823) Context is here: https://github.com/getsentry/relay/pull/3554#pullrequestreview-2045384821 --- requirements-base.txt | 2 +- requirements-dev-frozen.txt | 2 +- requirements-frozen.txt | 2 +- src/sentry/relay/config/ai_model_costs.py | 151 ++++++++++++++++++++++ src/sentry/relay/globalconfig.py | 3 + 5 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 src/sentry/relay/config/ai_model_costs.py diff --git a/requirements-base.txt b/requirements-base.txt index 0b431dbc82f36f..e622673adacde3 100644 --- a/requirements-base.txt +++ b/requirements-base.txt @@ -66,7 +66,7 @@ sentry-arroyo>=2.16.5 sentry-kafka-schemas>=0.1.81 sentry-ophio==0.2.7 sentry-redis-tools>=0.1.7 -sentry-relay>=0.8.60 +sentry-relay>=0.8.64 sentry-sdk==2.1.1 snuba-sdk>=2.0.33 simplejson>=3.17.6 diff --git a/requirements-dev-frozen.txt b/requirements-dev-frozen.txt index 47270c6d094104..c4eb69b6436fb4 100644 --- a/requirements-dev-frozen.txt +++ b/requirements-dev-frozen.txt @@ -183,7 +183,7 @@ sentry-forked-djangorestframework-stubs==3.15.0.post1 sentry-kafka-schemas==0.1.81 sentry-ophio==0.2.7 sentry-redis-tools==0.1.7 -sentry-relay==0.8.60 +sentry-relay==0.8.64 sentry-sdk==2.1.1 sentry-usage-accountant==0.0.10 simplejson==3.17.6 diff --git a/requirements-frozen.txt b/requirements-frozen.txt index dc97340ac8709e..6b0d9b17c8937d 100644 --- a/requirements-frozen.txt +++ b/requirements-frozen.txt @@ -123,7 +123,7 @@ sentry-arroyo==2.16.5 sentry-kafka-schemas==0.1.81 sentry-ophio==0.2.7 sentry-redis-tools==0.1.7 -sentry-relay==0.8.60 +sentry-relay==0.8.64 sentry-sdk==2.1.1 sentry-usage-accountant==0.0.10 simplejson==3.17.6 diff --git a/src/sentry/relay/config/ai_model_costs.py b/src/sentry/relay/config/ai_model_costs.py new file 
"""Static per-model token prices exposed to Relay for AI monitoring."""

from typing import TypedDict


class AIModelCost(TypedDict):
    """Price of one token direction (prompt or completion) for one model id."""

    # Model identifier. Some ids contain "*" (e.g. "claude-2.*"); presumably the
    # consumer treats it as a glob wildcard -- TODO confirm against Relay.
    modelId: str
    # True for completion (output) tokens, False for prompt (input) tokens.
    forCompletion: bool
    # Price per 1,000 tokens (currency is not stated here; presumably USD).
    costPer1kTokens: float


class AIModelCosts(TypedDict):
    """Versioned container for the complete list of model prices."""

    # Schema version of this payload.
    version: int
    # One entry per (modelId, forCompletion) pair.
    costs: list[AIModelCost]


# Rows are (modelId, forCompletion, costPer1kTokens).  Kept as an immutable
# module-level tuple so the table is built once at import time, not per call.
_MODEL_COST_ROWS: tuple[tuple[str, bool, float], ...] = (
    # GPT-4 input
    ("gpt-4", False, 0.03),
    ("gpt-4-0314", False, 0.03),
    ("gpt-4-0613", False, 0.03),
    ("gpt-4-32k", False, 0.06),
    ("gpt-4-32k-0314", False, 0.06),
    ("gpt-4-32k-0613", False, 0.06),
    ("gpt-4-vision-preview", False, 0.01),
    ("gpt-4-1106-preview", False, 0.01),
    ("gpt-4-0125-preview", False, 0.01),
    ("gpt-4-turbo-preview", False, 0.01),
    ("gpt-4-turbo", False, 0.01),
    ("gpt-4-turbo-2024-04-09", False, 0.01),
    # GPT-4 output
    ("gpt-4", True, 0.06),
    ("gpt-4-0314", True, 0.06),
    ("gpt-4-0613", True, 0.06),
    ("gpt-4-32k", True, 0.12),
    ("gpt-4-32k-0314", True, 0.12),
    ("gpt-4-32k-0613", True, 0.12),
    ("gpt-4-vision-preview", True, 0.03),
    ("gpt-4-1106-preview", True, 0.03),
    ("gpt-4-0125-preview", True, 0.03),
    ("gpt-4-turbo-preview", True, 0.03),
    ("gpt-4-turbo", True, 0.03),
    ("gpt-4-turbo-2024-04-09", True, 0.03),
    # GPT-3.5 input
    ("gpt-3.5-turbo", False, 0.0005),
    ("gpt-3.5-turbo-0125", False, 0.0005),
    ("gpt-3.5-turbo-0301", False, 0.0015),
    ("gpt-3.5-turbo-0613", False, 0.0015),
    ("gpt-3.5-turbo-1106", False, 0.001),
    ("gpt-3.5-turbo-instruct", False, 0.0015),
    ("gpt-3.5-turbo-16k", False, 0.003),
    ("gpt-3.5-turbo-16k-0613", False, 0.003),
    # GPT-3.5 output
    ("gpt-3.5-turbo", True, 0.0015),
    ("gpt-3.5-turbo-0125", True, 0.0015),
    ("gpt-3.5-turbo-0301", True, 0.002),
    ("gpt-3.5-turbo-0613", True, 0.002),
    ("gpt-3.5-turbo-1106", True, 0.002),
    ("gpt-3.5-turbo-instruct", True, 0.002),
    ("gpt-3.5-turbo-16k", True, 0.004),
    ("gpt-3.5-turbo-16k-0613", True, 0.004),
    # Azure GPT-35 input
    ("gpt-35-turbo", False, 0.0015),  # Azure OpenAI version of ChatGPT
    ("gpt-35-turbo-0301", False, 0.0015),  # Azure OpenAI version of ChatGPT
    ("gpt-35-turbo-0613", False, 0.0015),
    ("gpt-35-turbo-instruct", False, 0.0015),
    ("gpt-35-turbo-16k", False, 0.003),
    ("gpt-35-turbo-16k-0613", False, 0.003),
    # Azure GPT-35 output
    ("gpt-35-turbo", True, 0.002),  # Azure OpenAI version of ChatGPT
    ("gpt-35-turbo-0301", True, 0.002),  # Azure OpenAI version of ChatGPT
    ("gpt-35-turbo-0613", True, 0.002),
    ("gpt-35-turbo-instruct", True, 0.002),
    ("gpt-35-turbo-16k", True, 0.004),
    ("gpt-35-turbo-16k-0613", True, 0.004),
    # Other OpenAI models
    ("text-ada-001", True, 0.0004),
    ("text-ada-001", False, 0.0004),
    ("ada", True, 0.0004),
    ("ada", False, 0.0004),
    ("text-babbage-001", True, 0.0005),
    ("text-babbage-001", False, 0.0005),
    ("babbage", True, 0.0005),
    ("babbage", False, 0.0005),
    ("text-curie-001", True, 0.002),
    ("text-curie-001", False, 0.002),
    ("curie", True, 0.002),
    ("curie", False, 0.002),
    ("text-davinci-003", True, 0.02),
    ("text-davinci-003", False, 0.02),
    ("text-davinci-002", True, 0.02),
    ("text-davinci-002", False, 0.02),
    ("code-davinci-002", True, 0.02),
    ("code-davinci-002", False, 0.02),
    # Fine-tuned OpenAI input
    ("ft:babbage-002", False, 0.0016),
    ("ft:davinci-002", False, 0.012),
    ("ft:gpt-3.5-turbo-0613", False, 0.012),
    ("ft:gpt-3.5-turbo-1106", False, 0.012),
    # Fine-tuned OpenAI output
    ("ft:babbage-002", True, 0.0016),
    ("ft:davinci-002", True, 0.012),
    ("ft:gpt-3.5-turbo-0613", True, 0.016),
    ("ft:gpt-3.5-turbo-1106", True, 0.016),
    # Azure OpenAI Fine-tuned input
    ("babbage-002.ft-*", False, 0.0004),
    ("davinci-002.ft-*", False, 0.002),
    ("gpt-35-turbo-0613.ft-*", False, 0.0015),
    # Azure OpenAI Fine-tuned output
    ("babbage-002.ft-*", True, 0.0004),
    ("davinci-002.ft-*", True, 0.002),
    ("gpt-35-turbo-0613.ft-*", True, 0.002),
    # Legacy OpenAI Fine-tuned models input
    # NOTE(review): these rows were previously missing -- only the
    # forCompletion=True rows existed, under a header that said "input".
    # Legacy fine-tunes are assumed to have one usage rate for both
    # directions; confirm against the OpenAI legacy pricing table.
    ("ada:ft-*", False, 0.0016),
    ("babbage:ft-*", False, 0.0024),
    ("curie:ft-*", False, 0.012),
    ("davinci:ft-*", False, 0.12),
    # Legacy OpenAI Fine-tuned models output
    ("ada:ft-*", True, 0.0016),
    ("babbage:ft-*", True, 0.0024),
    ("curie:ft-*", True, 0.012),
    ("davinci:ft-*", True, 0.12),
    # Anthropic Claude 3 input
    ("claude-3-haiku", False, 0.00025),
    ("claude-3-sonnet", False, 0.003),
    ("claude-3-opus", False, 0.015),
    # Anthropic Claude 3 output
    ("claude-3-haiku", True, 0.00125),
    ("claude-3-sonnet", True, 0.015),
    ("claude-3-opus", True, 0.075),
    # Anthropic Claude 2 input
    ("claude-2.*", False, 0.008),
    ("claude-instant*", False, 0.0008),
    # Anthropic Claude 2 output
    ("claude-2.*", True, 0.024),
    ("claude-instant*", True, 0.0024),
    # Cohere command input
    ("command", False, 0.001),
    ("command-light", False, 0.0003),
    ("command-r", False, 0.0005),
    ("command-r-plus", False, 0.003),
    # Cohere command output
    ("command", True, 0.002),
    ("command-light", True, 0.0006),
    ("command-r", True, 0.0015),
    ("command-r-plus", True, 0.015),
)


def ai_model_costs_config() -> AIModelCosts:
    """Build the AI model cost table in the shape sent to Relay.

    The result is what ``sentry.relay.globalconfig.get_global_config`` stores
    under the ``"aiModelCosts"`` key of the global config.

    Returns:
        A dict with ``"version"`` (currently ``1``) and ``"costs"``, a list
        holding one ``AIModelCost`` entry per row of ``_MODEL_COST_ROWS`` in
        table order.  A new dict and list are created on every call, so
        callers may mutate the result without affecting later calls.
    """
    costs: list[AIModelCost] = [
        {
            "modelId": model_id,
            "forCompletion": for_completion,
            "costPer1kTokens": cost_per_1k,
        }
        for model_id, for_completion, cost_per_1k in _MODEL_COST_ROWS
    ]
    return {"version": 1, "costs": costs}