feat(ai-monitoring): Add global config for AI model costs (#70823)

Context is here: getsentry/relay#3554 (review)
getsentry · May 14, 2024 · bdac5e7 · bdac5e7
1 parent 1a25154
commit bdac5e7
Show file tree

Hide file tree

Showing 5 changed files with 157 additions and 3 deletions.
diff --git a/requirements-base.txt b/requirements-base.txt
@@ -66,7 +66,7 @@ sentry-arroyo>=2.16.5
 sentry-kafka-schemas>=0.1.81
 sentry-ophio==0.2.7
 sentry-redis-tools>=0.1.7
-sentry-relay>=0.8.60
+sentry-relay>=0.8.64
 sentry-sdk==2.1.1
 snuba-sdk>=2.0.33
 simplejson>=3.17.6

diff --git a/requirements-dev-frozen.txt b/requirements-dev-frozen.txt
@@ -183,7 +183,7 @@ sentry-forked-djangorestframework-stubs==3.15.0.post1
 sentry-kafka-schemas==0.1.81
 sentry-ophio==0.2.7
 sentry-redis-tools==0.1.7
-sentry-relay==0.8.60
+sentry-relay==0.8.64
 sentry-sdk==2.1.1
 sentry-usage-accountant==0.0.10
 simplejson==3.17.6

diff --git a/requirements-frozen.txt b/requirements-frozen.txt
@@ -123,7 +123,7 @@ sentry-arroyo==2.16.5
 sentry-kafka-schemas==0.1.81
 sentry-ophio==0.2.7
 sentry-redis-tools==0.1.7
-sentry-relay==0.8.60
+sentry-relay==0.8.64
 sentry-sdk==2.1.1
 sentry-usage-accountant==0.0.10
 simplejson==3.17.6

diff --git a/src/sentry/relay/config/ai_model_costs.py b/src/sentry/relay/config/ai_model_costs.py
@@ -0,0 +1,151 @@
+from typing import TypedDict
+
+
+class AIModelCost(TypedDict):
+    modelId: str
+    forCompletion: bool
+    costPer1kTokens: float
+
+
+class AIModelCosts(TypedDict):
+    version: int
+    costs: list[AIModelCost]
+
+
+def ai_model_costs_config() -> AIModelCosts:
+    return {
+        "version": 1,
+        "costs": [
+            {
+                "modelId": row[0],
+                "forCompletion": row[1],
+                "costPer1kTokens": row[2],
+            }
+            for row in [
+                # GPT-4 input
+                ("gpt-4", False, 0.03),
+                ("gpt-4-0314", False, 0.03),
+                ("gpt-4-0613", False, 0.03),
+                ("gpt-4-32k", False, 0.06),
+                ("gpt-4-32k-0314", False, 0.06),
+                ("gpt-4-32k-0613", False, 0.06),
+                ("gpt-4-vision-preview", False, 0.01),
+                ("gpt-4-1106-preview", False, 0.01),
+                ("gpt-4-0125-preview", False, 0.01),
+                ("gpt-4-turbo-preview", False, 0.01),
+                ("gpt-4-turbo", False, 0.01),
+                ("gpt-4-turbo-2024-04-09", False, 0.01),
+                # GPT-4 output
+                ("gpt-4", True, 0.06),
+                ("gpt-4-0314", True, 0.06),
+                ("gpt-4-0613", True, 0.06),
+                ("gpt-4-32k", True, 0.12),
+                ("gpt-4-32k-0314", True, 0.12),
+                ("gpt-4-32k-0613", True, 0.12),
+                ("gpt-4-vision-preview", True, 0.03),
+                ("gpt-4-1106-preview", True, 0.03),
+                ("gpt-4-0125-preview", True, 0.03),
+                ("gpt-4-turbo-preview", True, 0.03),
+                ("gpt-4-turbo", True, 0.03),
+                ("gpt-4-turbo-2024-04-09", True, 0.03),
+                # GPT-3.5 input
+                ("gpt-3.5-turbo", False, 0.0005),
+                ("gpt-3.5-turbo-0125", False, 0.0005),
+                ("gpt-3.5-turbo-0301", False, 0.0015),
+                ("gpt-3.5-turbo-0613", False, 0.0015),
+                ("gpt-3.5-turbo-1106", False, 0.001),
+                ("gpt-3.5-turbo-instruct", False, 0.0015),
+                ("gpt-3.5-turbo-16k", False, 0.003),
+                ("gpt-3.5-turbo-16k-0613", False, 0.003),
+                # GPT-3.5 output
+                ("gpt-3.5-turbo", True, 0.0015),
+                ("gpt-3.5-turbo-0125", True, 0.0015),
+                ("gpt-3.5-turbo-0301", True, 0.002),
+                ("gpt-3.5-turbo-0613", True, 0.002),
+                ("gpt-3.5-turbo-1106", True, 0.002),
+                ("gpt-3.5-turbo-instruct", True, 0.002),
+                ("gpt-3.5-turbo-16k", True, 0.004),
+                ("gpt-3.5-turbo-16k-0613", True, 0.004),
+                # Azure GPT-35 input
+                ("gpt-35-turbo", False, 0.0015),  # Azure OpenAI version of ChatGPT
+                ("gpt-35-turbo-0301", False, 0.0015),  # Azure OpenAI version of ChatGPT
+                ("gpt-35-turbo-0613", False, 0.0015),
+                ("gpt-35-turbo-instruct", False, 0.0015),
+                ("gpt-35-turbo-16k", False, 0.003),
+                ("gpt-35-turbo-16k-0613", False, 0.003),
+                # Azure GPT-35 output
+                ("gpt-35-turbo", True, 0.002),  # Azure OpenAI version of ChatGPT
+                ("gpt-35-turbo-0301", True, 0.002),  # Azure OpenAI version of ChatGPT
+                ("gpt-35-turbo-0613", True, 0.002),
+                ("gpt-35-turbo-instruct", True, 0.002),
+                ("gpt-35-turbo-16k", True, 0.004),
+                ("gpt-35-turbo-16k-0613", True, 0.004),
+                # Other OpenAI models
+                ("text-ada-001", True, 0.0004),
+                ("text-ada-001", False, 0.0004),
+                ("ada", True, 0.0004),
+                ("ada", False, 0.0004),
+                ("text-babbage-001", True, 0.0005),
+                ("text-babbage-001", False, 0.0005),
+                ("babbage", True, 0.0005),
+                ("babbage", False, 0.0005),
+                ("text-curie-001", True, 0.002),
+                ("text-curie-001", False, 0.002),
+                ("curie", True, 0.002),
+                ("curie", False, 0.002),
+                ("text-davinci-003", True, 0.02),
+                ("text-davinci-003", False, 0.02),
+                ("text-davinci-002", True, 0.02),
+                ("text-davinci-002", False, 0.02),
+                ("code-davinci-002", True, 0.02),
+                ("code-davinci-002", False, 0.02),
+                # Fine-tuned OpenAI input
+                ("ft:babbage-002", False, 0.0016),
+                ("ft:davinci-002", False, 0.012),
+                ("ft:gpt-3.5-turbo-0613", False, 0.012),
+                ("ft:gpt-3.5-turbo-1106", False, 0.012),
+                # Fine-tuned OpenAI output
+                ("ft:babbage-002", True, 0.0016),
+                ("ft:davinci-002", True, 0.012),
+                ("ft:gpt-3.5-turbo-0613", True, 0.016),
+                ("ft:gpt-3.5-turbo-1106", True, 0.016),
+                # Azure OpenAI Fine-tuned input
+                ("babbage-002.ft-*", False, 0.0004),
+                ("davinci-002.ft-*", False, 0.002),
+                ("gpt-35-turbo-0613.ft-*", False, 0.0015),
+                # Azure OpenAI Fine-tuned output
+                ("babbage-002.ft-*", True, 0.0004),
+                ("davinci-002.ft-*", True, 0.002),
+                ("gpt-35-turbo-0613.ft-*", True, 0.002),
+                # Legacy OpenAI Fine-tuned models (forCompletion=True rows only; no input rows listed)
+                ("ada:ft-*", True, 0.0016),
+                ("babbage:ft-*", True, 0.0024),
+                ("curie:ft-*", True, 0.012),
+                ("davinci:ft-*", True, 0.12),
+                # Anthropic Claude 3 input
+                ("claude-3-haiku", False, 0.00025),
+                ("claude-3-sonnet", False, 0.003),
+                ("claude-3-opus", False, 0.015),
+                # Anthropic Claude 3 output
+                ("claude-3-haiku", True, 0.00125),
+                ("claude-3-sonnet", True, 0.015),
+                ("claude-3-opus", True, 0.075),
+                # Anthropic Claude 2 input
+                ("claude-2.*", False, 0.008),
+                ("claude-instant*", False, 0.0008),
+                # Anthropic Claude 2 output
+                ("claude-2.*", True, 0.024),
+                ("claude-instant*", True, 0.0024),
+                # Cohere command input
+                ("command", False, 0.001),
+                ("command-light", False, 0.0003),
+                ("command-r", False, 0.0005),
+                ("command-r-plus", False, 0.003),
+                # Cohere command output
+                ("command", True, 0.002),
+                ("command-light", True, 0.0006),
+                ("command-r", True, 0.0015),
+                ("command-r-plus", True, 0.015),
+            ]
+        ],
+    }
diff --git a/src/sentry/relay/globalconfig.py b/src/sentry/relay/globalconfig.py
@@ -2,6 +2,7 @@
 
 import sentry.options
 from sentry.relay.config import GenericFiltersConfig
+from sentry.relay.config.ai_model_costs import AIModelCosts, ai_model_costs_config
 from sentry.relay.config.measurements import MeasurementsConfig, get_measurements_config
 from sentry.utils import metrics
 
@@ -27,6 +28,7 @@
 
 class GlobalConfig(TypedDict, total=False):
     measurements: MeasurementsConfig
+    aiModelCosts: AIModelCosts
     filters: GenericFiltersConfig | None
     options: dict[str, Any]
 
@@ -44,6 +46,7 @@ def get_global_config():
 
     global_config: GlobalConfig = {
         "measurements": get_measurements_config(),
+        "aiModelCosts": ai_model_costs_config(),
     }
 
     filters = get_global_generic_filters()