feat: allow setting X-Base-URL header (unkeyed#1870)

* feat: allow setting X-Base-URL header * feat: allow setting X-Base-URL header * fix(docs): correct X-Min-Similarity value to 0.92 in settings.mdx
AkshayBandi027 · Jul 6, 2024 · 318e82c · 318e82c
1 parent 7ff939a
commit 318e82c
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 22 deletions.
diff --git a/apps/docs/mint.json b/apps/docs/mint.json
@@ -112,8 +112,8 @@
       "group": "Semantic cache",
       "pages": [
         "semantic-cache/introduction",
-        "semantic-cache/similarity-threshold",
-        "semantic-cache/why-semantic-caching"
+        "semantic-cache/why-semantic-caching",
+        "semantic-cache/settings"
       ]
     },
     {

diff --git a/apps/docs/semantic-cache/settings.mdx b/apps/docs/semantic-cache/settings.mdx
@@ -0,0 +1,41 @@
+---
+title: Settings
+description: Configure the behavior of the semantic cache
+---
+
+
+## Customizing the base URL
+
+By default, the semantic cache will forward requests to the OpenAI API at `https://api.openai.com/v1`.
+
+You can change this if you want to use a different LLM API or if your provider is served from a different base URL.
+
+To do so, set the `X-Base-Url` header when sending requests to the cache:
+
+```
+  const openai = new OpenAI({
+    apiKey: process.env.OPENAI_API_KEY,
+    baseURL: "https://<gateway>.llm.unkey.io",
+    defaultHeaders: {
+      'X-Base-Url': 'https://api.openai.com/v2'
+    }
+  });
+```
+
+## Similarity threshold
+
+By default, the semantic cache will return a HIT if a previous response is found with a similarity score of 0.9 or above.
+
+You can customize this threshold: lower it to increase the cache hit ratio, or raise it to require a closer match before a cached response is returned.
+
+To do so, set the `X-Min-Similarity` header when sending requests to the cache:
+
+```
+  const openai = new OpenAI({
+    apiKey: process.env.OPENAI_API_KEY,
+    baseURL: "https://<gateway>.llm.unkey.io",
+    defaultHeaders: {
+      'X-Min-Similarity': 0.92
+    }
+  });
+```
diff --git a/apps/docs/semantic-cache/similarity-threshold.mdx b/apps/docs/semantic-cache/similarity-threshold.mdx
diff --git a/apps/semantic-cache/src/worker.ts b/apps/semantic-cache/src/worker.ts
@@ -29,6 +29,7 @@ app.all("*", async (c) => {
   const apiKey = bearer.replace("Bearer ", "");
   const openai = new OpenAI({
     apiKey,
+    baseURL: c.req.header("X-Base-Url"),
   });
   const request = (await c.req.json()) as OpenAI.Chat.Completions.ChatCompletionCreateParams;
   const { db, analytics } = c.get("services");