From 318e82cb87afc2ba37f02314d1002811685cb646 Mon Sep 17 00:00:00 2001 From: Andreas Thomas Date: Sat, 6 Jul 2024 12:37:56 +0200 Subject: [PATCH] feat: allow setting X-Base-URL header (#1870) * feat: allow setting X-Base-URL header * feat: allow setting X-Base-URL header * fix(docs): correct X-Min-Similarity value to 0.92 in settings.mdx --- apps/docs/mint.json | 4 +- apps/docs/semantic-cache/settings.mdx | 41 +++++++++++++++++++ .../semantic-cache/similarity-threshold.mdx | 20 --------- apps/semantic-cache/src/worker.ts | 1 + 4 files changed, 44 insertions(+), 22 deletions(-) create mode 100644 apps/docs/semantic-cache/settings.mdx delete mode 100644 apps/docs/semantic-cache/similarity-threshold.mdx diff --git a/apps/docs/mint.json b/apps/docs/mint.json index 22f7d73abc..964cbafd08 100644 --- a/apps/docs/mint.json +++ b/apps/docs/mint.json @@ -112,8 +112,8 @@ "group": "Semantic cache", "pages": [ "semantic-cache/introduction", - "semantic-cache/similarity-threshold", - "semantic-cache/why-semantic-caching" + "semantic-cache/why-semantic-caching", + "semantic-cache/settings" ] }, { diff --git a/apps/docs/semantic-cache/settings.mdx b/apps/docs/semantic-cache/settings.mdx new file mode 100644 index 0000000000..4f63a16719 --- /dev/null +++ b/apps/docs/semantic-cache/settings.mdx @@ -0,0 +1,41 @@ +--- +title: Settings +description: Configure the behavior of the semantic cache +--- + + +## Customizing the base URL + +By default, the semantic cache will forward requests to the OpenAI API at `https://api.openai.com/v1`. + +You can customize this, if you want to use a different LLM API or have a different base URL. 
+ +To do so, set the `X-Base-Url` header when sending requests to the cache: + +``` + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + baseURL: "https://<gateway>.llm.unkey.io", + defaultHeaders: { + 'X-Base-Url': 'https://api.openai.com/v2' + } + }); +``` + +## Similarity threshold + +By default, the semantic cache will return a HIT if a previous response is found with a similarity score of 0.9 or above. + +You can customize this, if you want to increase cache hit ratio and/or have a higher standard for returning cached responses. + +To do so, set the `X-Min-Similarity` header when sending requests to the cache: + +``` + const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + baseURL: "https://<gateway>.llm.unkey.io", + defaultHeaders: { + 'X-Min-Similarity': 0.92 + } + }); +``` \ No newline at end of file diff --git a/apps/docs/semantic-cache/similarity-threshold.mdx b/apps/docs/semantic-cache/similarity-threshold.mdx deleted file mode 100644 index 2a62a790f1..0000000000 --- a/apps/docs/semantic-cache/similarity-threshold.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: Setting similarity threshold -description: Configure the threshold for returning responses from the cache ---- - -By default, the semantic cache will return a HIT if a previous response is found with a similarity score of 0.9 or above. - -You can customize this, if you want to increase cache hit ratio and/or have a higher standard for returning cached responses. 
- -To do so, set the `X-Min-Similarity` header when sending requests to the cache: - -``` - const openai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY, - baseURL: "https://<gateway>.llm.unkey.io", - defaultHeaders: { - 'X-Min-Similarity': 0.92x - } - }); -``` \ No newline at end of file diff --git a/apps/semantic-cache/src/worker.ts b/apps/semantic-cache/src/worker.ts index 1f72b6402e..a634b6c599 100644 --- a/apps/semantic-cache/src/worker.ts +++ b/apps/semantic-cache/src/worker.ts @@ -29,6 +29,7 @@ app.all("*", async (c) => { const apiKey = bearer.replace("Bearer ", ""); const openai = new OpenAI({ apiKey, + baseURL: c.req.header("X-Base-Url"), }); const request = (await c.req.json()) as OpenAI.Chat.Completions.ChatCompletionCreateParams; const { db, analytics } = c.get("services");