From f622544b18d9b56079976ef21c84368c378afc00 Mon Sep 17 00:00:00 2001
From: oilbeater
Date: Mon, 7 Oct 2024 18:30:29 +0800
Subject: [PATCH] add fallback middleware

Signed-off-by: oilbeater
---
Review note: src/middlewares/fallback.ts was revised after this patch was
generated, so the blob id on its "index" line below is stale.

 README.md                    |  2 ++
 src/middlewares/fallback.ts  | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/middlewares/index.ts     |  1 +
 src/providers/azureOpenAI.ts |  5 +++--
 4 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 src/middlewares/fallback.ts

diff --git a/README.md b/README.md
index 1ea5b23..b653dd8 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+# Malacca
+
 ![Malacca Logo](./docs/malacca.jpg)
 
 **Malacca** is an open-source AI gateway designed to become the central hub in the world of AI.
diff --git a/src/middlewares/fallback.ts b/src/middlewares/fallback.ts
new file mode 100644
index 0000000..17cdf34
--- /dev/null
+++ b/src/middlewares/fallback.ts
@@ -0,0 +1,54 @@
+import { Context, Next } from 'hono';
+import { AppContext } from '.';
+
+// Workers AI model served when the primary provider fails.
+const FALLBACK_MODEL = '@cf/meta/llama-3.1-8b-instruct';
+
+/**
+ * Serves the request from Cloudflare Workers AI when the primary provider fails.
+ *
+ * A failure is an error recorded on `c.error`, a 5xx response, or a rejected
+ * `next()`. The replacement response carries `X-Fallback-Used: true`; if the
+ * fallback call fails as well, a plain 500 response is sent instead.
+ *
+ * @param c - Hono context; `c.env.AI` is expected to be a Workers AI binding.
+ * @param next - Runs the remaining middleware and the route handler.
+ */
+export const fallbackMiddleware = async (c: Context, next: Next) => {
+    let upstreamFailed = false;
+    try {
+        await next();
+        // Hono passes handler errors to onError and records them on c.error
+        // rather than rejecting next(), so check it along with the status.
+        upstreamFailed =
+            c.error !== undefined ||
+            (c.res && c.res.status >= 500 && c.res.status < 600);
+    } catch (error) {
+        console.error('Primary provider threw, using fallback:', error);
+        upstreamFailed = true;
+    }
+    if (!upstreamFailed) {
+        return;
+    }
+
+    let response: Response;
+    try {
+        // Call CF Workers AI as a fallback
+        const fallbackResponse = await c.env.AI.run(FALLBACK_MODEL, await c.req.json());
+        response =
+            fallbackResponse instanceof ReadableStream
+                ? new Response(fallbackResponse)
+                : new Response(fallbackResponse.response);
+        // Add a header to indicate fallback was used
+        response.headers.set('X-Fallback-Used', 'true');
+    } catch (fallbackError) {
+        console.error('Fallback provider failed:', fallbackError);
+        response = new Response('Both primary and fallback providers failed', { status: 500 });
+    }
+
+    // A Response returned after next() is ignored once the context is
+    // finalized, so assign c.res instead. Resetting it to undefined first is
+    // the pattern Hono documents for replacing a response after next().
+    c.res = undefined;
+    c.res = response;
+};
diff --git a/src/middlewares/index.ts b/src/middlewares/index.ts
index 32ac49e..0a63565 100644
--- a/src/middlewares/index.ts
+++ b/src/middlewares/index.ts
@@ -7,6 +7,7 @@ export { loggingMiddleware } from './logging';
 export { virtualKeyMiddleware } from './virtualKey';
 export { rateLimiterMiddleware } from './rateLimiter';
 export { guardMiddleware } from './guard';
+export { fallbackMiddleware } from './fallback';
 export interface AppContext {
     Bindings: Env,
     Variables: {
diff --git a/src/providers/azureOpenAI.ts b/src/providers/azureOpenAI.ts
index ec3ed3d..e002daf 100644
--- a/src/providers/azureOpenAI.ts
+++ b/src/providers/azureOpenAI.ts
@@ -7,7 +7,8 @@ import {
     loggingMiddleware,
     virtualKeyMiddleware,
     rateLimiterMiddleware,
-    guardMiddleware
+    guardMiddleware,
+    fallbackMiddleware
 } from '../middlewares';
 
 const BasePath = '/azure-openai/:resource_name/deployments/:deployment_name';
@@ -22,7 +23,7 @@ const initMiddleware = async (c: Context, next: Next) => {
 };
 
 
-azureOpenAIRoute.use(initMiddleware, metricsMiddleware, loggingMiddleware, bufferMiddleware, virtualKeyMiddleware, rateLimiterMiddleware, guardMiddleware, cacheMiddleware);
+azureOpenAIRoute.use(initMiddleware, metricsMiddleware, loggingMiddleware, bufferMiddleware, virtualKeyMiddleware, rateLimiterMiddleware, guardMiddleware, cacheMiddleware, fallbackMiddleware);
 
 azureOpenAIRoute.post('/*', async (c: Context) => {
     return azureOpenAIProvider.handleRequest(c);