From a69071c401c702a1c5d79bc8cd83ec783b4e9d45 Mon Sep 17 00:00:00 2001 From: James Humphries Date: Mon, 2 Sep 2024 17:21:32 +0100 Subject: [PATCH] Move search and r2 caching up to the top level handler Signed-off-by: James Humphries --- search/worker/src/index.ts | 62 +++++++++++++++++-------- search/worker/src/query.ts | 90 +++++++++++++------------------------ search/worker/wrangler.toml | 5 +++ 3 files changed, 79 insertions(+), 78 deletions(-) diff --git a/search/worker/src/index.ts b/search/worker/src/index.ts index b7c9f59b..3d556036 100644 --- a/search/worker/src/index.ts +++ b/search/worker/src/index.ts @@ -7,22 +7,25 @@ async function getClient(databaseUrl: string): Promise { throw new Error('DATABASE_URL is required'); } + const now = performance.now(); const client = new Client(databaseUrl); await client.connect(); + console.log('Connected to database in', performance.now() - now, 'ms'); return client; } -function applyCorsHeaders(response: Response) { - response.headers.set('Access-Control-Allow-Origin', '*'); - response.headers.set('Access-Control-Allow-Methods', 'GET'); - return response; -} - async function fetchData(client: Client, queryParam: string, ctx: ExecutionContext): Promise { try { + const start = performance.now(); const results = await query(client, queryParam); - ctx.waitUntil(client.end()); - return Response.json(results); + const end = performance.now(); + console.log(`Query took ${end - start}ms`); + ctx.waitUntil(client.end()); // Don't block on closing the connection + return Response.json(results, { + headers: { + 'Cache-Control': 'public, max-age=300', // Cache for 5 mins + }, + }); } catch (error) { console.error('Error during fetch:', error); return new Response('An internal server error occurred', { status: 500 }); @@ -36,16 +39,18 @@ async function handleSearchRequest(request: Request, env: Env, ctx: ExecutionCon } const client = await getClient(env.DATABASE_URL); + console.log('Querying for:', validation.queryParam); const response = await fetchData(client, validation.queryParam, ctx); - - return applyCorsHeaders(response); + return response; } async function serveR2Object(request: Request, env: Env, objectKey: string) { - const cache = caches.default; - let response = await cache.match(request); - if (response) { - return applyCorsHeaders(new Response(response.body, response)); + if (!objectKey) { + return new Response('Not Found', { status: 404 }); + } + + if (!env.BUCKET) { + return new Response('Internal Server Error, bucket not found', { status: 500 }); } const object = await env.BUCKET.get(objectKey); @@ -53,14 +58,19 @@ async function serveR2Object(request: Request, env: Env, objectKey: string) { return new Response('Not Found', { status: 404 }); } - response = new Response(object.body, { + const response = new Response(object.body, { headers: { 'Content-Type': object.httpMetadata!.contentType || 'application/octet-stream', 'Cache-Control': 'public, max-age=3600', // Cache for 1 hour }, }); - await cache.put(request, response.clone()); - return applyCorsHeaders(response); + return response; +} + +function applyCorsHeaders(response: Response) { + response.headers.set('Access-Control-Allow-Origin', '*'); + response.headers.set('Access-Control-Allow-Methods', 'GET'); + return response; } export default { @@ -70,9 +80,18 @@ export default { return new Response('Method Not Allowed', { status: 405 }); } + const log = (message: string) => console.log(`[${request.method}]${url.pathname}${url.search} - ${message}`); + const url = new URL(request.url); + log('Request received'); + + const cache = caches.default; + let response = await cache.match(request); + if (response) { + log('Cache hit'); + return applyCorsHeaders(new Response(response.body, response)); + } - let response: Response; switch (url.pathname) { case '/search': response = await handleSearchRequest(request, env, ctx); @@ -86,6 +105,11 @@ export default { break; } - return response; + if (response.status === 200) { + log('Cache miss, storing response'); + ctx.waitUntil(cache.put(request, response.clone())); + } + + return applyCorsHeaders(response); }, }; diff --git a/search/worker/src/query.ts b/search/worker/src/query.ts index 0a7451c3..6ec23284 100644 --- a/search/worker/src/query.ts +++ b/search/worker/src/query.ts @@ -10,68 +10,40 @@ const searchQuery = ` WITH search_terms AS ( SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term ), -matched_entities AS ( +term_matches AS ( + SELECT e.*, + COUNT(*) FILTER (WHERE e.title ILIKE '%' || st.term || '%' + OR e.description ILIKE '%' || st.term || '%' + OR e.link_variables->>'name' ILIKE '%' || st.term || '%') AS term_match_count + FROM entities e + CROSS JOIN search_terms st + WHERE e.id ILIKE '%' || st.term || '%' + OR e.addr ILIKE '%' || st.term || '%' + OR e.title ILIKE '%' || st.term || '%' + OR e.description ILIKE '%' || st.term || '%' + OR e.link_variables->>'name' ILIKE '%' || st.term || '%' + GROUP BY e.id +), +ranked_entities AS ( SELECT *, - -- Count the number of search terms that match across title, description, and link_variables->>'name' - ( - SELECT COUNT(*) - FROM search_terms st - WHERE title ILIKE '%' || st.term || '%' - OR description ILIKE '%' || st.term || '%' - OR link_variables->>'name' ILIKE '%' || st.term || '%' - ) AS term_match_count - FROM entities - WHERE EXISTS ( - SELECT 1 - FROM search_terms st - WHERE id ILIKE '%' || st.term || '%' - OR addr ILIKE '%' || st.term || '%' - OR title ILIKE '%' || st.term || '%' - OR description ILIKE '%' || st.term || '%' - OR link_variables->>'name' ILIKE '%' || st.term || '%' - ) + CASE + WHEN title = $1 THEN 1 + WHEN link_variables->>'name' = $1 THEN 2 + WHEN description = $1 THEN 3 + WHEN title ILIKE st.term || '%' THEN 4 + WHEN link_variables->>'name' ILIKE st.term || '%' THEN 5 + WHEN description ILIKE st.term || '%' THEN 6 + WHEN title ILIKE '%' || st.term || '%' THEN 7 + WHEN link_variables->>'name' ILIKE '%' || st.term || '%' THEN 8 + WHEN description ILIKE '%' || st.term || '%' THEN 9 + ELSE 10 + END AS rank + FROM term_matches tm + CROSS JOIN search_terms st ) -SELECT *, - CASE - -- Exact match for the entire search phrase - WHEN title = $1 THEN 1 - WHEN link_variables->>'name' = $1 THEN 2 - WHEN description = $1 THEN 3 -- Exact match for individual words at the start - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE title ILIKE st.term || '%' - ) THEN 4 - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE link_variables->>'name' ILIKE st.term || '%' - ) THEN 5 - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE description ILIKE st.term || '%' - ) THEN 6 -- Partial match for individual words - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE title ILIKE '%' || st.term || '%' - ) THEN 7 - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE link_variables->>'name' ILIKE '%' || st.term || '%' - ) THEN 8 - WHEN EXISTS ( - SELECT 1 - FROM search_terms st - WHERE description ILIKE '%' || st.term || '%' - ) THEN 9 - ELSE 10 - END AS rank -FROM matched_entities +SELECT * +FROM ranked_entities ORDER BY term_match_count DESC, - -- Prioritize rows with more matching terms rank, LENGTH(title), LENGTH(link_variables->>'name'), diff --git a/search/worker/wrangler.toml b/search/worker/wrangler.toml index a0f5010b..cc35b318 100644 --- a/search/worker/wrangler.toml +++ b/search/worker/wrangler.toml @@ -3,3 +3,8 @@ name = "registry-ui-search" main = "src/index.ts" compatibility_date = "2024-08-21" compatibility_flags = ["nodejs_compat"] + +[[r2_buckets]] +binding = 'BUCKET' +bucket_name = 'registry-ui-api' +preview_bucket_name = 'registry-ui-api' \ No newline at end of file