From 9acaf752074d1a1cf9671f5ee4f8f27ad16d3cdd Mon Sep 17 00:00:00 2001 From: AbstractionFactory <179820029+abstractionfactory@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:53:39 +0100 Subject: [PATCH 1/2] Updated search query Signed-off-by: AbstractionFactory <179820029+abstractionfactory@users.noreply.github.com> Co-authored-by: Christian Mesh --- search/worker/package-lock.json | 16 +++--- search/worker/src/query.ts | 87 +++++++++++++++++---------------- 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/search/worker/package-lock.json b/search/worker/package-lock.json index 20f379d7..82e6c83a 100644 --- a/search/worker/package-lock.json +++ b/search/worker/package-lock.json @@ -1146,10 +1146,11 @@ } }, "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "dev": true, + "license": "MIT", "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -1741,10 +1742,11 @@ "dev": true }, "node_modules/path-to-regexp": { - "version": "6.2.2", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.2.2.tgz", - "integrity": "sha512-GQX3SSMokngb36+whdpRXE+3f9V8UzyAorlYvOGx87ufGHehNTn5lCxrKtLyZ4Yl/wEKnNnr98ZzOwwDZV5ogw==", - "dev": true + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.3.0.tgz", + "integrity": "sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==", + "dev": true, + "license": "MIT" }, "node_modules/pathe": { "version": "1.1.2", diff --git a/search/worker/src/query.ts b/search/worker/src/query.ts index 6ec23284..b56bae38 100644 --- a/search/worker/src/query.ts +++ b/search/worker/src/query.ts @@ -7,46 +7,49 @@ export const query = async (client: Client, queryParam: string): Promise>'name' ILIKE '%' || st.term || '%') AS term_match_count - FROM entities e - CROSS JOIN search_terms st - WHERE e.id ILIKE '%' || st.term || '%' - OR e.addr ILIKE '%' || st.term || '%' - OR e.title ILIKE '%' || st.term || '%' - OR e.description ILIKE '%' || st.term || '%' - OR e.link_variables->>'name' ILIKE '%' || st.term || '%' - GROUP BY e.id -), -ranked_entities AS ( - SELECT *, - CASE - WHEN title = $1 THEN 1 - WHEN link_variables->>'name' = $1 THEN 2 - WHEN description = $1 THEN 3 - WHEN title ILIKE st.term || '%' THEN 4 - WHEN link_variables->>'name' ILIKE st.term || '%' THEN 5 - WHEN description ILIKE st.term || '%' THEN 6 - WHEN title ILIKE '%' || st.term || '%' THEN 7 - WHEN link_variables->>'name' ILIKE '%' || st.term || '%' THEN 8 - WHEN description ILIKE '%' || st.term || '%' THEN 9 - ELSE 10 - END AS rank - FROM term_matches tm - CROSS JOIN search_terms st -) -SELECT * -FROM ranked_entities -ORDER BY term_match_count DESC, - rank, - LENGTH(title), - LENGTH(link_variables->>'name'), - LENGTH(description) -LIMIT 20; + WITH search_terms AS ( + SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term + ), + term_matches AS ( + SELECT e.* + FROM entities e + INNER JOIN search_terms st + ON e.addr ILIKE '%' || st.term || '%' + OR e.description ILIKE '%' || st.term || '%' + GROUP BY e.id + ), + ranked_entities AS ( + SELECT *, + /* TODO: remove hard-coded hashicorp/opentofu preferential treatment */ + CASE WHEN link_variables->>'namespace' = 'hashicorp' THEN 1 WHEN link_variables->>'namespace' = 'opentofu' THEN 0 ELSE 0.5 END AS popularity_fudge, + similarity(tm.addr, $1) AS title_sim, + similarity(tm.description, $1) AS description_sim, + similarity(link_variables->>'name', $1) AS name_sim + FROM term_matches tm + ), + providers AS ( + SELECT * + FROM ranked_entities + WHERE type LIKE 'provider%' + ORDER BY + popularity_fudge DESC, + title_sim DESC, + name_sim DESC, + description_sim DESC + LIMIT 5 + ), + modules AS ( + SELECT * + FROM ranked_entities + WHERE type LIKE 'module%' + ORDER BY + popularity_fudge DESC, + title_sim DESC, + name_sim DESC, + description_sim DESC + LIMIT 5 + ) + SELECT * FROM providers + UNION ALL + SELECT * FROM modules; `; From ad9ce1992c134b37839a905dadd7266786e0fec7 Mon Sep 17 00:00:00 2001 From: AbstractionFactory <179820029+abstractionfactory@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:29:24 +0100 Subject: [PATCH 2/2] Fixing the search query Signed-off-by: AbstractionFactory <179820029+abstractionfactory@users.noreply.github.com> Co-Authored-By: Christian Mesh --- search/worker/src/query.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/search/worker/src/query.ts b/search/worker/src/query.ts index b56bae38..63c928e6 100644 --- a/search/worker/src/query.ts +++ b/search/worker/src/query.ts @@ -22,6 +22,7 @@ const searchQuery = ` SELECT *, /* TODO: remove hard-coded hashicorp/opentofu preferential treatment */ CASE WHEN link_variables->>'namespace' = 'hashicorp' THEN 1 WHEN link_variables->>'namespace' = 'opentofu' THEN 0 ELSE 0.5 END AS popularity_fudge, + CASE WHEN type = 'provider' THEN 1 ELSE 0 END AS provider_rank_fudge, similarity(tm.addr, $1) AS title_sim, similarity(tm.description, $1) AS description_sim, similarity(link_variables->>'name', $1) AS name_sim @@ -32,6 +33,7 @@ const searchQuery = ` FROM ranked_entities WHERE type LIKE 'provider%' ORDER BY + provider_rank_fudge DESC, popularity_fudge DESC, title_sim DESC, name_sim DESC,