Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated search query #251

Merged
merged 2 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions search/worker/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 47 additions & 42 deletions search/worker/src/query.ts
Original file line number Diff line number Diff line change
Expand Up
@@ -7,46 +7,51 @@ export const query = async (client: Client, queryParam: string): Promise<Entity[
};

const searchQuery = `
WITH search_terms AS (
SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term
),
term_matches AS (
SELECT e.*,
COUNT(*) FILTER (WHERE e.title ILIKE '%' || st.term || '%'
OR e.description ILIKE '%' || st.term || '%'
OR e.link_variables->>'name' ILIKE '%' || st.term || '%') AS term_match_count
FROM entities e
CROSS JOIN search_terms st
WHERE e.id ILIKE '%' || st.term || '%'
OR e.addr ILIKE '%' || st.term || '%'
OR e.title ILIKE '%' || st.term || '%'
OR e.description ILIKE '%' || st.term || '%'
OR e.link_variables->>'name' ILIKE '%' || st.term || '%'
GROUP BY e.id
),
ranked_entities AS (
SELECT *,
CASE
WHEN title = $1 THEN 1
WHEN link_variables->>'name' = $1 THEN 2
WHEN description = $1 THEN 3
WHEN title ILIKE st.term || '%' THEN 4
WHEN link_variables->>'name' ILIKE st.term || '%' THEN 5
WHEN description ILIKE st.term || '%' THEN 6
WHEN title ILIKE '%' || st.term || '%' THEN 7
WHEN link_variables->>'name' ILIKE '%' || st.term || '%' THEN 8
WHEN description ILIKE '%' || st.term || '%' THEN 9
ELSE 10
END AS rank
FROM term_matches tm
CROSS JOIN search_terms st
)
SELECT *
FROM ranked_entities
ORDER BY term_match_count DESC,
rank,
LENGTH(title),
LENGTH(link_variables->>'name'),
LENGTH(description)
LIMIT 20;
WITH search_terms AS (
SELECT unnest(regexp_split_to_array($1, '[ /]+')) AS term
),
term_matches AS (
SELECT e.*
FROM entities e
INNER JOIN search_terms st
ON e.addr ILIKE '%' || st.term || '%'
OR e.description ILIKE '%' || st.term || '%'
GROUP BY e.id
),
ranked_entities AS (
SELECT *,
/* TODO: remove hard-coded hashicorp/opentofu preferential treatment */
CASE WHEN link_variables->>'namespace' = 'hashicorp' THEN 1 WHEN link_variables->>'namespace' = 'opentofu' THEN 0 ELSE 0.5 END AS popularity_fudge,
CASE WHEN type = 'provider' THEN 1 ELSE 0 END AS provider_rank_fudge,
similarity(tm.addr, $1) AS title_sim,
similarity(tm.description, $1) AS description_sim,
similarity(link_variables->>'name', $1) AS name_sim
FROM term_matches tm
),
providers AS (
SELECT *
FROM ranked_entities
WHERE type LIKE 'provider%'
ORDER BY
provider_rank_fudge DESC,
popularity_fudge DESC,
title_sim DESC,
name_sim DESC,
description_sim DESC
LIMIT 5
),
modules AS (
SELECT *
FROM ranked_entities
WHERE type LIKE 'module%'
ORDER BY
popularity_fudge DESC,
title_sim DESC,
name_sim DESC,
description_sim DESC
LIMIT 5
)
SELECT * FROM providers
UNION ALL
SELECT * FROM modules;
`;
Loading