From b38da68a1adef9a0a2cc24a847250fda165ff552 Mon Sep 17 00:00:00 2001 From: Andy Chosak Date: Fri, 11 Oct 2024 14:14:20 -0400 Subject: [PATCH] Explicitly query case-insensitively Django's `__contains` lookup matches case-insensitively on SQLite [0], but case-sensitively on PostgreSQL. To keep search case-insensitive regardless of the database backend, this commit switches the filters to Django's `__icontains` lookup instead [1]. [0] https://docs.djangoproject.com/en/5.1/ref/databases/#substring-matching-and-case-sensitivity [1] https://docs.djangoproject.com/en/5.1/ref/models/querysets/#icontains --- crawler/search.py | 14 +++++++------- viewer/tests/test_views.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/crawler/search.py b/crawler/search.py index 84fbfa6..82771fd 100644 --- a/crawler/search.py +++ b/crawler/search.py @@ -10,7 +10,7 @@ def search_components(class_name_contains, include_class_names=False): queryset = Page.objects.prefetch_related("components").filter( - components__class_name__contains=class_name_contains + components__class_name__icontains=class_name_contains ) values = _page_values @@ -26,10 +26,10 @@ def search_components(class_name_contains, include_class_names=False): def search_links(href_contains, include_hrefs=False, or_urlencoded=True): queryset = Page.objects.prefetch_related("links") - href_filter = Q(links__href__contains=href_contains) + href_filter = Q(links__href__icontains=href_contains) if or_urlencoded: # pragma: no branch - href_filter |= Q(links__href__contains=quote_plus(href_contains)) + href_filter |= Q(links__href__icontains=quote_plus(href_contains)) queryset = queryset.filter(href_filter) @@ -52,16 +52,16 @@ def search_empty(): def search_html(html_contains): - return _search_pages(html__contains=html_contains) + return _search_pages(html__icontains=html_contains) def search_text(text_contains): - return _search_pages(text__contains=text_contains) + return _search_pages(text__icontains=text_contains) def 
search_title(title_contains): - return _search_pages(title__contains=title_contains) + return _search_pages(title__icontains=title_contains) def search_url(url_contains): - return _search_pages(url__contains=url_contains) + return _search_pages(url__icontains=url_contains) diff --git a/viewer/tests/test_views.py b/viewer/tests/test_views.py index 90cd78f..4c8ed03 100644 --- a/viewer/tests/test_views.py +++ b/viewer/tests/test_views.py @@ -56,11 +56,17 @@ def test_search_by_html(self): self.assertEqual(len(results), 1) self.assertEqual(results[0]["title"], "Sample homepage") - def test_search_by_text(self): - results = self.get_pages_api(search_type="text", q="Sample child page") + def check_search_by_text(self, q): + results = self.get_pages_api(search_type="text", q=q) self.assertEqual(len(results), 2) self.assertEqual(results[0]["title"], "Sample child page") + def test_search_by_text(self): + self.check_search_by_text("Sample child page") + + def test_search_by_text_case_insensitive(self): + self.check_search_by_text("SAMPLE CHILD PAGE") + def test_search_by_title(self): results = self.get_pages_api(search_type="title", q="Sample child page") self.assertEqual(len(results), 2)