Skip to content

Commit

Permalink
Explicitly query case-insensitively
Browse files Browse the repository at this point in the history
Django's `__contains` lookup matches case-insensitively on SQLite [0]
but case-sensitively on PostgreSQL. To keep search case-insensitive on
both backends, this commit switches the queries to Django's
`__icontains` lookup instead [1].

[0] https://docs.djangoproject.com/en/5.1/ref/databases/#substring-matching-and-case-sensitivity
[1] https://docs.djangoproject.com/en/5.1/ref/models/querysets/#icontains
  • Loading branch information
chosak committed Oct 11, 2024
1 parent 74a53fd commit b38da68
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 9 deletions.
14 changes: 7 additions & 7 deletions crawler/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

def search_components(class_name_contains, include_class_names=False):
queryset = Page.objects.prefetch_related("components").filter(
components__class_name__contains=class_name_contains
components__class_name__icontains=class_name_contains
)

values = _page_values
Expand All @@ -26,10 +26,10 @@ def search_components(class_name_contains, include_class_names=False):
def search_links(href_contains, include_hrefs=False, or_urlencoded=True):
queryset = Page.objects.prefetch_related("links")

href_filter = Q(links__href__contains=href_contains)
href_filter = Q(links__href__icontains=href_contains)

if or_urlencoded: # pragma: no branch
href_filter |= Q(links__href__contains=quote_plus(href_contains))
href_filter |= Q(links__href__icontains=quote_plus(href_contains))

queryset = queryset.filter(href_filter)

Expand All @@ -52,16 +52,16 @@ def search_empty():


def search_html(html_contains):
return _search_pages(html__contains=html_contains)
return _search_pages(html__icontains=html_contains)


def search_text(text_contains):
return _search_pages(text__contains=text_contains)
return _search_pages(text__icontains=text_contains)


def search_title(title_contains):
return _search_pages(title__contains=title_contains)
return _search_pages(title__icontains=title_contains)


def search_url(url_contains):
return _search_pages(url__contains=url_contains)
return _search_pages(url__icontains=url_contains)
10 changes: 8 additions & 2 deletions viewer/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,17 @@ def test_search_by_html(self):
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["title"], "Sample homepage")

def test_search_by_text(self):
results = self.get_pages_api(search_type="text", q="Sample child page")
def check_search_by_text(self, q):
results = self.get_pages_api(search_type="text", q=q)
self.assertEqual(len(results), 2)
self.assertEqual(results[0]["title"], "Sample child page")

def test_search_by_text(self):
self.check_search_by_text("Sample child page")

def test_search_by_text_case_insensitive(self):
self.check_search_by_text("SAMPLE CHILD PAGE")

def test_search_by_title(self):
results = self.get_pages_api(search_type="title", q="Sample child page")
self.assertEqual(len(results), 2)
Expand Down

0 comments on commit b38da68

Please sign in to comment.