Skip to content

Commit

Permalink
Merge pull request #51 from open-craft/kshitij/fix-empty-search-quotes
Browse files (browse the repository at this point in the history)
Fix missing search excerpts when query has quotes
  • Loading branch information
robrap authored Jul 5, 2018
2 parents efe2243 + 629c70b commit 4062361
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 62 deletions.
18 changes: 12 additions & 6 deletions search/result_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import re
import shlex
import textwrap

from django.conf import settings
Expand Down Expand Up @@ -128,19 +129,24 @@ def excerpt(self):
if "content" not in self._results_fields:
return None

match_words = [self._match_phrase]
separate_words = self._match_phrase.split(' ')
if len(separate_words) > 1:
match_words.extend(self._match_phrase.split(' '))
match_phrases = [self._match_phrase]
separate_phrases = [
phrase.decode('utf-8')
for phrase in shlex.split(self._match_phrase.encode('utf-8'))
]
if len(separate_phrases) > 1:
match_phrases.extend(separate_phrases)
else:
match_phrases = separate_phrases

matches = SearchResultProcessor.find_matches(
SearchResultProcessor.strings_in_dictionary(self._results_fields["content"]),
match_words,
match_phrases,
DESIRED_EXCERPT_LENGTH
)
excerpt_text = ELLIPSIS.join(matches)

for match_word in match_words:
for match_word in match_phrases:
excerpt_text = SearchResultProcessor.decorate_matches(excerpt_text, match_word)

return excerpt_text
96 changes: 41 additions & 55 deletions search/tests/test_search_result_processor.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# -*- coding: utf-8 -*-
""" Tests for result processors """
import ddt

from django.test import TestCase
from django.test.utils import override_settings
from search.result_processor import SearchResultProcessor


# Any class that inherits from TestCase will cause too-many-public-methods pylint error
# pylint: disable=too-many-public-methods
@ddt.ddt
class SearchResultProcessorTests(TestCase):
""" Tests to check SearchResultProcessor is working as desired """

Expand Down Expand Up @@ -182,33 +185,7 @@ def test_excerpt_front(self):
"Dog - match upon first word "
"The long and winding road "
"That leads to your door "
"Will never disappear "
"I've seen that road before "
"It always leads me here "
"Lead me to you door "
"The wild and windy night "
"That the rain washed away "
"Has left a pool of tears "
"Crying for the day "
"Why leave me standing here "
"Let me know the way "
"Many times I've been alone "
"And many times I've cried "
"Any way you'll never know "
"The many ways I've tried "
"But still they lead me back "
"To the long winding road "
"You left me standing here "
"A long long time ago "
"Don't leave me waiting here "
"Lead me to your door "
"But still they lead me back "
"To the long winding road "
"You left me standing here "
"A long long time ago "
"Don't leave me waiting here "
"Lead me to your door "
"Yeah, yeah, yeah, yeah "
"Will never disappear ..."
),
}
}
Expand All @@ -229,41 +206,50 @@ def test_excerpt_back(self):
"content": {
"notes": (
"The long and winding road "
"That leads to your door "
"Will never disappear "
"I've seen that road before "
"It always leads me here "
"Lead me to you door "
"The wild and windy night "
"That the rain washed away "
"Has left a pool of tears "
"Crying for the day "
"Why leave me standing here "
"Let me know the way "
"Many times I've been alone "
"And many times I've cried "
"Any way you'll never know "
"The many ways I've tried "
"But still they lead me back "
"To the long winding road "
"You left me standing here "
"A long long time ago "
"Don't leave me waiting here "
"Lead me to your door "
"But still they lead me back "
"To the long winding road "
"You left me standing here "
"A long long time ago "
"Don't leave me waiting here "
"Lead me to your door "
"Yeah, yeah, yeah, yeah "
"That leads to your door ..."
"... Yeah, yeah, yeah, yeah "
"Match upon last word - Dog"
),
}
}
srp = SearchResultProcessor(test_result, "dog")
self.assertEqual(srp.excerpt[-33:], "Match upon last word - <b>Dog</b>")

# Data-driven cases for test_excerpt_quoted. Each tuple is
# (search phrase, substring expected somewhere in the resulting excerpt).
# Every search phrase contains a double-quoted section; the expected
# substrings show that quoted section wrapped as a whole in <b>...</b>.
@ddt.data(
(u'"never disappear"', u"leads to your door Will <b>never disappear</b>"),
# Quoted phrase that itself contains an apostrophe
(u'"I\'ve seen"', u"<b>I've seen</b> that road before It always"),
# Quoted phrase followed by a separate unquoted word: both are expected
# to be highlighted independently
(
u'"long and winding" leads',
u'The <b>long and winding</b> road That <b>leads</b> to your door'
),
# Cases involving the non-ASCII (Devanagari) sentence in the notes text:
# an ASCII word next to it, a phrase entirely in Devanagari, and a phrase
# mixing both scripts
(u'"search"', u"इसको <b>search</b> करें| Lead"),
(u'"हिंदी में"', u"It always leads me here यह एक <b>हिंदी में</b>"),
(u'"इसको search"', u"वाक्य है| <b>इसको search</b> करें| Lead me"),
# Match at the beginning
(u'"The long"', u'<b>The long</b> and winding road That'),
# Match at the end
(u'"rain washed away"', u'windy night That the <b>rain washed away</b>'),
)
# Unpack each tuple above into the (search_phrase, expected_excerpt) arguments
@ddt.unpack
def test_excerpt_quoted(self, search_phrase, expected_excerpt):
# Purpose: for one (search_phrase, expected_excerpt) pair, build a
# SearchResultProcessor over the fixed notes text below and check that
# expected_excerpt occurs within the processor's excerpt.
test_result = {
"content": {
"notes": (
u"The long and winding road "
u"That leads to your door "
u"Will never disappear "
u"I've seen that road before "
u"It always leads me here "
u"यह एक हिंदी में लिखा हुआ वाक्य है| इसको search करें| "
u"Lead me to you door "
u"The wild and windy night "
u"That the rain washed away "
),
}
}
srp = SearchResultProcessor(test_result, search_phrase)
# assertIn rather than assertEqual: only the highlighted region and its
# immediate surroundings are pinned, not the full excerpt text
self.assertIn(expected_excerpt, srp.excerpt)


class TestSearchResultProcessor(SearchResultProcessor):
"""
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='edx-search',
version='1.2.0',
version='1.2.1',
description='Search and index routines for index access',
author='edX',
author_email='[email protected]',
Expand Down

0 comments on commit 4062361

Please sign in to comment.