From a0021765e22b37db9fffd24f3f0cb3b77b937811 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Mon, 7 Nov 2011 14:50:53 -0500 Subject: [PATCH 01/12] make per-item highlighting available on individual result document --- sunburnt/search.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sunburnt/search.py b/sunburnt/search.py index 6360bc5..c99e277 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -469,6 +469,19 @@ def options(self): def transform_result(self, result, constructor): if constructor is not dict: result.result.docs = [constructor(**d) for d in result.result.docs] + # perhaps make highlighting available in custom results if + # the class provides a set_highlighting method or some such ? + else: + if result.highlighting: + for d in result.result.docs: + # if the unique key for a result doc is present in highlighting, + # add the highlighting for that document into the result dict + # (but don't override any existing content) + if 'highlighting' not in d and \ + d[self.schema.unique_key] in result.highlighting: + d['highlighting'] = result.highlighting[d[self.schema.unique_key]] + # NOTE: should this be a more unique field + # name to reduce potential conflicts? return result def params(self): From 7c0d661cae566c9722c375cedc69c86258b34b3e Mon Sep 17 00:00:00 2001 From: Toby White Date: Mon, 24 Oct 2011 22:45:15 +0100 Subject: [PATCH 02/12] Clean up the handling of query args --- docs/addingdocuments.rst | 5 +++++ docs/indexmanagement.rst | 20 +++++++++++++------ sunburnt/sunburnt.py | 42 +++++++++++++++++++++++++++------------- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/docs/addingdocuments.rst b/docs/addingdocuments.rst index 7318f9f..9f4424e 100644 --- a/docs/addingdocuments.rst +++ b/docs/addingdocuments.rst @@ -91,3 +91,8 @@ huge update POST. You can do this by doing: si.add(Book.objects.iterator(), chunk=1000) where ``chunk`` controls how many documents are put into each update chunk. + +.. 
note:: Optional arguments to add: + + ``add()`` takes two additional optional arguments: ``commit``, ``commitWithin``, ``softCommit``. + See http://wiki.apache.org/solr/UpdateXmlMessages for details. diff --git a/docs/indexmanagement.rst b/docs/indexmanagement.rst index 2f79134..29ea832 100644 --- a/docs/indexmanagement.rst +++ b/docs/indexmanagement.rst @@ -3,8 +3,16 @@ Managing your index =================== -We mentioned the use of ``commit()`` above. -There’s a couple of other housekeeping methods that might be useful. +Committing changes +------------------ + +We mentioned the use of ``commit()`` above. It takes three optional arguments: + +* waitSearcher +* expungeDeletes +* softCommit + +See http://wiki.apache.org/solr/UpdateXmlMessages for details. Optimizing ---------- @@ -25,12 +33,12 @@ Either way, to optimize an index, simply call: A Solr optimize also performs a commit, so if you’re about to ``optimize()`` anyway, you can leave off the preceding ``commit()``. It doesn’t particularly hurt to do both though. -Both ``commit()`` and ``optimize()`` take two optional arguments, which you -almost never need to worry about. See http://wiki.apache.org/solr/UpdateXmlMessages for details. +``optimize()`` takes two optional arguments: -:: +* waitSearcher +* maxSegments - wait_flush, wait_searcher +See http://wiki.apache.org/solr/UpdateXmlMessages for details. 
Rollback -------- diff --git a/sunburnt/sunburnt.py b/sunburnt/sunburnt.py index c02084d..2672b21 100644 --- a/sunburnt/sunburnt.py +++ b/sunburnt/sunburnt.py @@ -37,13 +37,13 @@ def request(self, *args, **kwargs): time.sleep(self.retry_timeout) return self.http_connection.request(*args, **kwargs) - def commit(self, waitFlush=True, waitSearcher=True, expungeDeletes=False): - response = self.update('', - waitFlush=waitFlush, waitSearcher=waitSearcher, expungeDeletes=expungeDeletes) + def commit(self, waitSearcher=None, expungeDeletes=None, softCommit=None): + response = self.update('', commit=True, + waitSearcher=waitSearcher, expungeDeletes=expungeDeletes, softCommit=softCommit) - def optimize(self, waitFlush=None, waitSearcher=None, maxSegments=None): + def optimize(self, waitSearcher=None, maxSegments=None): response = self.update('', optimize=True, - waitFlush=waitFlush, waitSearcher=waitSearcher, maxSegments=maxSegments) + waitSearcher=waitSearcher, maxSegments=maxSegments) # For both commit & optimize above, we use the XML body instead # of the URL parameter, because if we're using POST (which we @@ -52,7 +52,7 @@ def optimize(self, waitFlush=None, waitSearcher=None, maxSegments=None): def rollback(self): response = self.update("") - def update(self, update_doc, commit=None, optimize=None, waitFlush=None, waitSearcher=None, expungeDeletes=None, maxSegments=None): + def update(self, update_doc, commit=None, commitWithin=None, softCommit=None, optimize=None, waitSearcher=None, expungeDeletes=None, maxSegments=None): body = update_doc if body: headers = {"Content-Type":"text/xml; charset=utf-8"} @@ -61,16 +61,32 @@ def update(self, update_doc, commit=None, optimize=None, waitFlush=None, waitSea extra_params = {} if commit is not None: extra_params['commit'] = "true" if commit else "false" + if commitWithin is not None: + try: + extra_params['commitWithin'] = str(float(commitWithin)) + except (TypeError, ValueError): + raise ValueError("commitWithin should be a 
number in milliseconds") + if extra_params['commitWithin'] < 0: + raise ValueError("commitWithin should be a number in milliseconds") + if softCommit is not None: + extra_params['softCommit'] = "true" if softCommit else "false" if optimize is not None: extra_params['optimize'] = "true" if optimize else "false" - if waitFlush is not None: - extra_params['waitFlush'] = "true" if waitFlush else "false" if waitSearcher is not None: extra_params['waitSearcher'] = "true" if waitSearcher else "false" if expungeDeletes is not None: extra_params['expungeDeletes'] = "true" if expungeDeletes else "false" if maxSegments is not None: - extra_params['maxSegments'] = str(int(maxSegments)) + try: + extra_params['maxSegments'] = str(int(maxSegments)) + except (TypeError, ValueError): + raise ValueError("maxSegments") + if extra_params['maxSegments'] <= 0: + raise ValueError("maxSegments should be a positive number") + if 'expungeDeletes' in extra_params and 'commit' not in extra_params: + raise ValueError("Can't do expungeDeletes without commit") + if 'maxSegments' in extra_params and 'optimize' not in extra_params: + raise ValueError("Can't do expungeDeletes without commit") if extra_params: url = "%s?%s" % (self.update_url, urllib.urlencode(extra_params)) else: @@ -138,7 +154,7 @@ def init_schema(self): schemadoc = StringIO.StringIO(c) self.schema = SolrSchema(schemadoc) - def add(self, docs, chunk=100, commit=None, waitFlush=None, waitSearcher=None): + def add(self, docs, chunk=100, **kwargs): if not self.writeable: raise TypeError("This Solr instance is only for reading") if hasattr(docs, "items") or not hasattr(docs, "__iter__"): @@ -147,9 +163,9 @@ def add(self, docs, chunk=100, commit=None, waitFlush=None, waitSearcher=None): # chunk docs. 
for doc_chunk in grouper(docs, chunk): update_message = self.schema.make_update(doc_chunk) - self.conn.update(str(update_message), commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher) + self.conn.update(str(update_message), **kwargs) - def delete(self, docs=None, queries=None, commit=None, waitFlush=None, waitSearcher=None): + def delete(self, docs=None, queries=None, **kwargs): if not self.writeable: raise TypeError("This Solr instance is only for reading") if not docs and not queries: @@ -157,7 +173,7 @@ def delete(self, docs=None, queries=None, commit=None, waitFlush=None, waitSearc elif docs is not None and (hasattr(docs, "items") or not hasattr(docs, "__iter__")): docs = [docs] delete_message = self.schema.make_delete(docs, queries) - self.conn.update(str(delete_message), commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher) + self.conn.update(str(delete_message), **kwargs) def commit(self, *args, **kwargs): if not self.writeable: From cbaf46283d8a075dbeca216548445d88185f3ee0 Mon Sep 17 00:00:00 2001 From: Toby White Date: Tue, 25 Oct 2011 08:17:23 +0100 Subject: [PATCH 03/12] Fix up documentation for commit-related args --- docs/addingdocuments.rst | 2 +- docs/deletingdocuments.rst | 5 +++++ docs/indexmanagement.rst | 10 +++++----- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/addingdocuments.rst b/docs/addingdocuments.rst index 9f4424e..d1e151d 100644 --- a/docs/addingdocuments.rst +++ b/docs/addingdocuments.rst @@ -94,5 +94,5 @@ where ``chunk`` controls how many documents are put into each update chunk. .. note:: Optional arguments to add: - ``add()`` takes two additional optional arguments: ``commit``, ``commitWithin``, ``softCommit``. + ``add()`` takes additional optional arguments: ``commit``, ``commitWithin``, ``softCommit``, ``expungeDeletes``, ``waitSearcher``, ``optimize``, ``maxSegments``. See http://wiki.apache.org/solr/UpdateXmlMessages for details. 
diff --git a/docs/deletingdocuments.rst b/docs/deletingdocuments.rst index 7a2f827..6b338f5 100644 --- a/docs/deletingdocuments.rst +++ b/docs/deletingdocuments.rst @@ -44,3 +44,8 @@ To clear the entire index, there is a shortcut which simply deletes every docume si.delete_all() Deletions, like additions, only take effect after a commit (or autocommit). + +.. note:: Optional arguments to delete: + + ``delete()`` takes additional optional arguments: ``commit``, ``commitWithin``, ``softCommit``, ``expungeDeletes``, ``waitSearcher``, ``optimize``, ``maxSegments``. + See http://wiki.apache.org/solr/UpdateXmlMessages for details. diff --git a/docs/indexmanagement.rst b/docs/indexmanagement.rst index 29ea832..3123282 100644 --- a/docs/indexmanagement.rst +++ b/docs/indexmanagement.rst @@ -8,9 +8,9 @@ Committing changes We mentioned the use of ``commit()`` above. It takes three optional arguments: -* waitSearcher -* expungeDeletes -* softCommit +* ``waitSearcher`` +* ``expungeDeletes`` +* ``softCommit`` See http://wiki.apache.org/solr/UpdateXmlMessages for details. @@ -35,8 +35,8 @@ you can leave off the preceding ``commit()``. It doesn’t particularly hurt to ``optimize()`` takes two optional arguments: -* waitSearcher -* maxSegments +* ``waitSearcher`` +* ``maxSegments`` See http://wiki.apache.org/solr/UpdateXmlMessages for details. 
From 05d21f948c7b1b1faa7e85566b380605f5202725 Mon Sep 17 00:00:00 2001 From: Toby White Date: Tue, 25 Oct 2011 08:27:58 +0100 Subject: [PATCH 04/12] Refactor url_for_update for easier testing --- sunburnt/sunburnt.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sunburnt/sunburnt.py b/sunburnt/sunburnt.py index 2672b21..52b7f83 100644 --- a/sunburnt/sunburnt.py +++ b/sunburnt/sunburnt.py @@ -52,12 +52,19 @@ def optimize(self, waitSearcher=None, maxSegments=None): def rollback(self): response = self.update("") - def update(self, update_doc, commit=None, commitWithin=None, softCommit=None, optimize=None, waitSearcher=None, expungeDeletes=None, maxSegments=None): + def update(self, update_doc, **kwargs): body = update_doc if body: headers = {"Content-Type":"text/xml; charset=utf-8"} else: headers = {} + url = self.url_for_update(**kwargs) + r, c = self.request(url, method="POST", body=body, + headers=headers) + if r.status != 200: + raise SolrError(r, c) + + def url_for_update(self, commit=None, commitWithin=None, softCommit=None, optimize=None, waitSearcher=None, expungeDeletes=None, maxSegments=None): extra_params = {} if commit is not None: extra_params['commit'] = "true" if commit else "false" @@ -88,13 +95,9 @@ def update(self, update_doc, commit=None, commitWithin=None, softCommit=None, op if 'maxSegments' in extra_params and 'optimize' not in extra_params: raise ValueError("Can't do expungeDeletes without commit") if extra_params: - url = "%s?%s" % (self.update_url, urllib.urlencode(extra_params)) + return "%s?%s" % (self.update_url, urllib.urlencode(sorted(extra_params.items()))) else: - url = self.update_url - r, c = self.request(url, method="POST", body=body, - headers=headers) - if r.status != 200: - raise SolrError(r, c) + return self.update_url def select(self, params): qs = urllib.urlencode(params) From ef884583aeb28a8609e5fa5dc751590d496e514b Mon Sep 17 00:00:00 2001 From: Toby White Date: Tue, 25 Oct 2011 08:29:19 +0100 
Subject: [PATCH 05/12] Fix error message in url_for_update --- sunburnt/sunburnt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sunburnt/sunburnt.py b/sunburnt/sunburnt.py index 52b7f83..29ab766 100644 --- a/sunburnt/sunburnt.py +++ b/sunburnt/sunburnt.py @@ -93,7 +93,7 @@ def url_for_update(self, commit=None, commitWithin=None, softCommit=None, optimi if 'expungeDeletes' in extra_params and 'commit' not in extra_params: raise ValueError("Can't do expungeDeletes without commit") if 'maxSegments' in extra_params and 'optimize' not in extra_params: - raise ValueError("Can't do expungeDeletes without commit") + raise ValueError("Can't do maxSegments without optimize") if extra_params: return "%s?%s" % (self.update_url, urllib.urlencode(sorted(extra_params.items()))) else: From 3b386c028775805ce64bfd24b434e631441962b3 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 1 Dec 2011 16:52:52 -0500 Subject: [PATCH 06/12] clarify in the docs that execute should not be called when paginating with django paginator --- docs/queryingsolr.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/queryingsolr.rst b/docs/queryingsolr.rst index 3cde48b..b0228da 100644 --- a/docs/queryingsolr.rst +++ b/docs/queryingsolr.rst @@ -268,6 +268,16 @@ anywhere else you want to paginate contents) exactly as described in the `paginator example in the Django documentation `_. +.. Note:: + + When using a sunburnt query object with a Django paginator, you can + chain any number of filters or any of the other methods that return + a :class:`~sunburnt.SolrSearch` instance; however, you should *not* + call :meth:`~sunburnt.SolrSearch.execute`, as that will execute the + query and return the result set for the current query; to function + properly, the paginator needs to be able to query Solr for the total + number of matches for the query and then add pagination options to + slice up the results appropriately. 
Returning different fields -------------------------- From b339e9dc902d354a30a3b7cf23cd1ec9151ba374 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 1 Dec 2011 17:30:11 -0500 Subject: [PATCH 07/12] undo 3b386c0 --- docs/queryingsolr.rst | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/queryingsolr.rst b/docs/queryingsolr.rst index b0228da..3cde48b 100644 --- a/docs/queryingsolr.rst +++ b/docs/queryingsolr.rst @@ -268,16 +268,6 @@ anywhere else you want to paginate contents) exactly as described in the `paginator example in the Django documentation `_. -.. Note:: - - When using a sunburnt query object with a Django paginator, you can - chain any number of filters or any of the other methods that return - a :class:`~sunburnt.SolrSearch` instance; however, you should *not* - call :meth:`~sunburnt.SolrSearch.execute`, as that will execute the - query and return the result set for the current query; to function - properly, the paginator needs to be able to query Solr for the total - number of matches for the query and then add pagination options to - slice up the results appropriately. Returning different fields -------------------------- From 5791271ecde761fa57ba33be0e1ad18723e0b686 Mon Sep 17 00:00:00 2001 From: Rebecca Sutton Koeser Date: Thu, 1 Dec 2011 17:31:11 -0500 Subject: [PATCH 08/12] clarify in the docs that execute should not be called when paginating with django paginator --- docs/queryingsolr.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/queryingsolr.rst b/docs/queryingsolr.rst index 3cde48b..b0228da 100644 --- a/docs/queryingsolr.rst +++ b/docs/queryingsolr.rst @@ -268,6 +268,16 @@ anywhere else you want to paginate contents) exactly as described in the `paginator example in the Django documentation `_. +.. 
Note:: + + When using a sunburnt query object with a Django paginator, you can + chain any number of filters or any of the other methods that return + a :class:`~sunburnt.SolrSearch` instance; however, you should *not* + call :meth:`~sunburnt.SolrSearch.execute`, as that will execute the + query and return the result set for the current query; to function + properly, the paginator needs to be able to query Solr for the total + number of matches for the query and then add pagination options to + slice up the results appropriately. Returning different fields -------------------------- From a4c99ff08601ad2c798ca54c6b74974de7653140 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 1 May 2014 18:32:14 -0400 Subject: [PATCH 09/12] remove merge artifact missed in upstream update --- sunburnt/search.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 2693700..652bce0 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -490,11 +490,6 @@ def results_as(self, constructor): def transform_result(self, result, constructor): if constructor is not dict: -<<<<<<< HEAD - result.result.docs = [constructor(**d) for d in result.result.docs] - # perhaps make highlighting available in custom results if - # the class provides a set_highlighting method or some such ? 
-======= construct_docs = lambda docs: [constructor(**d) for d in docs] result.result.docs = construct_docs(result.result.docs) for key in result.more_like_these: From 31bb9b588fb0490ad6f49ccb15f7e6ab51934c33 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 1 May 2014 18:42:54 -0400 Subject: [PATCH 10/12] simple support for Solr 4.x join queries --- sunburnt/search.py | 28 ++++++++++++++++++++++++++-- sunburnt/test_search.py | 10 ++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 652bce0..2f285a1 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -16,6 +16,7 @@ def __init__(self, schema, option_flag=None, original=None): self.terms = collections.defaultdict(set) self.phrases = collections.defaultdict(set) self.ranges = set() + self.joins = set() self.subqueries = [] self._and = True self._or = self._not = self._pow = False @@ -25,6 +26,7 @@ def __init__(self, schema, option_flag=None, original=None): self.terms = copy.copy(original.terms) self.phrases = copy.copy(original.phrases) self.ranges = copy.copy(original.ranges) + self.joins = copy.copy(original.joins) self.subqueries = copy.copy(original.subqueries) self._or = original._or self._and = original._and @@ -101,6 +103,13 @@ def serialize_range_queries(self): s.append(u"%s:%s" % (name, range_s)) return u' AND '.join(s) + def serialize_join_queries(self): + s = [] + for join_from, join_to, query in sorted(self.joins): + s.append(u"{!join from=%s to=%s}%s" % (join_from, join_to, query)) + return u' AND '.join(s) + + def child_needs_parens(self, child): if len(child) == 1: return False @@ -207,7 +216,8 @@ def serialize_to_unicode(self, level=0, op=None): else: u = [s for s in [self.serialize_term_queries(self.terms), self.serialize_term_queries(self.phrases), - self.serialize_range_queries()] + self.serialize_range_queries(), + self.serialize_join_queries()] if s] for q in self.subqueries: op_ = u'OR' if self._or else u'AND' @@ -358,6 
+368,16 @@ def add_range(self, field_name, rel, value): insts = (field.instance_from_user_data(value),) self.ranges.add((field_name, rel, insts)) + def join(self, join_from, join_to, *args, **kwargs): + for fieldname in [join_from, join_to]: + field = self.schema.match_field(fieldname) + if not field: + raise ValueError("%s is not a valid field name" % fieldname) + elif not field.indexed: + raise SolrError("Can't join on non-indexed field '%s'" % fieldname) + query = self.Q(*args, **kwargs) + self.joins.add((join_from, join_to, query)) + def term_or_phrase(self, arg, force=None): return 'terms' if self.default_term_re.match(arg) else 'phrases' @@ -372,7 +392,6 @@ def add_boost(self, kwargs, boost_score): self.boosts.append((kwargs, boost_score)) - class BaseSearch(object): """Base class for common search options management""" option_modules = ('query_obj', 'filter_obj', 'paginator', @@ -476,6 +495,11 @@ def field_limit(self, fields=None, score=False, all_fields=False): newself.field_limiter.update(fields, score, all_fields) return newself + def join(self, join_from, join_to, *args, **kwargs): + newself = self.clone() + newself.query_obj.join(join_from, join_to, *args, **kwargs) + return newself + def options(self): options = {} for option_module in self.option_modules: diff --git a/sunburnt/test_search.py b/sunburnt/test_search.py index 8acc2c9..987ac1a 100644 --- a/sunburnt/test_search.py +++ b/sunburnt/test_search.py @@ -224,6 +224,16 @@ class MockInterface(object): ([], {'string_field':RawString("abc*???")}, [("q", "string_field:abc\\*\\?\\?\\?")]), ), + + # test join queries (solr 4.x) + "join":( + (["string_field", "int_field", "hello"], {}, + [("q", u"{!join from=string_field to=int_field}hello")]), + (["string_field", "int_field"], {"string_field": "hello"}, + [("q", u"{!join from=string_field to=int_field}string_field:hello")]), + (["string_field", "int_field"], {"boolean_field": True}, + [("q", u"{!join from=string_field 
to=int_field}boolean_field:true")]), + ), } if HAS_MX_DATETIME: good_query_data['query'] += \ From 75757e16795a07982e0fc78f3605a76339bd086c Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 1 May 2014 18:54:00 -0400 Subject: [PATCH 11/12] add sample join usage and link to documentation to sunburnt docs --- docs/queryingsolr.rst | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/docs/queryingsolr.rst b/docs/queryingsolr.rst index b2bb4dd..e52e9d6 100644 --- a/docs/queryingsolr.rst +++ b/docs/queryingsolr.rst @@ -244,7 +244,7 @@ Finally, ``response.result`` itself has the following attributes * ``response.result.numFound`` : total number of docs in the index which fulfilled the query. * ``response.result.docs`` : the actual results themselves (more easily extracted as ``list(response)``). -* ``response.result.start`` : if the number of docs is less than numFound, then this is the pagination offset. +* ``response.result.start`` : if the number of docs is less than numFound, then this is the pagination offset. Pagination @@ -329,7 +329,7 @@ selection of fields. {'score': 1.1931472000000001, 'id': u'0553573403'} {'score': 1.1931472000000001, 'id': u'0812550706'} - + More complex queries -------------------- @@ -703,7 +703,7 @@ will also return zero results, just the facet output. The ``facet_counts`` objects contains several sets of results - here, we're only interested in the ``facet_fields`` object. This contains a dictionary of results, keyed by each field where faceting was requested. (In this case, we only requested -faceting on one field). The dictionary value is a list of two-tuples, mapping the +faceting on one field). The dictionary value is a list of two-tuples, mapping the value of the faceted field (in this case, ``sequence_i`` takes the values '1', '2', or '3') to the numbers of results for each value. 
@@ -713,7 +713,7 @@ title, 2 of them have ``sequence_i=1``, 0 of them have ``sequence_i=2``, and 0 o You can facet on more than one field at a time: -:: +:: si.query(...).facet_by(field=["field1", "field2, ...]) @@ -798,7 +798,7 @@ convenient for displaying highlighted text snippets in a template; e.g., displaying highlights in a Django template might look like this: :: - + {% for snippet in book.solr_highlights.name %}

... {{ snippet|safe }} ...

{% endfor %} @@ -817,7 +817,7 @@ and all of these are exposed through sunburnt. The full list of supported option fields, snippets, fragsize, mergeContinuous, requireFieldMatch, maxAnalyzedChars, alternateField, maxAlternateFieldLength, formatter, simple.pre.simple.post, fragmenter, usePhrasehighlighter, hilightMultiTerm, regex.slop, regex.pattern, - regex.maxAnalyzedChars + regex.maxAnalyzedChars See the note above in `Faceting`_ about using keyword arguments with periods. @@ -868,7 +868,7 @@ standard behaviour. The ``SolrResponse`` object has a ``more_like_these`` attribute. This is a dictionary of ``SolrResult`` objects, one dictionary entry for each result of the main query. Here, the query only produced one result (because -we searched on the ``uniqueKey``. Inspecting the ``SolrResult`` object, we +we searched on the ``uniqueKey``. Inspecting the ``SolrResult`` object, we find that it contains only one document. We can read the above result as saying that under the ``mlt()`` parameters @@ -893,6 +893,22 @@ to avoid having to do the extra dictionary lookup. fields, count, mintf, mindf, minwl, mawl, maxqt, maxntp, boost +Join Queries +------------ + +From version 4.0 of Solr, join queries are supported (see http://wiki.apache.org/solr/Join). + +The join method takes a from field, a to field, and then a search term as +supported in other sunburnt query methods. + +Here are the sunburnt equivalents of the first two examples from the Solr +documentation. 
+ +:: + + si.query().join("manu_id", "id", "ipod") + si.query().join("manu_id", "id", compName_s="Belkin") + Spatial fields -------------- From 435ee92f7439477a428bf60071d6ec906ee8e716 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 6 May 2014 13:01:32 -0400 Subject: [PATCH 12/12] fix for join queries when combined with other search terms; include joins for nonzero check, etc --- sunburnt/search.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/sunburnt/search.py b/sunburnt/search.py index 2f285a1..cb01665 100644 --- a/sunburnt/search.py +++ b/sunburnt/search.py @@ -57,6 +57,8 @@ def serialize_debug(self, indent=0): print '%s%s' % (indentspace, phrase) for range in self.ranges: print '%s%s' % (indentspace, range) + for join in self.joins: + print '%s%s' % (indentspace, join) if self.subqueries: if self._and: print '%sAND:' % indentspace @@ -160,6 +162,7 @@ def normalize_node(obj): terms = [obj.terms] phrases = [obj.phrases] ranges = [obj.ranges] + joins = [obj.joins] subqueries = [] mutated = False @@ -172,6 +175,7 @@ def normalize_node(obj): terms.append(s.terms) phrases.append(s.phrases) ranges.append(s.ranges) + joins.append(s.joins) subqueries.extend(s.subqueries) mutated = True else: # just keep it unchanged @@ -182,6 +186,7 @@ def normalize_node(obj): obj = obj.clone(terms = obj.merge_term_dicts(terms), phrases = obj.merge_term_dicts(phrases), ranges = reduce(operator.or_, ranges), + joins = reduce(operator.or_, joins), # ?? 
subqueries = subqueries) # having recalculated subqueries, there may be the opportunity for further normalization, if we have zero or one subqueries left @@ -193,7 +198,8 @@ def normalize_node(obj): elif len(obj.subqueries) == 1: if obj._not and obj.subqueries[0]._not: obj = obj.clone(subqueries=obj.subqueries[0].subqueries, _not=False, _and=True) - elif (obj._and or obj._or) and not obj.terms and not obj.phrases and not obj.ranges and not obj.boosts: + elif (obj._and or obj._or) and not obj.terms and not obj.phrases \ + and not obj.ranges and not obj.joins and not obj.boosts: obj = obj.subqueries[0] obj.normalized = True return obj @@ -216,8 +222,7 @@ def serialize_to_unicode(self, level=0, op=None): else: u = [s for s in [self.serialize_term_queries(self.terms), self.serialize_term_queries(self.phrases), - self.serialize_range_queries(), - self.serialize_join_queries()] + self.serialize_range_queries()] if s] for q in self.subqueries: op_ = u'OR' if self._or else u'AND' @@ -225,6 +230,14 @@ def serialize_to_unicode(self, level=0, op=None): u.append(u"(%s)"%q.serialize_to_unicode(level=level+1, op=op_)) else: u.append(u"%s"%q.serialize_to_unicode(level=level+1, op=op_)) + + # NOTE: for some reason, combining other search terms with AND directly + # after join query generates no results; correct results are present + # without the AND + # for now, simply add any join queries last to avoid this behavior + if self.serialize_join_queries(): + u.append(self.serialize_join_queries()) + if self._and: return u' AND '.join(u) elif self._or: @@ -250,6 +263,7 @@ def __len__(self): return sum([sum(len(v) for v in self.terms.values()), sum(len(v) for v in self.phrases.values()), len(self.ranges), + len(self.joins), subquery_length]) def Q(self, *args, **kwargs): @@ -258,7 +272,8 @@ def Q(self, *args, **kwargs): return q def __nonzero__(self): - return bool(self.terms) or bool(self.phrases) or bool(self.ranges) or bool(self.subqueries) + return bool(self.terms) or 
bool(self.phrases) or bool(self.ranges) or \ + bool(self.joins) or bool(self.subqueries) def __or__(self, other): q = LuceneQuery(self.schema)