Solr4 join support #88

Open · wants to merge 16 commits into master
30 changes: 23 additions & 7 deletions docs/queryingsolr.rst
@@ -244,7 +244,7 @@ Finally, ``response.result`` itself has the following attributes

* ``response.result.numFound`` : total number of docs in the index which fulfilled the query.
* ``response.result.docs`` : the actual results themselves (more easily extracted as ``list(response)``).
* ``response.result.start`` : if the number of docs is less than numFound, then this is the pagination offset.
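
For example, a minimal sketch of reading these attributes (``si`` is the
``SolrInterface`` object used throughout these docs, and the query term is
arbitrary):

::

    response = si.query("game").execute()
    print response.result.numFound   # total number of matching docs in the index
    print response.result.start      # pagination offset
    for doc in response:             # iterates over response.result.docs
        print doc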


Pagination
@@ -329,7 +329,7 @@ selection of fields.
{'score': 1.1931472000000001, 'id': u'0553573403'}
{'score': 1.1931472000000001, 'id': u'0812550706'}



More complex queries
--------------------
@@ -703,7 +703,7 @@ will also return zero results, just the facet output.
The ``facet_counts`` object contains several sets of results - here, we're only
interested in the ``facet_fields`` object. This contains a dictionary of results,
keyed by each field where faceting was requested. (In this case, we only requested
faceting on one field.) The dictionary value is a list of two-tuples, mapping the
value of the faceted field (in this case, ``sequence_i`` takes the values '1', '2', or '3')
to the number of results for each value.
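
For example, a minimal sketch of walking those counts for the field faceted
on above (assuming ``response`` holds the result of that query):

::

    for value, count in response.facet_counts.facet_fields["sequence_i"]:
        print value, count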

@@ -713,7 +713,7 @@ title, 2 of them have ``sequence_i=1``, 0 of them have ``sequence_i=2``, and 0 o

You can facet on more than one field at a time:

::

si.query(...).facet_by(field=["field1", "field2", ...])

@@ -798,7 +798,7 @@ convenient for displaying highlighted text snippets in a template;
e.g., displaying highlights in a Django template might look like this:

::

{% for snippet in book.solr_highlights.name %}
<p>... {{ snippet|safe }} ...</p>
{% endfor %}
@@ -817,7 +817,7 @@ and all of these are exposed through sunburnt. The full list of supported option
fields, snippets, fragsize, mergeContinuous, requireFieldMatch, maxAnalyzedChars,
alternateField, maxAlternateFieldLength, formatter, simple.pre, simple.post,
fragmenter, usePhraseHighlighter, highlightMultiTerm, regex.slop, regex.pattern,
regex.maxAnalyzedChars

See the note above in `Faceting`_ about using keyword arguments with periods.
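
For example, a sketch of passing one of the dotted options via dictionary
expansion (the field name and option values here are purely illustrative):

::

    si.query("game").highlight("name", snippets=3, **{"regex.slop": 0.5})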

@@ -868,7 +868,7 @@ standard behaviour.
The ``SolrResponse`` object has a ``more_like_these`` attribute. This is
a dictionary of ``SolrResult`` objects, one dictionary entry for each
result of the main query. Here, the query only produced one result (because
we searched on the ``uniqueKey``). Inspecting the ``SolrResult`` object, we
find that it contains only one document.

We can read the above result as saying that under the ``mlt()`` parameters
@@ -893,6 +893,22 @@ to avoid having to do the extra dictionary lookup.

fields, count, mintf, mindf, minwl, maxwl, maxqt, maxntp, boost
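
For example, a sketch of passing a couple of these options (the field name
and values are illustrative):

::

    si.query(id="0553573403").mlt("name", mintf=1, mindf=1).execute()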

Join Queries
------------

Join queries are supported from Solr version 4.0 onwards (see http://wiki.apache.org/solr/Join).

The ``join`` method takes a from field, a to field, and then a search term
expressed in the same way as in other sunburnt query methods.

Here are the sunburnt equivalents of the first two examples from the Solr
documentation.

::

si.query().join("manu_id", "id", "ipod")
si.query().join("manu_id", "id", compName_s="Belkin)


Spatial fields
--------------
49 changes: 44 additions & 5 deletions sunburnt/search.py
@@ -16,6 +16,7 @@ def __init__(self, schema, option_flag=None, original=None):
self.terms = collections.defaultdict(set)
self.phrases = collections.defaultdict(set)
self.ranges = set()
self.joins = set()
self.subqueries = []
self._and = True
self._or = self._not = self._pow = False
@@ -25,6 +26,7 @@ def __init__(self, schema, option_flag=None, original=None):
self.terms = copy.copy(original.terms)
self.phrases = copy.copy(original.phrases)
self.ranges = copy.copy(original.ranges)
self.joins = copy.copy(original.joins)
self.subqueries = copy.copy(original.subqueries)
self._or = original._or
self._and = original._and
@@ -55,6 +57,8 @@ def serialize_debug(self, indent=0):
print '%s%s' % (indentspace, phrase)
for range in self.ranges:
print '%s%s' % (indentspace, range)
for join in self.joins:
print '%s%s' % (indentspace, join)
if self.subqueries:
if self._and:
print '%sAND:' % indentspace
@@ -101,6 +105,13 @@ def serialize_range_queries(self):
s.append(u"%s:%s" % (name, range_s))
return u' AND '.join(s)

def serialize_join_queries(self):
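# serialize each stored join as a {!join from=... to=...} local-params prefix
# followed by its subquery, and AND the rendered joins together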
s = []
for join_from, join_to, query in sorted(self.joins):
s.append(u"{!join from=%s to=%s}%s" % (join_from, join_to, query))
return u' AND '.join(s)


def child_needs_parens(self, child):
if len(child) == 1:
return False
@@ -151,6 +162,7 @@ def normalize_node(obj):
terms = [obj.terms]
phrases = [obj.phrases]
ranges = [obj.ranges]
joins = [obj.joins]
subqueries = []

mutated = False
@@ -163,6 +175,7 @@ def normalize_node(obj):
terms.append(s.terms)
phrases.append(s.phrases)
ranges.append(s.ranges)
joins.append(s.joins)
subqueries.extend(s.subqueries)
mutated = True
else: # just keep it unchanged
@@ -173,6 +186,7 @@ def normalize_node(obj):
obj = obj.clone(terms = obj.merge_term_dicts(terms),
phrases = obj.merge_term_dicts(phrases),
ranges = reduce(operator.or_, ranges),
joins = reduce(operator.or_, joins), # ??
subqueries = subqueries)

# having recalculated subqueries, there may be the opportunity for further normalization, if we have zero or one subqueries left
@@ -184,7 +198,8 @@ def normalize_node(obj):
elif len(obj.subqueries) == 1:
if obj._not and obj.subqueries[0]._not:
obj = obj.clone(subqueries=obj.subqueries[0].subqueries, _not=False, _and=True)
elif (obj._and or obj._or) and not obj.terms and not obj.phrases and not obj.ranges and not obj.boosts:
elif (obj._and or obj._or) and not obj.terms and not obj.phrases \
and not obj.ranges and not obj.joins and not obj.boosts:
obj = obj.subqueries[0]
obj.normalized = True
return obj
@@ -215,6 +230,14 @@ def serialize_to_unicode(self, level=0, op=None):
u.append(u"(%s)"%q.serialize_to_unicode(level=level+1, op=op_))
else:
u.append(u"%s"%q.serialize_to_unicode(level=level+1, op=op_))

# NOTE: for some reason, combining other search terms with AND directly
# after join query generates no results; correct results are present
# without the AND
# for now, simply add any join queries last to avoid this behavior
if self.serialize_join_queries():
u.append(self.serialize_join_queries())

if self._and:
return u' AND '.join(u)
elif self._or:
@@ -240,6 +263,7 @@ def __len__(self):
return sum([sum(len(v) for v in self.terms.values()),
sum(len(v) for v in self.phrases.values()),
len(self.ranges),
len(self.joins),
subquery_length])

def Q(self, *args, **kwargs):
@@ -248,7 +272,8 @@ def Q(self, *args, **kwargs):
return q

def __nonzero__(self):
return bool(self.terms) or bool(self.phrases) or bool(self.ranges) or bool(self.subqueries)
return bool(self.terms) or bool(self.phrases) or bool(self.ranges) or \
bool(self.joins) or bool(self.subqueries)

def __or__(self, other):
q = LuceneQuery(self.schema)
@@ -279,7 +304,7 @@ def __pow__(self, value):
q._and = False
q._pow = value
return q

def add(self, args, kwargs):
self.normalized = False
_args = []
@@ -358,6 +383,16 @@ def add_range(self, field_name, rel, value):
insts = (field.instance_from_user_data(value),)
self.ranges.add((field_name, rel, insts))

def join(self, join_from, join_to, *args, **kwargs):
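# check that both join fields exist in the schema and are indexed, then
# store the join together with the subquery built from the remaining arguments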
for fieldname in [join_from, join_to]:
field = self.schema.match_field(fieldname)
if not field:
raise ValueError("%s is not a valid field name" % fieldname)
elif not field.indexed:
raise SolrError("Can't join on non-indexed field '%s'" % fieldname)
query = self.Q(*args, **kwargs)
self.joins.add((join_from, join_to, query))

def term_or_phrase(self, arg, force=None):
return 'terms' if self.default_term_re.match(arg) else 'phrases'

@@ -372,7 +407,6 @@ def add_boost(self, kwargs, boost_score):
self.boosts.append((kwargs, boost_score))



class BaseSearch(object):
"""Base class for common search options management"""
option_modules = ('query_obj', 'filter_obj', 'paginator',
@@ -476,6 +510,11 @@ def field_limit(self, fields=None, score=False, all_fields=False):
newself.field_limiter.update(fields, score, all_fields)
return newself

def join(self, join_from, join_to, *args, **kwargs):
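# clone this search and record the join on its query object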
newself = self.clone()
newself.query_obj.join(join_from, join_to, *args, **kwargs)
return newself

def options(self):
options = {}
for option_module in self.option_modules:
@@ -520,7 +559,7 @@ def params(self):

_count = None
def count(self):
# get the total count for the current query without retrieving any results
# cache it, since it may be needed multiple times when used with django paginator
if self._count is None:
# are we already paginated? then we'll behave as if that's
10 changes: 10 additions & 0 deletions sunburnt/test_search.py
@@ -224,6 +224,16 @@ class MockInterface(object):
([], {'string_field':RawString("abc*???")},
[("q", "string_field:abc\\*\\?\\?\\?")]),
),

# test join queries (solr 4.x)
"join":(
(["string_field", "int_field", "hello"], {},
[("q", u"{!join from=string_field to=int_field}hello")]),
(["string_field", "int_field"], {"string_field": "hello"},
[("q", u"{!join from=string_field to=int_field}string_field:hello")]),
(["string_field", "int_field"], {"boolean_field": True},
[("q", u"{!join from=string_field to=int_field}boolean_field:true")]),
),
}
if HAS_MX_DATETIME:
good_query_data['query'] += \