From 661099cc9d2f620157283da6be14584e7f6eb1fc Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:22:12 +0200 Subject: [PATCH 01/16] AlgoliaIndexBatch helper --- algoliasearch_django/models.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 82cdc46..c6308f1 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -528,3 +528,39 @@ def reindex_all(self, batch_size=1000): else: logger.warning('ERROR DURING REINDEXING %s: %s', self.model, e) + + +class AlgoliaIndexBatch(object): + """Helper to construct batches of requests and send them.""" + + def __init__(self, method, size, queue=None): + """ + Initialize the batch. + + :param method: the method used to flush the batch + :param size: the batch size + :param queue: optional queue to initialize the batch + """ + self.method = method + self.size = size + self._queue = queue or [] + + def __len__(self): + """Return the length of the batch.""" + return len(self._queue) + + def add(self, objs): + """Add objects to the batch.""" + self._queue.extend(objs) + + def flush(self): + """ + Flush the batch, i.e. perform the requests, by ensuring that + each batched request doesn't contain more than `self.size` operations. 
+ """ + results = [] + with self._queue: + results.append(self.method(self._queue[:self.size])) + self._queue = self._queue[self.size:] + + return results From 8fb9d7dc4636a4bbc46e1b60b2fcfa8ba566bbea Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:22:49 +0200 Subject: [PATCH 02/16] AlgoliaIndex.duplication_method setting --- algoliasearch_django/models.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index c6308f1..4e0ecfd 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -8,6 +8,7 @@ import sys from algoliasearch.helpers import AlgoliaException from django.db.models.query_utils import DeferredAttribute +from django.utils.inspect import func_supports_parameter from .settings import DEBUG @@ -61,6 +62,9 @@ class AlgoliaIndex(object): # Use to specify the settings of the index. settings = None + # User to specify a duplication method + duplication_method = None + # Used to specify if the instance should be indexed. # The attribute should be either: # - a callable that returns a boolean. 
@@ -145,6 +149,16 @@ def __init__(self, model, client, settings): if self.geo_field: self.geo_field = check_and_get_attr(model, self.geo_field) + # Check duplication_method + if self.duplication_method: + self.duplication_method = check_and_get_attr(model, self.duplication_method) + if not func_supports_parameter(self.duplication_method, 'raw_record'): + raise AlgoliaIndexError('{} doesnt accept a `raw_record` parameter.'.format( + self.duplication_method + )) + if not self.settings.get('attributeForDistinct'): + raise AlgoliaIndexError('Missing attributeForDistinct setting.') + # Check should_index + get the callable or attribute/field name if self.should_index: if hasattr(model, self.should_index): From 85600c3f5e756149ab0bada3539e40a4fcb6238d Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:23:35 +0200 Subject: [PATCH 03/16] define DEFAULT_BATCH_SIZE constant --- algoliasearch_django/models.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 4e0ecfd..fde845e 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -75,6 +75,8 @@ class AlgoliaIndex(object): # Name of the attribute to check on instances if should_index is not a callable _should_index_is_method = False + DEFAULT_BATCH_SIZE = 1000 + def __init__(self, model, client, settings): """Initializes the index.""" self.__init_index(client, model, settings) @@ -341,7 +343,7 @@ def delete_record(self, instance): logger.warning('%s FROM %s NOT DELETED: %s', objectID, self.model, e) - def update_records(self, qs, batch_size=1000, **kwargs): + def update_records(self, qs, batch_size=DEFAULT_BATCH_SIZE, **kwargs): """ Updates multiple records. @@ -435,7 +437,7 @@ def wait_task(self, task_id): else: logger.warning('%s NOT WAIT: %s', self.model, e) - def reindex_all(self, batch_size=1000): + def reindex_all(self, batch_size=DEFAULT_BATCH_SIZE): """ Reindex all the records. 
From 838d95edc9c7422b1dc24282fb32e1374a16652d Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:23:58 +0200 Subject: [PATCH 04/16] sadly, update_records() is not supported for duplication --- algoliasearch_django/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index fde845e..e7abc41 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -356,6 +356,11 @@ def update_records(self, qs, batch_size=DEFAULT_BATCH_SIZE, **kwargs): >>> update_records(MyModel, qs, myField=True) >>> qs.update(myField=True) """ + if self._has_duplication_method(): + raise AlgoliaIndexError( + 'update_records() with record duplication is not supported yet' + ) + tmp = {} for key, value in kwargs.items(): name = self.__translate_fields.get(key, None) From dee1b2e82317bff3975fd877071638e58c9b64a7 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:24:42 +0200 Subject: [PATCH 05/16] support record duplication when saving/deleting/reindex models --- algoliasearch_django/models.py | 79 ++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 22 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index e7abc41..6d17745 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -251,8 +251,34 @@ def get_raw_record(self, instance, update_fields=None): tmp['_tags'] = list(tmp['_tags']) logger.debug('BUILD %s FROM %s', tmp['objectID'], self.model) + if self._has_duplication_method(): + logger.debug('DUPLICATING MODEL %s', self.model) + records = self.duplication_method(instance, raw_record=tmp) + if isinstance(records, (list, tuple, types.GeneratorType)): + return records + + # unsupported type + raise TypeError( + '{} should return a list, tuple or generator.'.format( + self.duplication_method + ) + ) + return tmp + def _get_duplicated_raw_records(self, *args, **kwargs): + """ + Return a list of 
records, possibly duplicated if + `_has_duplication_method()` returns True. + It is an internal helper. + """ + objs = self.get_raw_record(*args, **kwargs) + return objs if self._has_duplication_method() else [objs] + + def _has_duplication_method(self): + """Return True if this AlgoliaIndex has a duplication_method method""" + return self.duplication_method is not None + def _has_should_index(self): """Return True if this AlgoliaIndex has a should_index method or attribute""" return self.should_index is not None @@ -315,32 +341,44 @@ def save_record(self, instance, update_fields=None, **kwargs): try: if update_fields: - obj = self.get_raw_record(instance, - update_fields=update_fields) - result = self.__index.partial_update_object(obj) + index_method = self.__index.partial_update_objects else: - obj = self.get_raw_record(instance) - result = self.__index.save_object(obj) - logger.info('SAVE %s FROM %s', obj['objectID'], self.model) + index_method = self.__index.save_objects + + objs = self._get_duplicated_raw_records(instance, update_fields=update_fields) + batch = AlgoliaIndexBatch( + index_method, self.DEFAULT_BATCH_SIZE, objs + ) + result = batch.flush() + logger.info('SAVE %s FROM %s', instance.pk, self.model) return result except AlgoliaException as e: if DEBUG: raise e else: - logger.warning('%s FROM %s NOT SAVED: %s', obj['objectID'], + logger.warning('%s FROM %s NOT SAVED: %s', instance.pk, self.model, e) def delete_record(self, instance): """Deletes the record.""" - objectID = self.objectID(instance) + if self._has_duplication_method(): + object_ids = [ + obj['objectID'] for obj in self.get_raw_record(instance) + ] + else: + object_ids = [self.objectID(instance)] try: - self.__index.delete_object(objectID) - logger.info('DELETE %s FROM %s', objectID, self.model) + batch = AlgoliaIndexBatch( + self.__index.delete_objects, + self.DEFAULT_BATCH_SIZE, object_ids + ) + batch.flush() + logger.info('DELETE %s FROM %s', instance.pk, self.model) except 
AlgoliaException as e: if DEBUG: raise e else: - logger.warning('%s FROM %s NOT DELETED: %s', objectID, + logger.warning('%s FROM %s NOT DELETED: %s', instance.pk, self.model, e) def update_records(self, qs, batch_size=DEFAULT_BATCH_SIZE, **kwargs): @@ -497,7 +535,7 @@ def reindex_all(self, batch_size=DEFAULT_BATCH_SIZE): logger.debug('CLEAR INDEX %s_tmp', self.index_name) counts = 0 - batch = [] + batch = AlgoliaIndexBatch(self.__tmp_index.save_objects, batch_size) if hasattr(self, 'get_queryset'): qs = self.get_queryset() @@ -508,17 +546,14 @@ def reindex_all(self, batch_size=DEFAULT_BATCH_SIZE): if not self._should_index(instance): continue # should not index - batch.append(self.get_raw_record(instance)) - if len(batch) >= batch_size: - self.__tmp_index.save_objects(batch) - logger.info('SAVE %d OBJECTS TO %s_tmp', len(batch), - self.index_name) - batch = [] + + objs = self._get_duplicated_raw_records(instance) + batch.add(objs) counts += 1 - if len(batch) > 0: - self.__tmp_index.save_objects(batch) - logger.info('SAVE %d OBJECTS TO %s_tmp', len(batch), - self.index_name) + + logger.info('SAVE %d OBJECTS TO %s_tmp', len(batch), + self.index_name) + batch.flush() self.__client.move_index(self.__tmp_index.index_name, self.__index.index_name) From 169a148837e09ac74233048c9db497a8e5219928 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:29:52 +0200 Subject: [PATCH 06/16] use inspect.signature() directly --- algoliasearch_django/models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 6d17745..89fe3ba 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -8,7 +8,6 @@ import sys from algoliasearch.helpers import AlgoliaException from django.db.models.query_utils import DeferredAttribute -from django.utils.inspect import func_supports_parameter from .settings import DEBUG @@ -154,7 +153,7 @@ def __init__(self, model, client, settings): 
# Check duplication_method if self.duplication_method: self.duplication_method = check_and_get_attr(model, self.duplication_method) - if not func_supports_parameter(self.duplication_method, 'raw_record'): + if 'raw_record' not in inspect.signature(self.duplication_method).parameters: raise AlgoliaIndexError('{} doesnt accept a `raw_record` parameter.'.format( self.duplication_method )) From 0c2d75db6fbb322d550a10ac49e77a72fc090772 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:40:33 +0200 Subject: [PATCH 07/16] do not hardcode instance.pk --- algoliasearch_django/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 89fe3ba..59e25a9 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -349,13 +349,13 @@ def save_record(self, instance, update_fields=None, **kwargs): index_method, self.DEFAULT_BATCH_SIZE, objs ) result = batch.flush() - logger.info('SAVE %s FROM %s', instance.pk, self.model) + logger.info('SAVE %s FROM %s', self.objectID(instance), self.model) return result except AlgoliaException as e: if DEBUG: raise e else: - logger.warning('%s FROM %s NOT SAVED: %s', instance.pk, + logger.warning('%s FROM %s NOT SAVED: %s', self.objectID(instance), self.model, e) def delete_record(self, instance): @@ -372,12 +372,12 @@ def delete_record(self, instance): self.DEFAULT_BATCH_SIZE, object_ids ) batch.flush() - logger.info('DELETE %s FROM %s', instance.pk, self.model) + logger.info('DELETE %s FROM %s', self.objectID(instance), self.model) except AlgoliaException as e: if DEBUG: raise e else: - logger.warning('%s FROM %s NOT DELETED: %s', instance.pk, + logger.warning('%s FROM %s NOT DELETED: %s', self.objectID(instance), self.model, e) def update_records(self, qs, batch_size=DEFAULT_BATCH_SIZE, **kwargs): From e2efd2065042719db7bfb74fd5e3296631e8c569 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:43:23 +0200 
Subject: [PATCH 08/16] sad typo: with -> while --- algoliasearch_django/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 59e25a9..64ac678 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -614,7 +614,7 @@ def flush(self): each batched request doesn't contain more than `self.size` operations. """ results = [] - with self._queue: + while self._queue: results.append(self.method(self._queue[:self.size])) self._queue = self._queue[self.size:] From a803be75350a4ae57fda95fcb05eaa6238149b30 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Tue, 16 Oct 2018 12:51:34 +0200 Subject: [PATCH 09/16] fix test_cyrillic --- tests/test_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_index.py b/tests/test_index.py index 1e8f1ec..4b6c7d3 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -655,7 +655,7 @@ class CyrillicIndex(AlgoliaIndex): self.user.bio = "крупнейших" self.user.save() self.index = CyrillicIndex(User, self.client, settings.ALGOLIA) - self.index.wait_task(self.index.save_record(self.user)["taskID"]) + self.index.wait_task(self.index.save_record(self.user)[0]["taskID"]) result = self.index.raw_search("крупнейших") self.assertEqual(result['nbHits'], 1, "Search should return one result") self.assertEqual(result['hits'][0]['name'], 'Algolia', "The result should be self.user") From 617839f24ceb0833eb50b0332cbb0fb15e861394 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 11:09:47 +0000 Subject: [PATCH 10/16] leverage django.utils.inspect --- algoliasearch_django/models.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 64ac678..d7e48c3 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -8,6 +8,10 @@ import sys from algoliasearch.helpers import 
AlgoliaException from django.db.models.query_utils import DeferredAttribute +from django.utils.inspect import ( + get_func_args, func_supports_parameter, + func_accepts_kwargs +) from .settings import DEBUG @@ -153,8 +157,15 @@ def __init__(self, model, client, settings): # Check duplication_method if self.duplication_method: self.duplication_method = check_and_get_attr(model, self.duplication_method) - if 'raw_record' not in inspect.signature(self.duplication_method).parameters: - raise AlgoliaIndexError('{} doesnt accept a `raw_record` parameter.'.format( + if ( + not func_accepts_kwargs(self.duplication_method) and not ( + func_supports_parameter(self.duplication_method, 'raw_record') and + func_supports_parameter(self.duplication_method, 'only_duplicated_ids') + ) + ): + raise AlgoliaIndexError( + '{} doesnt accept a `raw_record` or' + ' `only_duplicated_ids` parameter.'.format( self.duplication_method )) if not self.settings.get('attributeForDistinct'): @@ -293,13 +304,9 @@ def _should_really_index(self, instance): """Return True if according to should_index the object should be indexed.""" if self._should_index_is_method: is_method = inspect.ismethod(self.should_index) - try: - count_args = len(inspect.signature(self.should_index).parameters) - except AttributeError: - # noinspection PyDeprecation - count_args = len(inspect.getargspec(self.should_index).args) + count_args = len(get_func_args(self.should_index)) - if is_method or count_args is 1: + if is_method or count_args == 0: # bound method, call with instance return self.should_index(instance) else: From 76895236b10ce85cf75efa35b3df209dee076c66 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 11:10:08 +0000 Subject: [PATCH 11/16] fix missing import --- algoliasearch_django/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index d7e48c3..357d4eb 100644 --- a/algoliasearch_django/models.py +++ 
b/algoliasearch_django/models.py @@ -4,6 +4,7 @@ from functools import partial from itertools import chain import logging +import types import sys from algoliasearch.helpers import AlgoliaException From affcb159c21ed66f72bcd4f929bb4eaa2f389207 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 11:58:36 +0000 Subject: [PATCH 12/16] testing de-duplication in real world --- algoliasearch_django/models.py | 38 ++++-- tests/models.py | 18 +++ tests/test_index.py | 220 +++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+), 10 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 357d4eb..0563ea4 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -221,13 +221,16 @@ def _validate_geolocation(geolocation): ) ) - def get_raw_record(self, instance, update_fields=None): + def get_raw_record(self, instance, update_fields=None, only_duplicated_ids=None): """ Gets the raw record. If `update_fields` is set, the raw record will be build with only the objectID and the given fields. Also, `_geoloc` and `_tags` will not be included. + + If `only_duplicated_ids` is set, it needs to be a list of duplicated data + that was added to a record. 
""" tmp = {'objectID': self.objectID(instance)} @@ -264,7 +267,10 @@ def get_raw_record(self, instance, update_fields=None): logger.debug('BUILD %s FROM %s', tmp['objectID'], self.model) if self._has_duplication_method(): logger.debug('DUPLICATING MODEL %s', self.model) - records = self.duplication_method(instance, raw_record=tmp) + records = self.duplication_method( + instance, raw_record=tmp, + only_duplicated_ids=only_duplicated_ids + ) if isinstance(records, (list, tuple, types.GeneratorType)): return records @@ -330,7 +336,7 @@ def _should_really_index(self, instance): instance.__class__.__name__, self.should_index)) return attr_value - def save_record(self, instance, update_fields=None, **kwargs): + def save_record(self, instance, update_fields=None, only_duplicated_ids=None, **kwargs): """Saves the record. If `update_fields` is set, this method will use partial_update_object() @@ -338,6 +344,9 @@ def save_record(self, instance, update_fields=None, **kwargs): For more information about partial_update_object: https://github.com/algolia/algoliasearch-client-python#update-an-existing-object-in-the-index + + If `only_duplicated_ids` is set, it needs to be a list + of duplicated data that was added to the record. 
""" if not self._should_index(instance): # Should not index, but since we don't now the state of the @@ -352,7 +361,10 @@ def save_record(self, instance, update_fields=None, **kwargs): else: index_method = self.__index.save_objects - objs = self._get_duplicated_raw_records(instance, update_fields=update_fields) + objs = self._get_duplicated_raw_records( + instance, update_fields=update_fields, + only_duplicated_ids=only_duplicated_ids + ) batch = AlgoliaIndexBatch( index_method, self.DEFAULT_BATCH_SIZE, objs ) @@ -366,12 +378,16 @@ def save_record(self, instance, update_fields=None, **kwargs): logger.warning('%s FROM %s NOT SAVED: %s', self.objectID(instance), self.model, e) - def delete_record(self, instance): + def delete_record(self, instance, only_duplicated_ids=None): """Deletes the record.""" if self._has_duplication_method(): - object_ids = [ - obj['objectID'] for obj in self.get_raw_record(instance) - ] + if only_duplicated_ids: + object_ids = only_duplicated_ids + else: + object_ids = [ + obj['objectID'] + for obj in self.get_raw_record(instance) + ] else: object_ids = [self.objectID(instance)] try: @@ -379,8 +395,9 @@ def delete_record(self, instance): self.__index.delete_objects, self.DEFAULT_BATCH_SIZE, object_ids ) - batch.flush() + result = batch.flush() logger.info('DELETE %s FROM %s', self.objectID(instance), self.model) + return result except AlgoliaException as e: if DEBUG: raise e @@ -606,7 +623,8 @@ def __init__(self, method, size, queue=None): """ self.method = method self.size = size - self._queue = queue or [] + # casting the queue to a list here + self._queue = list(queue) if queue else [] def __len__(self): """Return the length of the batch.""" diff --git a/tests/models.py b/tests/models.py index 58e5723..99e7e98 100644 --- a/tests/models.py +++ b/tests/models.py @@ -68,6 +68,24 @@ def property_should_not_index(self): def property_string(self): return "foo" + def duplication_get_locations(self): + return [ + {'id': '1', 'name': 'Paris'}, + 
{'id': '2', 'name': 'San Francisco'}, + {'id': '3', 'name': 'Berlin'} + ] + + def duplication_get_records_per_location(self, raw_record, only_duplicated_ids=None): + for loc in self.duplication_get_locations(): + lang_record = raw_record.copy() + lang_record['objectID'] = '{}-{}'.format(lang_record['objectID'], loc['id']) + lang_record['location'] = loc['name'] + if not only_duplicated_ids or lang_record['objectID'] in only_duplicated_ids: + yield lang_record + + def duplication_get_records_per_location_wrong_type(self, **kwargs): + return 'oups' + class BlogPost(models.Model): author = models.ForeignKey( diff --git a/tests/test_index.py b/tests/test_index.py index 4b6c7d3..b8af092 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1,5 +1,6 @@ # coding=utf-8 import time +import types from django.conf import settings from django.test import TestCase @@ -524,6 +525,225 @@ class UserIndex(AlgoliaIndex): self.assertNotIn('_tags', obj) self.assertEqual(len(obj), 3) + def test_save_record_with_duplication_method(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + 'attributesToIndex': ['name', 'location'], + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + self.index.set_settings() + results = self.index.save_record(ex) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # distinct works + # matches are correct + data_paris = self.index.raw_search('paris', {'distinct': 1}) + self.assertEqual(data_paris['nbHits'], 1) + self.assertEqual(data_paris['hits'][0]['objectID'], '42-1') + data_sf = self.index.raw_search('san fran', {'distinct': 1}) + self.assertEqual(data_sf['nbHits'], 1) + self.assertEqual(data_sf['hits'][0]['objectID'], '42-2') 
+ data_berlin = self.index.raw_search('berlin', {'distinct': 1}) + self.assertEqual(data_berlin['nbHits'], 1) + self.assertEqual(data_berlin['hits'][0]['objectID'], '42-3') + + def test_save_record_with_duplication_method_only_some_ids(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + 'attributesToIndex': ['name', 'location'], + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + self.index.set_settings() + results = self.index.save_record(ex, only_duplicated_ids=['42-1']) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # distinct works + # matches are correct + data_paris = self.index.raw_search('paris', {'distinct': 1}) + self.assertEqual(data_paris['nbHits'], 1) + self.assertEqual(data_paris['hits'][0]['objectID'], '42-1') + # nothing + data_sf = self.index.raw_search('san fran', {'distinct': 1}) + self.assertEqual(data_sf['nbHits'], 0) + data_berlin = self.index.raw_search('berlin', {'distinct': 1}) + self.assertEqual(data_berlin['nbHits'], 0) + + def test_delete_record_with_duplication_method(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + 'attributesToIndex': ['name', 'location'], + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + self.index.set_settings() + + results = self.index.save_record(ex) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # distinct works + # deleting + results = 
self.index.delete_record(ex) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 0) # nothing! + + def test_delete_record_with_duplication_method_only_some_ids(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + 'attributesToIndex': ['name', 'location'], + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + self.index.set_settings() + + results = self.index.save_record(ex) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # distinct works + # deleting + results = self.index.delete_record(ex, only_duplicated_ids=['42-1', '42-2']) + for result in results: + self.index.wait_task(result['taskID']) + + data = self.index.raw_search('algolia', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # yes + data = self.index.raw_search('berlin', {'distinct': 1}) + self.assertEqual(data['nbHits'], 1) # yes + data = self.index.raw_search('paris', {'distinct': 1}) + self.assertEqual(data['nbHits'], 0) # no + + def test_get_raw_record_with_duplication_method(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + objs = self.index.get_raw_record(ex) + self.assertIsInstance(objs, types.GeneratorType) + objs = list(objs) + self.assertEqual(len(objs), 3) + self.assertEqual(objs, [ + { + 'objectID': '42-1', + 'name': 'Algolia', + 'location': 'Paris' + }, + { + 'objectID': '42-2', + 'name': 
'Algolia', + 'location': 'San Francisco' + }, + { + 'objectID': '42-3', + 'name': 'Algolia', + 'location': 'Berlin' + }, + ]) + + def test_get_raw_record_with_duplication_method_filtering(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + settings = { + 'attributeForDistinct': 'name', + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + objs = self.index.get_raw_record(ex, only_duplicated_ids=['42-1']) + self.assertIsInstance(objs, types.GeneratorType) + objs = list(objs) + self.assertEqual(len(objs), 1) + self.assertEqual(objs, [ + { + 'objectID': '42-1', + 'name': 'Algolia', + 'location': 'Paris' + }, + ]) + + def test_get_raw_record_with_duplication_method_wrong_return_type(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location_wrong_type' + settings = { + 'attributeForDistinct': 'name', + } + + ex = Example(uid=42, name='Algolia') + self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + expected_msg = ( + " " + "should return a list, tuple or generator." + ) + with self.assertRaises(TypeError, msg=expected_msg): + self.index.get_raw_record(ex) + + def test_wrong_duplication_method(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_locations' + settings = { + 'attributeForDistinct': 'name', + } + + expected_msg = ( + " doesnt accept a " + "`raw_record` or `only_duplicated_ids` parameter." 
+ ) + with self.assertRaises(AlgoliaIndexError, msg=expected_msg): + ExampleIndex(Example, self.client, settings.ALGOLIA) + + def test_missing_attributeForDistinct(self): + class ExampleIndex(AlgoliaIndex): + fields = ('name', ) + custom_objectID = 'uid' + duplication_method = 'duplication_get_records_per_location' + + expected_msg = "Missing attributeForDistinct setting." + with self.assertRaises(AlgoliaIndexError, msg=expected_msg): + ExampleIndex(Example, self.client, settings.ALGOLIA) + def test_should_index_method(self): class ExampleIndex(AlgoliaIndex): fields = 'name' From 319396e21e513bbf1eaea48601e173d36e2f3e94 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 12:09:59 +0000 Subject: [PATCH 13/16] add 2 new methods to AlgoliaEngine API --- algoliasearch_django/registration.py | 22 ++++++++++++++++++++++ tests/test_index.py | 23 ++++++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/algoliasearch_django/registration.py b/algoliasearch_django/registration.py index acf977e..71cb2f2 100644 --- a/algoliasearch_django/registration.py +++ b/algoliasearch_django/registration.py @@ -176,6 +176,28 @@ def reset(self, settings=None): """ self.__init__(settings=settings if settings is not None else SETTINGS) + # Record duplication specifics + + def add_duplicated_records(self, instance, new_record_ids): + """ + Add new duplicated records to an instance. + + See documentation here: + https://www.algolia.com/doc/guides/ranking/distinct/#distinct-to-index-large-records + """ + adapter = self.get_adapter_from_instance(instance) + return adapter.save_record(instance, only_duplicated_ids=new_record_ids) + + def delete_duplicated_records(self, instance, old_record_ids, **kwargs): + """ + Delete old duplicated records from an instance. 
+ + See documentation here: + https://www.algolia.com/doc/guides/ranking/distinct/#distinct-to-index-large-records + """ + adapter = self.get_adapter_from_instance(instance) + return adapter.delete_record(instance, only_duplicated_ids=old_record_ids, **kwargs) + # Signalling hooks. def __post_save_receiver(self, instance, **kwargs): diff --git a/tests/test_index.py b/tests/test_index.py index b8af092..650bcd0 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -565,9 +565,11 @@ class ExampleIndex(AlgoliaIndex): 'attributesToIndex': ['name', 'location'], } - ex = Example(uid=42, name='Algolia') - self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + algolia_engine.register(Example, ExampleIndex) + self.index = algolia_engine.get_adapter(Example) self.index.set_settings() + + ex = Example(uid=42, name='Algolia') results = self.index.save_record(ex, only_duplicated_ids=['42-1']) for result in results: self.index.wait_task(result['taskID']) @@ -584,6 +586,16 @@ class ExampleIndex(AlgoliaIndex): data_berlin = self.index.raw_search('berlin', {'distinct': 1}) self.assertEqual(data_berlin['nbHits'], 0) + # adding new records + results = algolia_engine.add_duplicated_records(ex, ['42-2']) + for result in results: + self.index.wait_task(result['taskID']) + + data_sf = self.index.raw_search('san fran', {'distinct': 1}) + self.assertEqual(data_sf['nbHits'], 1) + data_berlin = self.index.raw_search('berlin', {'distinct': 1}) + self.assertEqual(data_berlin['nbHits'], 0) + def test_delete_record_with_duplication_method(self): class ExampleIndex(AlgoliaIndex): fields = ('name', ) @@ -622,10 +634,11 @@ class ExampleIndex(AlgoliaIndex): 'attributesToIndex': ['name', 'location'], } - ex = Example(uid=42, name='Algolia') - self.index = ExampleIndex(Example, self.client, settings.ALGOLIA) + algolia_engine.register(Example, ExampleIndex) + self.index = algolia_engine.get_adapter(Example) self.index.set_settings() + ex = Example(uid=42, name='Algolia') results = 
self.index.save_record(ex) for result in results: self.index.wait_task(result['taskID']) @@ -633,7 +646,7 @@ class ExampleIndex(AlgoliaIndex): data = self.index.raw_search('algolia', {'distinct': 1}) self.assertEqual(data['nbHits'], 1) # distinct works # deleting - results = self.index.delete_record(ex, only_duplicated_ids=['42-1', '42-2']) + results = algolia_engine.delete_duplicated_records(ex, ['42-1', '42-2']) for result in results: self.index.wait_task(result['taskID']) From e56cf145cfe09ba4f25bc9ade788d82c0695febf Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 12:24:52 +0000 Subject: [PATCH 14/16] revert part of 617839f --- algoliasearch_django/models.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 0563ea4..60eb865 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -9,10 +9,7 @@ import sys from algoliasearch.helpers import AlgoliaException from django.db.models.query_utils import DeferredAttribute -from django.utils.inspect import ( - get_func_args, func_supports_parameter, - func_accepts_kwargs -) +from django.utils.inspect import func_supports_parameter, func_accepts_kwargs from .settings import DEBUG @@ -311,9 +308,13 @@ def _should_really_index(self, instance): """Return True if according to should_index the object should be indexed.""" if self._should_index_is_method: is_method = inspect.ismethod(self.should_index) - count_args = len(get_func_args(self.should_index)) + try: + count_args = len(inspect.signature(self.should_index).parameters) + except AttributeError: + # noinspection PyDeprecation + count_args = len(inspect.getargspec(self.should_index).args) - if is_method or count_args == 0: + if is_method or count_args == 1: # bound method, call with instance return self.should_index(instance) else: From 02374b96e91dd5ada8002f9dbcac994bf95840f6 Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 
15:13:42 +0000 Subject: [PATCH 15/16] fix multiple registration issues --- tests/test_index.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_index.py b/tests/test_index.py index 650bcd0..e173e56 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -41,10 +41,17 @@ def setUp(self): {'lat': 10.3, 'lng': -20.0}, {'lat': 22.3, 'lng': 10.0}, ] + self._registrations = [] def tearDown(self): if hasattr(self, 'index') and hasattr(self.index, 'index_name'): self.client.delete_index(self.index.index_name) + for model in self._registrations: + algolia_engine.unregister(model) + + def register(self, model, index): + algolia_engine.register(model, index) + self._registrations.append(model) def test_default_index_name(self): self.index = AlgoliaIndex(Website, self.client, settings.ALGOLIA) @@ -565,7 +572,7 @@ class ExampleIndex(AlgoliaIndex): 'attributesToIndex': ['name', 'location'], } - algolia_engine.register(Example, ExampleIndex) + self.register(Example, ExampleIndex) self.index = algolia_engine.get_adapter(Example) self.index.set_settings() @@ -634,7 +641,7 @@ class ExampleIndex(AlgoliaIndex): 'attributesToIndex': ['name', 'location'], } - algolia_engine.register(Example, ExampleIndex) + self.register(Example, ExampleIndex) self.index = algolia_engine.get_adapter(Example) self.index.set_settings() From 4f02b45040fe39d0d81defc117a1f5babd33b80e Mon Sep 17 00:00:00 2001 From: m-vdb Date: Fri, 9 Nov 2018 15:44:11 +0000 Subject: [PATCH 16/16] add compat code for Django 1.7 --- algoliasearch_django/models.py | 5 +++- algoliasearch_django/utils.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 algoliasearch_django/utils.py diff --git a/algoliasearch_django/models.py b/algoliasearch_django/models.py index 60eb865..50460d0 100644 --- a/algoliasearch_django/models.py +++ b/algoliasearch_django/models.py @@ -9,7 +9,10 @@ import sys from algoliasearch.helpers import 
AlgoliaException from django.db.models.query_utils import DeferredAttribute -from django.utils.inspect import func_supports_parameter, func_accepts_kwargs +try: + from django.utils.inspect import func_supports_parameter, func_accepts_kwargs +except ImportError: # Django 1.7 + from .utils import func_supports_parameter, func_accepts_kwargs from .settings import DEBUG diff --git a/algoliasearch_django/utils.py b/algoliasearch_django/utils.py new file mode 100644 index 0000000..b2c8dbd --- /dev/null +++ b/algoliasearch_django/utils.py @@ -0,0 +1,42 @@ +import inspect + +from django.utils import six + + +def func_accepts_kwargs(func): + """ + Shameless copy-paste from django.utils.inspect. + + FIXME: when dropping support for Django 1.7, remove this code + """ + if six.PY2: + # Not all callables are inspectable with getargspec, so we'll + # try a couple different ways but in the end fall back on assuming + # it is -- we don't want to prevent registration of valid but weird + # callables. + try: + argspec = inspect.getargspec(func) + except TypeError: + try: + argspec = inspect.getargspec(func.__call__) + except (TypeError, AttributeError): + argspec = None + return not argspec or argspec[2] is not None + + return any( + p for p in inspect.signature(func).parameters.values() + if p.kind == p.VAR_KEYWORD + ) + + +def func_supports_parameter(func, parameter): + """ + Shameless copy-paste from django.utils.inspect. + + FIXME: when dropping support for Django 1.7, remove this code + """ + if six.PY3: + return parameter in inspect.signature(func).parameters + else: + args, varargs, varkw, defaults = inspect.getargspec(func) + return parameter in args