From d038f113cbeb94f56326d2c9ea13da56b9f12bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 21 Jun 2018 14:09:40 -0400 Subject: [PATCH 01/32] Added 'has_kpi_hooks' boolean field to 'XForm' model and 'mongo_success' boolean field to 'Instance' model --- onadata/apps/logger/fields.py | 70 +++++++++++++++++++ ...e_mongo_success_and_xform_has_kpi_hooks.py | 25 +++++++ onadata/apps/logger/models/instance.py | 5 ++ onadata/apps/logger/models/xform.py | 3 + onadata/libs/serializers/xform_serializer.py | 1 + 5 files changed, 104 insertions(+) create mode 100644 onadata/apps/logger/fields.py create mode 100644 onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py diff --git a/onadata/apps/logger/fields.py b/onadata/apps/logger/fields.py new file mode 100644 index 000000000..3d592d1f8 --- /dev/null +++ b/onadata/apps/logger/fields.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +from django.db import models + + +class LazyDefaultBooleanField(models.IntegerField): + """ + Allows specifying a default value for a new field without having to rewrite + every row in the corresponding table when migrating the database. + + Whenever the database contains a null: + 1. The field will present the default value instead of None; + 2. The field will overwrite the null with the default value if the + instance it belongs to is saved. + + models.BooleanField can't be nullable, so we use models.IntegerField to mimic + models.BooleanField behaviour + + Based on `kpi.fields.LazyDefaultJSONBField` + """ + def __init__(self, *args, **kwargs): + if kwargs.get('null', False): + raise FieldError('Do not manually specify null=True for a ' + 'LazyDefaultBooleanField') + self.lazy_default = kwargs.get('default') + if self.lazy_default is None: + raise FieldError('LazyDefaultBooleanField requires a default that ' + 'is not None') + elif not isinstance(self.lazy_default, bool): + raise FieldError("LazyDefaultBooleanField requires the default value " + "to be a boolean") + + kwargs['null'] = True + kwargs['default'] = None + super(LazyDefaultBooleanField, self).__init__(*args, **kwargs) + + def db_type(self, connection): + # No need to play with integer or biginteger. + return "smallint" + + def _get_lazy_default(self): + if callable(self.lazy_default): + return self.lazy_default() + else: + return self.lazy_default + + def deconstruct(self): + name, path, args, kwargs = super( + LazyDefaultBooleanField, self).deconstruct() + kwargs['default'] = self.lazy_default + del kwargs['null'] + return name, path, args, kwargs + + def from_db_value(self, value, *args, **kwargs): + if value is None: + return self._get_lazy_default() + # We want to play with booleans on Python side. + return True if value == 1 else False + + def pre_save(self, model_instance, add): + print("IN PRE_SAVE") + value = getattr(model_instance, self.attname) + if value is None: + setattr(model_instance, self.attname, self._get_lazy_default()) + value = self.__to_integer(self._get_lazy_default()) + + return value + + def __to_integer(self, value): + # We want to play with integers on DB side. + return 1 if value is True else 0 diff --git a/onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py b/onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py new file mode 100644 index 000000000..e2bf728b6 --- /dev/null +++ b/onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import onadata.apps.logger.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0007_add_validate_permission_on_xform'), + ] + + operations = [ + migrations.AddField( + model_name='instance', + name='mongo_success', + field=onadata.apps.logger.fields.LazyDefaultBooleanField(default=False), + ), + migrations.AddField( + model_name='xform', + name='has_kpi_hooks', + field=onadata.apps.logger.fields.LazyDefaultBooleanField(default=False), + ), + ] diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 873e8930c..420c17d6e 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -21,6 +21,7 @@ DELETEDAT, GEOLOCATION, ID, MONGO_STRFTIME, NOTES, SUBMISSION_TIME, TAGS,\ UUID, XFORM_ID_STRING, SUBMITTED_BY from onadata.libs.utils.model_tools import set_uuid +from onadata.apps.logger.fields import LazyDefaultBooleanField class FormInactiveError(Exception): @@ -137,6 +138,10 @@ class Instance(models.Model): validation_status = JSONField(null=True, default=None) + # TODO All old records will return False even they've been saved successfully in Mongo + # Update all records with correct value after the migration is done. + mongo_success = LazyDefaultBooleanField(default=False) + class Meta: app_label = 'logger' diff --git a/onadata/apps/logger/models/xform.py b/onadata/apps/logger/models/xform.py index 80a03aeaf..33ae5b533 100644 --- a/onadata/apps/logger/models/xform.py +++ b/onadata/apps/logger/models/xform.py @@ -25,6 +25,7 @@ from onadata.apps.logger.xform_instance_parser import XLSFormError from onadata.libs.models.base_model import BaseModel from ....koboform.pyxform_utils import convert_csv_to_xls +from onadata.apps.logger.fields import LazyDefaultBooleanField try: from formpack.utils.xls_to_ss_structure import xls_to_dicts @@ -95,6 +96,8 @@ class XForm(BaseModel): tags = TaggableManager() + has_kpi_hooks = LazyDefaultBooleanField(default=False) + class Meta: app_label = 'logger' unique_together = (("user", "id_string"), ("user", "sms_id_string")) diff --git a/onadata/libs/serializers/xform_serializer.py b/onadata/libs/serializers/xform_serializer.py index 245704a0e..90ca5b1d0 100644 --- a/onadata/libs/serializers/xform_serializer.py +++ b/onadata/libs/serializers/xform_serializer.py @@ -27,6 +27,7 @@ class XFormSerializer(serializers.HyperlinkedModelSerializer): lookup_field='pk') users = serializers.SerializerMethodField('get_xform_permissions') hash = serializers.SerializerMethodField() + has_kpi_hooks = serializers.BooleanField() @check_obj def get_hash(self, obj): From ec2694b29c517373bb389269d1df6780b5e1f8e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 21 Jun 2018 17:54:10 -0400 Subject: [PATCH 02/32] Renamed 'mongo_success' to 'is_sync_with_mongo'. Update its value on ParsedInstance.save() --- onadata/apps/api/tools.py | 3 +-- onadata/apps/logger/fields.py | 1 - ...ynced_with_mongo_and_xform_has_kpi_hooks.py} | 2 +- onadata/apps/logger/models/instance.py | 2 +- onadata/apps/viewer/models/parsed_instance.py | 17 +++++++++++++---- 5 files changed, 16 insertions(+), 9 deletions(-) rename onadata/apps/logger/migrations/{0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py => 0008_add_instance_is_synced_with_mongo_and_xform_has_kpi_hooks.py} (94%) diff --git a/onadata/apps/api/tools.py b/onadata/apps/api/tools.py index 888554174..dfa452446 100644 --- a/onadata/apps/api/tools.py +++ b/onadata/apps/api/tools.py @@ -440,8 +440,7 @@ def add_validation_status_to_instance(request, instance): if validation_status: instance.validation_status = validation_status instance.save() - instance.parsed_instance.update_mongo() - success = True + success = instance.parsed_instance.update_mongo(async=False) return success diff --git a/onadata/apps/logger/fields.py b/onadata/apps/logger/fields.py index 3d592d1f8..54bf181b9 100644 --- a/onadata/apps/logger/fields.py +++ b/onadata/apps/logger/fields.py @@ -57,7 +57,6 @@ def from_db_value(self, value, *args, **kwargs): return True if value == 1 else False def pre_save(self, model_instance, add): - print("IN PRE_SAVE") value = getattr(model_instance, self.attname) if value is None: setattr(model_instance, self.attname, self._get_lazy_default()) diff --git a/onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py b/onadata/apps/logger/migrations/0008_add_instance_is_synced_with_mongo_and_xform_has_kpi_hooks.py similarity index 94% rename from onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py rename to onadata/apps/logger/migrations/0008_add_instance_is_synced_with_mongo_and_xform_has_kpi_hooks.py index e2bf728b6..0a3b0ee70 100644 --- a/onadata/apps/logger/migrations/0008_add_instance_mongo_success_and_xform_has_kpi_hooks.py +++ b/onadata/apps/logger/migrations/0008_add_instance_is_synced_with_mongo_and_xform_has_kpi_hooks.py @@ -14,7 +14,7 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='instance', - name='mongo_success', + name='is_synced_with_mongo', field=onadata.apps.logger.fields.LazyDefaultBooleanField(default=False), ), migrations.AddField( diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 420c17d6e..5efdda72f 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -140,7 +140,7 @@ class Instance(models.Model): # TODO All old records will return False even they've been saved successfully in Mongo # Update all records with correct value after the migration is done. - mongo_success = LazyDefaultBooleanField(default=False) + is_synced_with_mongo = LazyDefaultBooleanField(default=False) class Meta: app_label = 'logger' diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 25fb86c97..9caf48f53 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -48,10 +48,12 @@ def update_mongo_instance(record): # http://api.mongodb.org/python/current/api/pymongo/collection.html#pymong\ # o.collection.Collection.save try: - return xform_instances.save(record) - except Exception: + xform_instances.save(record) + return True + except Exception as e: + logging.getLogger().error("update_mongo_instance - {}".format(str(e)), exc_info=True) logging.getLogger().warning('Submission could not be saved to Mongo.', exc_info=True) - pass + return False class ParsedInstance(models.Model): @@ -257,9 +259,16 @@ def update_mongo(self, async=True): return False else: if async: + # TODO update self.instance after async save is made update_mongo_instance.apply_async((), {"record": d}) else: - update_mongo_instance(d) + success = update_mongo_instance(d) + # Only update self.instance is `success` is different from + # current_value (`self.instance.is_sync_with_mongo`) + if success != self.instance.is_synced_with_mongo: + self.instance.is_synced_with_mongo = success + self.instance.save() + return True @staticmethod From 0e8b2981819cb2f4ee661e22fc82fe79e473185e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 22 Jun 2018 14:43:41 -0400 Subject: [PATCH 03/32] New rest service for KPI and (un)register it on XForm.save() --- onadata/apps/logger/models/xform.py | 8 +++++ onadata/apps/restservice/__init__.py | 18 +++++++++-- onadata/apps/restservice/app.py | 11 +++++++ ...add_related_name_with_delete_on_cascade.py | 24 +++++++++++++++ onadata/apps/restservice/models.py | 2 +- onadata/apps/restservice/services/kpi_hook.py | 26 ++++++++++++++++ onadata/apps/restservice/signals.py | 30 +++++++++++++++++++ 7 files changed, 116 insertions(+), 3 deletions(-) create mode 100644 onadata/apps/restservice/app.py create mode 100644 onadata/apps/restservice/migrations/0002_add_related_name_with_delete_on_cascade.py create mode 100644 onadata/apps/restservice/services/kpi_hook.py create mode 100644 onadata/apps/restservice/signals.py diff --git a/onadata/apps/logger/models/xform.py b/onadata/apps/logger/models/xform.py index 33ae5b533..45f9a132a 100644 --- a/onadata/apps/logger/models/xform.py +++ b/onadata/apps/logger/models/xform.py @@ -133,6 +133,14 @@ def data_dictionary(self): def has_instances_with_geopoints(self): return self.instances_with_geopoints + @property + def kpi_hook_service(self): + """ + Returns kpi hook service if it exists. XForm should have only one occurrence in any case. + :return: RestService + """ + return self.restservices.filter(name="kpi_hook").first() + def _set_id_string(self): matches = self.instance_id_regex.findall(self.xml) if len(matches) != 1: diff --git a/onadata/apps/restservice/__init__.py b/onadata/apps/restservice/__init__.py index f243e9446..8d809d6cf 100644 --- a/onadata/apps/restservice/__init__.py +++ b/onadata/apps/restservice/__init__.py @@ -1,2 +1,16 @@ -SERVICE_CHOICES = ((u'f2dhis2', u'f2dhis2'), (u'generic_json', u'JSON POST'), - (u'generic_xml', u'XML POST'), (u'bamboo', u'bamboo')) +SERVICE_F2DHIS2 = (u"f2dhis2", u"f2dhis2") +SERVICE_BAMBOO = (u"bamboo", u"bamboo") # Deprecated TODO to remove +SERVICE_GENERIC_XML = (u"generic_xml", u"XML POST") +SERVICE_GENERIC_JSON = (u"generic_json", u"JSON POST") +SERVICE_KPI_HOOK = (u"kpi_hook", u"KPI Hook POST") + +SERVICE_CHOICES = ( + SERVICE_F2DHIS2, + SERVICE_BAMBOO, + SERVICE_GENERIC_XML, + SERVICE_GENERIC_JSON, + SERVICE_KPI_HOOK +) + + +default_app_config = "onadata.apps.restservice.app.RestServiceConfig" \ No newline at end of file diff --git a/onadata/apps/restservice/app.py b/onadata/apps/restservice/app.py new file mode 100644 index 000000000..585caf282 --- /dev/null +++ b/onadata/apps/restservice/app.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- +from django.apps import AppConfig + + +class RestServiceConfig(AppConfig): + name = "onadata.apps.restservice" + verbose_name = "restservice" + + def ready(self): + # Register RestService signals + import onadata.apps.restservice.signals \ No newline at end of file diff --git a/onadata/apps/restservice/migrations/0002_add_related_name_with_delete_on_cascade.py b/onadata/apps/restservice/migrations/0002_add_related_name_with_delete_on_cascade.py new file mode 100644 index 000000000..214be7bbd --- /dev/null +++ b/onadata/apps/restservice/migrations/0002_add_related_name_with_delete_on_cascade.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('restservice', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='restservice', + name='name', + field=models.CharField(max_length=50, choices=[('f2dhis2', 'f2dhis2'), ('generic_json', 'JSON POST'), ('generic_xml', 'XML POST'), ('bamboo', 'bamboo'), ('kpi_hook', 'KPI Hook POST')]), + ), + migrations.AlterField( + model_name='restservice', + name='xform', + field=models.ForeignKey(related_name='restservices', to='logger.XForm'), + ), + ] diff --git a/onadata/apps/restservice/models.py b/onadata/apps/restservice/models.py index 34cf3256e..a3aeda45e 100644 --- a/onadata/apps/restservice/models.py +++ b/onadata/apps/restservice/models.py @@ -12,7 +12,7 @@ class Meta: unique_together = ('service_url', 'xform', 'name') service_url = models.URLField(ugettext_lazy("Service URL")) - xform = models.ForeignKey(XForm) + xform = models.ForeignKey(XForm, related_name="restservices", on_delete=models.CASCADE) name = models.CharField(max_length=50, choices=SERVICE_CHOICES) def __unicode__(self): diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py new file mode 100644 index 000000000..590f6e6f8 --- /dev/null +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +import json +import requests + +from django.conf import settings +from onadata.apps.restservice.RestServiceInterface import RestServiceInterface + + +class ServiceDefinition(RestServiceInterface): + id = u"kpi_hook" + verbose_name = u"KPI Hook POST" + + def send(self, endpoint, parsed_instance): + post_data = json.dumps(parsed_instance.to_dict_for_mongo()) + headers = {"Content-Type": "application/json"} + # Build the url in the service to avoid saving hardcoded domain name in the DB + url = "{}{}".format( + settings.KPI_URL, + endpoint + ) + try: + response = requests.post(url, headers=headers, data=post_data) + response.raise_for_status() + except requests.exceptions.RequestException as e: + # TODO Save failure in ParseInstance or Instance for later retries + pass \ No newline at end of file diff --git a/onadata/apps/restservice/signals.py b/onadata/apps/restservice/signals.py new file mode 100644 index 000000000..0e4633fcf --- /dev/null +++ b/onadata/apps/restservice/signals.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +from django.db.models.signals import post_save +from django.dispatch import receiver + +from onadata.apps.restservice import SERVICE_KPI_HOOK +from onadata.apps.logger.models import XForm +from onadata.apps.restservice.models import RestService + + +@receiver(post_save, sender=XForm) +def save_kpi_hook_service(sender, instance, **kwargs): + """ + Creates/Deletes Kpi hook Rest service related to XForm instance + :param sender: XForm class + :param instance: XForm instance + :param kwargs: dict + """ + kpi_hook_service = instance.kpi_hook_service + if instance.has_kpi_hooks: + # Only register the service if it hasn't been created yet. + if kpi_hook_service is None: + kpi_hook_service = RestService( + service_url="/assets/{}/submissions/".format(instance.id_string), + xform=instance, + name=SERVICE_KPI_HOOK[0] + ) + kpi_hook_service.save() + elif kpi_hook_service is not None: + # Only delete the service if it already exists. + kpi_hook_service.delete() \ No newline at end of file From 9f83d34253e4059e15b8d35b4ff27c2bfe14dea4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 22 Jun 2018 16:08:26 -0400 Subject: [PATCH 04/32] Use KPI_INTERNAL_URL to make call between KC and KPI in Rest Service --- onadata/apps/restservice/services/kpi_hook.py | 2 +- onadata/settings/common.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 590f6e6f8..18377e3c1 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -15,7 +15,7 @@ def send(self, endpoint, parsed_instance): headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB url = "{}{}".format( - settings.KPI_URL, + settings.KPI_INTERNAL_URL, endpoint ) try: diff --git a/onadata/settings/common.py b/onadata/settings/common.py index 0c6839fd3..88e2ac009 100644 --- a/onadata/settings/common.py +++ b/onadata/settings/common.py @@ -115,6 +115,7 @@ ENKETO_API_INSTANCE_IFRAME_URL = ENKETO_URL + ENKETO_API_ROOT + ENKETO_API_ENDPOINT_INSTANCE_IFRAME KPI_URL = os.environ.get('KPI_URL', False) +KPI_INTERNAL_URL = os.environ.get("KPI_INTERNAL_URL", KPI_URL) # specifically for site urls sent to enketo for form retrieval # `ENKETO_PROTOCOL` variable is overridden when internal domain name is used. From e10b8c00b010894e31142869b97a7ae1f20d183b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 3 Jul 2018 11:22:09 -0400 Subject: [PATCH 05/32] Updated pip requirements. Requests 2.4.1 > Request 2.19.1 --- requirements/base.pip | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/base.pip b/requirements/base.pip index d7cfcc6d6..101d8f80c 100644 --- a/requirements/base.pip +++ b/requirements/base.pip @@ -38,8 +38,9 @@ raven==6.1.0 # new export code relies on pandas>=0.12.0 -requests==2.4.1 elaphe==0.5.6 +requests==2.19.1 + # formpack exports git+https://github.com/kobotoolbox/formpack@kobocat-requirement From b14da1880ebdaaf331c7fca84531674af73b6f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 3 Jul 2018 11:24:05 -0400 Subject: [PATCH 06/32] Updated 'kpi_hook' service to post 'xml' and 'json' to kpi endpoint --- onadata/apps/restservice/services/kpi_hook.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 18377e3c1..16d8831a1 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import logging import json import requests @@ -11,7 +12,12 @@ class ServiceDefinition(RestServiceInterface): verbose_name = u"KPI Hook POST" def send(self, endpoint, parsed_instance): - post_data = json.dumps(parsed_instance.to_dict_for_mongo()) + + post_data = { + "xml": parsed_instance.instance.xml, + "json": parsed_instance.to_dict(), + "uuid": parsed_instance.instance.uuid + } headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB url = "{}{}".format( @@ -19,8 +25,10 @@ def send(self, endpoint, parsed_instance): endpoint ) try: - response = requests.post(url, headers=headers, data=post_data) + response = requests.post(url, headers=headers, json=post_data) response.raise_for_status() except requests.exceptions.RequestException as e: + logger = logging.getLogger("console_logger") + logger.error("KPI Hook - ServiceDefinition.send - {}".format(str(e))) # TODO Save failure in ParseInstance or Instance for later retries pass \ No newline at end of file From 0ceccc7d9373ddc49afdaf647845911fbb53d5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 3 Jul 2018 11:25:04 -0400 Subject: [PATCH 07/32] Added XML representation of an instance in API endpoint --- onadata/apps/api/viewsets/data_viewset.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/onadata/apps/api/viewsets/data_viewset.py b/onadata/apps/api/viewsets/data_viewset.py index 77bd538e0..8a1160249 100644 --- a/onadata/apps/api/viewsets/data_viewset.py +++ b/onadata/apps/api/viewsets/data_viewset.py @@ -561,6 +561,14 @@ def enketo(self, request, *args, **kwargs): return Response(data=data) + def retrieve(self, request, *args, **kwargs): + # XML rendering does not a serializer + if request.accepted_renderer.format == "xml": + instance = self.get_object() + return Response(instance.xml) + else: + super(DataViewSet, self).retrieve(request, *args, **kwargs) + def destroy(self, request, *args, **kwargs): self.object = self.get_object() From bf66b7709a9ed3d6d2d1a092478a9b7cd5c5df2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 3 Jul 2018 15:05:08 -0400 Subject: [PATCH 08/32] Added XML format in Instance detail endpoint --- onadata/apps/api/viewsets/data_viewset.py | 5 +++-- onadata/apps/api/viewsets/xform_viewset.py | 2 +- onadata/libs/renderers/renderers.py | 20 +++++++++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/onadata/apps/api/viewsets/data_viewset.py b/onadata/apps/api/viewsets/data_viewset.py index 8a1160249..57adb29d1 100644 --- a/onadata/apps/api/viewsets/data_viewset.py +++ b/onadata/apps/api/viewsets/data_viewset.py @@ -395,9 +395,10 @@ class DataViewSet(AnonymousUserPublicFormsMixin, ModelViewSet): renderers.CSVRenderer, renderers.CSVZIPRenderer, renderers.SAVZIPRenderer, - renderers.SurveyRenderer + renderers.RawXMLRenderer ] + content_negotiation_class = renderers.InstanceContentNegotiation filter_backends = (filters.AnonDjangoObjectPermissionFilter, filters.XFormOwnerFilter) permission_classes = (XFormPermissions,) @@ -567,7 +568,7 @@ def retrieve(self, request, *args, **kwargs): instance = self.get_object() return Response(instance.xml) else: - super(DataViewSet, self).retrieve(request, *args, **kwargs) + return super(DataViewSet, self).retrieve(request, *args, **kwargs) def destroy(self, request, *args, **kwargs): self.object = self.get_object() diff --git a/onadata/apps/api/viewsets/xform_viewset.py b/onadata/apps/api/viewsets/xform_viewset.py index d7437f26e..e152283bf 100644 --- a/onadata/apps/api/viewsets/xform_viewset.py +++ b/onadata/apps/api/viewsets/xform_viewset.py @@ -718,7 +718,7 @@ class XFormViewSet(AnonymousUserPublicFormsMixin, LabelsMixin, ModelViewSet): renderers.CSVRenderer, renderers.CSVZIPRenderer, renderers.SAVZIPRenderer, - renderers.SurveyRenderer + renderers.RawXMLRenderer ] queryset = XForm.objects.all() serializer_class = XFormSerializer diff --git a/onadata/libs/renderers/renderers.py b/onadata/libs/renderers/renderers.py index b1f7e0615..b02e6b05a 100644 --- a/onadata/libs/renderers/renderers.py +++ b/onadata/libs/renderers/renderers.py @@ -44,7 +44,7 @@ class SAVZIPRenderer(BaseRenderer): # TODO add KML, ZIP(attachments) support -class SurveyRenderer(BaseRenderer): +class RawXMLRenderer(BaseRenderer): media_type = 'application/xml' format = 'xml' charset = 'utf-8' @@ -158,3 +158,21 @@ def render(self, data, accepted_media_type=None, renderer_context=None): class StaticXMLRenderer(StaticHTMLRenderer): format = 'xml' media_type = 'text/xml' + + +class InstanceContentNegotiation(negotiation.DefaultContentNegotiation): + + def filter_renderers(self, renderers, format): + """ + Removes `rest_framework_xml.renderers.XMLRenderer` from the renderers list to + prioritize `RawXMLRenderer`. + Useful to display xml of Instance without any parsing. + + :param renderers: list + :param format: str + :return: list + """ + renderers = [renderer for renderer in renderers + if renderer.format == format and + isinstance(renderer, XMLRenderer) is False] + return renderers \ No newline at end of file From 6969e3e695d3c136f7fd01cbc5f50a0db956273a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 5 Jul 2018 11:07:11 -0400 Subject: [PATCH 09/32] Added instance_id in payload of kpi_hook service, renamed uuid to uid to be consistent with the kpi code --- onadata/apps/restservice/services/kpi_hook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 16d8831a1..c4faaa59d 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -16,7 +16,8 @@ def send(self, endpoint, parsed_instance): post_data = { "xml": parsed_instance.instance.xml, "json": parsed_instance.to_dict(), - "uuid": parsed_instance.instance.uuid + "uid": parsed_instance.instance.uuid, # Will be used to expose data in API + "id": parsed_instance.instance.id # Will be used internally by KPI to retry in case of failure } headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB From 576b7dd1c8c350425291d297569094aa2a8f92e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 6 Jul 2018 16:43:09 -0400 Subject: [PATCH 10/32] Changed json representation to match Mongo data --- onadata/apps/restservice/services/kpi_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index c4faaa59d..6b05dc711 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -15,7 +15,7 @@ def send(self, endpoint, parsed_instance): post_data = { "xml": parsed_instance.instance.xml, - "json": parsed_instance.to_dict(), + "json": parsed_instance.to_dict_for_mongo(), "uid": parsed_instance.instance.uuid, # Will be used to expose data in API "id": parsed_instance.instance.id # Will be used internally by KPI to retry in case of failure } From c5d97fbdc0830ca3c3efe53ad1ef9ced70b824a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 9 Jul 2018 17:48:16 -0400 Subject: [PATCH 11/32] Removed logger_instance.uid from kpi_hook payload --- onadata/apps/restservice/services/kpi_hook.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 6b05dc711..5324e29a3 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -16,7 +16,6 @@ def send(self, endpoint, parsed_instance): post_data = { "xml": parsed_instance.instance.xml, "json": parsed_instance.to_dict_for_mongo(), - "uid": parsed_instance.instance.uuid, # Will be used to expose data in API "id": parsed_instance.instance.id # Will be used internally by KPI to retry in case of failure } headers = {"Content-Type": "application/json"} From fc4a6f69a9713cddddecaf4c6133afe34ff21ba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 8 Aug 2018 15:37:38 -0400 Subject: [PATCH 12/32] Moved rest_service process to a Celery task --- onadata/apps/logger/fields.py | 6 +--- ..._posted_to_kpi_field_to_logger_instance.py | 20 ++++++++++++ onadata/apps/logger/models/instance.py | 4 +++ onadata/apps/restservice/services/bamboo.py | 12 +++++-- onadata/apps/restservice/services/f2dhis2.py | 12 ++++--- .../apps/restservice/services/generic_json.py | 8 +++-- .../apps/restservice/services/generic_xml.py | 10 ++++-- onadata/apps/restservice/services/kpi_hook.py | 24 +++++++------- onadata/apps/restservice/tasks.py | 32 +++++++++++++++++++ onadata/apps/restservice/utils.py | 25 +++++++++------ onadata/apps/viewer/tasks.py | 2 +- onadata/settings/kc_environ.py | 3 ++ 12 files changed, 119 insertions(+), 39 deletions(-) create mode 100644 onadata/apps/logger/migrations/0009_add_posted_to_kpi_field_to_logger_instance.py create mode 100644 onadata/apps/restservice/tasks.py diff --git a/onadata/apps/logger/fields.py b/onadata/apps/logger/fields.py index 54bf181b9..9bb613906 100644 --- a/onadata/apps/logger/fields.py +++ b/onadata/apps/logger/fields.py @@ -2,7 +2,7 @@ from django.db import models -class LazyDefaultBooleanField(models.IntegerField): +class LazyDefaultBooleanField(models.PositiveSmallIntegerField): """ Allows specifying a default value for a new field without having to rewrite every row in the corresponding table when migrating the database. @@ -33,10 +33,6 @@ def __init__(self, *args, **kwargs): kwargs['default'] = None super(LazyDefaultBooleanField, self).__init__(*args, **kwargs) - def db_type(self, connection): - # No need to play with integer or biginteger. - return "smallint" - def _get_lazy_default(self): if callable(self.lazy_default): return self.lazy_default() diff --git a/onadata/apps/logger/migrations/0009_add_posted_to_kpi_field_to_logger_instance.py b/onadata/apps/logger/migrations/0009_add_posted_to_kpi_field_to_logger_instance.py new file mode 100644 index 000000000..13ba748b6 --- /dev/null +++ b/onadata/apps/logger/migrations/0009_add_posted_to_kpi_field_to_logger_instance.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import onadata.apps.logger.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0008_add_instance_is_synced_with_mongo_and_xform_has_kpi_hooks'), + ] + + operations = [ + migrations.AddField( + model_name='instance', + name='posted_to_kpi', + field=onadata.apps.logger.fields.LazyDefaultBooleanField(default=False), + ), + ] diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 5efdda72f..fc474aa36 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -142,6 +142,10 @@ class Instance(models.Model): # Update all records with correct value after the migration is done. is_synced_with_mongo = LazyDefaultBooleanField(default=False) + # If XForm.has_kpi_hooks` is True, this field should be True either. + # It tells whether the instance has been successfully sent to KPI. + posted_to_kpi = LazyDefaultBooleanField(default=False) + class Meta: app_label = 'logger' diff --git a/onadata/apps/restservice/services/bamboo.py b/onadata/apps/restservice/services/bamboo.py index 959eb932b..ebdd60f4e 100644 --- a/onadata/apps/restservice/services/bamboo.py +++ b/onadata/apps/restservice/services/bamboo.py @@ -3,17 +3,23 @@ from pybamboo.connection import Connection from onadata.apps.restservice.RestServiceInterface import RestServiceInterface +from onadata.apps.logger.models import XForm from onadata.libs.utils.bamboo import get_new_bamboo_dataset, get_bamboo_url class ServiceDefinition(RestServiceInterface): + """ + @deprecated. + This service should not be used anymore + """ + id = u'bamboo' verbose_name = u'bamboo POST' - def send(self, url, parsed_instance): + def send(self, url, data): - xform = parsed_instance.instance.xform - rows = [parsed_instance.to_dict_for_mongo()] + xform = XForm.objects.get(id=data.get("xform_id")) + rows = [data.get("json")] # prefix meta columns names for bamboo prefix = (u'%(id_string)s_%(id)s' diff --git a/onadata/apps/restservice/services/f2dhis2.py b/onadata/apps/restservice/services/f2dhis2.py index 32f81c235..07393cf70 100644 --- a/onadata/apps/restservice/services/f2dhis2.py +++ b/onadata/apps/restservice/services/f2dhis2.py @@ -5,12 +5,16 @@ class ServiceDefinition(RestServiceInterface): + """ + @deprecated. + This service should not be used anymore + """ + id = u'f2dhis2' verbose_name = u'Formhub to DHIS2' - def send(self, url, parsed_instance): - instance = parsed_instance.instance - info = {"id_string": instance.xform.id_string, "uuid": instance.uuid} + def send(self, url, data): + info = {"id_string": data.get("xform_id_string"), "uuid": data.get("instance_uuid")} valid_url = url % info http = httplib2.Http() resp, content = http.request(valid_url, 'GET') @@ -19,4 +23,4 @@ def send_ziggy(self, url, ziggy_instance, uuid): info = {"id_string": ziggy_instance.xform.id_string, "uuid": uuid} valid_url = url % info response = requests.get(valid_url) - return response + return response \ No newline at end of file diff --git a/onadata/apps/restservice/services/generic_json.py b/onadata/apps/restservice/services/generic_json.py index f31ce553c..a89c0702b 100644 --- a/onadata/apps/restservice/services/generic_json.py +++ b/onadata/apps/restservice/services/generic_json.py @@ -6,10 +6,14 @@ class ServiceDefinition(RestServiceInterface): + """ + @deprecated. + This service should not be used anymore. + """ id = u'json' verbose_name = u'JSON POST' - def send(self, url, parsed_instance): - post_data = json.dumps(parsed_instance.to_dict_for_mongo()) + def send(self, url, data): + post_data = json.dumps(data.get("json")) headers = {"Content-Type": "application/json"} requests.post(url, headers=headers, data=post_data) diff --git a/onadata/apps/restservice/services/generic_xml.py b/onadata/apps/restservice/services/generic_xml.py index 81ed6d0f1..f328c8aae 100644 --- a/onadata/apps/restservice/services/generic_xml.py +++ b/onadata/apps/restservice/services/generic_xml.py @@ -4,12 +4,16 @@ class ServiceDefinition(RestServiceInterface): + """ + @deprecated. + This service should not be used anymore. + """ + id = u'xml' verbose_name = u'XML POST' - def send(self, url, parsed_instance): - instance = parsed_instance.instance + def send(self, url, data): headers = {"Content-Type": "application/xml"} http = httplib2.Http() resp, content = http.request( - url, method="POST", body=instance.xml, headers=headers) + url, method="POST", body=data.get("xml"), headers=headers) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 5324e29a3..d482c53f0 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -5,18 +5,19 @@ from django.conf import settings from onadata.apps.restservice.RestServiceInterface import RestServiceInterface +from onadata.apps.logger.models import Instance class ServiceDefinition(RestServiceInterface): id = u"kpi_hook" verbose_name = u"KPI Hook POST" - def send(self, endpoint, parsed_instance): + def send(self, endpoint, data): post_data = { - "xml": parsed_instance.instance.xml, - "json": parsed_instance.to_dict_for_mongo(), - "id": parsed_instance.instance.id # Will be used internally by KPI to retry in case of failure + "xml": data.get("xml"), + "json": data.get("json"), + "id": data.get("instance_id") # Will be used internally by KPI to retry in case of failure } headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB @@ -24,11 +25,10 @@ def send(self, endpoint, parsed_instance): settings.KPI_INTERNAL_URL, endpoint ) - try: - response = requests.post(url, headers=headers, json=post_data) - response.raise_for_status() - except requests.exceptions.RequestException as e: - logger = logging.getLogger("console_logger") - logger.error("KPI Hook - ServiceDefinition.send - {}".format(str(e))) - # TODO Save failure in ParseInstance or Instance for later retries - pass \ No newline at end of file + response = requests.post(url, headers=headers, json=post_data) + response.raise_for_status() + + # Save successful + instance = Instance.objects.get(id=data.get("instance_id")) + instance.posted_to_kpi = True + instance.save() \ No newline at end of file diff --git a/onadata/apps/restservice/tasks.py b/onadata/apps/restservice/tasks.py new file mode 100644 index 000000000..ffd1afe96 --- /dev/null +++ b/onadata/apps/restservice/tasks.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +import logging + +from celery import shared_task +from django.conf import settings + + +@shared_task(bind=True) +def service_definition_task(self, rest_service, data): + """ + Tries to send data to the endpoint of the hook + It retries 3 times maximum. + - after 2 minutes, + - after 20 minutes, + - after 200 minutes + + :param self: Celery.Task. + :param rest_service: RestService model. + :param data: dict. + """ + try: + service = rest_service.get_service_definition()() + service.send(rest_service.service_url, data) + except Exception as e: + logger = logging.getLogger("console_logger") + logger.error("service_definition_task - {}".format(str(e)), exc_info=True) + # Countdown is in seconds + countdown = 120 * (10 ** self.request.retries) + # Max retries is 3 by default. + raise self.retry(countdown=countdown, max_retries=settings.REST_SERVICE_MAX_RETRIES) + + return True \ No newline at end of file diff --git a/onadata/apps/restservice/utils.py b/onadata/apps/restservice/utils.py index 5f5b1c7aa..cc5b6e8b4 100644 --- a/onadata/apps/restservice/utils.py +++ b/onadata/apps/restservice/utils.py @@ -1,19 +1,26 @@ from onadata.apps.restservice.models import RestService +from onadata.apps.restservice.tasks import service_definition_task def call_service(parsed_instance): # lookup service instance = parsed_instance.instance - services = RestService.objects.filter(xform=instance.xform) + rest_services = RestService.objects.filter(xform=instance.xform) # call service send with url and data parameters - for sv in services: - # TODO: Queue service - try: - service = sv.get_service_definition()() - service.send(sv.service_url, parsed_instance) - except: - # TODO: Handle gracefully | requeue/resend - pass + for rest_service in rest_services: + # Celery can't pickle ParsedInstance object, + # let's use build a serializable object instead + # We don't really need `xform_id`, `xform_id_string`, `instance_uuid` + # We use them only for retro compatibility with all services (even if they are deprecated) + data = { + "xform_id": instance.xform.id, + "xform_id_string": instance.xform.id_string, + "instance_uuid": instance.uuid, + "instance_id": instance.id, + "xml": parsed_instance.instance.xml, + "json": parsed_instance.to_dict_for_mongo() + } + service_definition_task.delay(rest_service, data) def call_ziggy_services(ziggy_instance, uuid): diff --git a/onadata/apps/viewer/tasks.py b/onadata/apps/viewer/tasks.py index f569e7c1a..51e989bab 100644 --- a/onadata/apps/viewer/tasks.py +++ b/onadata/apps/viewer/tasks.py @@ -423,4 +423,4 @@ def log_stuck_exports_and_mark_failed(): ) # Export.save() is a busybody; bypass it with update() stuck_exports.filter(pk=stuck_export.pk).update( - internal_status=Export.FAILED) + internal_status=Export.FAILED) \ No newline at end of file diff --git a/onadata/settings/kc_environ.py b/onadata/settings/kc_environ.py index c3ae5c28c..7475d0b4b 100644 --- a/onadata/settings/kc_environ.py +++ b/onadata/settings/kc_environ.py @@ -259,3 +259,6 @@ } } ###### END ISSUE 242 FIX ###### + +# Number of times Celery retries to send data to external rest service +REST_SERVICE_MAX_RETRIES = 3 \ No newline at end of file From 4962943bf08765fcce8ed03a5831dd146d5ebcb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 8 Aug 2018 18:35:26 -0400 Subject: [PATCH 13/32] Updated remongo & sync mongo command to handle is_synced_with_mongo field, added command to update is_synced_with_mongo according to Mongo records --- .../commands/update_is_sync_with_mongo.py | 51 +++++++++++++++++++ .../viewer/management/commands/remongo.py | 2 + onadata/apps/viewer/models/parsed_instance.py | 21 ++++---- onadata/libs/utils/logger_tools.py | 3 ++ 4 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 onadata/apps/logger/management/commands/update_is_sync_with_mongo.py diff --git a/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py b/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py new file mode 100644 index 000000000..7c38bfd66 --- /dev/null +++ b/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# vim: ai ts=4 sts=4 et sw=4 coding=utf-8 +from time import sleep + +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError +from django.utils.translation import ugettext as _, ugettext_lazy +from optparse import make_option + +from onadata.apps.logger.models.instance import Instance + + +class Command(BaseCommand): + + help = ugettext_lazy("Updates is_synced_with_mongo property of Instance model") + option_list = BaseCommand.option_list + ( + make_option( + '--batchsize', + type='int', + default=100, + help=ugettext_lazy("Number of records to process per query")),) + + + def handle(self, *args, **kwargs): + batchsize = kwargs.get("batchsize", 100) + xform_instances = settings.MONGO_DB.instances + stop = False + offset = 0 + while stop is not True: + limit = offset + batchsize + instances_ids = Instance.objects.values_list("id", flat=True)[offset:limit] + if instances_ids: + instances_ids = [int(instance_id) for instance_id in instances_ids] + query = {"_id": {"$in": instances_ids}} + cursor = xform_instances.find(query, { "_id": 1 }) + mongo_ids = list(record.get("_id") for record in cursor) + not_synced_ids = set(instances_ids).difference(mongo_ids) + + self.stdout.write(_("Updating instances from #{} to #{}\n").format( + instances_ids[0], + instances_ids[-1])) + + if not_synced_ids: + Instance.objects.filter(id__in=not_synced_ids).update(is_synced_with_mongo=False) + + if mongo_ids: + Instance.objects.filter(id__in=mongo_ids).update(is_synced_with_mongo=True) + + offset += batchsize + else: + stop = True \ No newline at end of file diff --git a/onadata/apps/viewer/management/commands/remongo.py b/onadata/apps/viewer/management/commands/remongo.py index 31f136d3f..91b19c304 100644 --- a/onadata/apps/viewer/management/commands/remongo.py +++ b/onadata/apps/viewer/management/commands/remongo.py @@ -49,6 +49,8 @@ def handle(self, *args, **kwargs): for pi in queryset.iterator(): if pi.update_mongo(async=False): i += 1 + pi.instance.is_synced_with_mongo = True + pi.instance.save() else: print("\033[91m[ERROR] Could not parse instance {}\033[0m".format(pi.instance.uuid)) diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 9caf48f53..fc9cf2319 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -322,12 +322,19 @@ def _set_geopoint(self): def save(self, async=False, *args, **kwargs): # start/end_time obsolete: originally used to approximate for # instanceID, before instanceIDs were implemented + created = self.pk is None self.start_time = None self.end_time = None self._set_geopoint() super(ParsedInstance, self).save(*args, **kwargs) - # insert into Mongo - return self.update_mongo(async) + + # insert into Mongo. + # Signal has been removed because of a race condition. + # Rest Services were called before data was saved in DB. + success = self.update_mongo(async) + if success and created: + call_service(self) + return success def add_note(self, note): note = Note(instance=self.instance, note=note) @@ -370,13 +377,3 @@ def _remove_from_mongo(sender, **kwargs): xform_instances.remove(instance_id) pre_delete.connect(_remove_from_mongo, sender=ParsedInstance) - - -def rest_service_form_submission(sender, **kwargs): - parsed_instance = kwargs.get('instance') - created = kwargs.get('created') - if created: - call_service(parsed_instance) - - -post_save.connect(rest_service_form_submission, sender=ParsedInstance) diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 6e5d4fed9..fe4be4b13 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -565,6 +565,7 @@ def inject_instanceid(xml_str, uuid): def update_mongo_for_xform(xform, only_update_missing=True): + instance_ids = set( [i.id for i in Instance.objects.only('id').filter(xform=xform)]) sys.stdout.write("Total no of instances: %d\n" % len(instance_ids)) @@ -596,6 +597,8 @@ def update_mongo_for_xform(xform, only_update_missing=True): print("\033[91m[ERROR] - Instance #{}/uuid:{} - Could not save the parsed instance\033[0m".format( id, instance.uuid)) else: + instance.is_synced_with_mongo = True + instance.save() done += 1 # if 1000 records are done, flush mongo From a3eb2d2e5ff5f276fc12afdc642ebe325266962f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 9 Aug 2018 18:08:52 -0400 Subject: [PATCH 14/32] Changed Instance.save() to update only with need fields --- onadata/apps/logger/models/instance.py | 3 +-- onadata/apps/restservice/services/kpi_hook.py | 2 +- onadata/apps/viewer/management/commands/remongo.py | 2 +- onadata/apps/viewer/models/parsed_instance.py | 2 +- onadata/libs/utils/logger_tools.py | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index fc474aa36..659fb4c7e 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -138,8 +138,7 @@ class Instance(models.Model): validation_status = JSONField(null=True, default=None) - # TODO All old records will return False even they've been saved successfully in Mongo - # Update all records with correct value after the migration is done. + # TODO Don't forget to update all records with command `update_is_sync_with_mongo`. is_synced_with_mongo = LazyDefaultBooleanField(default=False) # If XForm.has_kpi_hooks` is True, this field should be True either. diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index d482c53f0..668301bdc 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -31,4 +31,4 @@ def send(self, endpoint, data): # Save successful instance = Instance.objects.get(id=data.get("instance_id")) instance.posted_to_kpi = True - instance.save() \ No newline at end of file + self.instance.save(update_fields=["posted_to_kpi"]) \ No newline at end of file diff --git a/onadata/apps/viewer/management/commands/remongo.py b/onadata/apps/viewer/management/commands/remongo.py index 91b19c304..af60fa2b6 100644 --- a/onadata/apps/viewer/management/commands/remongo.py +++ b/onadata/apps/viewer/management/commands/remongo.py @@ -50,7 +50,7 @@ def handle(self, *args, **kwargs): if pi.update_mongo(async=False): i += 1 pi.instance.is_synced_with_mongo = True - pi.instance.save() + pi.instance.save(update_fields=["is_synced_with_mongo"]) else: print("\033[91m[ERROR] Could not parse instance {}\033[0m".format(pi.instance.uuid)) diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index fc9cf2319..465b1524d 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -267,7 +267,7 @@ def update_mongo(self, async=True): # current_value (`self.instance.is_sync_with_mongo`) if success != self.instance.is_synced_with_mongo: self.instance.is_synced_with_mongo = success - self.instance.save() + self.instance.save(update_fields=["is_synced_with_mongo"]) return True diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index fe4be4b13..4baf6e8f7 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -598,7 +598,7 @@ def update_mongo_for_xform(xform, only_update_missing=True): id, instance.uuid)) else: instance.is_synced_with_mongo = True - instance.save() + instance.save(update_fields=["is_synced_with_mongo"]) done += 1 # if 1000 records are done, flush mongo From 5194de41111a46f6733823db064d4d1b33e50858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 10 Aug 2018 17:46:32 -0400 Subject: [PATCH 15/32] Removed useless and redundant Instance.save(), fixed sync_mongo --all for inactive XForm --- onadata/apps/logger/models/instance.py | 5 +---- onadata/apps/viewer/management/commands/remongo.py | 2 -- onadata/apps/viewer/models/parsed_instance.py | 6 +++--- onadata/libs/utils/logger_tools.py | 2 -- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 659fb4c7e..b6bc3b903 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -363,10 +363,7 @@ def point(self): return gc[0] def save(self, *args, **kwargs): - force = kwargs.get('force') - - if force: - del kwargs['force'] + force = kwargs.pop("force", False) self._check_active(force) diff --git a/onadata/apps/viewer/management/commands/remongo.py b/onadata/apps/viewer/management/commands/remongo.py index af60fa2b6..31f136d3f 100644 --- a/onadata/apps/viewer/management/commands/remongo.py +++ b/onadata/apps/viewer/management/commands/remongo.py @@ -49,8 +49,6 @@ def handle(self, *args, **kwargs): for pi in queryset.iterator(): if pi.update_mongo(async=False): i += 1 - pi.instance.is_synced_with_mongo = True - pi.instance.save(update_fields=["is_synced_with_mongo"]) else: print("\033[91m[ERROR] Could not parse instance {}\033[0m".format(pi.instance.uuid)) diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 465b1524d..7316f3fbe 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -266,9 +266,9 @@ def update_mongo(self, async=True): # Only update self.instance is `success` is different from # current_value (`self.instance.is_sync_with_mongo`) if success != self.instance.is_synced_with_mongo: - self.instance.is_synced_with_mongo = success - self.instance.save(update_fields=["is_synced_with_mongo"]) - + self.instance.is_synced_with_mongo = success + self.instance.save(update_fields=["is_synced_with_mongo"], force=True) + return True @staticmethod diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 4baf6e8f7..8db53f22f 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -597,8 +597,6 @@ def update_mongo_for_xform(xform, only_update_missing=True): print("\033[91m[ERROR] - Instance #{}/uuid:{} - Could not save the parsed instance\033[0m".format( id, instance.uuid)) else: - instance.is_synced_with_mongo = True - instance.save(update_fields=["is_synced_with_mongo"]) done += 1 # if 1000 records are done, flush mongo From a14f3886aa74dd26b21e834d41c24c6551c3008e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Tue, 21 Aug 2018 18:30:30 -0400 Subject: [PATCH 16/32] Send only UUID to kpi hook service --- onadata/apps/restservice/services/kpi_hook.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 668301bdc..f0c375e13 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -15,9 +15,7 @@ class ServiceDefinition(RestServiceInterface): def send(self, endpoint, data): post_data = { - "xml": data.get("xml"), - "json": data.get("json"), - "id": data.get("instance_id") # Will be used internally by KPI to retry in case of failure + "uuid": data.get("instance_uuid") # Will be used internally by KPI to retry in case of failure } headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB @@ -29,6 +27,6 @@ def send(self, endpoint, data): response.raise_for_status() # Save successful - instance = Instance.objects.get(id=data.get("instance_id")) + instance = Instance.objects.get(uuid=data.get("instance_uuid")) instance.posted_to_kpi = True self.instance.save(update_fields=["posted_to_kpi"]) \ No newline at end of file From 5f67122df99ccfed5a2948ab847d90ef9829891b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 30 Aug 2018 17:23:39 -0400 Subject: [PATCH 17/32] Bugfix when saving kpi_hook result --- onadata/apps/restservice/services/kpi_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index f0c375e13..0a976cfae 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -29,4 +29,4 @@ def send(self, endpoint, data): # Save successful instance = Instance.objects.get(uuid=data.get("instance_uuid")) instance.posted_to_kpi = True - self.instance.save(update_fields=["posted_to_kpi"]) \ No newline at end of file + instance.save(update_fields=["posted_to_kpi"]) \ No newline at end of file From 961561712d763643e028d38fe5124cf35334e54b Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 20 Sep 2018 17:23:31 -0400 Subject: [PATCH 18/32] Refactor submission counting without locking --- onadata/apps/logger/models/instance.py | 30 ++++++++++++-------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 807ab1db3..1854bcba6 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -3,6 +3,7 @@ import reversion +from django.db.models import F from django.db import transaction from django.contrib.gis.db import models from django.db.models.signals import post_save @@ -64,25 +65,22 @@ def update_xform_submission_count(sender, instance, created, **kwargs): if not created: return with transaction.atomic(): - xform = XForm.objects.select_for_update().only( - 'user_id', 'num_of_submissions' - ).get(pk=instance.xform_id) - xform.num_of_submissions += 1 - xform.last_submission_time = instance.date_created + xform = XForm.objects.only('user_id').get(pk=instance.xform_id) + # Update with `F` expression instead of `select_for_update` to avoid + # locks, which were mysteriously piling up during periods of high + # traffic + XForm.objects.filter(pk=instance.xform_id).update( + num_of_submissions=F('num_of_submissions') + 1, + last_submission_time=instance.date_created, + ) # Hack to avoid circular imports UserProfile = User.profile.related.related_model - profile, created = UserProfile.objects.select_for_update().only( - 'num_of_submissions' - ).get_or_create(user_id=xform.user_id) - profile.num_of_submissions += 1 - # Don't call `XForm.save()` since it reads a whole bunch of other - # attributes and makes our `only()` call less than worthless - XForm.objects.filter(pk=xform.pk).update( - num_of_submissions=xform.num_of_submissions, - last_submission_time=xform.last_submission_time + profile, created = UserProfile.objects.only('pk').get_or_create( + user_id=xform.user_id + ) + UserProfile.objects.filter(pk=profile.pk).update( + num_of_submissions=F('num_of_submissions') + 1, ) - # `UserProfile.save()` is well-mannered - profile.save(update_fields=['num_of_submissions']) def update_xform_submission_count_delete(sender, instance, **kwargs): From f4fb829dce1b1a1d4c5fccb43a4d376aa737467c Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Mon, 24 Sep 2018 12:49:03 -0400 Subject: [PATCH 19/32] Remove SPSS label export feature which is now provided by formpack via KPI --- onadata/apps/logger/views.py | 32 -------------------------------- onadata/apps/main/urls.py | 2 -- 2 files changed, 34 deletions(-) diff --git a/onadata/apps/logger/views.py b/onadata/apps/logger/views.py index 112fd87f1..778c21321 100644 --- a/onadata/apps/logger/views.py +++ b/onadata/apps/logger/views.py @@ -33,7 +33,6 @@ from django.views.decorators.csrf import csrf_exempt from django_digest import HttpDigestAuthenticator from pyxform import Survey -from pyxform.spss import survey_to_spss_label_zip from wsgiref.util import FileWrapper from onadata.apps.main.models import UserProfile, MetaData @@ -443,37 +442,6 @@ def download_jsonform(request, username, id_string): return response -def download_spss_labels(request, username, form_id_string): - xform = get_object_or_404(XForm, - user__username__iexact=username, - id_string__exact=form_id_string) - owner = User.objects.get(username__iexact=username) - helper_auth_helper(request) - - if not has_permission(xform, owner, request, xform.shared): - return HttpResponseForbidden('Not shared.') - - try: - xlsform_io= xform.to_xlsform() - if not xlsform_io: - messages.add_message(request, messages.WARNING, - _(u'No XLS file for your form ' - u'%(id)s') - % {'id': form_id_string}) - return HttpResponseRedirect("/%s" % username) - except: - return HttpResponseServerError('Error retrieving XLSForm.') - - survey= Survey.from_xls(filelike_obj=xlsform_io) - zip_filename= '{}_spss_labels.zip'.format(xform.id_string) - zip_io= survey_to_spss_label_zip(survey, xform.id_string) - - response = StreamingHttpResponse(FileWrapper(zip_io), - content_type='application/zip; charset=utf-8') - response['Content-Disposition'] = 'attachment; filename={}'.format(zip_filename) - return response - - @is_owner @require_POST def delete_xform(request, username, id_string): diff --git a/onadata/apps/main/urls.py b/onadata/apps/main/urls.py index 4b06b6078..28fd615ee 100644 --- a/onadata/apps/main/urls.py +++ b/onadata/apps/main/urls.py @@ -186,8 +186,6 @@ url(r"^(?P\w+)/exports/(?P[^/]+)/(?P\w+)" "/(?P[^/]+)$", 'onadata.apps.viewer.views.export_download'), - url(r'^(?P\w+)/forms/(?P[^/]+)/spss_labels\.zip$', - 'onadata.apps.logger.views.download_spss_labels', name='download_spss_labels'), url(r'^(?P\w+)/exports/', include('onadata.apps.export.urls')), url(r'^(?P\w+)/reports/', include('onadata.apps.survey_report.urls')), From f439a21d6524e6d14e9543856b2d3ac3e1014b4f Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Mon, 24 Sep 2018 12:49:42 -0400 Subject: [PATCH 20/32] Upgrade pyxform to 0.11.5; closes #480 --- onadata/apps/api/__init__.py | 54 ------------------------------------ requirements/base.pip | 4 +-- 2 files changed, 1 insertion(+), 57 deletions(-) diff --git a/onadata/apps/api/__init__.py b/onadata/apps/api/__init__.py index 47057b871..e69de29bb 100644 --- a/onadata/apps/api/__init__.py +++ b/onadata/apps/api/__init__.py @@ -1,54 +0,0 @@ - -import re - -import pyxform.odk_validate - - -############################################################################# -# WARNING -# This is a monkey patch to fix a bug in odk and should be removed as soon as -# the fix is upstream -############################################################################# - -def _cleanup_errors(error_message): - - # this is the same code as the original function - def get_last_item(xpathStr): - l = xpathStr.split("/") - return l[len(l) - 1] - - def replace_function(match): - strmatch = match.group() - if strmatch.startswith("/html/body") \ - or strmatch.startswith("/root/item") \ - or strmatch.startswith("/html/head/model/bind") \ - or strmatch.endswith("/item/value"): - return strmatch - return "${%s}" % get_last_item(match.group()) - pattern = "(/[a-z0-9\-_]+(?:/[a-z0-9\-_]+)+)" - error_message = re.compile(pattern, flags=re.I).sub(replace_function, - error_message) - k = [] - lastline = '' - for line in error_message.splitlines(): - has_java_filename = line.find('.java:') is not -1 - is_a_java_method = line.find('\tat') is not -1 - is_duplicate = (line == lastline) - lastline = line - if not has_java_filename and not is_a_java_method and not is_duplicate: - if line.startswith('java.lang.RuntimeException: '): - line = line.replace('java.lang.RuntimeException: ', '') - if line.startswith('org.javarosa.xpath.XPathUnhandledException: '): - line = line.replace('org.javarosa.xpath.XPathUnhandledException: ', '') - if line.startswith('java.lang.NullPointerException'): - continue - k.append(line) - - # original value causing UnicodeDecodeError - #return u'\n'.join(k) - - # Fix: - return '\n'.join(k).decode('ascii', errors="replace") - - -pyxform.odk_validate._cleanup_errors = _cleanup_errors diff --git a/requirements/base.pip b/requirements/base.pip index d7cfcc6d6..9d1d06ce5 100644 --- a/requirements/base.pip +++ b/requirements/base.pip @@ -18,9 +18,7 @@ poster==0.8.1 psycopg2==2.5.4 pymongo==2.7.2 lxml==3.4.0 -#-e git+https://github.com/onaio/pyxform.git@onaio#egg=pyxform -# kobo fork supports csvs with utf, character escaping, etc. --e git+https://github.com/kobotoolbox/pyxform.git@2.017.36#egg=pyxform +pyxform==0.11.5 django-reversion==2.0.8 xlrd==0.9.3 xlwt==0.7.5 From 0c0f55db14a927412c1094cfc8f6e55195c3992b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 28 Sep 2018 11:33:06 -0400 Subject: [PATCH 21/32] Changed Instance.save() to Instance.objects.update() to gain performance --- onadata/apps/restservice/services/kpi_hook.py | 4 +--- onadata/apps/viewer/models/parsed_instance.py | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 0a976cfae..43aed22ad 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -27,6 +27,4 @@ def send(self, endpoint, data): response.raise_for_status() # Save successful - instance = Instance.objects.get(uuid=data.get("instance_uuid")) - instance.posted_to_kpi = True - instance.save(update_fields=["posted_to_kpi"]) \ No newline at end of file + Instance.objects.filter(pk=data.get("instance_id")).update(posted_to_kpi=True) \ No newline at end of file diff --git a/onadata/apps/viewer/models/parsed_instance.py b/onadata/apps/viewer/models/parsed_instance.py index 7316f3fbe..52cd4e700 100644 --- a/onadata/apps/viewer/models/parsed_instance.py +++ b/onadata/apps/viewer/models/parsed_instance.py @@ -266,8 +266,9 @@ def update_mongo(self, async=True): # Only update self.instance is `success` is different from # current_value (`self.instance.is_sync_with_mongo`) if success != self.instance.is_synced_with_mongo: - self.instance.is_synced_with_mongo = success - self.instance.save(update_fields=["is_synced_with_mongo"], force=True) + # Skip the labor-intensive stuff in Instance.save() to gain performance + # Use .update() instead of .save() + Instance.objects.filter(pk=self.instance.id).update(is_synced_with_mongo=success) return True From 4038ebdf4f1aa838c5c5d6fedbc32c0dc5f10b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 28 Sep 2018 12:27:12 -0400 Subject: [PATCH 22/32] Send instance_id instead of instance_uuid to KPI --- onadata/apps/restservice/services/kpi_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/apps/restservice/services/kpi_hook.py b/onadata/apps/restservice/services/kpi_hook.py index 43aed22ad..aa4cbffe8 100644 --- a/onadata/apps/restservice/services/kpi_hook.py +++ b/onadata/apps/restservice/services/kpi_hook.py @@ -15,7 +15,7 @@ class ServiceDefinition(RestServiceInterface): def send(self, endpoint, data): post_data = { - "uuid": data.get("instance_uuid") # Will be used internally by KPI to retry in case of failure + "instance_id": data.get("instance_id") # Will be used internally by KPI to fetch data with KoboCatBackend } headers = {"Content-Type": "application/json"} # Build the url in the service to avoid saving hardcoded domain name in the DB From f0dcf514dffc577867f82e267311e2459dd56f32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 1 Oct 2018 09:01:37 -0400 Subject: [PATCH 23/32] update_is_sync_with_mongo - Order queryset by id --- .../logger/management/commands/update_is_sync_with_mongo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py b/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py index 7c38bfd66..bd8b5abda 100644 --- a/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py +++ b/onadata/apps/logger/management/commands/update_is_sync_with_mongo.py @@ -28,7 +28,7 @@ def handle(self, *args, **kwargs): offset = 0 while stop is not True: limit = offset + batchsize - instances_ids = Instance.objects.values_list("id", flat=True)[offset:limit] + instances_ids = Instance.objects.values_list("id", flat=True).order_by("id")[offset:limit] if instances_ids: instances_ids = [int(instance_id) for instance_id in instances_ids] query = {"_id": {"$in": instances_ids}} From 40c67f219778065d24f405b28de790179e1fc4b2 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Wed, 3 Oct 2018 22:36:23 -0400 Subject: [PATCH 24/32] Disable automatic export creation in `export_list` Exports can still be generated with the "New Export" button. Closes #484 --- onadata/apps/viewer/views.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 6e19cf311..5e3ccfdf5 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -405,15 +405,6 @@ def export_list(request, username, id_string, export_type): 'token': export_token, } - if should_create_new_export(xform, export_type): - try: - create_async_export( - xform, export_type, query=None, force_xlsx=True, - options=options) - except Export.ExportTypeError: - return HttpResponseBadRequest( - _("%s is not a valid export type" % export_type)) - metadata = MetaData.objects.filter(xform=xform, data_type="external_export")\ .values('id', 'data_value') From 877e155a142c59cf80a286eb8ad8ea0fab772e85 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 25 Oct 2018 03:16:56 -0400 Subject: [PATCH 25/32] Just get the XForm: don't bother counting first! --- onadata/libs/utils/logger_tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 8db53f22f..99dbf287c 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -117,9 +117,11 @@ def get_xform_from_submission(xml, username, uuid=None): if uuid: # try find the form by its uuid which is the ideal condition - if XForm.objects.filter(uuid=uuid).count() > 0: + try: xform = XForm.objects.get(uuid=uuid) - + except XForm.DoesNotExist: + pass + else: return xform id_string = get_id_string_from_xml_str(xml) From 8370d27275c94bffa3d5e37f075df7b50e77ada1 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Thu, 25 Oct 2018 03:17:53 -0400 Subject: [PATCH 26/32] Do not lock any rows until attachments are saved Try not to do any locking until as close as possible to the end of the transaction. Closes #490 --- onadata/apps/logger/models/instance.py | 3 ++ onadata/libs/utils/logger_tools.py | 54 +++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/onadata/apps/logger/models/instance.py b/onadata/apps/logger/models/instance.py index 254de3629..8b7cb63ad 100644 --- a/onadata/apps/logger/models/instance.py +++ b/onadata/apps/logger/models/instance.py @@ -65,6 +65,9 @@ def submission_time(): def update_xform_submission_count(sender, instance, created, **kwargs): if not created: return + # `defer_counting` is a Python-only attribute + if getattr(instance, 'defer_counting', False): + return with transaction.atomic(): xform = XForm.objects.only('user_id').get(pk=instance.xform_id) # Update with `F` expression instead of `select_for_update` to avoid diff --git a/onadata/libs/utils/logger_tools.py b/onadata/libs/utils/logger_tools.py index 99dbf287c..b706cf981 100644 --- a/onadata/libs/utils/logger_tools.py +++ b/onadata/libs/utils/logger_tools.py @@ -38,7 +38,9 @@ from onadata.apps.logger.models.instance import ( FormInactiveError, InstanceHistory, - get_id_string_from_xml_str) + get_id_string_from_xml_str, + update_xform_submission_count, +) from onadata.apps.logger.models import XForm from onadata.apps.logger.models.xform import XLSFormError from onadata.apps.logger.xform_instance_parser import ( @@ -69,7 +71,14 @@ mongo_instances = settings.MONGO_DB.instances -def _get_instance(xml, new_uuid, submitted_by, status, xform): +def _get_instance(xml, new_uuid, submitted_by, status, xform, + defer_counting=False): + ''' + `defer_counting=False` will set a Python-only attribute of the same name on + the *new* `Instance` if one is created. This will prevent + `update_xform_submission_count()` from doing anything, which avoids locking + any rows in `logger_xform` or `main_userprofile`. + ''' # check if its an edit submission old_uuid = get_deprecated_uuid_from_xml(xml) instances = Instance.objects.filter(uuid=old_uuid) @@ -86,8 +95,19 @@ def _get_instance(xml, new_uuid, submitted_by, status, xform): instance.save() else: # new submission - instance = Instance.objects.create( - xml=xml, user=submitted_by, status=status, xform=xform) + + # Avoid `Instance.objects.create()` so that we can set a Python-only + # attribute, `defer_counting`, before saving + instance = Instance() + instance.xml = xml + instance.user = submitted_by + instance.status = status + instance.xform = xform + if defer_counting: + # Only set the attribute if requested, i.e. don't bother ever + # setting it to `False` + instance.defer_counting = True + instance.save() return instance @@ -210,7 +230,22 @@ def save_submission(xform, xml, media_files, new_uuid, submitted_by, status, if not date_created_override: date_created_override = get_submission_date_from_xml(xml) - instance = _get_instance(xml, new_uuid, submitted_by, status, xform) + # We have to save the `Instance` to the database before we can associate + # any `Attachment`s with it, but we are inside a transaction and saving + # attachments is slow! Usually creating an `Instance` updates the + # submission count of the parent `XForm` automatically via a `post_save` + # signal, but that takes a lock on `logger_xform` that persists until the + # end of the transaction. We must avoid doing that until all attachments + # are saved, and we are as close as possible to the end of the transaction. + # See https://github.com/kobotoolbox/kobocat/issues/490. + # + # `_get_instance(..., defer_counting=True)` skips incrementing the + # submission counters and returns an `Instance` with a `defer_counting` + # attribute set to `True` *if* a new instance was created. We are + # responsible for calling `update_xform_submission_count()` if the returned + # `Instance` has `defer_counting = True`. + instance = _get_instance(xml, new_uuid, submitted_by, status, xform, + defer_counting=True) save_attachments(instance, media_files) @@ -230,6 +265,15 @@ def save_submission(xform, xml, media_files, new_uuid, submitted_by, status, if not created: pi.save(async=False) + # Now that the slow tasks are complete and we are (hopefully!) close to the + # end of the transaction, update the submission count if the `Instance` was + # newly created + if getattr(instance, 'defer_counting', False): + # Remove the Python-only attribute + del instance.defer_counting + update_xform_submission_count(sender=None, instance=instance, + created=True) + return instance From 75eb550b64f9f77a342fb30eda9999a80fe77e95 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Tue, 20 Nov 2018 08:48:04 -0500 Subject: [PATCH 27/32] Add a new `media_file_basename` column to improve performance when a single `auth_user` has may rows in `logger_attachment`. This commit lacks a management command to populate the `media_file_basename` column, but we'll say that it closes #495. --- .../0010_attachment_media_file_basename.py | 19 ++++++++++++++++ onadata/apps/logger/models/attachment.py | 22 +++++++++++++++++++ onadata/apps/viewer/views.py | 21 +++++++++++------- 3 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 onadata/apps/logger/migrations/0010_attachment_media_file_basename.py diff --git a/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py b/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py new file mode 100644 index 000000000..393bfb732 --- /dev/null +++ b/onadata/apps/logger/migrations/0010_attachment_media_file_basename.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('logger', '0009_add_posted_to_kpi_field_to_logger_instance'), + ] + + operations = [ + migrations.AddField( + model_name='attachment', + name='media_file_basename', + field=models.CharField(db_index=True, max_length=260, null=True, blank=True), + ), + ] diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py index f40b48be5..6b3106e2e 100644 --- a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -1,4 +1,5 @@ import os +import re import mimetypes from hashlib import md5 @@ -28,18 +29,39 @@ def hash_attachment_contents(contents): class Attachment(models.Model): instance = models.ForeignKey(Instance, related_name="attachments") media_file = models.FileField(upload_to=upload_to, max_length=380, db_index=True) + media_file_basename = models.CharField( + max_length=260, null=True, blank=True, db_index=True) mimetype = models.CharField( max_length=100, null=False, blank=True, default='') + MEDIA_FILE_BASENAME_PATTERN = re.compile(r'/([^/]+)$') + class Meta: app_label = 'logger' + def _populate_media_file_basename(self): + # TODO: write a management command to call this (and save) for all + # existing attachments? For the moment, the `media_file_basename` + # column can be populated directly in Postgres using: + # UPDATE logger_attachment + # SET media_file_basename = substring(media_file from '/([^/]+)$'); + if self.media_file: + match = re.search( + self.MEDIA_FILE_BASENAME_PATTERN, self.media_file.name) + if match: + self.media_file_basename = match.groups()[0] + else: + self.media_file_basename = '' + def save(self, *args, **kwargs): if self.media_file and self.mimetype == '': # guess mimetype mimetype, encoding = mimetypes.guess_type(self.media_file.name) if mimetype: self.mimetype = mimetype + + self._populate_media_file_basename() + super(Attachment, self).save(*args, **kwargs) @property diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 5e3ccfdf5..36803438c 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -14,6 +14,7 @@ from django.core.files.storage import get_storage_class from django.core.servers.basehttp import FileWrapper from django.core.urlresolvers import reverse +from django.db.models import Q from django.http import ( HttpResponseForbidden, HttpResponseRedirect, HttpResponseNotFound, HttpResponseBadRequest, HttpResponse) @@ -674,26 +675,30 @@ def attachment_url(request, size='medium'): # TODO: how to make sure we have the right media file, # this assumes duplicates are the same file if media_file: - mtch = re.search('^([^\/]+)/attachments(/[^\/]+)$', media_file) + mtch = re.search(r'^([^/]+)/attachments/([^/]+)$', media_file) if mtch: # in cases where the media_file url created by instance.html's # _attachment_url function is in the wrong format, this will # match attachments with the correct owner and the same file name (username, filename) = mtch.groups() - result = Attachment.objects.filter(**{ - 'instance__xform__user__username': username, - }).filter(**{ - 'media_file__endswith': filename, - })[0:1] + result = Attachment.objects.filter( + instance__xform__user__username=username, + ).filter( + Q(media_file_basename=filename) | Q( + media_file_basename=None, + media_file__endswith='/' + filename + ) + )[0:1] else: # search for media_file with exact matching name result = Attachment.objects.filter(media_file=media_file)[0:1] - if len(result) == 0: + try: + attachment = result[0] + except IndexError: media_file_logger.info('attachment not found') return HttpResponseNotFound(_(u'Attachment not found')) - attachment = result[0] if not attachment.mimetype.startswith('image'): return redirect(attachment.media_file.url) From 93cea58b217c8e6941f68d00ce09fbff157d7869 Mon Sep 17 00:00:00 2001 From: "John N. Milner" Date: Tue, 20 Nov 2018 15:36:31 -0500 Subject: [PATCH 28/32] Remove Fabric configuration. Closes #497 --- fabfile/__init__.py | 1 - fabfile/docker.py | 261 -------------------------------------------- 2 files changed, 262 deletions(-) delete mode 100644 fabfile/__init__.py delete mode 100644 fabfile/docker.py diff --git a/fabfile/__init__.py b/fabfile/__init__.py deleted file mode 100644 index 8a9fef770..000000000 --- a/fabfile/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .docker import deploy, publish_docker_image diff --git a/fabfile/docker.py b/fabfile/docker.py deleted file mode 100644 index 9a17f12bf..000000000 --- a/fabfile/docker.py +++ /dev/null @@ -1,261 +0,0 @@ -import os -import re -import json -import tempfile - -from fabric.api import ( - abort, - cd, - env, - hide, - lcd, - local, - prompt, - run, - settings, - sudo, -) -from fabric.contrib import files - - -SERVICE_NAME = 'kobocat' -GIT_REPO = 'https://github.com/kobotoolbox/{}.git'.format(SERVICE_NAME) -DOCKER_HUB_REPO = 'kobotoolbox/{}'.format(SERVICE_NAME) -DOCKER_COMPOSE_IMAGE_UPDATE_PATTERN = re.compile( - r'^( *image: *){}.*$'.format(DOCKER_HUB_REPO) -) -CONTAINER_SRC_DIR_ENV_VAR = '{}_SRC_DIR'.format(SERVICE_NAME.upper()) -UPDATE_STATIC_FILE = '{}/LAST_UPDATE.txt'.format(SERVICE_NAME) -# These may be defined in deployments.json -DEPLOYMENT_SETTINGS = ( - 'host_string', # user@host for SSH connection - 'docker_config_path', # Location must house `docker_compose.yml` - ####### For deploying pre-built images ####### - 'docker_git_compose_file', # YML file to update with tag being deployed - 'docker_git_repo', # Git repo housing Docker Compose YML file - 'docker_git_branch', # Branch to update when committing YML change - 'docker_compose_command', # Docker Compose invocation to use when deploying - # (include options like `-f`, but do not include - # commands like `up`) - ####### For building images from source in situ ####### - 'build_root', # Temporary location for cloning repo; deleted at end - 'static_path' # `UPDATE_STATIC_FILE` will be written here -) - -DEPLOYMENTS = {} -IMPORTED_DEPLOYMENTS = {} -deployments_file = os.environ.get('DEPLOYMENTS_JSON', 'deployments.json') -if os.path.exists(deployments_file): - with open(deployments_file, 'r') as f: - IMPORTED_DEPLOYMENTS = json.load(f) -else: - raise Exception("Cannot find {}".format(deployments_file)) - - -def run_no_pty(*args, **kwargs): - # Avoids control characters being returned in the output - kwargs['pty'] = False - return run(*args, **kwargs) - - -def sudo_no_pty(*args, **kwargs): - # Avoids control characters being returned in the output - kwargs['pty'] = False - return sudo(*args, **kwargs) - - -def setup_env(deployment_name): - deployment = DEPLOYMENTS.get(deployment_name, {}) - - if deployment_name in IMPORTED_DEPLOYMENTS: - deployment.update(IMPORTED_DEPLOYMENTS[deployment_name]) - - unrecognized_settings = set(deployment.keys()) - set(DEPLOYMENT_SETTINGS) - if unrecognized_settings: - raise Exception('Unrecognized deployment settings in {}: {}'.format( - deployments_file, ','.join(unrecognized_settings)) - ) - env.update(deployment) - - -def check_required_settings(required_settings): - for required_setting in required_settings: - if required_setting not in env: - raise Exception('Please define {} in {} and try again'.format( - required_setting, deployments_file)) - - -def get_base_image_from_dockerfile(): - from_line = run_no_pty("sed -n '/^FROM /p;q' Dockerfile") - base_image_name = from_line.strip().split(' ')[-1] - return base_image_name - - -def deploy(deployment_name, tag_or_branch): - setup_env(deployment_name) - if 'docker_git_repo' in env: - check_required_settings(( - 'docker_git_compose_file', - 'docker_git_repo', - 'docker_git_branch', - 'docker_compose_command', - )) - commit_pull_and_deploy(tag_or_branch) - else: - check_required_settings(( - 'build_root', - 'docker_config_path', - 'static_path', - )) - build_and_deploy(tag_or_branch) - - -def commit_pull_and_deploy(tag): - # Clone the Docker configuration in a local temporary directory - local_tmpdir = tempfile.mkdtemp(prefix='fab-deploy') - local_compose_file = os.path.join( - local_tmpdir, env.docker_git_compose_file) - with lcd(local_tmpdir): - local("git clone --quiet --depth=1 --branch='{}' '{}' .".format( - env.docker_git_branch, env.docker_git_repo) - ) - # Update the image tag used by Docker Compose - image_name = '{}:{}'.format(DOCKER_HUB_REPO, tag) - updated_compose_image = False - with open(local_compose_file, 'r') as f: - compose_file_lines = f.readlines() - with open(local_compose_file, 'w') as f: - for line in compose_file_lines: - matches = re.match(DOCKER_COMPOSE_IMAGE_UPDATE_PATTERN, line) - if not matches: - f.write(line) - continue - else: - # https://docs.python.org/2/library/os.html#os.linesep - f.write('{prefix}{image_name}\n'.format( - prefix=matches.group(1), image_name=image_name) - ) - updated_compose_image = True - if not updated_compose_image: - raise Exception( - 'Failed to update image to {} in Docker Compose ' - 'configuration'.format(image_name) - ) - # Did we actually make a change? - if local('git diff', capture=True): - # Commit the change - local("git add '{}'".format(local_compose_file)) - local("git commit -am 'Upgrade {service} to {tag}'".format( - service=SERVICE_NAME, tag=tag) - ) - # Push the commit - local('git show') - response = prompt( - 'OK to push the above commit to {} branch of {}? (y/n)'.format( - env.docker_git_branch, env.docker_git_repo) - ) - if response != 'y': - abort('Push cancelled') - local("git push origin '{}'".format(env.docker_git_branch)) - # Make a note of the commit to verify later that it's pulled to the - # remote server - pushed_config_commit = local("git show --no-patch", capture=True) - - # Deploy to the remote server - with cd(env.docker_config_path): - run('git pull') - pulled_config_commit = run_no_pty("git show --no-patch") - if pulled_config_commit != pushed_config_commit: - raise Exception( - 'The configuration commit on the remote server does not match ' - 'what was pushed locally. Please make sure {} is checked out ' - 'on the remote server.'.format(env.docker_git_branch) - ) - run_no_pty("{doco} pull '{service}'".format( - doco=env.docker_compose_command, service=SERVICE_NAME) - ) - run("{doco} up -d".format(doco=env.docker_compose_command)) - - -def build_and_deploy(branch): - build_dir = os.path.join(env.build_root, SERVICE_NAME) - with cd(build_dir): - # Start from scratch - run("find -delete") - # Shallow clone the requested branch to a temporary directory - run("git clone --quiet --depth=1 --branch='{}' '{}' .".format( - branch, GIT_REPO)) - # Note which commit is at the tip of the cloned branch - cloned_commit = run_no_pty("git show --no-patch") - # Update the base image - run_no_pty("docker pull '{}'".format(get_base_image_from_dockerfile())) - with cd(env.docker_config_path): - # Build the image - run("docker-compose build '{}'".format(SERVICE_NAME)) - # Don't specify a service name to avoid "Cannot link to a non running - # container" - run("docker-compose up -d") - running_commit = run_no_pty( - "docker exec $(docker-compose ps -q '{service}') bash -c '" - "cd \"${src_dir_var}\" && git show --no-patch'".format( - service=SERVICE_NAME, - src_dir_var=CONTAINER_SRC_DIR_ENV_VAR - ) - ) - with cd(env.static_path): - # Write the date and running commit to a publicly-accessible file - sudo("(date; echo) > '{}'".format(UPDATE_STATIC_FILE)) - files.append(UPDATE_STATIC_FILE, running_commit.decode('utf-8'), use_sudo=True) - if running_commit != cloned_commit: - raise Exception( - 'The running commit does not match the tip of the cloned' - 'branch! Make sure docker-compose.yml is set to build from ' - '{}'.format(build_dir) - ) - - -def publish_docker_image(tag, deployment_name='_image_builder'): - def _get_commit_from_docker_image(image_name): - with hide('output'): - return run_no_pty( - "docker run --rm {image_name} bash -c '" - "cd \"${src_dir_var}\" && git show --no-patch'".format( - image_name=image_name, - src_dir_var=CONTAINER_SRC_DIR_ENV_VAR - ) - ) - - setup_env(deployment_name) - check_required_settings(('build_root',)) - build_dir = os.path.join(env.build_root, SERVICE_NAME, tag) - image_name = '{}:{}'.format(DOCKER_HUB_REPO, tag) - - run("mkdir -p '{}'".format(build_dir)) - with cd(build_dir): - # Start from scratch - run("find -delete") - # Shallow clone the requested tag to a temporary directory - with hide('output'): - run("git clone --quiet --depth=1 --branch='{}' '{}' .".format( - tag, GIT_REPO)) - # Note which commit is at the tip of the cloned tag - cloned_commit = run_no_pty("git show --no-patch") - # Check if a suitable image was built already - with settings(warn_only=True): - commit_inside_image = _get_commit_from_docker_image(image_name) - if commit_inside_image != cloned_commit: - # Update the base image - run_no_pty("docker pull '{}'".format( - get_base_image_from_dockerfile() - )) - # Build the image - run("docker build -t '{}' .".format(image_name)) - # Make sure the resulting image has the expected code - commit_inside_image = _get_commit_from_docker_image(image_name) - if commit_inside_image != cloned_commit: - raise Exception( - 'The code inside the built image does not match the' - 'specified tag. This script is probably broken.' - ) - # Push the image to Docker Hub - run_no_pty("docker push '{}'".format(image_name)) From ed1b0111a2e9684b09526d83551e03fd2a34f2bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 28 Nov 2018 13:50:26 -0500 Subject: [PATCH 29/32] Bug fix for 'media_file_basename' always saved as empty string --- onadata/apps/logger/models/attachment.py | 14 +------------- onadata/apps/viewer/views.py | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py index 6b3106e2e..06c8972cc 100644 --- a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -34,24 +34,12 @@ class Attachment(models.Model): mimetype = models.CharField( max_length=100, null=False, blank=True, default='') - MEDIA_FILE_BASENAME_PATTERN = re.compile(r'/([^/]+)$') - class Meta: app_label = 'logger' def _populate_media_file_basename(self): - # TODO: write a management command to call this (and save) for all - # existing attachments? For the moment, the `media_file_basename` - # column can be populated directly in Postgres using: - # UPDATE logger_attachment - # SET media_file_basename = substring(media_file from '/([^/]+)$'); if self.media_file: - match = re.search( - self.MEDIA_FILE_BASENAME_PATTERN, self.media_file.name) - if match: - self.media_file_basename = match.groups()[0] - else: - self.media_file_basename = '' + self.media_file_basename = self.media_file.name def save(self, *args, **kwargs): if self.media_file and self.mimetype == '': diff --git a/onadata/apps/viewer/views.py b/onadata/apps/viewer/views.py index 36803438c..820f00cad 100644 --- a/onadata/apps/viewer/views.py +++ b/onadata/apps/viewer/views.py @@ -699,7 +699,6 @@ def attachment_url(request, size='medium'): media_file_logger.info('attachment not found') return HttpResponseNotFound(_(u'Attachment not found')) - if not attachment.mimetype.startswith('image'): return redirect(attachment.media_file.url) From 4549276b303f009d58651e9905a80afbea9f4351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 28 Nov 2018 13:53:29 -0500 Subject: [PATCH 30/32] Management command to update 'media_file_basename' --- .../commands/populate_media_file_basename.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 onadata/apps/logger/management/commands/populate_media_file_basename.py diff --git a/onadata/apps/logger/management/commands/populate_media_file_basename.py b/onadata/apps/logger/management/commands/populate_media_file_basename.py new file mode 100644 index 000000000..7131196b0 --- /dev/null +++ b/onadata/apps/logger/management/commands/populate_media_file_basename.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# vim: ai ts=4 sts=4 et sw=4 coding=utf-8 +from django.conf import settings +from django.db import connection +from django.db.models import Q, Func +from django.db.models.functions import Substr +from django.core.management.base import BaseCommand, CommandError +from django.utils.translation import ugettext as _, ugettext_lazy +from optparse import make_option + +from onadata.apps.logger.models.attachment import Attachment + + +class SubstrFromPattern(Func): + function = "SUBSTRING" + template = "%(function)s(%(expressions)s from '%(pattern)s')" + + +class Command(BaseCommand): + + help = ugettext_lazy("Updates indexed field `media_file_basename` which is empty or null") + option_list = BaseCommand.option_list + ( + make_option( + '--batchsize', + type='int', + default=100, + help=ugettext_lazy("Number of records to process per query")),) + + def handle(self, *args, **kwargs): + batchsize = kwargs.get("batchsize", 100) + stop = False + offset = 0 + while stop is not True: + limit = offset + batchsize + attachments_ids = list(Attachment.objects.values_list("id", flat=True) + .filter(Q(media_file_basename=None) | Q(media_file_basename="")) + .order_by("id")[offset:limit]) + if attachments_ids: + self.stdout.write(_("Updating attachments from #{} to #{}\n").format( + attachments_ids[0], + attachments_ids[-1])) + + Attachment.objects.filter(id__in=attachments_ids)\ + .update(media_file_basename=SubstrFromPattern("media_file", pattern="/([^/]+)$")) + + offset += batchsize + else: + stop = True From 85244d0a46b21cba38d20d0f996cc1dd4d7dd60c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Wed, 28 Nov 2018 14:22:54 -0500 Subject: [PATCH 31/32] Use already existing 'filename' property' to initialize 'media_file_basename' field --- onadata/apps/logger/models/attachment.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/onadata/apps/logger/models/attachment.py b/onadata/apps/logger/models/attachment.py index 06c8972cc..ed62a852d 100644 --- a/onadata/apps/logger/models/attachment.py +++ b/onadata/apps/logger/models/attachment.py @@ -37,18 +37,14 @@ class Attachment(models.Model): class Meta: app_label = 'logger' - def _populate_media_file_basename(self): - if self.media_file: - self.media_file_basename = self.media_file.name - def save(self, *args, **kwargs): - if self.media_file and self.mimetype == '': - # guess mimetype - mimetype, encoding = mimetypes.guess_type(self.media_file.name) - if mimetype: - self.mimetype = mimetype - - self._populate_media_file_basename() + if self.media_file: + self.media_file_basename = self.filename + if self.mimetype == '': + # guess mimetype + mimetype, encoding = mimetypes.guess_type(self.media_file.name) + if mimetype: + self.mimetype = mimetype super(Attachment, self).save(*args, **kwargs) From 43f8e1ef85d29acb13fe0745429bc420fa44ae31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Fri, 30 Nov 2018 16:40:57 -0500 Subject: [PATCH 32/32] Applied PR#501 suggested changes --- .../management/commands/populate_media_file_basename.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onadata/apps/logger/management/commands/populate_media_file_basename.py b/onadata/apps/logger/management/commands/populate_media_file_basename.py index 7131196b0..4453f8ad3 100644 --- a/onadata/apps/logger/management/commands/populate_media_file_basename.py +++ b/onadata/apps/logger/management/commands/populate_media_file_basename.py @@ -23,11 +23,11 @@ class Command(BaseCommand): make_option( '--batchsize', type='int', - default=100, + default=1000, help=ugettext_lazy("Number of records to process per query")),) def handle(self, *args, **kwargs): - batchsize = kwargs.get("batchsize", 100) + batchsize = kwargs.get("batchsize") stop = False offset = 0 while stop is not True: