kobotoolbox · noliveleger · Dec 13, 2018 · Dec 13, 2018 · Dec 13, 2018 · Dec 14, 2018
diff --git a/src/formpack/pack.py b/src/formpack/pack.py
@@ -77,7 +77,6 @@ def version_id_keys(self, _versions=None):
                 _id_keys.append(_id_key)
         return _id_keys
 
-
     @property
     def available_translations(self):
         translations = set()
@@ -191,25 +190,29 @@ def summr(v):
         return ''.join(out)
 
     @staticmethod
-    def _combine_field_choices(old_field, new_field):
+    def _combine_field_choices(older_version_field, current_field):
         """
-        Update `new_field.choice` so that it contains everything from
-        `old_field.choice`. In the event of a conflict, `new_field.choice`
+        Updates `current_field.choice` so that it contains everything from
+        `older_version_field.choice`. In the event of a conflict, `current_field.choice`
         wins. If either field does not have a `choice` attribute, do
         nothing
 
-        :param old_field: FormField
-        :param new_field: FormField
+        :param older_version_field: FormField
+        :param current_field: FormField
         :return: FormField. Updated new_field
         """
+
         try:
-            old_choice = old_field.choice
-            new_choice = new_field.choice
-            new_field.merge_choice(old_choice)
+            older_version_choice = older_version_field.choice
+            current_field.merge_choice(older_version_choice)
         except AttributeError:
             pass
 
-        return new_field
+        return current_field
+
+    @staticmethod
+    def _do_fields_match(older_versioned_field, current_field):
+        return older_versioned_field.signature == current_field.signature
 
     def get_fields_for_versions(self, versions=-1, data_types=None):
 
@@ -271,16 +274,23 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
             for section_name, section in version.sections.items():
                 for field_name, field_object in section.fields.items():
                     if not isinstance(field_object, CopyField):
+                        add_field = True
                         if field_name in positions:
                             position = positions[field_name]
                             latest_field_object = tmp2d[position[0]][position[1]]
                             # Because versions_desc are ordered from latest to oldest,
                             # we use current field object as the old one and the one already
                             # in position as the latest one.
-                            new_object = self._combine_field_choices(
-                                field_object, latest_field_object)
-                            tmp2d[position[0]][position[1]] = new_object
-                        else:
+
+                            if self._do_fields_match(field_object, latest_field_object):
+                                new_object = self._combine_field_choices(
+                                    field_object, latest_field_object)
+                                tmp2d[position[0]][position[1]] = new_object
+                                add_field = False
+                            else:
+                                field_object.use_unique_name = True
+
+                        if add_field:
                             try:
                                 current_index_list = tmp2d[index]
                                 current_index_list.append(field_object)
@@ -291,7 +301,7 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
                                 # it can happen when current version has more items than newest one.
                                 index = len(tmp2d) - 1
 
-                            positions[field_name] = (index, len(tmp2d[index]) - 1)
+                            positions[field_object.contextual_name] = (index, len(tmp2d[index]) - 1)
 
                         index += 1
 

diff --git a/src/formpack/reporting/autoreport.py b/src/formpack/reporting/autoreport.py
@@ -50,11 +50,21 @@ def _get_version_id_from_submission(self, submission):
 
     def _calculate_stats(self, submissions, fields, versions, lang):
 
-        metrics = {field.name: Counter() for field in fields}
+        metrics = {field.contextual_name: Counter() for field in fields}
 
         submissions_count = 0
         submission_counts_by_version = Counter()
 
+        # When form contains questions with the same name with different types,
+        # Older found versions are pushed at the end of the list `fields`
+        # Because we want to match submission values with fields, we need to try
+        # to match with older version first.
+        # For example: Form contains two versions with one question.
+        # `reversed_fields` look this:
+        # [<FormField type="text" contextual_name="question_text_v123456">,
+        #  <FormField type="integer" contextual_name="question">]
+        reversed_fields = list(reversed(fields))
+
         for entry in submissions:
 
             version_id = self._get_version_id_from_submission(entry)
@@ -63,20 +73,38 @@ def _calculate_stats(self, submissions, fields, versions, lang):
 
             submissions_count += 1
             submission_counts_by_version[version_id] += 1
+            fields_to_skip = []
 
             # TODO: do we really need FormSubmission ?
             entry = FormSubmission(entry).data
-            for field in fields:
-                if field.has_stats:
-                    counter = metrics[field.name]
+
+            for field in reversed_fields:
+                if field.has_stats and field.name not in fields_to_skip:
+                    counter = metrics[field.contextual_name]
                     raw_value = entry.get(field.path)
+
                     if raw_value is not None:
+                        # Because `field.path` is the same for all fields which
+                        # have the same name, we want to be sure we don't append
+                        # data multiple times.
+
+                        # If `field.use_unique_name` is `True`, `data` could be
+                        # mapped to it depending on entry's version ID.
+                        if field.use_unique_name:
+                            if field.contextual_name == field.get_unique_name(version_id):
+                                # We have a match. Skip other fields with the same name
+                                # for this submission
+                                fields_to_skip.append(field.name)
+                            else:
+                                # If we reach this line, it's because user has changed
+                                # the type of question more than once and
+                                # version is not the correct one yet.
+                                # We need to keep looking for the good one.
+                                continue
+
                         try:
                             values = list(field.parse_values(raw_value))
                         except ValueError as e:
-                            # TODO: Remove try/except when
-                            # https://github.com/kobotoolbox/formpack/issues/151
-                            # is fixed?
                             logging.warning(str(e), exc_info=True)
                             # Treat the bad value as a blank response
                             counter[None] += 1
@@ -90,7 +118,7 @@ def stats_generator():
             for field in fields:
                 yield (field,
                        field.get_labels(lang)[0],
-                       field.get_stats(metrics[field.name], lang=lang))
+                       field.get_stats(metrics[field.contextual_name], lang=lang))
 
         return AutoReportStats(self, stats_generator(), submissions_count,
                                submission_counts_by_version)
@@ -107,6 +135,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
         submission_counts_by_version = Counter()
 
         fields = [f for f in fields if f != split_by_field]
+        reversed_fields = list(reversed(fields))
 
         # Then we map fields, values and splitters:
         #          {field_name1: {
@@ -120,12 +149,12 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
         #              field_name2...},
         #         ...}
         #
-        metrics = {f.name: defaultdict(Counter) for f in fields}
+        metrics = {f.contextual_name: defaultdict(Counter) for f in fields}
 
-        for sbmssn in submissions:
+        for submission in submissions:
 
             # Skip unrequested versions
-            version_id = self._get_version_id_from_submission(sbmssn)
+            version_id = self._get_version_id_from_submission(submission)
             if version_id not in versions:
                 continue
 
@@ -138,21 +167,40 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
 
             # since we are going to pop one entry, we make a copy
             # of it to avoid side effect
-            entry = dict(FormSubmission(sbmssn).data)
+            entry = dict(FormSubmission(submission).data)
             splitter = entry.pop(split_by_field.path, None)
+            fields_to_skip = []
 
-            for field in fields:
+            for field in reversed_fields:
 
                 if field.has_stats:
 
                     raw_value = entry.get(field.path)
 
                     if raw_value is not None:
+                        # Because `field.path` is the same for all fields which
+                        # have the same name, we want to be sure we don't append
+                        # data multiple times.
+
+                        # If `field.use_unique_name` is `True`, `data` could be
+                        # mapped to it depending on entry's version ID.
+                        if field.use_unique_name:
+                            if field.contextual_name == field.get_unique_name(version_id):
+                                # We have a match. Skip other fields with the same name
+                                # for this submission
+                                fields_to_skip.append(field.name)
+                            else:
+                                # If we reach this line, it's because user has changed
+                                # the type of question more than once and
+                                # version is not the correct one yet.
+                                # We need to keep looking for the good one.
+                                continue
+
                         values = field.parse_values(raw_value)
                     else:
                         values = (None,)
 
-                    value_metrics = metrics[field.name]
+                    value_metrics = metrics[field.contextual_name]
 
                     for value in values:
                         counters = value_metrics[value]
@@ -187,7 +235,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
 
         def stats_generator():
             for field in fields:
-                stats = field.get_disaggregated_stats(metrics[field.name], lang=lang,
+                stats = field.get_disaggregated_stats(metrics[field.contextual_name], lang=lang,
                                                       top_splitters=top_splitters)
                 yield (field, field.get_labels(lang)[0], stats)
 
@@ -204,11 +252,11 @@ def get_stats(self, submissions, fields=(), lang=UNSPECIFIED_TRANSLATION, split_
             fields = all_fields
         else:
             fields.add(split_by)
-            fields = [field for field in all_fields if field.name in fields]
+            fields = [field for field in all_fields if field.contextual_name in fields]
 
         if split_by:
             try:
-                split_by_field = next(f for f in fields if f.name == split_by)
+                split_by_field = next(f for f in fields if f.contextual_name == split_by)
             except StopIteration:
                 raise ValueError('No field matching name "%s" '
                                  'for split_by' % split_by)

diff --git a/src/formpack/reporting/export.py b/src/formpack/reporting/export.py
@@ -170,7 +170,7 @@ def get_fields_labels_tags_for_all_versions(self,
 
         for field in all_fields:
             section_fields.setdefault(field.section.name, []).append(
-                (field.name, field)
+                (field.contextual_name, field)
             )
             section_labels.setdefault(field.section.name, []).append(
                 field.get_labels(lang, group_sep,
@@ -197,7 +197,6 @@ def get_fields_labels_tags_for_all_versions(self,
                         auto_field_names.append(
                             "_submission_{}".format(copy_field))
 
-
         # Flatten field labels and names. Indeed, field.get_labels()
         # and self.names return a list because a multiple select field can
         # have several values. We needed them grouped to insert them at the
@@ -217,8 +216,8 @@ def get_fields_labels_tags_for_all_versions(self,
                 # Add the tags for this field. If the field has multiple
                 # labels, add the tags once for each label
                 tags.extend(
-                    [flatten_tag_list(field.tags, tag_cols_and_seps)] *
-                        len(field.value_names)
+                    [flatten_tag_list(field.tags, tag_cols_and_seps)]
+                    * len(field.value_names)
                 )
 
             names = [name for name_list in name_lists for name in name_list]

diff --git a/src/formpack/schema/datadef.py b/src/formpack/schema/datadef.py
@@ -23,15 +23,26 @@ class FormDataDef(object):
 
     def __init__(self, name, labels=None, has_stats=False, *args, **kwargs):
         self.name = name
+        self.unique_name = name
+        self.use_unique_name = False
         self.labels = labels or {}
-        self.value_names = self.get_value_names()
         self.has_stats = has_stats
 
     def __repr__(self):
-        return "<%s name='%s'>" % (self.__class__.__name__, self.name)
+        return "<%s contextual_name='%s'>" % (self.__class__.__name__, self.contextual_name)
+
+    @property
+    def contextual_name(self):
+        if self.use_unique_name:
+            return self.unique_name
+        return self.name
+
+    @property
+    def value_names(self):
+        return self.get_value_names()
 
     def get_value_names(self):
-        return [self.name]
+        return [self.contextual_name]
 
     @classmethod
     def from_json_definition(cls, definition, translations=None):
@@ -48,6 +59,9 @@ def _extract_json_labels(cls, definition, translations):
             labels = {}
         return labels
 
+    def create_unique_name(self, suffix):
+        pass
+
 
 class FormGroup(FormDataDef):  # useful to get __repr__
     pass
@@ -79,7 +93,7 @@ def from_json_definition(cls, definition, hierarchy=(None,), parent=None,
         return cls(definition['name'], labels, hierarchy=hierarchy, parent=parent)
 
     def get_label(self, lang=UNSPECIFIED_TRANSLATION):
-        return [self.labels.get(lang) or self.name]
+        return [self.labels.get(lang) or self.contextual_name]
 
     def __repr__(self):
         parent_name = getattr(self.parent, 'name', None)
@@ -120,7 +134,6 @@ def all_from_json_definition(cls, definition, translation_list):
             option['name'] = choice_name
         return all_choices
 
-
     @property
     def translations(self):
         for option in self.options.values():