Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle a question whose type changes but name doesn't. #187

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
7fb1ad9
Creates a unique_name when field is added to FormVersion
noliveleger Dec 13, 2018
c15fc18
Merge branch '182-shifted-value-with-deleted-multiple-options' into 1…
noliveleger Dec 13, 2018
5af7218
Exports supports data for same questions with different types
noliveleger Dec 13, 2018
9652455
Changed unittest to match new representation of FormField
noliveleger Dec 14, 2018
5953cde
Fixed stats calculation when multiple versions contains same question…
noliveleger Dec 14, 2018
cc58e06
Fixed typo
noliveleger Dec 14, 2018
f4809a4
Used property 'contextual_name' instead of 'name'
noliveleger Dec 14, 2018
dc69313
Changed unittests for multiple versions with questions with same name…
noliveleger Dec 14, 2018
3985489
Removed useless try/except when trying parsing values of NumField
noliveleger Dec 14, 2018
a042146
Merge branch 'master' into 151-question-with-different-types
noliveleger Dec 17, 2018
3b0db4d
Merge branch 'master' into 151-question-with-different-types
noliveleger Dec 18, 2018
dc05c1d
Applied requested changes for PR #187
noliveleger Dec 18, 2018
50b5a2e
Changed FormDataDef representation string
noliveleger Dec 18, 2018
16a896a
Applied PEP-8 guidance
noliveleger Dec 19, 2018
d1c5429
Removed useless import
noliveleger Dec 19, 2018
f0ddf7d
Applied logic of '_calculate_stats()' to '_disaggregate_stats()' in o…
noliveleger Dec 20, 2018
bd25c51
Merge 'master' branch into 151-question-with-different-types
noliveleger Jul 8, 2019
785ebda
Restored try/except to handle unexpected values as blank when calcula…
noliveleger Jul 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions src/formpack/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def version_id_keys(self, _versions=None):
_id_keys.append(_id_key)
return _id_keys


@property
def available_translations(self):
translations = set()
Expand Down Expand Up @@ -191,25 +190,29 @@ def summr(v):
return ''.join(out)

@staticmethod
def _combine_field_choices(old_field, new_field):
def _combine_field_choices(older_version_field, current_field):
"""
Update `new_field.choice` so that it contains everything from
`old_field.choice`. In the event of a conflict, `new_field.choice`
Updates `current_field.choice` so that it contains everything from
`older_version_field.choice`. In the event of a conflict, `current_field.choice`
wins. If either field does not have a `choice` attribute, do
nothing

:param old_field: FormField
:param new_field: FormField
:param older_version_field: FormField
:param current_field: FormField
:return: FormField. Updated new_field
"""

try:
old_choice = old_field.choice
new_choice = new_field.choice
new_field.merge_choice(old_choice)
older_version_choice = older_version_field.choice
current_field.merge_choice(older_version_choice)
except AttributeError:
pass

return new_field
return current_field

@staticmethod
def _do_fields_match(older_versioned_field, current_field):
return older_versioned_field.signature == current_field.signature

def get_fields_for_versions(self, versions=-1, data_types=None):

Expand Down Expand Up @@ -271,16 +274,23 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
for section_name, section in version.sections.items():
for field_name, field_object in section.fields.items():
if not isinstance(field_object, CopyField):
add_field = True
if field_name in positions:
position = positions[field_name]
latest_field_object = tmp2d[position[0]][position[1]]
# Because versions_desc are ordered from latest to oldest,
# we use current field object as the old one and the one already
# in position as the latest one.
new_object = self._combine_field_choices(
field_object, latest_field_object)
tmp2d[position[0]][position[1]] = new_object
else:

if self._do_fields_match(field_object, latest_field_object):
new_object = self._combine_field_choices(
field_object, latest_field_object)
tmp2d[position[0]][position[1]] = new_object
add_field = False
else:
field_object.use_unique_name = True

if add_field:
try:
current_index_list = tmp2d[index]
current_index_list.append(field_object)
Expand All @@ -291,7 +301,7 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
# it can happen when current version has more items than newest one.
index = len(tmp2d) - 1

positions[field_name] = (index, len(tmp2d[index]) - 1)
positions[field_object.contextual_name] = (index, len(tmp2d[index]) - 1)

index += 1

Expand Down
82 changes: 65 additions & 17 deletions src/formpack/reporting/autoreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,21 @@ def _get_version_id_from_submission(self, submission):

def _calculate_stats(self, submissions, fields, versions, lang):

metrics = {field.name: Counter() for field in fields}
metrics = {field.contextual_name: Counter() for field in fields}

submissions_count = 0
submission_counts_by_version = Counter()

# When form contains questions with the same name with different types,
# Older found versions are pushed at the end of the list `fields`
# Because we want to match submission values with fields, we need to try
# to match with older version first.
# For example: Form contains two versions with one question.
# `reversed_fields` look this:
# [<FormField type="text" contextual_name="question_text_v123456">,
# <FormField type="integer" contextual_name="question">]
reversed_fields = list(reversed(fields))

for entry in submissions:

version_id = self._get_version_id_from_submission(entry)
Expand All @@ -63,20 +73,38 @@ def _calculate_stats(self, submissions, fields, versions, lang):

submissions_count += 1
submission_counts_by_version[version_id] += 1
fields_to_skip = []

# TODO: do we really need FormSubmission ?
entry = FormSubmission(entry).data
for field in fields:
if field.has_stats:
counter = metrics[field.name]

for field in reversed_fields:
if field.has_stats and field.name not in fields_to_skip:
counter = metrics[field.contextual_name]
raw_value = entry.get(field.path)

if raw_value is not None:
# Because `field.path` is the same for all fields which
# have the same name, we want to be sure we don't append
# data multiple times.

# If `field.use_unique_name` is `True`, `data` could be
# mapped to it depending on entry's version ID.
if field.use_unique_name:
if field.contextual_name == field.get_unique_name(version_id):
# We have a match. Skip other fields with the same name
# for this submission
fields_to_skip.append(field.name)
else:
# If we reach this line, it's because user has changed
# the type of question more than once and
# version is not the correct one yet.
# We need to keep looking for the good one.
continue

try:
values = list(field.parse_values(raw_value))
except ValueError as e:
# TODO: Remove try/except when
# https://github.com/kobotoolbox/formpack/issues/151
# is fixed?
logging.warning(str(e), exc_info=True)
# Treat the bad value as a blank response
counter[None] += 1
Expand All @@ -90,7 +118,7 @@ def stats_generator():
for field in fields:
yield (field,
field.get_labels(lang)[0],
field.get_stats(metrics[field.name], lang=lang))
field.get_stats(metrics[field.contextual_name], lang=lang))

return AutoReportStats(self, stats_generator(), submissions_count,
submission_counts_by_version)
Expand All @@ -107,6 +135,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
submission_counts_by_version = Counter()

fields = [f for f in fields if f != split_by_field]
reversed_fields = list(reversed(fields))

# Then we map fields, values and splitters:
# {field_name1: {
Expand All @@ -120,12 +149,12 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel
# field_name2...},
# ...}
#
metrics = {f.name: defaultdict(Counter) for f in fields}
metrics = {f.contextual_name: defaultdict(Counter) for f in fields}

for sbmssn in submissions:
for submission in submissions:

# Skip unrequested versions
version_id = self._get_version_id_from_submission(sbmssn)
version_id = self._get_version_id_from_submission(submission)
if version_id not in versions:
continue

Expand All @@ -138,21 +167,40 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel

# since we are going to pop one entry, we make a copy
# of it to avoid side effect
entry = dict(FormSubmission(sbmssn).data)
entry = dict(FormSubmission(submission).data)
splitter = entry.pop(split_by_field.path, None)
fields_to_skip = []

for field in fields:
for field in reversed_fields:

if field.has_stats:

raw_value = entry.get(field.path)

if raw_value is not None:
# Because `field.path` is the same for all fields which
# have the same name, we want to be sure we don't append
# data multiple times.

# If `field.use_unique_name` is `True`, `data` could be
# mapped to it depending on entry's version ID.
if field.use_unique_name:
if field.contextual_name == field.get_unique_name(version_id):
# We have a match. Skip other fields with the same name
# for this submission
fields_to_skip.append(field.name)
else:
# If we reach this line, it's because user has changed
# the type of question more than once and
# version is not the correct one yet.
# We need to keep looking for the good one.
continue

values = field.parse_values(raw_value)
else:
values = (None,)

value_metrics = metrics[field.name]
value_metrics = metrics[field.contextual_name]

for value in values:
counters = value_metrics[value]
Expand Down Expand Up @@ -187,7 +235,7 @@ def _disaggregate_stats(self, submissions, fields, versions, lang, split_by_fiel

def stats_generator():
for field in fields:
stats = field.get_disaggregated_stats(metrics[field.name], lang=lang,
stats = field.get_disaggregated_stats(metrics[field.contextual_name], lang=lang,
top_splitters=top_splitters)
yield (field, field.get_labels(lang)[0], stats)

Expand All @@ -204,11 +252,11 @@ def get_stats(self, submissions, fields=(), lang=UNSPECIFIED_TRANSLATION, split_
fields = all_fields
else:
fields.add(split_by)
fields = [field for field in all_fields if field.name in fields]
fields = [field for field in all_fields if field.contextual_name in fields]

if split_by:
try:
split_by_field = next(f for f in fields if f.name == split_by)
split_by_field = next(f for f in fields if f.contextual_name == split_by)
except StopIteration:
raise ValueError('No field matching name "%s" '
'for split_by' % split_by)
Expand Down
7 changes: 3 additions & 4 deletions src/formpack/reporting/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def get_fields_labels_tags_for_all_versions(self,

for field in all_fields:
section_fields.setdefault(field.section.name, []).append(
(field.name, field)
(field.contextual_name, field)
)
section_labels.setdefault(field.section.name, []).append(
field.get_labels(lang, group_sep,
Expand All @@ -197,7 +197,6 @@ def get_fields_labels_tags_for_all_versions(self,
auto_field_names.append(
"_submission_{}".format(copy_field))


# Flatten field labels and names. Indeed, field.get_labels()
# and self.names return a list because a multiple select field can
# have several values. We needed them grouped to insert them at the
Expand All @@ -217,8 +216,8 @@ def get_fields_labels_tags_for_all_versions(self,
# Add the tags for this field. If the field has multiple
# labels, add the tags once for each label
tags.extend(
[flatten_tag_list(field.tags, tag_cols_and_seps)] *
len(field.value_names)
[flatten_tag_list(field.tags, tag_cols_and_seps)]
* len(field.value_names)
)

names = [name for name_list in name_lists for name in name_list]
Expand Down
23 changes: 18 additions & 5 deletions src/formpack/schema/datadef.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,26 @@ class FormDataDef(object):

def __init__(self, name, labels=None, has_stats=False, *args, **kwargs):
self.name = name
self.unique_name = name
self.use_unique_name = False
self.labels = labels or {}
self.value_names = self.get_value_names()
self.has_stats = has_stats

def __repr__(self):
return "<%s name='%s'>" % (self.__class__.__name__, self.name)
return "<%s contextual_name='%s'>" % (self.__class__.__name__, self.contextual_name)

@property
def contextual_name(self):
if self.use_unique_name:
return self.unique_name
return self.name

@property
def value_names(self):
jnm marked this conversation as resolved.
Show resolved Hide resolved
return self.get_value_names()

def get_value_names(self):
return [self.name]
return [self.contextual_name]

@classmethod
def from_json_definition(cls, definition, translations=None):
Expand All @@ -48,6 +59,9 @@ def _extract_json_labels(cls, definition, translations):
labels = {}
return labels

def create_unique_name(self, suffix):
pass


class FormGroup(FormDataDef): # useful to get __repr__
pass
Expand Down Expand Up @@ -79,7 +93,7 @@ def from_json_definition(cls, definition, hierarchy=(None,), parent=None,
return cls(definition['name'], labels, hierarchy=hierarchy, parent=parent)

def get_label(self, lang=UNSPECIFIED_TRANSLATION):
return [self.labels.get(lang) or self.name]
return [self.labels.get(lang) or self.contextual_name]

def __repr__(self):
parent_name = getattr(self.parent, 'name', None)
Expand Down Expand Up @@ -120,7 +134,6 @@ def all_from_json_definition(cls, definition, translation_list):
option['name'] = choice_name
return all_choices


@property
def translations(self):
for option in self.options.values():
Expand Down
Loading