Skip to content

Commit

Permalink
imapping __init__ update
Browse files Browse the repository at this point in the history
  • Loading branch information
leonardbinet committed Mar 7, 2020
1 parent 7bc8aaf commit 8d84c6f
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 94 deletions.
2 changes: 1 addition & 1 deletion pandagg/interactive/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def set_mapping(self, mapping):
self.mapping = IMapping(
client=self.client,
index_name=self.name,
tree=Mapping(mapping),
from_=Mapping(mapping),
depth=1
)

Expand Down
14 changes: 10 additions & 4 deletions pandagg/interactive/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pandagg.interactive._field_agg_factory import field_classes_per_name
from pandagg.interactive.abstract import TreeBasedObj
from pandagg.node.mapping.field_datatypes import Object
from pandagg.tree.mapping import Mapping


Expand All @@ -22,11 +23,16 @@ class IMapping(TreeBasedObj):
"""
_NODE_PATH_ATTR = 'name'

def __init__(self, tree, client=None, root_path=None, depth=1, initial_tree=None, index_name=None):
def __init__(self, from_=None, properties=None, dynamic=False, client=None, root_path=None, depth=1, initial_tree=None, index_name=None):
if from_ is not None and properties is not None:
raise ValueError('Can provide at most one of "from_" and "properties"')
if properties is not None:
from_ = Object(name='', properties=properties, dynamic=dynamic)
tree = Mapping.deserialize(from_)

self._client = client
self._index_name = index_name
if isinstance(tree, dict):
tree = Mapping(tree)

super(IMapping, self).__init__(
tree=tree,
root_path=root_path,
Expand All @@ -46,7 +52,7 @@ def _bind(self, client, index_name=None):
def _clone(self, nid, root_path, depth):
return IMapping(
client=self._client,
tree=self._tree.subtree(nid),
from_=self._tree.subtree(nid),
root_path=root_path,
depth=depth,
initial_tree=self._initial_tree,
Expand Down
3 changes: 0 additions & 3 deletions tests/base/interactive/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ def test_index_agg(self):
)
self.assertEqual(agg.__str__(), equivalent_agg.__str__())


class ClientBoundTestCase(TestCase):

@staticmethod
def get_client_bound_index(es_response=None):
client_mock = Elasticsearch()
Expand Down
168 changes: 168 additions & 0 deletions tests/base/interactive/test_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from mock import Mock
from unittest import TestCase

from pandagg.interactive._field_agg_factory import field_classes_per_name
from pandagg.interactive.mapping import IMapping
from pandagg.node.mapping.abstract import Field
from pandagg.node.mapping.field_datatypes import Keyword, Text, Nested, Object, Integer
from pandagg.tree.mapping import Mapping

from tests.base.mapping_example import MAPPING


class IMappingTestCase(TestCase):
    """Tests for the interactive mapping wrapper ``IMapping``."""

    def test_mapping_aggregations(self):
        """Top-level fields are reachable as attributes of the IMapping."""
        mapping_tree = Mapping(from_=MAPPING)
        # Every top-level field of the example mapping must be exposed as an
        # attribute of the interactive object.
        mapping = IMapping(from_=mapping_tree, depth=1)
        expected_fields = (
            'classification_type', 'date', 'global_metrics', 'id',
            'language', 'local_metrics', 'workflow',
        )
        for field_name in expected_fields:
            self.assertTrue(hasattr(mapping, field_name))

        workflow = mapping.workflow
        # Calling the interactive object must return a Field node.
        workflow_node = workflow()
        self.assertIsInstance(workflow_node, Field)

    def test_imapping_init(self):
        """IMapping accepts a dict, a Mapping tree, or node declarations.

        All three construction paths must serialize back to the same mapping
        dict and keep the provided client and index name.
        """
        mapping_dict = {
            "dynamic": False,
            "properties": {
                "classification_type": {
                    "type": "keyword",
                    "fields": {
                        "raw": {
                            "type": "text"
                        }
                    }
                },
                "local_metrics": {
                    "type": "nested",
                    "dynamic": False,
                    "properties": {
                        "dataset": {
                            "dynamic": False,
                            "properties": {
                                "support_test": {
                                    "type": "integer"
                                },
                                "support_train": {
                                    "type": "integer"
                                }
                            }
                        }
                    }
                }
            }
        }

        mapping_tree = Mapping(from_=mapping_dict)
        client_mock = Mock(spec=['search'])
        index_name = 'classification_report_index_name'

        # from dict
        im1 = IMapping(
            client=client_mock,
            from_=mapping_dict,
            index_name=index_name
        )
        # from tree
        im2 = IMapping(
            client=client_mock,
            from_=mapping_tree,
            index_name=index_name
        )

        # from nodes: a list (not a set) is used for the top-level properties,
        # consistently with the nested ``fields``/``properties`` declarations,
        # so the nodes do not need to be hashable and keep a stable order.
        im3 = IMapping(
            properties=[
                Keyword('classification_type', fields=[
                    Text('raw')
                ]),
                Nested('local_metrics', dynamic=False, properties=[
                    Object('dataset', dynamic=False, properties=[
                        Integer('support_test'),
                        Integer('support_train')
                    ])
                ])
            ],
            dynamic=False,
            client=client_mock,
            index_name=index_name
        )
        for i, m in enumerate((im1, im2, im3)):
            self.assertEqual(m._tree.serialize(), mapping_dict, "failed at m%d" % (i + 1))
            self.assertEqual(m._index_name, index_name)
            self.assertIs(m._client, client_mock)

    def test_client_bound(self):
        """Check that when reaching leaves (fields without children) leaves have the "a" attribute that can generate
        aggregations on that field type.
        """
        client_mock = Mock(spec=['search'])
        es_response_mock = {
            "_shards": {
                "failed": 0,
                "successful": 135,
                "total": 135
            },
            "aggregations": {
                "terms_agg": {
                    "buckets": [
                        {
                            "doc_count": 25,
                            "key": 1
                        },
                        {
                            "doc_count": 50,
                            "key": 2
                        }
                    ],
                    "doc_count_error_upper_bound": 0,
                    "sum_other_doc_count": 4
                }
            },
            "hits": {
                "hits": [],
                "max_score": 0.0,
                "total": 300
            },
            "timed_out": False,
            "took": 30
        }
        client_mock.search = Mock(return_value=es_response_mock)

        mapping_tree = Mapping(from_=MAPPING)
        client_bound_mapping = IMapping(
            client=client_mock,
            from_=mapping_tree,
            index_name='classification_report_index_name'
        )

        workflow_field = client_bound_mapping.workflow
        self.assertTrue(hasattr(workflow_field, 'a'))
        # workflow is expected to expose the 'keyword' aggregation class
        self.assertIsInstance(workflow_field.a, field_classes_per_name['keyword'])

        response = workflow_field.a.terms(
            size=20,
            output=None,
            query={'term': {'classification_type': 'multiclass'}}
        )
        self.assertEqual(response, [
            (1, {"doc_count": 25, "key": 1}),
            (2, {"doc_count": 50, "key": 2}),
        ])
        # Exactly one search request, with the expected body and index.
        client_mock.search.assert_called_once_with(
            body={
                'aggs': {'terms_agg': {'terms': {'field': 'workflow', 'size': 20}}},
                'size': 0,
                'query': {'term': {'classification_type': 'multiclass'}}
            },
            index='classification_report_index_name'
        )
4 changes: 2 additions & 2 deletions tests/base/tree/test_aggs.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ def test_groupby_method(self):
def test_mapping_from_init(self):
agg_from_dict_mapping = Agg(mapping=MAPPING)
agg_from_tree_mapping = Agg(mapping=Mapping(from_=MAPPING))
agg_from_obj_mapping = Agg(mapping=IMapping(tree=Mapping(from_=MAPPING)))
agg_from_obj_mapping = Agg(mapping=IMapping(from_=Mapping(from_=MAPPING)))
self.assertEqual(
agg_from_dict_mapping.tree_mapping.__repr__(),
agg_from_tree_mapping.tree_mapping.__repr__()
Expand All @@ -637,7 +637,7 @@ def test_set_mapping(self):
agg_from_tree_mapping = Agg() \
.set_mapping(mapping=Mapping(from_=MAPPING))
agg_from_obj_mapping = Agg() \
.set_mapping(mapping=IMapping(tree=Mapping(from_=MAPPING), client=None))
.set_mapping(mapping=IMapping(from_=Mapping(from_=MAPPING), client=None))
self.assertEqual(
agg_from_dict_mapping.tree_mapping.__repr__(),
agg_from_tree_mapping.tree_mapping.__repr__()
Expand Down
84 changes: 0 additions & 84 deletions tests/base/tree/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@
# -*- coding: utf-8 -*-

from unittest import TestCase
from mock import Mock

from pandagg.exceptions import AbsentMappingFieldError
from pandagg.interactive._field_agg_factory import field_classes_per_name
from pandagg.node.mapping.abstract import Field
from pandagg.node.mapping.field_datatypes import Keyword, Object, Text, Nested, Integer
from pandagg.tree.mapping import Mapping
from pandagg.interactive.mapping import IMapping
from tests.base.mapping_example import MAPPING, EXPECTED_MAPPING_TREE_REPR


Expand Down Expand Up @@ -135,84 +132,3 @@ def test_node_path(self):
self.assertIsInstance(node, Field)
self.assertEqual(node.name, 'support_test')
self.assertEqual(mapping_tree.node_path(node.identifier), 'local_metrics.dataset.support_test')

def test_mapping_aggregations(self):
mapping_tree = Mapping(from_=MAPPING)
# check that leaves are expanded, based on 'field_name' attribute of nodes
mapping = IMapping(tree=mapping_tree, depth=1)
for field_name in ('classification_type', 'date', 'global_metrics', 'id', 'language', 'local_metrics', 'workflow'):
self.assertTrue(hasattr(mapping, field_name))

workflow = mapping.workflow
# Check that calling a tree will return its root node.
workflow_node = workflow()
self.assertTrue(isinstance(workflow_node, Field))

def test_client_bound(self):
"""Check that when reaching leaves (fields without children) leaves have the "a" attribute that can generate
aggregations on that field type.
"""
client_mock = Mock(spec=['search'])
es_response_mock = {
"_shards": {
"failed": 0,
"successful": 135,
"total": 135
},
"aggregations": {
"terms_agg": {
"buckets": [
{
"doc_count": 25,
"key": 1
},
{
"doc_count": 50,
"key": 2
}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 4
}
},
"hits": {
"hits": [],
"max_score": 0.0,
"total": 300
},
"timed_out": False,
"took": 30
}
client_mock.search = Mock(return_value=es_response_mock)

mapping_tree = Mapping(from_=MAPPING)
client_bound_mapping = IMapping(
client=client_mock,
tree=mapping_tree,
depth=1,
index_name='classification_report_index_name'
)

workflow_field = client_bound_mapping.workflow
self.assertTrue(hasattr(workflow_field, 'a'))
# workflow type is String
self.assertIsInstance(workflow_field.a, field_classes_per_name['keyword'])

response = workflow_field.a.terms(
size=20,
output=None,
query={'term': {'classification_type': 'multiclass'}}
)
self.assertEqual(response, [
(1, {"doc_count": 25, "key": 1}),
(2, {"doc_count": 50, "key": 2}),
])
client_mock.search.assert_called_once()
client_mock.search.assert_called_with(
body={
'aggs': {'terms_agg': {'terms': {'field': 'workflow', 'size': 20}}},
'size': 0,
'query': {'term': {'classification_type': 'multiclass'}}
},
index='classification_report_index_name'
)

0 comments on commit 8d84c6f

Please sign in to comment.