diff --git a/pandagg/interactive/index.py b/pandagg/interactive/index.py index 57cd6798..fd9b8e7b 100644 --- a/pandagg/interactive/index.py +++ b/pandagg/interactive/index.py @@ -23,7 +23,7 @@ def set_mapping(self, mapping): self.mapping = IMapping( client=self.client, index_name=self.name, - tree=Mapping(mapping), + from_=Mapping(mapping), depth=1 ) diff --git a/pandagg/interactive/mapping.py b/pandagg/interactive/mapping.py index 8ddb5c31..e29dceb4 100644 --- a/pandagg/interactive/mapping.py +++ b/pandagg/interactive/mapping.py @@ -3,6 +3,7 @@ from pandagg.interactive._field_agg_factory import field_classes_per_name from pandagg.interactive.abstract import TreeBasedObj +from pandagg.node.mapping.field_datatypes import Object from pandagg.tree.mapping import Mapping @@ -22,11 +23,16 @@ class IMapping(TreeBasedObj): """ _NODE_PATH_ATTR = 'name' - def __init__(self, tree, client=None, root_path=None, depth=1, initial_tree=None, index_name=None): + def __init__(self, from_=None, properties=None, dynamic=False, client=None, root_path=None, depth=1, initial_tree=None, index_name=None): + if from_ is not None and properties is not None: + raise ValueError('Can provide at most one of "from_" and "properties"') + if properties is not None: + from_ = Object(name='', properties=properties, dynamic=dynamic) + tree = Mapping.deserialize(from_) + self._client = client self._index_name = index_name - if isinstance(tree, dict): - tree = Mapping(tree) + super(IMapping, self).__init__( tree=tree, root_path=root_path, @@ -46,7 +52,7 @@ def _bind(self, client, index_name=None): def _clone(self, nid, root_path, depth): return IMapping( client=self._client, - tree=self._tree.subtree(nid), + from_=self._tree.subtree(nid), root_path=root_path, depth=depth, initial_tree=self._initial_tree, diff --git a/tests/base/interactive/test_index.py b/tests/base/interactive/test_index.py index d64bfef4..394b418c 100644 --- a/tests/base/interactive/test_index.py +++ b/tests/base/interactive/test_index.py @@ -56,9 +56,6 @@ def test_index_agg(self): ) self.assertEqual(agg.__str__(), equivalent_agg.__str__()) - -class ClientBoundTestCase(TestCase): - @staticmethod def get_client_bound_index(es_response=None): client_mock = Elasticsearch() diff --git a/tests/base/interactive/test_mapping.py b/tests/base/interactive/test_mapping.py new file mode 100644 index 00000000..e18647d3 --- /dev/null +++ b/tests/base/interactive/test_mapping.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from mock import Mock +from unittest import TestCase + +from pandagg.interactive._field_agg_factory import field_classes_per_name +from pandagg.interactive.mapping import IMapping +from pandagg.node.mapping.abstract import Field +from pandagg.node.mapping.field_datatypes import Keyword, Text, Nested, Object, Integer +from pandagg.tree.mapping import Mapping + +from tests.base.mapping_example import MAPPING + + +class IMappingTestCase(TestCase): + + def test_mapping_aggregations(self): + mapping_tree = Mapping(from_=MAPPING) + # check that leaves are expanded, based on 'field_name' attribute of nodes + mapping = IMapping(from_=mapping_tree, depth=1) + for field_name in ('classification_type', 'date', 'global_metrics', 'id', 'language', 'local_metrics', 'workflow'): + self.assertTrue(hasattr(mapping, field_name)) + + workflow = mapping.workflow + # Check that calling a tree will return its root node. + workflow_node = workflow() + self.assertTrue(isinstance(workflow_node, Field)) + + def test_imapping_init(self): + + mapping_dict = { + "dynamic": False, + "properties": { + "classification_type": { + "type": "keyword", + "fields": { + "raw": { + "type": "text" + } + } + }, + "local_metrics": { + "type": "nested", + "dynamic": False, + "properties": { + "dataset": { + "dynamic": False, + "properties": { + "support_test": { + "type": "integer" + }, + "support_train": { + "type": "integer" + } + } + } + } + } + } + } + + mapping_tree = Mapping(from_=mapping_dict) + client_mock = Mock(spec=['search']) + index_name = 'classification_report_index_name' + + # from dict + im1 = IMapping( + client=client_mock, + from_=mapping_dict, + index_name=index_name + ) + # from tree + im2 = IMapping( + client=client_mock, + from_=mapping_tree, + index_name=index_name + ) + + # from nodes + im3 = IMapping( + properties={ + Keyword('classification_type', fields=[ + Text('raw') + ]), + Nested('local_metrics', dynamic=False, properties=[ + Object('dataset', dynamic=False, properties=[ + Integer('support_test'), + Integer('support_train') + ]) + ]) + }, + dynamic=False, + client=client_mock, + index_name=index_name + ) + for i, m in enumerate((im1, im2, im3)): + self.assertEqual(m._tree.serialize(), mapping_dict, "failed at m%d" % (i + 1)) + self.assertEqual(m._index_name, index_name) + self.assertIs(m._client, client_mock) + + def test_client_bound(self): + """Check that when reaching leaves (fields without children) leaves have the "a" attribute that can generate + aggregations on that field type. + """ + client_mock = Mock(spec=['search']) + es_response_mock = { + "_shards": { + "failed": 0, + "successful": 135, + "total": 135 + }, + "aggregations": { + "terms_agg": { + "buckets": [ + { + "doc_count": 25, + "key": 1 + }, + { + "doc_count": 50, + "key": 2 + } + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 4 + } + }, + "hits": { + "hits": [], + "max_score": 0.0, + "total": 300 + }, + "timed_out": False, + "took": 30 + } + client_mock.search = Mock(return_value=es_response_mock) + + mapping_tree = Mapping(from_=MAPPING) + client_bound_mapping = IMapping( + client=client_mock, + from_=mapping_tree, + index_name='classification_report_index_name' + ) + + workflow_field = client_bound_mapping.workflow + self.assertTrue(hasattr(workflow_field, 'a')) + # workflow type is String + self.assertIsInstance(workflow_field.a, field_classes_per_name['keyword']) + + response = workflow_field.a.terms( + size=20, + output=None, + query={'term': {'classification_type': 'multiclass'}} + ) + self.assertEqual(response, [ + (1, {"doc_count": 25, "key": 1}), + (2, {"doc_count": 50, "key": 2}), + ]) + client_mock.search.assert_called_once() + client_mock.search.assert_called_with( + body={ + 'aggs': {'terms_agg': {'terms': {'field': 'workflow', 'size': 20}}}, + 'size': 0, + 'query': {'term': {'classification_type': 'multiclass'}} + }, + index='classification_report_index_name' + ) diff --git a/tests/base/tree/test_aggs.py b/tests/base/tree/test_aggs.py index 51eca141..48523ae0 100644 --- a/tests/base/tree/test_aggs.py +++ b/tests/base/tree/test_aggs.py @@ -618,7 +618,7 @@ def test_groupby_method(self): def test_mapping_from_init(self): agg_from_dict_mapping = Agg(mapping=MAPPING) agg_from_tree_mapping = Agg(mapping=Mapping(from_=MAPPING)) - agg_from_obj_mapping = Agg(mapping=IMapping(tree=Mapping(from_=MAPPING))) + agg_from_obj_mapping = Agg(mapping=IMapping(from_=Mapping(from_=MAPPING))) self.assertEqual( agg_from_dict_mapping.tree_mapping.__repr__(), agg_from_tree_mapping.tree_mapping.__repr__() @@ -637,7 +637,7 @@ def test_set_mapping(self): agg_from_tree_mapping = Agg() \ .set_mapping(mapping=Mapping(from_=MAPPING)) agg_from_obj_mapping = Agg() \ - .set_mapping(mapping=IMapping(tree=Mapping(from_=MAPPING), client=None)) + .set_mapping(mapping=IMapping(from_=Mapping(from_=MAPPING), client=None)) self.assertEqual( agg_from_dict_mapping.tree_mapping.__repr__(), agg_from_tree_mapping.tree_mapping.__repr__() diff --git a/tests/base/tree/test_mapping.py b/tests/base/tree/test_mapping.py index c36d4c69..645e1f57 100644 --- a/tests/base/tree/test_mapping.py +++ b/tests/base/tree/test_mapping.py @@ -2,14 +2,11 @@ # -*- coding: utf-8 -*- from unittest import TestCase -from mock import Mock from pandagg.exceptions import AbsentMappingFieldError -from pandagg.interactive._field_agg_factory import field_classes_per_name from pandagg.node.mapping.abstract import Field from pandagg.node.mapping.field_datatypes import Keyword, Object, Text, Nested, Integer from pandagg.tree.mapping import Mapping -from pandagg.interactive.mapping import IMapping from tests.base.mapping_example import MAPPING, EXPECTED_MAPPING_TREE_REPR @@ -135,84 +132,3 @@ def test_node_path(self): self.assertIsInstance(node, Field) self.assertEqual(node.name, 'support_test') self.assertEqual(mapping_tree.node_path(node.identifier), 'local_metrics.dataset.support_test') - - def test_mapping_aggregations(self): - mapping_tree = Mapping(from_=MAPPING) - # check that leaves are expanded, based on 'field_name' attribute of nodes - mapping = IMapping(tree=mapping_tree, depth=1) - for field_name in ('classification_type', 'date', 'global_metrics', 'id', 'language', 'local_metrics', 'workflow'): - self.assertTrue(hasattr(mapping, field_name)) - - workflow = mapping.workflow - # Check that calling a tree will return its root node. - workflow_node = workflow() - self.assertTrue(isinstance(workflow_node, Field)) - - def test_client_bound(self): - """Check that when reaching leaves (fields without children) leaves have the "a" attribute that can generate - aggregations on that field type. - """ - client_mock = Mock(spec=['search']) - es_response_mock = { - "_shards": { - "failed": 0, - "successful": 135, - "total": 135 - }, - "aggregations": { - "terms_agg": { - "buckets": [ - { - "doc_count": 25, - "key": 1 - }, - { - "doc_count": 50, - "key": 2 - } - ], - "doc_count_error_upper_bound": 0, - "sum_other_doc_count": 4 - } - }, - "hits": { - "hits": [], - "max_score": 0.0, - "total": 300 - }, - "timed_out": False, - "took": 30 - } - client_mock.search = Mock(return_value=es_response_mock) - - mapping_tree = Mapping(from_=MAPPING) - client_bound_mapping = IMapping( - client=client_mock, - tree=mapping_tree, - depth=1, - index_name='classification_report_index_name' - ) - - workflow_field = client_bound_mapping.workflow - self.assertTrue(hasattr(workflow_field, 'a')) - # workflow type is String - self.assertIsInstance(workflow_field.a, field_classes_per_name['keyword']) - - response = workflow_field.a.terms( - size=20, - output=None, - query={'term': {'classification_type': 'multiclass'}} - ) - self.assertEqual(response, [ - (1, {"doc_count": 25, "key": 1}), - (2, {"doc_count": 50, "key": 2}), - ]) - client_mock.search.assert_called_once() - client_mock.search.assert_called_with( - body={ - 'aggs': {'terms_agg': {'terms': {'field': 'workflow', 'size': 20}}}, - 'size': 0, - 'query': {'term': {'classification_type': 'multiclass'}} - }, - index='classification_report_index_name' - )