From 14e9c62c200e08e49c695c4e4a138712421e71cd Mon Sep 17 00:00:00 2001 From: Leonard Binet Date: Mon, 4 May 2020 09:51:14 +0200 Subject: [PATCH 1/3] aggregations serialization tabular - choice of separator --- pandagg/response.py | 6 ++++-- tests/test_response.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandagg/response.py b/pandagg/response.py index bf378718..93d980ef 100644 --- a/pandagg/response.py +++ b/pandagg/response.py @@ -202,6 +202,7 @@ def serialize_as_tabular( row_as_tuple=False, grouped_by=None, expand_columns=True, + expand_sep='|', normalize=True, with_single_bucket_groups=False, ): @@ -260,13 +261,14 @@ def serialize_as_tabular( normalize=normalize, total_agg=grouping_agg, expand_columns=expand_columns, + expand_sep=expand_sep, ), ) for row_index, row_values in index_values ] return index_names, rows - def serialize_columns(self, row_data, normalize, expand_columns, total_agg=None): + def serialize_columns(self, row_data, normalize, expand_columns, expand_sep, total_agg=None): # extract value (usually 'doc_count') of grouping agg node result = {} if total_agg is not None and not isinstance(total_agg, ShadowRoot): @@ -285,7 +287,7 @@ def serialize_columns(self, row_data, normalize, expand_columns, total_agg=None) result[child.name] = child.extract_bucket_value(row_data[child.name]) elif expand_columns: for key, bucket in child.extract_buckets(row_data[child.name]): - result["%s|%s" % (child.name, key)] = child.extract_bucket_value( + result["%s%s%s" % (child.name, expand_sep, key)] = child.extract_bucket_value( bucket ) elif normalize: diff --git a/tests/test_response.py b/tests/test_response.py index 8b2d9aa6..79dc3514 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -11,7 +11,7 @@ from pandagg.tree.aggs import Aggs import tests.testing_samples.data_sample as sample -from pandagg.utils import equal_queries, ordered +from pandagg.utils import ordered from tests.testing_samples.mapping_example import 
MAPPING From b65c36d6cdc7d663e369340c1acd7733becc9456 Mon Sep 17 00:00:00 2001 From: Leonard Binet Date: Fri, 8 May 2020 22:14:04 +0200 Subject: [PATCH 2/3] clarifications --- examples/imdb/load.py | 79 +++++++++++++---------- pandagg/discovery.py | 2 +- pandagg/interactive/_field_agg_factory.py | 50 +++++++------- pandagg/interactive/mapping.py | 13 +--- pandagg/node/aggs/abstract.py | 4 +- pandagg/node/aggs/bucket.py | 14 +++- pandagg/response.py | 30 ++++++--- pandagg/search.py | 10 +-- pandagg/tree/aggs.py | 3 +- pandagg/tree/mapping.py | 5 +- pandagg/tree/query.py | 8 +-- tests/interactive/test_mapping.py | 12 ++-- tests/test_response.py | 41 ++++++++++++ 13 files changed, 162 insertions(+), 109 deletions(-) diff --git a/examples/imdb/load.py b/examples/imdb/load.py index 6c3b3bf7..eef6f375 100644 --- a/examples/imdb/load.py +++ b/examples/imdb/load.py @@ -2,43 +2,52 @@ from os.path import join from elasticsearch import Elasticsearch, helpers from examples.imdb.conf import ES_HOST, DATA_DIR +from pandagg.node.mapping.field_datatypes import ( + Keyword, + Text, + Date, + Float, + Nested, + Integer, +) +from pandagg.tree.mapping import Mapping index_name = "movies" -mapping = { - "properties": { - "movie_id": {"type": "integer"}, - "name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - "year": {"type": "date", "format": "yyyy"}, - "rank": {"type": "float"}, - # array - "genres": {"type": "keyword"}, - # nested - "roles": { - "type": "nested", - "properties": { - "role": {"type": "keyword"}, - "actor_id": {"type": "integer"}, - "gender": {"type": "keyword"}, - "first_name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - "last_name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - "full_name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - }, - }, - # nested - "directors": { - "type": "nested", - "properties": { - "director_id": {"type": "integer"}, - "first_name": {"type": "text", "fields": {"raw": {"type": 
"keyword"}}}, - "last_name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - "full_name": {"type": "text", "fields": {"raw": {"type": "keyword"}}}, - "genres": {"type": "keyword"}, - }, - }, - "nb_directors": {"type": "integer"}, - "nb_roles": {"type": "integer"}, - } -} +mapping = Mapping( + properties=[ + Keyword("movie_id"), + Text("name", fields=Keyword("raw")), + Date("year"), + Float("rank"), + Keyword("genres"), + Nested( + "roles", + properties=[ + Keyword("role"), + Keyword("actor_id"), + Keyword("gender"), + Text("first_name", copy_to="roles.full_name", fields=Keyword("raw")), + Text("last_name", copy_to="roles.full_name", fields=Keyword("raw")), + Text("full_name"), + ], + ), + Nested( + "directors", + properties=[ + Keyword("role"), + Keyword("director_id"), + Keyword("gender"), + Text( + "first_name", copy_to="directors.full_name", fields=Keyword("raw") + ), + Text("last_name", copy_to="directors.full_name", fields=Keyword("raw")), + Text("full_name"), + ], + ), + Integer("nb_directors"), + Integer("nb_roles"), + ] +).to_dict() def bulk_index(client, docs): diff --git a/pandagg/discovery.py b/pandagg/discovery.py index 5649d058..d5834af4 100644 --- a/pandagg/discovery.py +++ b/pandagg/discovery.py @@ -37,7 +37,7 @@ def __init__(self, name, settings, mapping, aliases, client=None): self.name = name self.settings = settings self._mapping = mapping - self.mapping = IMapping(mapping, client=client, index_name=name) + self.mapping = IMapping(mapping, client=client, index=name) self.aliases = aliases def search(self): diff --git a/pandagg/interactive/_field_agg_factory.py b/pandagg/interactive/_field_agg_factory.py index 794ac1d5..ba24787b 100644 --- a/pandagg/interactive/_field_agg_factory.py +++ b/pandagg/interactive/_field_agg_factory.py @@ -24,50 +24,46 @@ def list_available_aggs_on_field(field_type): ] -def field_klass_init(self, mapping_tree, client, field, index_name): +def field_klass_init(self, mapping_tree, client, field, index): 
self._mapping_tree = mapping_tree self._client = client self._field = field - self._index_name = index_name + self._index = index def aggregator_factory(agg_klass): - def aggregator( - self, index=None, execute=True, raw_output=False, query=None, **kwargs - ): + def aggregator(self, index=None, raw_output=False, query=None, **kwargs): node = agg_klass(name="%s_agg" % agg_klass.KEY, field=self._field, **kwargs) - return self._operate(node, index, execute, raw_output, query) + return self._operate(node, index, raw_output, query) aggregator.__doc__ = agg_klass.__init__.__doc__ or agg_klass.__doc__ return aggregator -def _operate(self, agg_node, index, execute, raw_output, query): - index = index or self._index_name +def _operate(self, agg_node, index, raw_output, query): + index = index or self._index aggregation = {agg_node.name: agg_node.to_dict()} nesteds = self._mapping_tree.list_nesteds_at_field(self._field) or [] for nested in nesteds: aggregation = {nested: {"nested": {"path": nested}, "aggs": aggregation}} - if self._client is not None and execute: - body = {"aggs": aggregation, "size": 0} - if query is not None: - body["query"] = query - raw_response = self._client.search(index=index, body=body)["aggregations"] - for nested in nesteds: - raw_response = raw_response[nested] - result = list(agg_node.extract_buckets(raw_response[agg_node.name])) - - if raw_output: - return result - try: - import pandas as pd - except ImportError: - return result - keys = map(itemgetter(0), result) - raw_values = map(itemgetter(1), result) - return pd.DataFrame(index=keys, data=raw_values) - return aggregation + body = {"aggs": aggregation, "size": 0} + if query is not None: + body["query"] = query + raw_response = self._client.search(index=index, body=body)["aggregations"] + for nested in nesteds: + raw_response = raw_response[nested] + result = list(agg_node.extract_buckets(raw_response[agg_node.name])) + + if raw_output: + return result + try: + import pandas as pd + except 
ImportError: + return result + keys = map(itemgetter(0), result) + raw_values = map(itemgetter(1), result) + return pd.DataFrame(index=keys, data=raw_values) def field_type_klass_factory(field_type): diff --git a/pandagg/interactive/mapping.py b/pandagg/interactive/mapping.py index 64f97d34..7bab2dcb 100644 --- a/pandagg/interactive/mapping.py +++ b/pandagg/interactive/mapping.py @@ -14,7 +14,7 @@ class IMapping(TreeBasedObj): def __init__(self, *args, **kwargs): self._client = kwargs.pop("client", None) - self._index_name = kwargs.pop("index_name", None) + self._index = kwargs.pop("index", None) root_path = kwargs.pop("root_path", None) depth = kwargs.pop("depth", 1) initial_tree = kwargs.pop("initial_tree", None) @@ -25,13 +25,6 @@ def __init__(self, *args, **kwargs): # if we reached a leave, add aggregation capabilities based on reached mapping type self._set_agg_property_if_required() - def _bind(self, client, index_name=None): - self._client = client - if index_name is not None: - self._index_name = index_name - self._set_agg_property_if_required() - return self - def _clone(self, nid, root_path, depth): return IMapping( self._tree.subtree(nid), @@ -39,7 +32,7 @@ def _clone(self, nid, root_path, depth): root_path=root_path, depth=depth, initial_tree=self._initial_tree, - index_name=self._index_name, + index=self._index, ) def _set_agg_property_if_required(self): @@ -50,7 +43,7 @@ def _set_agg_property_if_required(self): mapping_tree=self._initial_tree, client=self._client, field=self._initial_tree.node_path(field_node.identifier), - index_name=self._index_name, + index=self._index, ) def __call__(self, *args, **kwargs): diff --git a/pandagg/node/aggs/abstract.py b/pandagg/node/aggs/abstract.py index eb80219d..dd9af88e 100644 --- a/pandagg/node/aggs/abstract.py +++ b/pandagg/node/aggs/abstract.py @@ -50,10 +50,10 @@ def _type_deserializer(cls, name_or_agg, **params): - either Agg instance if provided """ # hack for now - if isinstance(name_or_agg, Tree) and 
name_or_agg.__class__.__name__ == "Agg": + if isinstance(name_or_agg, Tree) and name_or_agg.__class__.__name__ == "Aggs": if params: raise ValueError( - "Cannot accept parameters when passing in an Agg object." + "Cannot accept parameters when passing in an Aggs object." ) return name_or_agg diff --git a/pandagg/node/aggs/bucket.py b/pandagg/node/aggs/bucket.py index e00c5663..0460f2fa 100644 --- a/pandagg/node/aggs/bucket.py +++ b/pandagg/node/aggs/bucket.py @@ -34,9 +34,17 @@ class Filter(UniqueBucketAgg): KEY = "filter" VALUE_ATTRS = ["doc_count"] - def __init__(self, name, filter, meta=None, aggs=None): - self.filter = filter.copy() - super(Filter, self).__init__(name=name, meta=meta, aggs=aggs, **filter) + def __init__(self, name, filter=None, meta=None, aggs=None, **kwargs): + if (filter is not None) != (not kwargs): + raise ValueError( + 'Filter aggregation requires exactly one of "filter" or "kwargs"' + ) + if filter: + filter_ = filter.copy() + else: + filter_ = kwargs.copy() + self.filter = filter_ + super(Filter, self).__init__(name=name, meta=meta, aggs=aggs, **filter_) def get_filter(self, key): return self.filter diff --git a/pandagg/response.py b/pandagg/response.py index 93d980ef..0ccb221f 100644 --- a/pandagg/response.py +++ b/pandagg/response.py @@ -189,12 +189,22 @@ def _normalize_buckets(self, agg_response, agg_name=None): yield result def _grouping_agg(self, name=None): - """return agg node or None""" - name = self.__aggs.deepest_linear_bucket_agg if name is None else name + """Return aggregation node that used as grouping node.""" + # if provided + if name is not None: + if name not in self.__aggs: + raise ValueError("Cannot group by <%s>, agg node does not exist" % name) + if not self.__aggs._is_eligible_grouping_node(name): + raise ValueError( + "Cannot group by <%s>, not a valid grouping aggregation" % name + ) + return self.__aggs.get(name) + + if isinstance(self.__aggs.get(self.__aggs.root), ShadowRoot): + return None + name = 
self.__aggs.deepest_linear_bucket_agg if name is None: return None - if name not in self.__aggs: - raise ValueError("Cannot group by <%s>, agg node does not exist" % name) return self.__aggs.get(name) def serialize_as_tabular( @@ -202,7 +212,7 @@ def serialize_as_tabular( row_as_tuple=False, grouped_by=None, expand_columns=True, - expand_sep='|', + expand_sep="|", normalize=True, with_single_bucket_groups=False, ): @@ -268,7 +278,9 @@ def serialize_as_tabular( ] return index_names, rows - def serialize_columns(self, row_data, normalize, expand_columns, expand_sep, total_agg=None): + def serialize_columns( + self, row_data, normalize, expand_columns, expand_sep, total_agg=None + ): # extract value (usually 'doc_count') of grouping agg node result = {} if total_agg is not None and not isinstance(total_agg, ShadowRoot): @@ -287,9 +299,9 @@ def serialize_columns(self, row_data, normalize, expand_columns, expand_sep, tot result[child.name] = child.extract_bucket_value(row_data[child.name]) elif expand_columns: for key, bucket in child.extract_buckets(row_data[child.name]): - result["%s%s%s" % (child.name, expand_sep, key)] = child.extract_bucket_value( - bucket - ) + result[ + "%s%s%s" % (child.name, expand_sep, key) + ] = child.extract_bucket_value(bucket) elif normalize: result[child.name] = next( self._normalize_buckets(row_data, child.name), None diff --git a/pandagg/search.py b/pandagg/search.py index 073da5e4..9bf9ee16 100644 --- a/pandagg/search.py +++ b/pandagg/search.py @@ -186,12 +186,12 @@ def __getitem__(self, n): s._params["size"] = 1 return s - def from_(self, from_): - s = self._clone() - s._params["from"] = from_ - return s - def size(self, size): + """Equivalent to:: + + s = Search().params(size=size) + + """ s = self._clone() s._params["size"] = size return s diff --git a/pandagg/tree/aggs.py b/pandagg/tree/aggs.py index 1ba5914a..7ca1a795 100644 --- a/pandagg/tree/aggs.py +++ b/pandagg/tree/aggs.py @@ -66,9 +66,8 @@ def _clone_init(self, deep=False): 
return Aggs(mapping=self.mapping.clone(deep=deep)) def _is_eligible_grouping_node(self, nid): + """Return whether node can be used as grouping node.""" node = self.get(nid) - if isinstance(node, ShadowRoot): - return False if not isinstance(node, BucketAggNode): return False # special aggregations not returning anything diff --git a/pandagg/tree/mapping.py b/pandagg/tree/mapping.py index de9e4555..a0ca3ec7 100644 --- a/pandagg/tree/mapping.py +++ b/pandagg/tree/mapping.py @@ -23,7 +23,10 @@ class Mapping(Tree): def __init__(self, *args, **kwargs): super(Mapping, self).__init__() if (args and kwargs) or len(args) > 1: - raise ValueError() + raise ValueError( + "Invalid mapping declaration. Got:\n*args: %s\n**kwargs: %s" + % (args, kwargs) + ) if args: arg = args[0] if isinstance(arg, Mapping): diff --git a/pandagg/tree/query.py b/pandagg/tree/query.py index 116f3683..043803e2 100644 --- a/pandagg/tree/query.py +++ b/pandagg/tree/query.py @@ -173,13 +173,7 @@ def _insert_into( child=None, child_param=None, ): - """Insert node in query. - :param inserted: - :param mode: - :param parent: - :param parent_param: - :param child: - :param child_param: + """Insert element (node or tree) in query. If compound query with existing identifier: merge according to mode (place in-between parent and child). If no parent nor child is provided, place on top (wrapped in bool-must if necessary). 
diff --git a/tests/interactive/test_mapping.py b/tests/interactive/test_mapping.py index aaabd115..914ec209 100644 --- a/tests/interactive/test_mapping.py +++ b/tests/interactive/test_mapping.py @@ -64,9 +64,9 @@ def test_imapping_init(self): index_name = "classification_report_index_name" # from dict - im1 = IMapping(mapping_dict, client=client_mock, index_name=index_name) + im1 = IMapping(mapping_dict, client=client_mock, index=index_name) # from tree - im2 = IMapping(mapping_tree, client=client_mock, index_name=index_name) + im2 = IMapping(mapping_tree, client=client_mock, index=index_name) # from nodes im3 = IMapping( @@ -89,13 +89,13 @@ def test_imapping_init(self): }, dynamic=False, client=client_mock, - index_name=index_name, + index=index_name, ) for i, m in enumerate((im1, im2, im3)): self.assertEqual( m._tree.serialize(), mapping_dict, "failed at m%d" % (i + 1) ) - self.assertEqual(m._index_name, index_name) + self.assertEqual(m._index, index_name) self.assertIs(m._client, client_mock) def test_client_bound(self): @@ -123,9 +123,7 @@ def test_client_bound(self): mapping_tree = Mapping(MAPPING) client_bound_mapping = IMapping( - mapping_tree, - client=client_mock, - index_name="classification_report_index_name", + mapping_tree, client=client_mock, index="classification_report_index_name", ) workflow_field = client_bound_mapping.workflow diff --git a/tests/test_response.py b/tests/test_response.py index 79dc3514..b34f55f6 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -173,6 +173,47 @@ def test_parse_as_tabular(self): ], ) + def test_parse_as_tabular_multiple_roots(self): + # with multiple aggs at root + my_agg = Aggs( + { + "classification_type": {"terms": {"field": "classification_type"}}, + "avg_f1_score": { + "avg": {"field": "global_metrics.performance.test.micro.f1_score"} + }, + } + ) + + raw_response = { + "classification_type": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + {"key": 
"multiclass", "doc_count": 439}, + {"key": "multilabel", "doc_count": 433}, + ], + }, + "avg_f1_score": {"value": 0.815}, + } + index_names, index_values = Aggregations( + data=raw_response, aggs=my_agg, index=None, client=None, query=None, + ).serialize_as_tabular(row_as_tuple=True, expand_sep=" || ") + + self.assertEqual(index_names, []) + self.assertEqual( + index_values, + [ + ( + (), + { + "avg_f1_score": 0.815, + "classification_type || multiclass": 439, + "classification_type || multilabel": 433, + }, + ) + ], + ) + def test_parse_as_dataframe(self): my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING) df = Aggregations( From 46b2b4db1245547619256c4950510e77493c36fd Mon Sep 17 00:00:00 2001 From: Leonard Binet Date: Fri, 8 May 2020 23:47:07 +0200 Subject: [PATCH 3/3] aggs tests checking to_dict output --- docs/source/reference/pandagg.agg.rst | 7 - docs/source/reference/pandagg.client.rst | 7 - .../pandagg.interactive.abstract.rst | 7 - .../reference/pandagg.interactive.client.rst | 7 - .../reference/pandagg.interactive.index.rst | 7 - .../reference/pandagg.node.agg.abstract.rst | 7 - .../reference/pandagg.node.agg.bucket.rst | 7 - .../pandagg.node.agg.deserializer.rst | 7 - .../reference/pandagg.node.agg.metric.rst | 7 - .../reference/pandagg.node.agg.pipeline.rst | 7 - docs/source/reference/pandagg.node.agg.rst | 21 - .../pandagg.node.mapping.deserializer.rst | 7 - docs/source/reference/pandagg.node.mixins.rst | 7 - .../pandagg.node.query.deserializer.rst | 7 - docs/source/reference/pandagg.tree.agg.rst | 7 - pandagg/node/aggs/abstract.py | 4 +- pandagg/search.py | 14 + pandagg/tree/aggs.py | 62 +- tests/testing_samples/data_sample.py | 7 - tests/tree/test_aggs.py | 570 +++++++++++------- 20 files changed, 405 insertions(+), 371 deletions(-) delete mode 100644 docs/source/reference/pandagg.agg.rst delete mode 100644 docs/source/reference/pandagg.client.rst delete mode 100644 docs/source/reference/pandagg.interactive.abstract.rst delete mode 100644 
docs/source/reference/pandagg.interactive.client.rst delete mode 100644 docs/source/reference/pandagg.interactive.index.rst delete mode 100644 docs/source/reference/pandagg.node.agg.abstract.rst delete mode 100644 docs/source/reference/pandagg.node.agg.bucket.rst delete mode 100644 docs/source/reference/pandagg.node.agg.deserializer.rst delete mode 100644 docs/source/reference/pandagg.node.agg.metric.rst delete mode 100644 docs/source/reference/pandagg.node.agg.pipeline.rst delete mode 100644 docs/source/reference/pandagg.node.agg.rst delete mode 100644 docs/source/reference/pandagg.node.mapping.deserializer.rst delete mode 100644 docs/source/reference/pandagg.node.mixins.rst delete mode 100644 docs/source/reference/pandagg.node.query.deserializer.rst delete mode 100644 docs/source/reference/pandagg.tree.agg.rst diff --git a/docs/source/reference/pandagg.agg.rst b/docs/source/reference/pandagg.agg.rst deleted file mode 100644 index 3a896741..00000000 --- a/docs/source/reference/pandagg.agg.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.agg module -================== - -.. automodule:: pandagg.agg - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.client.rst b/docs/source/reference/pandagg.client.rst deleted file mode 100644 index 4c70082a..00000000 --- a/docs/source/reference/pandagg.client.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.client module -===================== - -.. automodule:: pandagg.client - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.interactive.abstract.rst b/docs/source/reference/pandagg.interactive.abstract.rst deleted file mode 100644 index 9c6710fa..00000000 --- a/docs/source/reference/pandagg.interactive.abstract.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.interactive.abstract module -=================================== - -.. 
automodule:: pandagg.interactive.abstract - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.interactive.client.rst b/docs/source/reference/pandagg.interactive.client.rst deleted file mode 100644 index 910fd0ce..00000000 --- a/docs/source/reference/pandagg.interactive.client.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.interactive.client module -================================= - -.. automodule:: pandagg.interactive.client - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.interactive.index.rst b/docs/source/reference/pandagg.interactive.index.rst deleted file mode 100644 index 9f8a9c8c..00000000 --- a/docs/source/reference/pandagg.interactive.index.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.interactive.index module -================================ - -.. automodule:: pandagg.interactive.index - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.abstract.rst b/docs/source/reference/pandagg.node.agg.abstract.rst deleted file mode 100644 index 20ed2e40..00000000 --- a/docs/source/reference/pandagg.node.agg.abstract.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.agg.abstract module -================================ - -.. automodule:: pandagg.node.agg.abstract - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.bucket.rst b/docs/source/reference/pandagg.node.agg.bucket.rst deleted file mode 100644 index 48e3ed41..00000000 --- a/docs/source/reference/pandagg.node.agg.bucket.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.agg.bucket module -============================== - -.. 
automodule:: pandagg.node.agg.bucket - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.deserializer.rst b/docs/source/reference/pandagg.node.agg.deserializer.rst deleted file mode 100644 index fcd2acc7..00000000 --- a/docs/source/reference/pandagg.node.agg.deserializer.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.agg.deserializer module -==================================== - -.. automodule:: pandagg.node.agg.deserializer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.metric.rst b/docs/source/reference/pandagg.node.agg.metric.rst deleted file mode 100644 index 14771923..00000000 --- a/docs/source/reference/pandagg.node.agg.metric.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.agg.metric module -============================== - -.. automodule:: pandagg.node.agg.metric - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.pipeline.rst b/docs/source/reference/pandagg.node.agg.pipeline.rst deleted file mode 100644 index 08651f5f..00000000 --- a/docs/source/reference/pandagg.node.agg.pipeline.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.agg.pipeline module -================================ - -.. automodule:: pandagg.node.agg.pipeline - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.agg.rst b/docs/source/reference/pandagg.node.agg.rst deleted file mode 100644 index 2e585566..00000000 --- a/docs/source/reference/pandagg.node.agg.rst +++ /dev/null @@ -1,21 +0,0 @@ -pandagg.node.agg package -======================== - -Submodules ----------- - -.. toctree:: - :maxdepth: 8 - - pandagg.node.agg.abstract - pandagg.node.agg.bucket - pandagg.node.agg.metric - pandagg.node.agg.pipeline - -Module contents ---------------- - -.. 
automodule:: pandagg.node.agg - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.mapping.deserializer.rst b/docs/source/reference/pandagg.node.mapping.deserializer.rst deleted file mode 100644 index 5dcbf7b6..00000000 --- a/docs/source/reference/pandagg.node.mapping.deserializer.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.mapping.deserializer module -======================================== - -.. automodule:: pandagg.node.mapping.deserializer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.mixins.rst b/docs/source/reference/pandagg.node.mixins.rst deleted file mode 100644 index b3d0927e..00000000 --- a/docs/source/reference/pandagg.node.mixins.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.mixins module -========================== - -.. automodule:: pandagg.node.mixins - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.node.query.deserializer.rst b/docs/source/reference/pandagg.node.query.deserializer.rst deleted file mode 100644 index d2ade350..00000000 --- a/docs/source/reference/pandagg.node.query.deserializer.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.node.query.deserializer module -====================================== - -.. automodule:: pandagg.node.query.deserializer - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/source/reference/pandagg.tree.agg.rst b/docs/source/reference/pandagg.tree.agg.rst deleted file mode 100644 index a0e16fe3..00000000 --- a/docs/source/reference/pandagg.tree.agg.rst +++ /dev/null @@ -1,7 +0,0 @@ -pandagg.tree.agg module -======================= - -.. 
automodule:: pandagg.tree.agg - :members: - :undoc-members: - :show-inheritance: diff --git a/pandagg/node/aggs/abstract.py b/pandagg/node/aggs/abstract.py index dd9af88e..db1a5ea1 100644 --- a/pandagg/node/aggs/abstract.py +++ b/pandagg/node/aggs/abstract.py @@ -103,11 +103,13 @@ def _type_deserializer(cls, name_or_agg, **params): if not isinstance(name_or_agg, string_types): raise ValueError("Invalid") # "tags", size=10 (by default apply a terms agg) - if "name" not in params: + if "name" not in params and "field" not in params: return cls.get_dsl_class("terms")( name=name_or_agg, field=name_or_agg, **params ) # "terms", field="tags", name="per_tags" + if "name" not in params: + raise ValueError("Aggregation expects a 'name'. Got %s." % params) return cls.get_dsl_class(name_or_agg)(**params) def line_repr(self, depth, **kwargs): diff --git a/pandagg/search.py b/pandagg/search.py index 9bf9ee16..478986ff 100644 --- a/pandagg/search.py +++ b/pandagg/search.py @@ -118,36 +118,50 @@ def query(self, *args, **kwargs): s._query = s._query.query(*args, **kwargs) return s + query.__doc__ = Query.query.__doc__ + def filter(self, *args, **kwargs): s = self._clone() s._query = s._query.filter(*args, **kwargs) return s + filter.__doc__ = Query.filter.__doc__ + def must_not(self, *args, **kwargs): s = self._clone() s._query = s._query.must_not(*args, **kwargs) return s + must_not.__doc__ = Query.must_not.__doc__ + def should(self, *args, **kwargs): s = self._clone() s._query = s._query.should(*args, **kwargs) return s + should.__doc__ = Query.should.__doc__ + def must(self, *args, **kwargs): s = self._clone() s._query = s._query.must(*args, **kwargs) return s + must.__doc__ = Query.must.__doc__ + def aggs(self, *args, **kwargs): s = self._clone() s._aggs = s._aggs.aggs(*args, **kwargs) return s + aggs.__doc__ = Aggs.aggs.__doc__ + def groupby(self, *args, **kwargs): s = self._clone() s._aggs = s._aggs.groupby(*args, **kwargs) return s + groupby.__doc__ = Aggs.groupby.__doc__ + 
 def __iter__(self): """ Iterate over the hits. diff --git a/pandagg/tree/aggs.py b/pandagg/tree/aggs.py index 7ca1a795..a1ef7803 100644 --- a/pandagg/tree/aggs.py +++ b/pandagg/tree/aggs.py @@ -3,8 +3,8 @@ from __future__ import unicode_literals +import json -from builtins import str as text from future.utils import python_2_unicode_compatible from pandagg.tree._tree import Tree @@ -120,7 +120,6 @@ def _validate_aggs_parent_id(self, pid): return pid leaves = self.leaves(id_only=False) # root - # TODO if len(leaves) == 0: return None @@ -132,8 +131,7 @@ def _validate_aggs_parent_id(self, pid): return leaves[0].identifier def groupby(self, *args, **kwargs): - """Arrange passed aggregations in `by` arguments "vertically" (nested manner), above or below another agg - clause. + r"""Arrange passed aggregations in vertical/nested manner, above or below another agg clause. Given the initial aggregation:: @@ -164,10 +162,48 @@ def groupby(self, *args, **kwargs): A──> B : KO, ambiguous, must precise either A, B or C └──> C - :param by: aggregation(s) clauses to insert "vertically" - :param insert_below: parent aggregation id under which these aggregations should be placed - :param insert_above: aggregation id above which these aggregations should be placed - :param kwargs: agg body arguments when using "string" syntax for terms aggregation + + Accepted declarations for single aggregation: + + Official DSL like: + + >>> Aggs().groupby('terms', name='per_user_id', field='user_id') + {"per_user_id":{"terms":{"field":"user_id"}}} + + Passing a dict: + + >>> Aggs().groupby({"terms_on_my_field":{"terms":{"field":"some_field"}}}) + {"terms_on_my_field":{"terms":{"field":"some_field"}}} + + + Using DSL class: + + >>> from pandagg.aggs import Terms + >>> Aggs().groupby(Terms('terms_on_my_field', field='some_field')) + {"terms_on_my_field":{"terms":{"field":"some_field"}}} + + Shortcut syntax for terms aggregation: creates a terms aggregation, using field as aggregation name 
 + + >>> Aggs().groupby('some_field') + {"some_field":{"terms":{"field":"some_field"}}} + + Using an Aggs object: + + >>> Aggs().groupby(Aggs('terms', name='per_user_id', field='user_id')) + {"per_user_id":{"terms":{"field":"user_id"}}} + + Accepted declarations for multiple aggregations: + + + :Keyword Arguments: + * *insert_below* (``string``) -- + Parent aggregation name under which these aggregations should be placed + * *insert_above* (``string``) -- + Aggregation name above which these aggregations should be placed + + * remaining kwargs: + Used as body in aggregation + :rtype: pandagg.aggs.Aggs """ insert_below = kwargs.pop("insert_below", None) @@ -182,12 +218,16 @@ def groupby(self, *args, **kwargs): # groupby({}, {}) if len(args) > 1: if kwargs: - raise ValueError() + raise ValueError( + "Kwargs not allowed when passing multiple aggregations in args." 
+ ) inserted_aggs = [self.deserialize(arg) for arg in args[0]] # groupby({}) # groupby(Terms()) @@ -378,4 +418,4 @@ def _insert_node_below(self, node, parent_id, with_children=True): ) def __str__(self): - return "\n%s" % text(self.show()) + return json.dumps(self.to_dict(), indent=2) diff --git a/tests/testing_samples/data_sample.py b/tests/testing_samples/data_sample.py index a7ce3554..d1631b8b 100644 --- a/tests/testing_samples/data_sample.py +++ b/tests/testing_samples/data_sample.py @@ -11,13 +11,6 @@ from tests.testing_samples.mapping_example import MAPPING -EXPECTED_REPR = """ -[classification_type] terms -└── [global_metrics.field.name] terms - ├── [avg_f1_micro] avg - └── [avg_nb_classes] avg -""" - EXPECTED_AGG_QUERY = { "classification_type": { "aggs": { diff --git a/tests/tree/test_aggs.py b/tests/tree/test_aggs.py index 27ce618f..cb6dd190 100644 --- a/tests/tree/test_aggs.py +++ b/tests/tree/test_aggs.py @@ -14,7 +14,6 @@ AbsentMappingFieldError, InvalidOperationMappingFieldError, ) -from pandagg.tree.mapping import Mapping from pandagg.node.aggs.bucket import DateHistogram, Terms, Filter from pandagg.node.aggs.metric import Avg, Min @@ -129,12 +128,24 @@ def test_add_node_with_mapping(self): ) self.assertEqual(len(with_mapping.list()), 3) self.assertEqual( - with_mapping.__str__(), - """ -[workflow] terms -└── [nested_below_workflow] nested - └── [local_f1_score] avg -""", + with_mapping.to_dict(), + { + "workflow": { + "aggs": { + "nested_below_workflow": { + "aggs": { + "local_f1_score": { + "avg": { + "field": "local_metrics.performance.test.f1_score" + } + } + }, + "nested": {"path": "local_metrics"}, + } + }, + "terms": {"field": "workflow"}, + } + }, ) self.assertIn("nested_below_workflow", with_mapping) nested_node = with_mapping.get("nested_below_workflow") @@ -149,13 +160,29 @@ def test_add_node_with_mapping(self): parent_id="workflow", ) self.assertEqual( - with_mapping.__str__(), - """ -[workflow] terms -└── [nested_below_workflow] nested - 
├── [local_f1_score] avg - └── [local_precision] avg -""", + with_mapping.to_dict(), + { + "workflow": { + "aggs": { + "nested_below_workflow": { + "aggs": { + "local_f1_score": { + "avg": { + "field": "local_metrics.performance.test.f1_score" + } + }, + "local_precision": { + "avg": { + "field": "local_metrics.performance.test.precision" + } + }, + }, + "nested": {"path": "local_metrics"}, + } + }, + "terms": {"field": "workflow"}, + } + }, ) self.assertEqual(len(with_mapping.list()), 4) @@ -167,15 +194,37 @@ def test_add_node_with_mapping(self): ) self.assertEqual(len(with_mapping.list()), 6) self.assertEqual( - with_mapping.__str__(), - """ -[workflow] terms -└── [nested_below_workflow] nested - ├── [local_f1_score] avg - ├── [local_precision] avg - └── [reverse_nested_below_nested_below_workflow] reverse_nested - └── [language_terms] terms -""", + with_mapping.to_dict(), + { + "workflow": { + "aggs": { + "nested_below_workflow": { + "aggs": { + "local_f1_score": { + "avg": { + "field": "local_metrics.performance.test.f1_score" + } + }, + "local_precision": { + "avg": { + "field": "local_metrics.performance.test.precision" + } + }, + "reverse_nested_below_nested_below_workflow": { + "aggs": { + "language_terms": { + "terms": {"field": "language"} + } + }, + "reverse_nested": {}, + }, + }, + "nested": {"path": "local_metrics"}, + } + }, + "terms": {"field": "workflow"}, + } + }, ) def test_add_node_without_mapping(self): @@ -230,12 +279,29 @@ def test_paste_tree_with_mapping(self): {"week", "nested_below_week", "local_metrics.field_class.name"}, ) self.assertEqual( - initial_agg_1.__str__(), - """ -[week] date_histogram -└── [nested_below_week] nested - └── [local_metrics.field_class.name] terms -""", + initial_agg_1.to_dict(), + { + "week": { + "date_histogram": { + "field": "date", + "format": "yyyy-MM-dd", + "interval": "1w", + }, + "aggs": { + "nested_below_week": { + "nested": {"path": "local_metrics"}, + "aggs": { + "local_metrics.field_class.name": { + 
"terms": { + "field": "local_metrics.field_class.name", + "size": 10, + } + } + }, + } + }, + } + }, ) # without explicit nested @@ -269,12 +335,29 @@ def test_paste_tree_with_mapping(self): {"week", "nested_below_week", "local_metrics.field_class.name"}, ) self.assertEqual( - initial_agg_2.__str__(), - """ -[week] date_histogram -└── [nested_below_week] nested - └── [local_metrics.field_class.name] terms -""", + initial_agg_2.to_dict(), + { + "week": { + "date_histogram": { + "field": "date", + "format": "yyyy-MM-dd", + "interval": "1w", + }, + "aggs": { + "nested_below_week": { + "nested": {"path": "local_metrics"}, + "aggs": { + "local_metrics.field_class.name": { + "terms": { + "field": "local_metrics.field_class.name", + "size": 10, + } + } + }, + } + }, + } + }, ) def test_insert_tree_without_mapping(self): @@ -289,7 +372,6 @@ def test_insert_tree_without_mapping(self): } } }, - mapping=None, ) self.assertEqual({n.identifier for n in initial_agg_1.list()}, {"week"}) @@ -319,50 +401,29 @@ def test_insert_tree_without_mapping(self): {"week", "nested_below_week", "local_metrics.field_class.name"}, ) self.assertEqual( - initial_agg_1.__str__(), - """ -[week] date_histogram -└── [nested_below_week] nested - └── [local_metrics.field_class.name] terms -""", - ) - - # without explicit nested (will NOT add nested) - initial_agg_2 = Aggs( + initial_agg_1.to_dict(), { "week": { "date_histogram": { "field": "date", "format": "yyyy-MM-dd", "interval": "1w", - } + }, + "aggs": { + "nested_below_week": { + "nested": {"path": "local_metrics"}, + "aggs": { + "local_metrics.field_class.name": { + "terms": { + "field": "local_metrics.field_class.name", + "size": 10, + } + } + }, + } + }, } }, - mapping=None, - ) - self.assertEqual(to_id_set(initial_agg_2.list()), {"week"}) - - pasted_agg_2 = Aggs( - { - "local_metrics.field_class.name": { - "terms": {"field": "local_metrics.field_class.name", "size": 10} - } - } - ) - self.assertEqual( - to_id_set(pasted_agg_2.list()), 
{"local_metrics.field_class.name"} - ) - - initial_agg_2.insert_tree(pasted_agg_2, "week") - self.assertEqual( - to_id_set(initial_agg_2.list()), {"week", "local_metrics.field_class.name"} - ) - self.assertEqual( - initial_agg_2.__str__(), - """ -[week] date_histogram -└── [local_metrics.field_class.name] terms -""", ) def test_interpret_agg_string(self): @@ -457,46 +518,6 @@ def test_interpret_node(self): }, ) - def test_query_dict(self): - # empty - self.assertEqual(Aggs().to_dict(), {}) - - # single node - agg = Aggs() - node = Terms(name="root_agg", field="some_field", size=10) - agg.insert_node(node) - self.assertEqual( - agg.to_dict(), {"root_agg": {"terms": {"field": "some_field", "size": 10}}}, - ) - - # hierarchy - agg.insert_node( - Terms(name="other_name", field="other_field", size=30), "root_agg" - ) - agg.insert_node( - Avg(name="avg_some_other_field", field="some_other_field"), "root_agg" - ) - self.assertEqual( - agg.__str__(), - """ -[root_agg] terms -├── [avg_some_other_field] avg -└── [other_name] terms -""", - ) - self.assertEqual( - agg.to_dict(), - { - "root_agg": { - "aggs": { - "avg_some_other_field": {"avg": {"field": "some_other_field"}}, - "other_name": {"terms": {"field": "other_field", "size": 30}}, - }, - "terms": {"field": "some_field", "size": 10}, - } - }, - ) - def test_validate_aggs_parent_id(self): """ @@ -552,36 +573,11 @@ def test_validate_aggs_parent_id(self): # TODO - pipeline aggregation under metric agg - def test_agg_method(self): - pass - - def test_groupby_method(self): - pass - - def test_mapping_from_init(self): - agg_from_dict_mapping = Aggs(mapping=MAPPING) - agg_from_tree_mapping = Aggs(mapping=Mapping(MAPPING)) - self.assertIsInstance(agg_from_dict_mapping, Aggs) - self.assertIsInstance(agg_from_tree_mapping, Aggs) - self.assertEqual( - agg_from_dict_mapping.mapping.__repr__(), - agg_from_tree_mapping.mapping.__repr__(), - ) - self.assertEqual( - agg_from_dict_mapping.to_dict(), agg_from_tree_mapping.to_dict() - ) - 
- def test_init_from_dict(self): - my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING) - self.assertEqual(my_agg.to_dict(), sample.EXPECTED_AGG_QUERY) - self.assertEqual(my_agg.__str__(), sample.EXPECTED_REPR) - def test_init_from_node_hierarchy(self): node_hierarchy = sample.get_node_hierarchy() agg = Aggs(node_hierarchy, mapping=MAPPING) self.assertEqual(agg.to_dict(), sample.EXPECTED_AGG_QUERY) - self.assertEqual(agg.__str__(), sample.EXPECTED_REPR) # with nested node_hierarchy = DateHistogram( @@ -632,65 +628,119 @@ def test_init_from_node_hierarchy(self): }, ) self.assertEqual( - agg.__str__(), - """ -[week] date_histogram -└── [nested_below_week] nested - └── [local_metrics.field_class.name] terms - └── [min_f1_score] min -""", + agg.to_dict(), + { + "week": { + "aggs": { + "nested_below_week": { + "aggs": { + "local_metrics.field_class.name": { + "aggs": { + "min_f1_score": { + "min": { + "field": "local_metrics.performance.test.f1_score" + } + } + }, + "terms": { + "field": "local_metrics.field_class.name", + "size": 10, + }, + } + }, + "nested": {"path": "local_metrics"}, + } + }, + "date_histogram": {"field": "date", "interval": "1w"}, + } + }, ) - def test_groupby_and_agg(self): + def test_agg_init(self): agg = sample.get_wrapper_declared_agg() self.assertEqual(agg.to_dict(), sample.EXPECTED_AGG_QUERY) - self.assertEqual(agg.__str__(), sample.EXPECTED_REPR) def test_groupby_insert_below(self): a1 = Aggs( Terms("A", field="A", aggs=[Terms("B", field="B"), Terms("C", field="C")]) ) self.assertEqual( - a1.__repr__(), - """ -[A] terms -├── [B] terms -└── [C] terms -""", + a1.to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "C": {"terms": {"field": "C"}}, + "B": {"terms": {"field": "B"}}, + }, + } + }, ) self.assertEqual( - a1.groupby(Terms("D", field="D"), insert_below="A").__repr__(), - """ -[A] terms -└── [D] terms - ├── [B] terms - └── [C] terms -""", + a1.groupby(Terms("D", field="D"), insert_below="A").to_dict(), + { + "A": { + 
"terms": {"field": "A"}, + "aggs": { + "D": { + "terms": {"field": "D"}, + "aggs": { + "B": {"terms": {"field": "B"}}, + "C": {"terms": {"field": "C"}}, + }, + } + }, + } + }, ) self.assertEqual( a1.groupby( [Terms("D", field="D"), Terms("E", field="E")], insert_below="A" - ).__repr__(), - """ -[A] terms -└── [D] terms - └── [E] terms - ├── [B] terms - └── [C] terms -""", + ).to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "D": { + "terms": {"field": "D"}, + "aggs": { + "E": { + "terms": {"field": "E"}, + "aggs": { + "C": {"terms": {"field": "C"}}, + "B": {"terms": {"field": "B"}}, + }, + } + }, + } + }, + } + }, ) self.assertEqual( a1.groupby( Terms("D", field="D", aggs=Terms("E", field="E")), insert_below="A" - ).__repr__(), - """ -[A] terms -└── [D] terms - └── [E] terms - ├── [B] terms - └── [C] terms -""", + ).to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "D": { + "terms": {"field": "D"}, + "aggs": { + "E": { + "terms": {"field": "E"}, + "aggs": { + "B": {"terms": {"field": "B"}}, + "C": {"terms": {"field": "C"}}, + }, + } + }, + } + }, + } + }, ) def test_groupby_insert_above(self): @@ -698,59 +748,101 @@ def test_groupby_insert_above(self): Terms("A", field="A", aggs=[Terms("B", field="B"), Terms("C", field="C")]) ) self.assertEqual( - a1.__repr__(), - """ -[A] terms -├── [B] terms -└── [C] terms -""", + a1.to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "B": {"terms": {"field": "B"}}, + "C": {"terms": {"field": "C"}}, + }, + } + }, ) self.assertEqual( - a1.groupby(Terms("D", field="D"), insert_above="B").__repr__(), - """ -[A] terms -├── [C] terms -└── [D] terms - └── [B] terms -""", + a1.groupby(Terms("D", field="D"), insert_above="B").to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "C": {"terms": {"field": "C"}}, + "D": { + "terms": {"field": "D"}, + "aggs": {"B": {"terms": {"field": "B"}}}, + }, + }, + } + }, ) self.assertEqual( a1.groupby( [Terms("D", field="D"), Terms("E", 
field="E")], insert_above="B" - ).__repr__(), - """ -[A] terms -├── [C] terms -└── [D] terms - └── [E] terms - └── [B] terms -""", + ).to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "C": {"terms": {"field": "C"}}, + "D": { + "terms": {"field": "D"}, + "aggs": { + "E": { + "terms": {"field": "E"}, + "aggs": {"B": {"terms": {"field": "B"}}}, + } + }, + }, + }, + } + }, ) self.assertEqual( a1.groupby( Terms("D", field="D", aggs=Terms("E", field="E")), insert_above="B" - ).__repr__(), - """ -[A] terms -├── [C] terms -└── [D] terms - └── [E] terms - └── [B] terms -""", + ).to_dict(), + { + "A": { + "aggs": { + "C": {"terms": {"field": "C"}}, + "D": { + "aggs": { + "E": { + "aggs": {"B": {"terms": {"field": "B"}}}, + "terms": {"field": "E"}, + } + }, + "terms": {"field": "D"}, + }, + }, + "terms": {"field": "A"}, + } + }, ) # above root self.assertEqual( a1.groupby( Terms("D", field="D", aggs=Terms("E", field="E")), insert_above="A" - ).__repr__(), - """ -[D] terms -└── [E] terms - └── [A] terms - ├── [B] terms - └── [C] terms -""", + ).to_dict(), + { + "D": { + "terms": {"field": "D"}, + "aggs": { + "E": { + "terms": {"field": "E"}, + "aggs": { + "A": { + "terms": {"field": "A"}, + "aggs": { + "B": {"terms": {"field": "B"}}, + "C": {"terms": {"field": "C"}}, + }, + } + }, + } + }, + } + }, ) def test_agg_insert_below(self): @@ -758,34 +850,46 @@ def test_agg_insert_below(self): Terms("A", field="A", aggs=[Terms("B", field="B"), Terms("C", field="C")]) ) self.assertEqual( - a1.__repr__(), - """ -[A] terms -├── [B] terms -└── [C] terms -""", + a1.to_dict(), + { + "A": { + "terms": {"field": "A"}, + "aggs": { + "C": {"terms": {"field": "C"}}, + "B": {"terms": {"field": "B"}}, + }, + } + }, ) self.assertEqual( - a1.aggs(Terms("D", field="D"), insert_below="A").__repr__(), - """ -[A] terms -├── [B] terms -├── [C] terms -└── [D] terms -""", + a1.aggs(Terms("D", field="D"), insert_below="A").to_dict(), + { + "A": { + "aggs": { + "B": {"terms": {"field": 
"B"}}, + "C": {"terms": {"field": "C"}}, + "D": {"terms": {"field": "D"}}, + }, + "terms": {"field": "A"}, + } + }, ) self.assertEqual( a1.aggs( [Terms("D", field="D"), Terms("E", field="E")], insert_below="A" - ).__repr__(), - """ -[A] terms -├── [B] terms -├── [C] terms -├── [D] terms -└── [E] terms -""", + ).to_dict(), + { + "A": { + "aggs": { + "B": {"terms": {"field": "B"}}, + "C": {"terms": {"field": "C"}}, + "D": {"terms": {"field": "D"}}, + "E": {"terms": {"field": "E"}}, + }, + "terms": {"field": "A"}, + } + }, ) def test_applied_nested_path_at_node(self):