aggs tests checking to_dict output

alkemics · May 8, 2020 · 46b2b4d · 46b2b4d
1 parent b65c36d
commit 46b2b4d
Show file tree

Hide file tree

Showing 20 changed files with 405 additions and 371 deletions.
diff --git a/docs/source/reference/pandagg.agg.rst b/docs/source/reference/pandagg.agg.rst
diff --git a/docs/source/reference/pandagg.client.rst b/docs/source/reference/pandagg.client.rst
diff --git a/docs/source/reference/pandagg.interactive.abstract.rst b/docs/source/reference/pandagg.interactive.abstract.rst
diff --git a/docs/source/reference/pandagg.interactive.client.rst b/docs/source/reference/pandagg.interactive.client.rst
diff --git a/docs/source/reference/pandagg.interactive.index.rst b/docs/source/reference/pandagg.interactive.index.rst
diff --git a/docs/source/reference/pandagg.node.agg.abstract.rst b/docs/source/reference/pandagg.node.agg.abstract.rst
diff --git a/docs/source/reference/pandagg.node.agg.bucket.rst b/docs/source/reference/pandagg.node.agg.bucket.rst
diff --git a/docs/source/reference/pandagg.node.agg.deserializer.rst b/docs/source/reference/pandagg.node.agg.deserializer.rst
diff --git a/docs/source/reference/pandagg.node.agg.metric.rst b/docs/source/reference/pandagg.node.agg.metric.rst
diff --git a/docs/source/reference/pandagg.node.agg.pipeline.rst b/docs/source/reference/pandagg.node.agg.pipeline.rst
diff --git a/docs/source/reference/pandagg.node.agg.rst b/docs/source/reference/pandagg.node.agg.rst
diff --git a/docs/source/reference/pandagg.node.mapping.deserializer.rst b/docs/source/reference/pandagg.node.mapping.deserializer.rst
diff --git a/docs/source/reference/pandagg.node.mixins.rst b/docs/source/reference/pandagg.node.mixins.rst
diff --git a/docs/source/reference/pandagg.node.query.deserializer.rst b/docs/source/reference/pandagg.node.query.deserializer.rst
diff --git a/docs/source/reference/pandagg.tree.agg.rst b/docs/source/reference/pandagg.tree.agg.rst
diff --git a/pandagg/node/aggs/abstract.py b/pandagg/node/aggs/abstract.py
@@ -103,11 +103,13 @@ def _type_deserializer(cls, name_or_agg, **params):
         if not isinstance(name_or_agg, string_types):
             raise ValueError("Invalid")
         # "tags", size=10  (by default apply a terms agg)
-        if "name" not in params:
+        if "name" not in params and "field" not in params:
             return cls.get_dsl_class("terms")(
                 name=name_or_agg, field=name_or_agg, **params
             )
         # "terms", field="tags", name="per_tags"
+        if "name" not in params:
+            raise ValueError("Aggregation expects a 'name'. Got %s." % params)
         return cls.get_dsl_class(name_or_agg)(**params)
 
     def line_repr(self, depth, **kwargs):

diff --git a/pandagg/search.py b/pandagg/search.py
@@ -118,36 +118,50 @@ def query(self, *args, **kwargs):
         s._query = s._query.query(*args, **kwargs)
         return s
 
+    query.__doc__ = Query.query.__doc__
+
     def filter(self, *args, **kwargs):
         s = self._clone()
         s._query = s._query.filter(*args, **kwargs)
         return s
 
+    filter.__doc__ = Query.filter.__doc__
+
     def must_not(self, *args, **kwargs):
         s = self._clone()
         s._query = s._query.must_not(*args, **kwargs)
         return s
 
+    must_not.__doc__ = Query.must_not.__doc__
+
     def should(self, *args, **kwargs):
         s = self._clone()
         s._query = s._query.should(*args, **kwargs)
         return s
 
+    should.__doc__ = Query.should.__doc__
+
     def must(self, *args, **kwargs):
         s = self._clone()
         s._query = s._query.must(*args, **kwargs)
         return s
 
+    must.__doc__ = Query.must.__doc__
+
     def aggs(self, *args, **kwargs):
         s = self._clone()
         s._aggs = s._aggs.aggs(*args, **kwargs)
         return s
 
+    aggs.__doc__ = Aggs.aggs.__doc__
+
     def groupby(self, *args, **kwargs):
         s = self._clone()
         s._aggs = s._aggs.groupby(*args, **kwargs)
         return s
 
+    groupby.__doc__ = Aggs.groupby.__doc__
+
     def __iter__(self):
         """
         Iterate over the hits.

diff --git a/pandagg/tree/aggs.py b/pandagg/tree/aggs.py
@@ -3,8 +3,8 @@
 
 from __future__ import unicode_literals
 
+import json
 
-from builtins import str as text
 from future.utils import python_2_unicode_compatible
 
 from pandagg.tree._tree import Tree
@@ -120,7 +120,6 @@ def _validate_aggs_parent_id(self, pid):
             return pid
         leaves = self.leaves(id_only=False)
         # root
-        # TODO
         if len(leaves) == 0:
             return None
 
@@ -132,8 +131,7 @@ def _validate_aggs_parent_id(self, pid):
         return leaves[0].identifier
 
     def groupby(self, *args, **kwargs):
-        """Arrange passed aggregations in `by` arguments "vertically" (nested manner), above or below another agg
-        clause.
+        r"""Arrange passed aggregations in a vertical/nested manner, above or below another agg clause.
 
         Given the initial aggregation::
 
@@ -164,10 +162,48 @@ def groupby(self, *args, **kwargs):
             A──> B      : KO, ambiguous, must precise either A, B or C
             └──> C
 
-        :param by: aggregation(s) clauses to insert "vertically"
-        :param insert_below: parent aggregation id under which these aggregations should be placed
-        :param insert_above: aggregation id above which these aggregations should be placed
-        :param kwargs: agg body arguments when using "string" syntax for terms aggregation
+
+        Accepted declarations for single aggregation:
+
+        Official DSL like:
+
+        >>> Aggs().groupby('terms', name='per_user_id', field='user_id')
+        {"per_user_id":{"terms":{"field":"user_id"}}}
+
+        Passing a dict:
+
+        >>> Aggs().groupby({"terms_on_my_field":{"terms":{"field":"some_field"}}})
+        {"terms_on_my_field":{"terms":{"field":"some_field"}}}
+
+
+        Using DSL class:
+
+        >>> from pandagg.aggs import Terms
+        >>> Aggs().groupby(Terms('terms_on_my_field', field='some_field'))
+        {"terms_on_my_field":{"terms":{"field":"some_field"}}}
+
+        Shortcut syntax for terms aggregation: creates a terms aggregation, using field as aggregation name
+
+        >>> Aggs().groupby('some_field')
+        {"some_field":{"terms":{"field":"some_field"}}}
+
+        Using an Aggs object:
+
+        >>> Aggs().groupby(Aggs('terms', name='per_user_id', field='user_id'))
+        {"per_user_id":{"terms":{"field":"user_id"}}}
+
+        Accepted declarations for multiple aggregations:
+
+
+        :Keyword Arguments:
+            * *insert_below* (``string``) --
+              Parent aggregation name under which these aggregations should be placed
+            * *insert_above* (``string``) --
+              Aggregation name above which these aggregations should be placed
+
+            * remaining kwargs:
+              Used as body in aggregation
+
         :rtype: pandagg.aggs.Aggs
         """
         insert_below = kwargs.pop("insert_below", None)
@@ -182,12 +218,16 @@ def groupby(self, *args, **kwargs):
         # groupby({}, {})
         if len(args) > 1:
             if kwargs:
-                raise ValueError()
+                raise ValueError(
+                    "Kwargs not allowed when passing multiple aggregations in args."
+                )
             inserted_aggs = [self.deserialize(arg) for arg in args]
         # groupby([{}, {}])
         elif len(args) == 1 and isinstance(args[0], (list, tuple)):
             if kwargs:
-                raise ValueError()
+                raise ValueError(
+                    "Kwargs not allowed when passing multiple aggregations in args."
+                )
             inserted_aggs = [self.deserialize(arg) for arg in args[0]]
         # groupby({})
         # groupby(Terms())
@@ -378,4 +418,4 @@ def _insert_node_below(self, node, parent_id, with_children=True):
         )
 
     def __str__(self):
-        return "<Aggregation>\n%s" % text(self.show())
+        return json.dumps(self.to_dict(), indent=2)
diff --git a/tests/testing_samples/data_sample.py b/tests/testing_samples/data_sample.py
@@ -11,13 +11,6 @@
 from tests.testing_samples.mapping_example import MAPPING
 
 
-EXPECTED_REPR = """<Aggregation>
-[classification_type] terms
-└── [global_metrics.field.name] terms
-    ├── [avg_f1_micro] avg
-    └── [avg_nb_classes] avg
-"""
-
 EXPECTED_AGG_QUERY = {
     "classification_type": {
         "aggs": {