From c71fd43da62351946c6e9ce4379a5393b22102ba Mon Sep 17 00:00:00 2001 From: Leonard Binet Date: Mon, 22 Jun 2020 08:17:31 +0200 Subject: [PATCH 1/2] black codestyle badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 966ecdb3..b13028eb 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ ![Python package](https://github.com/alkemics/pandagg/workflows/Python%202%20Tests/badge.svg) [![Coverage](https://codecov.io/github/alkemics/pandagg/coverage.svg?branch=master)](https://codecov.io/gh/alkemics/pandagg) [![Docs](https://readthedocs.org/projects/pandagg/badge/?version=latest&style=flat)](https://pandagg.readthedocs.io/en/latest/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## What is it? From 1f970cf0e31c79b66f1e4a1087eec73a7d2f3165 Mon Sep 17 00:00:00 2001 From: Leonard Binet Date: Mon, 22 Jun 2020 08:59:43 +0200 Subject: [PATCH 2/2] user-guide on aggregations declaration --- .gitignore | 1 + docs/source/user-guide.rst | 126 ++++++++++++++++++++++++++++++++++--- pandagg/tree/aggs/aggs.py | 2 +- 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index e8ac7865..174b1f4f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .* !.github !.pre-commit-config.yaml +_private/ *.py[co] *.egg *.egg-info diff --git a/docs/source/user-guide.rst b/docs/source/user-guide.rst index f5691571..bb8677af 100644 --- a/docs/source/user-guide.rst +++ b/docs/source/user-guide.rst @@ -20,8 +20,8 @@ The :class:`~pandagg.tree.query.abstract.Query` class provides : - ability to insert clauses at specific points - tree-like visual representation -Instantiation -============= +Declaration +=========== From native "dict" query ------------------------ @@ -112,8 +112,8 @@ All these classes inherit from :class:`~pandagg.tree.query.abstract.Query` and t >>> isinstance(q, Query) True -With single clause as flattened syntax 
--------------------------------------- +With flattened syntax +--------------------- In the flattened syntax, the query clause type is used as first argument: @@ -288,17 +288,125 @@ Aggregation The :class:`~pandagg.tree.aggs.aggs.Aggs` class provides : - multiple syntaxes to declare and udpate a aggregation -- clause validation (with nested clauses validation) -- ability to insert clauses at specific points +- aggregation clause validation +- ability to insert clauses at specific locations (and not just below last manipulated clause) + + +Declaration +=========== + +From native "dict" query +------------------------ + +Given the following aggregation: + + >>> expected_aggs = { + >>> "decade": { + >>> "histogram": {"field": "year", "interval": 10}, + >>> "aggs": { + >>> "genres": { + >>> "terms": {"field": "genres", "size": 3}, + >>> "aggs": { + >>> "max_nb_roles": { + >>> "max": {"field": "nb_roles"} + >>> }, + >>> "avg_rank": { + >>> "avg": {"field": "rank"} + >>> } + >>> } + >>> } + >>> } + >>> } + >>> } + +To declare :class:`~pandagg.tree.aggs.aggs.Aggs`, simply pass the "dict" aggregation as argument: + + >>> from pandagg.aggs import Aggs + >>> a = Aggs(expected_aggs) + +A visual representation of the aggregation is available with :func:`~pandagg.tree.aggs.aggs.Aggs.show`: + + >>> a.show() + + decade + └── genres + ├── max_nb_roles + └── avg_rank + + +Call :func:`~pandagg.tree.aggs.aggs.Aggs.to_dict` to convert it back to a native dict: + + >>> a.to_dict() == expected_aggs + True + +With DSL classes +---------------- + +Pandagg provides a DSL to declare this aggregation in a similar fashion: + + >>> from pandagg.aggs import Histogram, Terms, Max, Avg + >>> + >>> a = Histogram("decade", field='year', interval=10, aggs=[ + >>> Terms("genres", field="genres", size=3, aggs=[ + >>> Max("max_nb_roles", field="nb_roles"), + >>> Avg("avg_rank", field="rank") + >>> ]), + >>> ]) +All these classes inherit from :class:`~pandagg.tree.aggs.aggs.Aggs` and thus provide the same interface. 
-Aggregation declaration + >>> from pandagg.aggs import Aggs + >>> isinstance(a, Aggs) + True + +With flattened syntax +--------------------- + +In the flattened syntax, the first argument is the aggregation name, the second argument is the aggregation type, and the +following keyword arguments define the aggregation body: + + >>> from pandagg.aggs import Aggs + >>> a = Aggs('genres', 'terms', field='genres', size=3) + >>> a.to_dict() + {'genres': {'terms': {'field': 'genres', 'size': 3}}} + + +Aggregations enrichment ======================= +Aggregations can be enriched using two methods: +- :func:`~pandagg.tree.aggs.aggs.Aggs.aggs` +- :func:`~pandagg.tree.aggs.aggs.Aggs.groupby` + +Both methods return a new :class:`~pandagg.tree.aggs.aggs.Aggs` instance, and leave the initial Aggregation unchanged. + +For instance: + + >>> from pandagg.aggs import Aggs + >>> initial_a = Aggs() + >>> enriched_a = initial_a.aggs('genres_agg', 'terms', field='genres') + + >>> initial_a.to_dict() + None + + >>> enriched_a.to_dict() + {'genres_agg': {'terms': {'field': 'genres'}}} + +.. note:: + + Calling :func:`~pandagg.tree.aggs.aggs.Aggs.to_dict` on an empty Aggregation returns ``None``: + + >>> from pandagg.aggs import Aggs + >>> Aggs().to_dict() + None + + +TODO -Aggregation response -==================== +******** +Response +******** TODO diff --git a/pandagg/tree/aggs/aggs.py b/pandagg/tree/aggs/aggs.py index 236d83d3..3ea1486c 100644 --- a/pandagg/tree/aggs/aggs.py +++ b/pandagg/tree/aggs/aggs.py @@ -449,7 +449,7 @@ def aggs(self, *args, **kwargs): def to_dict(self, from_=None, depth=None, with_name=True): if self.root is None: - return {} + return None from_ = self.root if from_ is None else from_ node = self.get(from_) children_queries = {}