diff --git a/.github/workflows/python-3-tests.yml b/.github/workflows/python-3-tests.yml index 60f3824c..25e6286a 100644
--- a/.github/workflows/python-3-tests.yml
+++ b/.github/workflows/python-3-tests.yml
@@ -3,9 +3,9 @@ name: Python 3 Tests
 
 on:
   push:
-    branches: [ master ]
+    branches: [ master, dev ]
   pull_request:
-    branches: [ master ]
+    branches: [ master, dev ]
 
 jobs:
   static_analysis:
@@ -32,6 +32,10 @@ jobs:
         pip install mypy
         pip install -e ".[develop]"
         mypy --install-types --non-interactive pandagg
+    - name: Isort check
+      run: |
+        pip install isort
+        isort pandagg examples tests -c
 
   run_tests:
     runs-on: ubuntu-latest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ea40e16b..3a473966 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,3 +17,9 @@ repos:
         pass_filenames: false
         language: system
         types: [ python ]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        name: isort (python)
+        args: ["--profile", "black", "--filter-files"]
diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..af97dfa8
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,34 @@
+
+# Change Log
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](http://keepachangelog.com/)
+and this project adheres to [Semantic Versioning](http://semver.org/).
+
+## [Unreleased] - 2022-02-11
+
+### Added
+- Use isort to automatically sort imports.
+
+### Changed
+
+### Fixed
+
+## [0.2.4] - 2022-02-11
+
+Introduction of the repository changelog.
+
+### Added
+
+- GitHub actions run when pushing changes or opening a pull request to the `dev` branch ([#113](https://github.com/alkemics/pandagg/pull/113)).
+- `match_all` and `match_none` query clauses ([#103](https://github.com/alkemics/pandagg/issues/103#issuecomment-1040425685), [#112](https://github.com/alkemics/pandagg/pull/112)).
+
+### Changed
+
+- Handle deprecation warnings introduced in [elasticsearch-py](https://github.com/elastic/elasticsearch-py/issues/1698) ([#109](https://github.com/alkemics/pandagg/pull/109)).
+- Improved IMDB and NY-restaurants examples: they can now be ingested into a client cluster with a single command line ([#116](https://github.com/alkemics/pandagg/pull/116)).
+
+### Fixed
+
+- Fix aggregation scan via composite aggregation: the first batch was not yielded ([#101](https://github.com/alkemics/pandagg/issues/101), [#110](https://github.com/alkemics/pandagg/pull/110)).
+- Fix search scan by allowing extra parameters to be passed to the underlying `scan` helper ([#103](https://github.com/alkemics/pandagg/issues/103#issuecomment-1040445479), [#111](https://github.com/alkemics/pandagg/pull/111)).
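The `match_all` / `match_none` changelog entry above refers to the query clauses added later in this diff (`pandagg/node/query/match_all.py`, re-exported from `pandagg.query`). A minimal usage sketch, assuming `Search.query` accepts clause instances the way the rest of this diff suggests:

```python
from pandagg.query import MatchAll, MatchNone
from pandagg.search import Search

# MatchAll matches every document, MatchNone matches none of them.
everything = Search().index("movies").query(MatchAll())
nothing = Search().index("movies").query(MatchNone())

# The query part is expected to serialize to {"match_all": {}} / {"match_none": {}}.
print(everything.to_dict())
print(nothing.to_dict())
```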
diff --git a/Makefile b/Makefile index b08d7777..af1e33a4 100644 --- a/Makefile +++ b/Makefile @@ -13,14 +13,18 @@ lint-diff: git diff upstream/master --name-only -- "*.py" | xargs flake8 lint: - # ignore "line break before binary operator", and "invalid escape sequence '\_'" useful for doc - flake8 --count --ignore=W503,W605 --show-source --statistics pandagg + flake8 --count --show-source --statistics pandagg # on tests, more laxist: allow "missing whitespace after ','" and "line too long" flake8 --count --ignore=W503,W605,E231,E501 --show-source --statistics tests black: black examples docs pandagg tests setup.py +isort: + isort examples docs pandagg tests setup.py + +format: isort black lint + develop: -python -m pip install -e ".[develop]" diff --git a/docs/source/conf.py b/docs/source/conf.py index 21fb7ff8..e4add382 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,14 +19,14 @@ # -- Project information ----------------------------------------------------- -project = u"pandagg" -copyright = u"2020, Léonard Binet" -author = u"Léonard Binet" +project = "pandagg" +copyright = "2020, Léonard Binet" +author = "Léonard Binet" # The short X.Y version -version = u"" +version = "" # The full version, including alpha/beta/rc tags -release = u"0.1" +release = "0.1" # -- General configuration --------------------------------------------------- @@ -130,7 +130,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "pandagg.tex", u"pandagg Documentation", u"Léonard Binet", "manual") + (master_doc, "pandagg.tex", "pandagg Documentation", "Léonard Binet", "manual") ] @@ -138,7 +138,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "pandagg", u"pandagg Documentation", [author], 1)] +man_pages = [(master_doc, "pandagg", "pandagg Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -150,7 +150,7 @@ ( master_doc, "pandagg", - u"pandagg Documentation", + "pandagg Documentation", author, "pandagg", "One line description of project.", diff --git a/examples/NY-restaurants/ingest.py b/examples/NY-restaurants/ingest.py index 8be76bd3..2b2de25c 100644 --- a/examples/NY-restaurants/ingest.py +++ b/examples/NY-restaurants/ingest.py @@ -3,11 +3,11 @@ """Script that downloads a public dataset and streams it to an Elasticsearch cluster""" import csv -from os.path import abspath, join, dirname, exists +from os.path import abspath, dirname, exists, join + import urllib3 from elasticsearch import Elasticsearch - -from pandagg.index import DeclarativeIndex +from model import NYCRestaurants NYC_RESTAURANTS = ( "https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD" @@ -16,22 +16,6 @@ CHUNK_SIZE = 16384 -class NYCRestaurants(DeclarativeIndex): - name = "nyc-restaurants" - mappings = { - "properties": { - "name": {"type": "text"}, - "borough": {"type": "keyword"}, - "cuisine": {"type": "keyword"}, - "grade": {"type": "keyword"}, - "score": {"type": "integer"}, - "location": {"type": "geo_point"}, - "inspection_date": {"type": "date", "format": "MM/dd/yyyy"}, - } - } - settings = {"number_of_shards": 1} - - def download_dataset(): """Downloads the public dataset if not locally downloaded and returns the number of rows are in the .csv file. 
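The NY-restaurants ingest script above now imports `NYCRestaurants` from the new `model` module introduced just below. A hypothetical end-to-end sketch (not part of this diff) of how that declaration is meant to be driven, assuming a cluster reachable on localhost:9200:

```python
from elasticsearch import Elasticsearch
from model import NYCRestaurants  # declared in examples/NY-restaurants/model.py below

client = Elasticsearch()
restaurants = NYCRestaurants(client)

if not restaurants.exists():
    restaurants.save()  # create the index with the declared mappings and settings

# Sample rows standing in for the CSV records parsed by the real script.
rows = [
    {"name": "Almighty burger", "borough": "Brooklyn", "cuisine": "Burgers"},
    {"name": "Central perk", "borough": "Manhattan", "cuisine": "Coffee"},
]
restaurants.docs.bulk(
    actions=({"_source": row} for row in rows),
    _op_type_overwrite="index",
).perform()
restaurants.refresh()
```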
diff --git a/examples/NY-restaurants/model.py b/examples/NY-restaurants/model.py new file mode 100644 index 00000000..6451f75e
--- /dev/null
+++ b/examples/NY-restaurants/model.py
@@ -0,0 +1,31 @@
+from pandagg.document import DocumentSource
+from pandagg.index import DeclarativeIndex
+from pandagg.mappings import Date, GeoPoint, Integer, Keyword, Text
+
+
+class Inspection(DocumentSource):
+    name = Text()
+    borough = Keyword()
+    cuisine = Keyword()
+    grade = Keyword()
+    score = Integer()
+    location = GeoPoint()
+    inspection_date = Date(format="MM/dd/yyyy")
+
+
+class NYCRestaurants(DeclarativeIndex):
+    name = "nyc-restaurants"
+    document = Inspection
+    # Note: the "mappings" attribute takes precedence over the "document" attribute in the mappings definition
+    mappings = {
+        "properties": {
+            "name": {"type": "text"},
+            "borough": {"type": "keyword"},
+            "cuisine": {"type": "keyword"},
+            "grade": {"type": "keyword"},
+            "score": {"type": "integer"},
+            "location": {"type": "geo_point"},
+            "inspection_date": {"type": "date", "format": "MM/dd/yyyy"},
+        }
+    }
+    settings = {"number_of_shards": 1}
diff --git a/examples/imdb/README.md b/examples/imdb/README.md index ed694829..62bdd400 100644
--- a/examples/imdb/README.md
+++ b/examples/imdb/README.md
@@ -21,7 +21,7 @@ The index should provide good performances trying to answer these kind question
 
 ## Data source
 
-I exported following SQL tables from MariaDB [following these instructions](https://relational.fit.cvut.cz/dataset/IMDb).
+Data is downloaded from https://relational.fit.cvut.cz/dataset/IMDb.
 
 Relational schema is the following:
 
@@ -98,66 +98,21 @@ _
 
 ## Steps to start playing with your index
 
-You can either directly use the demo index available [here]('https://beba020ee88d49488d8f30c163472151.eu-west-2.aws.cloud.es.io:9243/')
-with credentials user: `pandagg`, password: `pandagg`:
+Follow the steps below to install it locally.
 
-Access it with following client instantiation:
-```
-from elasticsearch import Elasticsearch
-client = Elasticsearch(
-    hosts=['https://beba020ee88d49488d8f30c163472151.eu-west-2.aws.cloud.es.io:9243/'],
-    http_auth=('pandagg', 'pandagg')
-)
-```
-
-
-Or follow below steps to install it yourself locally.
-In this case, you can either generate yourself the files, or download them from [here](https://drive.google.com/file/d/1po3T18l9QoYxPEGh-iKV4oN3DslWGu8-/view?usp=sharing) (file md5 `b363dee23720052501e24d15361ed605`).
- -#### Dump tables -Follow instruction on bottom of https://relational.fit.cvut.cz/dataset/IMDb page and dump following tables in a -directory: -- movies.csv -- movies_genres.csv -- movies_directors.csv -- directors.csv -- directors_genres.csv -- roles.csv -- actors.csv - -#### Clone pandagg and setup environment ``` +# clone repo git clone git@github.com:alkemics/pandagg.git cd pandagg +# create and activate your virtual environment using virtualenv or any similar tool virtualenv env -python setup.py develop -pip install pandas simplejson jupyter seaborn -``` -Then copy `conf.py.dist` file into `conf.py` and edit variables as suits you, for instance: -``` -# your cluster address -ES_HOST = 'localhost:9200' +source env/bin/activate -# where your table dumps are stored, and where serialized output will be written -DATA_DIR = '/path/to/dumps/' -OUTPUT_FILE_NAME = 'serialized.json' -``` - -#### Serialize movie documents and insert them - -``` -# generate serialized movies documents, ready to be inserted in ES -# can take a while -python examples/imdb/serialize.py +# install dependencies for this example +make develop +pip install pandas simplejson mysqlclient mariadb -# create index with mappings if necessary, bulk insert documents in ES -python examples/imdb/load.py +# run ingestion script (type `python examples/imdb/ingest.py --help` for options) +python examples/imdb/ingest.py ``` - - -#### Explore pandagg notebooks - -An example notebook is available to showcase some of `pandagg` functionalities: [here it is](https://gistpreview.github.io/?4cedcfe49660cd6757b94ba491abb95a). - -Code is present in `examples/imdb/IMDB exploration.py` file. diff --git a/examples/imdb/conf.py.dist b/examples/imdb/conf.py.dist deleted file mode 100644 index f0c82beb..00000000 --- a/examples/imdb/conf.py.dist +++ /dev/null @@ -1,10 +0,0 @@ -# your cluster address, if local probably 'localhost:9200' -ES_HOST = '__FILL_ME__' -ES_USE_AUTH = False -# if auth = True: -ES_USER = 'xxx' -ES_PASSWORD = 'xxx' - -# where your table dumps are stored, and where serialized output will be written -DATA_DIR = '__FILL_ME__' -OUTPUT_FILE_NAME = 'serialized.json' diff --git a/examples/imdb/ingest.py b/examples/imdb/ingest.py new file mode 100644 index 00000000..534410a9 --- /dev/null +++ b/examples/imdb/ingest.py @@ -0,0 +1,302 @@ +import argparse +import csv +import json +import logging +import os +from os import path, remove +from os.path import join +from typing import Iterator, List + +import mariadb +import numpy as np +import pandas as pd +import simplejson +from elasticsearch import Elasticsearch + +from examples.imdb.model import Movies +from pandagg.index import Action + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) + + +class NpEncoder(simplejson.JSONEncoder): + def default(self, obj): + if isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + else: + return super(NpEncoder, self).default(obj) + + +def download_table_as_csv( + data_dir: str, overwrite: bool, cursor, table_name: str, column_names: List[str] +): + cursor.execute("SELECT * FROM %s" % table_name) + + file_path = join(data_dir, "%s.csv" % table_name) + if path.exists(file_path): + if overwrite: + remove(file_path) + else: + return + + with open(file_path, "w") as f: + csv_dump = csv.writer(f) + csv_dump.writerow(column_names) + csv_dump.writerows(cursor.fetchall()) + + +def download_tables(data_dir: 
str, overwrite: bool) -> None: + + # Using credentials defined here https://relational.fit.cvut.cz/dataset/IMDb + conn = mariadb.connect( + user="guest", + password="relational", + host="relational.fit.cvut.cz", + port=3306, + database="imdb_ijs", + ) + cursor = conn.cursor() + + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="directors_genres", + column_names=["director_id", "genre", "prob"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="directors", + column_names=["id", "first_name", "last_name"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="movies_directors", + column_names=["director_id", "movie_id"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="movies_genres", + column_names=["movie_id", "genre"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="movies", + column_names=["id", "name", "year", "rank"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="roles", + column_names=["actor_id", "movie_id", "role"], + ) + download_table_as_csv( + data_dir=data_dir, + overwrite=overwrite, + cursor=cursor, + table_name="actors", + column_names=["id", "first_name", "last_name", "gender"], + ) + + +def serialize_documents(data_dir: str, limit: int) -> None: + # tables imports + reader_kwargs = { + "encoding": "utf-8", + "sep": ",", + "quotechar": '"', + "escapechar": "\\", + } + movies = pd.read_csv( + join(data_dir, "movies.csv"), index_col="id", **reader_kwargs + ).sample(n=limit) + movies_genres = pd.read_csv(join(data_dir, "movies_genres.csv"), **reader_kwargs) + movies_directors = pd.read_csv( + join(data_dir, "movies_directors.csv"), **reader_kwargs + ) + movies_directors = movies_directors[ + movies_directors.movie_id.isin(movies.index.values) + ] + directors = pd.read_csv( + join(data_dir, "directors.csv"), index_col="id", **reader_kwargs + ) + director_genres = pd.read_csv( + join(data_dir, "directors_genres.csv"), **reader_kwargs + ) + roles = pd.read_csv(join(data_dir, "roles.csv"), **reader_kwargs) + roles = roles[roles.movie_id.isin(movies.index.values)] + actors = pd.read_csv(join(data_dir, "actors.csv"), index_col="id", **reader_kwargs) + + # actors + actor_roles = pd.merge(actors, roles, left_index=True, right_on="actor_id") + actor_roles["serialized_roles"] = actor_roles.apply( + lambda x: { + "actor_id": x.actor_id, + "first_name": x.first_name, + "last_name": x.last_name, + "full_name": "%s %s" % (x.first_name, x.last_name), + "gender": x.gender, + "role": x.role, + }, + axis=1, + ) + movie_serialized_actors = actor_roles.groupby("movie_id").serialized_roles.apply( + list + ) + + # directors + directors_grouped_genres = pd.DataFrame( + director_genres.groupby("director_id").genre.apply(list) + ) + movie_directors_extended = pd.merge( + movies_directors, directors, left_on="director_id", right_index=True + ) + movie_directors_extended = pd.merge( + movie_directors_extended, + directors_grouped_genres, + how="left", + left_on="director_id", + right_index=True, + ) + movie_directors_extended["serialized_directors"] = movie_directors_extended.apply( + lambda x: { + "director_id": x.director_id, + "first_name": x.first_name, + "last_name": x.last_name, + "full_name": "%s %s" % (x.first_name, x.last_name), + "genres": x.genre, + }, + axis=1, + ) + 
movie_serialized_directors = pd.DataFrame( + movie_directors_extended.groupby("movie_id").serialized_directors.apply(list) + ) + + # movie genres + movie_serialized_genres = movies_genres.groupby("movie_id").genre.apply(list) + + # merge + enriched_movies = pd.merge( + movies, movie_serialized_actors, how="left", left_index=True, right_index=True + ) + enriched_movies = pd.merge( + enriched_movies, + movie_serialized_directors, + how="left", + left_index=True, + right_index=True, + ) + enriched_movies = pd.merge( + enriched_movies, + movie_serialized_genres, + how="left", + left_index=True, + right_index=True, + ) + + enriched_movies["nb_directors"] = enriched_movies.serialized_directors.apply( + lambda x: len(x) if isinstance(x, list) else 0 + ) + enriched_movies["nb_roles"] = enriched_movies.serialized_roles.apply( + lambda x: len(x) if isinstance(x, list) else 0 + ) + + serialized = enriched_movies.apply( + lambda x: { + "movie_id": x.name, + "name": x.loc["name"], + "year": x.year, + "genres": x.genre, + "roles": x.serialized_roles, + "nb_roles": x.nb_roles, + "directors": x.serialized_directors, + "nb_directors": x.nb_directors, + "rank": x.loc["rank"], + }, + axis=1, + ) + # write + with open(join(data_dir, "index_documents.json"), "w") as f: + for s in serialized: + f.write(simplejson.dumps(s, ignore_nan=True, cls=NpEncoder) + "\n") + + +def operations_iterator(data_dir) -> Iterator[Action]: + with open(join(data_dir, "index_documents.json"), "r") as f: + for line in f.readlines(): + d = json.loads(line) + yield {"_source": d, "_id": d["movie_id"]} + + +def setup_index_and_index_documents(data_dir: str, reset_index: bool) -> None: + movies = Movies(client) + if reset_index and movies.exists(): + movies.delete() + movies.save() + movies.docs.bulk( + actions=operations_iterator(data_dir), _op_type_overwrite="index" + ).perform() + movies.refresh() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Download and ingest IMDB data in an Elasticsearch cluster.\nBy default the used cluster will be " + "localhost:9200, overwrite client options in this file to customize this behavior." 
+ ) + parser.add_argument( + "--dir", + type=str, + default="temp-data", + help="path to directory where the downloaded files will be stored (default 'temp-data')", + ) + parser.add_argument( + "--overwrite", + action="store_const", + const=True, + default=False, + help="if True, re-download files even if they are already present (default False)", + ) + parser.add_argument( + "--resetindex", + action="store_const", + const=True, + default=False, + help="if True, index is deleted and recreated even though it already exists (default False)", + ) + parser.add_argument( + "--limit", + type=int, + default=20000, + help="number of movies that will be ingested (default 20000)", + ) + args = parser.parse_args() + + client = Elasticsearch( + # specify your config here + ) + + if not path.exists(args.dir): + os.mkdir(args.dir) + + logging.info("Download tables from remote sql database") + download_tables(data_dir=args.dir, overwrite=args.overwrite) + logging.info("Serialize documents") + serialize_documents(data_dir=args.dir, limit=args.limit) + logging.info("Index documents in cluster") + setup_index_and_index_documents(data_dir=args.dir, reset_index=args.resetindex) diff --git a/examples/imdb/load.py b/examples/imdb/model.py similarity index 55% rename from examples/imdb/load.py rename to examples/imdb/model.py index a6e7edea..e234cae4 100644 --- a/examples/imdb/load.py +++ b/examples/imdb/model.py @@ -1,15 +1,40 @@ -import json -from collections import Iterator -from os.path import join -from elasticsearch import Elasticsearch -from examples.imdb.conf import ES_HOST, ES_USE_AUTH, ES_PASSWORD, ES_USER, DATA_DIR +from pandagg.document import DocumentSource, InnerDocSource +from pandagg.index import DeclarativeIndex +from pandagg.mappings import Float, Integer, Keyword, Nested, Text -from pandagg.index import DeclarativeIndex, Action -from pandagg.mappings import Keyword, Text, Float, Nested, Integer + +class Role(InnerDocSource): + role = Keyword() + actor_id = Keyword() + gender = Keyword() + first_name = Text() + last_name = Text() + full_name = Text() + + +class Director(InnerDocSource): + director_id = Keyword() + first_name = Text() + last_name = Text() + full_name = Text() + genres = Keyword() + + +class MovieEntry(DocumentSource): + movie_id = Keyword() + name = Text(fields={"raw": Keyword()}) + year = Integer() + rank = Float() + genres = Keyword() + roles = Nested(Role) + directors = Nested(Director) + nb_directors = Integer() + nb_roles = Integer() class Movies(DeclarativeIndex): name = "movies" + document = MovieEntry mappings = { "dynamic": False, "properties": { @@ -41,28 +66,3 @@ class Movies(DeclarativeIndex): "nb_roles": Integer(), }, } - - -def operations_iterator() -> Iterator[Action]: - with open(join(DATA_DIR, "serialized.json"), "r") as f: - for line in f.readlines(): - d = json.loads(line) - yield {"_source": d, "_id": d["id"]} - - -if __name__ == "__main__": - client_kwargs = {"hosts": [ES_HOST]} - if ES_USE_AUTH: - client_kwargs["http_auth"] = (ES_USER, ES_PASSWORD) - client = Elasticsearch(**client_kwargs) - - movies = Movies(client) - - print("Index creation") - movies.save() - - print("Write documents") - movies.docs.bulk( - actions=operations_iterator(), _op_type_overwrite="index" - ).perform() - movies.refresh() diff --git a/examples/imdb/serialize.py b/examples/imdb/serialize.py deleted file mode 100644 index 5644af88..00000000 --- a/examples/imdb/serialize.py +++ /dev/null @@ -1,147 +0,0 @@ -import logging -from os.path import join -import simplejson -import pandas as 
pd -import numpy as np - -from examples.imdb.conf import DATA_DIR, OUTPUT_FILE_NAME - -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) - - -class NpEncoder(simplejson.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - else: - return super(NpEncoder, self).default(obj) - - -if __name__ == "__main__": - - logging.info("LOADING TABLES") - # tables imports - reader_kwargs = { - "encoding": "utf-8", - "sep": ",", - "quotechar": '"', - "escapechar": "\\", - } - movies = pd.read_csv(join(DATA_DIR, "movies.csv"), index_col="id", **reader_kwargs) - movies_genres = pd.read_csv(join(DATA_DIR, "movies_genres.csv"), **reader_kwargs) - movies_directors = pd.read_csv( - join(DATA_DIR, "movies_directors.csv"), **reader_kwargs - ) - directors = pd.read_csv( - join(DATA_DIR, "directors.csv"), index_col="id", **reader_kwargs - ) - director_genres = pd.read_csv( - join(DATA_DIR, "directors_genres.csv"), **reader_kwargs - ) - roles = pd.read_csv(join(DATA_DIR, "roles.csv"), **reader_kwargs) - actors = pd.read_csv(join(DATA_DIR, "actors.csv"), index_col="id", **reader_kwargs) - - # actors - logging.info("SERIALIZE ACTORS") - actor_roles = pd.merge(actors, roles, left_index=True, right_on="actor_id") - actor_roles["serialized_roles"] = actor_roles.apply( - lambda x: { - "actor_id": x.actor_id, - "first_name": x.first_name, - "last_name": x.last_name, - "full_name": "%s %s" % (x.first_name, x.last_name), - "gender": x.gender, - "role": x.role, - }, - axis=1, - ) - movie_serialized_actors = actor_roles.groupby("movie_id").serialized_roles.apply( - list - ) - - # directors - logging.info("SERIALIZE DIRECTORS") - directors_grouped_genres = pd.DataFrame( - director_genres.groupby("director_id").genre.apply(list) - ) - movie_directors_extended = pd.merge( - movies_directors, directors, left_on="director_id", right_index=True - ) - movie_directors_extended = pd.merge( - movie_directors_extended, - directors_grouped_genres, - how="left", - left_on="director_id", - right_index=True, - ) - movie_directors_extended["serialized_directors"] = movie_directors_extended.apply( - lambda x: { - "director_id": x.director_id, - "first_name": x.first_name, - "last_name": x.last_name, - "full_name": "%s %s" % (x.first_name, x.last_name), - "genres": x.genre, - }, - axis=1, - ) - movie_serialized_directors = pd.DataFrame( - movie_directors_extended.groupby("movie_id").serialized_directors.apply(list) - ) - - # movie genres - logging.info("SERIALIZE MOVIE GENRES") - movie_serialized_genres = movies_genres.groupby("movie_id").genre.apply(list) - - # merge - logging.info("MERGE DATASETS") - enriched_movies = pd.merge( - movies, movie_serialized_actors, how="left", left_index=True, right_index=True - ) - enriched_movies = pd.merge( - enriched_movies, - movie_serialized_directors, - how="left", - left_index=True, - right_index=True, - ) - enriched_movies = pd.merge( - enriched_movies, - movie_serialized_genres, - how="left", - left_index=True, - right_index=True, - ) - - enriched_movies["nb_directors"] = enriched_movies.serialized_directors.apply( - lambda x: len(x) if isinstance(x, list) else 0 - ) - enriched_movies["nb_roles"] = enriched_movies.serialized_roles.apply( - lambda x: len(x) if isinstance(x, list) else 0 - ) - - serialized = enriched_movies.apply( - lambda x: { - "movie_id": x.name, - "name": x.loc["name"], - "year": 
x.year, - "genres": x.genre, - "roles": x.serialized_roles, - "nb_roles": x.nb_roles, - "directors": x.serialized_directors, - "nb_directors": x.nb_directors, - "rank": x.loc["rank"], - }, - axis=1, - ) - - # write - logging.info("WRITE SERIALIZED DOCUMENTS") - with open(join(DATA_DIR, OUTPUT_FILE_NAME), "w") as f: - for s in serialized: - f.write(simplejson.dumps(s, ignore_nan=True, cls=NpEncoder) + "\n") diff --git a/examples/jupyter_notebook_config.py b/examples/jupyter_notebook_config.py deleted file mode 100644 index c1ae2262..00000000 --- a/examples/jupyter_notebook_config.py +++ /dev/null @@ -1,16 +0,0 @@ -# taken from https://www.svds.com/jupyter-notebook-best-practices-for-data-science/ - -import os -from subprocess import check_call - - -def post_save(model, os_path, contents_manager): - """post-save hook for converting notebooks to .py scripts""" - if model["type"] != "notebook": - return # only do this for notebooks - d, fname = os.path.split(os_path) - check_call(["jupyter", "nbconvert", "--to", "script", fname], cwd=d) - check_call(["jupyter", "nbconvert", "--to", "html", fname], cwd=d) - - -c.FileContentsManager.post_save_hook = post_save diff --git a/pandagg/__init__.py b/pandagg/__init__.py index 18517883..8d532a6a 100644 --- a/pandagg/__init__.py +++ b/pandagg/__init__.py @@ -1,11 +1,10 @@ import logging - from logging import NullHandler +from typing import List # ensures all required classes are registered in DSLMeta from .interactive import * from .search import * -from typing import List # Inspired by https://python-guide-pt-br.readthedocs.io/fr/latest/writing/logging.html#logging-in-a-library # Set default logging handler to avoid "No handler found" warnings. diff --git a/pandagg/_decorators.py b/pandagg/_decorators.py index 90d306cc..44585d62 100644 --- a/pandagg/_decorators.py +++ b/pandagg/_decorators.py @@ -1,7 +1,6 @@ from textwrap import dedent from typing import Any, Callable - # Substitution and Appender are copied from pandas.util._decorators # https://github.com/pandas-dev/pandas/blob/master/LICENSE diff --git a/pandagg/aggs.py b/pandagg/aggs.py index ab9838c6..95ec2157 100644 --- a/pandagg/aggs.py +++ b/pandagg/aggs.py @@ -1,67 +1,63 @@ from pandagg.node.aggs.bucket import ( - Terms, - Filters, - Histogram, + AdjacencyMatrix, + AutoDateHistogram, + Children, DateHistogram, - Global, + DiversifiedSampler, Filter, - Nested, - ReverseNested, - Range, - Missing, - MatchAll, - GeoHashGrid, + Filters, GeoDistance, - AdjacencyMatrix, - AutoDateHistogram, - VariableWidthHistogram, - SignificantTerms, - RareTerms, + GeoHashGrid, GeoTileGrid, + Global, + Histogram, IPRange, - Sampler, - DiversifiedSampler, - Children, + MatchAll, + Missing, + MultiTerms, + Nested, Parent, + Range, + RareTerms, + ReverseNested, + Sampler, + SignificantTerms, SignificantText, - MultiTerms, + Terms, + VariableWidthHistogram, ) - from pandagg.node.aggs.composite import Composite - from pandagg.node.aggs.metric import ( Avg, - Max, - Min, - Sum, Cardinality, - Stats, ExtendedStats, - Percentiles, - PercentileRanks, GeoBound, GeoCentroid, + Max, + Min, + PercentileRanks, + Percentiles, + Stats, + Sum, TopHits, ValueCount, ) - from pandagg.node.aggs.pipeline import ( AvgBucket, + BucketScript, + BucketSelector, + BucketSort, + CumulativeSum, Derivative, + ExtendedStatsBucket, MaxBucket, MinBucket, - SumBucket, - StatsBucket, - ExtendedStatsBucket, - PercentilesBucket, MovingAvg, - CumulativeSum, - BucketScript, - BucketSelector, - BucketSort, + PercentilesBucket, SerialDiff, + StatsBucket, 
+    SumBucket,
 )
-
 from pandagg.tree.aggs import Aggs
 
 __all__ = [
diff --git a/pandagg/discovery.py b/pandagg/discovery.py index a1e3bd9a..3976bbc1 100644
--- a/pandagg/discovery.py
+++ b/pandagg/discovery.py
@@ -1,8 +1,8 @@
 from dataclasses import dataclass
-from typing import Dict, Any, Optional
+from typing import Any, Dict, Optional
 
-from lighttree.interactive import Obj
 from elasticsearch import Elasticsearch
+from lighttree.interactive import Obj
 
 from pandagg import Mappings, MappingsDict
 from pandagg.interactive.mappings import IMappings
@@ -46,9 +46,11 @@ class Indices(Obj):
 
 
 def discover(using: Elasticsearch, index: str = "*") -> Indices:
-    """
-    :param using: Elasticsearch client
+    """Scan cluster and return indices that match the provided index pattern.
+
+    :param using: Elasticsearch client.
     :param index: Comma-separated list or wildcard expression of index names used to limit the request.
+    :rtype: Indices
     """
     indices = Indices()
     for index_name, index_detail in using.indices.get(index=index).items():
diff --git a/pandagg/document.py b/pandagg/document.py index 13988edb..39cc94ac 100644
--- a/pandagg/document.py
+++ b/pandagg/document.py
@@ -1,11 +1,43 @@
-from typing import Tuple, Dict, Any
+from typing import Any, Dict, Tuple
 
 from pandagg import Mappings
-from pandagg.node.mappings import Field, ComplexField
+from pandagg.node.mappings import ComplexField, Field
 
 
 class DocumentMeta(type):
+
+    """`DocumentSource` metaclass.
+
+    Note: you shouldn't have to use it directly, see `DocumentSource` instead.
+    """
+
     def __new__(cls, name: str, bases: Tuple, attrs: Dict) -> "DocumentMeta":
+        """The document metaclass is responsible for:
+
+        - registering `Fields`:
+            - as a `Mappings` instance in the `_mappings_` attribute
+            - as a dict of `Fields` in `_field_attrs_`
+        - keeping other attributes and methods as is
+        - building `__init__` to accept only the declared fields at `DocumentSource` instantiation
+
+        In the following example::
+
+            class RestaurantInspection(DocumentSource):
+
+                description = "Represent a new-york restaurant inspection."
+
+                name = Text()
+                borough = Keyword()
+                cuisine = Keyword()
+
+            document = RestaurantInspection(
+                name="Almighty burger",
+                borough="Brooklyn"
+            )
+
+        The `description` attribute isn't a `Field` instance, and won't be considered a valid input at document
+        instantiation.
+        """
         regular_attrs: Dict = {}
         field_attrs: Dict = {}
         for k, v in attrs.items():
@@ -32,31 +64,88 @@ def __init__(self: "DocumentSource", **kwargs: Any) -> None:
                 setattr(self, k, v)
             self._post_init_()
 
+        def __str__(self: "DocumentSource") -> str:
+            return "{}({})".format(
+                self.__class__.__name__,
+                ", ".join(
+                    "{}={!r}".format(key, getattr(self, key))
+                    for key in field_attrs.keys()
+                ),
+            )
+
         regular_attrs["__init__"] = __init__
+        regular_attrs["__str__"] = __str__
+        regular_attrs["__repr__"] = __str__
 
         return super(DocumentMeta, cls).__new__(cls, name, bases, regular_attrs)
 
 
 class DocumentSource(metaclass=DocumentMeta):
-    # __init__ is overidden by metaclass, this is only so that pycharm doesn't highlights fake errors when instantiating
-    # documents
+    """
+    Model-like class for persisting documents in Elasticsearch.
+
+    It is both used for mappings declaration and for document manipulation and persistence::
+
+        class RestaurantInspection(DocumentSource):
+            name = Text()
+            borough = Keyword()
+            cuisine = Keyword()
+            grade = Keyword()
+            score = Integer()
+            location = GeoPoint()
+            inspection_date = Date(format="MM/dd/yyyy")
+
+    This document can be referenced in a DeclarativeIndex class to declare index mappings::
+
+        class NYCRestaurants(DeclarativeIndex):
+            name = "nyc-restaurants"
+            document = RestaurantInspection
+
+    Note: these mappings will be used to create the index mappings when using the DeclarativeIndex `save` method.
+
+    Mappings can be serialized via the `_mappings.to_dict` method::
+
+        >>> NYCRestaurants._mappings.to_dict()
+        {
+            'properties': {
+                'name': {'type': 'text'},
+                'borough': {'type': 'keyword'},
+                'cuisine': {'type': 'keyword'},
+                'grade': {'type': 'keyword'},
+                'score': {'type': 'integer'},
+                'location': {'type': 'geo_point'},
+                'inspection_date': {'format': 'MM/dd/yyyy', 'type': 'date'}
+            }
+        }
+
+    """
+
     def __init__(self, **kwargs: Any) -> None:
-        pass
+        """Overridden by the metaclass; declared here only so that pycharm doesn't highlight fake errors when
+        instantiating documents.
+        """
 
     def _post_init_(self) -> None:
-        # intended to be overwritten
-        # apply transformations after instantiation
-        # note: it applies both when instantiating manually documents, and when documents are instantiated while
-        # deserializing ElasticSearch search response
-        pass
+        """Intended to be overwritten.
+        Apply transformations after document instantiation. Executed both when instantiating documents manually, and
+        when documents are instantiated while deserializing an Elasticsearch search response.
+        """
 
    def _pre_save_op_(self) -> None:
-        # intended to be overwritten
-        # apply transformations before any persisting operation (create / index / update)
-        # for instance to update 'last_updated_at' date
-        pass
+        """Intended to be overwritten.
+        Apply transformations before any persisting operation (create / index / update).
+
+        Example: update a 'last_updated_at' date.
+        """
 
     def _to_dict_(self, with_empty_keys: bool = False) -> Dict[str, Any]:
+        """
+        Serialize the document as a JSON-compatible python dict.
+
+        :param with_empty_keys: if True, empty fields will be serialized with a `None` value.
+        """
         d: Dict[str, Any] = {}
         k: str
         field: Field
@@ -80,6 +169,14 @@ def _to_dict_(self, with_empty_keys: bool = False) -> Dict[str, Any]:
     def _from_dict_(
         cls, source: Dict, strict: bool = True, path: str = ""
     ) -> "DocumentSource":
+        """
+        Deserialize a document source into a Document instance.
+
+        :param source: document source (python dict) to deserialize.
+        :param strict: if True, check that fields declared with `multiple=True` or `multiple=False` have the intended
+        shape (fields with `multiple=True` are expected to contain a list of values, fields with `multiple=False` are
+        expected to contain a single value).
+        """
         doc = cls()
         k: str
         field: Field
@@ -121,4 +218,7 @@ def _from_dict_(
 
 
 class InnerDocSource(DocumentSource):
-    pass
+
+    """
+    Common class for inner documents like Object or Nested.
+    """
diff --git a/pandagg/index.py b/pandagg/index.py index 1e1b6235..fb951a74 100644
--- a/pandagg/index.py
+++ b/pandagg/index.py
@@ -1,17 +1,17 @@
 # adapted from elasticsearch-dsl-py
 import dataclasses
 
-from itertools import chain
 from copy import deepcopy
-from typing import Optional, Any, List, Dict, Tuple, Union, Iterator, Iterable
-from typing_extensions import TypedDict, Literal
+from itertools import chain
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 
 from elasticsearch import Elasticsearch, helpers
+from typing_extensions import Literal, TypedDict
 
 from pandagg import Mappings, Search
-from pandagg.document import DocumentSource, DocumentMeta
+from pandagg.document import DocumentMeta, DocumentSource
 from pandagg.tree.mappings import MappingsDictOrNode
-from pandagg.types import SettingsDict, IndexAliases, DocSource, Action, OpType
+from pandagg.types import Action, DocSource, IndexAliases, OpType, SettingsDict
 from pandagg.utils import _cast_pre_save_source, get_action_modifier, is_subset
@@ -23,6 +23,11 @@ class Template(TypedDict, total=False):
 
 @dataclasses.dataclass
 class DocumentBulkWriter:
+
+    """Class responsible for performing document operations in batches.
+    Not intended to be used directly (it is used within `DeclarativeIndex`).
+    """
+
     _index: "DeclarativeIndex"
     _operations: Iterator[Action] = dataclasses.field(
         init=False, default_factory=lambda: iter([])
@@ -158,6 +163,15 @@ def _deepcopy_mutable_attrs(attrs: Dict[str, Any], attrs_names: List[str]) -> No
 
 
 class IndexMeta(type):
+    """DeclarativeIndex metaclass.
+
+    - ensure `DeclarativeIndex` has a `name`
+    - ensure the Mappings declaration is valid, and that `mappings` and `document` do not conflict
+    - build a `Mappings` instance into the `_mappings` attribute, based on `mappings` if provided, else on `document` if
+      provided
+    - deepcopy the `mappings` and `settings` attributes so that they cannot be mutated by side effects
+    """
+
     # global flag to apply changes to descendants of DeclarativeIndex class (and not the abstract class itself)
     _abstract_index_initialized = False
@@ -288,6 +302,58 @@ def exists(self, **kwargs: Any) -> bool:
 
 
 class DeclarativeIndex(metaclass=IndexMeta):
+    """Express an index structure. Provide methods to persist this structure to the cluster, and to perform queries on
+    it.
+
+    The only compulsory class attribute is the index name.
+    Index settings and aliases can simply be declared via the `settings` and `aliases` attributes.
+    Mappings, though, can be declared using the `mappings` and/or `document` attributes.
+
+    - the `mappings` attribute sticks more strictly to Elasticsearch syntax, and takes precedence over the `document` attribute
+    - the `document` attribute accepts a `DocumentSource` class and provides ORM capabilities, see `DocumentSource`
+
+    Example of index declaration::
+
+        from pandagg.index import DeclarativeIndex
+
+        class Inspection(DocumentSource):
+            name = Text()
+            borough = Keyword()
+            score = Integer()
+            location = GeoPoint()
+            inspection_date = Date(format="MM/dd/yyyy")
+
+
+        class NYCRestaurants(DeclarativeIndex):
+            name = "nyc-restaurants"
+            document = Inspection
+            mappings = {
+                "properties": {
+                    "name": {"type": "text"},
+                    "borough": {"type": "keyword"},
+                    "score": {"type": "integer"},
+                    "location": {"type": "geo_point"},
+                    "inspection_date": {"type": "date", "format": "MM/dd/yyyy"},
+                }
+            }
+            settings = {"number_of_shards": 2}
+
+    This declaration can be used to persist the expected state to the cluster::
+
+        from elasticsearch import Elasticsearch
+        client = Elasticsearch()
+        index = NYCRestaurants(client=client)
+
+        if not index.exists():
+            index.save()
+
+    Search this index using the `search` method:
+
+    >>> index.search().execute()
+    took 3ms, success: True, total result >=10000, contains 10 hits
+
+    """
+
     name: str
     mappings: Optional[MappingsDictOrNode] = None
     settings: Optional[SettingsDict] = None
@@ -322,10 +388,17 @@ def _get_connection(self) -> Elasticsearch:
 
     def search(
         self,
+        deserialize_source: bool = False,
         nested_autocorrect: bool = False,
         repr_auto_execute: bool = False,
-        deserialize_source: bool = False,
     ) -> Search:
+        """Return a Search instance.
+        :param deserialize_source: if True, hits sources are deserialized into `DocumentSource` instances (based on
+        the class provided in the `DeclarativeIndex` `document` attribute).
+        :param nested_autocorrect: if True, missing nested clauses will automatically be injected.
+        :param repr_auto_execute: intended for interactive usage only, if True, the request is automatically executed when the instance representation is displayed.
+        :rtype: Search
+        """
         if deserialize_source is True and self.document is None:
             raise ValueError(
                 "'%s' DeclarativeIndex doesn't have any declared document to deserialize hits' sources"
@@ -348,7 +421,7 @@ def create(self, **kwargs: Any) -> Any:
         ``Elasticsearch.indices.create`` unchanged.
""" return self._get_connection().indices.create( - index=self.name, body=self.to_dict(), **kwargs + index=self.name, **self.to_dict(), **kwargs ) def is_closed(self) -> bool: diff --git a/pandagg/interactive/_field_agg_factory.py b/pandagg/interactive/_field_agg_factory.py index c9c0dfeb..45af19b7 100644 --- a/pandagg/interactive/_field_agg_factory.py +++ b/pandagg/interactive/_field_agg_factory.py @@ -1,10 +1,9 @@ import dataclasses -from typing import List, Type, Any, Callable, Dict - -from pandagg.search import Search +from typing import Any, Callable, Dict, List, Type from pandagg.node.aggs.abstract import AggClause, BucketAggClause from pandagg.node.types import MAPPING_TYPES +from pandagg.search import Search from pandagg.types import FieldType diff --git a/pandagg/interactive/mappings.py b/pandagg/interactive/mappings.py index 86f167f0..84a48b68 100644 --- a/pandagg/interactive/mappings.py +++ b/pandagg/interactive/mappings.py @@ -1,13 +1,12 @@ import json -from typing import Optional, List +from typing import List, Optional from elasticsearch import Elasticsearch - from lighttree import TreeBasedObj from lighttree.node import NodeId -from pandagg.tree.mappings import Mappings from pandagg.interactive._field_agg_factory import field_classes_per_name +from pandagg.tree.mappings import Mappings from pandagg.utils import DSLMixin diff --git a/pandagg/mappings.py b/pandagg/mappings.py index 1fb8e616..1385f1e3 100644 --- a/pandagg/mappings.py +++ b/pandagg/mappings.py @@ -1,60 +1,60 @@ -from pandagg.tree.mappings import Mappings from pandagg.interactive.mappings import IMappings from pandagg.node.mappings.field_datatypes import ( - IpRange, - Text, - Keyword, - ConstantKeyword, - WildCard, - Long, - Integer, - Short, + IP, + Alias, + Binary, + Boolean, Byte, - Double, - HalfFloat, - ScaledFloat, + Completion, + ConstantKeyword, Date, DateNanos, - Boolean, - Binary, - IntegerRange, + DateRange, + DenseVector, + Double, + DoubleRange, + Flattened, Float, FloatRange, - LongRange, - DoubleRange, - DateRange, - Object, - Nested, GeoPoint, GeoShape, - IP, - Completion, - TokenCount, - MapperMurMur3, + HalfFloat, + Histogram, + Integer, + IntegerRange, + IpRange, + Join, + Keyword, + Long, + LongRange, MapperAnnotatedText, + MapperMurMur3, + Nested, + Object, Percolator, - Join, RankFeature, RankFeatures, - DenseVector, - SparseVector, + ScaledFloat, SearchAsYouType, - Alias, - Flattened, Shape, - Histogram, + Short, + SparseVector, + Text, + TokenCount, + WildCard, ) from pandagg.node.mappings.meta_fields import ( - Index, - Type, - Id, FieldNames, - Source, - Size, + Id, Ignored, - Routing, + Index, Meta, + Routing, + Size, + Source, + Type, ) +from pandagg.tree.mappings import Mappings __all__ = [ "Mappings", diff --git a/pandagg/node/__init__.py b/pandagg/node/__init__.py index 2b4a6567..a40c40d6 100644 --- a/pandagg/node/__init__.py +++ b/pandagg/node/__init__.py @@ -1,4 +1,4 @@ # make sure all classes are loaded and available through metaclass -import pandagg.node.query import pandagg.node.aggs import pandagg.node.mappings +import pandagg.node.query diff --git a/pandagg/node/_node.py b/pandagg/node/_node.py index 583b7605..fcc3355c 100644 --- a/pandagg/node/_node.py +++ b/pandagg/node/_node.py @@ -1,7 +1,8 @@ +from typing import Any, Dict + from lighttree import AutoIdNode as OriginalNode from pandagg.utils import DSLMixin -from typing import Dict, Any class Node(DSLMixin, OriginalNode): diff --git a/pandagg/node/aggs/abstract.py b/pandagg/node/aggs/abstract.py index 64f153a7..5feda0be 
100644 --- a/pandagg/node/aggs/abstract.py +++ b/pandagg/node/aggs/abstract.py @@ -1,21 +1,20 @@ import json +from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union from pandagg.node._node import Node -from typing import Optional, List, Union, Dict, Any, Tuple, Iterator, Type - from pandagg.types import ( - Meta, - BucketKey, - BucketDict, AggClauseDict, - AggType, - Script, - GapPolicy, - AggName, AggClauseResponseDict, - BucketsWrapperDict, + AggName, + AggType, + BucketDict, + BucketKey, BucketKeyAtom, BucketsDict, + BucketsWrapperDict, + GapPolicy, + Meta, + Script, ) diff --git a/pandagg/node/aggs/bucket.py b/pandagg/node/aggs/bucket.py index 07412c89..3b90555c 100644 --- a/pandagg/node/aggs/bucket.py +++ b/pandagg/node/aggs/bucket.py @@ -1,10 +1,10 @@ # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket.html -from typing import Any, Optional, Dict, Union, List +from typing import Any, Dict, List, Optional, Union -from pandagg.node.types import NUMERIC_TYPES from pandagg.node.aggs.abstract import MultipleBucketAgg, UniqueBucketAgg -from pandagg.types import Meta, QueryClauseDict, RangeDict, DistanceType, ExecutionHint +from pandagg.node.types import NUMERIC_TYPES +from pandagg.types import DistanceType, ExecutionHint, Meta, QueryClauseDict, RangeDict class Global(UniqueBucketAgg): @@ -39,6 +39,8 @@ def __init__( class MatchAll(Filter): + KEY = "match_all" + def __init__(self, **body: Any): super(MatchAll, self).__init__(filter={"match_all": {}}, **body) diff --git a/pandagg/node/aggs/composite.py b/pandagg/node/aggs/composite.py index 159e1e3f..91e22618 100644 --- a/pandagg/node/aggs/composite.py +++ b/pandagg/node/aggs/composite.py @@ -1,14 +1,16 @@ -from .abstract import BucketAggClause -from typing import Optional, Any, Dict, List, Iterator, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple + from pandagg.types import ( AfterKey, - CompositeSource, - BucketDict, AggClauseResponseDict, AggName, + BucketDict, CompositeBucketKey, + CompositeSource, ) +from .abstract import BucketAggClause + class Composite(BucketAggClause): diff --git a/pandagg/node/aggs/metric.py b/pandagg/node/aggs/metric.py index 61183436..2eb8f693 100644 --- a/pandagg/node/aggs/metric.py +++ b/pandagg/node/aggs/metric.py @@ -1,7 +1,7 @@ from typing import Any, List -from pandagg.node.types import NUMERIC_TYPES from pandagg.node.aggs.abstract import FieldOrScriptMetricAgg, MetricAgg +from pandagg.node.types import NUMERIC_TYPES class TopHits(MetricAgg): diff --git a/pandagg/node/mappings/abstract.py b/pandagg/node/mappings/abstract.py index 71efc6a0..05dcf187 100644 --- a/pandagg/node/mappings/abstract.py +++ b/pandagg/node/mappings/abstract.py @@ -1,14 +1,13 @@ from __future__ import annotations import json +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type, Union from pandagg.node._node import Node -from typing import Optional, Any, Tuple, Dict, Type, Union, TYPE_CHECKING - from pandagg.types import FieldType if TYPE_CHECKING: - from pandagg.document import DocumentSource, DocumentMeta + from pandagg.document import DocumentMeta, DocumentSource class Field(Node): @@ -72,7 +71,7 @@ class ComplexField(Field): def __init__( self, properties: Optional[Union[Dict, Type[DocumentSource]]] = None, - **body: Any + **body: Any, ) -> None: properties = properties or {} if not isinstance(properties, dict): diff --git a/pandagg/node/query/__init__.py b/pandagg/node/query/__init__.py index d9dedddf..90e2c4e4 100644 --- 
a/pandagg/node/query/__init__.py +++ b/pandagg/node/query/__init__.py @@ -4,6 +4,7 @@ from .full_text import * from .geo import * from .joining import * +from .match_all import * from .shape import * from .span import * from .specialized import * diff --git a/pandagg/node/query/abstract.py b/pandagg/node/query/abstract.py index 10a20db6..ff785ea4 100644 --- a/pandagg/node/query/abstract.py +++ b/pandagg/node/query/abstract.py @@ -1,9 +1,8 @@ import json +from typing import Any, Dict, List, Optional, Tuple, Type, Union from pandagg.node._node import Node -from typing import Optional, Union, Dict, Any, Tuple, List, Type - -from pandagg.types import QueryType, QueryClauseDict +from pandagg.types import QueryClauseDict, QueryType class QueryClause(Node): diff --git a/pandagg/node/query/compound.py b/pandagg/node/query/compound.py index 0af1b6a1..590f7046 100644 --- a/pandagg/node/query/compound.py +++ b/pandagg/node/query/compound.py @@ -1,7 +1,7 @@ -from typing import List, Optional, Any +from typing import Any, List, Optional -from pandagg.types import QueryName from pandagg.node.query.abstract import QueryClause +from pandagg.types import QueryName class CompoundClause(QueryClause): diff --git a/pandagg/node/query/full_text.py b/pandagg/node/query/full_text.py index b0e9f689..76972f69 100644 --- a/pandagg/node/query/full_text.py +++ b/pandagg/node/query/full_text.py @@ -1,4 +1,4 @@ -from .abstract import LeafQueryClause, KeyFieldQueryClause, MultiFieldsQueryClause +from .abstract import KeyFieldQueryClause, LeafQueryClause, MultiFieldsQueryClause class Intervals(KeyFieldQueryClause): diff --git a/pandagg/node/query/geo.py b/pandagg/node/query/geo.py index 89be4ff6..b29c8d27 100644 --- a/pandagg/node/query/geo.py +++ b/pandagg/node/query/geo.py @@ -1,8 +1,9 @@ from typing import Any, Optional, Tuple -from .abstract import KeyFieldQueryClause, AbstractSingleFieldQueryClause from pandagg.types import DistanceType, ValidationMethod +from .abstract import AbstractSingleFieldQueryClause, KeyFieldQueryClause + class GeoBoundingBox(KeyFieldQueryClause): KEY = "geo_bounding_box" diff --git a/pandagg/node/query/match_all.py b/pandagg/node/query/match_all.py new file mode 100644 index 00000000..f4e12582 --- /dev/null +++ b/pandagg/node/query/match_all.py @@ -0,0 +1,18 @@ +from typing import Any + +from pandagg.node.query import LeafQueryClause + + +class MatchAll(LeafQueryClause): + + KEY = "match_all" + + def __init__(self, **body: Any) -> None: + super(MatchAll, self).__init__(**body) + + +class MatchNone(LeafQueryClause): + KEY = "match_none" + + def __init__(self, **body: Any) -> None: + super(MatchNone, self).__init__(**body) diff --git a/pandagg/node/query/specialized.py b/pandagg/node/query/specialized.py index 9ce6829d..360098e2 100644 --- a/pandagg/node/query/specialized.py +++ b/pandagg/node/query/specialized.py @@ -1,4 +1,4 @@ -from .abstract import LeafQueryClause, FlatFieldQueryClause, MultiFieldsQueryClause +from .abstract import FlatFieldQueryClause, LeafQueryClause, MultiFieldsQueryClause class DistanceFeature(FlatFieldQueryClause): diff --git a/pandagg/node/query/term_level.py b/pandagg/node/query/term_level.py index 4c39d9d9..20b4f2de 100644 --- a/pandagg/node/query/term_level.py +++ b/pandagg/node/query/term_level.py @@ -1,11 +1,12 @@ -from typing import Optional, Any, Tuple, List, Union +from typing import Any, List, Optional, Tuple, Union + +from pandagg.types import QueryName from .abstract import ( - LeafQueryClause, AbstractSingleFieldQueryClause, KeyFieldQueryClause, + 
LeafQueryClause, ) -from pandagg.types import QueryName class Exists(LeafQueryClause): diff --git a/pandagg/query.py b/pandagg/query.py index f3f9b187..a0f2d878 100644 --- a/pandagg/query.py +++ b/pandagg/query.py @@ -1,37 +1,25 @@ -from pandagg.node.query.shape import Shape -from pandagg.node.query.term_level import ( - Exists, - Fuzzy, - Ids, - Prefix, - Range, - Regexp, - Term, - Terms, - TermsSet, - Type, - Wildcard, +from pandagg.node.query.compound import ( + Bool, + Boosting, + ConstantScore, + DisMax, + FunctionScore, ) from pandagg.node.query.full_text import ( + Common, Intervals, Match, MatchBoolPrefix, MatchPhrase, MatchPhrasePrefix, MultiMatch, - Common, QueryString, SimpleQueryString, ) -from pandagg.node.query.compound import ( - Bool, - Boosting, - ConstantScore, - FunctionScore, - DisMax, -) -from pandagg.node.query.joining import Nested, HasChild, HasParent, ParentId -from pandagg.node.query.geo import GeoShape, GeoPolygone, GeoDistance, GeoBoundingBox +from pandagg.node.query.geo import GeoBoundingBox, GeoDistance, GeoPolygone, GeoShape +from pandagg.node.query.joining import HasChild, HasParent, Nested, ParentId +from pandagg.node.query.match_all import MatchAll, MatchNone +from pandagg.node.query.shape import Shape from pandagg.node.query.specialized import ( DistanceFeature, MoreLikeThis, @@ -40,7 +28,20 @@ Script, Wrapper, ) -from pandagg.node.query.specialized_compound import ScriptScore, PinnedQuery +from pandagg.node.query.specialized_compound import PinnedQuery, ScriptScore +from pandagg.node.query.term_level import ( + Exists, + Fuzzy, + Ids, + Prefix, + Range, + Regexp, + Term, + Terms, + TermsSet, + Type, + Wildcard, +) from pandagg.tree.query import Query __all__ = [ @@ -78,6 +79,9 @@ "HasParent", "HasChild", "ParentId", + # match_all + "MatchAll", + "MatchNone", # shape "Shape", # geo diff --git a/pandagg/response.py b/pandagg/response.py index 28daa5d3..4105ec8b 100644 --- a/pandagg/response.py +++ b/pandagg/response.py @@ -2,47 +2,48 @@ import copy import dataclasses -from typing_extensions import Literal, TypedDict from typing import ( - Iterator, - Optional, - List, TYPE_CHECKING, + Any, Dict, + Iterator, + List, + Optional, Tuple, Union, overload, - Any, ) from elasticsearch import Elasticsearch from lighttree.node import NodeId +from typing_extensions import Literal, TypedDict -from pandagg.query import Query from pandagg.aggs import Aggs, Composite -from pandagg.node.aggs.abstract import UniqueBucketAgg, MetricAgg, Root, AggClause +from pandagg.node.aggs.abstract import AggClause, MetricAgg, Root, UniqueBucketAgg from pandagg.node.aggs.bucket import Nested, ReverseNested +from pandagg.query import Query from pandagg.types import ( - HitDict, - HitsDict, - DocSource, - TotalDict, - SearchResponseDict, - ShardsDict, - AggregationsResponseDict, AggName, - ProfileDict, + AggregationsResponseDict, BucketDict, BucketKey, - CompositeBucketKey, BucketKeyAtom, + CompositeBucketKey, + DocSource, + HitDict, + HitsDict, + ProfileDict, + SearchResponseDict, + ShardsDict, + TotalDict, ) if TYPE_CHECKING: import pandas as pd - from pandagg.search import Search + from pandagg import DocumentMeta from pandagg.document import DocumentSource + from pandagg.search import Search GroupingKeysDict = Dict[AggName, BucketKeyAtom] diff --git a/pandagg/search.py b/pandagg/search.py index 6ac575dd..f6b5a4e5 100644 --- a/pandagg/search.py +++ b/pandagg/search.py @@ -5,15 +5,15 @@ import copy import json from typing import ( - Optional, - Union, - Tuple, - List, + TYPE_CHECKING, 
Any, - TypeVar, Dict, Iterator, - TYPE_CHECKING, + List, + Optional, + Tuple, + TypeVar, + Union, ) from elasticsearch import Elasticsearch @@ -21,31 +21,32 @@ from pandagg.node.aggs.abstract import TypeOrAgg from pandagg.query import Bool -from pandagg.response import SearchResponse, Hit, Aggregations -from pandagg.tree.mappings import _mappings, Mappings +from pandagg.response import Aggregations, Hit, SearchResponse +from pandagg.tree.aggs import Aggs, AggsDictOrNode +from pandagg.tree.mappings import Mappings, _mappings from pandagg.tree.query import ( - Query, ADD, - TypeOrQuery, InsertionModes, + Query, SingleOrMultipleQueryClause, + TypeOrQuery, ) -from pandagg.tree.aggs import Aggs, AggsDictOrNode from pandagg.types import ( - MappingsDict, - QueryName, - ClauseBody, + AfterKey, AggName, - SearchResponseDict, + BucketDict, + ClauseBody, DeleteByQueryResponse, + MappingsDict, + QueryName, SearchDict, - BucketDict, - AfterKey, + SearchResponseDict, ) from pandagg.utils import DSLMixin if TYPE_CHECKING: import pandas as pd + from pandagg.document import DocumentMeta # because Search.bool method shadows bool typing @@ -100,7 +101,7 @@ def index(self: T, *index: Union[str, List[str], Tuple[str]]) -> T: """ Set the index for the search. If called empty it will remove all information. - Example: + Example:: s = Search() s = s.index('twitter-2015.01.01', 'twitter-2015.01.02') @@ -200,7 +201,7 @@ def query( on: Optional[QueryName] = None, mode: InsertionModes = ADD, compound_param: str = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.query( @@ -209,7 +210,7 @@ def query( on=on, mode=mode, compound_param=compound_param, - **body + **body, ) return s @@ -224,7 +225,7 @@ def bool( insert_below: Optional[QueryName] = None, on: Optional[QueryName] = None, mode: InsertionModes = ADD, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.bool( @@ -235,7 +236,7 @@ def bool( insert_below=insert_below, on=on, mode=mode, - **body + **body, ) return s @@ -248,7 +249,7 @@ def filter( on: Optional[QueryName] = None, mode: InsertionModes = ADD, bool_body: ClauseBody = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.filter( @@ -257,7 +258,7 @@ def filter( on=on, mode=mode, bool_body=bool_body, - **body + **body, ) return s @@ -270,7 +271,7 @@ def must_not( on: Optional[QueryName] = None, mode: InsertionModes = ADD, bool_body: ClauseBody = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.must_not( @@ -279,7 +280,7 @@ def must_not( on=on, mode=mode, bool_body=bool_body, - **body + **body, ) return s @@ -292,7 +293,7 @@ def should( on: Optional[QueryName] = None, mode: InsertionModes = ADD, bool_body: ClauseBody = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.should( @@ -301,7 +302,7 @@ def should( on=on, mode=mode, bool_body=bool_body, - **body + **body, ) return s @@ -314,7 +315,7 @@ def must( on: Optional[QueryName] = None, mode: InsertionModes = ADD, bool_body: ClauseBody = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._query = s._query.must( @@ -323,7 +324,7 @@ def must( on=on, mode=mode, bool_body=bool_body, - **body + **body, ) return s @@ -336,7 +337,7 @@ def exclude( on: Optional[QueryName] = None, mode: InsertionModes = ADD, bool_body: ClauseBody = None, - **body: Any + **body: Any, ) -> "Search": """Must not wrapped in filter context.""" s = self._clone() @@ -356,7 +357,7 @@ def 
post_filter( on: Optional[QueryName] = None, mode: InsertionModes = ADD, compound_param: str = None, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._post_filter = s._post_filter.query( @@ -365,7 +366,7 @@ def post_filter( on=on, mode=mode, compound_param=compound_param, - **body + **body, ) return s @@ -375,7 +376,7 @@ def agg( type_or_agg: Optional[TypeOrAgg] = None, insert_below: Optional[AggName] = None, at_root: bool_ = False, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._aggs = s._aggs.agg( @@ -383,7 +384,7 @@ def agg( type_or_agg=type_or_agg, insert_below=insert_below, at_root=at_root, - **body + **body, ) return s @@ -407,7 +408,7 @@ def groupby( type_or_agg: Optional[TypeOrAgg] = None, insert_below: Optional[AggName] = None, at_root: bool_ = False, - **body: Any + **body: Any, ) -> "Search": s = self._clone() s._aggs = s._aggs.groupby( @@ -415,7 +416,7 @@ def groupby( type_or_agg=type_or_agg, insert_below=insert_below, at_root=at_root, - **body + **body, ) return s @@ -791,7 +792,7 @@ def execute(self) -> SearchResponse: the data. """ es = self._get_connection() - raw_data = es.search(index=self._index, body=self.to_dict()) + raw_data = es.search(index=self._index, **self.to_dict()) # type: ignore return SearchResponse(data=raw_data, _search=self) # type: ignore def scan_composite_agg(self, size: int) -> Iterator[BucketDict]: @@ -803,6 +804,7 @@ def scan_composite_agg(self, size: int) -> Iterator[BucketDict]: buckets: List[BucketDict] = r.aggregations.data[a_name][ # type: ignore "buckets" ] + yield from buckets after_key: AfterKey = r.aggregations.data[a_name]["after_key"] # type: ignore init: bool = True @@ -812,8 +814,7 @@ def scan_composite_agg(self, size: int) -> Iterator[BucketDict]: r = s.execute() agg_clause_response = r.aggregations.data[a_name] buckets = agg_clause_response["buckets"] # type: ignore - for bucket in buckets: - yield bucket + yield from buckets if "after_key" in agg_clause_response: after_key = agg_clause_response["after_key"] # type: ignore else: @@ -831,18 +832,18 @@ def scan_composite_agg_at_once(self, size: int) -> Aggregations: # artificially merge all buckets as if they were returned in a single query return Aggregations(_search=s, data={agg_name: {"buckets": all_buckets}}) - def scan(self) -> Iterator[Hit]: + def scan(self, **kwargs: Any) -> Iterator[Hit]: """ Turn the search into a scan search and return a generator that will iterate over all the documents matching the query. 
- Use ``params`` method to specify any additional arguments you with to - pass to the underlying ``scan`` helper from ``elasticsearch-py`` - + Use ``kwargs`` to specify any additional arguments to pass to the underlying ``scan`` helper from + ``elasticsearch-py`` - https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan """ es = self._get_connection() - for hit in scan(es, query=self.to_dict(), index=self._index): + for hit in scan(es, query=self.to_dict(), index=self._index, **kwargs): yield Hit(hit, _document_class=self._document_class) def delete(self) -> DeleteByQueryResponse: diff --git a/pandagg/tree/__init__.py b/pandagg/tree/__init__.py index b4364d8e..644e958d 100644 --- a/pandagg/tree/__init__.py +++ b/pandagg/tree/__init__.py @@ -1,3 +1,3 @@ import pandagg.tree.aggs -import pandagg.tree.query import pandagg.tree.mappings +import pandagg.tree.query diff --git a/pandagg/tree/aggs.py b/pandagg/tree/aggs.py index c751a71b..f83a0911 100644 --- a/pandagg/tree/aggs.py +++ b/pandagg/tree/aggs.py @@ -1,23 +1,23 @@ import json -from typing import Optional, Union, Any, Dict, Tuple +from typing import Any, Dict, Optional, Tuple, Union from lighttree import Key, Tree from lighttree.node import NodeId -from pandagg.node.aggs import Composite -from pandagg.tree._tree import TreeReprMixin -from pandagg.tree.mappings import _mappings, Mappings, MappingsDict +from pandagg.node.aggs import Composite from pandagg.node.aggs.abstract import ( + A, + AggClause, + AggClauseDict, BucketAggClause, Root, - A, TypeOrAgg, - AggClauseDict, - AggClause, ) from pandagg.node.aggs.bucket import Nested, ReverseNested from pandagg.node.aggs.pipeline import BucketSelector, BucketSort -from pandagg.types import AggName, NamedAggsDict, AfterKey +from pandagg.tree._tree import TreeReprMixin +from pandagg.tree.mappings import Mappings, MappingsDict, _mappings +from pandagg.types import AfterKey, AggName, NamedAggsDict # {"my_agg": {"terms": "some_field"}} or {"my_agg": Terms(field="some_field")} AggsDictOrNode = Dict[AggName, Union[AggClauseDict, AggClause]] diff --git a/pandagg/tree/mappings.py b/pandagg/tree/mappings.py index d9a6a3b1..f07fb1da 100644 --- a/pandagg/tree/mappings.py +++ b/pandagg/tree/mappings.py @@ -1,21 +1,20 @@ from __future__ import annotations -from typing_extensions import TypedDict -from typing import Optional, Union, Any, List, Dict, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from lighttree.node import NodeId from lighttree import Tree - -from pandagg.node.aggs.abstract import AggClause -from pandagg.node.mappings import Object, Nested -from pandagg.node.mappings.abstract import Field, RegularField, ComplexField, Root +from lighttree.node import NodeId +from typing_extensions import TypedDict from pandagg.exceptions import ( AbsentMappingFieldError, InvalidOperationMappingFieldError, ) +from pandagg.node.aggs.abstract import AggClause +from pandagg.node.mappings import Nested, Object +from pandagg.node.mappings.abstract import ComplexField, Field, RegularField, Root from pandagg.tree._tree import TreeReprMixin -from pandagg.types import DocSource, MappingsDict, FieldName, FieldClauseDict +from pandagg.types import DocSource, FieldClauseDict, FieldName, MappingsDict if TYPE_CHECKING: from pandagg.document import DocumentSource @@ -45,7 +44,7 @@ def __init__( self, properties: Optional[FieldPropertiesDictOrNode] = None, dynamic: Optional[bool] = None, - **body: Any + **body: Any, ) -> None: super(Mappings, self).__init__() 
# a Mappings always has a root after __init__ @@ -119,7 +118,7 @@ def validate_agg_clause(self, agg_clause: AggClause, exc: bool = True) -> bool: nid = self.get_node_id_by_path(agg_field.split(".")) except Exception: raise AbsentMappingFieldError( - u"Agg of type <%s> on non-existing field <%s>." + "Agg of type <%s> on non-existing field <%s>." % (agg_clause.KEY, agg_field) ) _, field_node = self.get(nid) @@ -129,7 +128,7 @@ def validate_agg_clause(self, agg_clause: AggClause, exc: bool = True) -> bool: if not exc: return False raise InvalidOperationMappingFieldError( - u"Agg of type <%s> not possible on field of type <%s>." + "Agg of type <%s> not possible on field of type <%s>." % (agg_clause.KEY, field_type) ) return True @@ -156,7 +155,7 @@ def mapping_type_of_field(self, field_path: str) -> str: nid = self.get_node_id_by_path(field_path.split(".")) except ValueError: raise AbsentMappingFieldError( - u"<%s field is not present in mappings>" % field_path + "<%s field is not present in mappings>" % field_path ) _, node = self.get(nid) return node.KEY diff --git a/pandagg/tree/query.py b/pandagg/tree/query.py index 73a72a63..8e70df9f 100644 --- a/pandagg/tree/query.py +++ b/pandagg/tree/query.py @@ -1,24 +1,24 @@ import json -from typing import Optional, Union, Any, List, Dict -from typing_extensions import Literal +from typing import Any, Dict, List, Optional, Union from lighttree import Key, Tree from lighttree.node import NodeId +from typing_extensions import Literal + from pandagg._decorators import Substitution from pandagg.node.query._parameter_clause import ParentParameterClause from pandagg.node.query.abstract import ( - QueryClause, LeafQueryClause, Q, + QueryClause, QueryClauseDict, QueryType, TypeOrQuery_, ) -from pandagg.node.query.compound import CompoundClause, Bool +from pandagg.node.query.compound import Bool, CompoundClause from pandagg.node.query.joining import Nested - -from pandagg.tree.mappings import _mappings, MappingsDict, Mappings -from pandagg.types import QueryName, ClauseBody +from pandagg.tree.mappings import Mappings, MappingsDict, _mappings +from pandagg.types import ClauseBody, QueryName # because a method `bool` shadows the real bool bool_ = bool diff --git a/pandagg/types.py b/pandagg/types.py index f728d195..3a315114 100644 --- a/pandagg/types.py +++ b/pandagg/types.py @@ -1,5 +1,6 @@ -from typing_extensions import TypedDict, Literal -from typing import Optional, Dict, Any, List, Union +from typing import Any, Dict, List, Optional, Union + +from typing_extensions import Literal, TypedDict ClauseName = str ClauseType = str diff --git a/pandagg/utils.py b/pandagg/utils.py index d80345af..ed577ead 100644 --- a/pandagg/utils.py +++ b/pandagg/utils.py @@ -1,9 +1,9 @@ # adapted from https://github.com/elastic/elasticsearch-dsl-py/blob/master/elasticsearch_dsl/utils.py#L162 from __future__ import annotations -from typing import Dict, Tuple, Any, Union, Callable, Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Union -from pandagg.types import DocSource, Action, IndexName, OpType +from pandagg.types import Action, DocSource, IndexName, OpType if TYPE_CHECKING: from pandagg.document import DocumentSource diff --git a/setup.cfg b/setup.cfg index 405d6f86..7a04af35 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,8 +4,8 @@ test=pytest [flake8] max-line-length = 120 ignore = - E402, # module level import not at top of file - W503 + E402, # Module level import not at top of file + W503 # Line break occurred before a binary 
operator exclude = *__init__.py @@ -14,3 +14,11 @@ disallow_untyped_defs = True [mypy-pandas.*] ignore_missing_imports = True + +[tool:pytest] +filterwarnings = + ignore:.*Elasticsearch built-in security features are not enabled: + +[isort] +profile = black +src_paths = pandagg,tests,examples diff --git a/setup.py b/setup.py index 8d9837f7..c3ea6257 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,21 @@ -__version__ = "0.2.3" +__version__ = "0.2.4" import os -from setuptools import setup -from setuptools import find_packages - +from setuptools import find_packages, setup here = os.path.abspath(os.path.dirname(__file__)) README = open(os.path.join(here, "README.md")).read() install_requires = [ - "lighttree==1.3.4", + "lighttree==1.3.5", "elasticsearch>=7.0.0,<8.0.0", "typing_extensions", ] develop_requires = [ "pre-commit", + "isort", "black", "coverage", "flake8", @@ -27,6 +26,7 @@ "pandas", "Sphinx", "twine", + "simplejson", ] setup( diff --git a/tests/conftest.py b/tests/conftest.py index 7376b4f0..c1a31645 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,14 @@ # adapted from elasticsearch-dsl.py import re - -from mock import Mock from unittest import SkipTest -from pytest import fixture, skip from elasticsearch.helpers import bulk from elasticsearch.helpers.test import get_test_client +from mock import Mock +from pytest import fixture, skip - -from .test_data import TEST_GIT_DATA, create_git_index, GIT_MAPPINGS +from .test_data import GIT_MAPPINGS, TEST_GIT_DATA, create_git_index @fixture(scope="session") @@ -24,8 +22,9 @@ def client(): @fixture def write_client(client): yield client - client.indices.delete("test-*", ignore=404) - client.indices.delete_index_template("test-template", ignore=404) + client.indices.delete(index="test-git", ignore=404) + client.indices.delete(index="test-post", ignore=404) + client.indices.delete_index_template(name="test-template", ignore=404) @fixture(scope="session") @@ -52,13 +51,13 @@ def git_mappings(): @fixture(scope="session") def data_client(client): - client.indices.delete("git", ignore=(404,)) + client.indices.delete(index="git", ignore=(404,)) # create mappings create_git_index(client, "git") # load data bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) yield client - client.indices.delete("git") + client.indices.delete(index="git") @fixture @@ -109,4 +108,4 @@ def updatable_index(client): create_git_index(client, index) bulk(client, TEST_GIT_DATA, raise_on_error=True, refresh=True) yield index - client.indices.delete(index, ignore=404) + client.indices.delete(index=index, ignore=404) diff --git a/tests/interactive/test_mapping.py b/tests/interactive/test_mapping.py index 34449243..56dbd0e9 100644 --- a/tests/interactive/test_mapping.py +++ b/tests/interactive/test_mapping.py @@ -1,10 +1,8 @@ from pandagg import Search - -from pandagg.mappings import Keyword, Text, Nested, Object, Integer -from pandagg.tree.mappings import Mappings from pandagg.interactive._field_agg_factory import field_classes_per_name from pandagg.interactive.mappings import IMappings - +from pandagg.mappings import Integer, Keyword, Nested, Object, Text +from pandagg.tree.mappings import Mappings from tests.testing_samples.mapping_example import MAPPINGS diff --git a/tests/node/agg/test_bucket.py b/tests/node/agg/test_bucket.py index 01ead4ff..5e96a80b 100644 --- a/tests/node/agg/test_bucket.py +++ b/tests/node/agg/test_bucket.py @@ -1,27 +1,27 @@ from pandagg.aggs import ( - Terms, + AdjacencyMatrix, + AutoDateHistogram, + Children, + 
DateHistogram, + DiversifiedSampler, Filter, Filters, - DateHistogram, - Nested, - Range, - Histogram, GeoDistance, GeoHashGrid, - AdjacencyMatrix, - AutoDateHistogram, - VariableWidthHistogram, - SignificantTerms, - RareTerms, GeoTileGrid, - IPRange, - Sampler, - DiversifiedSampler, Global, - Children, + Histogram, + IPRange, + MultiTerms, + Nested, Parent, + Range, + RareTerms, + Sampler, + SignificantTerms, SignificantText, - MultiTerms, + Terms, + VariableWidthHistogram, ) diff --git a/tests/node/mapping/test_field.py b/tests/node/mapping/test_field.py index f05e676e..2a3a29ee 100644 --- a/tests/node/mapping/test_field.py +++ b/tests/node/mapping/test_field.py @@ -1,5 +1,5 @@ from pandagg.document import DocumentSource, InnerDocSource -from pandagg.node.mappings import Text, Keyword, Nested, Object +from pandagg.node.mappings import Keyword, Nested, Object, Text from pandagg.node.mappings.abstract import Root diff --git a/tests/node/query/test_match_all.py b/tests/node/query/test_match_all.py new file mode 100644 index 00000000..ed7bc540 --- /dev/null +++ b/tests/node/query/test_match_all.py @@ -0,0 +1,20 @@ +from pandagg.node.query import MatchAll, MatchNone + + +def test_match_all_clause(): + q = MatchAll() + assert q.body == {} + assert q.to_dict() == {"match_all": {}} + assert q.line_repr(depth=None) == ("match_all", "") + + q = MatchAll(boost=0.5) + assert q.body == {"boost": 0.5} + assert q.to_dict() == {"match_all": {"boost": 0.5}} + assert q.line_repr(depth=None) == ("match_all", "boost=0.5") + + +def test_match_none_clause(): + q = MatchNone() + assert q.body == {} + assert q.to_dict() == {"match_none": {}} + assert q.line_repr(depth=None) == ("match_none", "") diff --git a/tests/node/query/test_specialized.py b/tests/node/query/test_specialized.py index ac32740d..edf80ac3 100644 --- a/tests/node/query/test_specialized.py +++ b/tests/node/query/test_specialized.py @@ -1,10 +1,10 @@ from pandagg.query import ( - Wrapper, DistanceFeature, MoreLikeThis, Percolate, RankFeature, Script, + Wrapper, ) diff --git a/tests/node/query/test_term_level.py b/tests/node/query/test_term_level.py index 2395464a..b6edff32 100644 --- a/tests/node/query/test_term_level.py +++ b/tests/node/query/test_term_level.py @@ -1,12 +1,12 @@ from pandagg.query import ( - Terms, - Term, - Fuzzy, Exists, + Fuzzy, Ids, Prefix, Range, Regexp, + Term, + Terms, TermsSet, Wildcard, ) diff --git a/tests/test_data.py b/tests/test_data.py index c8983a77..57f3f8a3 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,10 +1,9 @@ # adapated from elasticsearch-dsl.py -from typing import List, Any, Dict +from typing import Any, Dict, List from typing_extensions import TypedDict - GIT_MAPPINGS = { "dynamic": False, "properties": { @@ -39,34 +38,32 @@ def create_git_index(client, index): client.indices.create( index=index, - body={ - "settings": { - # just one shard, no replicas for testing - "number_of_shards": 1, - "number_of_replicas": 0, - # custom analyzer for analyzing file paths - "analysis": { - "analyzer": { - "file_path": { - "type": "custom", - "tokenizer": "path_hierarchy", - "filter": ["lowercase"], - } + settings={ + # just one shard, no replicas for testing + "number_of_shards": 1, + "number_of_replicas": 0, + # custom analyzer for analyzing file paths + "analysis": { + "analyzer": { + "file_path": { + "type": "custom", + "tokenizer": "path_hierarchy", + "filter": ["lowercase"], } - }, + } }, - "mappings": GIT_MAPPINGS, }, + mappings=GIT_MAPPINGS, ) -class TestDocument(TypedDict): +class 
_TestDocument(TypedDict): _id: str _source: Dict[str, Any] _index: str -TEST_GIT_DATA: List[TestDocument] = [ +TEST_GIT_DATA: List[_TestDocument] = [ { "_id": "3ca6e1e73a071a705b4babd2f581c91a2a3e5037", "_source": { diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 72282f0d..38d5a270 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -1,9 +1,8 @@ from elasticsearch import Elasticsearch from elasticsearch.client import IndicesClient - -from pandagg.discovery import discover, Index from mock import patch +from pandagg.discovery import Index, discover from pandagg.interactive.mappings import IMappings from tests.test_data import GIT_MAPPINGS from tests.testing_samples.mapping_example import MAPPINGS diff --git a/tests/test_document.py b/tests/test_document.py index e105e811..3e45aded 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -1,8 +1,8 @@ import pytest from pandagg import Mappings -from pandagg.document import InnerDocSource, DocumentSource -from pandagg.mappings import Text, Long, Date, Keyword, Object, Nested +from pandagg.document import DocumentSource, InnerDocSource +from pandagg.mappings import Date, Keyword, Long, Nested, Object, Text from pandagg.node.mappings import Boolean @@ -58,6 +58,14 @@ def test_document_init(): ) +def test_doc_repr(): + user = User(id=1, signed_up="2021-01-01") + assert ( + user.__repr__() + == "User(id=1, signed_up='2021-01-01', username=None, email=None, location=None)" + ) + + def test_pre_save(write_client): class AutoDatePost(DocumentSource): diff --git a/tests/test_index.py b/tests/test_index.py index fc737375..90df4fa9 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -4,8 +4,8 @@ from pandagg import Mappings, Search from pandagg.document import DocumentSource -from pandagg.mappings import Keyword, Text, Date from pandagg.index import DeclarativeIndex, DeclarativeIndexTemplate +from pandagg.mappings import Date, Keyword, Text class Post(DeclarativeIndex): @@ -62,11 +62,11 @@ def test_index_without_client_raises_error_on_write_op(): def test_create_index(write_client): - assert not write_client.indices.exists("test-post") + assert not write_client.indices.exists(index="test-post") index = Post(client=write_client) index.save() - assert write_client.indices.exists("test-post") - persisted_index = write_client.indices.get("test-post")["test-post"] + assert write_client.indices.exists(index="test-post") + persisted_index = write_client.indices.get(index="test-post")["test-post"] assert persisted_index["aliases"] == {"post": {}} assert persisted_index["mappings"] == { "properties": {"published_from": {"type": "date"}, "title": {"type": "text"}} @@ -260,7 +260,7 @@ def test_template_save(write_client): _source={"title": "salut", "published_from": "2021-01-01"}, ).perform(refresh=True) assert post_index.exists() - auto_created_index = write_client.indices.get("test-post")["test-post"] + auto_created_index = write_client.indices.get(index="test-post")["test-post"] assert auto_created_index["mappings"] == { "properties": {"published_from": {"type": "date"}, "title": {"type": "text"}} } diff --git a/tests/test_response.py b/tests/test_response.py index 4e0a671d..bb476896 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -1,14 +1,13 @@ -from pandagg.document import DocumentSource -from pandagg.node.mappings import Keyword -from tests import PandaggTestCase import pandas as pd +import tests.testing_samples.data_sample as sample +from pandagg.document import DocumentSource 
+from pandagg.node.mappings import Keyword +from pandagg.response import Aggregations, Hit, Hits, SearchResponse from pandagg.search import Search -from pandagg.response import SearchResponse, Hits, Hit, Aggregations from pandagg.tree.aggs import Aggs - -import tests.testing_samples.data_sample as sample from pandagg.utils import ordered +from tests import PandaggTestCase from tests.testing_samples.mapping_example import MAPPINGS diff --git a/tests/test_search.py b/tests/test_search.py index 7d2ccf2f..ccf78dcd 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,15 +1,15 @@ from copy import deepcopy - -from mock import patch +from typing import List from elasticsearch import Elasticsearch +from mock import patch -from pandagg import Aggregations -from pandagg.node.aggs import Max, DateHistogram, Sum +from pandagg import Aggregations, Hit +from pandagg.node.aggs import DateHistogram, Max, Sum +from pandagg.query import Bool, Match, Query from pandagg.search import Search -from pandagg.query import Query, Bool, Match from pandagg.tree.mappings import Mappings -from pandagg.utils import ordered, equal_queries +from pandagg.utils import equal_queries, ordered def test_expand__to_dot_is_respected(): @@ -96,7 +96,7 @@ def test_search_index(): assert s._index == ["i"] s = s.index("i2") assert s._index == ["i", "i2"] - s = s.index(u"i3") + s = s.index("i3") assert s._index == ["i", "i2", "i3"] s = s.index() assert s._index is None @@ -410,13 +410,13 @@ def test_repr_execution(client_search): s.size(2).__repr__() client_search.assert_called_once() - client_search.assert_any_call(body={"size": 2}, index=["yolo"]) + client_search.assert_any_call(size=2, index=["yolo"]) client_search.reset_mock() s.size(2)._repr_html_() client_search.assert_called_once() - client_search.assert_any_call(body={"size": 2}, index=["yolo"]) + client_search.assert_any_call(size=2, index=["yolo"]) @patch.object(Elasticsearch, "search") @@ -457,19 +457,26 @@ def test_repr_aggs_execution(client_search): s.__repr__() client_search.assert_called_once() client_search.assert_any_call( - body={ - "size": 0, - "aggs": { - "toto_terms": { - "terms": {"field": "toto"}, - "aggs": {"toto_avg_price": {"avg": {"field": "price"}}}, - } - }, + size=0, + aggs={ + "toto_terms": { + "terms": {"field": "toto"}, + "aggs": {"toto_avg_price": {"avg": {"field": "price"}}}, + } }, index=["yolo"], ) +def test_scan(data_client): + search = Search(using=data_client, index="git") + hits_it = search.scan(scroll="1m", size=30) + assert hasattr(hits_it, "__iter__") + hits: List[Hit] = list(hits_it) + assert len(hits) == 52 + assert all(isinstance(h, Hit) for h in hits) + + def test_scan_composite_agg(data_client, git_mappings): search = ( Search(using=data_client, index="git", mappings=git_mappings) @@ -486,6 +493,31 @@ def test_scan_composite_agg(data_client, git_mappings): assert hasattr(bucket_iterator, "__iter__") buckets = list(bucket_iterator) assert buckets == [ + { + "doc_count": 2, + "insertions_sum": {"value": 91.0}, + "key": {"compatible_histogram": 1393804800000}, + }, + { + "doc_count": 1, + "insertions_sum": {"value": 692.0}, + "key": {"compatible_histogram": 1393891200000}, + }, + { + "doc_count": 3, + "insertions_sum": {"value": 134.0}, + "key": {"compatible_histogram": 1393977600000}, + }, + { + "doc_count": 3, + "insertions_sum": {"value": 179.0}, + "key": {"compatible_histogram": 1394064000000}, + }, + { + "doc_count": 9, + "insertions_sum": {"value": 344.0}, + "key": {"compatible_histogram": 1394150400000}, + }, { 
"doc_count": 2, "insertions_sum": {"value": 120.0}, @@ -567,6 +599,11 @@ def test_scan_composite_agg_at_once(data_client, git_mappings): assert agg_response.to_tabular(index_orient=True) == ( ["compatible_histogram", "author"], { + (1393804800000, "Honza Král"): {"doc_count": 2, "insertions_sum": 91.0}, + (1393891200000, "Honza Král"): {"doc_count": 1, "insertions_sum": 692.0}, + (1393977600000, "Honza Král"): {"doc_count": 3, "insertions_sum": 134.0}, + (1394064000000, "Honza Král"): {"doc_count": 3, "insertions_sum": 179.0}, + (1394150400000, "Honza Král"): {"doc_count": 9, "insertions_sum": 344.0}, (1394409600000, "Honza Král"): {"doc_count": 2, "insertions_sum": 120.0}, (1394841600000, "Honza Král"): {"doc_count": 4, "insertions_sum": 45.0}, (1395360000000, "Honza Král"): {"doc_count": 2, "insertions_sum": 34.0}, @@ -608,6 +645,11 @@ def test_scan_composite_agg_at_once(data_client, git_mappings): assert agg_response.to_tabular(index_orient=True) == ( ["commit_date", "author_name"], { + (1393804800000, "Honza Král"): {"doc_count": 2, "insertions_sum": 91.0}, + (1393891200000, "Honza Král"): {"doc_count": 1, "insertions_sum": 692.0}, + (1393977600000, "Honza Král"): {"doc_count": 3, "insertions_sum": 134.0}, + (1394064000000, "Honza Král"): {"doc_count": 3, "insertions_sum": 179.0}, + (1394150400000, "Honza Král"): {"doc_count": 9, "insertions_sum": 344.0}, (1394409600000, "Honza Král"): {"doc_count": 2, "insertions_sum": 120.0}, (1394841600000, "Honza Král"): {"doc_count": 4, "insertions_sum": 45.0}, (1395360000000, "Honza Král"): {"doc_count": 2, "insertions_sum": 34.0}, diff --git a/tests/test_utils.py b/tests/test_utils.py index a07cffad..d2d7a416 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,8 +1,8 @@ import pytest from pandagg.document import DocumentSource -from pandagg.node.mappings import Text, Keyword -from pandagg.utils import equal_queries, equal_search, is_subset, get_action_modifier +from pandagg.node.mappings import Keyword, Text +from pandagg.utils import equal_queries, equal_search, get_action_modifier, is_subset def test_equal(): diff --git a/tests/testing_samples/data_nested_sample.py b/tests/testing_samples/data_nested_sample.py index d026e9ce..1e0e9543 100644 --- a/tests/testing_samples/data_nested_sample.py +++ b/tests/testing_samples/data_nested_sample.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- -EXPECTED_REPR = u"""""" +EXPECTED_REPR = """""" EXPECTED_AGG_QUERY = { "week": { diff --git a/tests/testing_samples/data_sample.py b/tests/testing_samples/data_sample.py index 399efdff..7dcaeb55 100644 --- a/tests/testing_samples/data_sample.py +++ b/tests/testing_samples/data_sample.py @@ -8,7 +8,6 @@ from pandagg.aggs import Aggs, Avg, Terms from tests.testing_samples.mapping_example import MAPPINGS - EXPECTED_AGG_QUERY = { "classification_type": { "aggs": { diff --git a/tests/tree/aggs/test_aggs.py b/tests/tree/aggs/test_aggs.py index 26756dfb..dcbb7e68 100644 --- a/tests/tree/aggs/test_aggs.py +++ b/tests/tree/aggs/test_aggs.py @@ -3,15 +3,13 @@ import pytest from mock import patch -from pandagg.tree.aggs import Aggs +import tests.testing_samples.data_sample as sample +from pandagg.aggs import Avg, DateHistogram, Filter, Min, Terms from pandagg.exceptions import ( - InvalidOperationMappingFieldError, AbsentMappingFieldError, + InvalidOperationMappingFieldError, ) -from pandagg.aggs import DateHistogram, Terms, Avg, Min, Filter - -import tests.testing_samples.data_sample as sample - +from pandagg.tree.aggs import Aggs from 
tests.testing_samples.mapping_example import MAPPINGS diff --git a/tests/tree/mapping/test_mappings.py b/tests/tree/mapping/test_mappings.py index a8510561..2fa823e9 100644 --- a/tests/tree/mapping/test_mappings.py +++ b/tests/tree/mapping/test_mappings.py @@ -1,9 +1,9 @@ import pytest from pandagg.exceptions import AbsentMappingFieldError -from pandagg.mappings import Keyword, Object, Text, Nested, Integer, Mappings +from pandagg.mappings import Integer, Keyword, Mappings, Nested, Object, Text from pandagg.node.mappings.abstract import Field -from tests.testing_samples.mapping_example import MAPPINGS, EXPECTED_MAPPING_TREE_REPR +from tests.testing_samples.mapping_example import EXPECTED_MAPPING_TREE_REPR, MAPPINGS def test_deserialization(): diff --git a/tests/tree/query/test_query.py b/tests/tree/query/test_query.py index 436c10ce..2af598b0 100644 --- a/tests/tree/query/test_query.py +++ b/tests/tree/query/test_query.py @@ -1,13 +1,12 @@ -from mock import patch - from lighttree.tree import NotFoundNodeError +from mock import patch from pandagg.node.query._parameter_clause import _Must -from pandagg.query import Query, Range, Prefix, Ids, Term, Terms, Nested -from pandagg.node.query.term_level import Term as TermNode, Exists as ExistsNode -from pandagg.node.query.joining import Nested as NestedNode from pandagg.node.query.compound import Bool - +from pandagg.node.query.joining import Nested as NestedNode +from pandagg.node.query.term_level import Exists as ExistsNode +from pandagg.node.query.term_level import Term as TermNode +from pandagg.query import Ids, Nested, Prefix, Query, Range, Term, Terms from pandagg.utils import equal_queries, ordered from tests import PandaggTestCase
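The changes above can be exercised end to end with a short snippet. This is a minimal usage sketch, not part of the patch: the client host and index name are illustrative, and it assumes a reachable Elasticsearch 7.x cluster holding a "git" index similar to the one loaded by the test fixtures.

    from elasticsearch import Elasticsearch

    from pandagg.query import MatchAll, MatchNone
    from pandagg.search import Search

    # The new match_all / match_none leaf clauses serialize like any other query clause.
    assert MatchAll(boost=0.5).to_dict() == {"match_all": {"boost": 0.5}}
    assert MatchNone().to_dict() == {"match_none": {}}

    # Illustrative client and index name; adjust to your own cluster.
    client = Elasticsearch(hosts=["localhost:9200"])
    search = Search(using=client, index="git").query(MatchAll())

    # scan() now forwards keyword arguments to the elasticsearch-py scan helper,
    # so scroll duration and batch size can be tuned per call.
    hits = list(search.scan(scroll="1m", size=30))

Note that Search.execute() now expands the request into keyword arguments (es.search(index=..., **self.to_dict())) instead of passing a single body= payload, as reflected in the updated test_repr_execution assertions, so the same Search object keeps working unchanged against the new client call style.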