Merge pull request #13 from alkemics/search
v0.0.3
alk-lbinet authored May 10, 2020
2 parents 5a5619e + 4243297 commit 164138e
Showing 21 changed files with 321 additions and 159 deletions.
5 changes: 4 additions & 1 deletion pandagg/node/query/_parameter_clause.py
@@ -22,7 +22,7 @@ def __init__(self, value):
def line_repr(self, depth, **kwargs):
return "%s=%s" % (self.KEY, json.dumps(self.body["value"]))

def serialize(self, with_name=True):
def to_dict(self, with_name=True):
return {self.KEY: self.body["value"]}


@@ -131,6 +131,9 @@ def __init__(self, *args, **kwargs):
)
super(ParentParameterClause, self).__init__(_children=children)

def to_dict(self, with_name=True):
return {self.KEY: [n.to_dict() for n in self._children]}


class Filter(ParentParameterClause):
KEY = "filter"
6 changes: 3 additions & 3 deletions pandagg/node/query/abstract.py
@@ -97,7 +97,7 @@ def name(self):
def _identifier_prefix(self):
return "%s_" % self.KEY

def serialize(self, with_name=True):
def to_dict(self, with_name=True):
b = self.body.copy()
if with_name and self._named:
b["_name"] = self.name
@@ -113,9 +113,9 @@ def __str__(self):

def __eq__(self, other):
if isinstance(other, self.__class__):
return other.serialize() == self.serialize()
return other.to_dict() == self.to_dict()
# make sure we still equal to a dict with the same data
return other == self.serialize()
return other == self.to_dict()


class LeafQueryClause(QueryClause):
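Since the `__eq__` override above now compares through `to_dict`, a clause should compare equal both to another instance and to its raw dict form. A minimal sketch, assuming the standard Elasticsearch term body shape (the field name is illustrative):

from pandagg.query import Term

q = Term(field="user", value="john")

# comparing two instances goes through to_dict() on both sides
assert q == Term(field="user", value="john")
# comparing against a plain dict falls back to `other == self.to_dict()`
assert q == q.to_dict()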
22 changes: 15 additions & 7 deletions pandagg/node/query/compound.py
@@ -12,20 +12,22 @@


class CompoundClause(QueryClause):
"""Compound clauses can encapsulate other query clauses.
"""Compound clauses can encapsulate other query clauses::
{
"<query_type>" : {
<query_body>
<children_clauses>
}
}
Note: the children attribute's only purpose is instantiation with the following syntax:
>>> from pandagg.query import Bool, Term
>>> query = Bool(
>>> filter=Term(field='some_path', value=3),
>>> _name='bool_id',
>>> )
{
"<query_type>" : {
<query_body>
<children_clauses>
}
}
"""

DEFAULT_OPERATOR = None
Expand Down Expand Up @@ -67,6 +69,12 @@ def params(cls, parent_only=False):
or not issubclass(cls.get_dsl_class(p, "_param_"), SimpleParameter)
}

def to_dict(self, with_name=True):
d = {}
for c in self._children:
d.update(c.to_dict())
return {self.KEY: d}


class Bool(CompoundClause):
DEFAULT_OPERATOR = Must
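The added CompoundClause.to_dict merges the dict of each parameter child under the clause key, so the Bool example from the docstring above serializes into a regular Elasticsearch bool body. A hedged sketch (the exact output shape is assumed, not taken from this diff):

from pandagg.query import Bool, Term

query = Bool(
    filter=Term(field="some_path", value=3),
    _name="bool_id",
)

# each parameter child (here the "filter" clause) contributes its own
# {key: [children]} dict, merged into the compound clause body, e.g. roughly
# {"bool": {"filter": [{"term": {"some_path": {"value": 3}}}]}}
print(query.to_dict())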
2 changes: 1 addition & 1 deletion pandagg/node/query/term_level.py
@@ -29,7 +29,7 @@ def __init__(self, values, _name=None):
self.values = values
super(Ids, self).__init__(_name=_name, values=values)

def serialize(self, with_name=True):
def to_dict(self, with_name=True):
b = {"values": self.values}
if with_name and self._named:
b["_name"] = self.name
3 changes: 2 additions & 1 deletion pandagg/search.py
@@ -201,7 +201,8 @@ def __getitem__(self, n):
return s

def size(self, size):
"""Equivalent to::
"""
Equivalent to::
s = Search().params(size=size)
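According to the reworked docstring, size() is only a shortcut for params(size=...). A minimal usage sketch (assuming Search is importable from pandagg.search, as the file path suggests):

from pandagg.search import Search

# both declarations are equivalent per the docstring above
s1 = Search().size(10)
s2 = Search().params(size=10)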
134 changes: 98 additions & 36 deletions pandagg/tree/aggs.py
@@ -24,16 +24,57 @@

@python_2_unicode_compatible
class Aggs(Tree):
"""Tree combination of aggregation nodes.
r"""
Combination of aggregation clauses. This class provides convenient methods to build an aggregation (see
:func:`~pandagg.tree.aggs.Aggs.aggs` and :func:`~pandagg.tree.aggs.Aggs.groupby`), and is also used
to parse aggregation responses into handy formats.
Mapping declaration is optional, but doing so validates aggregation validity.
Mapping declaration is optional, but doing so validates aggregation validity and automatically handles missing
nested clauses.
All of the following syntaxes are identical:
From a dict:
>>> Aggs({"per_user":{"terms":{"field":"user"}}})
Using shortcut declaration: first argument is the aggregation type, other arguments are aggregation body parameters:
>>> Aggs('terms', name='per_user', field='user')
Using DSL class:
>>> from pandagg.aggs import Terms
>>> Aggs(Terms('per_user', field='user'))
Dict and DSL class syntaxes allow providing multi-clause aggregations:
>>> Aggs({"per_user":{"terms":{"field":"user"}, "aggs": {"avg_age": {"avg": {"field": "age"}}}}})
Which is similar to:
>>> from pandagg.aggs import Terms, Avg
>>> Aggs(Terms('per_user', field='user', aggs=Avg('avg_age', field='age')))
:Keyword Arguments:
* *mapping* (``dict`` or ``pandagg.tree.mapping.Mapping``) --
Mapping of requested index(es). Providing it validates the aggregation against the mapping, and adds required
nested clauses if missing.
* *nested_autocorrect* (``bool``) --
If True, missing nested clauses are automatically added to the aggregation; otherwise an error is
raised.
* remaining kwargs:
Used as body in aggregation
"""

node_class = AggNode
_crafted_root_name = "root"

def __init__(self, *args, **kwargs):
self.mapping = Mapping(kwargs.pop("mapping", None))
self.nested_autocorrect = kwargs.pop("nested_autocorrect", False)
super(Aggs, self).__init__()
if args or kwargs:
self._fill(*args, **kwargs)
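The equivalent declaration syntaxes listed in the docstring above, gathered into one sketch (aggregation names and fields are the docstring's own):

from pandagg.aggs import Aggs, Terms, Avg

# single clause: dict syntax, shortcut syntax, DSL class
a1 = Aggs({"per_user": {"terms": {"field": "user"}}})
a2 = Aggs("terms", name="per_user", field="user")
a3 = Aggs(Terms("per_user", field="user"))

# multiple clauses: dict syntax vs DSL classes
a4 = Aggs({"per_user": {"terms": {"field": "user"},
                        "aggs": {"avg_age": {"avg": {"field": "age"}}}}})
a5 = Aggs(Terms("per_user", field="user", aggs=Avg("avg_age", field="age")))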
@@ -43,15 +84,6 @@ def __nonzero__(self):

__bool__ = __nonzero__

@classmethod
def deserialize(cls, *args, **kwargs):
mapping = kwargs.pop("mapping", None)
if len(args) == 1 and isinstance(args[0], Aggs):
return args[0]

new = cls(mapping=mapping)
return new._fill(*args, **kwargs)

def _fill(self, *args, **kwargs):
if args:
node_hierarchy = self.node_class._type_deserializer(*args, **kwargs)
@@ -63,7 +95,10 @@ def _fill(self, *args, **kwargs):
return self

def _clone_init(self, deep=False):
return Aggs(mapping=self.mapping.clone(deep=deep))
return Aggs(
mapping=self.mapping.clone(deep=deep),
nested_autocorrect=self.nested_autocorrect,
)

def _is_eligible_grouping_node(self, nid):
"""Return whether node can be used as grouping node."""
@@ -77,7 +112,8 @@ def _is_eligible_grouping_node(self, nid):

@property
def deepest_linear_bucket_agg(self):
"""Return deepest bucket aggregation node (pandagg.nodes.abstract.BucketAggNode) of that aggregation that
"""
Return deepest bucket aggregation node (pandagg.nodes.abstract.BucketAggNode) of that aggregation that
neither has siblings, nor has an ancestor with siblings.
"""
if not self.root or not self._is_eligible_grouping_node(self.root):
@@ -101,7 +137,8 @@ def deepest_linear_bucket_agg(self):
return last_bucket_agg_name

def _validate_aggs_parent_id(self, pid):
"""If pid is not None, ensure that pid belongs to tree, and that it refers to a bucket aggregation.
"""
If pid is not None, ensure that pid belongs to tree, and that it refers to a bucket aggregation.
Else, if not provided, return deepest bucket aggregation if there is no ambiguity (linear aggregations).
KO: non-ambiguous::
@@ -131,7 +168,8 @@ def _validate_aggs_parent_id(self, pid):
return leaves[0].identifier

def groupby(self, *args, **kwargs):
r"""Arrange passed aggregations in vertical/nested manner, above or below another agg clause.
r"""
Arrange passed aggregations in vertical/nested manner, above or below another agg clause.
Given the initial aggregation::
Expand All @@ -140,12 +178,12 @@ def groupby(self, *args, **kwargs):
If `insert_below` = 'A'::
A──> by──> B
A──> new──> B
└──> C
If `insert_above` = 'B'::
A──> by──> B
A──> new──> B
└──> C
`by` argument accepts single occurrence or sequence of following formats:
Expand All @@ -163,11 +201,10 @@ def groupby(self, *args, **kwargs):
└──> C
Accepted declarations for single aggregation:
Accepted all Aggs.__init__ syntaxes
Official DSL like:
>>> Aggs().groupby('terms', name='per_user_id', field='user_id')
>>> Aggs()\
>>> .groupby('terms', name='per_user_id', field='user_id')
{"terms_on_my_field":{"terms":{"field":"some_field"}}}
Passing a dict:
@@ -221,19 +258,19 @@
raise ValueError(
"Kwargs not allowed when passing multiple aggregations in args."
)
inserted_aggs = [self.deserialize(arg) for arg in args]
inserted_aggs = [Aggs(arg) for arg in args]
# groupby([{}, {}])
elif len(args) == 1 and isinstance(args[0], (list, tuple)):
if kwargs:
raise ValueError(
"Kwargs not allowed when passing multiple aggregations in args."
)
inserted_aggs = [self.deserialize(arg) for arg in args[0]]
inserted_aggs = [Aggs(arg) for arg in args[0]]
# groupby({})
# groupby(Terms())
# groupby('terms', name='per_tag', field='tag')
else:
inserted_aggs = [self.deserialize(*args, **kwargs)]
inserted_aggs = [Aggs(*args, **kwargs)]

if insert_above is not None:
parent = new_agg.parent(insert_above, id_only=False)
@@ -265,35 +302,52 @@ def groupby(self, *args, **kwargs):
return new_agg

def aggs(self, *args, **kwargs):
"""Arrange passed aggregations in `arg` arguments "horizontally".
r"""
Arrange passed aggregations "horizontally".
Those will be placed under the `insert_below` aggregation clause id if provided, else under the deepest linear
bucket aggregation if there is no ambiguity:
Given the initial aggregation::
A──> B
└──> C
If passing multiple aggregations with `insert_below` = 'A'::
A──> B
└──> C
└──> new1
└──> new2
Note: those will be placed under the `insert_below` aggregation clause id if provided, else under the deepest
linear bucket aggregation if there is no ambiguity:
OK::
A──> B ─> C ─> arg
A──> B ─> C ─> new
KO::
A──> B
└──> C
`arg` argument accepts single occurrence or sequence of following formats:
`args` accepts single occurrence or sequence of following formats:
* string (for terms agg concise declaration)
* regular Elasticsearch dict syntax
* AggNode instance (for instance Terms, Filters etc)
:param arg: aggregation(s) clauses to insert "horizontally"
:param insert_below: parent aggregation id under which these aggregations should be placed
:param kwargs: agg body arguments when using "string" syntax for terms aggregation
:Keyword Arguments:
* *insert_below* (``string``) --
Parent aggregation name under which these aggregations should be placed
* remaining kwargs:
Used as body in aggregation
:rtype: pandagg.aggs.Aggs
"""
insert_below = self._validate_aggs_parent_id(kwargs.pop("insert_below", None))
new_agg = self.clone(with_tree=True)
deserialized = self.deserialize(*args, mapping=self.mapping, **kwargs)
deserialized = Aggs(*args, **kwargs)
deserialized_root = deserialized.get(deserialized.root)
if isinstance(deserialized_root, ShadowRoot):
new_agg.merge(deserialized, nid=insert_below)
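Taken together, the groupby and aggs methods from the docstrings above chain as follows; a sketch reusing the docstring's own shortcut syntax (the resulting request body is not asserted here):

from pandagg.aggs import Aggs, Avg

a = (
    Aggs()
    # vertical arrangement: groupby nests clauses one below another
    .groupby("terms", name="per_user_id", field="user_id")
    # horizontal arrangement: placed under the deepest linear bucket (per_user_id)
    .aggs(Avg("avg_age", field="age"))
)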
@@ -335,7 +389,7 @@ def applied_nested_path_at_node(self, nid):
def _insert_node_below(self, node, parent_id, with_children=True):
"""If mapping is provided, nested aggregations are automatically applied.
"""
if isinstance(node, ShadowRoot):
if isinstance(node, ShadowRoot) and parent_id is not None:
for child in node._children or []:
super(Aggs, self)._insert_node_below(
child, parent_id=parent_id, with_children=with_children
@@ -346,7 +400,6 @@ def _insert_node_below(self, node, parent_id, with_children=True):
isinstance(node, Nested)
or isinstance(node, ReverseNested)
or not self.mapping
or parent_id is None
or not hasattr(node, "field")
):
return super(Aggs, self)._insert_node_below(
@@ -357,11 +410,20 @@

# from deepest to highest
required_nested_level = self.mapping.nested_at_field(node.field)
current_nested_level = self.applied_nested_path_at_node(parent_id)

if self.is_empty():
current_nested_level = None
else:
current_nested_level = self.applied_nested_path_at_node(parent_id)
if current_nested_level == required_nested_level:
return super(Aggs, self)._insert_node_below(
node, parent_id, with_children=with_children
)
if not self.nested_autocorrect:
raise ValueError(
"Invalid %s agg on %s field. Invalid nested: expected %s, current %s."
% (node.KEY, node.field, required_nested_level, current_nested_level)
)
if current_nested_level and (
required_nested_level or "" in current_nested_level
):
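A sketch of the nested handling shown above: with a mapping provided, a missing nested clause is either auto-inserted (nested_autocorrect=True) or rejected with the ValueError raised in _insert_node_below. The mapping layout below is an assumption (a plain Elasticsearch mapping dict with one nested field), not taken from this diff:

from pandagg.aggs import Aggs, Avg

# hypothetical mapping with a nested "comments" object
mapping = {
    "properties": {
        "user": {"type": "keyword"},
        "comments": {
            "type": "nested",
            "properties": {"stars": {"type": "integer"}},
        },
    }
}

# with autocorrection, aggregating on comments.stars should get the missing
# "nested" clause inserted automatically
a = Aggs(mapping=mapping, nested_autocorrect=True).aggs(
    Avg("avg_stars", field="comments.stars")
)

# with the default nested_autocorrect=False, the same call is expected to raise:
# ValueError: Invalid avg agg on comments.stars field. Invalid nested: ...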
4 changes: 2 additions & 2 deletions pandagg/tree/mapping.py
@@ -44,7 +44,7 @@ def __nonzero__(self):

__bool__ = __nonzero__

def serialize(self, from_=None, depth=None):
def to_dict(self, from_=None, depth=None):
if self.root is None:
return None
from_ = self.root if from_ is None else from_
@@ -54,7 +54,7 @@ def serialize(self, from_=None, depth=None):
if depth is not None:
depth -= 1
for child_node in self.children(node.identifier, id_only=False):
children_queries[child_node.name] = self.serialize(
children_queries[child_node.name] = self.to_dict(
from_=child_node.identifier, depth=depth
)
serialized_node = node.body
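The renamed Mapping.to_dict recursively rebuilds a mapping dict, keyed by field name, from the node tree. A minimal round-trip sketch, assuming Mapping accepts a raw Elasticsearch mapping dict as the Aggs docstring above suggests:

from pandagg.tree.mapping import Mapping

# illustrative mapping; the exact accepted layout is an assumption
m = Mapping({"properties": {"user": {"type": "keyword"}}})

# to_dict (formerly serialize) walks children recursively; the optional depth
# argument limits how many levels below `from_` are reconstructed
print(m.to_dict())
print(m.to_dict(depth=1))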