From 8103eb46f7553859ef2b3785f00ac919f31cac5c Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 28 Nov 2024 11:22:22 +0100 Subject: [PATCH 1/2] ESQL: fix COUNT filter pushdown (#117503) (#117652) * ESQL: fix COUNT filter pushdown (#117503) If `COUNT` agg has a filter applied, this must also be push down to source. This currently does not happen, but this issue is masked currently by two factors: * a logical optimisation, `ExtractAggregateCommonFilter` that extracts the filter out of the STATS entirely (and pushes it to source then from a `WHERE`); * the phisical plan optimisation implementing the push down, `PushStatsToSource`, currently only applies if there's just one agg function to push down. However, this fix needs to be applied since: * it's still present in versions prior to `ExtractAggregateCommonFilter` introduction; * the defect might resurface when the restriction in `PushStatsToSource` is lifted. Fixes #115522. (cherry picked from commit 560e0c5d0441a165f4588f8af869053b5202999f) * 8.16 adaptation --- docs/changelog/117503.yaml | 6 +++ .../src/main/resources/stats.csv-spec | 51 +++++++++++++++++++ .../physical/local/PushStatsToSource.java | 11 ++++ .../LocalPhysicalPlanOptimizerTests.java | 49 ++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 docs/changelog/117503.yaml diff --git a/docs/changelog/117503.yaml b/docs/changelog/117503.yaml new file mode 100644 index 0000000000000..d48741262b581 --- /dev/null +++ b/docs/changelog/117503.yaml @@ -0,0 +1,6 @@ +pr: 117503 +summary: Fix COUNT filter pushdown +area: ES|QL +type: bug +issues: + - 115522 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 0b0f844f7b8f2..6b0d6f226c235 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2537,6 +2537,57 @@ c2:l |c2_f:l |m2:i |m2_f:i |c:l 1 |1 |5 |5 |21 ; +simpleCountOnFieldWithFilteringAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(emp_no) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountOnFieldWithFilteringOnDifferentFieldAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(hire_date) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountOnStarWithFilteringAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(*) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountWithFilteringAndNoGroupingOnFieldWithNulls +required_capability: per_agg_filtering +from employees +| stats c1 = count(birth_date) where emp_no <= 10050 +; + +c1:long +40 +; + + +simpleCountWithFilteringAndNoGroupingOnFieldWithMultivalues +required_capability: per_agg_filtering +from employees +| stats c1 = count(job_positions) where emp_no <= 10003 +; + +c1:long +3 +; + filterIsAlwaysTrue required_capability: per_agg_filtering FROM employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java index b0b86b43cd162..11fba80dc6f79 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.util.Queries; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -25,12 +26,15 @@ import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.AbstractPhysicalOperationProviders; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; import java.util.ArrayList; import java.util.List; +import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; +import static org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushFiltersToSource.canPushToSource; import static org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.StatsType.COUNT; /** @@ -98,6 +102,13 @@ private Tuple, List> pushableStats( } } if (fieldName != null) { + if (count.hasFilter()) { + if (canPushToSource(count.filter(), fa -> false) == false) { + return null; // can't push down + } + var countFilter = PlannerUtils.TRANSLATOR_HANDLER.asQuery(count.filter()); + query = Queries.combine(Queries.Clause.MUST, asList(countFilter.asBuilder(), query)); + } return new EsStatsQueryExec.Stat(fieldName, COUNT, query); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 3dd0828b82eed..782377acb6556 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -67,6 +67,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Function; import static java.util.Arrays.asList; import static org.elasticsearch.compute.aggregation.AggregatorMode.FINAL; @@ -373,6 +374,54 @@ public void testMultiCountAllWithFilter() { assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); } + @SuppressWarnings("unchecked") + public void testSingleCountWithStatsFilter() { + var analyzer = makeAnalyzer("mapping-default.json", new EnrichResolution()); + var plannerOptimizer = new TestPlannerOptimizer(config, analyzer); + var plan = plannerOptimizer.plan(""" + from test + | stats c = count(hire_date) where emp_no < 10042 + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.stats().size(), is(1)); + + Function compact = s -> s.replaceAll("\\s+", ""); + assertThat(compact.apply(esStatsQuery.stats().get(0).query().toString()), is(compact.apply(""" + { + "bool": { + "must": [ + { + "esql_single_value": { + "field": "emp_no", + "next": { + "range": { + "emp_no": { + "lt": 10042, + "boost": 1.0 + } + } + }, + "source": "emp_no < 10042@2:36" + } + }, + { + "exists": { + "field": "hire_date", + "boost": 1.0 + } + } + ], + "boost": 1.0 + } + } + """))); + } + /** * Expecting * LimitExec[1000[INTEGER]] From 84c04ef8550b59fccca9135829d52789a4e43529 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:01:26 +0100 Subject: [PATCH 2/2] [DOCS] Update tutorial example (#117538) (#117719) --- .../full-text-filtering-tutorial.asciidoc | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc b/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc index fee4b797da724..a024305588cae 100644 --- a/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc +++ b/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc @@ -511,8 +511,9 @@ In this tutorial scenario it's useful for when users have complex requirements f Let's create a query that addresses the following user needs: -* Must be a vegetarian main course +* Must be a vegetarian recipe * Should contain "curry" or "spicy" in the title or description +* Should be a main course * Must not be a dessert * Must have a rating of at least 4.5 * Should prefer recipes published in the last month @@ -524,16 +525,7 @@ GET /cooking_blog/_search "query": { "bool": { "must": [ - { - "term": { - "category.keyword": "Main Course" - } - }, - { - "term": { - "tags": "vegetarian" - } - }, + { "term": { "tags": "vegetarian" } }, { "range": { "rating": { @@ -543,10 +535,18 @@ GET /cooking_blog/_search } ], "should": [ + { + "term": { + "category": "Main Course" + } + }, { "multi_match": { "query": "curry spicy", - "fields": ["title^2", "description"] + "fields": [ + "title^2", + "description" + ] } }, { @@ -590,12 +590,12 @@ GET /cooking_blog/_search "value": 1, "relation": "eq" }, - "max_score": 7.9835095, + "max_score": 7.444513, "hits": [ { "_index": "cooking_blog", "_id": "2", - "_score": 7.9835095, + "_score": 7.444513, "_source": { "title": "Spicy Thai Green Curry: A Vegetarian Adventure", <1> "description": "Dive into the flavors of Thailand with this vibrant green curry. Packed with vegetables and aromatic herbs, this dish is both healthy and satisfying. Don't worry about the heat - you can easily adjust the spice level to your liking.", <2> @@ -619,8 +619,8 @@ GET /cooking_blog/_search <1> The title contains "Spicy" and "Curry", matching our should condition. With the default <> behavior, this field contributes most to the relevance score. <2> While the description also contains matching terms, only the best matching field's score is used by default. <3> The recipe was published within the last month, satisfying our recency preference. -<4> The "Main Course" category matches our `must` condition. -<5> The "vegetarian" tag satisfies another `must` condition, while "curry" and "spicy" tags align with our `should` preferences. +<4> The "Main Course" category satisfies another `should` condition. +<5> The "vegetarian" tag satisfies a `must` condition, while "curry" and "spicy" tags align with our `should` preferences. <6> The rating of 4.6 meets our minimum rating requirement of 4.5. ==============