diff --git a/CHANGELOG.md b/CHANGELOG.md
index 90ef2781705db..0b05fb087d80a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Fixed
+- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194))
+
 ### Security
 
 [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/57cd81da11e5cb831029719f0394e40aff68ced2...2.16
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml
index 0ea9d3de00926..8c8a98b2db22c 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml
@@ -61,3 +61,94 @@ setup:
   - match: { aggregations.histo.buckets.8.doc_count: 1 }
   - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.12.doc_count: 1 }
+
+---
+"Date histogram aggregation w/ filter query test":
+  - skip:
+      version: " - 2.99.99"
+      reason: Backport fix to 2.16
+
+  - do:
+      bulk:
+        refresh: true
+        index: dhisto-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}'
+
+  - do:
+      search:
+        index: dhisto-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            weekHisto:
+              date_histogram:
+                field: date
+                calendar_interval: week
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.weekHisto.buckets.0.doc_count: 2 }
+  - match: { aggregations.weekHisto.buckets.1.doc_count: 1 }
+
+---
+"Date histogram aggregation w/ shared field range test":
+  - do:
+      bulk:
+        refresh: true
+        index: dhisto-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"date": "2024-10-31"}'
+          - '{"index": {}}'
+          - '{"date": "2024-11-11"}'
+          - '{"index": {}}'
+          - '{"date": "2024-11-28"}'
+          - '{"index": {}}'
+          - '{"date": "2024-12-25"}'
+          - '{"index": {}}'
+          - '{"date": "2025-01-01"}'
+          - '{"index": {}}'
+          - '{"date": "2025-02-14"}'
+
+  - do:
+      search:
+        index: dhisto-agg-w-query
+        body:
+          profile: true
+          query:
+            range:
+              date:
+                gte: "2024-01-01"
+                lt: "2025-01-01"
+          aggregations:
+            monthHisto:
+              date_histogram:
+                field: date
+                calendar_interval: month
+          _source: false
+
+  - match: { hits.total.value: 4 }
+  - match: { aggregations.monthHisto.buckets.0.doc_count: 1 }
+  - match: { aggregations.monthHisto.buckets.1.doc_count: 2 }
+  - match: { aggregations.monthHisto.buckets.2.doc_count: 1 }
+  - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
+  - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml
index 80aad96ce1f6b..1e1d2b0706d6b 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml
@@ -673,3 +673,82 @@ setup:
   - match: { aggregations.my_range.buckets.3.from: 1.5 }
   - is_false: aggregations.my_range.buckets.3.to
   - match: { aggregations.my_range.buckets.3.doc_count: 2 }
+
+---
+"Filter query w/ aggregation test":
+  - skip:
+      version: " - 2.99.99"
+      reason: Backport fix to 2.16
+
+  - do:
+      bulk:
+        refresh: true
+        index: range-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": -10, "date": "2024-10-29"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": -5, "date": "2024-10-30"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": 10, "date": "2024-10-31"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "v": 15, "date": "2024-11-01"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "v": 20, "date": "2024-11-02"}'
+
+  - do:
+      search:
+        index: range-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            NegPosAgg:
+              range:
+                field: v
+                keyed: true
+                ranges:
+                  - to: 0
+                    key: "0"
+                  - from: 0
+                    key: "1"
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 }
+  - match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 }
+
+  - do:
+      search:
+        index: range-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            HalloweenAgg:
+              date_range:
+                field: date
+                format: "yyyy-MM-dd"
+                keyed: true
+                ranges:
+                  - to: "2024-11-01"
+                    key: "to-october"
+                  - from: "2024-11-01"
+                    key: "from-september"
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 }
+  - match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java
new file mode 100644
index 0000000000000..145a60373b4f3
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java
@@ -0,0 +1,102 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.aggregations.bucket.filterrewrite;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Weight;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+
+/**
+ * This interface provides a bridge between an aggregator and the optimization context, allowing
+ * the aggregator to provide data and optimize the aggregation process.
+ *
+ * <p>The main purpose of this interface is to encapsulate the aggregator-specific optimization
+ * logic and provide access to the data in Aggregator that is required for optimization, while keeping the optimization
+ * business logic separate from the aggregator implementation.
+ *
+ * <p>To use this interface to optimize an aggregator, you should subclass this interface in this package
+ * and put any specific optimization business logic in it. Then implement this subclass in the aggregator
+ * to provide data that is needed for doing the optimization
+ *
+ * @opensearch.internal
+ */
+public abstract class AggregatorBridge {
+
+ /**
+ * The field type associated with this aggregator bridge.
+ */
+ MappedFieldType fieldType;
+
+ Consumer<Ranges> setRanges;
+
+ void setRangesConsumer(Consumer<Ranges> setRanges) {
+     this.setRanges = setRanges;
+ }
+
+ /**
+ * Checks whether the aggregator can be optimized.
+ * <p>
+ * This method is supposed to be implemented in a specific aggregator to take in fields from there
+ *
+ * @return {@code true} if the aggregator can be optimized, {@code false} otherwise.
+ * The result will be saved in the optimization context.
+ */
+ protected abstract boolean canOptimize();
+
+ /**
+ * Prepares the optimization at shard level after checking that the aggregator is optimizable.
+ *
+ * For example, figure out the ranges from the aggregation so they can be used for the optimization later
+ *
+ * This method is supposed to be implemented in a specific aggregator to take in fields from there
+ */
+ protected abstract void prepare() throws IOException;
+
+ /**
+ * Prepares the optimization for a specific segment when the segment functionally matches all docs
+ *
+ * @param leaf the leaf reader context for the segment
+ */
+ abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException;
+
+ /**
+ * Attempts to build aggregation results for a segment
+ *
+ * @param values the point values (index structure for numeric values) for a segment
+ * @param incrementDocCount a consumer to increment the document count for a range bucket. The first parameter is the document count, the second is the key of the bucket
+ * @param ranges the pre-built ranges to collect matching documents into
+ */
+ abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize(
+ PointValues values,
+ BiConsumer<Long, Long> incrementDocCount,
+ Ranges ranges
+ ) throws IOException;
+
+ /**
+ * Checks whether the top level query matches all documents on the segment
+ *
+ * <p>This method creates a weight from the search context's query and checks whether the weight's
+ * document count matches the total number of documents in the leaf reader context.
+ *
+ * @param ctx the search context
+ * @param leafCtx the leaf reader context for the segment
+ * @return {@code true} if the segment matches all documents, {@code false} otherwise
+ */
+ public static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException {
+ Weight weight = ctx.query().rewrite(ctx.searcher()).createWeight(ctx.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f);
+ return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs();
+ }
+}
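For orientation, here is a minimal illustrative sketch (not part of the patch) of how a caller such as the filter-rewrite optimization context might choose the ranges for one segment with the API above. The class `RangeSelectionSketch`, its method, and the `shardLevelRanges` parameter are invented for illustration, and the idea that shard-level ranges already account for the top level query is an assumption; only `AggregatorBridge`, `Ranges`, `segmentMatchAll`, and `tryBuildRangesFromSegment` come from the patch.

```java
package org.opensearch.search.aggregations.bucket.filterrewrite;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.search.internal.SearchContext;

import java.io.IOException;

// Illustrative sketch only: loosely modeled on how an optimization context could pick the
// ranges for one segment; this is not the actual FilterRewriteOptimizationContext logic.
final class RangeSelectionSketch {

    private RangeSelectionSketch() {}

    static Ranges rangesForSegment(
        AggregatorBridge bridge,
        SearchContext context,
        LeafReaderContext leaf,
        Ranges shardLevelRanges // null when prepare() could not build shard-level ranges
    ) throws IOException {
        if (shardLevelRanges != null) {
            // Assumed here: ranges built once per shard already account for the top level
            // query (e.g. a match_all, or a range query on the aggregated field), so reuse them.
            return shardLevelRanges;
        }
        // Otherwise the rewrite would count documents straight from the point values and
        // ignore the query, so segment-level ranges are only built when the top level query
        // matches every live document in this segment.
        if (AggregatorBridge.segmentMatchAll(context, leaf)) {
            return bridge.tryBuildRangesFromSegment(leaf);
        }
        return null; // caller falls back to default per-document collection
    }
}
```

The new YAML tests above should exercise both outcomes: the terms-filter cases have to fall back to normal collection so the query is respected, while the shared-field range test still reports optimized_segments: 1 in the profile output.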
diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java
new file mode 100644
index 0000000000000..c780732a5ddce
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java
@@ -0,0 +1,157 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.aggregations.bucket.filterrewrite;
+
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.opensearch.common.Rounding;
+import org.opensearch.index.mapper.DateFieldMapper;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.search.aggregations.bucket.histogram.LongBounds;
+import org.opensearch.search.aggregations.support.ValuesSourceConfig;
+import org.opensearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.OptionalLong;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+
+import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse;
+
+/**
+ * For date histogram aggregation
+ */
+public abstract class DateHistogramAggregatorBridge extends AggregatorBridge {
+
+ int maxRewriteFilters;
+
+ protected boolean canOptimize(ValuesSourceConfig config) {
+ if (config.script() == null && config.missing() == null) {
+ MappedFieldType fieldType = config.fieldType();
+ if (fieldType instanceof DateFieldMapper.DateFieldType) {
+ if (fieldType.isSearchable()) {
+ this.fieldType = fieldType;
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ protected void buildRanges(SearchContext context) throws IOException {
+ long[] bounds = Helper.getDateHistoAggBounds(context, fieldType.name());
+ this.maxRewriteFilters = context.maxAggRewriteFilters();
+ setRanges.accept(buildRanges(bounds, maxRewriteFilters));
+ }
+
+ @Override
+ final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException {
+ long[] bounds = Helper.getSegmentBounds(leaf, fieldType.name());
+ return buildRanges(bounds, maxRewriteFilters);
+ }
+
+ private Ranges buildRanges(long[] bounds, int maxRewriteFilters) {
+ bounds = processHardBounds(bounds);
+ if (bounds == null) {
+ return null;
+ }
+ assert bounds[0] <= bounds[1] : "Low bound should be less than high bound";
+
+ final Rounding rounding = getRounding(bounds[0], bounds[1]);
+ final OptionalLong intervalOpt = Rounding.getInterval(rounding);
+ if (intervalOpt.isEmpty()) {
+ return null;
+ }
+ final long interval = intervalOpt.getAsLong();
+
+ // process the after key of composite agg
+ bounds = processAfterKey(bounds, interval);
+
+ return Helper.createRangesFromAgg(
+ (DateFieldMapper.DateFieldType) fieldType,
+ interval,
+ getRoundingPrepared(),
+ bounds[0],
+ bounds[1],
+ maxRewriteFilters
+ );
+ }
+
+ protected abstract Rounding getRounding(final long low, final long high);
+
+ protected abstract Rounding.Prepared getRoundingPrepared();
+
+ protected long[] processAfterKey(long[] bounds, long interval) {
+ return bounds;
+ }
+
+ protected long[] processHardBounds(long[] bounds) {
+ return processHardBounds(bounds, null);
+ }
+
+ protected long[] processHardBounds(long[] bounds, LongBounds hardBounds) {
+ if (bounds != null) {
+ // Update min/max limit if user specified any hard bounds
+ if (hardBounds != null) {
+ if (hardBounds.getMin() > bounds[0]) {
+ bounds[0] = hardBounds.getMin();
+ }
+ if (hardBounds.getMax() - 1 < bounds[1]) {
+ bounds[1] = hardBounds.getMax() - 1; // hard bounds max is exclusive
+ }
+ if (bounds[0] > bounds[1]) {
+ return null;
+ }
+ }
+ }
+ return bounds;
+ }
+
+ private DateFieldMapper.DateFieldType getFieldType() {
+ assert fieldType instanceof DateFieldMapper.DateFieldType;
+ return (DateFieldMapper.DateFieldType) fieldType;
+ }
+
+ protected int getSize() {
+ return Integer.MAX_VALUE;
+ }
+
+ @Override
+ final FilterRewriteOptimizationContext.DebugInfo tryOptimize(
+ PointValues values,
+ BiConsumer<Long, Long> incrementDocCount,