bowenlan-amzn · finnegancarroll · Jul 18, 2024 · Jul 18, 2024 · Jul 22, 2024 · Jul 22, 2024
diff --git a/...ava/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java b/...ava/org/opensearch/benchmark/search/aggregations/BKDTreeMultiRangesTraverseBenchmark.java
@@ -0,0 +1,171 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.benchmark.search.aggregations;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.opensearch.common.logging.LogConfigurator;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.NumberFieldMapper;
+import org.opensearch.index.mapper.NumericPointEncoder;
+import org.opensearch.search.optimization.filterrewrite.PackedValueRanges;
+import org.opensearch.search.optimization.filterrewrite.TreeTraversal;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.function.BiConsumer;
+
+import static org.opensearch.search.optimization.filterrewrite.TreeTraversal.multiRangesTraverse;
+
+/**
+ * JMH benchmark that times {@code TreeTraversal.multiRangesTraverse} over a point tree built from
+ * randomly generated integer values.
+ *
+ * <p>The index, the point tree and the encoded bucket ranges are prepared once in {@link treeState};
+ * each benchmark invocation builds a fresh visitor and collects the visited doc IDs per range index.
+ */
+@Warmup(iterations = 10)
+@Measurement(iterations = 5)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Fork(value = 1)
+public class BKDTreeMultiRangesTraverseBenchmark {
+    /**
+     * Benchmark-scoped fixture: a temporary on-disk Lucene index holding {@code treeSize} random
+     * values in {@code [0, valMax)}, plus {@code buckets} encoded ranges of equal width.
+     */
+    @State(Scope.Benchmark)
+    public static class treeState {
+        /** Number of documents (one {@code "val"} point each) written to the index. */
+        @Param({ "10000", "10000000" })
+        int treeSize;
+
+        /** Exclusive upper bound of the random values written to the {@code "val"} field. */
+        @Param({ "10000", "10000000" })
+        int valMax;
+
+        /** Number of ranges handed to the traversal. */
+        @Param({ "10", "100" })
+        int buckets;
+
+        /** Seed for the value generator, so every run indexes the same data. */
+        @Param({ "12345" })
+        int seed;
+
+        private Random random;
+
+        Path tmpDir;
+        Directory directory;
+        IndexWriter writer;
+        IndexReader reader;
+
+        // multiRangesTraverse params
+        PointValues.PointTree pointTree;
+        PackedValueRanges packedValueRanges;
+        // Not assigned or read anywhere in this class.
+        BiConsumer<Integer, List<Integer>> collectRangeIDs;
+        int maxNumNonZeroRanges = Integer.MAX_VALUE;
+
+        /**
+         * Builds the temporary index, resolves the point tree of the {@code "val"} field and encodes
+         * the bucket boundaries.
+         *
+         * @throws IOException if the temporary directory or the index cannot be created or read
+         */
+        @Setup
+        public void setup() throws IOException {
+            LogConfigurator.setNodeName("sample-name");
+            random = new Random(seed);
+            tmpDir = Files.createTempDirectory("tree-test");
+            directory = FSDirectory.open(tmpDir);
+            writer = new IndexWriter(directory, new IndexWriterConfig());
+
+            for (int i = 0; i < treeSize; i++) {
+                writer.addDocument(List.of(new IntField("val", random.nextInt(valMax), Field.Store.NO)));
+            }
+
+            // Indexing may flush several segments; merge them so the reader exposes exactly one
+            // leaf and the benchmarked tree holds all treeSize points.
+            writer.forceMerge(1);
+            reader = DirectoryReader.open(writer);
+
+            // single segment after the merge above, so this assigns the one and only tree
+            for (LeafReaderContext lrc : reader.leaves()) {
+                pointTree = lrc.reader().getPointValues("val").getPointTree();
+            }
+
+            MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType("val", NumberFieldMapper.NumberType.INTEGER);
+            NumericPointEncoder numericPointEncoder = (NumericPointEncoder) fieldType;
+
+            int bucketWidth = valMax / buckets;
+            byte[][] lowers = new byte[buckets][];
+            byte[][] uppers = new byte[buckets][];
+            for (int i = 0; i < buckets; i++) {
+                lowers[i] = numericPointEncoder.encodePoint(i * bucketWidth);
+                // The upper bound is the start of the next bucket, so each range spans bucketWidth
+                // values instead of collapsing to a zero-width range.
+                // NOTE(review): whether the upper bound is inclusive or exclusive is decided by
+                // PackedValueRanges / the visitor - confirm adjacent buckets do not overlap.
+                uppers[i] = numericPointEncoder.encodePoint((i + 1) * bucketWidth);
+            }
+
+            packedValueRanges = new PackedValueRanges(lowers, uppers);
+        }
+
+        /**
+         * Closes the Lucene resources opened in {@link #setup()} and removes the temporary index
+         * directory.
+         *
+         * @throws IOException if closing a resource or deleting a file fails
+         */
+        @TearDown
+        public void tearDown() throws IOException {
+            // Close reader, writer and directory before deleting so no open handles remain on the
+            // files being removed.
+            reader.close();
+            writer.close();
+            directory.close();
+
+            for (String indexFile : FSDirectory.listAll(tmpDir)) {
+                Files.deleteIfExists(tmpDir.resolve(indexFile));
+            }
+            Files.deleteIfExists(tmpDir);
+        }
+    }
+
+    /**
+     * Runs one multi-range traversal over the prepared tree and gathers the reported doc IDs,
+     * grouped by the range index they were reported for.
+     *
+     * @param state prepared index, point tree and encoded ranges
+     * @return doc IDs per range index; returned so JMH consumes the result
+     * @throws Exception if the traversal fails
+     */
+    @Benchmark
+    public Map<Integer, List<Integer>> multiRangeTraverseTree(treeState state) throws Exception {
+        Map<Integer, List<Integer>> mockIDCollect = new HashMap<>();
+
+        TreeTraversal.RangeAwareIntersectVisitor treeVisitor = new TreeTraversal.DocCollectRangeAwareIntersectVisitor(
+            state.pointTree,
+            state.packedValueRanges,
+            state.maxNumNonZeroRanges,
+            // Each range gets a mutable list on first use; an immutable List.of(docID) would make
+            // the second add for the same range throw UnsupportedOperationException.
+            (activeIndex, docID) -> mockIDCollect.computeIfAbsent(activeIndex, k -> new ArrayList<>()).add(docID)
+        );
+
+        multiRangesTraverse(treeVisitor);
+        return mockIDCollect;
+    }
+}
diff --git a/release-notes/opensearch.release-notes-2.1.0.md b/release-notes/opensearch.release-notes-2.1.0.md
@@ -61,7 +61,7 @@
 * Update github action gradle-check to use pull_request_target for accessing token (#3728) ([#3731](https://github.com/opensearch-project/opensearch/pull/3731))
 * Add gradle check test for github workflows (#3717) ([#3723](https://github.com/opensearch-project/opensearch/pull/3723))
 * Used set to make shell scripts more strict (#3278) ([#3344](https://github.com/opensearch-project/opensearch/pull/3344))
-* Bootstrap should implement a denylist of Java versions (ranges) (#3164) ([#3292](https://github.com/opensearch-project/opensearch/pull/3292))
+* Bootstrap should implement a denylist of Java versions (ranges) (#3164) ([#3292](https://github.com/opensearch-project/opensearch/pull/3292))
 * Add Github Workflow to build and publish lucene snapshots. (#2906) ([#3038](https://github.com/opensearch-project/opensearch/pull/3038))
 * Remove JavaVersion in favour of standard Runtime.Version (java-version-checker) (#3027) ([#3034](https://github.com/opensearch-project/opensearch/pull/3034))
 * Remove JavaVersion, use builtin Runtime.Version to deal with runtime versions (#3006) ([#3013](https://github.com/opensearch-project/opensearch/pull/3013))

diff --git a/release-notes/opensearch.release-notes-2.14.0.md b/release-notes/opensearch.release-notes-2.14.0.md
@@ -34,7 +34,7 @@
 - [Search Pipeline] Handle default pipeline for multiple indices ([#13276](https://github.com/opensearch-project/OpenSearch/pull/13276))
 - [Batch Ingestion] Add `batch_size` to `_bulk` API. ([#12457](https://github.com/opensearch-project/OpenSearch/issues/12457))
 - [Remote Store] Add capability of doing refresh as determined by the translog ([#12992](https://github.com/opensearch-project/OpenSearch/pull/12992))
-- Support multi ranges traversal when doing date histogram rewrite optimization. ([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317))
+- Support multi ranges traversal when doing date histogram rewrite optimization. ([#13317](https://github.com/opensearch-project/OpenSearch/pull/13317))
 
 ### Dependencies
 - Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896))

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
@@ -706,3 +706,80 @@ setup:
   - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
   - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
   - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
+
+---
+# Indexes eight documents dated 2020-03-01 through 2020-03-09, runs a daily date_histogram on
+# `date` with a `range` sub-aggregation on `v`, and checks both the per-day doc counts and the
+# per-range doc counts inside selected days.
+"date_histogram with range sub aggregation":
+  - do:
+      indices.create:
+        index: test_date_hist_range_sub_agg
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            properties:
+              date:
+                type: date
+  # Only `date` is mapped explicitly above; `v` is not declared in the mapping.
+  - do:
+      bulk:
+        refresh: true
+        index: test_date_hist_range_sub_agg
+        body:
+          - '{"index": {}}'
+          - '{"date": "2020-03-01", "v": 1}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-01", "v": 11}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-02", "v": 12}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-08", "v": 23}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-08", "v": 28}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-08", "v": 28}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-08", "v": 39}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-09", "v": 4}'
+  # NOTE(review): this search names no `index`, so it is not restricted to
+  # test_date_hist_range_sub_agg - confirm no other index holds documents when it runs.
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                calendar_interval: day
+              aggs:
+                my_range:
+                  range:
+                    field: v
+                    ranges:
+                      - to: 10
+                      - from: 10
+                        to: 20
+                      - from: 20
+                        to: 30
+                      - from: 30
+                        to: 40
+
+  # One bucket per day from 2020-03-01 to 2020-03-09, including the days without documents.
+  - match: { hits.total.value: 8 }
+  - length: { aggregations.histo.buckets: 9 }
+
+  - match: { aggregations.histo.buckets.0.key_as_string: "2020-03-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2020-03-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.7.key_as_string: "2020-03-08T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.8.key_as_string: "2020-03-09T00:00:00.000Z" }
+
+  - match: { aggregations.histo.buckets.0.doc_count: 2 }
+  - match: { aggregations.histo.buckets.1.doc_count: 1 }
+  - match: { aggregations.histo.buckets.2.doc_count: 0 }
+  - match: { aggregations.histo.buckets.7.doc_count: 4 }
+  - match: { aggregations.histo.buckets.8.doc_count: 1 }
+
+  # 2020-03-01 holds v=1 and v=11; only v=1 is expected in the first range (`to: 10`).
+  - match: { aggregations.histo.buckets.0.my_range.buckets.0.doc_count: 1 }
+
+  # 2020-03-08 holds v=23, 28, 28 (range 20-30) and v=39 (range 30-40).
+  - match: { aggregations.histo.buckets.7.my_range.buckets.0.doc_count: 0 }
+  - match: { aggregations.histo.buckets.7.my_range.buckets.1.doc_count: 0 }
+  - match: { aggregations.histo.buckets.7.my_range.buckets.2.doc_count: 3 }
+  - match: { aggregations.histo.buckets.7.my_range.buckets.3.doc_count: 1 }
diff --git a/...spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml b/...spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml
@@ -158,3 +158,75 @@ setup:
   - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
   - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
   - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
+
+---
+# Indexes eight documents, runs a `range` aggregation on `v` with an `auto_date_histogram`
+# sub-aggregation on `date`, and checks the range keys, the per-range doc counts and the
+# per-date-bucket doc counts inside the two non-empty ranges.
+"Range aggregation with auto_date_histogram sub-aggregation":
+  - do:
+      indices.create:
+        index: sub_agg_profile
+        body:
+          mappings:
+            properties:
+              "@timestamp":
+                type: date
+              metrics.size:
+                type: long
+
+  # NOTE(review): the mapping above declares "@timestamp" and "metrics.size", but the documents
+  # below carry "date" and "v" - presumably these rely on dynamic mapping; confirm this is intended.
+  - do:
+      bulk:
+        refresh: true
+        index: sub_agg_profile
+        body:
+          - '{"index": {}}'
+          - '{"date": "2020-03-01", "v": 1}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-02", "v": 2}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-03", "v": 3}'
+          - '{"index": {}}'
+          - '{"date": "2020-04-09", "v": 4}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-08", "v": 13}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-09", "v": 14}'
+          - '{"index": {}}'
+          - '{"date": "2020-03-09", "v": 15}'
+          - '{"index": {}}'
+          - '{"date": "2020-04-11", "v": 19}'
+
+  - do:
+      search:
+        index: sub_agg_profile
+        body:
+          size: 0
+          aggs:
+            range_histo:
+              range:
+                field: v
+                ranges:
+                  - to: 0
+                  - from: 0
+                    to: 10
+                  - from: 10
+              aggs:
+                date:
+                  auto_date_histogram:
+                    field: "date"
+                    buckets: 3
+
+  - match: { hits.total.value: 8 }
+  - length: { aggregations.range_histo.buckets: 3 }
+
+  - match: { aggregations.range_histo.buckets.0.key: "*-0.0" }
+  - match: { aggregations.range_histo.buckets.1.key: "0.0-10.0" }
+  - match: { aggregations.range_histo.buckets.2.key: "10.0-*" }
+
+  # v=1..4 are expected in the 0-10 range and v=13,14,15,19 in the 10-* range; no value is below 0.
+  - match: { aggregations.range_histo.buckets.0.doc_count: 0 }
+  - match: { aggregations.range_histo.buckets.1.doc_count: 4 }
+  - match: { aggregations.range_histo.buckets.2.doc_count: 4 }
+
+  # In each non-empty range three documents are dated in March 2020 and one in April 2020.
+  - match: { aggregations.range_histo.buckets.1.date.buckets.0.doc_count: 3 }
+  - match: { aggregations.range_histo.buckets.1.date.buckets.1.doc_count: 1 }
+
+  - match: { aggregations.range_histo.buckets.2.date.buckets.0.doc_count: 3 }
+  - match: { aggregations.range_histo.buckets.2.date.buckets.1.doc_count: 1 }