apache · cryptoe · Dec 17, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024
diff --git a/integration-tests/src/test/resources/queries/twitterstream_queries.json b/integration-tests/src/test/resources/queries/twitterstream_queries.json
@@ -19,8 +19,8 @@
                 }
             ],
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },
@@ -92,8 +92,8 @@
             },
             "threshold": 2,
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },
@@ -196,8 +196,8 @@
             },
             "threshold": 2,
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },
@@ -320,8 +320,8 @@
             },
             "threshold": 2,
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },
@@ -473,8 +473,8 @@
             },
             "limit": 3,
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },
@@ -739,8 +739,8 @@
             },
             "threshold": 2,
             "context": {
-                "useCache": "true",
-                "populateCache": "true",
+                "useCache": "false",
+                "populateCache": "false",
                 "timeout": 60000
             }
         },

diff --git a/processing/src/main/java/org/apache/druid/query/CursorGranularizer.java b/processing/src/main/java/org/apache/druid/query/CursorGranularizer.java
@@ -24,6 +24,7 @@
 import com.google.common.collect.Lists;
 import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.granularity.Granularities;
 import org.apache.druid.java.util.common.granularity.Granularity;
 import org.apache.druid.segment.ColumnValueSelector;
@@ -94,11 +95,13 @@ public static CursorGranularizer create(
       timeSelector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
     }
 
-    return new CursorGranularizer(cursor, bucketIterable, timeSelector, timeOrder == Order.DESCENDING);
+    return new CursorGranularizer(cursor, granularity, bucketIterable, timeSelector, timeOrder == Order.DESCENDING);
   }
 
   private final Cursor cursor;
 
+  private final Granularity granularity;
+
   // Iterable that iterates over time buckets.
   private final Iterable<Interval> bucketIterable;
 
@@ -112,12 +115,14 @@ public static CursorGranularizer create(
 
   private CursorGranularizer(
       Cursor cursor,
+      Granularity granularity,
       Iterable<Interval> bucketIterable,
       @Nullable ColumnValueSelector timeSelector,
       boolean descending
   )
   {
     this.cursor = cursor;
+    this.granularity = granularity;
     this.bucketIterable = bucketIterable;
     this.timeSelector = timeSelector;
     this.descending = descending;
@@ -133,13 +138,18 @@ public DateTime getBucketStart()
     return DateTimes.utc(currentBucketStart);
   }
 
+  public Interval getCurrentInterval() // Exposes the current bucket's start/end as a single Interval.
+  {
+    return Intervals.utc(currentBucketStart, currentBucketEnd); // millis that advanceToBucket assigns before its isDone() check
+  }
+
   public boolean advanceToBucket(final Interval bucketInterval)
   {
+    currentBucketStart = bucketInterval.getStartMillis();
+    currentBucketEnd = bucketInterval.getEndMillis();
     if (cursor.isDone()) {
       return false;
     }
-    currentBucketStart = bucketInterval.getStartMillis();
-    currentBucketEnd = bucketInterval.getEndMillis();
     if (timeSelector == null) {
       return true;
     }

diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java b/processing/src/main/java/org/apache/druid/query/QueryContexts.java
@@ -89,6 +89,7 @@ public class QueryContexts
   public static final String UNCOVERED_INTERVALS_LIMIT_KEY = "uncoveredIntervalsLimit";
   public static final String MIN_TOP_N_THRESHOLD = "minTopNThreshold";
   public static final String CATALOG_VALIDATION_ENABLED = "catalogValidationEnabled";
+  public static final String TOPN_USE_MULTI_PASS_POOLED_QUERY_GRANULARITY = "topNuseMultiPassPooledQueryGranularity";
   /**
    * Context parameter to enable/disable the extended filtered sum rewrite logic.
    *

diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
@@ -391,7 +391,7 @@ public boolean hasNext()
       if (delegate != null && delegate.hasNext()) {
         return true;
       } else {
-        if (!cursor.isDone() && granularizer.currentOffsetWithinBucket()) {
+        if (granularizer.currentOffsetWithinBucket()) {
           if (delegate != null) {
             delegate.close();
           }

diff --git a/processing/src/main/java/org/apache/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/org/apache/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java
@@ -113,6 +113,7 @@ public void run(
     try {
       // reset cursor since we call run again
       params.getCursor().reset();
+      params.getGranularizer().advanceToBucket(params.getGranularizer().getCurrentInterval());
       // Run topN for all metrics for top N dimension values
       allMetricsParam = allMetricAlgo.makeInitParams(params.getSelectorPlus(), params.getCursor(), params.getGranularizer());
       allMetricAlgo.run(

diff --git a/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/org/apache/druid/query/topn/BaseTopNAlgorithm.java
@@ -97,12 +97,14 @@ private void runWithCardinalityKnown(
     }
     boolean hasDimValSelector = (dimValSelector != null);
 
-    int cardinality = params.getCardinality();
+    final int cardinality = params.getCardinality();
+    final int numValuesPerPass = params.getNumValuesPerPass();
     int numProcessed = 0;
     long processedRows = 0;
     while (numProcessed < cardinality) {
       final int numToProcess;
-      int maxNumToProcess = Math.min(params.getNumValuesPerPass(), cardinality - numProcessed);
+      int maxNumToProcess = Math.min(numValuesPerPass, cardinality - numProcessed);
+
 
       DimValSelector theDimValSelector;
       if (!hasDimValSelector) {
@@ -125,6 +127,7 @@ private void runWithCardinalityKnown(
       numProcessed += numToProcess;
       if (numProcessed < cardinality) {
         params.getCursor().reset();
+        params.getGranularizer().advanceToBucket(params.getGranularizer().getCurrentInterval());
       }
     }
     if (queryMetrics != null) {

diff --git a/processing/src/main/java/org/apache/druid/query/topn/Generic1AggPooledTopNScannerPrototype.java b/processing/src/main/java/org/apache/druid/query/topn/Generic1AggPooledTopNScannerPrototype.java
@@ -54,25 +54,27 @@ public long scanAndAggregate(
   {
     long processedRows = 0;
     int positionToAllocate = 0;
-    while (!cursor.isDoneOrInterrupted()) {
-      final IndexedInts dimValues = dimensionSelector.getRow();
-      final int dimSize = dimValues.size();
-      for (int i = 0; i < dimSize; i++) {
-        int dimIndex = dimValues.get(i);
-        int position = positions[dimIndex];
-        if (position >= 0) {
-          aggregator.aggregate(resultsBuffer, position);
-        } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
-          positions[dimIndex] = positionToAllocate;
-          position = positionToAllocate;
-          aggregator.init(resultsBuffer, position);
-          aggregator.aggregate(resultsBuffer, position);
-          positionToAllocate += aggregatorSize;
+    if (granularizer.currentOffsetWithinBucket()) {
+      while (!cursor.isDoneOrInterrupted()) {
+        final IndexedInts dimValues = dimensionSelector.getRow();
+        final int dimSize = dimValues.size();
+        for (int i = 0; i < dimSize; i++) {
+          int dimIndex = dimValues.get(i);
+          int position = positions[dimIndex];
+          if (position >= 0) {
+            aggregator.aggregate(resultsBuffer, position);
+          } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
+            positions[dimIndex] = positionToAllocate;
+            position = positionToAllocate;
+            aggregator.init(resultsBuffer, position);
+            aggregator.aggregate(resultsBuffer, position);
+            positionToAllocate += aggregatorSize;
+          }
+        }
+        processedRows++;
+        if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
+          break;
         }
-      }
-      processedRows++;
-      if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
-        break;
       }
     }
     return processedRows;

diff --git a/processing/src/main/java/org/apache/druid/query/topn/Generic2AggPooledTopNScannerPrototype.java b/processing/src/main/java/org/apache/druid/query/topn/Generic2AggPooledTopNScannerPrototype.java
@@ -57,29 +57,31 @@ public long scanAndAggregate(
     int totalAggregatorsSize = aggregator1Size + aggregator2Size;
     long processedRows = 0;
     int positionToAllocate = 0;
-    while (!cursor.isDoneOrInterrupted()) {
-      final IndexedInts dimValues = dimensionSelector.getRow();
-      final int dimSize = dimValues.size();
-      for (int i = 0; i < dimSize; i++) {
-        int dimIndex = dimValues.get(i);
-        int position = positions[dimIndex];
-        if (position >= 0) {
-          aggregator1.aggregate(resultsBuffer, position);
-          aggregator2.aggregate(resultsBuffer, position + aggregator1Size);
-        } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
-          positions[dimIndex] = positionToAllocate;
-          position = positionToAllocate;
-          aggregator1.init(resultsBuffer, position);
-          aggregator1.aggregate(resultsBuffer, position);
-          position += aggregator1Size;
-          aggregator2.init(resultsBuffer, position);
-          aggregator2.aggregate(resultsBuffer, position);
-          positionToAllocate += totalAggregatorsSize;
+    if (granularizer.currentOffsetWithinBucket()) {
+      while (!cursor.isDoneOrInterrupted()) {
+        final IndexedInts dimValues = dimensionSelector.getRow();
+        final int dimSize = dimValues.size();
+        for (int i = 0; i < dimSize; i++) {
+          int dimIndex = dimValues.get(i);
+          int position = positions[dimIndex];
+          if (position >= 0) {
+            aggregator1.aggregate(resultsBuffer, position);
+            aggregator2.aggregate(resultsBuffer, position + aggregator1Size);
+          } else if (position == TopNAlgorithm.INIT_POSITION_VALUE) {
+            positions[dimIndex] = positionToAllocate;
+            position = positionToAllocate;
+            aggregator1.init(resultsBuffer, position);
+            aggregator1.aggregate(resultsBuffer, position);
+            position += aggregator1Size;
+            aggregator2.init(resultsBuffer, position);
+            aggregator2.aggregate(resultsBuffer, position);
+            positionToAllocate += totalAggregatorsSize;
+          }
+        }
+        processedRows++;
+        if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
+          break;
         }
-      }
-      processedRows++;
-      if (!granularizer.advanceCursorWithinBucketUninterruptedly()) {
-        break;
       }
     }
     return processedRows;