From 006f404224dbfaea88b5d3eda534a6331fc371d0 Mon Sep 17 00:00:00 2001
From: Sandesh Kumar
Date: Tue, 19 Dec 2023 12:58:18 +0530
Subject: [PATCH] Use Collector.setWeight to improve aggregation performance

Signed-off-by: Sandesh Kumar
---
 .../GlobalOrdinalsStringTermsAggregator.java | 34 +++++++++++++++++++
 .../search/internal/ContextIndexSearcher.java | 6 ++++
 2 files changed, 40 insertions(+)

diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
index 5ed899408ab40..505faf4fbdc10 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java
@@ -37,6 +37,8 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
@@ -70,6 +72,7 @@
 import java.util.function.Function;
 import java.util.function.LongPredicate;
 import java.util.function.LongUnaryOperator;
+import java.util.logging.Logger;
 
 import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder;
 import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
@@ -80,11 +83,17 @@
  * @opensearch.internal
  */
 public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator {
+
+    // testing only - will remove
+    private static final Logger logger = Logger.getLogger(GlobalOrdinalsStringTermsAggregator.class.getName());
     protected final ResultStrategy resultStrategy;
     protected final ValuesSource.Bytes.WithOrdinals valuesSource;
 
     private final LongPredicate acceptedGlobalOrdinals;
     private final long valueCount;
+
+    // Weight handed in through setWeight(); remains null when that method is never invoked.
+    private Weight weight;
     private final GlobalOrdLookupFunction lookupGlobalOrd;
     protected final CollectionStrategy collectionStrategy;
     protected int segmentsWithSingleValuedOrds = 0;
@@ -142,8 +151,33 @@ String descriptCollectionStrategy() {
         return collectionStrategy.describe();
     }
 
+    /**
+     * Records the weight of the top-level query so that {@link #getLeafCollector} can consult
+     * per-segment match counts before collecting.
+     *
+     * @param weight weight of the top-level query
+     */
+    public void setWeight(Weight weight) {
+        this.weight = weight;
+    }
+
     @Override
     public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
+        // setWeight() is not guaranteed to have run, so guard instead of asserting and then dereferencing.
+        if (weight != null) {
+            // Ask for the count once per leaf; values matching neither branch fall through to the regular path.
+            final int matchingDocs = weight.count(ctx);
+            if (matchingDocs == 0) {
+                logger.info("0 Weight count");
+                return LeafBucketCollector.NO_OP_COLLECTOR;
+            } else if (matchingDocs == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) {
+                // no deleted documents & top level query matches everything
+                // iterate over the terms - doc frequency for each termsEnum directly
+                logger.info("No deleted documents in leaf");
+                // TODO: return appropriate LeafCollector; until then fall through to the doc-values path below
+            }
+        }
+
         SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx);
         collectionStrategy.globalOrdsReady(globalOrds);
         SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
index b042f3cf41d61..cc4ff0e9cd301 100644
--- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
+++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java
@@ -386,6 +386,12 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
                         return null;
                     }
                 }
+
+                // Delegate to the wrapped weight so its count() result is visible through this wrapper.
+                @Override
+                public int count(LeafReaderContext context) throws IOException {
+                    return weight.count(context);
+                }
             };
         } else {
             return weight;