From 6667d187c941d3cd52687feeeb9bc2b34ceb59ae Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 19 Dec 2023 12:58:18 +0530 Subject: [PATCH] Use Collector.setWeight to improve aggregation performance Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 48 +++++++++++++++++++ .../aggregations/support/ValuesSource.java | 2 +- .../search/internal/ContextIndexSearcher.java | 5 ++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 5ed899408ab40..07fe0a679d64f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -37,6 +37,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; @@ -61,6 +64,7 @@ import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.opensearch.search.aggregations.support.ValuesSource; +import org.opensearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -70,6 +74,7 @@ import java.util.function.Function; import java.util.function.LongPredicate; import java.util.function.LongUnaryOperator; +import java.util.logging.Logger; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; @@ -80,11 +85,16 @@ * @opensearch.internal */ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { + + // testing only - will remove + protected Logger logger = Logger.getLogger(GlobalOrdinalsStringTermsAggregator.class.getName()); protected final ResultStrategy resultStrategy; protected final ValuesSource.Bytes.WithOrdinals valuesSource; private final LongPredicate acceptedGlobalOrdinals; private final long valueCount; + + private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; protected int segmentsWithSingleValuedOrds = 0; @@ -142,11 +152,49 @@ String descriptCollectionStrategy() { return collectionStrategy.describe(); } + public void setWeight(Weight weight) { + assert this.weight == null; // ensure weight is not assigned twice + this.weight = weight; + } + @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + assert weight != null; // if weight is invoked, then we must have weight initialized + + if (weight.count(ctx) == 0 || + Terms.getTerms(ctx.reader(), String.valueOf(((WithOrdinals.FieldData) valuesSource).indexFieldData)).iterator().next() == null) { + return LeafBucketCollector.NO_OP_COLLECTOR; +// } else if (weight.count(ctx) == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) { +// // no deleted documents & top level query matches everything +// // iterate over the terms - doc frequency for each termsEnum directly +// // return appropriate LeafCollector + } + SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); + + if (weight.count(ctx) == 0) { + logger.info("O Weight count"); + return LeafBucketCollector.NO_OP_COLLECTOR; + } else if (weight.count(ctx) == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) { + // no deleted documents & top level query matches everything + //iterate over the terms - doc frequency for each termsEnum directly + logger.info("No deleted documents in leaf"); + // return appropriate LeafCollector + + +// TermsEnum termsEnum = Terms.getTerms(ctx.reader(), "country_code").iterator(); + return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, valuesSource.globalOrdinalsValues(ctx)) { + + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + collectionStrategy.collectGlobalOrd(owningBucketOrd, doc, singleValues.ordValue(), sub); + } + }); + } + + if (singleValues != null) { segmentsWithSingleValuedOrds++; if (acceptedGlobalOrdinals == ALWAYS_TRUE) { diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java index 1a76183ac1a2d..434fcf4e3a8a6 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java @@ -238,7 +238,7 @@ public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException { */ public static class FieldData extends WithOrdinals { - protected final IndexOrdinalsFieldData indexFieldData; + public final IndexOrdinalsFieldData indexFieldData; public FieldData(IndexOrdinalsFieldData indexFieldData) { this.indexFieldData = indexFieldData; diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index b042f3cf41d61..cc4ff0e9cd301 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -386,6 +386,11 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { return null; } } + + @Override + public int count(LeafReaderContext context) throws IOException { + return weight.count(context); + } }; } else { return weight;