Skip to content

Commit

Permalink
Approximate match all with sort with a bounded range query
Browse files Browse the repository at this point in the history
If we run a match-all query and sort results by a numeric/timestamp
field, then we can replace the match-all with a filter over the
top/bottom 10,000 (or whatever `track_total_hits` is set to) values
from the sort field.

Signed-off-by: Michael Froh <[email protected]>
  • Loading branch information
msfroh committed Oct 14, 2024
1 parent 9ddee61 commit 2bf0e80
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.bytes.BytesArray;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.core.xcontent.XContentParser.Token;
Expand All @@ -71,6 +72,8 @@
import org.opensearch.index.fielddata.plain.SortedNumericIndexFieldData;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.approximate.ApproximatePointRangeQuery;
import org.opensearch.search.approximate.ApproximateScoreQuery;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.query.BitmapDocValuesQuery;

Expand Down Expand Up @@ -1064,24 +1067,40 @@ public Query rangeQuery(
QueryShardContext context
) {
return longRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> {
if (isSearchable && hasDocValues) {
Query query = LongPoint.newRangeQuery(field, l, u);
Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u);
query = new IndexOrDocValuesQuery(query, dvQuery);
if (context.indexSortedOnField(field)) {
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
Query dvQuery = hasDocValues ? SortedNumericDocValuesField.newSlowRangeQuery(field, l, u) : null;
if (isSearchable) {
Query pointRangeQuery = LongPoint.newRangeQuery(field, l, u);
Query query;
if (dvQuery != null) {
query = new IndexOrDocValuesQuery(pointRangeQuery, dvQuery);
if (context.indexSortedOnField(field)) {
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
}
} else {
query = pointRangeQuery;
}
return query;
}
if (hasDocValues) {
Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u);
if (context.indexSortedOnField(field)) {
query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query);
if (FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY_SETTING)) {
return new ApproximateScoreQuery(
query,
new ApproximatePointRangeQuery(
field,
LongPoint.pack(new long[] { l }).bytes,
LongPoint.pack(new long[] { u }).bytes,
new long[] { l }.length
) {
@Override
protected String toString(int dimension, byte[] value) {
return Long.toString(LongPoint.decodeDimension(value, 0));
}
}
);
}
return query;
}
return LongPoint.newRangeQuery(field, l, u);

if (context.indexSortedOnField(field)) {
dvQuery = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, dvQuery);
}
return dvQuery;
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@

import org.apache.lucene.search.Query;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.common.ParsingException;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.ObjectParser;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.search.approximate.ApproximateMatchAllQuery;
import org.opensearch.search.approximate.ApproximateScoreQuery;

import java.io.IOException;

Expand Down Expand Up @@ -88,7 +91,11 @@ public static MatchAllQueryBuilder fromXContent(XContentParser parser) {

/**
 * Builds the Lucene query for this match-all builder.
 * <p>
 * NOTE(review): this span is a flattened diff hunk. The first {@code return} below appears to be the
 * pre-change line that the commit removes, and the statements after it the post-change body -- confirm
 * against the real file before relying on this text as compilable code.
 * <p>
 * In the post-change body, the plain match-all query is wrapped in an {@code ApproximateScoreQuery}
 * (paired with an {@code ApproximateMatchAllQuery}) when the
 * {@code FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY_SETTING} feature flag is enabled; otherwise the plain
 * match-all query is returned unchanged.
 *
 * @param context the shard context (not read by the statements visible here)
 * @return the match-all query, optionally wrapped for approximation
 */
@Override
protected Query doToQuery(QueryShardContext context) {
return Queries.newMatchAllQuery();
Query query = Queries.newMatchAllQuery();
// Only offer the approximation when the feature flag is switched on.
if (FeatureFlags.isEnabled(FeatureFlags.APPROXIMATE_POINT_RANGE_QUERY_SETTING)) {
return new ApproximateScoreQuery(query, new ApproximateMatchAllQuery());
}
return query;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.approximate;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.search.internal.SearchContext;
import org.opensearch.search.sort.FieldSortBuilder;

import java.io.IOException;

/**
 * Replaces a match-all query with a less expensive query when possible.
 * <p>
 * Currently, the only approximation is a bounded range query over the high/low end of a field. It is used
 * when the request has a primary field sort on that field (with no {@code missing} value), requests no
 * aggregations, and the field type's unbounded range query is itself approximable.
 */
public class ApproximateMatchAllQuery extends ApproximateQuery {
    /**
     * Approximation selected by the most recent successful {@link #canApproximate(SearchContext)} call,
     * or {@code null} if the last check failed or no check has run yet.
     */
    private ApproximateQuery approximation = null;

    /**
     * Decides whether this match-all query can be replaced by an approximate range query over the primary
     * sort field, and remembers that range query for {@link #rewrite(IndexSearcher)} when it can.
     *
     * @param context the search context of the current request; may be {@code null}
     * @return {@code true} if an approximation was found and stored, {@code false} otherwise
     */
    @Override
    protected boolean canApproximate(SearchContext context) {
        // Clear any approximation left over from an earlier call, so that a failed check can never leave
        // a stale query behind for rewrite() to return.
        approximation = null;
        if (context == null) {
            return false;
        }
        if (context.aggregations() != null) {
            return false;
        }
        if (context.request() == null || context.request().source() == null) {
            return false;
        }

        FieldSortBuilder primarySortField = FieldSortBuilder.getPrimaryFieldSortOrNull(context.request().source());
        if (primarySortField == null || primarySortField.missing() != null) {
            return false;
        }

        MappedFieldType mappedFieldType = context.getQueryShardContext().fieldMapper(primarySortField.fieldName());
        if (mappedFieldType == null) {
            // The sort field has no mapping on this shard (e.g. an unmapped field), so there is no
            // field type to build a range query from. Without this guard the next call would throw NPE.
            return false;
        }

        // Ask the field type for an unbounded range query; field types that support approximation hand
        // back an ApproximateScoreQuery wrapping the approximate form.
        Query rangeQuery = mappedFieldType.rangeQuery(null, null, false, false, null, null, null, context.getQueryShardContext());
        if (rangeQuery instanceof ApproximateScoreQuery) {
            ApproximateScoreQuery approximateScoreQuery = (ApproximateScoreQuery) rangeQuery;
            approximateScoreQuery.setContext(context);
            // Only accept the range query if, given this context, it resolved to its approximate form.
            if (approximateScoreQuery.resolvedQuery instanceof ApproximateQuery) {
                approximation = (ApproximateQuery) approximateScoreQuery.resolvedQuery;
                return true;
            }
        }
        return false;
    }

    @Override
    public String toString(String field) {
        return "Approximate(*:*)";
    }

    @Override
    public void visit(QueryVisitor visitor) {
        visitor.visitLeaf(this);
    }

    /** All instances are interchangeable: equality depends only on the class. */
    @Override
    public boolean equals(Object o) {
        return sameClassAs(o);
    }

    @Override
    public int hashCode() {
        return classHash();
    }

    /**
     * Returns the approximation chosen by {@link #canApproximate(SearchContext)}.
     *
     * @throws IllegalStateException if no approximation is available, i.e. the context was never set or
     *                               the query could not be approximated
     */
    @Override
    public Query rewrite(IndexSearcher indexSearcher) throws IOException {
        if (approximation == null) {
            throw new IllegalStateException("rewrite called without setting context or query could not be approximated");
        }
        return approximation;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public ApproximateQuery getApproximationQuery() {
}

@Override
public final Query rewrite(IndexSearcher indexSearcher) throws IOException {
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (resolvedQuery == null) {
throw new IllegalStateException("Cannot rewrite resolved query without setContext being called");
}
Expand Down

0 comments on commit 2bf0e80

Please sign in to comment.