Skip to content

Commit

Permalink
Create a FilterBundle.Builder class and use it to construct FilterBun…
Browse files Browse the repository at this point in the history
  • Loading branch information
cecemei authored Sep 26, 2024
1 parent 986bc62 commit a0c842e
Show file tree
Hide file tree
Showing 9 changed files with 831 additions and 713 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class QueryContexts
public static final String REWRITE_JOIN_TO_FILTER_ENABLE_KEY = "enableRewriteJoinToFilter";
public static final String JOIN_FILTER_REWRITE_MAX_SIZE_KEY = "joinFilterRewriteMaxSize";
public static final String MAX_NUMERIC_IN_FILTERS = "maxNumericInFilters";
// Query context key "cursorAutoArrangeFilters".
// NOTE(review): presumably feeds the cursorAutoArrangeFilters flag of FilterBundle.Builder, which orders child
// filters by estimated index compute cost; the wiring is not visible in this file -- confirm at the call site.
public static final String CURSOR_AUTO_ARRANGE_FILTERS = "cursorAutoArrangeFilters";
// This flag controls whether a SQL join query with left scan should be attempted to be run as direct table access
// instead of being wrapped inside a query. With direct table access enabled, Druid can push down the join operation to
// data servers.
Expand Down
47 changes: 22 additions & 25 deletions processing/src/main/java/org/apache/druid/query/filter/Filter.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,27 +47,26 @@ public interface Filter
* cursor. If both are set, the cursor will effectively perform a logical AND to combine them.
* See {@link FilterBundle} for additional details.
*
* @param columnIndexSelector - provides {@link org.apache.druid.segment.column.ColumnIndexSupplier} to fetch column
* indexes and {@link org.apache.druid.collections.bitmap.BitmapFactory} to manipulate
* them
* @param bitmapResultFactory - wrapper for {@link ImmutableBitmap} operations to tie into
* {@link org.apache.druid.query.QueryMetrics} and build the output indexes
* @param applyRowCount - upper bound on number of rows this filter would be applied to, after removing rows
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
* resolved using an index, then "y" will receive the number of rows that matched
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
* index, then "y" will receive the number of rows that did *not* match the filter "x".
* @param totalRowCount - total number of rows to be scanned if no indexes are applied
* @param includeUnknown - mapping for Druid native two state logic system into SQL three-state logic system. If
* set to true, bitmaps returned by this method should include true bits for any rows
* where the matching result is 'unknown', such as from the input being null valued.
* See {@link NullHandling#useThreeValueLogic()}
* @return - {@link FilterBundle} containing any indexes and/or matchers that are needed to build
* a cursor
* @param <T> - Type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default
* @param filterBundleBuilder holds the {@link BitmapColumnIndex} resolved for this filter (may be null), the
* {@link ColumnIndexSelector} it was resolved from, and the builders of any child filters
* @param bitmapResultFactory wrapper for {@link ImmutableBitmap} operations to tie into
* {@link org.apache.druid.query.QueryMetrics} and build the output indexes
* @param applyRowCount upper bound on number of rows this filter would be applied to, after removing rows
* short-circuited by prior bundle operations. For example, given "x AND y", if "x" is
* resolved using an index, then "y" will receive the number of rows that matched
* the filter "x". As another example, given "x OR y", if "x" is resolved using an
* index, then "y" will receive the number of rows that did *not* match the filter "x".
* @param totalRowCount total number of rows to be scanned if no indexes are applied
* @param includeUnknown mapping for Druid native two state logic system into SQL three-state logic system. If
* set to true, bitmaps returned by this method should include true bits for any rows
* where the matching result is 'unknown', such as from the input being null valued.
* See {@link NullHandling#useThreeValueLogic()}
* @param <T> type of {@link BitmapResultFactory} results, {@link ImmutableBitmap} by default
* @return {@link FilterBundle} containing any indexes and/or matchers that are needed to build
* a cursor
*/
default <T> FilterBundle makeFilterBundle(
ColumnIndexSelector columnIndexSelector,
FilterBundle.Builder filterBundleBuilder,
BitmapResultFactory<T> bitmapResultFactory,
int applyRowCount,
int totalRowCount,
Expand All @@ -76,7 +75,7 @@ default <T> FilterBundle makeFilterBundle(
{
final FilterBundle.IndexBundle indexBundle;
final boolean needMatcher;
final BitmapColumnIndex columnIndex = getBitmapColumnIndex(columnIndexSelector);
final BitmapColumnIndex columnIndex = filterBundleBuilder.getBitmapColumnIndex();
if (columnIndex != null) {
final long bitmapConstructionStartNs = System.nanoTime();
final T result = columnIndex.computeBitmapResult(
Expand Down Expand Up @@ -107,7 +106,7 @@ default <T> FilterBundle makeFilterBundle(
new FilterBundle.MatcherBundleInfo(this::toString, null, null),
this::makeMatcher,
this::makeVectorMatcher,
this.canVectorizeMatcher(columnIndexSelector)
this.canVectorizeMatcher(filterBundleBuilder.getColumnIndexSelector())
);
} else {
matcherBundle = null;
Expand All @@ -122,7 +121,6 @@ default <T> FilterBundle makeFilterBundle(
* examine details about the index prior to computing it, via {@link BitmapColumnIndex#getIndexCapabilities()}.
*
* @param selector Object used to create BitmapColumnIndex
*
* @return BitmapColumnIndex that can build ImmutableBitmap of matched row numbers
*/
@Nullable
Expand All @@ -132,7 +130,6 @@ default <T> FilterBundle makeFilterBundle(
* Get a {@link ValueMatcher} that applies this filter to row values.
*
* @param factory Object used to create ValueMatchers
*
* @return ValueMatcher that applies this filter to row values.
*/
ValueMatcher makeMatcher(ColumnSelectorFactory factory);
Expand All @@ -141,7 +138,6 @@ default <T> FilterBundle makeFilterBundle(
* Get a {@link VectorValueMatcher} that applies this filter to row vectors.
*
* @param factory Object used to create ValueMatchers
*
* @return VectorValueMatcher that applies this filter to row vectors.
*/
default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory)
Expand All @@ -151,6 +147,7 @@ default VectorValueMatcher makeVectorMatcher(VectorColumnSelectorFactory factory

/**
* Returns true if this filter can produce a vectorized matcher from its "makeVectorMatcher" method.
*
* @param inspector Supplies type information for the selectors this filter will match against
*/
default boolean canVectorizeMatcher(ColumnInspector inspector)
Expand All @@ -176,7 +173,7 @@ default boolean supportsRequiredColumnRewrite()
* Return a copy of this filter that is identical to the this filter except that it operates on different columns,
* based on a renaming map where the key is the column to be renamed in the filter, and the value is the new
* column name.
*
* <p>
* For example, if I have a filter (A = hello), and I have a renaming map (A -> B),
* this should return the filter (B = hello)
*
Expand Down
177 changes: 132 additions & 45 deletions processing/src/main/java/org/apache/druid/query/filter/FilterBundle.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,21 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.BitmapResultFactory;
import org.apache.druid.query.filter.vector.VectorValueMatcher;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.column.ColumnIndexCapabilities;
import org.apache.druid.segment.column.SimpleColumnIndexCapabilities;
import org.apache.druid.segment.data.Offset;
import org.apache.druid.segment.filter.FalseFilter;
import org.apache.druid.segment.index.BitmapColumnIndex;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
Expand All @@ -57,27 +62,12 @@
*/
public class FilterBundle
{
/**
 * Creates an index-only {@link FilterBundle} (no matcher bundle) whose index is the supplied bitmap and whose
 * info describes the filter as {@link FalseFilter}.
 *
 * @param constructionTime value recorded in the {@link IndexBundleInfo} for this bundle
 * @param emptyBitmap      bitmap used as the bundle's index; callers are expected to pass an empty bitmap
 * @return a bundle carrying only the index described above
 */
public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap)
{
return new FilterBundle(
new FilterBundle.SimpleIndexBundle(
new FilterBundle.IndexBundleInfo(() -> FalseFilter.instance().toString(), 0, constructionTime, null),
emptyBitmap,
SimpleColumnIndexCapabilities.getConstant()
),
null
);
}

@Nullable
private final IndexBundle indexBundle;
@Nullable
private final MatcherBundle matcherBundle;

public FilterBundle(
@Nullable IndexBundle index,
@Nullable MatcherBundle matcherBundle
)
public FilterBundle(@Nullable IndexBundle index, @Nullable MatcherBundle matcherBundle)
{
Preconditions.checkArgument(
index != null || matcherBundle != null,
Expand All @@ -87,6 +77,17 @@ public FilterBundle(
this.matcherBundle = matcherBundle;
}

/**
 * Creates an index-only {@link FilterBundle} (no matcher bundle) whose index is the supplied bitmap and whose
 * info describes the filter as {@link FalseFilter}.
 *
 * @param constructionTime value recorded in the {@link IndexBundleInfo} for this bundle
 * @param emptyBitmap      bitmap used as the bundle's index; callers are expected to pass an empty bitmap
 * @return a bundle carrying only the index described above
 */
public static FilterBundle allFalse(long constructionTime, ImmutableBitmap emptyBitmap)
{
  final FilterBundle.IndexBundleInfo falseInfo = new FilterBundle.IndexBundleInfo(
      () -> FalseFilter.instance().toString(),
      0,
      constructionTime,
      null
  );
  final FilterBundle.SimpleIndexBundle falseIndex = new FilterBundle.SimpleIndexBundle(
      falseInfo,
      emptyBitmap,
      SimpleColumnIndexCapabilities.getConstant()
  );
  // No matcher bundle: the index alone is the whole answer.
  return new FilterBundle(falseIndex, null);
}

@Nullable
public IndexBundle getIndex()
Expand Down Expand Up @@ -151,6 +152,95 @@ public interface MatcherBundle
boolean canVectorize();
}

/**
 * Holds what is needed to build a {@link FilterBundle} for a single {@link Filter}: the filter itself, the
 * {@link ColumnIndexSelector}, the {@link BitmapColumnIndex} the filter resolved from that selector (if any), and
 * one nested builder per child filter when the filter is a {@link BooleanFilter}.
 * <p>
 * It also exposes an estimated cost for {@link BitmapColumnIndex#computeBitmapResult}. When automatic filter
 * arrangement is enabled, child builders are ordered from cheapest to most expensive by that estimate.
 */
public static class Builder
{
  private final Filter filter;
  private final ColumnIndexSelector columnIndexSelector;
  @Nullable
  private final BitmapColumnIndex bitmapColumnIndex;
  private final List<FilterBundle.Builder> childBuilders;
  private final int estimatedIndexComputeCost;

  /**
   * @param filter                   filter this builder will produce a bundle for
   * @param columnIndexSelector      selector handed to {@link Filter#getBitmapColumnIndex} for this filter and,
   *                                 recursively, for every child filter
   * @param cursorAutoArrangeFilters if true, child builders are sorted by ascending estimated cost and this
   *                                 builder's own cost is computed; if false, children keep the iteration order of
   *                                 {@link BooleanFilter#getFilters()} and the cost is {@link Integer#MAX_VALUE}
   */
  public Builder(Filter filter, ColumnIndexSelector columnIndexSelector, boolean cursorAutoArrangeFilters)
  {
    this.filter = filter;
    this.columnIndexSelector = columnIndexSelector;
    // The index of this filter is resolved before any child builder is created.
    this.bitmapColumnIndex = filter.getBitmapColumnIndex(columnIndexSelector);
    this.childBuilders = createChildBuilders(filter, columnIndexSelector, cursorAutoArrangeFilters);

    if (!cursorAutoArrangeFilters) {
      this.estimatedIndexComputeCost = Integer.MAX_VALUE;
    } else {
      // Cheapest children first. List.sort is stable, so equally priced children keep their original order.
      this.childBuilders.sort(
          (left, right) -> Integer.compare(
              left.getEstimatedIndexComputeCost(),
              right.getEstimatedIndexComputeCost()
          )
      );
      this.estimatedIndexComputeCost = estimateCost(this.bitmapColumnIndex, this.childBuilders);
    }
  }

  /**
   * Creates one builder per child of a {@link BooleanFilter}, in the iteration order of its filter collection.
   * Any other kind of filter yields an empty (but still mutable) list.
   */
  private static List<FilterBundle.Builder> createChildBuilders(
      Filter parent,
      ColumnIndexSelector selector,
      boolean autoArrange
  )
  {
    if (!(parent instanceof BooleanFilter)) {
      return new ArrayList<>(0);
    }
    final Collection<Filter> subFilters = ((BooleanFilter) parent).getFilters();
    final List<FilterBundle.Builder> children = new ArrayList<>(subFilters.size());
    for (Filter subFilter : subFilters) {
      children.add(new FilterBundle.Builder(subFilter, selector, autoArrange));
    }
    return children;
  }

  /**
   * Adds the index's own estimated cost to the estimates of all children, saturating at
   * {@link Integer#MAX_VALUE}. A missing index is treated as {@link Integer#MAX_VALUE}.
   */
  private static int estimateCost(@Nullable BitmapColumnIndex index, List<FilterBundle.Builder> children)
  {
    if (index == null) {
      return Integer.MAX_VALUE;
    }
    int total = index.estimatedComputeCost();
    if (total == Integer.MAX_VALUE) {
      return Integer.MAX_VALUE;
    }

    for (FilterBundle.Builder child : children) {
      final int childCost = child.getEstimatedIndexComputeCost();
      // Check the remaining headroom before adding so the running total can never wrap around.
      final int headroom = Integer.MAX_VALUE - total;
      if (childCost >= headroom) {
        return Integer.MAX_VALUE;
      }
      total += childCost;
    }
    return total;
  }

  /**
   * @return the selector this builder (and its children) resolved indexes from
   */
  public ColumnIndexSelector getColumnIndexSelector()
  {
    return columnIndexSelector;
  }

  /**
   * @return the index resolved for this builder's filter, or null if the filter did not provide one
   */
  @Nullable
  public BitmapColumnIndex getBitmapColumnIndex()
  {
    return bitmapColumnIndex;
  }

  /**
   * @return builders for the child filters; empty unless the filter is a {@link BooleanFilter}
   */
  public List<FilterBundle.Builder> getChildBuilders()
  {
    return childBuilders;
  }

  /**
   * @return the estimated index compute cost, or {@link Integer#MAX_VALUE} when it was not computed or is unknown
   */
  public int getEstimatedIndexComputeCost()
  {
    return estimatedIndexComputeCost;
  }

  /**
   * Builds the bundle by delegating to {@link Filter#makeFilterBundle} with this builder as its first argument;
   * the remaining arguments are passed through unchanged.
   */
  public <T> FilterBundle build(
      BitmapResultFactory<T> bitmapResultFactory,
      int applyRowCount,
      int totalRowCount,
      boolean includeUnknown
  )
  {
    return filter.makeFilterBundle(this, bitmapResultFactory, applyRowCount, totalRowCount, includeUnknown);
  }
}

public static class SimpleIndexBundle implements IndexBundle
{
private final IndexBundleInfo info;
Expand Down Expand Up @@ -211,11 +301,7 @@ public MatcherBundleInfo getMatcherInfo()
}

@Override
public ValueMatcher valueMatcher(
ColumnSelectorFactory selectorFactory,
Offset baseOffset,
boolean descending
)
public ValueMatcher valueMatcher(ColumnSelectorFactory selectorFactory, Offset baseOffset, boolean descending)
{
return matcherFn.apply(selectorFactory);
}
Expand Down Expand Up @@ -339,12 +425,11 @@ public List<IndexBundleInfo> getIndexes()
*/
public String describe()
{
final StringBuilder sb = new StringBuilder()
.append("index: ")
.append(filter.get())
.append(" (selectionSize = ")
.append(selectionSize)
.append(")\n");
final StringBuilder sb = new StringBuilder().append("index: ")
.append(filter.get())
.append(" (selectionSize = ")
.append(selectionSize)
.append(")\n");

if (indexes != null) {
for (final IndexBundleInfo info : indexes) {
Expand All @@ -358,23 +443,26 @@ public String describe()
@Override
public String toString()
{
return "{" +
"filter=\"" + filter.get() + '\"' +
", selectionSize=" + selectionSize +
", buildTime=" + TimeUnit.NANOSECONDS.toMicros(buildTimeNs) + "μs" +
(indexes != null ? ", indexes=" + indexes : "") +
'}';
return "{"
+ "filter=\""
+ filter.get()
+ '\"'
+ ", selectionSize="
+ selectionSize
+ ", buildTime="
+ TimeUnit.NANOSECONDS.toMicros(buildTimeNs)
+ "μs"
+ (indexes != null ? ", indexes=" + indexes : "")
+ '}';
}
}

public static class MatcherBundleInfo
{
private static final Pattern PATTERN_LINE_START = Pattern.compile("(?m)^");

private final Supplier<String> filter;
@Nullable
final List<MatcherBundleInfo> matchers;

private final Supplier<String> filter;
@Nullable
private final IndexBundleInfo partialIndex;

Expand Down Expand Up @@ -415,10 +503,7 @@ public List<MatcherBundleInfo> getMatchers()
*/
public String describe()
{
final StringBuilder sb = new StringBuilder()
.append("matcher: ")
.append(filter.get())
.append("\n");
final StringBuilder sb = new StringBuilder().append("matcher: ").append(filter.get()).append("\n");

if (partialIndex != null) {
sb.append(" with partial ")
Expand All @@ -437,11 +522,13 @@ public String describe()
@Override
public String toString()
{
return "{" +
"filter=\"" + filter.get() + '\"' +
(partialIndex != null ? ", partialIndex=" + partialIndex : "") +
(matchers != null ? ", matchers=" + matchers : "") +
'}';
return "{"
+ "filter=\""
+ filter.get()
+ '\"'
+ (partialIndex != null ? ", partialIndex=" + partialIndex : "")
+ (matchers != null ? ", matchers=" + matchers : "")
+ '}';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

/**
* This class provides a mechanism to influence whether or not indexes are used for a {@link Filter} during processing
* by {@link Filter#makeFilterBundle(ColumnIndexSelector, BitmapResultFactory, int, int, boolean)}
* by {@link Filter#makeFilterBundle(FilterBundle.Builder, BitmapResultFactory, int, int, boolean)}
* (i.e. will a {@link Filter} be a "pre" filter in which we union indexes for all values that match the filter to
* create a {@link org.apache.druid.segment.BitmapOffset}/{@link org.apache.druid.segment.vector.BitmapVectorOffset},
* or will it be used as a "post" filter and evaluated while scanning row values from the
Expand Down
Loading

0 comments on commit a0c842e

Please sign in to comment.