Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport] fix incorrect unnest dimension cursor value matcher implementation #15203

Merged
merged 1 commit into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,21 @@ public ValueMatcher makeValueMatcher(@Nullable String value)
@Override
public boolean matches(boolean includeUnknown)
{
return includeUnknown;
// don't match anything, except for null values when includeUnknown is true
if (includeUnknown) {
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() <= 0) {
return true;
}
final int rowId = indexedIntsForCurrentRow.get(index);
return lookupName(rowId) == null;
}
return false;
}

@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{

inspector.visit("selector", dimSelector);
}
};
}
Expand All @@ -155,7 +163,7 @@ public boolean matches(boolean includeUnknown)
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
dimSelector.inspectRuntimeShape(inspector);
inspector.visit("selector", dimSelector);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.StringFormatExtractionFn;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.TestHelper;
Expand Down Expand Up @@ -704,6 +706,228 @@ private Iterable<ResultRow> runQuery(final GroupByQuery query, final Incremental
return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
}

@Test
public void testGroupByOnUnnestedFilterMatch()
{
// testGroupByOnUnnestedColumn but with filter to match single value
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null)
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Collections.singletonList(
makeRow(
query,
"2011-04-01",
"alias0", "a",
"rows", 2L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

@Test
public void testGroupByOnUnnestedNotFilterMatch()
{
// testGroupByOnUnnestedColumn but with negated filter to match everything except 1 value
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null))
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(
query,
"2011-04-01",
"alias0", "b",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "e",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "h",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "m",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "n",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "p",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "preferred",
"rows", 26L
),
makeRow(
query,
"2011-04-01",
"alias0", "t",
"rows", 4L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

@Test
public void testGroupByOnUnnestedNotFilterMatchNonexistentValue()
{
// testGroupByOnUnnestedColumn but with negated filter on nonexistent value to still match everything
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "noexist", null))
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(
query,
"2011-04-01",
"alias0", "a",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "b",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "e",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "h",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "m",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "n",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "p",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "preferred",
"rows", 26L
),
makeRow(
query,
"2011-04-01",
"alias0", "t",
"rows", 4L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

private Map<String, Object> makeContext()
{
return ImmutableMap.<String, Object>builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
Expand Down Expand Up @@ -300,12 +301,6 @@ public void test_two_levels_of_unnest_adapters()
});
}

private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier)
{
MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class));
Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression());
}

@Test
public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest_adapters()
{
Expand Down Expand Up @@ -350,6 +345,7 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest
return null;
});
}

@Test
public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters()
{
Expand Down Expand Up @@ -483,7 +479,6 @@ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR()
"(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))"
);
}

@Test
public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs()
{
Expand Down Expand Up @@ -734,6 +729,62 @@ public void test_pushdown_filters_unnested_dimension_outside()
});
}

@Test
public void testUnnestValueMatcherValueDoesntExist()
{
final String inputColumn = "multi-string5";
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");

final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator());

IncrementalIndex index = CLOSER.register(
segmentGenerator.generateIncrementalIndex(dataSegment, schemaInfo, Granularities.HOUR, 100)
);
IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
UnnestStorageAdapter withNullsStorageAdapter = new UnnestStorageAdapter(
adapter,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + inputColumn + "\"", null, ExprMacroTable.nil()),
null
);
Sequence<Cursor> cursorSequence = withNullsStorageAdapter.makeCursors(
null,
withNullsStorageAdapter.getInterval(),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);

cursorSequence.accumulate(null, (accumulated, cursor) -> {
ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();

DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
// wont match anything
ValueMatcher matcher = dimSelector.makeValueMatcher("x");
int count = 0;
while (!cursor.isDone()) {
Object dimSelectorVal = dimSelector.getObject();
if (dimSelectorVal == null) {
Assert.assertNull(dimSelectorVal);
Assert.assertTrue(matcher.matches(true));
}
Assert.assertFalse(matcher.matches(false));
cursor.advance();
count++;
}
Assert.assertEquals(count, 618);
return null;
});

}

public void testComputeBaseAndPostUnnestFilters(
Filter testQueryFilter,
String expectedBasePushDown,
Expand Down Expand Up @@ -777,6 +828,12 @@ public void testComputeBaseAndPostUnnestFilters(
actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString()
);
}

private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier)
{
MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class));
Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression());
}
}

/**
Expand Down
Loading