Skip to content

Commit

Permalink
fix incorrect unnest dimension cursor value matcher implementation (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
clintropolis authored Oct 19, 2023
1 parent 0825251 commit ba18ab2
Show file tree
Hide file tree
Showing 3 changed files with 299 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,21 @@ public ValueMatcher makeValueMatcher(@Nullable String value)
@Override
public boolean matches(boolean includeUnknown)
{
return includeUnknown;
// don't match anything, except for null values when includeUnknown is true
if (includeUnknown) {
if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() <= 0) {
return true;
}
final int rowId = indexedIntsForCurrentRow.get(index);
return lookupName(rowId) == null;
}
return false;
}

@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{

inspector.visit("selector", dimSelector);
}
};
}
Expand All @@ -155,7 +163,7 @@ public boolean matches(boolean includeUnknown)
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
dimSelector.inspectRuntimeShape(inspector);
inspector.visit("selector", dimSelector);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.StringFormatExtractionFn;
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.TestHelper;
Expand Down Expand Up @@ -704,6 +706,228 @@ private Iterable<ResultRow> runQuery(final GroupByQuery query, final Incremental
return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query);
}

@Test
public void testGroupByOnUnnestedFilterMatch()
{
// testGroupByOnUnnestedColumn but with filter to match single value
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null)
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Collections.singletonList(
makeRow(
query,
"2011-04-01",
"alias0", "a",
"rows", 2L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

@Test
public void testGroupByOnUnnestedNotFilterMatch()
{
// testGroupByOnUnnestedColumn but with negated filter to match everything except 1 value
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null))
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(
query,
"2011-04-01",
"alias0", "b",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "e",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "h",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "m",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "n",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "p",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "preferred",
"rows", 26L
),
makeRow(
query,
"2011-04-01",
"alias0", "t",
"rows", 4L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

@Test
public void testGroupByOnUnnestedNotFilterMatchNonexistentValue()
{
// testGroupByOnUnnestedColumn but with negated filter on nonexistent value to still match everything
cannotVectorize();

final DataSource unnestDataSource = UnnestDataSource.create(
new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
new ExpressionVirtualColumn(
QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST,
"\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"",
null,
ExprMacroTable.nil()
),
null
);

GroupByQuery query = makeQueryBuilder()
.setDataSource(unnestDataSource)
.setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
.setDimensions(
new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")
)
.setDimFilter(
NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "noexist", null))
)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setGranularity(QueryRunnerTestHelper.ALL_GRAN)
.addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING)
.build();

List<ResultRow> expectedResults = Arrays.asList(
makeRow(
query,
"2011-04-01",
"alias0", "a",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "b",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "e",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "h",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "m",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "n",
"rows", 2L
),
makeRow(
query,
"2011-04-01",
"alias0", "p",
"rows", 6L
),
makeRow(
query,
"2011-04-01",
"alias0", "preferred",
"rows", 26L
),
makeRow(
query,
"2011-04-01",
"alias0", "t",
"rows", 4L
)
);

Iterable<ResultRow> results = runQuery(query, TestIndex.getIncrementalTestIndex());
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column");
}

private Map<String, Object> makeContext()
{
return ImmutableMap.<String, Object>builder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.druid.query.filter.EqualityFilter;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
Expand Down Expand Up @@ -300,12 +301,6 @@ public void test_two_levels_of_unnest_adapters()
});
}

private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier)
{
MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class));
Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression());
}

@Test
public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest_adapters()
{
Expand Down Expand Up @@ -350,6 +345,7 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest
return null;
});
}

@Test
public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters()
{
Expand Down Expand Up @@ -483,7 +479,6 @@ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR()
"(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))"
);
}

@Test
public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs()
{
Expand Down Expand Up @@ -734,6 +729,62 @@ public void test_pushdown_filters_unnested_dimension_outside()
});
}

@Test
public void testUnnestValueMatcherValueDoesntExist()
{
final String inputColumn = "multi-string5";
final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench");

final DataSegment dataSegment = DataSegment.builder()
.dataSource("foo")
.interval(schemaInfo.getDataInterval())
.version("1")
.shardSpec(new LinearShardSpec(0))
.size(0)
.build();
final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator());

IncrementalIndex index = CLOSER.register(
segmentGenerator.generateIncrementalIndex(dataSegment, schemaInfo, Granularities.HOUR, 100)
);
IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
UnnestStorageAdapter withNullsStorageAdapter = new UnnestStorageAdapter(
adapter,
new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + inputColumn + "\"", null, ExprMacroTable.nil()),
null
);
Sequence<Cursor> cursorSequence = withNullsStorageAdapter.makeCursors(
null,
withNullsStorageAdapter.getInterval(),
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);

cursorSequence.accumulate(null, (accumulated, cursor) -> {
ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();

DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME));
// wont match anything
ValueMatcher matcher = dimSelector.makeValueMatcher("x");
int count = 0;
while (!cursor.isDone()) {
Object dimSelectorVal = dimSelector.getObject();
if (dimSelectorVal == null) {
Assert.assertNull(dimSelectorVal);
Assert.assertTrue(matcher.matches(true));
}
Assert.assertFalse(matcher.matches(false));
cursor.advance();
count++;
}
Assert.assertEquals(count, 618);
return null;
});

}

public void testComputeBaseAndPostUnnestFilters(
Filter testQueryFilter,
String expectedBasePushDown,
Expand Down Expand Up @@ -777,6 +828,12 @@ public void testComputeBaseAndPostUnnestFilters(
actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString()
);
}

private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier)
{
MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class));
Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression());
}
}

/**
Expand Down

0 comments on commit ba18ab2

Please sign in to comment.