diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 5db2dd414e03..a9880c705f9c 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -126,13 +126,21 @@ public ValueMatcher makeValueMatcher(@Nullable String value) @Override public boolean matches(boolean includeUnknown) { - return includeUnknown; + // don't match anything, except for null values when includeUnknown is true + if (includeUnknown) { + if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() <= 0) { + return true; + } + final int rowId = indexedIntsForCurrentRow.get(index); + return lookupName(rowId) == null; + } + return false; } @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - + inspector.visit("selector", dimSelector); } }; } @@ -155,7 +163,7 @@ public boolean matches(boolean includeUnknown) @Override public void inspectRuntimeShape(RuntimeShapeInspector inspector) { - dimSelector.inspectRuntimeShape(inspector); + inspector.visit("selector", dimSelector); } }; } diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index d0a7d94c3967..62caef2493a2 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -40,6 +40,8 @@ import org.apache.druid.query.dimension.ExtractionDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.StringFormatExtractionFn; +import org.apache.druid.query.filter.EqualityFilter; +import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; import org.apache.druid.segment.IncrementalIndexSegment; import org.apache.druid.segment.TestHelper; @@ -704,6 +706,228 @@ private Iterable runQuery(final GroupByQuery query, final Incremental return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); } + @Test + public void testGroupByOnUnnestedFilterMatch() + { + // testGroupByOnUnnestedColumn but with filter to match single value + cannotVectorize(); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setDimensions( + new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") + ) + .setDimFilter( + new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null) + ) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING) + .build(); + + List expectedResults = Collections.singletonList( + makeRow( + query, + "2011-04-01", + "alias0", "a", + "rows", 2L + ) + ); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + } + + @Test + public void testGroupByOnUnnestedNotFilterMatch() + { + // testGroupByOnUnnestedColumn but with negated filter to match everything except 1 value + cannotVectorize(); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setDimensions( + new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") + ) + .setDimFilter( + NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "a", null)) + ) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING) + .build(); + + List expectedResults = Arrays.asList( + makeRow( + query, + "2011-04-01", + "alias0", "b", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "e", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "h", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "m", + "rows", 6L + ), + makeRow( + query, + "2011-04-01", + "alias0", "n", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "p", + "rows", 6L + ), + makeRow( + query, + "2011-04-01", + "alias0", "preferred", + "rows", 26L + ), + makeRow( + query, + "2011-04-01", + "alias0", "t", + "rows", 4L + ) + ); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + } + + @Test + public void testGroupByOnUnnestedNotFilterMatchNonexistentValue() + { + // testGroupByOnUnnestedColumn but with negated filter on nonexistent value to still match everything + cannotVectorize(); + + final DataSource unnestDataSource = UnnestDataSource.create( + new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), + null + ); + + GroupByQuery query = makeQueryBuilder() + .setDataSource(unnestDataSource) + .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) + .setDimensions( + new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") + ) + .setDimFilter( + NotDimFilter.of(new EqualityFilter(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, ColumnType.STRING, "noexist", null)) + ) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setGranularity(QueryRunnerTestHelper.ALL_GRAN) + .addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING) + .build(); + + List expectedResults = Arrays.asList( + makeRow( + query, + "2011-04-01", + "alias0", "a", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "b", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "e", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "h", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "m", + "rows", 6L + ), + makeRow( + query, + "2011-04-01", + "alias0", "n", + "rows", 2L + ), + makeRow( + query, + "2011-04-01", + "alias0", "p", + "rows", 6L + ), + makeRow( + query, + "2011-04-01", + "alias0", "preferred", + "rows", 26L + ), + makeRow( + query, + "2011-04-01", + "alias0", "t", + "rows", 4L + ) + ); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); + TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); + } + private Map makeContext() { return ImmutableMap.builder() diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 286a636e89a3..6df1766bf984 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -35,6 +35,7 @@ import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ValueType; @@ -300,12 +301,6 @@ public void test_two_levels_of_unnest_adapters() }); } - private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) - { - MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); - Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression()); - } - @Test public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest_adapters() { @@ -350,6 +345,7 @@ public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest return null; }); } + @Test public void test_nested_filters_unnested_and_original_dimension_with_unnest_adapters() { @@ -483,7 +479,6 @@ public void test_nested_filters_unnested_and_topLevelORAnd3filtersInOR() "(unnested-multi-string1 = 3 || (newcol = 2 && multi-string1 = 2 && unnested-multi-string1 = 1))" ); } - @Test public void test_nested_filters_unnested_and_topLevelAND3filtersInORWithNestedOrs() { @@ -734,6 +729,62 @@ public void test_pushdown_filters_unnested_dimension_outside() }); } + @Test + public void testUnnestValueMatcherValueDoesntExist() + { + final String inputColumn = "multi-string5"; + final GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("expression-testbench"); + + final DataSegment dataSegment = DataSegment.builder() + .dataSource("foo") + .interval(schemaInfo.getDataInterval()) + .version("1") + .shardSpec(new LinearShardSpec(0)) + .size(0) + .build(); + final SegmentGenerator segmentGenerator = CLOSER.register(new SegmentGenerator()); + + IncrementalIndex index = CLOSER.register( + segmentGenerator.generateIncrementalIndex(dataSegment, schemaInfo, Granularities.HOUR, 100) + ); + IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index); + UnnestStorageAdapter withNullsStorageAdapter = new UnnestStorageAdapter( + adapter, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + inputColumn + "\"", null, ExprMacroTable.nil()), + null + ); + Sequence cursorSequence = withNullsStorageAdapter.makeCursors( + null, + withNullsStorageAdapter.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + cursorSequence.accumulate(null, (accumulated, cursor) -> { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); + // wont match anything + ValueMatcher matcher = dimSelector.makeValueMatcher("x"); + int count = 0; + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + Assert.assertTrue(matcher.matches(true)); + } + Assert.assertFalse(matcher.matches(false)); + cursor.advance(); + count++; + } + Assert.assertEquals(count, 618); + return null; + }); + + } + public void testComputeBaseAndPostUnnestFilters( Filter testQueryFilter, String expectedBasePushDown, @@ -777,6 +828,12 @@ public void testComputeBaseAndPostUnnestFilters( actualPostUnnestFilter == null ? "" : actualPostUnnestFilter.toString() ); } + + private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) + { + MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); + Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression()); + } } /**