Skip to content

Commit

Permalink
Fix an issue with passing order by and limit to realtime tasks (apach…
Browse files Browse the repository at this point in the history
…e#15301)

While running queries on real time tasks using MSQ, there is an issue with queries with certain order by columns.

If the query specifies an ordering on a non-time column, the query is still planned, because MSQ supports it. However, it throws an exception once it is passed to real-time tasks, because the native query stack does not support such an ordering. This PR resolves this by removing the ordering from the query before contacting real-time tasks.

    Fixes a bug with MSQ while reading data from real time tasks with non time ordering
  • Loading branch information
adarshsanjeev authored Nov 2, 2023
1 parent b82ad59 commit 22443ab
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import org.apache.druid.msq.input.table.SegmentWithDescriptor;
import org.apache.druid.msq.querykit.BaseLeafFrameProcessor;
import org.apache.druid.msq.querykit.QueryKitUtils;
import org.apache.druid.query.Druids;
import org.apache.druid.query.IterableRowsCursorHelper;
import org.apache.druid.query.filter.Filter;
import org.apache.druid.query.scan.ScanQuery;
Expand All @@ -78,6 +79,7 @@
import org.joda.time.Interval;

import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -172,13 +174,31 @@ public static Sequence<Object[]> mappingFunction(Sequence<ScanResultValue> input
}).map(List::toArray);
}

/**
 * Produces the form of a scan query that is handed to a data server.
 *
 * A query that has no time ordering but does carry order-by columns is copied with an empty order-by list and
 * with the limit reset through {@code limit(0)}, since the native query stack does not support such an ordering.
 * Any other query is returned as the same instance, unchanged.
 *
 * NOTE(review): the limit is presumably dropped because it is only meaningful after the ordering has been
 * applied -- confirm against the stage that performs the final sort.
 *
 * @param scanQuery the scan query about to be sent to a data server
 * @return a copy without ordering and limit when the ordering is non-time, otherwise {@code scanQuery} itself
 */
private static ScanQuery prepareScanQueryForDataServer(@NotNull ScanQuery scanQuery)
{
  // Time-ordered queries, and queries without any order-by columns, are passed through as they are.
  final boolean timeOrdered = !ScanQuery.Order.NONE.equals(scanQuery.getTimeOrder());
  if (timeOrdered || scanQuery.getOrderBys().isEmpty()) {
    return scanQuery;
  }

  final Druids.ScanQueryBuilder unorderedCopy = Druids.ScanQueryBuilder.copy(scanQuery);
  unorderedCopy.orderBy(ImmutableList.of());
  unorderedCopy.limit(0);
  return unorderedCopy.build();
}

@Override
protected ReturnOrAwait<Unit> runWithLoadedSegment(final SegmentWithDescriptor segment) throws IOException
{
if (cursor == null) {
ScanQuery preparedQuery = prepareScanQueryForDataServer(query);
final Pair<LoadedSegmentDataProvider.DataServerQueryStatus, Yielder<Object[]>> statusSequencePair =
segment.fetchRowsFromDataServer(
query,
preparedQuery,
ScanQueryFrameProcessor::mappingFunction,
closer
);
Expand All @@ -188,7 +208,7 @@ protected ReturnOrAwait<Unit> runWithLoadedSegment(final SegmentWithDescriptor s
return runWithSegment(segment);
}

RowSignature rowSignature = ScanQueryKit.getAndValidateSignature(query, jsonMapper);
RowSignature rowSignature = ScanQueryKit.getAndValidateSignature(preparedQuery, jsonMapper);
Pair<Cursor, Closeable> cursorFromIterable = IterableRowsCursorHelper.getCursorFromYielder(
statusSequencePair.rhs,
rowSignature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
Expand All @@ -50,13 +51,15 @@
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
import org.hamcrest.CoreMatchers;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Map;

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;

Expand Down Expand Up @@ -148,6 +151,69 @@ public void testSelectWithLoadedSegmentsOnFoo() throws IOException
.verifyResults();
}

/**
 * Runs {@code select cnt, dim1 from foo order by dim1} with the realtime query context and checks that the
 * query handed to the (mocked) data server passes {@code ScanQuery.verifyOrderByForNativeExecution} and has a
 * scan rows limit of {@code Long.MAX_VALUE}, while the planned MSQ query still carries the ordering on dim1.
 */
@Test
public void testSelectWithLoadedSegmentsOnFooWithOrderBy() throws IOException
{
  final RowSignature resultSignature =
      RowSignature.builder()
                  .add("cnt", ColumnType.LONG)
                  .add("dim1", ColumnType.STRING)
                  .build();

  // Stub the data server fetch: validate the query it receives, then answer with two rows.
  doAnswer(
      invocation -> {
        final ScanQuery receivedQuery = invocation.getArgument(0);
        ScanQuery.verifyOrderByForNativeExecution(receivedQuery);
        Assert.assertEquals(Long.MAX_VALUE, receivedQuery.getScanRowsLimit());

        // Built on every invocation so that each call gets its own yielder.
        final ImmutableList<Object[]> realtimeRows = ImmutableList.of(
            new Object[]{1L, "qwe"},
            new Object[]{1L, "tyu"}
        );
        return Pair.of(
            LoadedSegmentDataProvider.DataServerQueryStatus.SUCCESS,
            Yielders.each(Sequences.simple(realtimeRows))
        );
      }
  ).when(loadedSegmentDataProvider)
   .fetchRowsFromDataServer(any(), any(), any(), any());

  // The planned query is expected to keep the ascending ordering on dim1.
  final ScanQuery expectedQuery =
      newScanQueryBuilder()
          .dataSource(CalciteTests.DATASOURCE1)
          .intervals(querySegmentSpec(Filtration.eternity()))
          .columns("cnt", "dim1")
          .orderBy(ImmutableList.of(new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING)))
          .context(defaultScanQueryContext(REALTIME_QUERY_CTX, resultSignature))
          .build();

  final MSQSpec expectedSpec =
      MSQSpec.builder()
             .query(expectedQuery)
             .columnMappings(ColumnMappings.identity(resultSignature))
             .tuningConfig(MSQTuningConfig.defaultConfig())
             .destination(TaskReportMSQDestination.INSTANCE)
             .build();

  // Expected output, sorted by dim1; the last two rows are the ones returned by the stub above.
  final ImmutableList<Object[]> expectedRows = ImmutableList.of(
      new Object[]{1L, ""},
      new Object[]{1L, "1"},
      new Object[]{1L, "10.1"},
      new Object[]{1L, "2"},
      new Object[]{1L, "abc"},
      new Object[]{1L, "def"},
      new Object[]{1L, "qwe"},
      new Object[]{1L, "tyu"}
  );

  testSelectQuery()
      .setSql("select cnt, dim1 from foo order by dim1")
      .setExpectedMSQSpec(expectedSpec)
      .setQueryContext(REALTIME_QUERY_CTX)
      .setExpectedRowSignature(resultSignature)
      .setExpectedResultRows(expectedRows)
      .verifyResults();
}

@Test
public void testGroupByWithLoadedSegmentsOnFoo() throws IOException
{
Expand Down

0 comments on commit 22443ab

Please sign in to comment.