ScanAndSort query fails with NPE for simple queries (apache#15914)
* some stuff

* add dummy fields

* draft-fix

* rename test

* cleanup

* add null

* cleanup

* cleanup

* add test

* updates

* move check to constructor

* cleanup

* updates/etc

* fix some more

* add rowSignatureMode

* checkstyle/etc

* override

* missing msqIncompat

* fix test

* fixes

* undo

* updates

* remove param
kgyrtkirk authored Feb 24, 2024
1 parent 6145c8d commit 06deda9
Showing 18 changed files with 293 additions and 108 deletions.
@@ -59,7 +59,11 @@ public static RowSignature getAndValidateSignature(final ScanQuery scanQuery, final ObjectMapper jsonMapper)
RowSignature scanSignature;
try {
final String s = scanQuery.context().getString(DruidQuery.CTX_SCAN_SIGNATURE);
-scanSignature = jsonMapper.readValue(s, RowSignature.class);
+if (s == null) {
+  scanSignature = scanQuery.getRowSignature();
+} else {
+  scanSignature = jsonMapper.readValue(s, RowSignature.class);
+}
}
catch (JsonProcessingException e) {
throw new RuntimeException(e);
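For context, a minimal hypothetical repro of the failure this hunk guards against (the class name and the bare ObjectMapper are illustrative, not from the patch): for a simple scan-and-sort query nothing ever sets the scanSignature context key, so the string handed to Jackson is null and deserialization fails before the query runs. The fix falls back to the signature carried by the query itself.

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.segment.column.RowSignature;

public class ScanSignatureNpeRepro
{
  public static void main(String[] args) throws JsonProcessingException
  {
    final ObjectMapper jsonMapper = new ObjectMapper();
    // scanQuery.context().getString(DruidQuery.CTX_SCAN_SIGNATURE) yields null here,
    // because the planner never set the key for this query shape:
    final String s = null;
    // Pre-patch code path: readValue rejects the null source and the query dies;
    // post-patch, a null s routes to scanQuery.getRowSignature() instead.
    jsonMapper.readValue(s, RowSignature.class);
  }
}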
@@ -308,7 +308,7 @@ public void testExplain() throws IOException
+ "\"resultFormat\":\"compactedList\","
+ "\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],"
+ "\"legacy\":false,"
+ "\"context\":{\"__resultFormat\":\"object\",\"executionMode\":\"ASYNC\",\"scanSignature\":\"[{\\\"name\\\":\\\"__time\\\",\\\"type\\\":\\\"LONG\\\"},{\\\"name\\\":\\\"cnt\\\",\\\"type\\\":\\\"LONG\\\"},{\\\"name\\\":\\\"dim1\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"dim2\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"dim3\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"m1\\\",\\\"type\\\":\\\"FLOAT\\\"},{\\\"name\\\":\\\"m2\\\",\\\"type\\\":\\\"DOUBLE\\\"},{\\\"name\\\":\\\"unique_dim1\\\",\\\"type\\\":\\\"COMPLEX<hyperUnique>\\\"}]\",\"sqlQueryId\":\"queryId\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"dim1\",\"type\":\"STRING\"},{\"name\":\"dim2\",\"type\":\"STRING\"},{\"name\":\"dim3\",\"type\":\"STRING\"},{\"name\":\"cnt\",\"type\":\"LONG\"},{\"name\":\"m1\",\"type\":\"FLOAT\"},{\"name\":\"m2\",\"type\":\"DOUBLE\"},{\"name\":\"unique_dim1\",\"type\":\"COMPLEX<hyperUnique>\"}],\"columnMappings\":[{\"queryColumn\":\"__time\",\"outputColumn\":\"__time\"},{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"},{\"queryColumn\":\"dim2\",\"outputColumn\":\"dim2\"},{\"queryColumn\":\"dim3\",\"outputColumn\":\"dim3\"},{\"queryColumn\":\"cnt\",\"outputColumn\":\"cnt\"},{\"queryColumn\":\"m1\",\"outputColumn\":\"m1\"},{\"queryColumn\":\"m2\",\"outputColumn\":\"m2\"},{\"queryColumn\":\"unique_dim1\",\"outputColumn\":\"unique_dim1\"}]}],"
+ "\"context\":{\"__resultFormat\":\"object\",\"executionMode\":\"ASYNC\",\"scanSignature\":\"[{\\\"name\\\":\\\"__time\\\",\\\"type\\\":\\\"LONG\\\"},{\\\"name\\\":\\\"cnt\\\",\\\"type\\\":\\\"LONG\\\"},{\\\"name\\\":\\\"dim1\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"dim2\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"dim3\\\",\\\"type\\\":\\\"STRING\\\"},{\\\"name\\\":\\\"m1\\\",\\\"type\\\":\\\"FLOAT\\\"},{\\\"name\\\":\\\"m2\\\",\\\"type\\\":\\\"DOUBLE\\\"},{\\\"name\\\":\\\"unique_dim1\\\",\\\"type\\\":\\\"COMPLEX<hyperUnique>\\\"}]\",\"sqlQueryId\":\"queryId\"},\"columnTypes\":[\"LONG\",\"LONG\",\"STRING\",\"STRING\",\"STRING\",\"FLOAT\",\"DOUBLE\",\"COMPLEX<hyperUnique>\"],\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"dim1\",\"type\":\"STRING\"},{\"name\":\"dim2\",\"type\":\"STRING\"},{\"name\":\"dim3\",\"type\":\"STRING\"},{\"name\":\"cnt\",\"type\":\"LONG\"},{\"name\":\"m1\",\"type\":\"FLOAT\"},{\"name\":\"m2\",\"type\":\"DOUBLE\"},{\"name\":\"unique_dim1\",\"type\":\"COMPLEX<hyperUnique>\"}],\"columnMappings\":[{\"queryColumn\":\"__time\",\"outputColumn\":\"__time\"},{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"},{\"queryColumn\":\"dim2\",\"outputColumn\":\"dim2\"},{\"queryColumn\":\"dim3\",\"outputColumn\":\"dim3\"},{\"queryColumn\":\"cnt\",\"outputColumn\":\"cnt\"},{\"queryColumn\":\"m1\",\"outputColumn\":\"m1\"},{\"queryColumn\":\"m2\",\"outputColumn\":\"m2\"},{\"queryColumn\":\"unique_dim1\",\"outputColumn\":\"unique_dim1\"}]}],"
+ " RESOURCES=[{\"name\":\"foo\",\"type\":\"DATASOURCE\"}],"
+ " ATTRIBUTES={\"statementType\":\"SELECT\"}}",
String.valueOf(SqlStatementResourceTest.getResultRowsFromResponse(response).get(0))
@@ -296,6 +296,7 @@ List<ScanResultValue> querySegment(DataSegment dataSegment, List<String> columns
null,
columns,
false,
+null,
null
)
)
20 changes: 18 additions & 2 deletions processing/src/main/java/org/apache/druid/query/Druids.java
@@ -53,6 +53,7 @@
import org.apache.druid.query.timeseries.TimeseriesQuery;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnType;
import org.joda.time.Interval;

import javax.annotation.Nullable;
@@ -826,6 +827,7 @@ public static class ScanQueryBuilder
private Boolean legacy;
private ScanQuery.Order order;
private List<ScanQuery.OrderBy> orderBy;
+private List<ColumnType> columnTypes = null;

public ScanQuery build()
{
@@ -842,7 +844,8 @@ public ScanQuery build()
dimFilter,
columns,
legacy,
-context
+context,
+columnTypes
);
}

@@ -860,7 +863,8 @@ public static ScanQueryBuilder copy(ScanQuery query)
.columns(query.getColumns())
.legacy(query.isLegacy())
.context(query.getContext())
-.orderBy(query.getOrderBys());
+.orderBy(query.getOrderBys())
+.columnTypes(query.getColumnTypes());
}

public ScanQueryBuilder dataSource(String ds)
@@ -972,6 +976,18 @@ public ScanQueryBuilder orderBy(List<ScanQuery.OrderBy> orderBys)
this.orderBy = orderBys;
return this;
}

+public ScanQueryBuilder columnTypes(List<ColumnType> columnTypes)
+{
+  this.columnTypes = columnTypes;
+  return this;
+}
+
+public ScanQueryBuilder columnTypes(ColumnType... columnType)
+{
+  this.columnTypes = Arrays.asList(columnType);
+  return this;
+}
}

public static ScanQueryBuilder newScanQueryBuilder()
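A hedged usage sketch of the builder additions above (datasource and column names invented; eternityInterval() assumed from the existing ScanQueryBuilder API): supplying columnTypes alongside columns gives the query a fully typed row signature up front, so no guessing is needed later.

import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.column.ColumnType;

ScanQuery query = Druids.newScanQueryBuilder()
    .dataSource("foo")
    .eternityInterval()
    .columns("__time", "dim1", "m1")
    // Must line up 1:1 with columns(), or the ScanQuery constructor throws IAE.
    .columnTypes(ColumnType.LONG, ColumnType.STRING, ColumnType.FLOAT)
    .build();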
102 changes: 101 additions & 1 deletion processing/src/main/java/org/apache/druid/query/scan/ScanQuery.java
@@ -35,12 +35,18 @@
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.Druids;
+import org.apache.druid.query.InlineDataSource;
import org.apache.druid.query.Queries;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.operator.OffsetLimit;
import org.apache.druid.query.spec.QuerySegmentSpec;
+import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.segment.column.RowSignature.Builder;

import javax.annotation.Nullable;
import java.util.Collections;
@@ -186,6 +192,7 @@ public static Order fromString(String name)
private final List<OrderBy> orderBys;
private final Integer maxRowsQueuedForOrdering;
private final Integer maxSegmentPartitionsOrderedInMemory;
+private final List<ColumnType> columnTypes;

@JsonCreator
public ScanQuery(
@@ -201,7 +208,8 @@ public ScanQuery(
@JsonProperty("filter") DimFilter dimFilter,
@JsonProperty("columns") List<String> columns,
@JsonProperty("legacy") Boolean legacy,
@JsonProperty("context") Map<String, Object> context
@JsonProperty("context") Map<String, Object> context,
@JsonProperty("columnTypes") List<ColumnType> columnTypes
)
{
super(dataSource, querySegmentSpec, false, context);
@@ -225,6 +233,18 @@ public ScanQuery(
this.dimFilter = dimFilter;
this.columns = columns;
this.legacy = legacy;
+this.columnTypes = columnTypes;
+
+if (columnTypes != null) {
+  Preconditions.checkNotNull(columns, "columns may not be null if columnTypes are specified");
+  if (columns.size() != columnTypes.size()) {
+    throw new IAE(
+        "Inconsistent number of columns[%d] and columnTypes[%d] specified!",
+        columns.size(),
+        columnTypes.size()
+    );
+  }
+}

final Pair<List<OrderBy>, Order> ordering = verifyAndReconcileOrdering(orderBysFromUser, orderFromUser);
this.orderBys = Preconditions.checkNotNull(ordering.lhs);
@@ -418,6 +438,14 @@ public List<String> getColumns()
return columns;
}

+@Nullable
+@JsonProperty
+@JsonInclude(JsonInclude.Include.NON_EMPTY)
+public List<ColumnType> getColumnTypes()
+{
+  return columnTypes;
+}

/**
* Compatibility mode with the legacy scan-query extension.
*
@@ -674,4 +702,76 @@ public boolean equals(Object obj)
}
}


+/**
+ * Returns the RowSignature.
+ *
+ * If {@link ScanQuery#columnTypes} is not available it will do its best to fill in the types.
+ */
+@Nullable
+public RowSignature getRowSignature()
+{
+  return getRowSignature(false);
+}
+
+@Nullable
+public RowSignature getRowSignature(boolean defaultIsLegacy)
+{
+  if (columns == null || columns.isEmpty()) {
+    // Note: if no specific list of columns is provided, then since we can't predict what columns will come back, we
+    // unfortunately can't do array-based results. In this case, there is a major difference between standard and
+    // array-based results: the standard results will detect and return _all_ columns, whereas the array-based results
+    // will include none of them.
+    return RowSignature.empty();
+  }
+  if (columnTypes != null) {
+    Builder builder = RowSignature.builder();
+    for (int i = 0; i < columnTypes.size(); i++) {
+      builder.add(columns.get(i), columnTypes.get(i));
+    }
+    return builder.build();
+  }
+  return guessRowSignature(defaultIsLegacy);
+}
+
+private RowSignature guessRowSignature(boolean defaultIsLegacy)
+{
+  final RowSignature.Builder builder = RowSignature.builder();
+  if (Boolean.TRUE.equals(legacy) || (legacy == null && defaultIsLegacy)) {
+    builder.add(ScanQueryEngine.LEGACY_TIMESTAMP_KEY, null);
+  }
+  DataSource dataSource = getDataSource();
+  for (String columnName : columns) {
+    final ColumnType columnType = guessColumnType(columnName, virtualColumns, dataSource);
+    builder.add(columnName, columnType);
+  }
+  return builder.build();
+}
+
+/**
+ * Tries to guess the {@link ColumnType} from the {@link VirtualColumns} and the {@link DataSource}.
+ *
+ * We know the columnType for virtual columns and in some cases the columnTypes of the datasource as well.
+ */
+@Nullable
+private static ColumnType guessColumnType(String columnName, VirtualColumns virtualColumns, DataSource dataSource)
+{
+  final VirtualColumn virtualColumn = virtualColumns.getVirtualColumn(columnName);
+  if (virtualColumn != null) {
+    final ColumnCapabilities capabilities = virtualColumn.capabilities(c -> null, columnName);
+    if (capabilities != null) {
+      return capabilities.toColumnType();
+    }
+  } else {
+    if (dataSource instanceof InlineDataSource) {
+      InlineDataSource inlineDataSource = (InlineDataSource) dataSource;
+      ColumnCapabilities caps = inlineDataSource.getRowSignature().getColumnCapabilities(columnName);
+      if (caps != null) {
+        return caps.toColumnType();
+      }
+    }
+  }
+  // Unknown type. In the future, it would be nice to have a way to fill these in.
+  return null;
+}
}
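A hedged illustration of the resolution order implemented above (names invented; ExprMacroTable.nil() is used only to construct a virtual column): explicit columnTypes short-circuit all guessing; otherwise virtual-column capabilities and inline datasources are consulted, and any remaining column stays null-typed.

import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;

ScanQuery guessed = Druids.newScanQueryBuilder()
    .dataSource("foo")
    .eternityInterval()
    .virtualColumns(new ExpressionVirtualColumn("v0", "1 + 1", ColumnType.LONG, ExprMacroTable.nil()))
    .columns("v0", "dim1")
    .build();
// guessed.getRowSignature() -> v0:LONG (known from the virtual column's capabilities),
// dim1:null (a plain table column's type cannot be guessed here).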
@@ -42,25 +42,18 @@
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.io.Closer;
-import org.apache.druid.query.DataSource;
import org.apache.druid.query.FrameSignaturePair;
import org.apache.druid.query.GenericQueryMetricsFactory;
-import org.apache.druid.query.InlineDataSource;
import org.apache.druid.query.IterableRowsCursorHelper;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryMetrics;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryToolChest;
import org.apache.druid.query.aggregation.MetricManipulationFn;
import org.apache.druid.segment.Cursor;
-import org.apache.druid.segment.VirtualColumn;
-import org.apache.druid.segment.column.ColumnCapabilities;
-import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.utils.CloseableUtils;

-import javax.annotation.Nullable;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.Iterator;
@@ -178,50 +171,8 @@ public QueryRunner<ScanResultValue> preMergeQueryDecoration(final QueryRunner<ScanResultValue> runner)
@Override
public RowSignature resultArraySignature(final ScanQuery query)
{
-if (query.getColumns() == null || query.getColumns().isEmpty()) {
-  // Note: if no specific list of columns is provided, then since we can't predict what columns will come back, we
-  // unfortunately can't do array-based results. In this case, there is a major difference between standard and
-  // array-based results: the standard results will detect and return _all_ columns, whereas the array-based results
-  // will include none of them.
-  return RowSignature.empty();
-} else {
-  final RowSignature.Builder builder = RowSignature.builder();
-
-  if (query.withNonNullLegacy(scanQueryConfig).isLegacy()) {
-    builder.add(ScanQueryEngine.LEGACY_TIMESTAMP_KEY, null);
-  }
-
-  for (String columnName : query.getColumns()) {
-    // With the Scan query we only know the columnType for virtual columns. Let's report those, at least.
-    final ColumnType columnType;
-
-    final VirtualColumn virtualColumn = query.getVirtualColumns().getVirtualColumn(columnName);
-    if (virtualColumn != null) {
-      final ColumnCapabilities capabilities = virtualColumn.capabilities(c -> null, columnName);
-      columnType = capabilities != null ? capabilities.toColumnType() : null;
-    } else {
-      columnType = getDataSourceColumnType(query.getDataSource(), columnName);
-    }
-
-    builder.add(columnName, columnType);
-  }
-
-  return builder.build();
-}
-}
-
-@Nullable
-private ColumnType getDataSourceColumnType(DataSource dataSource, String columnName)
-{
-  if (dataSource instanceof InlineDataSource) {
-    InlineDataSource inlineDataSource = (InlineDataSource) dataSource;
-    ColumnCapabilities caps = inlineDataSource.getRowSignature().getColumnCapabilities(columnName);
-    if (caps != null) {
-      return caps.toColumnType();
-    }
-  }
-  // Unknown type. In the future, it would be nice to have a way to fill these in.
-  return null;
+boolean defaultIsLegacy = scanQueryConfig.isLegacy();
+return query.getRowSignature(defaultIsLegacy);
}

/**
@@ -392,4 +392,34 @@ public enum Finalization
*/
UNKNOWN
}

+/**
+ * Builds a safe {@link RowSignature}.
+ *
+ * The new RowSignature will not contain `null` types - they will be replaced by STRING.
+ */
+public RowSignature buildSafeSignature(ImmutableList<String> requestedColumnNames)
+{
+  Builder builder = new Builder();
+  for (String columnName : requestedColumnNames) {
+    ColumnType columnType = columnTypes.get(columnName);
+    if (columnType == null) {
+      columnType = ColumnType.STRING;
+    }
+    builder.add(columnName, columnType);
+  }
+  return builder.build();
+}
+
+/**
+ * Returns the column types in the order the columns appear in the signature.
+ */
+public List<ColumnType> getColumnTypes()
+{
+  List<ColumnType> ret = new ArrayList<ColumnType>();
+  for (String colName : columnNames) {
+    ret.add(columnTypes.get(colName));
+  }
+  return ret;
+}
}
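A hedged sketch of the two RowSignature helpers added above (column names invented): buildSafeSignature coerces unknown types to STRING, while getColumnTypes preserves column order and keeps the nulls.

import com.google.common.collect.ImmutableList;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;

RowSignature sig = RowSignature.builder()
    .add("dim1", ColumnType.STRING)
    .add("mystery", null) // type unknown at plan time
    .build();

sig.getColumnTypes();        // [STRING, null], in columnNames order
sig.buildSafeSignature(ImmutableList.of("mystery", "dim1"));
                             // mystery:STRING, dim1:STRING - the null is coerced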
@@ -67,6 +67,7 @@ public void testSerialization() throws Exception
null,
Arrays.asList("market", "quality", "index"),
null,
+null,
null
);

@@ -101,6 +102,7 @@ public void testSerializationWithTimeOrder() throws Exception
null,
Arrays.asList("market", "quality", "index", "__time"),
null,
+null,
null
);

@@ -139,6 +141,7 @@ public void testSerializationWithOrderBy() throws Exception
null,
Arrays.asList("market", "quality", "index", "__time"),
null,
+null,
null
);

@@ -168,6 +171,7 @@ public void testSerializationLegacyString() throws Exception
null,
Arrays.asList("market", "quality", "index"),
null,
+null,
null
);

(Diffs for the remaining changed files are not rendered here.)