From dc8d2192c340910f8b75fd0b3bca16a3dfbb0b88 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 16 Oct 2023 10:37:32 +0530 Subject: [PATCH] Introduce natural comparator for types that don't have a StringComparator (#15145) Fixes a bug when executing queries with the ordering of arrays --- .../msq/querykit/groupby/GroupByQueryKit.java | 9 + .../apache/druid/msq/exec/MSQArraysTest.java | 334 +++++++++++++----- .../apache/druid/msq/exec/MSQSelectTest.java | 3 - .../druid/jackson/StringComparatorModule.java | 3 +- .../druid/query/filter/BoundDimFilter.java | 3 +- .../GrouperBufferComparatorUtils.java | 7 +- .../epinephelinae/RowBasedGrouperHelper.java | 3 +- .../query/ordering/StringComparator.java | 2 + .../query/ordering/StringComparators.java | 56 ++- .../query/ordering/StringComparatorsTest.java | 55 ++- .../druid/sql/calcite/filtration/Bounds.java | 2 +- .../druid/sql/calcite/planner/Calcites.java | 9 +- .../sql/calcite/planner/CalcitesTest.java | 16 + 13 files changed, 380 insertions(+), 122 deletions(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java index 4ea56f73881d..b12048a315f2 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java @@ -311,8 +311,17 @@ private static void validateQuery(final GroupByQuery query) } } + /** + * Only allow ordering the queries from the MSQ engine, ignoring the comparator that is set in the query. This + * function checks if it is safe to do so, which is the case if the natural comparator is used for the dimension. + * Since MSQ executes the queries planned by the SQL layer, this is a sanity check as we always add the natural + * comparator for the dimensions there + */ private static boolean isNaturalComparator(final ValueType type, final StringComparator comparator) { + if (StringComparators.NATURAL.equals(comparator)) { + return true; + } return ((type == ValueType.STRING && StringComparators.LEXICOGRAPHIC.equals(comparator)) || (type.isNumeric() && StringComparators.NUMERIC.equals(comparator))) && !type.isArray(); diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java index d2696f232820..2b152cfbe1c4 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java @@ -30,9 +30,11 @@ import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.util.MultiStageQueryContext; +import org.apache.druid.query.DataSource; import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.Query; import org.apache.druid.query.expression.TestExprMacroTable; +import org.apache.druid.query.scan.ScanQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; @@ -43,6 +45,7 @@ import org.apache.druid.timeline.SegmentId; import org.apache.druid.utils.CompressionUtils; import org.hamcrest.CoreMatchers; +import org.junit.Before; import org.junit.Test; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.runner.RunWith; @@ -67,6 +70,9 @@ @RunWith(Parameterized.class) public class MSQArraysTest extends MSQTestBase { + private String dataFileNameJsonString; + private String dataFileSignatureJsonString; + private DataSource dataFileExternalDataSource; @Parameterized.Parameters(name = "{index}:with context {0}") public static Collection data() @@ -86,6 +92,40 @@ public static Collection data() @Parameterized.Parameter(1) public Map context; + @Before + public void setup() throws IOException + { + // Read the file and make the name available to the tests + File dataFile = temporaryFolder.newFile(); + final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() + .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); + final InputStream decompressing = CompressionUtils.decompress( + resourceStream, + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ); + Files.copy(decompressing, dataFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + decompressing.close(); + + dataFileNameJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFile); + + RowSignature dataFileSignature = RowSignature.builder() + .add("timestamp", ColumnType.STRING) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .build(); + dataFileSignatureJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFileSignature); + + dataFileExternalDataSource = new ExternalDataSource( + new LocalInputSource(null, null, ImmutableList.of(dataFile)), + new JsonInputFormat(null, null, null, null, null), + dataFileSignature + ); + } + /** * Tests the behaviour of INSERT query when arrayIngestMode is set to none (default) and the user tries to ingest * string arrays @@ -135,7 +175,7 @@ public void testInsertOnFoo1WithMultiValueToArrayGroupByWithDefaultContext() * Tests the INSERT query when 'auto' type is set */ @Test - public void testInsertArraysAutoType() throws IOException + public void testInsertArraysAutoType() { List expectedRows = Arrays.asList( new Object[]{1672531200000L, null, null, null}, @@ -164,18 +204,6 @@ public void testInsertArraysAutoType() throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_USE_AUTO_SCHEMAS, true); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -183,7 +211,7 @@ public void testInsertArraysAutoType() throws IOException + " arrayDouble\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '[{\"name\": \"timestamp\", \"type\": \"STRING\"}, {\"name\": \"arrayString\", \"type\": \"COMPLEX\"}, {\"name\": \"arrayLong\", \"type\": \"COMPLEX\"}, {\"name\": \"arrayDouble\", \"type\": \"COMPLEX\"}]'\n" + " )\n" @@ -200,39 +228,11 @@ public void testInsertArraysAutoType() throws IOException * types as well */ @Test - public void testInsertArraysWithStringArraysAsMVDs() throws IOException + public void testInsertArraysWithStringArraysAsMVDs() { - RowSignature rowSignatureWithoutTimeAndStringColumns = - RowSignature.builder() - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .build(); - - - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .addAll(rowSignatureWithoutTimeAndStringColumns) - .build(); - final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "mvd"); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -243,9 +243,9 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ") PARTITIONED BY ALL") .setQueryContext(adjustedContext) @@ -262,7 +262,7 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException * array types */ @Test - public void testInsertArraysAsArrays() throws IOException + public void testInsertArraysAsArrays() { final List expectedRows = Arrays.asList( new Object[]{ @@ -403,11 +403,6 @@ public void testInsertArraysAsArrays() throws IOException .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) .build(); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .addAll(rowSignatureWithoutTimeColumn) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) .addAll(rowSignatureWithoutTimeColumn) @@ -416,18 +411,6 @@ public void testInsertArraysAsArrays() throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "array"); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -438,9 +421,9 @@ public void testInsertArraysAsArrays() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ") PARTITIONED BY ALL") .setQueryContext(adjustedContext) @@ -451,26 +434,26 @@ public void testInsertArraysAsArrays() throws IOException } @Test - public void testSelectOnArraysWithArrayIngestModeAsNone() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsNone() { testSelectOnArrays("none"); } @Test - public void testSelectOnArraysWithArrayIngestModeAsMVD() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsMVD() { testSelectOnArrays("mvd"); } @Test - public void testSelectOnArraysWithArrayIngestModeAsArray() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsArray() { testSelectOnArrays("array"); } // Tests the behaviour of the select with the given arrayIngestMode. The expectation should be the same, since the // arrayIngestMode should only determine how the array gets ingested at the end. - public void testSelectOnArrays(String arrayIngestMode) throws IOException + public void testSelectOnArrays(String arrayIngestMode) { final List expectedRows = Arrays.asList( new Object[]{ @@ -611,11 +594,6 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) .build(); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .addAll(rowSignatureWithoutTimeColumn) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) .addAll(rowSignatureWithoutTimeColumn) @@ -634,24 +612,8 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, arrayIngestMode); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - Query expectedQuery = newScanQueryBuilder() - .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(tmpFile)), - new JsonInputFormat(null, null, null, null, null), - fileSignature - )) + .dataSource(dataFileExternalDataSource) .intervals(querySegmentSpec(Filtration.eternity())) .columns( "arrayDouble", @@ -681,9 +643,9 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ")") .setQueryContext(adjustedContext) @@ -708,6 +670,192 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException .verifyResults(); } + @Test + public void testScanWithOrderByOnStringArray() + { + final List expectedRows = Arrays.asList( + new Object[]{Arrays.asList("d", "e")}, + new Object[]{Arrays.asList("d", "e")}, + new Object[]{Arrays.asList("b", "c")}, + new Object[]{Arrays.asList("b", "c")}, + new Object[]{Arrays.asList("a", "b", "c")}, + new Object[]{Arrays.asList("a", "b", "c")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null} + ); + + + RowSignature rowSignature = RowSignature.builder() + .add("arrayString", ColumnType.STRING_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayString", ColumnType.STRING_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(dataFileExternalDataSource) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayString") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayString", ScanQuery.Order.DESCENDING))) + .context(defaultScanQueryContext(context, scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayString\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + dataFileSignatureJsonString + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayString DESC") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayString", "arrayString") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } + + @Test + public void testScanWithOrderByOnLongArray() + { + final List expectedRows = Arrays.asList( + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L, 4L)}, + new Object[]{Arrays.asList(1L, 2L, 3L, 4L)}, + new Object[]{Arrays.asList(1L, 4L)}, + new Object[]{Arrays.asList(1L, 4L)}, + new Object[]{Arrays.asList(2L, 3L)}, + new Object[]{Arrays.asList(2L, 3L)} + ); + + RowSignature rowSignature = RowSignature.builder() + .add("arrayLong", ColumnType.LONG_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayLong", ColumnType.LONG_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(dataFileExternalDataSource) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayLong") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayLong", ScanQuery.Order.ASCENDING))) + .context(defaultScanQueryContext(context, scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayLong\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + dataFileSignatureJsonString + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayLong") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayLong", "arrayLong") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } + + @Test + public void testScanWithOrderByOnDoubleArray() + { + final List expectedRows = Arrays.asList( + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 3.3d)}, + new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)}, + new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)}, + new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)}, + new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)} + ); + + RowSignature rowSignature = RowSignature.builder() + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(dataFileExternalDataSource) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayDouble") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayDouble", ScanQuery.Order.ASCENDING))) + .context(defaultScanQueryContext(context, scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayDouble\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + dataFileSignatureJsonString + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayDouble") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayDouble", "arrayDouble") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } private List expectedMultiValueFooRowsToArray() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index 30bbb850fbcd..219af6a31883 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -89,7 +89,6 @@ import org.mockito.ArgumentMatchers; import org.mockito.Mockito; -import javax.annotation.Nonnull; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -2534,7 +2533,6 @@ public void testUnionAllUsingUnionDataSource() .verifyResults(); } - @Nonnull private List expectedMultiValueFooRowsGroup() { ArrayList expected = new ArrayList<>(); @@ -2553,7 +2551,6 @@ private List expectedMultiValueFooRowsGroup() return expected; } - @Nonnull private List expectedMultiValueFooRowsGroupByList() { ArrayList expected = new ArrayList<>(); diff --git a/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java b/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java index 4b7ea2953db1..d0e1f1128bfa 100644 --- a/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java +++ b/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java @@ -37,7 +37,8 @@ public StringComparatorModule() new NamedType(StringComparators.AlphanumericComparator.class, StringComparators.ALPHANUMERIC_NAME), new NamedType(StringComparators.StrlenComparator.class, StringComparators.STRLEN_NAME), new NamedType(StringComparators.NumericComparator.class, StringComparators.NUMERIC_NAME), - new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME) + new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME), + new NamedType(StringComparators.NaturalComparator.class, StringComparators.NATURAL_NAME) ); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 17cecc411511..412ec7e57265 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -100,7 +100,8 @@ public BoundDimFilter( boolean orderingIsAlphanumeric = this.ordering.equals(StringComparators.ALPHANUMERIC); Preconditions.checkState( alphaNumeric == orderingIsAlphanumeric, - "mismatch between alphanumeric and ordering property"); + "mismatch between alphanumeric and ordering property" + ); } } this.extractionFn = extractionFn; diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java index 08ba4ba2ee6c..109ddb66f3d1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java @@ -434,6 +434,11 @@ public static Grouper.BufferComparator makeNullHandlingBufferComparatorForNumeri private static boolean isPrimitiveComparable(boolean pushLimitDown, @Nullable StringComparator stringComparator) { - return !pushLimitDown || stringComparator == null || stringComparator.equals(StringComparators.NUMERIC); + return !pushLimitDown + || stringComparator == null + || stringComparator.equals(StringComparators.NUMERIC) + // NATURAL isn't set for numeric types, however if it is, then that would mean that we are ordering the + // numeric type with its natural comparator (which is NUMERIC) + || stringComparator.equals(StringComparators.NATURAL); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index c5e561c315f0..8c3e485ef7ca 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -1116,7 +1116,8 @@ private static int compareDimsInRowsWithAggs( final StringComparator comparator = comparators.get(i); final ColumnType fieldType = fieldTypes.get(i); - if (fieldType.isNumeric() && comparator.equals(StringComparators.NUMERIC)) { + if (fieldType.isNumeric() + && (comparator.equals(StringComparators.NUMERIC) || comparator.equals(StringComparators.NATURAL))) { // use natural comparison if (fieldType.is(ValueType.DOUBLE)) { // sometimes doubles can become floats making the round trip from serde, make sure to coerce them both diff --git a/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java b/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java index 8e66baf7df37..44d03adce44f 100644 --- a/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java +++ b/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java @@ -41,6 +41,8 @@ public static StringComparator fromString(String type) return StringComparators.NUMERIC; case StringComparators.VERSION_NAME: return StringComparators.VERSION; + case StringComparators.NATURAL_NAME: + return StringComparators.NATURAL; default: throw new IAE("Unknown string comparator[%s]", type); } diff --git a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java index 4fdcc5c6f3a4..650b259ed562 100644 --- a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java +++ b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java @@ -22,6 +22,8 @@ import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; import org.apache.druid.common.guava.GuavaUtils; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.maven.artifact.versioning.DefaultArtifactVersion; import java.math.BigDecimal; @@ -34,25 +36,28 @@ public class StringComparators public static final String NUMERIC_NAME = "numeric"; public static final String STRLEN_NAME = "strlen"; public static final String VERSION_NAME = "version"; + public static final String NATURAL_NAME = "natural"; public static final StringComparator LEXICOGRAPHIC = new LexicographicComparator(); public static final StringComparator ALPHANUMERIC = new AlphanumericComparator(); public static final StringComparator NUMERIC = new NumericComparator(); public static final StringComparator STRLEN = new StrlenComparator(); public static final StringComparator VERSION = new VersionComparator(); + public static final StringComparator NATURAL = new NaturalComparator(); public static final int LEXICOGRAPHIC_CACHE_ID = 0x01; public static final int ALPHANUMERIC_CACHE_ID = 0x02; public static final int NUMERIC_CACHE_ID = 0x03; public static final int STRLEN_CACHE_ID = 0x04; public static final int VERSION_CACHE_ID = 0x05; + public static final int NATURAL_CACHE_ID = 0x06; /** * Comparison using the natural comparator of {@link String}. * * Note that this is not equivalent to comparing UTF-8 byte arrays; see javadocs for - * {@link org.apache.druid.java.util.common.StringUtils#compareUnicode(String, String)} and - * {@link org.apache.druid.java.util.common.StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}. + * {@link StringUtils#compareUnicode(String, String)} and + * {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}. */ public static class LexicographicComparator extends StringComparator { @@ -492,4 +497,51 @@ public byte[] getCacheKey() return new byte[]{(byte) VERSION_CACHE_ID}; } } + + /** + * NaturalComparator refers to the natural ordering of the type that it refers. + * + * For example, if the type is Long, the natural ordering would be numeric + * if the type is an array, the natural ordering would be lexicographic comparison of the natural ordering of the + * elements in the arrays. + * + * It is a sigil value for the dimension that we can handle in the execution layer, and don't need the comparator for. + * It is also a placeholder for dimensions that we don't have a comparator for (like arrays), but is a required for + * planning + */ + public static class NaturalComparator extends StringComparator + { + @Override + public int compare(String o1, String o2) + { + throw DruidException.defensive("compare() should not be called for the NaturalComparator"); + } + + @Override + public String toString() + { + return StringComparators.NATURAL_NAME; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + return o != null && getClass() == o.getClass(); + } + + @Override + public int hashCode() + { + return 0; + } + + @Override + public byte[] getCacheKey() + { + return new byte[]{(byte) NATURAL_CACHE_ID}; + } + } } diff --git a/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java b/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java index 7fa0a3b82ca6..db80a598802c 100644 --- a/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java +++ b/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -33,6 +34,8 @@ public class StringComparatorsTest { + private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); + private void commonTest(StringComparator comparator) { // equality test @@ -156,65 +159,83 @@ public void testVersionComparator() Assert.assertTrue(StringComparators.VERSION.compare("1.0-SNAPSHOT", "1.0-Final") < 0); } + @Test + public void testNaturalComparator() + { + Assert.assertThrows(DruidException.class, () -> StringComparators.NATURAL.compare("str1", "str2")); + } + @Test public void testLexicographicComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"lexicographic\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.LEXICOGRAPHIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.LEXICOGRAPHIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.LEXICOGRAPHIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.LEXICOGRAPHIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"lexicographic\""; Assert.assertEquals( StringComparators.LEXICOGRAPHIC, - jsonMapper.readValue(makeFromJsonSpec, StringComparator.class) + JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class) ); } @Test public void testAlphanumericComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"alphanumeric\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.ALPHANUMERIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.ALPHANUMERIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.ALPHANUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"alphanumeric\""; - Assert.assertEquals(StringComparators.ALPHANUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } @Test public void testStrlenComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"strlen\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.STRLEN); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.STRLEN); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"strlen\""; - Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } @Test public void testNumericComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"numeric\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.NUMERIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NUMERIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"numeric\""; - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); makeFromJsonSpec = "\"NuMeRiC\""; - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); + } + + @Test + public void testNaturalComparatorSerdeTest() throws IOException + { + String expectJsonSpec = "{\"type\":\"natural\"}"; + + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NATURAL); + Assert.assertEquals(expectJsonSpec, jsonSpec); + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); + + String makeFromJsonSpec = "\"natural\""; + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); + + makeFromJsonSpec = "\"NaTuRaL\""; + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java index b41833d7b332..a1c310522c8f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java @@ -198,7 +198,7 @@ public static BoundDimFilter lessThanOrEqualTo(final BoundRefKey boundRefKey, fi public static BoundDimFilter interval(final BoundRefKey boundRefKey, final Interval interval) { if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC)) { - // Interval comparison only works with NUMERIC comparator. + // Interval comparison only works with NUMERIC comparator throw new ISE("Comparator must be NUMERIC but was[%s]", boundRefKey.getComparator()); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java index 913790fd9441..22b0aacfa05f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java @@ -40,7 +40,6 @@ import org.apache.calcite.util.TimestampString; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.math.expr.ExpressionProcessingConfig; @@ -208,12 +207,18 @@ public static boolean isLongType(SqlTypeName sqlTypeName) SqlTypeName.INT_TYPES.contains(sqlTypeName); } + /** + * Returns the natural StringComparator associated with the RelDataType + */ public static StringComparator getStringComparatorForRelDataType(RelDataType dataType) { final ColumnType valueType = getColumnTypeForRelDataType(dataType); return getStringComparatorForValueType(valueType); } + /** + * Returns the natural StringComparator associated with the given ColumnType + */ public static StringComparator getStringComparatorForValueType(ColumnType valueType) { if (valueType.isNumeric()) { @@ -221,7 +226,7 @@ public static StringComparator getStringComparatorForValueType(ColumnType valueT } else if (valueType.is(ValueType.STRING)) { return StringComparators.LEXICOGRAPHIC; } else { - throw new ISE("Unrecognized valueType[%s]", valueType); + return StringComparators.NATURAL; } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java index 676c0b7fab60..576a57cc9375 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java @@ -20,6 +20,8 @@ package org.apache.druid.sql.calcite.planner; import com.google.common.collect.ImmutableSortedSet; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.util.CalciteTestBase; import org.junit.Assert; import org.junit.Test; @@ -52,4 +54,18 @@ public void testFindUnusedPrefix() Assert.assertEquals("x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "xa", "_x"))); Assert.assertEquals("__x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "x1a", "_x90"))); } + + @Test + public void testGetStringComparatorForColumnType() + { + Assert.assertEquals(StringComparators.LEXICOGRAPHIC, Calcites.getStringComparatorForValueType(ColumnType.STRING)); + Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.LONG)); + Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.FLOAT)); + Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE)); + Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.STRING_ARRAY)); + Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.LONG_ARRAY)); + Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE_ARRAY)); + Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.NESTED_DATA)); + Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.UNKNOWN_COMPLEX)); + } }