From 77ced5a4f975d8f20a99f4ab8dd8bea100bdadc7 Mon Sep 17 00:00:00 2001
From: Laksh Singla
Date: Fri, 13 Oct 2023 04:44:35 +0530
Subject: [PATCH 1/4] natural comparator

---
 .../msq/querykit/groupby/GroupByQueryKit.java |   9 +
 .../apache/druid/msq/exec/MSQArraysTest.java  | 306 +++++++++++++++---
 .../apache/druid/msq/exec/MSQSelectTest.java  |   3 -
 .../druid/jackson/StringComparatorModule.java |   3 +-
 .../druid/query/filter/BoundDimFilter.java    |   5 +-
 .../GrouperBufferComparatorUtils.java         |   5 +-
 .../epinephelinae/RowBasedGrouperHelper.java  |   3 +-
 .../query/ordering/StringComparator.java      |   2 +
 .../query/ordering/StringComparators.java     |  56 +++-
 .../druid/sql/calcite/filtration/Bounds.java  |   7 +-
 .../druid/sql/calcite/planner/Calcites.java   |   9 +-
 11 files changed, 340 insertions(+), 68 deletions(-)

diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java
index 4ea56f73881d..b12048a315f2 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByQueryKit.java
@@ -311,8 +311,17 @@ private static void validateQuery(final GroupByQuery query)
     }
   }
 
+  /**
+   * The MSQ engine always orders dimensions itself, ignoring the comparator that is set in the query. This method
+   * checks whether it is safe to do so, which is the case when the dimension uses its natural comparator.
+   * Since MSQ executes queries planned by the SQL layer, and the SQL layer always assigns the natural comparator
+   * to the dimensions, this acts as a sanity check.
+   */
   private static boolean isNaturalComparator(final ValueType type, final StringComparator comparator)
   {
+    if (StringComparators.NATURAL.equals(comparator)) {
+      return true;
+    }
     return ((type == ValueType.STRING && StringComparators.LEXICOGRAPHIC.equals(comparator))
             || (type.isNumeric() && StringComparators.NUMERIC.equals(comparator)))
            && !type.isArray();
diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java
index d2696f232820..91f9fa833f05 100644
--- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java
+++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java
@@ -33,6 +33,7 @@
 import org.apache.druid.query.NestedDataTestUtils;
 import org.apache.druid.query.Query;
 import org.apache.druid.query.expression.TestExprMacroTable;
+import org.apache.druid.query.scan.ScanQuery;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
@@ -43,6 +44,7 @@ import org.apache.druid.timeline.SegmentId;
 import org.apache.druid.utils.CompressionUtils;
 import org.hamcrest.CoreMatchers;
+import org.junit.Before;
 import org.junit.Test;
 import org.junit.internal.matchers.ThrowableMessageMatcher;
 import org.junit.runner.RunWith;
@@ -67,6 +69,8 @@
 @RunWith(Parameterized.class)
 public class MSQArraysTest extends MSQTestBase
 {
+  private File dataFile;
+  private String dataFileNameAsJsonString;
 
   @Parameterized.Parameters(name = "{index}:with context {0}")
   public static Collection data()
@@ -86,6 +90,23 @@ public static Collection data()
@Parameterized.Parameter(1) public Map context; + @Before + public void setup() throws IOException + { + // Read the file and make the name available to the tests + dataFile = temporaryFolder.newFile(); + final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() + .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); + final InputStream decompressing = CompressionUtils.decompress( + resourceStream, + NestedDataTestUtils.ARRAY_TYPES_DATA_FILE + ); + Files.copy(decompressing, dataFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + decompressing.close(); + + dataFileNameAsJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFile); + } + /** * Tests the behaviour of INSERT query when arrayIngestMode is set to none (default) and the user tries to ingest * string arrays @@ -164,18 +185,6 @@ public void testInsertArraysAutoType() throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_USE_AUTO_SCHEMAS, true); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -183,7 +192,7 @@ public void testInsertArraysAutoType() throws IOException + " arrayDouble\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '[{\"name\": \"timestamp\", \"type\": \"STRING\"}, {\"name\": \"arrayString\", \"type\": \"COMPLEX\"}, {\"name\": \"arrayLong\", \"type\": \"COMPLEX\"}, {\"name\": \"arrayDouble\", \"type\": \"COMPLEX\"}]'\n" + " )\n" @@ -221,18 +230,6 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "mvd"); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -243,7 +240,7 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + " )\n" @@ -416,18 
+413,6 @@ public void testInsertArraysAsArrays() throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "array"); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n" + " TIME_PARSE(\"timestamp\") as __time,\n" + " arrayString,\n" @@ -438,7 +423,7 @@ public void testInsertArraysAsArrays() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + " )\n" @@ -634,21 +619,9 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, arrayIngestMode); - final File tmpFile = temporaryFolder.newFile(); - final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() - .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); - final InputStream decompressing = CompressionUtils.decompress( - resourceStream, - NestedDataTestUtils.ARRAY_TYPES_DATA_FILE - ); - Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - decompressing.close(); - - final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile); - Query expectedQuery = newScanQueryBuilder() .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(tmpFile)), + new LocalInputSource(null, null, ImmutableList.of(dataFile)), new JsonInputFormat(null, null, null, null, null), fileSignature )) @@ -681,7 +654,7 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + " )\n" @@ -708,6 +681,233 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException .verifyResults(); } + @Test + public void testScanWithOrderByOnStringArray() throws IOException + { + final List expectedRows = Arrays.asList( + new Object[]{Arrays.asList("d", "e")}, + new Object[]{Arrays.asList("d", "e")}, + new Object[]{Arrays.asList("b", "c")}, + new Object[]{Arrays.asList("b", "c")}, + new Object[]{Arrays.asList("a", "b", "c")}, + new Object[]{Arrays.asList("a", "b", "c")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{Arrays.asList("a", "b")}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null} + ); + + RowSignature fileSignature = 
RowSignature.builder() + .add("timestamp", ColumnType.STRING) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .build(); + + RowSignature rowSignature = RowSignature.builder() + .add("arrayString", ColumnType.STRING_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayString", ColumnType.STRING_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(new ExternalDataSource( + new LocalInputSource(null, null, ImmutableList.of(dataFile)), + new JsonInputFormat(null, null, null, null, null), + fileSignature + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayString") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayString", ScanQuery.Order.DESCENDING))) + .context(defaultScanQueryContext(context, scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayString\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayString DESC") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayString", "arrayString") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } + + @Test + public void testScanWithOrderByOnLongArray() throws IOException + { + final List expectedRows = Arrays.asList( + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L)}, + new Object[]{Arrays.asList(1L, 2L, 3L, 4L)}, + new Object[]{Arrays.asList(1L, 2L, 3L, 4L)}, + new Object[]{Arrays.asList(1L, 4L)}, + new Object[]{Arrays.asList(1L, 4L)}, + new Object[]{Arrays.asList(2L, 3L)}, + new Object[]{Arrays.asList(2L, 3L)} + ); + + RowSignature fileSignature = RowSignature.builder() + .add("timestamp", ColumnType.STRING) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .build(); + + RowSignature rowSignature = RowSignature.builder() + .add("arrayLong", ColumnType.LONG_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayLong", ColumnType.LONG_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(new ExternalDataSource( + new LocalInputSource(null, null, ImmutableList.of(dataFile)), + new JsonInputFormat(null, null, null, null, null), + fileSignature + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayLong") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayLong", ScanQuery.Order.ASCENDING))) + .context(defaultScanQueryContext(context, 
scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayLong\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayLong") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayLong", "arrayLong") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } + + @Test + public void testScanWithOrderByOnDoubleArray() throws IOException + { + final List expectedRows = Arrays.asList( + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{null}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 3.3d)}, + new Object[]{Arrays.asList(1.1d, 3.3d)}, + new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)}, + new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)}, + new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)}, + new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)} + ); + + RowSignature fileSignature = RowSignature.builder() + .add("timestamp", ColumnType.STRING) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .build(); + + RowSignature rowSignature = RowSignature.builder() + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .build(); + + RowSignature scanSignature = RowSignature.builder() + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .build(); + + Query expectedQuery = newScanQueryBuilder() + .dataSource(new ExternalDataSource( + new LocalInputSource(null, null, ImmutableList.of(dataFile)), + new JsonInputFormat(null, null, null, null, null), + fileSignature + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .columns("arrayDouble") + .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayDouble", ScanQuery.Order.ASCENDING))) + .context(defaultScanQueryContext(context, scanSignature)) + .build(); + + testSelectQuery().setSql("SELECT\n" + + " arrayDouble\n" + + "FROM TABLE(\n" + + " EXTERN(\n" + + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{\"type\": \"json\"}',\n" + + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " )\n" + + ")\n" + + "ORDER BY arrayDouble") + .setQueryContext(context) + .setExpectedMSQSpec(MSQSpec + .builder() + .query(expectedQuery) + .columnMappings(new ColumnMappings(ImmutableList.of( + new ColumnMapping("arrayDouble", "arrayDouble") + ))) + .tuningConfig(MSQTuningConfig.defaultConfig()) + .destination(TaskReportMSQDestination.INSTANCE) + .build() + ) + .setExpectedRowSignature(rowSignature) + .setExpectedResultRows(expectedRows) + .verifyResults(); + } private List expectedMultiValueFooRowsToArray() { diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java 
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java index d771f7497a8c..4710fbbd6120 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java @@ -88,7 +88,6 @@ import org.mockito.ArgumentMatchers; import org.mockito.Mockito; -import javax.annotation.Nonnull; import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -2381,7 +2380,6 @@ public void testUnionAllUsingUnionDataSource() .verifyResults(); } - @Nonnull private List expectedMultiValueFooRowsGroup() { ArrayList expected = new ArrayList<>(); @@ -2400,7 +2398,6 @@ private List expectedMultiValueFooRowsGroup() return expected; } - @Nonnull private List expectedMultiValueFooRowsGroupByList() { ArrayList expected = new ArrayList<>(); diff --git a/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java b/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java index 4b7ea2953db1..d0e1f1128bfa 100644 --- a/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java +++ b/processing/src/main/java/org/apache/druid/jackson/StringComparatorModule.java @@ -37,7 +37,8 @@ public StringComparatorModule() new NamedType(StringComparators.AlphanumericComparator.class, StringComparators.ALPHANUMERIC_NAME), new NamedType(StringComparators.StrlenComparator.class, StringComparators.STRLEN_NAME), new NamedType(StringComparators.NumericComparator.class, StringComparators.NUMERIC_NAME), - new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME) + new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME), + new NamedType(StringComparators.NaturalComparator.class, StringComparators.NATURAL_NAME) ); } diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index 17cecc411511..f7d8c95d7bb2 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -88,7 +88,7 @@ public BoundDimFilter( // will be used if the new 'ordering' // property is missing. If both 'ordering' and 'alphaNumeric' are present, // make sure they are consistent. 
- if (ordering == null) { + if (ordering == null || ordering.equals(StringComparators.NATURAL)) { if (alphaNumeric == null || !alphaNumeric) { this.ordering = StringComparators.LEXICOGRAPHIC; } else { @@ -100,7 +100,8 @@ public BoundDimFilter( boolean orderingIsAlphanumeric = this.ordering.equals(StringComparators.ALPHANUMERIC); Preconditions.checkState( alphaNumeric == orderingIsAlphanumeric, - "mismatch between alphanumeric and ordering property"); + "mismatch between alphanumeric and ordering property" + ); } } this.extractionFn = extractionFn; diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java index 08ba4ba2ee6c..ec51e56303a0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java @@ -434,6 +434,9 @@ public static Grouper.BufferComparator makeNullHandlingBufferComparatorForNumeri private static boolean isPrimitiveComparable(boolean pushLimitDown, @Nullable StringComparator stringComparator) { - return !pushLimitDown || stringComparator == null || stringComparator.equals(StringComparators.NUMERIC); + return !pushLimitDown + || stringComparator == null + || stringComparator.equals(StringComparators.NUMERIC) + || stringComparator.equals(StringComparators.NATURAL); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java index 4689e37ebcaf..629ce5cc4cd1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/RowBasedGrouperHelper.java @@ -1116,7 +1116,8 @@ private static int compareDimsInRowsWithAggs( final StringComparator comparator = comparators.get(i); final ColumnType fieldType = fieldTypes.get(i); - if (fieldType.isNumeric() && comparator.equals(StringComparators.NUMERIC)) { + if (fieldType.isNumeric() + && (comparator.equals(StringComparators.NUMERIC) || comparator.equals(StringComparators.NATURAL))) { // use natural comparison if (fieldType.is(ValueType.DOUBLE)) { // sometimes doubles can become floats making the round trip from serde, make sure to coerce them both diff --git a/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java b/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java index 8e66baf7df37..44d03adce44f 100644 --- a/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java +++ b/processing/src/main/java/org/apache/druid/query/ordering/StringComparator.java @@ -41,6 +41,8 @@ public static StringComparator fromString(String type) return StringComparators.NUMERIC; case StringComparators.VERSION_NAME: return StringComparators.VERSION; + case StringComparators.NATURAL_NAME: + return StringComparators.NATURAL; default: throw new IAE("Unknown string comparator[%s]", type); } diff --git a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java index 4fdcc5c6f3a4..650b259ed562 100644 --- a/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java +++ 
b/processing/src/main/java/org/apache/druid/query/ordering/StringComparators.java @@ -22,6 +22,8 @@ import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; import org.apache.druid.common.guava.GuavaUtils; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; import org.apache.maven.artifact.versioning.DefaultArtifactVersion; import java.math.BigDecimal; @@ -34,25 +36,28 @@ public class StringComparators public static final String NUMERIC_NAME = "numeric"; public static final String STRLEN_NAME = "strlen"; public static final String VERSION_NAME = "version"; + public static final String NATURAL_NAME = "natural"; public static final StringComparator LEXICOGRAPHIC = new LexicographicComparator(); public static final StringComparator ALPHANUMERIC = new AlphanumericComparator(); public static final StringComparator NUMERIC = new NumericComparator(); public static final StringComparator STRLEN = new StrlenComparator(); public static final StringComparator VERSION = new VersionComparator(); + public static final StringComparator NATURAL = new NaturalComparator(); public static final int LEXICOGRAPHIC_CACHE_ID = 0x01; public static final int ALPHANUMERIC_CACHE_ID = 0x02; public static final int NUMERIC_CACHE_ID = 0x03; public static final int STRLEN_CACHE_ID = 0x04; public static final int VERSION_CACHE_ID = 0x05; + public static final int NATURAL_CACHE_ID = 0x06; /** * Comparison using the natural comparator of {@link String}. * * Note that this is not equivalent to comparing UTF-8 byte arrays; see javadocs for - * {@link org.apache.druid.java.util.common.StringUtils#compareUnicode(String, String)} and - * {@link org.apache.druid.java.util.common.StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}. + * {@link StringUtils#compareUnicode(String, String)} and + * {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}. */ public static class LexicographicComparator extends StringComparator { @@ -492,4 +497,51 @@ public byte[] getCacheKey() return new byte[]{(byte) VERSION_CACHE_ID}; } } + + /** + * NaturalComparator refers to the natural ordering of the type that it refers. + * + * For example, if the type is Long, the natural ordering would be numeric + * if the type is an array, the natural ordering would be lexicographic comparison of the natural ordering of the + * elements in the arrays. + * + * It is a sigil value for the dimension that we can handle in the execution layer, and don't need the comparator for. 
+ * It is also a placeholder for dimensions that we don't have a comparator for (like arrays), but is a required for + * planning + */ + public static class NaturalComparator extends StringComparator + { + @Override + public int compare(String o1, String o2) + { + throw DruidException.defensive("compare() should not be called for the NaturalComparator"); + } + + @Override + public String toString() + { + return StringComparators.NATURAL_NAME; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + return o != null && getClass() == o.getClass(); + } + + @Override + public int hashCode() + { + return 0; + } + + @Override + public byte[] getCacheKey() + { + return new byte[]{(byte) NATURAL_CACHE_ID}; + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java index b41833d7b332..6def8a152ba1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java @@ -197,9 +197,10 @@ public static BoundDimFilter lessThanOrEqualTo(final BoundRefKey boundRefKey, fi public static BoundDimFilter interval(final BoundRefKey boundRefKey, final Interval interval) { - if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC)) { - // Interval comparison only works with NUMERIC comparator. - throw new ISE("Comparator must be NUMERIC but was[%s]", boundRefKey.getComparator()); + if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC) + && !boundRefKey.getComparator().equals(StringComparators.NATURAL)) { + // Interval comparison only works with NUMERIC comparator or the NATURAL comparator + throw new ISE("Comparator must be NUMERIC or NATURAL but was[%s]", boundRefKey.getComparator()); } return new BoundDimFilter( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java index 913790fd9441..22b0aacfa05f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java @@ -40,7 +40,6 @@ import org.apache.calcite.util.TimestampString; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.math.expr.ExpressionProcessingConfig; @@ -208,12 +207,18 @@ public static boolean isLongType(SqlTypeName sqlTypeName) SqlTypeName.INT_TYPES.contains(sqlTypeName); } + /** + * Returns the natural StringComparator associated with the RelDataType + */ public static StringComparator getStringComparatorForRelDataType(RelDataType dataType) { final ColumnType valueType = getColumnTypeForRelDataType(dataType); return getStringComparatorForValueType(valueType); } + /** + * Returns the natural StringComparator associated with the given ColumnType + */ public static StringComparator getStringComparatorForValueType(ColumnType valueType) { if (valueType.isNumeric()) { @@ -221,7 +226,7 @@ public static StringComparator getStringComparatorForValueType(ColumnType valueT } else if (valueType.is(ValueType.STRING)) { return StringComparators.LEXICOGRAPHIC; } else { - throw new ISE("Unrecognized valueType[%s]", valueType); + return StringComparators.NATURAL; } } From 
b1f3ee6bd92829763a3fb6cbca5d955d4e2e5608 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Fri, 13 Oct 2023 10:38:34 +0530 Subject: [PATCH 2/4] revert some changes --- .../java/org/apache/druid/query/filter/BoundDimFilter.java | 2 +- .../epinephelinae/GrouperBufferComparatorUtils.java | 2 ++ .../org/apache/druid/sql/calcite/filtration/Bounds.java | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java index f7d8c95d7bb2..412ec7e57265 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/BoundDimFilter.java @@ -88,7 +88,7 @@ public BoundDimFilter( // will be used if the new 'ordering' // property is missing. If both 'ordering' and 'alphaNumeric' are present, // make sure they are consistent. - if (ordering == null || ordering.equals(StringComparators.NATURAL)) { + if (ordering == null) { if (alphaNumeric == null || !alphaNumeric) { this.ordering = StringComparators.LEXICOGRAPHIC; } else { diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java index ec51e56303a0..109ddb66f3d1 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GrouperBufferComparatorUtils.java @@ -437,6 +437,8 @@ private static boolean isPrimitiveComparable(boolean pushLimitDown, @Nullable St return !pushLimitDown || stringComparator == null || stringComparator.equals(StringComparators.NUMERIC) + // NATURAL isn't set for numeric types, however if it is, then that would mean that we are ordering the + // numeric type with its natural comparator (which is NUMERIC) || stringComparator.equals(StringComparators.NATURAL); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java index 6def8a152ba1..a1c310522c8f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Bounds.java @@ -197,10 +197,9 @@ public static BoundDimFilter lessThanOrEqualTo(final BoundRefKey boundRefKey, fi public static BoundDimFilter interval(final BoundRefKey boundRefKey, final Interval interval) { - if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC) - && !boundRefKey.getComparator().equals(StringComparators.NATURAL)) { - // Interval comparison only works with NUMERIC comparator or the NATURAL comparator - throw new ISE("Comparator must be NUMERIC or NATURAL but was[%s]", boundRefKey.getComparator()); + if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC)) { + // Interval comparison only works with NUMERIC comparator + throw new ISE("Comparator must be NUMERIC but was[%s]", boundRefKey.getComparator()); } return new BoundDimFilter( From ad9f57157534e88c98efb90d17f4a2c08fd741da Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Fri, 13 Oct 2023 14:14:04 +0530 Subject: [PATCH 3/4] refactor MSQArraysTest even more --- .../apache/druid/msq/exec/MSQArraysTest.java | 152 ++++++------------ 1 file changed, 50 insertions(+), 102 deletions(-) diff --git 
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java index 91f9fa833f05..2b152cfbe1c4 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQArraysTest.java @@ -30,6 +30,7 @@ import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination; import org.apache.druid.msq.test.MSQTestBase; import org.apache.druid.msq.util.MultiStageQueryContext; +import org.apache.druid.query.DataSource; import org.apache.druid.query.NestedDataTestUtils; import org.apache.druid.query.Query; import org.apache.druid.query.expression.TestExprMacroTable; @@ -69,8 +70,9 @@ @RunWith(Parameterized.class) public class MSQArraysTest extends MSQTestBase { - private File dataFile; - private String dataFileNameAsJsonString; + private String dataFileNameJsonString; + private String dataFileSignatureJsonString; + private DataSource dataFileExternalDataSource; @Parameterized.Parameters(name = "{index}:with context {0}") public static Collection data() @@ -94,7 +96,7 @@ public static Collection data() public void setup() throws IOException { // Read the file and make the name available to the tests - dataFile = temporaryFolder.newFile(); + File dataFile = temporaryFolder.newFile(); final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader() .getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE); final InputStream decompressing = CompressionUtils.decompress( @@ -104,7 +106,24 @@ public void setup() throws IOException Files.copy(decompressing, dataFile.toPath(), StandardCopyOption.REPLACE_EXISTING); decompressing.close(); - dataFileNameAsJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFile); + dataFileNameJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFile); + + RowSignature dataFileSignature = RowSignature.builder() + .add("timestamp", ColumnType.STRING) + .add("arrayString", ColumnType.STRING_ARRAY) + .add("arrayStringNulls", ColumnType.STRING_ARRAY) + .add("arrayLong", ColumnType.LONG_ARRAY) + .add("arrayLongNulls", ColumnType.LONG_ARRAY) + .add("arrayDouble", ColumnType.DOUBLE_ARRAY) + .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) + .build(); + dataFileSignatureJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFileSignature); + + dataFileExternalDataSource = new ExternalDataSource( + new LocalInputSource(null, null, ImmutableList.of(dataFile)), + new JsonInputFormat(null, null, null, null, null), + dataFileSignature + ); } /** @@ -156,7 +175,7 @@ public void testInsertOnFoo1WithMultiValueToArrayGroupByWithDefaultContext() * Tests the INSERT query when 'auto' type is set */ @Test - public void testInsertArraysAutoType() throws IOException + public void testInsertArraysAutoType() { List expectedRows = Arrays.asList( new Object[]{1672531200000L, null, null, null}, @@ -192,7 +211,7 @@ public void testInsertArraysAutoType() throws IOException + " arrayDouble\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" + " '[{\"name\": \"timestamp\", \"type\": \"STRING\"}, {\"name\": \"arrayString\", \"type\": \"COMPLEX\"}, {\"name\": \"arrayLong\", \"type\": \"COMPLEX\"}, {\"name\": 
\"arrayDouble\", \"type\": \"COMPLEX\"}]'\n" + " )\n" @@ -209,24 +228,8 @@ public void testInsertArraysAutoType() throws IOException * types as well */ @Test - public void testInsertArraysWithStringArraysAsMVDs() throws IOException + public void testInsertArraysWithStringArraysAsMVDs() { - RowSignature rowSignatureWithoutTimeAndStringColumns = - RowSignature.builder() - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .build(); - - - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .addAll(rowSignatureWithoutTimeAndStringColumns) - .build(); - final Map adjustedContext = new HashMap<>(context); adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "mvd"); @@ -240,9 +243,9 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ") PARTITIONED BY ALL") .setQueryContext(adjustedContext) @@ -259,7 +262,7 @@ public void testInsertArraysWithStringArraysAsMVDs() throws IOException * array types */ @Test - public void testInsertArraysAsArrays() throws IOException + public void testInsertArraysAsArrays() { final List expectedRows = Arrays.asList( new Object[]{ @@ -400,11 +403,6 @@ public void testInsertArraysAsArrays() throws IOException .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) .build(); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .addAll(rowSignatureWithoutTimeColumn) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) .addAll(rowSignatureWithoutTimeColumn) @@ -423,9 +421,9 @@ public void testInsertArraysAsArrays() throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ") PARTITIONED BY ALL") .setQueryContext(adjustedContext) @@ -436,26 +434,26 @@ public void testInsertArraysAsArrays() throws IOException } @Test - public void testSelectOnArraysWithArrayIngestModeAsNone() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsNone() { testSelectOnArrays("none"); } @Test - public void testSelectOnArraysWithArrayIngestModeAsMVD() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsMVD() { testSelectOnArrays("mvd"); } @Test - public void testSelectOnArraysWithArrayIngestModeAsArray() throws IOException + public void testSelectOnArraysWithArrayIngestModeAsArray() { testSelectOnArrays("array"); } // Tests the behaviour of the select with the given arrayIngestMode. The expectation should be the same, since the // arrayIngestMode should only determine how the array gets ingested at the end. 
- public void testSelectOnArrays(String arrayIngestMode) throws IOException + public void testSelectOnArrays(String arrayIngestMode) { final List expectedRows = Arrays.asList( new Object[]{ @@ -596,11 +594,6 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) .build(); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .addAll(rowSignatureWithoutTimeColumn) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("__time", ColumnType.LONG) .addAll(rowSignatureWithoutTimeColumn) @@ -620,11 +613,7 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, arrayIngestMode); Query expectedQuery = newScanQueryBuilder() - .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(dataFile)), - new JsonInputFormat(null, null, null, null, null), - fileSignature - )) + .dataSource(dataFileExternalDataSource) .intervals(querySegmentSpec(Filtration.eternity())) .columns( "arrayDouble", @@ -654,9 +643,9 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException + " arrayDoubleNulls\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ")") .setQueryContext(adjustedContext) @@ -682,7 +671,7 @@ public void testSelectOnArrays(String arrayIngestMode) throws IOException } @Test - public void testScanWithOrderByOnStringArray() throws IOException + public void testScanWithOrderByOnStringArray() { final List expectedRows = Arrays.asList( new Object[]{Arrays.asList("d", "e")}, @@ -701,15 +690,6 @@ public void testScanWithOrderByOnStringArray() throws IOException new Object[]{null} ); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .build(); RowSignature rowSignature = RowSignature.builder() .add("arrayString", ColumnType.STRING_ARRAY) @@ -720,11 +700,7 @@ public void testScanWithOrderByOnStringArray() throws IOException .build(); Query expectedQuery = newScanQueryBuilder() - .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(dataFile)), - new JsonInputFormat(null, null, null, null, null), - fileSignature - )) + .dataSource(dataFileExternalDataSource) .intervals(querySegmentSpec(Filtration.eternity())) .columns("arrayString") .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayString", ScanQuery.Order.DESCENDING))) @@ -735,9 +711,9 @@ public void testScanWithOrderByOnStringArray() throws IOException + " arrayString\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + 
"'\n" + " )\n" + ")\n" + "ORDER BY arrayString DESC") @@ -758,7 +734,7 @@ public void testScanWithOrderByOnStringArray() throws IOException } @Test - public void testScanWithOrderByOnLongArray() throws IOException + public void testScanWithOrderByOnLongArray() { final List expectedRows = Arrays.asList( new Object[]{null}, @@ -777,16 +753,6 @@ public void testScanWithOrderByOnLongArray() throws IOException new Object[]{Arrays.asList(2L, 3L)} ); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("arrayLong", ColumnType.LONG_ARRAY) .build(); @@ -796,11 +762,7 @@ public void testScanWithOrderByOnLongArray() throws IOException .build(); Query expectedQuery = newScanQueryBuilder() - .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(dataFile)), - new JsonInputFormat(null, null, null, null, null), - fileSignature - )) + .dataSource(dataFileExternalDataSource) .intervals(querySegmentSpec(Filtration.eternity())) .columns("arrayLong") .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayLong", ScanQuery.Order.ASCENDING))) @@ -811,9 +773,9 @@ public void testScanWithOrderByOnLongArray() throws IOException + " arrayLong\n" + "FROM TABLE(\n" + " EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ")\n" + "ORDER BY arrayLong") @@ -834,7 +796,7 @@ public void testScanWithOrderByOnLongArray() throws IOException } @Test - public void testScanWithOrderByOnDoubleArray() throws IOException + public void testScanWithOrderByOnDoubleArray() { final List expectedRows = Arrays.asList( new Object[]{null}, @@ -853,16 +815,6 @@ public void testScanWithOrderByOnDoubleArray() throws IOException new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)} ); - RowSignature fileSignature = RowSignature.builder() - .add("timestamp", ColumnType.STRING) - .add("arrayString", ColumnType.STRING_ARRAY) - .add("arrayStringNulls", ColumnType.STRING_ARRAY) - .add("arrayLong", ColumnType.LONG_ARRAY) - .add("arrayLongNulls", ColumnType.LONG_ARRAY) - .add("arrayDouble", ColumnType.DOUBLE_ARRAY) - .add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY) - .build(); - RowSignature rowSignature = RowSignature.builder() .add("arrayDouble", ColumnType.DOUBLE_ARRAY) .build(); @@ -872,11 +824,7 @@ public void testScanWithOrderByOnDoubleArray() throws IOException .build(); Query expectedQuery = newScanQueryBuilder() - .dataSource(new ExternalDataSource( - new LocalInputSource(null, null, ImmutableList.of(dataFile)), - new JsonInputFormat(null, null, null, null, null), - fileSignature - )) + .dataSource(dataFileExternalDataSource) .intervals(querySegmentSpec(Filtration.eternity())) .columns("arrayDouble") .orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayDouble", ScanQuery.Order.ASCENDING))) @@ -887,9 +835,9 @@ public void testScanWithOrderByOnDoubleArray() throws IOException + " arrayDouble\n" + "FROM TABLE(\n" + " 
EXTERN(\n" - + " '{ \"files\": [" + dataFileNameAsJsonString + "],\"type\":\"local\"}',\n" + + " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n" + " '{\"type\": \"json\"}',\n" - + " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n" + + " '" + dataFileSignatureJsonString + "'\n" + " )\n" + ")\n" + "ORDER BY arrayDouble") From 979624d177d2fb3a2ca53bc483cfcece7379f17f Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Fri, 13 Oct 2023 14:28:03 +0530 Subject: [PATCH 4/4] tests --- .../query/ordering/StringComparatorsTest.java | 55 +++++++++++++------ .../sql/calcite/planner/CalcitesTest.java | 16 ++++++ 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java b/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java index 7fa0a3b82ca6..db80a598802c 100644 --- a/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java +++ b/processing/src/test/java/org/apache/druid/query/ordering/StringComparatorsTest.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -33,6 +34,8 @@ public class StringComparatorsTest { + private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper(); + private void commonTest(StringComparator comparator) { // equality test @@ -156,65 +159,83 @@ public void testVersionComparator() Assert.assertTrue(StringComparators.VERSION.compare("1.0-SNAPSHOT", "1.0-Final") < 0); } + @Test + public void testNaturalComparator() + { + Assert.assertThrows(DruidException.class, () -> StringComparators.NATURAL.compare("str1", "str2")); + } + @Test public void testLexicographicComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"lexicographic\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.LEXICOGRAPHIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.LEXICOGRAPHIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.LEXICOGRAPHIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.LEXICOGRAPHIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"lexicographic\""; Assert.assertEquals( StringComparators.LEXICOGRAPHIC, - jsonMapper.readValue(makeFromJsonSpec, StringComparator.class) + JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class) ); } @Test public void testAlphanumericComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"alphanumeric\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.ALPHANUMERIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.ALPHANUMERIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.ALPHANUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"alphanumeric\""; - Assert.assertEquals(StringComparators.ALPHANUMERIC, 
jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } @Test public void testStrlenComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"strlen\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.STRLEN); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.STRLEN); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"strlen\""; - Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } @Test public void testNumericComparatorSerdeTest() throws IOException { - ObjectMapper jsonMapper = new DefaultObjectMapper(); String expectJsonSpec = "{\"type\":\"numeric\"}"; - String jsonSpec = jsonMapper.writeValueAsString(StringComparators.NUMERIC); + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NUMERIC); Assert.assertEquals(expectJsonSpec, jsonSpec); - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); String makeFromJsonSpec = "\"numeric\""; - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); makeFromJsonSpec = "\"NuMeRiC\""; - Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)); + Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); + } + + @Test + public void testNaturalComparatorSerdeTest() throws IOException + { + String expectJsonSpec = "{\"type\":\"natural\"}"; + + String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NATURAL); + Assert.assertEquals(expectJsonSpec, jsonSpec); + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class)); + + String makeFromJsonSpec = "\"natural\""; + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); + + makeFromJsonSpec = "\"NaTuRaL\""; + Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)); } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java index 676c0b7fab60..576a57cc9375 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitesTest.java @@ -20,6 +20,8 @@ package org.apache.druid.sql.calcite.planner; import com.google.common.collect.ImmutableSortedSet; +import org.apache.druid.query.ordering.StringComparators; +import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.util.CalciteTestBase; import 
org.junit.Assert;
 import org.junit.Test;
@@ -52,4 +54,18 @@ public void testFindUnusedPrefix()
     Assert.assertEquals("x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "xa", "_x")));
     Assert.assertEquals("__x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "x1a", "_x90")));
   }
+
+  @Test
+  public void testGetStringComparatorForColumnType()
+  {
+    Assert.assertEquals(StringComparators.LEXICOGRAPHIC, Calcites.getStringComparatorForValueType(ColumnType.STRING));
+    Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.LONG));
+    Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.FLOAT));
+    Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE));
+    Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.STRING_ARRAY));
+    Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.LONG_ARRAY));
+    Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE_ARRAY));
+    Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.NESTED_DATA));
+    Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.UNKNOWN_COMPLEX));
+  }
 }
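
A minimal illustrative sketch of the behavior this patch series establishes (it is not part of the patch itself). The class name and main() wrapper below are invented for illustration, and the snippet assumes the patched druid-processing and druid-sql modules are on the classpath; the individual calls (Calcites.getStringComparatorForValueType, StringComparators.NATURAL, NaturalComparator.compare throwing defensively) are the ones added or changed above and exercised by the new tests.

// Sketch only: how the SQL layer now picks a comparator for a column type,
// and why NATURAL is a planning-time sigil rather than a usable comparator.
import org.apache.druid.query.ordering.StringComparator;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.sql.calcite.planner.Calcites;

public class NaturalComparatorExample // hypothetical class, for illustration only
{
  public static void main(String[] args)
  {
    // Numeric and string types keep their existing natural comparators.
    StringComparator forLong = Calcites.getStringComparatorForValueType(ColumnType.LONG);     // NUMERIC
    StringComparator forString = Calcites.getStringComparatorForValueType(ColumnType.STRING); // LEXICOGRAPHIC

    // Array and other complex types now map to NATURAL instead of throwing an ISE,
    // which lets queries such as "ORDER BY arrayLong" be planned for MSQ.
    StringComparator forArray = Calcites.getStringComparatorForValueType(ColumnType.LONG_ARRAY); // NATURAL

    System.out.println(forLong + ", " + forString + ", " + forArray);

    // NATURAL serializes as {"type":"natural"} but cannot compare values itself; the engine is
    // expected to order such columns natively, so compare() throws a defensive DruidException.
    try {
      StringComparators.NATURAL.compare("str1", "str2");
    }
    catch (RuntimeException e) {
      System.out.println("compare() is unsupported: " + e.getMessage());
    }
  }
}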