diff --git a/.github/workflows/revised-its.yml b/.github/workflows/revised-its.yml
index c762a6016514..62aac48dc994 100644
--- a/.github/workflows/revised-its.yml
+++ b/.github/workflows/revised-its.yml
@@ -50,7 +50,7 @@ jobs:
matrix:
#jdk: [8, 11, 17]
jdk: [8]
- it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat]
+ it: [HighAvailability, MultiStageQuery, Catalog, BatchIndex, MultiStageQueryWithMM, InputSource, InputFormat, Security]
#indexer: [indexer, middleManager]
indexer: [middleManager]
uses: ./.github/workflows/reusable-revised-its.yml
diff --git a/.github/workflows/static-checks.yml b/.github/workflows/static-checks.yml
index c77d15888ed1..a87000ac07e0 100644
--- a/.github/workflows/static-checks.yml
+++ b/.github/workflows/static-checks.yml
@@ -34,7 +34,7 @@ env:
MVN: mvn -B
MAVEN_SKIP: -P skip-static-checks -Dweb.console.skip=true -Dmaven.javadoc.skip=true
MAVEN_SKIP_TESTS: -P skip-tests
- MAVEN_OPTS: -Xmx3000m
+ MAVEN_OPTS: -Xmx8g
jobs:
static-checks:
@@ -144,6 +144,28 @@ jobs:
--levels ERROR \
--scope JavaInspectionsScope
+ openrewrite:
+ runs-on: ubuntu-latest
+ steps:
+ - name: checkout branch
+ uses: actions/checkout@v4
+
+ - uses: actions/setup-java@v4
+ with:
+ distribution: 'zulu'
+ java-version: '8'
+ cache: 'maven'
+
+ - name: maven install
+ run: |
+ echo 'Running Maven install...' &&
+ ${MVN} clean install -q -ff -pl '!distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS} -T1C &&
+ ${MVN} install -q -ff -pl 'distribution' ${MAVEN_SKIP} ${MAVEN_SKIP_TESTS}
+
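+      # rewrite:dryRun runs the configured OpenRewrite recipes without modifying sources and reports suggested changes
+      # (by default written to each module's target/rewrite/rewrite.patch).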
+ - name: rewrite:dryRun
+ run: |
+ ${MVN} rewrite:dryRun ${MAVEN_SKIP}
+
web-checks:
strategy:
fail-fast: false
diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
index 7b3e71aecec8..c04d4e31f959 100644
--- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
+++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java
@@ -213,7 +213,12 @@ public String getFormatString()
// 40: regex filtering
"SELECT string4, COUNT(*) FROM foo WHERE REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL GROUP BY 1",
// 41: complicated filtering
- "SELECT string2, SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2"
+ "SELECT string2, SUM(long1) FROM foo WHERE string1 = '1000' AND string5 LIKE '%1%' AND (string3 in ('1', '10', '20', '22', '32') AND long2 IN (1, 19, 21, 23, 25, 26, 46) AND double3 < 1010.0 AND double3 > 1000.0 AND (string4 = '1' OR REGEXP_EXTRACT(string1, '^1') IS NOT NULL OR REGEXP_EXTRACT('Z' || string2, '^Z2') IS NOT NULL)) GROUP BY 1 ORDER BY 2",
+ // 42: array_contains expr
+ "SELECT ARRAY_CONTAINS(\"multi-string3\", 100) FROM foo",
+ "SELECT ARRAY_CONTAINS(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo",
+ "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 100]) FROM foo",
+ "SELECT ARRAY_OVERLAP(\"multi-string3\", ARRAY[1, 2, 10, 11, 20, 22, 30, 33, 40, 44, 50, 55, 100]) FROM foo"
);
@Param({"5000000"})
@@ -275,7 +280,11 @@ public String getFormatString()
"38",
"39",
"40",
- "41"
+ "41",
+ "42",
+ "43",
+ "44",
+ "45"
})
private String query;
@@ -369,8 +378,8 @@ public void setup()
.writeValueAsString(jsonMapper.readValue((String) planResult[0], List.class))
);
}
- catch (JsonProcessingException e) {
- throw new RuntimeException(e);
+ catch (JsonProcessingException ignored) {
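+        // Ignore JSON serialization failures here so that benchmark setup can continue.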
+
}
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, ImmutableMap.of())) {
@@ -384,6 +393,9 @@ public void setup()
}
log.info("Total result row count:" + rowCounter);
}
+ catch (Throwable ignored) {
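+        // Ignore failures from this initial run; any real problem will surface when the benchmark executes the query.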
+
+ }
}
@TearDown(Level.Trial)
diff --git a/docs/api-reference/data-management-api.md b/docs/api-reference/data-management-api.md
index 4adeaa8b2083..754bf62f725a 100644
--- a/docs/api-reference/data-management-api.md
+++ b/docs/api-reference/data-management-api.md
@@ -206,7 +206,8 @@ Marks the state of a group of segments as unused, using an array of segment IDs
Pass the array of segment IDs or interval as a JSON object in the request body.
For the interval, specify the start and end times as ISO 8601 strings to identify segments inclusive of the start time and exclusive of the end time.
-Druid only updates the segments completely contained within the specified interval; partially overlapping segments are not affected.
+Optionally, specify an array of segment versions along with the interval. Druid updates only the segments completely contained
+within the specified interval that match the optional list of versions; partially overlapping segments are not affected.
#### URL
@@ -214,12 +215,13 @@ Druid only updates the segments completely contained within the specified interv
#### Request body
-The group of segments is sent as a JSON request payload that accepts one of the following properties:
+The group of segments is sent as a JSON request payload that accepts the following properties:
-|Property|Description|Example|
-|----------|-------------|---------|
-|`interval`|ISO 8601 segments interval.|`"2015-09-12T03:00:00.000Z/2015-09-12T05:00:00.000Z"`|
-|`segmentIds`|Array of segment IDs.|`["segmentId1", "segmentId2"]`|
+|Property|Description|Required|Example|
+|----------|-------------|---------|---------|
+|`interval`|ISO 8601 segments interval.|Yes, if `segmentIds` is not specified.|`"2015-09-12T03:00:00.000Z/2015-09-12T05:00:00.000Z"`|
+|`segmentIds`|List of segment IDs.|Yes, if `interval` is not specified.|`["segmentId1", "segmentId2"]`|
+|`versions`|List of segment versions. Can only be provided together with `interval`.|No.|`["2024-03-14T16:00:04.086Z", "2024-03-12T16:00:04.086Z"]`|
#### Responses
@@ -306,7 +308,8 @@ Marks the state of a group of segments as used, using an array of segment IDs or
Pass the array of segment IDs or interval as a JSON object in the request body.
For the interval, specify the start and end times as ISO 8601 strings to identify segments inclusive of the start time and exclusive of the end time.
-Druid only updates the segments completely contained within the specified interval; partially overlapping segments are not affected.
+Optionally, specify an array of segment versions along with the interval. Druid updates only the segments completely contained
+within the specified interval that match the optional list of versions; partially overlapping segments are not affected.
#### URL
@@ -314,12 +317,13 @@ Druid only updates the segments completely contained within the specified interv
#### Request body
-The group of segments is sent as a JSON request payload that accepts one of the following properties:
+The group of segments is sent as a JSON request payload that accepts the following properties:
-|Property|Description|Example|
-|----------|-------------|---------|
-|`interval`| ISO 8601 segments interval.|`"2015-09-12T03:00:00.000Z/2015-09-12T05:00:00.000Z"`|
-|`segmentIds`|Array of segment IDs.|`["segmentId1", "segmentId2"]`|
+|Property|Description|Required|Example|
+|----------|-------------|---------|---------|
+|`interval`|ISO 8601 segments interval.|Yes, if `segmentIds` is not specified.|`"2015-09-12T03:00:00.000Z/2015-09-12T05:00:00.000Z"`|
+|`segmentIds`|List of segment IDs.|Yes, if `interval` is not specified.|`["segmentId1", "segmentId2"]`|
+|`versions`|List of segment versions. Can only be provided together with `interval`.|No.|`["2024-03-14T16:00:04.086Z", "2024-03-12T16:00:04.086Z"]`|
#### Responses
diff --git a/docs/development/extensions-core/lookups-cached-global.md b/docs/development/extensions-core/lookups-cached-global.md
index 15e1469de4a8..5cfcbea01c24 100644
--- a/docs/development/extensions-core/lookups-cached-global.md
+++ b/docs/development/extensions-core/lookups-cached-global.md
@@ -211,8 +211,8 @@ The remapping values for each globally cached lookup can be specified by a JSON
|Property|Description|Required|Default|
|--------|-----------|--------|-------|
|`pollPeriod`|Period between polling for updates|No|0 (only once)|
-|`uri`|URI for the file of interest, specified as a file, hdfs, s3 or gs path|No|Use `uriPrefix`|
-|`uriPrefix`|A URI that specifies a directory (or other searchable resource) in which to search for files|No|Use `uri`|
+|`uri`|URI for the lookup file. Can be a file, HDFS, S3, or GCS path.|Either `uri` or `uriPrefix` must be set|None|
+|`uriPrefix`|A URI prefix that specifies a directory or other searchable resource where lookup files are located.|Either `uri` or `uriPrefix` must be set|None|
|`fileRegex`|Optional regex for matching the file name under `uriPrefix`. Only used if `uriPrefix` is used|No|`".*"`|
|`namespaceParseSpec`|How to interpret the data at the URI|Yes||
|`maxHeapPercentage`|The maximum percentage of heap size that the lookup should consume. If the lookup grows beyond this size, warning messages will be logged in the respective service logs.|No|10% of JVM heap size|
diff --git a/docs/multi-stage-query/concepts.md b/docs/multi-stage-query/concepts.md
index 27b7d12c91c9..cae88a0f3750 100644
--- a/docs/multi-stage-query/concepts.md
+++ b/docs/multi-stage-query/concepts.md
@@ -200,8 +200,8 @@ To perform ingestion with rollup:
2. Set [`finalizeAggregations: false`](reference.md#context-parameters) in your context. This causes aggregation
functions to write their internal state to the generated segments, instead of the finalized end result, and enables
further aggregation at query time.
-3. See [ARRAY types](../querying/arrays.md#sql-based-ingestion-with-rollup) for information about ingesting `ARRAY` columns
-4. See [multi-value dimensions](../querying/multi-value-dimensions.md#sql-based-ingestion-with-rollup) for information to ingest multi-value VARCHAR columns
+3. See [ARRAY types](../querying/arrays.md#sql-based-ingestion) for information about ingesting `ARRAY` columns
+4. See [multi-value dimensions](../querying/multi-value-dimensions.md#sql-based-ingestion) for information to ingest multi-value VARCHAR columns
When you do all of these things, Druid understands that you intend to do an ingestion with rollup, and it writes
rollup-related metadata into the generated segments. Other applications can then use [`segmentMetadata`
diff --git a/docs/multi-stage-query/reference.md b/docs/multi-stage-query/reference.md
index 45dfa464416f..0b10e14b50f9 100644
--- a/docs/multi-stage-query/reference.md
+++ b/docs/multi-stage-query/reference.md
@@ -351,7 +351,7 @@ The following table lists the context parameters for the MSQ task engine:
| `maxNumTasks` | SELECT, INSERT, REPLACE
The maximum total number of tasks to launch, including the controller task. The lowest possible value for this setting is 2: one controller and one worker. All tasks must be able to launch simultaneously. If they cannot, the query returns a `TaskStartTimeout` error code after approximately 10 minutes.
May also be provided as `numTasks`. If both are present, `maxNumTasks` takes priority. | 2 |
| `taskAssignment` | SELECT, INSERT, REPLACE
Determines how many tasks to use. Possible values include:
`max`: Uses as many tasks as possible, up to `maxNumTasks`.
`auto`: When file sizes can be determined through directory listing (for example: local files, S3, GCS, HDFS) uses as few tasks as possible without exceeding 512 MiB or 10,000 files per task, unless exceeding these limits is necessary to stay within `maxNumTasks`. When calculating the size of files, the weighted size is used, which considers the file format and compression format used if any. When file sizes cannot be determined through directory listing (for example: http), behaves the same as `max`. | `max` |
| `finalizeAggregations` | SELECT, INSERT, REPLACE
Determines the type of aggregation to return. If true, Druid finalizes the results of complex aggregations that directly appear in query results. If false, Druid returns the aggregation's intermediate type rather than finalized type. This parameter is useful during ingestion, where it enables storing sketches directly in Druid tables. For more information about aggregations, see [SQL aggregation functions](../querying/sql-aggregations.md). | `true` |
-| `arrayIngestMode` | INSERT, REPLACE
Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). When set to `none`, Druid will throw an exception when trying to store any type of arrays. `none` is most useful when set in the system default query context with (`druid.query.default.context.arrayIngestMode=none`) to be used to help migrate operators from `mvd` mode to `array` mode and force query writers to make an explicit choice between ARRAY and multi-value VARCHAR typed columns. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)|
+| `arrayIngestMode` | INSERT, REPLACE
Controls how ARRAY type values are stored in Druid segments. When set to `array` (recommended for SQL compliance), Druid will store all ARRAY typed values in [ARRAY typed columns](../querying/arrays.md), and supports storing both VARCHAR and numeric typed arrays. When set to `mvd` (the default, for backwards compatibility), Druid only supports VARCHAR typed arrays, and will store them as [multi-value string columns](../querying/multi-value-dimensions.md). See [`arrayIngestMode`](../querying/arrays.md#arrayingestmode) for more details. | `mvd` (for backwards compatibility, recommended to use `array` for SQL compliance)|
| `sqlJoinAlgorithm` | SELECT, INSERT, REPLACE
Algorithm to use for JOIN. Use `broadcast` (the default) for broadcast hash join or `sortMerge` for sort-merge join. Affects all JOIN operations in the query. This is a hint to the MSQ engine and the actual joins in the query may proceed in a different way than specified. See [Joins](#joins) for more details. | `broadcast` |
| `rowsInMemory` | INSERT or REPLACE
Maximum number of rows to store in memory at once before flushing to disk during the segment generation process. Ignored for non-INSERT queries. In most cases, use the default value. You may need to override the default if you run into one of the [known issues](./known-issues.md) around memory usage. | 100,000 |
| `segmentSortOrder` | INSERT or REPLACE
Normally, Druid sorts rows in individual segments using `__time` first, followed by the [CLUSTERED BY](#clustered-by) clause. When you set `segmentSortOrder`, Druid sorts rows in segments using this column list first, followed by the CLUSTERED BY order.
You provide the column list as comma-separated values or as a JSON array in string form. If your query includes `__time`, then this list must begin with `__time`. For example, consider an INSERT query that uses `CLUSTERED BY country` and has `segmentSortOrder` set to `__time,city`. Within each time chunk, Druid assigns rows to segments based on `country`, and then within each of those segments, Druid sorts those rows by `__time` first, then `city`, then `country`. | empty list |
@@ -364,6 +364,7 @@ The following table lists the context parameters for the MSQ task engine:
| `waitUntilSegmentsLoad` | INSERT, REPLACE
If set, the ingest query waits for the generated segment to be loaded before exiting, else the ingest query exits without waiting. The task and live reports contain the information about the status of loading segments if this flag is set. This will ensure that any future queries made after the ingestion exits will include results from the ingestion. The drawback is that the controller task will stall till the segments are loaded. | `false` |
| `includeSegmentSource` | SELECT, INSERT, REPLACE
Controls the sources, which will be queried for results in addition to the segments present on deep storage. Can be `NONE` or `REALTIME`. If this value is `NONE`, only non-realtime (published and used) segments will be downloaded from deep storage. If this value is `REALTIME`, results will also be included from realtime tasks. | `NONE` |
| `rowsPerPage` | SELECT
The number of rows per page to target. The actual number of rows per page may be somewhat higher or lower than this number. In most cases, use the default. This property comes into effect only when `selectDestination` is set to `durableStorage` | 100000 |
+| `skipTypeVerification` | INSERT or REPLACE
During query validation, Druid validates that [string arrays](../querying/arrays.md) and [multi-value dimensions](../querying/multi-value-dimensions.md) are not mixed in the same column. If you are intentionally migrating from one to the other, use this context parameter to disable type validation.
Provide the column list as comma-separated values or as a JSON array in string form. | empty list |
| `failOnEmptyInsert` | INSERT or REPLACE
When set to false (the default), an INSERT query generating no output rows will be no-op, and a REPLACE query generating no output rows will delete all data that matches the OVERWRITE clause. When set to true, an ingest query generating no output rows will throw an `InsertCannotBeEmpty` fault. | `false` |
## Joins
diff --git a/docs/querying/arrays.md b/docs/querying/arrays.md
index dbeb3ec6e028..a7eebaa32afe 100644
--- a/docs/querying/arrays.md
+++ b/docs/querying/arrays.md
@@ -71,9 +71,46 @@ The following shows an example `dimensionsSpec` for native ingestion of the data
### SQL-based ingestion
-Arrays can also be inserted with [SQL-based ingestion](../multi-stage-query/index.md) when you include a query context parameter [`"arrayIngestMode":"array"`](../multi-stage-query/reference.md#context-parameters).
+#### `arrayIngestMode`
+
+Arrays can be inserted with [SQL-based ingestion](../multi-stage-query/index.md) when you include the query context
+parameter `arrayIngestMode: array`.
+
+When `arrayIngestMode` is `array`, SQL ARRAY types are stored using Druid array columns. This is recommended for new
+tables.
+
+When `arrayIngestMode` is `mvd`, SQL `VARCHAR ARRAY` values are implicitly wrapped in [`ARRAY_TO_MV`](sql-functions.md#array_to_mv).
+This causes them to be stored as [multi-value strings](multi-value-dimensions.md), using the same `STRING` column type
+as regular scalar strings. SQL `BIGINT ARRAY` and `DOUBLE ARRAY` cannot be loaded under `arrayIngestMode: mvd`. `mvd` is
+the default behavior when `arrayIngestMode` is not provided in your query context, although the default may change to
+`array` in a future release.
+
+When `arrayIngestMode` is `none`, Druid throws an exception when trying to store any type of arrays. This mode is most
+useful when set in the system default query context with `druid.query.default.context.arrayIngestMode = none`, in cases
+where the cluster administrator wants SQL query authors to explicitly provide one or the other in their query context.
+
+The following table summarizes the differences in SQL ARRAY handling between `arrayIngestMode: array` and
+`arrayIngestMode: mvd`.
+
+| SQL type | Stored type when `arrayIngestMode: array` | Stored type when `arrayIngestMode: mvd` (default) |
+|---|---|---|
+|`VARCHAR ARRAY`|`ARRAY`|[multi-value `STRING`](multi-value-dimensions.md)|
+|`BIGINT ARRAY`|`ARRAY`|not possible (validation error)|
+|`DOUBLE ARRAY`|`ARRAY`|not possible (validation error)|
+
+In either mode, you can explicitly wrap string arrays in `ARRAY_TO_MV` to cause them to be stored as
+[multi-value strings](multi-value-dimensions.md).
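+
+For example, the following sketch (independent of the example datasources below) selects the same array literal both
+ways. Used in the SELECT list of an INSERT or REPLACE query under `arrayIngestMode: array`, the first expression is
+stored as an ARRAY column and the second as a multi-value string column; under `arrayIngestMode: mvd`, both are stored
+as multi-value strings.
+
+```sql
+SELECT
+  ARRAY['a', 'b', 'c'] AS "stringArray",                   -- an ARRAY value
+  ARRAY_TO_MV(ARRAY['a', 'b', 'c']) AS "multiValueString"  -- the same values as a multi-value string
+```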
+
+When validating a SQL INSERT or REPLACE statement that contains arrays, Druid checks whether the statement would lead
+to mixing string arrays and multi-value strings in the same column. If this condition is detected, the statement fails
+validation unless the column is named under the `skipTypeVerification` context parameter. This parameter can be either
+a comma-separated list of column names, or a JSON array in string form. This validation is done to prevent accidentally
+mixing arrays and multi-value strings in the same column.
+
+#### Examples
+
+Set [`arrayIngestMode: array`](#arrayingestmode) in your query context to run the following examples.
-For example, to insert the data used in this document:
```sql
REPLACE INTO "array_example" OVERWRITE ALL
WITH "ext" AS (
@@ -81,9 +118,14 @@ WITH "ext" AS (
FROM TABLE(
EXTERN(
'{"type":"inline","data":"{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row1\", \"arrayString\": [\"a\", \"b\"], \"arrayLong\":[1, null,3], \"arrayDouble\":[1.1, 2.2, null]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row2\", \"arrayString\": [null, \"b\"], \"arrayLong\":null, \"arrayDouble\":[999, null, 5.5]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row3\", \"arrayString\": [], \"arrayLong\":[1, 2, 3], \"arrayDouble\":[null, 2.2, 1.1]} \n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row4\", \"arrayString\": [\"a\", \"b\"], \"arrayLong\":[1, 2, 3], \"arrayDouble\":[]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row5\", \"arrayString\": null, \"arrayLong\":[], \"arrayDouble\":null}"}',
- '{"type":"json"}',
- '[{"name":"timestamp", "type":"STRING"},{"name":"label", "type":"STRING"},{"name":"arrayString", "type":"ARRAY"},{"name":"arrayLong", "type":"ARRAY"},{"name":"arrayDouble", "type":"ARRAY"}]'
+ '{"type":"json"}'
)
+ ) EXTEND (
+ "timestamp" VARCHAR,
+ "label" VARCHAR,
+ "arrayString" VARCHAR ARRAY,
+ "arrayLong" BIGINT ARRAY,
+ "arrayDouble" DOUBLE ARRAY
)
)
SELECT
@@ -96,8 +138,7 @@ FROM "ext"
PARTITIONED BY DAY
```
-### SQL-based ingestion with rollup
-These input arrays can also be grouped for rollup:
+Arrays can also be used as `GROUP BY` keys for rollup:
```sql
REPLACE INTO "array_example_rollup" OVERWRITE ALL
@@ -106,9 +147,14 @@ WITH "ext" AS (
FROM TABLE(
EXTERN(
'{"type":"inline","data":"{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row1\", \"arrayString\": [\"a\", \"b\"], \"arrayLong\":[1, null,3], \"arrayDouble\":[1.1, 2.2, null]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row2\", \"arrayString\": [null, \"b\"], \"arrayLong\":null, \"arrayDouble\":[999, null, 5.5]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row3\", \"arrayString\": [], \"arrayLong\":[1, 2, 3], \"arrayDouble\":[null, 2.2, 1.1]} \n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row4\", \"arrayString\": [\"a\", \"b\"], \"arrayLong\":[1, 2, 3], \"arrayDouble\":[]}\n{\"timestamp\": \"2023-01-01T00:00:00\", \"label\": \"row5\", \"arrayString\": null, \"arrayLong\":[], \"arrayDouble\":null}"}',
- '{"type":"json"}',
- '[{"name":"timestamp", "type":"STRING"},{"name":"label", "type":"STRING"},{"name":"arrayString", "type":"ARRAY"},{"name":"arrayLong", "type":"ARRAY"},{"name":"arrayDouble", "type":"ARRAY"}]'
+ '{"type":"json"}'
)
+ ) EXTEND (
+ "timestamp" VARCHAR,
+ "label" VARCHAR,
+ "arrayString" VARCHAR ARRAY,
+ "arrayLong" BIGINT ARRAY,
+ "arrayDouble" DOUBLE ARRAY
)
)
SELECT
diff --git a/docs/querying/multi-value-dimensions.md b/docs/querying/multi-value-dimensions.md
index 2b33737a36fc..1ce3a618dac7 100644
--- a/docs/querying/multi-value-dimensions.md
+++ b/docs/querying/multi-value-dimensions.md
@@ -507,9 +507,9 @@ Avoid confusing string arrays with [multi-value dimensions](multi-value-dimensio
Use care during ingestion to ensure you get the type you want.
-To get arrays when performing an ingestion using JSON ingestion specs, such as [native batch](../ingestion/native-batch.md) or streaming ingestion such as with [Apache Kafka](../ingestion/kafka-ingestion.md), use dimension type `auto` or enable `useSchemaDiscovery`. When performing a [SQL-based ingestion](../multi-stage-query/index.md), write a query that generates arrays and set the context parameter `"arrayIngestMode": "array"`. Arrays may contain strings or numbers.
+To get arrays when performing an ingestion using JSON ingestion specs, such as [native batch](../ingestion/native-batch.md) or streaming ingestion such as with [Apache Kafka](../ingestion/kafka-ingestion.md), use dimension type `auto` or enable `useSchemaDiscovery`. When performing a [SQL-based ingestion](../multi-stage-query/index.md), write a query that generates arrays and set the context parameter [`"arrayIngestMode": "array"`](arrays.md#arrayingestmode). Arrays may contain strings or numbers.
-To get multi-value dimensions when performing an ingestion using JSON ingestion specs, use dimension type `string` and do not enable `useSchemaDiscovery`. When performing a [SQL-based ingestion](../multi-stage-query/index.md), wrap arrays in [`ARRAY_TO_MV`](multi-value-dimensions.md#sql-based-ingestion), which ensures you get multi-value dimensions in any `arrayIngestMode`. Multi-value dimensions can only contain strings.
+To get multi-value dimensions when performing an ingestion using JSON ingestion specs, use dimension type `string` and do not enable `useSchemaDiscovery`. When performing a [SQL-based ingestion](../multi-stage-query/index.md), wrap arrays in [`ARRAY_TO_MV`](multi-value-dimensions.md#sql-based-ingestion), which ensures you get multi-value dimensions in any [`arrayIngestMode`](arrays.md#arrayingestmode). Multi-value dimensions can only contain strings.
You can tell which type you have by checking the `INFORMATION_SCHEMA.COLUMNS` table, using a query like:
diff --git a/docs/tutorials/tutorial-append-data.md b/docs/tutorials/tutorial-append-data.md
new file mode 100644
index 000000000000..acdb05442516
--- /dev/null
+++ b/docs/tutorials/tutorial-append-data.md
@@ -0,0 +1,133 @@
+---
+id: tutorial-append-data
+title: Append data
+sidebar_label: Append data
+description: Learn how to append data to a datasource without changing the existing data in Apache Druid.
+---
+
+
+
+This tutorial shows you how to use the Apache Druid SQL [INSERT](../multi-stage-query/reference.md#insert) function to append data to a [datasource](../design/storage.md) without changing the existing data.
+The examples in the tutorial use the [multi-stage query (MSQ)](../multi-stage-query/index.md) task engine to execute SQL statements.
+
+## Prerequisites
+
+Before you follow the steps in this tutorial, download Druid as described in [Quickstart (local)](index.md) and have it running on your local machine. You don't need to load any data into the Druid cluster.
+
+You should be familiar with data querying in Druid. If you haven't already, go through the [Query data](../tutorials/tutorial-query.md) tutorial first.
+
+## Load sample data
+
+Load a sample dataset using [INSERT](../multi-stage-query/reference.md#insert) and [EXTERN](../multi-stage-query/reference.md#extern-function) functions. The EXTERN function lets you read external data or write to an external location.
+
+In the Druid [web console](../operations/web-console.md), go to the **Query** view and run the following query:
+
+```sql
+INSERT INTO "append_tutorial"
+SELECT
+ TIME_PARSE("timestamp") AS "__time",
+ "animal",
+ "number"
+FROM TABLE(
+ EXTERN(
+ '{"type":"inline","data":"{\"timestamp\":\"2024-01-01T07:01:35Z\",\"animal\":\"octopus\", \"number\":115}\n{\"timestamp\":\"2024-01-01T05:01:35Z\",\"animal\":\"mongoose\", \"number\":737}\n{\"timestamp\":\"2024-01-01T06:01:35Z\",\"animal\":\"snake\", \"number\":1234}\n{\"timestamp\":\"2024-01-01T01:01:35Z\",\"animal\":\"lion\", \"number\":300}\n{\"timestamp\":\"2024-01-02T07:01:35Z\",\"animal\":\"seahorse\", \"number\":115}\n{\"timestamp\":\"2024-01-02T05:01:35Z\",\"animal\":\"skunk\", \"number\":737}\n{\"timestamp\":\"2024-01-02T06:01:35Z\",\"animal\":\"iguana\", \"number\":1234}\n{\"timestamp\":\"2024-01-02T01:01:35Z\",\"animal\":\"opossum\", \"number\":300}"}',
+ '{"type":"json"}'
+ )
+ ) EXTEND ("timestamp" VARCHAR, "animal" VARCHAR, "number" BIGINT)
+PARTITIONED BY DAY
+```
+
+The resulting `append_tutorial` datasource contains records for eight animals over two days.
+To view the results, open a new tab and run the following query:
+
+```sql
+SELECT * FROM "append_tutorial"
+```
+
+
+ View the results
+
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-01T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-01T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-01T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-01T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-02T01:01:35.000Z`| `opossum`| 300 |
+| `2024-01-02T05:01:35.000Z`| `skunk`| 737 |
+| `2024-01-02T06:01:35.000Z`| `iguana`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `seahorse`| 115 |
+
+
+
+## Append data
+
+You can use the INSERT function to append data to the datasource without changing the existing data.
+In a new tab, run the following query to ingest and append data to the `append_tutorial` datasource:
+
+```sql
+INSERT INTO "append_tutorial"
+SELECT
+ TIME_PARSE("timestamp") AS "__time",
+ "animal",
+ "number"
+FROM TABLE(
+ EXTERN(
+ '{"type":"inline","data":"{\"timestamp\":\"2024-01-03T01:09:35Z\",\"animal\":\"zebra\", \"number\":233}\n{\"timestamp\":\"2024-01-04T07:01:35Z\",\"animal\":\"bear\", \"number\":577}\n{\"timestamp\":\"2024-01-04T05:01:35Z\",\"animal\":\"falcon\", \"number\":848}\n{\"timestamp\":\"2024-01-04T06:01:35Z\",\"animal\":\"giraffe\", \"number\":113}\n{\"timestamp\":\"2024-01-04T01:01:35Z\",\"animal\":\"rhino\", \"number\":473}"}',
+ '{"type":"json"}'
+ )
+ ) EXTEND ("timestamp" VARCHAR, "animal" VARCHAR, "number" BIGINT)
+PARTITIONED BY DAY
+```
+
+Druid adds rows for the subsequent days, which appear after the `seahorse` row in the results.
+When the task completes, open a new tab and run the following query to view the results:
+
+```sql
+SELECT * FROM "append_tutorial"
+```
+
+
+ View the results
+
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-01T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-01T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-01T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-01T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-02T01:01:35.000Z`| `opossum`| 300 |
+| `2024-01-02T05:01:35.000Z`| `skunk`| 737 |
+| `2024-01-02T06:01:35.000Z`| `iguana`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `seahorse`| 115 |
+| `2024-01-03T01:09:35.000Z`| `zebra`| 233 |
+| `2024-01-04T01:01:35.000Z`| `rhino`| 473 |
+| `2024-01-04T05:01:35.000Z`| `falcon`| 848 |
+| `2024-01-04T06:01:35.000Z`| `giraffe`| 113 |
+| `2024-01-04T07:01:35.000Z`| `bear`| 577 |
+
+
+
+## Learn more
+
+See the following topics for more information:
+
+* [SQL-based ingestion reference](../multi-stage-query/reference.md) for reference material on SQL syntax and context parameters for the MSQ task engine.
+* [SQL-based ingestion query examples](../multi-stage-query/examples.md) for example queries using the MSQ task engine.
\ No newline at end of file
diff --git a/docs/tutorials/tutorial-update-data.md b/docs/tutorials/tutorial-update-data.md
index aa85a2aca7ce..e761cd2a95d7 100644
--- a/docs/tutorials/tutorial-update-data.md
+++ b/docs/tutorials/tutorial-update-data.md
@@ -1,7 +1,8 @@
---
id: tutorial-update-data
-title: Update existing data
-sidebar_label: Update existing data
+title: Update data
+sidebar_label: Update data
+description: Learn how to update data in Apache Druid.
---
+Apache Druid stores data and indexes in [segment files](../design/segments.md) partitioned by time.
+After Druid creates a segment, its contents can't be modified.
+You can either replace data for the whole segment, or, in some cases, overshadow a portion of the segment data.
-This tutorial shows you how to update data in a datasource by overwriting existing data and adding new data to the datasource.
+In Druid, use time ranges to specify the data you want to update, as opposed to a primary key or dimensions often used in transactional databases. Data outside the specified replacement time range remains unaffected.
+You can use this Druid functionality to perform data updates, inserts, and deletes, similar to UPSERT functionality for transactional databases.
-## Prerequisites
-
-Before starting this tutorial, download and run Apache Druid on your local machine as described in
-the [single-machine quickstart](index.md).
-
-You should also be familiar with the material in the following tutorials:
-* [Load a file](../tutorials/tutorial-batch.md)
-* [Query data](../tutorials/tutorial-query.md)
-* [Rollup](../tutorials/tutorial-rollup.md)
+This tutorial shows you how to use the Druid SQL [REPLACE](../multi-stage-query/reference.md#replace) function with the OVERWRITE clause to update existing data.
-## Load initial data
+The tutorial walks you through the following use cases:
-Load an initial data set to which you will overwrite and append data.
+* [Overwrite all data](#overwrite-all-data)
+* [Overwrite records for a specific time range](#overwrite-records-for-a-specific-time-range)
+* [Update a row using partial segment overshadowing](#update-a-row-using-partial-segment-overshadowing)
-The ingestion spec is located at `quickstart/tutorial/updates-init-index.json`. This spec creates a datasource called `updates-tutorial` and ingests data from `quickstart/tutorial/updates-data.json`.
+All examples use the [multi-stage query (MSQ)](../multi-stage-query/index.md) task engine to execute SQL statements.
-Submit the ingestion task:
-
-```bash
-bin/post-index-task --file quickstart/tutorial/updates-init-index.json --url http://localhost:8081
-```
+## Prerequisites
-Start the SQL command-line client:
-```bash
-bin/dsql
-```
+Before you follow the steps in this tutorial, download Druid as described in [Quickstart (local)](index.md) and have it running on your local machine. You don't need to load any data into the Druid cluster.
-Run the following SQL query to retrieve data from `updates-tutorial`:
-
-```bash
-dsql> SELECT * FROM "updates-tutorial";
-┌──────────────────────────┬──────────┬───────┬────────┐
-│ __time │ animal │ count │ number │
-├──────────────────────────┼──────────┼───────┼────────┤
-│ 2018-01-01T01:01:00.000Z │ tiger │ 1 │ 100 │
-│ 2018-01-01T03:01:00.000Z │ aardvark │ 1 │ 42 │
-│ 2018-01-01T03:01:00.000Z │ giraffe │ 1 │ 14124 │
-└──────────────────────────┴──────────┴───────┴────────┘
-Retrieved 3 rows in 1.42s.
-```
+You should be familiar with data querying in Druid. If you haven't already, go through the [Query data](../tutorials/tutorial-query.md) tutorial first.
-The datasource contains three rows of data with an `animal` dimension and a `number` metric.
+## Load sample data
-## Overwrite data
+Load a sample dataset using [REPLACE](../multi-stage-query/reference.md#replace) and [EXTERN](../multi-stage-query/reference.md#extern-function) functions.
+In Druid SQL, the REPLACE function can create a new [datasource](../design/storage.md) or update an existing datasource.
-To overwrite the data, submit another task for the same interval but with different input data.
+In the Druid [web console](../operations/web-console.md), go to the **Query** view and run the following query:
-The `quickstart/tutorial/updates-overwrite-index.json` spec performs an overwrite on the `updates-tutorial` datasource.
+```sql
+REPLACE INTO "update_tutorial" OVERWRITE ALL
+WITH "ext" AS (
+ SELECT *
+ FROM TABLE(
+ EXTERN(
+ '{"type":"inline","data":"{\"timestamp\":\"2024-01-01T07:01:35Z\",\"animal\":\"octopus\", \"number\":115}\n{\"timestamp\":\"2024-01-01T05:01:35Z\",\"animal\":\"mongoose\", \"number\":737}\n{\"timestamp\":\"2024-01-01T06:01:35Z\",\"animal\":\"snake\", \"number\":1234}\n{\"timestamp\":\"2024-01-01T01:01:35Z\",\"animal\":\"lion\", \"number\":300}\n{\"timestamp\":\"2024-01-02T07:01:35Z\",\"animal\":\"seahorse\", \"number\":115}\n{\"timestamp\":\"2024-01-02T05:01:35Z\",\"animal\":\"skunk\", \"number\":737}\n{\"timestamp\":\"2024-01-02T06:01:35Z\",\"animal\":\"iguana\", \"number\":1234}\n{\"timestamp\":\"2024-01-02T01:01:35Z\",\"animal\":\"opossum\", \"number\":300}"}',
+ '{"type":"json"}'
+ )
+ ) EXTEND ("timestamp" VARCHAR, "animal" VARCHAR, "number" BIGINT)
+)
+SELECT
+ TIME_PARSE("timestamp") AS "__time",
+ "animal",
+ "number"
+FROM "ext"
+PARTITIONED BY DAY
-In the overwrite ingestion spec, notice the following:
-* The `intervals` field remains the same: `"intervals" : ["2018-01-01/2018-01-03"]`
-* New data is loaded from the local file, `quickstart/tutorial/updates-data2.json`
-* `appendToExisting` is set to `false`, indicating an overwrite task
+```
-Submit the ingestion task to overwrite the data:
+In the resulting `update_tutorial` datasource, individual rows are uniquely identified by `__time`, `animal`, and `number`.
+To view the results, open a new tab and run the following query:
-```bash
-bin/post-index-task --file quickstart/tutorial/updates-overwrite-index.json --url http://localhost:8081
+```sql
+SELECT * FROM "update_tutorial"
```
-When Druid finishes loading the new segment from this overwrite task, run the SELECT query again.
-In the new results, the `tiger` row now has the value `lion`, the `aardvark` row has a different number, and the `giraffe` row has been replaced with a `bear` row.
-
-```bash
-dsql> SELECT * FROM "updates-tutorial";
-┌──────────────────────────┬──────────┬───────┬────────┐
-│ __time │ animal │ count │ number │
-├──────────────────────────┼──────────┼───────┼────────┤
-│ 2018-01-01T01:01:00.000Z │ lion │ 1 │ 100 │
-│ 2018-01-01T03:01:00.000Z │ aardvark │ 1 │ 9999 │
-│ 2018-01-01T04:01:00.000Z │ bear │ 1 │ 111 │
-└──────────────────────────┴──────────┴───────┴────────┘
-Retrieved 3 rows in 0.02s.
+
+ View the results
+
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-01T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-01T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-01T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-01T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-02T01:01:35.000Z`| `opossum`| 300 |
+| `2024-01-02T05:01:35.000Z`| `skunk`| 737 |
+| `2024-01-02T06:01:35.000Z`| `iguana`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `seahorse`| 115 |
+
+
+
+The results contain records for eight animals over two days.
+
+## Overwrite all data
+
+You can use the REPLACE function with OVERWRITE ALL to replace the entire datasource with new data while dropping the old data.
+
+In the web console, open a new tab and run the following query to overwrite the entire `update_tutorial` datasource with new data whose timestamps are one day later:
+
+```sql
+REPLACE INTO "update_tutorial" OVERWRITE ALL
+WITH "ext" AS (SELECT *
+FROM TABLE(
+ EXTERN(
+ '{"type":"inline","data":"{\"timestamp\":\"2024-01-02T07:01:35Z\",\"animal\":\"octopus\", \"number\":115}\n{\"timestamp\":\"2024-01-02T05:01:35Z\",\"animal\":\"mongoose\", \"number\":737}\n{\"timestamp\":\"2024-01-02T06:01:35Z\",\"animal\":\"snake\", \"number\":1234}\n{\"timestamp\":\"2024-01-02T01:01:35Z\",\"animal\":\"lion\", \"number\":300}\n{\"timestamp\":\"2024-01-03T07:01:35Z\",\"animal\":\"seahorse\", \"number\":115}\n{\"timestamp\":\"2024-01-03T05:01:35Z\",\"animal\":\"skunk\", \"number\":737}\n{\"timestamp\":\"2024-01-03T06:01:35Z\",\"animal\":\"iguana\", \"number\":1234}\n{\"timestamp\":\"2024-01-03T01:01:35Z\",\"animal\":\"opossum\", \"number\":300}"}',
+ '{"type":"json"}'
+ )
+) EXTEND ("timestamp" VARCHAR, "animal" VARCHAR, "number" BIGINT))
+SELECT
+ TIME_PARSE("timestamp") AS "__time",
+ "animal",
+ "number"
+FROM "ext"
+PARTITIONED BY DAY
```
-## Combine existing data with new data and overwrite
-
-Now append new data to the `updates-tutorial` datasource from `quickstart/tutorial/updates-data3.json` using the ingestion spec `quickstart/tutorial/updates-append-index.json`.
-
-The spec directs Druid to read from the existing `updates-tutorial` datasource as well as the `quickstart/tutorial/updates-data3.json` file. The task combines data from the two input sources, then overwrites the original data with the new combined data.
-
-Submit that task:
-
-```bash
-bin/post-index-task --file quickstart/tutorial/updates-append-index.json --url http://localhost:8081
+
+ View the results
+
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-02T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-02T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-02T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-03T01:01:35.000Z`| `opossum`| 300 |
+| `2024-01-03T05:01:35.000Z`| `skunk`| 737 |
+| `2024-01-03T06:01:35.000Z`| `iguana`| 1234 |
+| `2024-01-03T07:01:35.000Z`| `seahorse`| 115 |
+
+
+
+Note that the values in the `__time` column have shifted one day later.
+
+## Overwrite records for a specific time range
+
+You can use the REPLACE function to overwrite a specific time range of a datasource. When you overwrite a specific time range, that time range must align with the granularity specified in the PARTITIONED BY clause.
+
+In the web console, open a new tab and run the following query to insert a new row and update specific rows. Note that the OVERWRITE WHERE clause tells the query to only update records for the date 2024-01-03.
+
+```sql
+REPLACE INTO "update_tutorial"
+ OVERWRITE WHERE "__time" >= TIMESTAMP'2024-01-03 00:00:00' AND "__time" < TIMESTAMP'2024-01-04 00:00:00'
+WITH "ext" AS (SELECT *
+FROM TABLE(
+ EXTERN(
+ '{"type":"inline","data":"{\"timestamp\":\"2024-01-03T01:01:35Z\",\"animal\":\"tiger\", \"number\":300}\n{\"timestamp\":\"2024-01-03T07:01:35Z\",\"animal\":\"seahorse\", \"number\":500}\n{\"timestamp\":\"2024-01-03T05:01:35Z\",\"animal\":\"polecat\", \"number\":626}\n{\"timestamp\":\"2024-01-03T06:01:35Z\",\"animal\":\"iguana\", \"number\":300}\n{\"timestamp\":\"2024-01-03T01:01:35Z\",\"animal\":\"flamingo\", \"number\":999}"}',
+ '{"type":"json"}'
+ )
+) EXTEND ("timestamp" VARCHAR, "animal" VARCHAR, "number" BIGINT))
+SELECT
+ TIME_PARSE("timestamp") AS "__time",
+ "animal",
+ "number"
+FROM "ext"
+PARTITIONED BY DAY
```
-When Druid finishes loading the new segment from this overwrite task, it adds the new rows to the datasource.
-Run the SELECT query again. Druid automatically rolls up the data at ingestion time, aggregating the data in the `lion` row:
-
-```bash
-dsql> SELECT * FROM "updates-tutorial";
-┌──────────────────────────┬──────────┬───────┬────────┐
-│ __time │ animal │ count │ number │
-├──────────────────────────┼──────────┼───────┼────────┤
-│ 2018-01-01T01:01:00.000Z │ lion │ 2 │ 400 │
-│ 2018-01-01T03:01:00.000Z │ aardvark │ 1 │ 9999 │
-│ 2018-01-01T04:01:00.000Z │ bear │ 1 │ 111 │
-│ 2018-01-01T05:01:00.000Z │ mongoose │ 1 │ 737 │
-│ 2018-01-01T06:01:00.000Z │ snake │ 1 │ 1234 │
-│ 2018-01-01T07:01:00.000Z │ octopus │ 1 │ 115 │
-└──────────────────────────┴──────────┴───────┴────────┘
-Retrieved 6 rows in 0.02s.
+
+ View the results
+
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-02T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-02T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-02T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-03T01:01:35.000Z`| `flamingo`| 999 |
+| `2024-01-03T01:01:35.000Z`| `tiger`| 300 |
+| `2024-01-03T05:01:35.000Z`| `polecat`| 626 |
+| `2024-01-03T06:01:35.000Z`| `iguana`| 300 |
+| `2024-01-03T07:01:35.000Z`| `seahorse`| 500 |
+
+
+
+Note the changes in the resulting datasource:
+
+* There is a new row for `flamingo`.
+* The `opossum` row has been replaced by a `tiger` row.
+* The `skunk` row has been replaced by a `polecat` row.
+* The `iguana` and `seahorse` rows have new `number` values.
+
+## Update a row using partial segment overshadowing
+
+In Druid, you can overlay older data with newer data for the entire segment or portions of the segment within a particular partition.
+This capability is called [overshadowing](../ingestion/tasks.md#overshadowing-between-segments).
+
+You can use partial overshadowing to update a single row by adding a segment with a smaller time granularity on top of the existing data.
+This is a less common variation of the more typical approach of replacing an entire time chunk.
+
+The following example demonstrates how to update data using partial overshadowing with mixed segment granularity.
+Note the following important points about the example:
+
+* The query updates the `number` value of a single record.
+* The original datasource uses DAY segment granularity.
+* The new data segment is at HOUR granularity and represents a time range that's smaller than the existing data.
+* The OVERWRITE WHERE and WHERE TIME_IN_INTERVAL clauses specify the destination where the update occurs and the source of the update, respectively.
+* The query replaces everything within the specified interval. To update only a subset of data in that interval, you have to carry forward all records, changing only what you want to change. You can accomplish that by using the [CASE](../querying/sql-functions.md#case) function in the SELECT list.
+
+```sql
+REPLACE INTO "update_tutorial"
+ OVERWRITE
+ WHERE "__time" >= TIMESTAMP'2024-01-03 05:00:00' AND "__time" < TIMESTAMP'2024-01-03 06:00:00'
+SELECT
+ "__time",
+ "animal",
+ CAST(486 AS BIGINT) AS "number"
+FROM "update_tutorial"
+WHERE TIME_IN_INTERVAL("__time", '2024-01-03T05:01:35Z/PT1S')
+PARTITIONED BY FLOOR(__time TO HOUR)
```
-## Append data
+
+ View the results
-Now you append data to the datasource without changing the existing data.
-Use the ingestion spec located at `quickstart/tutorial/updates-append-index2.json`.
+| `__time` | `animal` | `number`|
+| -- | -- | -- |
+| `2024-01-02T01:01:35.000Z`| `lion`| 300 |
+| `2024-01-02T05:01:35.000Z`| `mongoose`| 737 |
+| `2024-01-02T06:01:35.000Z`| `snake`| 1234 |
+| `2024-01-02T07:01:35.000Z`| `octopus`| 115 |
+| `2024-01-03T01:01:35.000Z`| `flamingo`| 999 |
+| `2024-01-03T01:01:35.000Z`| `tiger`| 300 |
+| `2024-01-03T05:01:35.000Z`| `polecat`| 486 |
+| `2024-01-03T06:01:35.000Z`| `iguana`| 300 |
+| `2024-01-03T07:01:35.000Z`| `seahorse`| 500 |
-The spec directs Druid to ingest data from `quickstart/tutorial/updates-data4.json` and append it to the `updates-tutorial` datasource. The property `appendToExisting` is set to `true` in this spec.
+
-Submit the task:
+Note that the `number` for `polecat` has changed from 626 to 486.
-```bash
-bin/post-index-task --file quickstart/tutorial/updates-append-index2.json --url http://localhost:8081
-```
+When you perform partial segment overshadowing multiple times, you can create segment fragmentation that could affect query performance. Use [compaction](../data-management/compaction.md) to correct any fragmentation.
-Druid adds two additional rows after `octopus`. When the task completes, query the data again to see them.
-Druid doesn't roll up the new `bear` row with the existing `bear` row because it stored the new data in a separate segment.
-
-```bash
-dsql> SELECT * FROM "updates-tutorial";
-┌──────────────────────────┬──────────┬───────┬────────┐
-│ __time │ animal │ count │ number │
-├──────────────────────────┼──────────┼───────┼────────┤
-│ 2018-01-01T01:01:00.000Z │ lion │ 2 │ 400 │
-│ 2018-01-01T03:01:00.000Z │ aardvark │ 1 │ 9999 │
-│ 2018-01-01T04:01:00.000Z │ bear │ 1 │ 111 │
-│ 2018-01-01T05:01:00.000Z │ mongoose │ 1 │ 737 │
-│ 2018-01-01T06:01:00.000Z │ snake │ 1 │ 1234 │
-│ 2018-01-01T07:01:00.000Z │ octopus │ 1 │ 115 │
-│ 2018-01-01T04:01:00.000Z │ bear │ 1 │ 222 │
-│ 2018-01-01T09:01:00.000Z │ falcon │ 1 │ 1241 │
-└──────────────────────────┴──────────┴───────┴────────┘
-Retrieved 8 rows in 0.02s.
-```
+## Learn more
-Run the following groupBy query to see that the `bear` rows group together at query time:
-
-```bash
-dsql> SELECT __time, animal, SUM("count"), SUM("number") FROM "updates-tutorial" GROUP BY __time, animal;
-┌──────────────────────────┬──────────┬────────┬────────┐
-│ __time │ animal │ EXPR$2 │ EXPR$3 │
-├──────────────────────────┼──────────┼────────┼────────┤
-│ 2018-01-01T01:01:00.000Z │ lion │ 2 │ 400 │
-│ 2018-01-01T03:01:00.000Z │ aardvark │ 1 │ 9999 │
-│ 2018-01-01T04:01:00.000Z │ bear │ 2 │ 333 │
-│ 2018-01-01T05:01:00.000Z │ mongoose │ 1 │ 737 │
-│ 2018-01-01T06:01:00.000Z │ snake │ 1 │ 1234 │
-│ 2018-01-01T07:01:00.000Z │ octopus │ 1 │ 115 │
-│ 2018-01-01T09:01:00.000Z │ falcon │ 1 │ 1241 │
-└──────────────────────────┴──────────┴────────┴────────┘
-Retrieved 7 rows in 0.23s.
-```
+See the following topics for more information:
+
+* [Data updates](../data-management/update.md) for an overview of updating data in Druid.
+* [Load files with SQL-based ingestion](../tutorials/tutorial-msq-extern.md) for generating a query that references externally hosted data.
+* [Overwrite data with REPLACE](../multi-stage-query/concepts.md#overwrite-data-with-replace) for details on how the MSQ task engine executes SQL REPLACE queries.
\ No newline at end of file
diff --git a/extensions-contrib/compressed-bigdecimal/pom.xml b/extensions-contrib/compressed-bigdecimal/pom.xml
index 5dfaf7c38e17..c70fb2251cc7 100644
--- a/extensions-contrib/compressed-bigdecimal/pom.xml
+++ b/extensions-contrib/compressed-bigdecimal/pom.xml
@@ -63,6 +63,36 @@
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-migrationsupport</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-params</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.vintage</groupId>
+            <artifactId>junit-vintage-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.druid</groupId>
             <artifactId>druid-processing</artifactId>
@@ -91,11 +121,6 @@
             <type>test-jar</type>
             <scope>test</scope>
         </dependency>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
         <dependency>
             <groupId>org.hamcrest</groupId>
             <artifactId>java-hamcrest</artifactId>
diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMaxSqlAggregatorTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMaxSqlAggregatorTest.java
index 06709040b645..1223135ab27d 100644
--- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMaxSqlAggregatorTest.java
+++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMaxSqlAggregatorTest.java
@@ -19,11 +19,17 @@
package org.apache.druid.compressedbigdecimal;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+
public class CompressedBigDecimalMaxSqlAggregatorTest extends CompressedBigDecimalSqlAggregatorTestBase
{
private static final String FUNCTION_NAME = CompressedBigDecimalMaxSqlAggregator.NAME;
@Override
+ @Test
public void testCompressedBigDecimalAggWithNumberParse()
{
testCompressedBigDecimalAggWithNumberParseHelper(
@@ -34,15 +40,19 @@ public void testCompressedBigDecimalAggWithNumberParse()
}
@Override
+ @Test
public void testCompressedBigDecimalAggWithStrictNumberParse()
{
- testCompressedBigDecimalAggWithStrictNumberParseHelper(
- FUNCTION_NAME,
- CompressedBigDecimalMaxAggregatorFactory::new
- );
+ assertThrows(NumberFormatException.class, () -> {
+ testCompressedBigDecimalAggWithStrictNumberParseHelper(
+ FUNCTION_NAME,
+ CompressedBigDecimalMaxAggregatorFactory::new
+ );
+ });
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
{
testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScaleHelper(
@@ -53,6 +63,7 @@ public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultScale()
{
testCompressedBigDecimalAggDefaultScaleHelper(
@@ -63,6 +74,7 @@ public void testCompressedBigDecimalAggDefaultScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultSizeAndScale()
{
testCompressedBigDecimalAggDefaultSizeAndScaleHelper(
diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMinSqlAggregatorTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMinSqlAggregatorTest.java
index 3a18dd49652b..e739b928f2e0 100644
--- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMinSqlAggregatorTest.java
+++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalMinSqlAggregatorTest.java
@@ -19,11 +19,16 @@
package org.apache.druid.compressedbigdecimal;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.Assert.assertThrows;
+
public class CompressedBigDecimalMinSqlAggregatorTest extends CompressedBigDecimalSqlAggregatorTestBase
{
private static final String FUNCTION_NAME = CompressedBigDecimalMinSqlAggregator.NAME;
@Override
+ @Test
public void testCompressedBigDecimalAggWithNumberParse()
{
testCompressedBigDecimalAggWithNumberParseHelper(
@@ -34,15 +39,19 @@ public void testCompressedBigDecimalAggWithNumberParse()
}
@Override
+ @Test
public void testCompressedBigDecimalAggWithStrictNumberParse()
{
- testCompressedBigDecimalAggWithStrictNumberParseHelper(
- FUNCTION_NAME,
- CompressedBigDecimalMinAggregatorFactory::new
- );
+ assertThrows(NumberFormatException.class, () -> {
+ testCompressedBigDecimalAggWithStrictNumberParseHelper(
+ FUNCTION_NAME,
+ CompressedBigDecimalMinAggregatorFactory::new
+ );
+ });
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
{
testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScaleHelper(
@@ -53,6 +62,7 @@ public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultScale()
{
testCompressedBigDecimalAggDefaultScaleHelper(
@@ -63,6 +73,7 @@ public void testCompressedBigDecimalAggDefaultScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultSizeAndScale()
{
testCompressedBigDecimalAggDefaultSizeAndScaleHelper(
diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java
index 2acc0d88e8db..baa9c0f32e8c 100644
--- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java
+++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSqlAggregatorTestBase.java
@@ -48,9 +48,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
@@ -80,11 +79,11 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
QueryableIndex index =
IndexBuilder.create()
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
@@ -120,7 +119,8 @@ public void configureJsonMapper(ObjectMapper objectMapper)
@Test
public abstract void testCompressedBigDecimalAggWithNumberParse();
- @Test(expected = NumberFormatException.class)
+ // expected: NumberFormatException.class
+ @Test
public abstract void testCompressedBigDecimalAggWithStrictNumberParse();
@Test
diff --git a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSumSqlAggregatorTest.java b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSumSqlAggregatorTest.java
index db248ef67f0e..8970f002ffe1 100644
--- a/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSumSqlAggregatorTest.java
+++ b/extensions-contrib/compressed-bigdecimal/src/test/java/org/apache/druid/compressedbigdecimal/CompressedBigDecimalSumSqlAggregatorTest.java
@@ -19,11 +19,16 @@
package org.apache.druid.compressedbigdecimal;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.Assert.assertThrows;
+
public class CompressedBigDecimalSumSqlAggregatorTest extends CompressedBigDecimalSqlAggregatorTestBase
{
private static final String FUNCTION_NAME = CompressedBigDecimalSumSqlAggregator.NAME;
@Override
+ @Test
public void testCompressedBigDecimalAggWithNumberParse()
{
testCompressedBigDecimalAggWithNumberParseHelper(
@@ -34,15 +39,19 @@ public void testCompressedBigDecimalAggWithNumberParse()
}
@Override
+ @Test
public void testCompressedBigDecimalAggWithStrictNumberParse()
{
- testCompressedBigDecimalAggWithStrictNumberParseHelper(
- FUNCTION_NAME,
- CompressedBigDecimalSumAggregatorFactory::new
- );
+ assertThrows(NumberFormatException.class, () -> {
+ testCompressedBigDecimalAggWithStrictNumberParseHelper(
+ FUNCTION_NAME,
+ CompressedBigDecimalSumAggregatorFactory::new
+ );
+ });
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
{
testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScaleHelper(
@@ -53,6 +62,7 @@ public void testCompressedBigDecimalAggDefaultNumberParseAndCustomSizeAndScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultScale()
{
testCompressedBigDecimalAggDefaultScaleHelper(
@@ -63,6 +73,7 @@ public void testCompressedBigDecimalAggDefaultScale()
}
@Override
+ @Test
public void testCompressedBigDecimalAggDefaultSizeAndScale()
{
testCompressedBigDecimalAggDefaultSizeAndScaleHelper(
diff --git a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java
index 333868780157..7e0eaf60d836 100644
--- a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java
+++ b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java
@@ -27,6 +27,8 @@
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.EntryAlreadyExists;
import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexing.common.task.HadoopIndexTask;
import org.apache.druid.indexing.overlord.DataSourceMetadata;
@@ -47,7 +49,6 @@
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.java.util.emitter.EmittingLogger;
-import org.apache.druid.metadata.EntryExistsException;
import org.apache.druid.metadata.MetadataSupervisorManager;
import org.apache.druid.metadata.SqlSegmentsMetadataManager;
import org.apache.druid.timeline.DataSegment;
@@ -443,8 +444,15 @@ private void submitTasks(
runningTasks.put(entry.getKey(), task);
}
}
- catch (EntryExistsException e) {
- log.error("task %s already exsits", task);
+ catch (DruidException e) {
+ if (EntryAlreadyExists.ERROR_CODE.equals(e.getErrorCode())) {
+ log.error("Task[%s] already exists", task.getId());
+ } else {
+ throw e;
+ }
+ }
+ catch (RuntimeException e) {
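+ // Rethrow unchecked exceptions as-is; only checked exceptions are wrapped below.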
+ throw e;
}
catch (Exception e) {
throw new RuntimeException(e);
diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java
index 1e74180ae69f..64070b11dc87 100644
--- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java
+++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java
@@ -25,8 +25,10 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
+import junit.framework.AssertionFailedError;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;
+import org.apache.druid.error.EntryAlreadyExists;
import org.apache.druid.indexer.HadoopIOConfig;
import org.apache.druid.indexer.HadoopIngestionSpec;
import org.apache.druid.indexer.HadoopTuningConfig;
@@ -37,6 +39,7 @@
import org.apache.druid.indexing.overlord.TaskQueue;
import org.apache.druid.indexing.overlord.TaskStorage;
import org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig;
+import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator;
@@ -58,11 +61,12 @@
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.rules.ExpectedException;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -71,10 +75,9 @@
public class MaterializedViewSupervisorTest
{
@Rule
- public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule();
+ public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule
+ = new TestDerbyConnector.DerbyConnectorRule();
- @Rule
- public final ExpectedException expectedException = ExpectedException.none();
private TaskStorage taskStorage;
private TaskMaster taskMaster;
private IndexerMetadataStorageCoordinator indexerMetadataStorageCoordinator;
@@ -102,7 +105,7 @@ public void setUp()
metadataSupervisorManager = EasyMock.createMock(MetadataSupervisorManager.class);
sqlSegmentsMetadataManager = EasyMock.createMock(SqlSegmentsMetadataManager.class);
taskQueue = EasyMock.createMock(TaskQueue.class);
- taskQueue.start();
+
objectMapper.registerSubtypes(new NamedType(HashBasedNumberedShardSpec.class, "hashed"));
spec = new MaterializedViewSupervisorSpec(
"base",
@@ -113,7 +116,7 @@ public void setUp()
null,
null,
null,
- null,
+ Collections.singletonMap("maxTaskCount", 2),
false,
objectMapper,
taskMaster,
@@ -133,125 +136,81 @@ public void setUp()
@Test
public void testCheckSegments() throws IOException
{
- Set<DataSegment> baseSegments = Sets.newHashSet(
- new DataSegment(
- "base",
- Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
- "2015-01-02",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- ),
- new DataSegment(
- "base",
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- "2015-01-03",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- ),
- new DataSegment(
- "base",
- Intervals.of("2015-01-03T00Z/2015-01-04T00Z"),
- "2015-01-04",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
- );
- Set<DataSegment> derivativeSegments = Sets.newHashSet(
- new DataSegment(
- derivativeDatasourceName,
- Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
- "2015-01-02",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- ),
- new DataSegment(
- derivativeDatasourceName,
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- "3015-01-01",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
+ List<DataSegment> baseSegments = createBaseSegments();
+ Set<DataSegment> derivativeSegments = Sets.newHashSet(createDerivativeSegments());
+
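+ // day1 and day2 are the intervals of the first two base segments.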
+ final Interval day1 = baseSegments.get(0).getInterval();
+ final Interval day2 = new Interval(day1.getStart().plusDays(1), day1.getEnd().plusDays(1));
+
+ indexerMetadataStorageCoordinator.commitSegments(new HashSet<>(baseSegments));
+ indexerMetadataStorageCoordinator.commitSegments(derivativeSegments);
+ EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
+ EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
+ EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
+
+ Pair<SortedMap<Interval, String>, Map<Interval, List<DataSegment>>> toBuildInterval
+     = supervisor.checkSegments();
+
+ Map<Interval, List<DataSegment>> expectedSegments = ImmutableMap.of(
+ day1, Collections.singletonList(baseSegments.get(0)),
+ day2, Collections.singletonList(baseSegments.get(1))
);
+ Assert.assertEquals(Collections.singleton(day1), toBuildInterval.lhs.keySet());
+ Assert.assertEquals(expectedSegments, toBuildInterval.rhs);
+ }
+
+ @Test
+ public void testSubmitTasksDoesNotFailIfTaskAlreadyExists() throws IOException
+ {
+ Set<DataSegment> baseSegments = Sets.newHashSet(createBaseSegments());
+ Set<DataSegment> derivativeSegments = Sets.newHashSet(createDerivativeSegments());
+
indexerMetadataStorageCoordinator.commitSegments(baseSegments);
indexerMetadataStorageCoordinator.commitSegments(derivativeSegments);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
- Pair<SortedMap<Interval, String>, Map<Interval, List<DataSegment>>> toBuildInterval = supervisor.checkSegments();
- Set<Interval> expectedToBuildInterval = Sets.newHashSet(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"));
- Map<Interval, List<DataSegment>> expectedSegments = new HashMap<>();
- expectedSegments.put(
- Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
- Collections.singletonList(
- new DataSegment(
- "base",
- Intervals.of("2015-01-01T00Z/2015-01-02T00Z"),
- "2015-01-02",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
- )
- );
- expectedSegments.put(
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- Collections.singletonList(
- new DataSegment(
- "base",
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- "2015-01-03",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
- )
+
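+ // Simulate the task already being present in the queue; the supervisor should log the error and continue.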
+ EasyMock.expect(taskQueue.add(EasyMock.anyObject()))
+ .andThrow(EntryAlreadyExists.exception("Task ID already exists"));
+
+ EasyMock.replay(taskMaster, taskStorage, taskQueue);
+
+ supervisor.checkSegmentsAndSubmitTasks();
+
+ EasyMock.verify(taskMaster, taskStorage, taskQueue);
+ }
+
+ @Test
+ public void testSubmitTasksFailsIfTaskCannotBeAdded() throws IOException
+ {
+ Set<DataSegment> baseSegments = Sets.newHashSet(createBaseSegments());
+ Set<DataSegment> derivativeSegments = Sets.newHashSet(createDerivativeSegments());
+
+ indexerMetadataStorageCoordinator.commitSegments(baseSegments);
+ indexerMetadataStorageCoordinator.commitSegments(derivativeSegments);
+ EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
+ EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
+ EasyMock.expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
+
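+ // Simulate a generic failure while adding the task; the supervisor should propagate the exception.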
+ EasyMock.expect(taskQueue.add(EasyMock.anyObject()))
+ .andThrow(new ISE("Could not add task"));
+
+ EasyMock.replay(taskMaster, taskStorage, taskQueue);
+
+ ISE exception = Assert.assertThrows(
+ ISE.class,
+ () -> supervisor.checkSegmentsAndSubmitTasks()
);
- Assert.assertEquals(expectedToBuildInterval, toBuildInterval.lhs.keySet());
- Assert.assertEquals(expectedSegments, toBuildInterval.rhs);
+ Assert.assertEquals("Could not add task", exception.getMessage());
+
+ EasyMock.verify(taskMaster, taskStorage, taskQueue);
}
@Test
public void testCheckSegmentsAndSubmitTasks() throws IOException
{
- Set<DataSegment> baseSegments = Sets.newHashSet(
- new DataSegment(
- "base",
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- "2015-01-03",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
- );
+ Set<DataSegment> baseSegments = Collections.singleton(createBaseSegments().get(0));
indexerMetadataStorageCoordinator.commitSegments(baseSegments);
EasyMock.expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
EasyMock.expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
@@ -315,28 +274,12 @@ public void testCheckSegmentsAndSubmitTasks() throws IOException
Assert.assertEquals(expectedRunningTasks, runningTasks);
Assert.assertEquals(expectedRunningVersion, runningVersion);
-
}
- /**
- * Verifies that creating HadoopIndexTask compleates without raising exception.
- */
@Test
- public void testCreateTask()
+ public void testCreateTaskSucceeds()
{
- List<DataSegment> baseSegments = Collections.singletonList(
- new DataSegment(
- "base",
- Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
- "2015-01-03",
- ImmutableMap.of(),
- ImmutableList.of("dim1", "dim2"),
- ImmutableList.of("m1"),
- new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
- 9,
- 1024
- )
- );
+ List<DataSegment> baseSegments = createBaseSegments().subList(0, 1);
HadoopIndexTask task = spec.createTask(
Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
@@ -348,7 +291,7 @@ public void testCreateTask()
}
@Test
- public void testSuspendedDoesntRun()
+ public void testSuspendedDoesNotRun()
{
MaterializedViewSupervisorSpec suspended = new MaterializedViewSupervisorSpec(
"base",
@@ -378,10 +321,7 @@ public void testSuspendedDoesntRun()
// which will be true if truly suspended, since this is the first operation of the 'run' method otherwise
IndexerSQLMetadataStorageCoordinator mock = EasyMock.createMock(IndexerSQLMetadataStorageCoordinator.class);
EasyMock.expect(mock.retrieveDataSourceMetadata(suspended.getDataSourceName()))
- .andAnswer(() -> {
- Assert.fail();
- return null;
- })
+ .andThrow(new AssertionFailedError())
.anyTimes();
EasyMock.replay(mock);
@@ -420,4 +360,36 @@ public void testResetOffsetsNotSupported()
() -> supervisor.resetOffsets(null)
);
}
+
+ private List<DataSegment> createBaseSegments()
+ {
+ return Arrays.asList(
+ createSegment("base", "2015-01-01T00Z/2015-01-02T00Z", "2015-01-02"),
+ createSegment("base", "2015-01-02T00Z/2015-01-03T00Z", "2015-01-03"),
+ createSegment("base", "2015-01-03T00Z/2015-01-04T00Z", "2015-01-04")
+ );
+ }
+
+ private List<DataSegment> createDerivativeSegments()
+ {
+ return Arrays.asList(
+ createSegment(derivativeDatasourceName, "2015-01-01T00Z/2015-01-02T00Z", "2015-01-02"),
+ createSegment(derivativeDatasourceName, "2015-01-02T00Z/2015-01-03T00Z", "3015-01-01")
+ );
+ }
+
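+ // Builds a minimal test segment for the given datasource, interval, and version.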
+ private DataSegment createSegment(String datasource, String interval, String version)
+ {
+ return new DataSegment(
+ datasource,
+ Intervals.of(interval),
+ version,
+ Collections.emptyMap(),
+ Arrays.asList("dim1", "dim2"),
+ Collections.singletonList("m2"),
+ new HashBasedNumberedShardSpec(0, 1, 0, 1, null, null, null),
+ 9,
+ 1024
+ );
+ }
}
diff --git a/extensions-contrib/tdigestsketch/pom.xml b/extensions-contrib/tdigestsketch/pom.xml
index 824a9af4398b..ccfa7f6fb653 100644
--- a/extensions-contrib/tdigestsketch/pom.xml
+++ b/extensions-contrib/tdigestsketch/pom.xml
@@ -139,6 +139,31 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-api</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
diff --git a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java
index de515814d239..2a3db93b81a8 100644
--- a/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java
+++ b/extensions-contrib/tdigestsketch/src/test/java/org/apache/druid/query/aggregation/tdigestsketch/sql/TDigestSketchSqlAggregatorTest.java
@@ -53,9 +53,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.List;
public class TDigestSketchSqlAggregatorTest extends BaseCalciteQueryTest
@@ -72,13 +71,13 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
TDigestSketchModule.registerSerde();
final QueryableIndex index =
IndexBuilder.create(CalciteTests.getJsonMapper())
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/datasketches/pom.xml b/extensions-core/datasketches/pom.xml
index 4f2180f0e3b8..bdd5b0391615 100644
--- a/extensions-core/datasketches/pom.xml
+++ b/extensions-core/datasketches/pom.xml
@@ -150,6 +150,31 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-api</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java
index 538ca8171808..b0eae011e2c3 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/hll/sql/HllSketchSqlAggregatorTest.java
@@ -86,9 +86,8 @@
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -257,12 +256,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
HllSketchModule.registerSerde();
final QueryableIndex index = IndexBuilder
.create()
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java
index c71890c036c0..fe8680de9848 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/quantiles/sql/DoublesSketchSqlAggregatorTest.java
@@ -62,9 +62,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -84,13 +83,13 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
DoublesSketchModule.registerSerde();
final QueryableIndex index =
IndexBuilder.create(CalciteTests.getJsonMapper())
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java
index 2650e15a04b8..19dc255ff2fe 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/theta/sql/ThetaSketchSqlAggregatorTest.java
@@ -70,9 +70,8 @@
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@@ -111,12 +110,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
SketchModule.registerSerde();
final QueryableIndex index = IndexBuilder.create()
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/sql/ArrayOfDoublesSketchSqlAggregatorTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/sql/ArrayOfDoublesSketchSqlAggregatorTest.java
index 134c2c76e4ff..0ed01f3f4117 100644
--- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/sql/ArrayOfDoublesSketchSqlAggregatorTest.java
+++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/sql/ArrayOfDoublesSketchSqlAggregatorTest.java
@@ -52,9 +52,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
@@ -111,12 +110,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
ArrayOfDoublesSketchModule.registerSerde();
final QueryableIndex index = IndexBuilder.create()
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(
OffHeapMemorySegmentWriteOutMediumFactory.instance()
)
diff --git a/extensions-core/druid-bloom-filter/pom.xml b/extensions-core/druid-bloom-filter/pom.xml
index 6106ba74ca2c..fcb6905b195d 100644
--- a/extensions-core/druid-bloom-filter/pom.xml
+++ b/extensions-core/druid-bloom-filter/pom.xml
@@ -109,6 +109,36 @@
+ <dependency>
+     <groupId>junit</groupId>
+     <artifactId>junit</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-api</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
@@ -130,11 +160,6 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
- <dependency>
-     <groupId>junit</groupId>
-     <artifactId>junit</artifactId>
-     <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java
index 8dcab824abf7..8d00b5b559a6 100644
--- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java
+++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/aggregation/bloom/sql/BloomFilterSqlAggregatorTest.java
@@ -54,9 +54,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.List;
public class BloomFilterSqlAggregatorTest extends BaseCalciteQueryTest
@@ -77,11 +76,11 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
final QueryableIndex index =
IndexBuilder.create()
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java
index 15152694ce4d..49632c79a81c 100644
--- a/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java
+++ b/extensions-core/druid-bloom-filter/src/test/java/org/apache/druid/query/filter/sql/BloomDimFilterSqlTest.java
@@ -39,8 +39,8 @@
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
import org.apache.druid.sql.http.SqlParameter;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
import java.io.IOException;
@@ -219,7 +219,7 @@ public void testBloomFilters() throws IOException
);
}
- @Ignore("this test is really slow and is intended to use for comparisons with testBloomFilterBigParameter")
+ @Disabled("this test is really slow and is intended to use for comparisons with testBloomFilterBigParameter")
@Test
public void testBloomFilterBigNoParam() throws IOException
{
@@ -247,7 +247,7 @@ public void testBloomFilterBigNoParam() throws IOException
);
}
- @Ignore("this test is for comparison with testBloomFilterBigNoParam")
+ @Disabled("this test is for comparison with testBloomFilterBigNoParam")
@Test
public void testBloomFilterBigParameter() throws IOException
{
diff --git a/extensions-core/druid-catalog/pom.xml b/extensions-core/druid-catalog/pom.xml
index 5c4614573fdb..6e16ff62ccc4 100644
--- a/extensions-core/druid-catalog/pom.xml
+++ b/extensions-core/druid-catalog/pom.xml
@@ -148,13 +148,38 @@
<dependency>
-     <groupId>org.easymock</groupId>
-     <artifactId>easymock</artifactId>
+     <groupId>junit</groupId>
+     <artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
-     <groupId>junit</groupId>
-     <artifactId>junit</artifactId>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-api</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.easymock</groupId>
+     <artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogInsertTest.java b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogInsertTest.java
index 22a0ca116ed4..d4a97e666ed4 100644
--- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogInsertTest.java
+++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogInsertTest.java
@@ -28,12 +28,12 @@
import org.apache.druid.catalog.storage.CatalogTests;
import org.apache.druid.catalog.sync.CachedMetadataCatalog;
import org.apache.druid.catalog.sync.MetadataCatalog;
-import org.apache.druid.metadata.TestDerbyConnector;
+import org.apache.druid.metadata.TestDerbyConnector.DerbyConnectorRule5;
import org.apache.druid.sql.calcite.CalciteCatalogInsertTest;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.table.DatasourceTable;
import org.apache.druid.sql.calcite.util.SqlTestFramework;
-import org.junit.ClassRule;
+import org.junit.jupiter.api.extension.RegisterExtension;
import static org.junit.Assert.fail;
@@ -42,9 +42,8 @@
*/
public class CatalogInsertTest extends CalciteCatalogInsertTest
{
- @ClassRule
- public static final TestDerbyConnector.DerbyConnectorRule DERBY_CONNECTION_RULE =
- new TestDerbyConnector.DerbyConnectorRule();
+ @RegisterExtension
+ public static final DerbyConnectorRule5 DERBY_CONNECTION_RULE = new DerbyConnectorRule5();
private static CatalogStorage storage;
diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogQueryTest.java b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogQueryTest.java
index 2ee9041fc929..b947c2e2c96d 100644
--- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogQueryTest.java
+++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogQueryTest.java
@@ -27,14 +27,14 @@
import org.apache.druid.catalog.storage.CatalogTests;
import org.apache.druid.catalog.sync.CachedMetadataCatalog;
import org.apache.druid.catalog.sync.MetadataCatalog;
-import org.apache.druid.metadata.TestDerbyConnector;
+import org.apache.druid.metadata.TestDerbyConnector.DerbyConnectorRule5;
import org.apache.druid.sql.calcite.BaseCalciteQueryTest;
import org.apache.druid.sql.calcite.SqlSchema;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.util.SqlTestFramework;
-import org.junit.After;
-import org.junit.Rule;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.RegisterExtension;
import java.util.Arrays;
import java.util.Collections;
@@ -43,8 +43,8 @@
public class CatalogQueryTest extends BaseCalciteQueryTest
{
- @Rule
- public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule();
+ @RegisterExtension
+ public static final DerbyConnectorRule5 DERBY_CONNECTION_RULE = new DerbyConnectorRule5();
private CatalogTests.DbFixture dbFixture;
private CatalogStorage storage;
@@ -70,7 +70,7 @@ public void testCatalogSchema()
.run();
}
- @After
+ @AfterEach
public void catalogTearDown()
{
CatalogTests.tearDown(dbFixture);
@@ -79,7 +79,7 @@ public void catalogTearDown()
@Override
public CatalogResolver createCatalogResolver()
{
- dbFixture = new CatalogTests.DbFixture(derbyConnectorRule);
+ dbFixture = new CatalogTests.DbFixture(DERBY_CONNECTION_RULE);
storage = dbFixture.storage;
MetadataCatalog catalog = new CachedMetadataCatalog(
storage,
diff --git a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogReplaceTest.java b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogReplaceTest.java
index da86934403b5..34011fb2205f 100644
--- a/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogReplaceTest.java
+++ b/extensions-core/druid-catalog/src/test/java/org/apache/druid/catalog/sql/CatalogReplaceTest.java
@@ -28,12 +28,12 @@
import org.apache.druid.catalog.storage.CatalogTests;
import org.apache.druid.catalog.sync.CachedMetadataCatalog;
import org.apache.druid.catalog.sync.MetadataCatalog;
-import org.apache.druid.metadata.TestDerbyConnector;
+import org.apache.druid.metadata.TestDerbyConnector.DerbyConnectorRule5;
import org.apache.druid.sql.calcite.CalciteCatalogReplaceTest;
import org.apache.druid.sql.calcite.planner.CatalogResolver;
import org.apache.druid.sql.calcite.table.DatasourceTable;
import org.apache.druid.sql.calcite.util.SqlTestFramework;
-import org.junit.ClassRule;
+import org.junit.jupiter.api.extension.RegisterExtension;
import static org.junit.Assert.fail;
@@ -42,10 +42,8 @@
*/
public class CatalogReplaceTest extends CalciteCatalogReplaceTest
{
- @ClassRule
- public static final TestDerbyConnector.DerbyConnectorRule DERBY_CONNECTION_RULE =
- new TestDerbyConnector.DerbyConnectorRule();
-
+ @RegisterExtension
+ public static final DerbyConnectorRule5 DERBY_CONNECTION_RULE = new DerbyConnectorRule5();
private static CatalogStorage storage;
@Override
diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml
index e46044455773..56f37a8e29c6 100644
--- a/extensions-core/hdfs-storage/pom.xml
+++ b/extensions-core/hdfs-storage/pom.xml
@@ -177,9 +177,21 @@
<scope>runtime</scope>
</dependency>
<dependency>
-     <groupId>log4j</groupId>
-     <artifactId>log4j</artifactId>
-     <version>1.2.17</version>
+     <groupId>org.apache.logging.log4j</groupId>
+     <artifactId>log4j-api</artifactId>
+     <version>${log4j.version}</version>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.apache.logging.log4j</groupId>
+     <artifactId>log4j-core</artifactId>
+     <version>${log4j.version}</version>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.apache.logging.log4j</groupId>
+     <artifactId>log4j-1.2-api</artifactId>
+     <version>${log4j.version}</version>
<scope>test</scope>
</dependency>
diff --git a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
index 918f051d2243..10cff01c2b9f 100644
--- a/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
+++ b/extensions-core/hdfs-storage/src/test/java/org/apache/druid/inputsource/hdfs/HdfsInputSourceTest.java
@@ -53,10 +53,8 @@
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.experimental.runners.Enclosed;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
-import org.junit.runner.RunWith;
import java.io.BufferedWriter;
import java.io.File;
@@ -76,7 +74,6 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
-@RunWith(Enclosed.class)
public class HdfsInputSourceTest extends InitializedNullHandlingTest
{
private static final String PATH = "hdfs://localhost:7020/foo/bar";
diff --git a/extensions-core/histogram/pom.xml b/extensions-core/histogram/pom.xml
index 787571ccef76..936b592614c5 100644
--- a/extensions-core/histogram/pom.xml
+++ b/extensions-core/histogram/pom.xml
@@ -93,6 +93,36 @@
+ <dependency>
+     <groupId>junit</groupId>
+     <artifactId>junit</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-api</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
@@ -114,11 +144,6 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
- <dependency>
-     <groupId>junit</groupId>
-     <artifactId>junit</artifactId>
-     <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java
index 75a29ab4f2f5..bb6a7e82ba9d 100644
--- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java
+++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/FixedBucketsHistogramQuantileSqlAggregatorTest.java
@@ -56,9 +56,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.List;
public class FixedBucketsHistogramQuantileSqlAggregatorTest extends BaseCalciteQueryTest
@@ -75,12 +74,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
ApproximateHistogramDruidModule.registerSerde();
final QueryableIndex index = IndexBuilder.create(CalciteTests.getJsonMapper())
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java
index 3ee18f886f00..a14be3162e5b 100644
--- a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java
+++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/sql/QuantileSqlAggregatorTest.java
@@ -55,9 +55,8 @@
import org.apache.druid.sql.calcite.util.TestDataBuilder;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
-import java.io.IOException;
import java.util.List;
public class QuantileSqlAggregatorTest extends BaseCalciteQueryTest
@@ -74,12 +73,12 @@ public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker(
final QueryRunnerFactoryConglomerate conglomerate,
final JoinableFactoryWrapper joinableFactory,
final Injector injector
- ) throws IOException
+ )
{
ApproximateHistogramDruidModule.registerSerde();
final QueryableIndex index = IndexBuilder.create(CalciteTests.getJsonMapper())
- .tmpDir(temporaryFolder.newFolder())
+ .tmpDir(newTempFolder())
.segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
.schema(
new IncrementalIndexSchema.Builder()
diff --git a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorTest.java b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorTest.java
index a2f3e98deca0..a4ab6a30a808 100644
--- a/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorTest.java
+++ b/extensions-core/kinesis-indexing-service/src/test/java/org/apache/druid/indexing/kinesis/supervisor/KinesisSupervisorTest.java
@@ -79,7 +79,6 @@
import org.apache.druid.java.util.emitter.service.AlertEvent;
import org.apache.druid.java.util.metrics.DruidMonitorSchedulerConfig;
import org.apache.druid.java.util.metrics.StubServiceEmitter;
-import org.apache.druid.metadata.EntryExistsException;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.segment.TestHelper;
@@ -3744,7 +3743,7 @@ public void testGetCurrentTotalStats()
@Test
public void testDoNotKillCompatibleTasks()
- throws InterruptedException, EntryExistsException
+ throws InterruptedException
{
// This supervisor always returns true for isTaskCurrent -> it should not kill its tasks
int numReplicas = 2;
@@ -3840,8 +3839,7 @@ public void testDoNotKillCompatibleTasks()
}
@Test
- public void testKillIncompatibleTasks()
- throws InterruptedException, EntryExistsException
+ public void testKillIncompatibleTasks() throws InterruptedException
{
// This supervisor always returns false for isTaskCurrent -> it should kill its tasks
int numReplicas = 2;
diff --git a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/NamespaceLookupIntrospectHandler.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/NamespaceLookupIntrospectHandler.java
index 889467324da3..d95361a1bdf3 100644
--- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/NamespaceLookupIntrospectHandler.java
+++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/NamespaceLookupIntrospectHandler.java
@@ -20,8 +20,8 @@
package org.apache.druid.query.lookup;
import com.google.common.collect.ImmutableMap;
-import org.apache.druid.common.utils.ServletResourceUtils;
import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.server.http.ServletResourceUtils;
import org.apache.druid.server.lookup.namespace.cache.CacheScheduler;
import javax.ws.rs.GET;
diff --git a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespaceUrlCheckTest.java b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespaceUrlCheckTest.java
index 178abca9c497..d2ba30b49464 100644
--- a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespaceUrlCheckTest.java
+++ b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/JdbcExtractionNamespaceUrlCheckTest.java
@@ -25,13 +25,10 @@
import org.joda.time.Period;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.experimental.runners.Enclosed;
import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
import java.util.Set;
-@RunWith(Enclosed.class)
public class JdbcExtractionNamespaceUrlCheckTest
{
private static final String TABLE_NAME = "abstractDbRenameTest";
diff --git a/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherTest.java b/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherTest.java
index 1139d4c91fc8..b437d2a3d556 100644
--- a/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherTest.java
+++ b/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherTest.java
@@ -35,9 +35,7 @@
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.experimental.runners.Enclosed;
import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
import org.skife.jdbi.v2.Handle;
import java.io.IOException;
@@ -45,7 +43,6 @@
import java.util.Collections;
import java.util.Map;
-@RunWith(Enclosed.class)
public class JdbcDataFetcherTest extends InitializedNullHandlingTest
{
private static final String TABLE_NAME = "tableName";
diff --git a/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherUrlCheckTest.java b/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherUrlCheckTest.java
index 8f7f2e9d6d6a..b38875b51892 100644
--- a/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherUrlCheckTest.java
+++ b/extensions-core/lookups-cached-single/src/test/java/org/apache/druid/server/lookup/jdbc/JdbcDataFetcherUrlCheckTest.java
@@ -24,13 +24,10 @@
import org.apache.druid.server.initialization.JdbcAccessSecurityConfig;
import org.junit.Rule;
import org.junit.Test;
-import org.junit.experimental.runners.Enclosed;
import org.junit.rules.ExpectedException;
-import org.junit.runner.RunWith;
import java.util.Set;
-@RunWith(Enclosed.class)
public class JdbcDataFetcherUrlCheckTest
{
private static final String TABLE_NAME = "tableName";
diff --git a/extensions-core/multi-stage-query/pom.xml b/extensions-core/multi-stage-query/pom.xml
index 163a90ae09b9..58e1976d556c 100644
--- a/extensions-core/multi-stage-query/pom.xml
+++ b/extensions-core/multi-stage-query/pom.xml
@@ -203,11 +203,31 @@
+ <dependency>
+     <groupId>junit</groupId>
+     <artifactId>junit</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-engine</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-migrationsupport</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter-params</artifactId>
+     <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
@@ -224,8 +244,13 @@
<scope>test</scope>
</dependency>
<dependency>
-     <groupId>junit</groupId>
-     <artifactId>junit</artifactId>
+     <groupId>org.junit.jupiter</groupId>
+     <artifactId>junit-jupiter</artifactId>
+     <scope>test</scope>
+ </dependency>
+ <dependency>
+     <groupId>org.junit.vintage</groupId>
+     <artifactId>junit-vintage-engine</artifactId>
<scope>test</scope>
</dependency>
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerClient.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerClient.java
index b3675f0e0476..afd1ece4dad1 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerClient.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerClient.java
@@ -73,8 +73,12 @@ void postWorkerError(
void postWorkerWarning(
List<MSQErrorReport> MSQErrorReports
) throws IOException;
+
List<String> getTaskList() throws IOException;
+ /**
+ * Close this client. Idempotent.
+ */
@Override
void close();
}
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
index 5da74f0a52a9..e9d71239940b 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
@@ -783,18 +783,10 @@ public void updatePartialKeyStatisticsInformation(
addToKernelManipulationQueue(
queryKernel -> {
final StageId stageId = queryKernel.getStageId(stageNumber);
-
- // We need a specially-decorated ObjectMapper to deserialize key statistics.
- final StageDefinition stageDef = queryKernel.getStageDefinition(stageId);
- final ObjectMapper mapper = MSQTasks.decorateObjectMapperForKeyCollectorSnapshot(
- context.jsonMapper(),
- stageDef.getShuffleSpec().clusterBy(),
- stageDef.getShuffleSpec().doesAggregate()
- );
-
final PartialKeyStatisticsInformation partialKeyStatisticsInformation;
+
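+ // Convert using the plain jsonMapper; the specially-decorated key-collector mapper is no longer used here.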
try {
- partialKeyStatisticsInformation = mapper.convertValue(
+ partialKeyStatisticsInformation = context.jsonMapper().convertValue(
partialKeyStatisticsInformationObject,
PartialKeyStatisticsInformation.class
);
@@ -1913,7 +1905,8 @@ private static QueryDefinition makeQueryDefinition(
.processorFactory(new ExportResultsFrameProcessorFactory(
queryId,
exportStorageProvider,
- resultFormat
+ resultFormat,
+ columnMappings
))
);
return builder.build();
@@ -2135,9 +2128,13 @@ private static Pair, List> makeDimensio
// deprecation and removal in future
if (MultiStageQueryContext.getArrayIngestMode(query.context()) == ArrayIngestMode.MVD) {
log.warn(
- "'%s' is set to 'mvd' in the query's context. This ingests the string arrays as multi-value "
- + "strings instead of arrays, and is preserved for legacy reasons when MVDs were the only way to ingest string "
- + "arrays in Druid. It is incorrect behaviour and will likely be removed in the future releases of Druid",
+ "%s[mvd] is active for this task. This causes string arrays (VARCHAR ARRAY in SQL) to be ingested as "
+ + "multi-value strings rather than true arrays. This behavior may change in a future version of Druid. To be "
+ + "compatible with future behavior changes, we recommend setting %s to[array], which creates a clearer "
+ + "separation between multi-value strings and true arrays. In either[mvd] or[array] mode, you can write "
+ + "out multi-value string dimensions using ARRAY_TO_MV. "
+ + "See https://druid.apache.org/docs/latest/querying/arrays#arrayingestmode for more details.",
+ MultiStageQueryContext.CTX_ARRAY_INGEST_MODE,
MultiStageQueryContext.CTX_ARRAY_INGEST_MODE
);
}
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
index 24a3fad8dbf1..2dff4419bdbc 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
@@ -19,10 +19,8 @@
package org.apache.druid.msq.exec;
-import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.Injector;
import com.google.inject.Key;
-import org.apache.druid.frame.key.ClusterBy;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.msq.guice.MultiStageQuery;
@@ -39,10 +37,6 @@
import org.apache.druid.msq.indexing.error.UnknownFault;
import org.apache.druid.msq.indexing.error.WorkerFailedFault;
import org.apache.druid.msq.indexing.error.WorkerRpcFailedFault;
-import org.apache.druid.msq.statistics.KeyCollectorFactory;
-import org.apache.druid.msq.statistics.KeyCollectorSnapshot;
-import org.apache.druid.msq.statistics.KeyCollectorSnapshotDeserializerModule;
-import org.apache.druid.msq.statistics.KeyCollectors;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.server.DruidNode;
import org.apache.druid.storage.NilStorageConnector;
@@ -125,24 +119,6 @@ public static long primaryTimestampFromObjectForInsert(final Object timestamp)
}
}
- /**
- * Returns a decorated copy of an ObjectMapper that knows how to deserialize the appropriate kind of
- * {@link KeyCollectorSnapshot}.
- */
- static ObjectMapper decorateObjectMapperForKeyCollectorSnapshot(
- final ObjectMapper mapper,
- final ClusterBy clusterBy,
- final boolean aggregate
- )
- {
- final KeyCollectorFactory, ?> keyCollectorFactory =
- KeyCollectors.makeStandardFactory(clusterBy, aggregate);
-
- final ObjectMapper mapperCopy = mapper.copy();
- mapperCopy.registerModule(new KeyCollectorSnapshotDeserializerModule(keyCollectorFactory));
- return mapperCopy;
- }
-
/**
* Returns the host:port from a {@link DruidNode}. Convenience method to make it easier to construct
* {@link MSQErrorReport} instances.
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
index 3f2ef39b5bf7..e76169f70423 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
@@ -498,20 +498,16 @@ public Optional runTask(final Closer closer) throws Exception
@Override
public void stopGracefully()
{
- log.info("Stopping gracefully for taskId [%s]", task.getId());
- kernelManipulationQueue.add(
- kernel -> {
- // stopGracefully() is called when the containing process is terminated, or when the task is canceled.
- throw new MSQException(CanceledFault.INSTANCE);
- }
- );
+ // stopGracefully() is called when the containing process is terminated, or when the task is canceled.
+ log.info("Worker task[%s] canceled.", task.getId());
+ doCancel();
}
@Override
public void controllerFailed()
{
- controllerAlive = false;
- stopGracefully();
+ log.info("Controller task[%s] for worker task[%s] failed. Canceling.", task.getControllerTaskId(), task.getId());
+ doCancel();
}
@Override
@@ -909,6 +905,31 @@ private void cleanStageOutput(final StageId stageId, boolean removeDurableStorag
}
}
+ /**
+ * Called by {@link #stopGracefully()} (task canceled, or containing process shut down) and
+ * {@link #controllerFailed()}.
+ */
+ private void doCancel()
+ {
+ // Set controllerAlive = false so we don't try to contact the controller after being canceled. If it canceled us,
+ // it doesn't need to know that we were canceled. If we were canceled by something else, the controller will
+ // detect this as part of its monitoring of workers.
+ controllerAlive = false;
+
+ // Close controller client to cancel any currently in-flight calls to the controller.
+ if (controllerClient != null) {
+ controllerClient.close();
+ }
+
+ // Clear the main loop event queue, then throw a CanceledFault into the loop to exit it promptly.
+ kernelManipulationQueue.clear();
+ kernelManipulationQueue.add(
+ kernel -> {
+ throw new MSQException(CanceledFault.INSTANCE);
+ }
+ );
+ }
+
/**
* Log (at DEBUG level) a string explaining the status of all work assigned to this worker.
*/
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java
index a485831532f0..55ff6a3876d8 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java
@@ -357,21 +357,18 @@ public Map<Integer, List<WorkerStats>> getWorkerStats()
final Map<Integer, List<WorkerStats>> workerStats = new TreeMap<>();
for (Map.Entry<String, TaskTracker> taskEntry : taskTrackers.entrySet()) {
+ final TaskTracker taskTracker = taskEntry.getValue();
+ final TaskStatus taskStatus = taskTracker.statusRef.get();
- TaskTracker taskTracker = taskEntry.getValue();
+ // taskStatus is null when TaskTrackers are first set up, and stays null until the first status call comes back.
+ final TaskState statusCode = taskStatus != null ? taskStatus.getStatusCode() : null;
+
+ // getDuration() returns -1 for running tasks. It's not calculated on-the-fly here since
+ // taskTracker.startTimeMillis marks task submission time rather than the actual start.
+ final long duration = taskStatus != null ? taskStatus.getDuration() : -1;
- TaskStatus taskStatus = taskTracker.statusRef.get();
workerStats.computeIfAbsent(taskTracker.workerNumber, k -> new ArrayList<>())
- .add(new WorkerStats(
- taskEntry.getKey(),
- taskStatus.getStatusCode(),
- // getDuration() returns -1 for running tasks.
- // It's not calculated on-the-fly here since
- // taskTracker.startTimeMillis marks task
- // submission time rather than the actual start.
- taskStatus.getDuration(),
- taskTracker.taskPendingTimeInMs()
- ));
+ .add(new WorkerStats(taskEntry.getKey(), statusCode, duration, taskTracker.taskPendingTimeInMs()));
}
for (List<WorkerStats> workerStatsList : workerStats.values()) {
diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java
index de65d3e9d7ad..52697578b07e 100644
--- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java
+++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessor.java
@@ -21,6 +21,8 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import it.unimi.dsi.fastutil.ints.IntSet;
+import it.unimi.dsi.fastutil.objects.Object2IntMap;
+import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import org.apache.druid.error.DruidException;
import org.apache.druid.frame.Frame;
import org.apache.druid.frame.channel.ReadableFrameChannel;
@@ -35,13 +37,14 @@
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.msq.counters.ChannelCounters;
-import org.apache.druid.msq.querykit.QueryKitUtils;
import org.apache.druid.msq.util.SequenceUtils;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.RowSignature;
+import org.apache.druid.sql.calcite.planner.ColumnMapping;
+import org.apache.druid.sql.calcite.planner.ColumnMappings;
import org.apache.druid.sql.http.ResultFormat;
import org.apache.druid.storage.StorageConnector;
@@ -60,6 +63,8 @@ public class ExportResultsFrameProcessor implements FrameProcessor