Address review comments

apache · Oct 19, 2023 · 2a8024b · 2a8024b
1 parent 50a0eb7
commit 2a8024b
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 45 deletions.
diff --git a/docs/api-reference/sql-ingestion-api.md b/docs/api-reference/sql-ingestion-api.md
@@ -596,47 +596,52 @@ The response shows an example report for a query.
 
 The following table describes the response fields when you retrieve a report for a MSQ task engine using the `/druid/indexer/v1/task/<taskId>/reports` endpoint:
 
-| Field | Description |
-|---|---|
-| `multiStageQuery.taskId` | Controller task ID. |
-| `multiStageQuery.payload.status` | Query status container. |
-| `multiStageQuery.payload.status.status` | RUNNING, SUCCESS, or FAILED. |
-| `multiStageQuery.payload.status.startTime` | Start time of the query in ISO format. Only present if the query has started running. |
-| `multiStageQuery.payload.status.durationMs` | Milliseconds elapsed after the query has started running. -1 denotes that the query hasn't started running yet. |
-| `multiStageQuery.payload.status.pendingTasks` | Number of tasks that are not fully started. -1 denotes that the number is currently unknown. |
-| `multiStageQuery.payload.status.runningTasks` | Number of currently running tasks. Should be at least 1 since the controller is included. |
-| `multiStageQuery.payload.status.segmentLoadStatus` | Segment loading container. Only present after the segments have been published. |
-| `multiStageQuery.payload.status.segmentLoadStatus.state` | Either INIT, WAITING, SUCCESS, FAILED or TIMED_OUT. |
-| `multiStageQuery.payload.status.segmentLoadStatus.startTime` | Time since which the controller has been waiting for the segments to finish loading. |
-| `multiStageQuery.payload.status.segmentLoadStatus.duration` | The duration in milliseconds that the controller has been waiting for the segments to load. |
-| `multiStageQuery.payload.status.segmentLoadStatus.totalSegments` | The total number of segments generated by the job. This includes tombstone segments (if any). |
-| `multiStageQuery.payload.status.segmentLoadStatus.usedSegments` | The number of segments which are marked as used based on the load rules. Unused segments can be cleaned up at any time. |
-| `multiStageQuery.payload.status.segmentLoadStatus.precachedSegments` | The number of segments which are marked as precached and served by historicals, as per the load rules. |
-| `multiStageQuery.payload.status.segmentLoadStatus.onDemandSegments` | The number of segments which are not loaded on any historical, as per the load rules. |
-| `multiStageQuery.payload.status.segmentLoadStatus.pendingSegments` | The number of segments remaining to be loaded. |
-| `multiStageQuery.payload.status.segmentLoadStatus.unknownSegments` | The number of segments whose status is unknown. |
-| `multiStageQuery.payload.status.errorReport` | Error object. Only present if there was an error. |
-| `multiStageQuery.payload.status.errorReport.taskId` | The task that reported the error, if known. May be a controller task or a worker task. |
-| `multiStageQuery.payload.status.errorReport.host` | The hostname and port of the task that reported the error, if known. |
-| `multiStageQuery.payload.status.errorReport.stageNumber` | The stage number that reported the error, if it happened during execution of a specific stage. |
-| `multiStageQuery.payload.status.errorReport.error` | Error object. Contains `errorCode` at a minimum, and may contain other fields as described in the [error code table](../multi-stage-query/reference.md#error-codes). Always present if there is an error. |
-| `multiStageQuery.payload.status.errorReport.error.errorCode` | One of the error codes from the [error code table](../multi-stage-query/reference.md#error-codes). Always present if there is an error. |
-| `multiStageQuery.payload.status.errorReport.error.errorMessage` | User-friendly error message. Not always present, even if there is an error. |
-| `multiStageQuery.payload.status.errorReport.exceptionStackTrace` | Java stack trace in string form, if the error was due to a server-side exception. |
-| `multiStageQuery.payload.stages` | Array of query stages. |
-| `multiStageQuery.payload.stages[].stageNumber` | Each stage has a number that differentiates it from other stages. |
-| `multiStageQuery.payload.stages[].phase` | Either NEW, READING_INPUT, POST_READING, RESULTS_COMPLETE, or FAILED. Only present if the stage has started. |
-| `multiStageQuery.payload.stages[].workerCount` | Number of parallel tasks that this stage is running on. Only present if the stage has started. |
-| `multiStageQuery.payload.stages[].partitionCount` | Number of output partitions generated by this stage. Only present if the stage has started and has computed its number of output partitions. |
-| `multiStageQuery.payload.stages[].startTime` | Start time of this stage. Only present if the stage has started. |
-| `multiStageQuery.payload.stages[].duration` | The number of milliseconds that the stage has been running. Only present if the stage has started. |
-| `multiStageQuery.payload.stages[].sort` | A boolean that is set to `true` if the stage does a sort as part of its execution. |
-| `multiStageQuery.payload.stages[].definition` | The object defining what the stage does. |
-| `multiStageQuery.payload.stages[].definition.id` | The unique identifier of the stage. |
-| `multiStageQuery.payload.stages[].definition.input` | Array of inputs that the stage has. |
-| `multiStageQuery.payload.stages[].definition.broadcast` | Array of input indexes that get broadcasted. Only present if there are inputs that get broadcasted. |
-| `multiStageQuery.payload.stages[].definition.processor` | An object defining the processor logic. |
-| `multiStageQuery.payload.stages[].definition.signature` | The output signature of the stage. |
+| Field                                                                | Description                                                                                                                                                                                        |
+|----------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `multiStageQuery.taskId`                                             | Controller task ID.                                                                                                                                                                                |
+| `multiStageQuery.payload.status`                                     | Query status container.                                                                                                                                                                            |
+| `multiStageQuery.payload.status.status`                              | RUNNING, SUCCESS, or FAILED.                                                                                                                                                                       |
+| `multiStageQuery.payload.status.startTime`                           | Start time of the query in ISO format. Only present if the query has started running.                                                                                                              |
+| `multiStageQuery.payload.status.durationMs`                          | Milliseconds elapsed after the query has started running. -1 denotes that the query hasn't started running yet.                                                                                    |
+| `multiStageQuery.payload.status.workers`                             | Workers for the controller task.                                                                                                                                                                   |
+| `multiStageQuery.payload.status.workers.<workerNumber>`              | Array of worker tasks including retries.                                                                                                                                                           |
+| `multiStageQuery.payload.status.workers.<workerNumber>[].workerId`   | Id of the worker task.                                                                                                                                                                             |
+| `multiStageQuery.payload.status.workers.<workerNumber>[].status`     | RUNNING, SUCCESS, or FAILED.                                                                                                                                                                       |
+| `multiStageQuery.payload.status.workers.<workerNumber>[].durationMs` | Milliseconds elapsed after the worker task started running.                                                                                                                                        |
+| `multiStageQuery.payload.status.pendingTasks`                        | Number of tasks that are not fully started. -1 denotes that the number is currently unknown.                                                                                                       |
+| `multiStageQuery.payload.status.runningTasks`                        | Number of currently running tasks. Should be at least 1 since the controller is included.                                                                                                          |
+| `multiStageQuery.payload.status.segmentLoadStatus`                   | Segment loading container. Only present after the segments have been published.                                                                                                                    |
+| `multiStageQuery.payload.status.segmentLoadStatus.state`             | Either INIT, WAITING, SUCCESS, FAILED or TIMED_OUT.                                                                                                                                                |
+| `multiStageQuery.payload.status.segmentLoadStatus.startTime`         | Time since which the controller has been waiting for the segments to finish loading.                                                                                                               |
+| `multiStageQuery.payload.status.segmentLoadStatus.duration`          | The duration in milliseconds that the controller has been waiting for the segments to load.                                                                                                        |
+| `multiStageQuery.payload.status.segmentLoadStatus.totalSegments`     | The total number of segments generated by the job. This includes tombstone segments (if any).                                                                                                      |
+| `multiStageQuery.payload.status.segmentLoadStatus.usedSegments`      | The number of segments which are marked as used based on the load rules. Unused segments can be cleaned up at any time.                                                                            |
+| `multiStageQuery.payload.status.segmentLoadStatus.precachedSegments` | The number of segments which are marked as precached and served by historicals, as per the load rules.                                                                                             |
+| `multiStageQuery.payload.status.segmentLoadStatus.onDemandSegments`  | The number of segments which are not loaded on any historical, as per the load rules.                                                                                                              |
+| `multiStageQuery.payload.status.segmentLoadStatus.pendingSegments`   | The number of segments remaining to be loaded.                                                                                                                                                     |
+| `multiStageQuery.payload.status.segmentLoadStatus.unknownSegments`   | The number of segments whose status is unknown.                                                                                                                                                    |
+| `multiStageQuery.payload.status.errorReport`                         | Error object. Only present if there was an error.                                                                                                                                                  |
+| `multiStageQuery.payload.status.errorReport.taskId`                  | The task that reported the error, if known. May be a controller task or a worker task.                                                                                                             |
+| `multiStageQuery.payload.status.errorReport.host`                    | The hostname and port of the task that reported the error, if known.                                                                                                                               |
+| `multiStageQuery.payload.status.errorReport.stageNumber`             | The stage number that reported the error, if it happened during execution of a specific stage.                                                                                                     |
+| `multiStageQuery.payload.status.errorReport.error`                   | Error object. Contains `errorCode` at a minimum, and may contain other fields as described in the [error code table](../multi-stage-query/reference.md#error-codes). Always present if there is an error. |
+| `multiStageQuery.payload.status.errorReport.error.errorCode`         | One of the error codes from the [error code table](../multi-stage-query/reference.md#error-codes). Always present if there is an error.                                                            |
+| `multiStageQuery.payload.status.errorReport.error.errorMessage`      | User-friendly error message. Not always present, even if there is an error.                                                                                                                        |
+| `multiStageQuery.payload.status.errorReport.exceptionStackTrace`     | Java stack trace in string form, if the error was due to a server-side exception.                                                                                                                  |
+| `multiStageQuery.payload.stages`                                     | Array of query stages.                                                                                                                                                                             |
+| `multiStageQuery.payload.stages[].stageNumber`                       | Each stage has a number that differentiates it from other stages.                                                                                                                                  |
+| `multiStageQuery.payload.stages[].phase`                             | Either NEW, READING_INPUT, POST_READING, RESULTS_COMPLETE, or FAILED. Only present if the stage has started.                                                                                       |
+| `multiStageQuery.payload.stages[].workerCount`                       | Number of parallel tasks that this stage is running on. Only present if the stage has started.                                                                                                     |
+| `multiStageQuery.payload.stages[].partitionCount`                    | Number of output partitions generated by this stage. Only present if the stage has started and has computed its number of output partitions.                                                       |
+| `multiStageQuery.payload.stages[].startTime`                         | Start time of this stage. Only present if the stage has started.                                                                                                                                   |
+| `multiStageQuery.payload.stages[].duration`                          | The number of milliseconds that the stage has been running. Only present if the stage has started.                                                                                                 |
+| `multiStageQuery.payload.stages[].sort`                              | A boolean that is set to `true` if the stage does a sort as part of its execution.                                                                                                                 |
+| `multiStageQuery.payload.stages[].definition`                        | The object defining what the stage does.                                                                                                                                                           |
+| `multiStageQuery.payload.stages[].definition.id`                     | The unique identifier of the stage.                                                                                                                                                                |
+| `multiStageQuery.payload.stages[].definition.input`                  | Array of inputs that the stage has.                                                                                                                                                                |
+| `multiStageQuery.payload.stages[].definition.broadcast`              | Array of input indexes that get broadcasted. Only present if there are inputs that get broadcasted.                                                                                                |
+| `multiStageQuery.payload.stages[].definition.processor`              | An object defining the processor logic.                                                                                                                                                            |
+| `multiStageQuery.payload.stages[].definition.signature`              | The output signature of the stage.                                                                                                                                                                 |
 
 ## Cancel a query task
 

diff --git a/.../multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java b/.../multi-stage-query/src/main/java/org/apache/druid/msq/indexing/MSQWorkerTaskLauncher.java
@@ -48,6 +48,7 @@
 
 import java.time.Duration;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -114,7 +115,7 @@ private enum State
   // Mutable state accessible only to the main loop. LinkedHashMap since order of key set matters. Tasks are added
   // here once they are submitted for running, but before they are fully started up.
   // taskId -> taskTracker
-  private final Map<String, TaskTracker> taskTrackers = new LinkedHashMap<>();
+  private final Map<String, TaskTracker> taskTrackers = Collections.synchronizedMap(new LinkedHashMap<>());
 
   // Set of tasks which are issued a cancel request by the controller.
   private final Set<String> canceledWorkerTasks = ConcurrentHashMap.newKeySet();
@@ -371,13 +372,13 @@ public WorkerStats(String workerId, TaskState state, long duration)
       this.duration = duration;
     }
 
-    @JsonProperty()
+    @JsonProperty
     public String getWorkerId()
     {
       return workerId;
     }
 
-    @JsonProperty()
+    @JsonProperty
     public TaskState getState()
     {
       return state;

diff --git a/...multi-stage-query/src/main/java/org/apache/druid/msq/indexing/report/MSQStatusReport.java b/...multi-stage-query/src/main/java/org/apache/druid/msq/indexing/report/MSQStatusReport.java
@@ -131,7 +131,7 @@ public long getDurationMs()
   }
 
   @JsonProperty("workers")
-  public Map<Integer, List<MSQWorkerTaskLauncher.WorkerStats>> getWorkersDurationsMs()
+  public Map<Integer, List<MSQWorkerTaskLauncher.WorkerStats>> getWorkerStats()
   {
     return workerStats;
   }