Skip to content

Commit

Permalink
HIVE-28661: OTEL: Latency in retrieving query end time leads to thread interruption (#5576). (Tanishq Chugh, reviewed by Ayush Saxena)
Browse files Browse the repository at this point in the history
  • Loading branch information
tanishq-chugh authored Dec 12, 2024
1 parent 4218877 commit dc28391
Showing 1 changed file with 50 additions and 43 deletions.
93 changes: 50 additions & 43 deletions service/src/java/org/apache/hive/service/servlet/OTELExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ public OTELExporter(OpenTelemetry openTelemetry, SessionManager sessionManager,
@Override
public void run() {
while (true) {
jvmMetrics.setJvmMetrics();
exposeMetricsToOTEL();
try {
jvmMetrics.setJvmMetrics();
exposeMetricsToOTEL();
} catch (Throwable e) {
LOG.error("Exception occurred in OTELExporter thread ", e);
}

try {
Thread.sleep(frequency);
} catch (InterruptedException e) {
Expand Down Expand Up @@ -136,55 +141,57 @@ public void exposeMetricsToOTEL() {

Set<String> historicalQueryIDs = new HashSet<>();
for (QueryInfo hQuery : historicalQueries) {
String hQueryId = hQuery.getQueryDisplay().getQueryId();
historicalQueryIDs.add(hQueryId);
Span rootspan = queryIdToSpanMap.remove(hQueryId);
Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);
if (hQuery.getEndTime() != null) {
String hQueryId = hQuery.getQueryDisplay().getQueryId();
historicalQueryIDs.add(hQueryId);
Span rootspan = queryIdToSpanMap.remove(hQueryId);
Set<String> completedTasks = queryIdToTasksMap.remove(hQueryId);

//For queries that were live till last loop but have ended before start of this loop
if (rootspan != null) {
for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
if (!completedTasks.contains(task.getTaskId())) {
Context parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}
}

//Update the rootSpan name & attributes before ending it
rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery))
.end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
historicalQueryId.add(hQueryId);
}

//For queries that were live till last loop but have ended before start of this loop
if (rootspan != null) {
for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
if (!completedTasks.contains(task.getTaskId())) {
Context parentContext = Context.current().with(rootspan);
//For queries that already ended either before OTEL service started or in between OTEL loops
if (historicalQueryId.add(hQueryId)) {
rootspan = tracer.spanBuilder(hQueryId + " - completed")
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan();
Context parentContext = Context.current().with(rootspan);

Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.setAttribute("QueryId", hQueryId)
.setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString())
.setAttribute("UserName", hQuery.getUserName())
.setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
if (hQuery.getQueryDisplay().getErrorMessage() != null) {
initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage());
}
initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);

for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}
}

//Update the rootSpan name & attributes before ending it
rootspan.updateName(hQueryId + " - completed").setAllAttributes(addQueryAttributes(hQuery))
.end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
historicalQueryId.add(hQueryId);
}

//For queries that already ended either before OTEL service started or in between OTEL loops
if (historicalQueryId.add(hQueryId)) {
rootspan = tracer.spanBuilder(hQueryId + " - completed")
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan();
Context parentContext = Context.current().with(rootspan);

Span initSpan = tracer.spanBuilder(hQueryId).setParent(parentContext)
.setStartTimestamp(hQuery.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.setAttribute("QueryId", hQueryId)
.setAttribute("QueryString", hQuery.getQueryDisplay().getQueryString())
.setAttribute("UserName", hQuery.getUserName())
.setAttribute("ExecutionEngine", hQuery.getExecutionEngine());
if (hQuery.getQueryDisplay().getErrorMessage() != null) {
initSpan.setAttribute("ErrorMessage", hQuery.getQueryDisplay().getErrorMessage());
rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
}
initSpan.end(hQuery.getBeginTime(), TimeUnit.MILLISECONDS);

for (QueryDisplay.TaskDisplay task : hQuery.getQueryDisplay().getTaskDisplays()) {
parentContext = Context.current().with(rootspan);
tracer.spanBuilder(hQueryId + " - " + task.getTaskId())
.setParent(parentContext).setAllAttributes(addTaskAttributes(task))
.setStartTimestamp(task.getBeginTime(), TimeUnit.MILLISECONDS).startSpan()
.end(task.getEndTime(), TimeUnit.MILLISECONDS);
}

rootspan.setAllAttributes(addQueryAttributes(hQuery)).end(hQuery.getEndTime(), TimeUnit.MILLISECONDS);
}
}

Expand Down

0 comments on commit dc28391

Please sign in to comment.