Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New liveness probe to check for deadlocked threads #11388

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion management/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ plugins {

dependencies {
annotationProcessor(projects.micronautInjectJava)
testAnnotationProcessor(projects.micronautInjectJava)
annotationProcessor(projects.micronautGraal)

api(projects.micronautRouter)
Expand Down Expand Up @@ -38,5 +39,5 @@ dependencies {

compileOnly(libs.logback.classic)
compileOnly(libs.log4j)

testImplementation(libs.awaitility)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright 2017-2024 original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.micronaut.management.health.indicator.threads;

import io.micronaut.context.annotation.Requires;
import io.micronaut.core.async.publisher.Publishers;
import io.micronaut.core.util.StringUtils;
import io.micronaut.health.HealthStatus;
import io.micronaut.management.endpoint.health.HealthEndpoint;
import io.micronaut.management.health.indicator.HealthIndicator;
import io.micronaut.management.health.indicator.HealthResult;
import io.micronaut.management.health.indicator.annotation.Liveness;
import jakarta.inject.Singleton;
import org.reactivestreams.Publisher;
import reactor.core.publisher.Mono;

import java.lang.management.ManagementFactory;
import java.lang.management.MonitorInfo;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

/**
* <p>A {@link HealthIndicator} that uses the {@link ThreadMXBean} to check for deadlocked threads.
* Returns {@link HealthStatus#DOWN} if any are found and their {@link ThreadInfo} in the details.</p>
*
* @author Andreas Brenk
* @since 4.8.0
*/
@Singleton
@Liveness
@Requires(property = HealthEndpoint.PREFIX + ".deadlocked-threads.enabled", notEquals = StringUtils.FALSE)
@Requires(beans = HealthEndpoint.class)
public class DeadlockedThreadsHealthIndicator implements HealthIndicator {

private static final String NAME = "deadlockedThreads";
private static final String KEY_THREAD_ID = "threadId";
private static final String KEY_THREAD_NAME = "threadName";
private static final String KEY_THREAD_STATE = "threadState";
private static final String KEY_DAEMON = "daemon";
private static final String KEY_PRIORITY = "priority";
private static final String KEY_SUSPENDED = "suspended";
private static final String KEY_IN_NATIVE = "inNative";
private static final String KEY_LOCK_NAME = "lockName";
private static final String KEY_LOCK_OWNER_NAME = "lockOwnerName";
private static final String KEY_LOCK_OWNER_ID = "lockOwnerId";
private static final String KEY_LOCKED_SYNCHRONIZERS = "lockedSynchronizers";
private static final String KEY_STACK_TRACE = "stackTrace";

@Override
public Publisher<HealthResult> getResult() {
return Mono.just(findResult());
sdelamo marked this conversation as resolved.
Show resolved Hide resolved
}

private static HealthResult findResult() {
HealthResult.Builder builder = HealthResult.builder(NAME);
try {
ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
long[] deadlockedThreads = threadMXBean.findDeadlockedThreads();

if (deadlockedThreads == null) {
builder.status(HealthStatus.UP);
} else {
builder.status(HealthStatus.DOWN);
builder.details(
Arrays.stream(threadMXBean.getThreadInfo(deadlockedThreads, true, true, Integer.MAX_VALUE))
.map(DeadlockedThreadsHealthIndicator::getDetails)
.toList());
}
} catch (Exception e) {
builder.status(HealthStatus.UNKNOWN);
builder.exception(e);
}
return builder.build();
}

private static Map<String, Object> getDetails(ThreadInfo threadInfo) {
Map<String, Object> details = new LinkedHashMap<>();
details.put(KEY_THREAD_ID, String.valueOf(threadInfo.getThreadId()));
details.put(KEY_THREAD_NAME, threadInfo.getThreadName());
details.put(KEY_THREAD_STATE, threadInfo.getThreadState().name());
details.put(KEY_DAEMON, String.valueOf(threadInfo.isDaemon()));
details.put(KEY_PRIORITY, String.valueOf(threadInfo.getPriority()));
details.put(KEY_SUSPENDED, String.valueOf(threadInfo.isSuspended()));
details.put(KEY_IN_NATIVE, String.valueOf(threadInfo.isInNative()));
details.put(KEY_LOCK_NAME, threadInfo.getLockName());
details.put(KEY_LOCK_OWNER_NAME, threadInfo.getLockOwnerName());
details.put(KEY_LOCK_OWNER_ID, String.valueOf(threadInfo.getLockOwnerId()));
details.put(KEY_LOCKED_SYNCHRONIZERS, Arrays.stream(threadInfo.getLockedSynchronizers()).map(String::valueOf).toList());
details.put(KEY_STACK_TRACE, formatStackTrace(threadInfo));
return details;
}

private static String formatStackTrace(ThreadInfo threadInfo) {
StringBuilder sb = new StringBuilder();

int i = 0;
StackTraceElement[] stackTrace = threadInfo.getStackTrace();
for (; i < stackTrace.length; i++) {
StackTraceElement ste = stackTrace[i];
sb.append(ste.toString());
sb.append('\n');

if (i == 0 && threadInfo.getLockInfo() != null) {
switch (threadInfo.getThreadState()) {
case BLOCKED:
sb.append("- blocked on ");
sb.append(threadInfo.getLockInfo());
sb.append('\n');
break;
case WAITING, TIMED_WAITING:
sb.append("- waiting on ");
sb.append(threadInfo.getLockInfo());
sb.append('\n');
break;
default:
}
}

for (MonitorInfo mi : threadInfo.getLockedMonitors()) {
if (mi.getLockedStackDepth() == i) {
sb.append("- locked ");
sb.append(mi);
sb.append('\n');
}
}
}

return sb.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright 2017-2024 original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Thread deadlock indicator.
*
* @author Andreas Brenk
* @since 4.8.0
*/
package io.micronaut.management.health.indicator.threads;
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,26 @@ class HealthAggregatorSpec extends Specification {
assert appender.events.isEmpty()
break
case Level.DEBUG:
assert appender.events.size() == 7
assert appender.events.size() == 8
assert appender.events[0] == 'Health result for compositeDiscoveryClient(): status UP'
assert appender.events[1] == 'Health result for diskSpace: status UP'
assert appender.events[2] == 'Health result for jdbc: status UP'
assert appender.events[3] == 'Health result for jdbc:h2:mem:oneDb: status UP'
assert appender.events[4] == 'Health result for liveness: status UP'
assert appender.events[5] == 'Health result for readiness: status UP'
assert appender.events[6] == 'Health result for service: status UP'
assert appender.events[1] == 'Health result for deadlockedThreads: status UP'
assert appender.events[2] == 'Health result for diskSpace: status UP'
assert appender.events[3] == 'Health result for jdbc: status UP'
assert appender.events[4] == 'Health result for jdbc:h2:mem:oneDb: status UP'
assert appender.events[5] == 'Health result for liveness: status UP'
assert appender.events[6] == 'Health result for readiness: status UP'
assert appender.events[7] == 'Health result for service: status UP'
break
case Level.TRACE:
assert appender.events.size() == 7
assert appender.events.size() == 8
assert appender.events[0].contains('Health result for compositeDiscoveryClient(): status UP, details {')
assert appender.events[1].contains('Health result for diskSpace: status UP, details {')
assert appender.events[2].contains('Health result for jdbc: status UP, details {')
assert appender.events[3].contains('Health result for jdbc:h2:mem:oneDb: status UP, details {')
assert appender.events[4] == 'Health result for liveness: status UP, details {}'
assert appender.events[5] == 'Health result for readiness: status UP, details {}'
assert appender.events[6] == 'Health result for service: status UP, details {}'
assert appender.events[1] == 'Health result for deadlockedThreads: status UP, details {}'
assert appender.events[2].contains('Health result for diskSpace: status UP, details {')
assert appender.events[3].contains('Health result for jdbc: status UP, details {')
assert appender.events[4].contains('Health result for jdbc:h2:mem:oneDb: status UP, details {')
assert appender.events[5] == 'Health result for liveness: status UP, details {}'
assert appender.events[6] == 'Health result for readiness: status UP, details {}'
assert appender.events[7] == 'Health result for service: status UP, details {}'
break
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package io.micronaut.management.health.indicator.threads

import io.micronaut.context.ApplicationContext
import io.micronaut.management.health.indicator.discovery.DiscoveryClientHealthIndicator
import io.micronaut.management.health.indicator.discovery.DiscoveryClientHealthIndicatorConfiguration
import spock.lang.Specification

/**
 * Verifies that the deadlocked-threads health indicator can be switched off through configuration.
 */
class DeadlockedThreadsHealthIndicatorConfigurationSpec extends Specification {

    void "bean of type DeadlockedThreadsHealthIndicator does not exist if you set endpoints.health.deadlocked-threads.enabled=false"() {
        given: "a context started with the indicator explicitly disabled"
        Map<String, Object> disabledIndicator = ['endpoints.health.deadlocked-threads.enabled': 'false']
        ApplicationContext ctx = ApplicationContext.run(disabledIndicator)

        expect: "the indicator bean is not registered"
        !ctx.containsBean(DeadlockedThreadsHealthIndicator)

        cleanup:
        ctx.close()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package io.micronaut.management.health.indicator.threads

import io.micronaut.health.HealthStatus
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import reactor.core.publisher.Mono
import spock.lang.Specification

import static java.lang.Thread.sleep

/**
 * Exercises {@link DeadlockedThreadsHealthIndicator} with and without a real thread deadlock.
 *
 * The deadlock is built from interruptible {@code ReentrantLock}s rather than intrinsic monitors
 * so that it can be torn down after the feature: a monitor deadlock can never be released and
 * would stay visible to every later health check running in the same JVM.
 */
class DeadlockedThreadsHealthIndicatorSpec extends Specification {

    Logger log = LoggerFactory.getLogger(DeadlockedThreadsHealthIndicatorSpec)

    def lock1 = new java.util.concurrent.locks.ReentrantLock()
    def lock2 = new java.util.concurrent.locks.ReentrantLock()
    def thread1
    def thread2

    // Runs after every feature: break any deadlock and wait for the workers to finish.
    def cleanup() {
        [thread1, thread2].findAll { it != null }.each { Thread worker ->
            worker.interrupt()
            worker.join(1000)
        }
    }

    def "No deadlocked threads so status is UP"() {
        given:
        thread1 = new Thread()
        thread2 = new Thread()
        def healthIndicator = new DeadlockedThreadsHealthIndicator()

        when:
        thread1.start()
        thread2.start()
        def result = Mono.from(healthIndicator.getResult()).block()

        then:
        HealthStatus.UP == result.status
        null == result.details
    }

    def "Deadlocked threads found so status is DOWN"() {
        given:
        // Each worker counts down once it owns its first lock, so neither asks for its
        // second lock before the other one already holds it.
        def bothHoldFirstLock = new java.util.concurrent.CountDownLatch(2)
        thread1 = lockingThread("Thread 1", "lock 1", lock1, "lock 2", lock2, bothHoldFirstLock)
        thread2 = lockingThread("Thread 2", "lock 2", lock2, "lock 1", lock1, bothHoldFirstLock)
        def healthIndicator = new DeadlockedThreadsHealthIndicator()

        when:
        thread1.start()
        thread2.start()
        awaitDeadlock()

        def result = Mono.from(healthIndicator.getResult()).block()

        then:
        HealthStatus.DOWN == result.status
        null != result.details
    }

    // Creates a daemon thread that takes `first`, waits for its peer, then tries to take `second`.
    private Thread lockingThread(String threadName,
                                 String firstName, java.util.concurrent.locks.Lock first,
                                 String secondName, java.util.concurrent.locks.Lock second,
                                 java.util.concurrent.CountDownLatch bothHoldFirstLock) {
        def worker = new Thread(() -> {
            try {
                first.lockInterruptibly()
                try {
                    log.debug("{}: Holding {}", threadName, firstName)
                    bothHoldFirstLock.countDown()
                    bothHoldFirstLock.await()

                    second.lockInterruptibly()
                    try {
                        log.debug("{}: Holding {} and {}", threadName, firstName, secondName)
                    } finally {
                        second.unlock()
                    }
                } finally {
                    first.unlock()
                }
            } catch (InterruptedException ignored) {
                // Expected: cleanup() interrupts the worker to dissolve the deadlock.
            }
        })
        // Daemon so a stuck worker can never keep the test JVM alive.
        worker.daemon = true
        return worker
    }

    // Polls (bounded to 5 seconds) until each worker is parked waiting for the other's lock.
    private void awaitDeadlock() {
        long deadline = System.nanoTime() + java.util.concurrent.TimeUnit.SECONDS.toNanos(5)
        while (!(parkedOn(lock2, thread1) && parkedOn(lock1, thread2))) {
            assert System.nanoTime() < deadline : "threads did not reach the deadlock in time"
            Thread.sleep(10)
        }
    }

    // Queue membership is checked first: once queued, a WAITING state can only mean parked on this lock.
    private static boolean parkedOn(java.util.concurrent.locks.ReentrantLock lock, Thread waiter) {
        lock.hasQueuedThread(waiter) && waiter.state == Thread.State.WAITING
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,24 @@ class HealthMonitorTaskSpec extends Specification {
assert appender.events.isEmpty()
break
case Level.DEBUG:
assert appender.events.size() == 6
assert appender.events.size() == 7
assert appender.events[0] == 'Health monitor result for compositeDiscoveryClient(): status UP'
assert appender.events[1] == 'Health monitor result for diskSpace: status UP'
assert appender.events[2] == 'Health monitor result for liveness: status UP'
assert appender.events[3] == 'Health monitor result for readiness: status UP'
assert appender.events[4] == 'Health monitor result for service: status UP'
assert appender.events[5] == 'Starting health monitor check'
assert appender.events[1] == 'Health monitor result for deadlockedThreads: status UP'
assert appender.events[2] == 'Health monitor result for diskSpace: status UP'
assert appender.events[3] == 'Health monitor result for liveness: status UP'
assert appender.events[4] == 'Health monitor result for readiness: status UP'
assert appender.events[5] == 'Health monitor result for service: status UP'
assert appender.events[6] == 'Starting health monitor check'
break
case Level.TRACE:
assert appender.events.size() == 6
assert appender.events.size() == 7
assert appender.events[0].contains('Health monitor result for compositeDiscoveryClient(): status UP, details {')
assert appender.events[1].contains('Health monitor result for diskSpace: status UP, details {')
assert appender.events[2] == 'Health monitor result for liveness: status UP, details {}'
assert appender.events[3] == 'Health monitor result for readiness: status UP, details {}'
assert appender.events[4] == 'Health monitor result for service: status UP, details {}'
assert appender.events[5] == 'Starting health monitor check'
assert appender.events[1] == 'Health monitor result for deadlockedThreads: status UP, details {}'
assert appender.events[2].contains('Health monitor result for diskSpace: status UP, details {')
assert appender.events[3] == 'Health monitor result for liveness: status UP, details {}'
assert appender.events[4] == 'Health monitor result for readiness: status UP, details {}'
assert appender.events[5] == 'Health monitor result for service: status UP, details {}'
assert appender.events[6] == 'Starting health monitor check'
break
}
}
Expand Down
Loading
Loading