Skip to content

Commit

Permalink
Add cgroup cpu/mem/disk usage metrics (#16472)
Browse files Browse the repository at this point in the history
* Add cgroup cpu/mem usage metrics

* checks

* comments

* docs fix

* add disk metrics

* fapi check

* checkstyle

* issues

* spelling

* change asserts

* checks

* use proc builder instead of runtime

* specify charset

* spotbug
  • Loading branch information
adithyachakilam authored May 29, 2024
1 parent 75937c9 commit a9044ac
Show file tree
Hide file tree
Showing 26 changed files with 716 additions and 32 deletions.
1 change: 1 addition & 0 deletions docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,7 @@ Metric monitoring is an essential part of Druid operations. The following monito
|`org.apache.druid.java.util.metrics.JvmThreadsMonitor`|Reports Thread statistics in the JVM, like numbers of total, daemon, started, died threads.|
|`org.apache.druid.java.util.metrics.CgroupCpuMonitor`|Reports CPU shares and quotas as per the `cpu` cgroup.|
|`org.apache.druid.java.util.metrics.CgroupCpuSetMonitor`|Reports CPU core/HT and memory node allocations as per the `cpuset` cgroup.|
|`org.apache.druid.java.util.metrics.CgroupDiskMonitor`|Reports disk statistics as per the `blkio` cgroup.|
|`org.apache.druid.java.util.metrics.CgroupMemoryMonitor`|Reports memory statistics as per the `memory` cgroup.|
|`org.apache.druid.server.metrics.EventReceiverFirehoseMonitor`|Reports how many events have been queued in the EventReceiverFirehose.|
|`org.apache.druid.server.metrics.HistoricalMetricsMonitor`|Reports statistics on Historical services. Available only on Historical services.|
Expand Down
9 changes: 9 additions & 0 deletions docs/operations/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,17 @@ These metrics are available on operating systems with the cgroup kernel feature.
|------|-----------|----------|------------|
|`cgroup/cpu/shares`|Relative value of CPU time available to this process. Read from `cpu.shares`.||Varies|
|`cgroup/cpu/cores_quota`|Number of cores available to this process. Derived from `cpu.cfs_quota_us`/`cpu.cfs_period_us`.||Varies. A value of -1 indicates there is no explicit quota set.|
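|`cgroup/cpu/usage/total/percentage`|Total CPU usage, as a percentage, of the cgroup that the process is running in.||Varies. 100 represents one fully used core, so the value can exceed 100 when the cgroup uses multiple cores.|
|`cgroup/cpu/usage/user/percentage`|User-mode CPU usage, as a percentage, of the cgroup that the process is running in.||Varies. 100 represents one fully used core, so the value can exceed 100 when the cgroup uses multiple cores.|
|`cgroup/cpu/usage/sys/percentage`|System-mode CPU usage, as a percentage, of the cgroup that the process is running in.||Varies. 100 represents one fully used core, so the value can exceed 100 when the cgroup uses multiple cores.|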
|`cgroup/disk/read/bytes`|Number of bytes read from a specific device by the cgroup that the process is running in.|`diskName`|Varies|
|`cgroup/disk/write/bytes`|Number of bytes written to a specific device by the cgroup that the process is running in.|`diskName`|Varies|
|`cgroup/disk/read/count`|Number of read operations performed on a specific device by the cgroup that the process is running in.|`diskName`|Varies|
|`cgroup/disk/write/count`|Number of write operations performed on a specific device by the cgroup that the process is running in.|`diskName`|Varies|
|`cgroup/memory/*`|Memory stats for this process, such as `cache` and `total_swap`. Each stat produces a separate metric. Read from `memory.stat`.||Varies|
|`cgroup/memory_numa/*/pages`|Memory stats, per NUMA node, for this process, such as `total` and `unevictable`. Each stat produces a separate metric. Read from `memory.numa_stat`.|`numaZone`|Varies|
|`cgroup/memory/limit/bytes`|Reports the maximum memory that can be used by processes in the cgroup (in bytes)||Varies|
|`cgroup/memory/usage/bytes`|Reports the total memory currently used by processes in the cgroup, including file cache (in bytes)||Varies|
|`cgroup/cpuset/cpu_count`|Total number of CPUs available to the process. Derived from `cpuset.cpus`.||Varies|
|`cgroup/cpuset/effective_cpu_count`|Total number of active CPUs available to the process. Derived from `cpuset.effective_cpus`.||Varies|
|`cgroup/cpuset/mems_count`|Total number of memory nodes available to the process. Derived from `cpuset.mems`.||Varies|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,64 @@
package org.apache.druid.java.util.metrics;

import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
import org.apache.druid.java.util.metrics.cgroups.Cpu;
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.Map;

public class CgroupCpuMonitor extends FeedDefiningMonitor
{
private static final Logger LOG = new Logger(CgroupCpuMonitor.class);
private static final Long DEFAULT_USER_HZ = 100L;
public static final String TOTAL_USAGE_METRIC = "cgroup/cpu/usage/total/percentage";
public static final String USER_USAGE_METRIC = "cgroup/cpu/usage/user/percentage";
public static final String SYS_USAGE_METRIC = "cgroup/cpu/usage/sys/percentage";
private static final String TOTAL = "total";
private static final String USER = "user";
private static final String SYSTEM = "system";
final CgroupDiscoverer cgroupDiscoverer;
final Map<String, String[]> dimensions;
private Long userHz;
private KeyedDiff jiffies = new KeyedDiff();
private long prevJiffiesSnapshotAt = 0;

public CgroupCpuMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String, String[]> dimensions, String feed)
{
super(feed);
this.cgroupDiscoverer = cgroupDiscoverer;
this.dimensions = dimensions;
try {
Process p = new ProcessBuilder("getconf", "CLK_TCK").start();
try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
String line = in.readLine();
if (line != null) {
userHz = Long.valueOf(line.trim());
}
}
}
catch (IOException | NumberFormatException e) {
LOG.warn(e, "Error getting the USER_HZ value");
}
finally {
if (userHz == null) {
LOG.warn("Using default value for USER_HZ");
userHz = DEFAULT_USER_HZ;
}
}
}

/**
 * Creates a CPU monitor that locates cgroup data through a new {@link ProcSelfCgroupDiscoverer}.
 *
 * @param dimensions extra dimensions added to every emitted event
 * @param feed       feed name passed to the parent monitor
 */
public CgroupCpuMonitor(final Map<String, String[]> dimensions, String feed)
{
  // Exactly one explicit constructor call is allowed; the discoverer must not be null because
  // doMonitor hands it straight to Cpu.
  this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
}

public CgroupCpuMonitor(final Map<String, String[]> dimensions)
Expand All @@ -58,7 +94,8 @@ public CgroupCpuMonitor()
public boolean doMonitor(ServiceEmitter emitter)
{
final Cpu cpu = new Cpu(cgroupDiscoverer);
final Cpu.CpuAllocationMetric cpuSnapshot = cpu.snapshot();
final Cpu.CpuMetrics cpuSnapshot = cpu.snapshot();
long now = Instant.now().getEpochSecond();

final ServiceMetricEvent.Builder builder = builder();
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
Expand All @@ -68,6 +105,26 @@ public boolean doMonitor(ServiceEmitter emitter)
computeProcessorQuota(cpuSnapshot.getQuotaUs(), cpuSnapshot.getPeriodUs())
));

long elapsedJiffiesSnapshotSecs = now - prevJiffiesSnapshotAt;
if (elapsedJiffiesSnapshotSecs > 0) {
prevJiffiesSnapshotAt = now;
final Map<String, Long> elapsedJiffies = jiffies.to(
"usage",
ImmutableMap.<String, Long>builder()
.put(USER, cpuSnapshot.getUserJiffies())
.put(SYSTEM, cpuSnapshot.getSystemJiffies())
.put(TOTAL, cpuSnapshot.getTotalJiffies())
.build()
);
if (elapsedJiffies != null) {
double totalUsagePct = 100.0 * elapsedJiffies.get(TOTAL) / userHz / elapsedJiffiesSnapshotSecs;
double sysUsagePct = 100.0 * elapsedJiffies.get(SYSTEM) / userHz / elapsedJiffiesSnapshotSecs;
double userUsagePct = 100.0 * elapsedJiffies.get(USER) / userHz / elapsedJiffiesSnapshotSecs;
emitter.emit(builder.setMetric(TOTAL_USAGE_METRIC, totalUsagePct));
emitter.emit(builder.setMetric(SYS_USAGE_METRIC, sysUsagePct));
emitter.emit(builder.setMetric(USER_USAGE_METRIC, userUsagePct));
}
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.java.util.metrics;

import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
import org.apache.druid.java.util.metrics.cgroups.Disk;
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;

import java.util.Map;

/**
 * Monitor that emits per-disk I/O metrics obtained from a {@link Disk} snapshot.
 *
 * <p>On every run the raw counters of each disk are passed through a {@link KeyedDiff} keyed by
 * the snapshot entry key; whatever the diff returns is emitted with a {@code diskName} dimension.
 * Entries for which the diff returns {@code null} are skipped.
 */
public class CgroupDiskMonitor extends FeedDefiningMonitor
{
  private final KeyedDiff diff = new KeyedDiff();
  final CgroupDiscoverer cgroupDiscoverer;
  final Map<String, String[]> dimensions;

  /**
   * @param cgroupDiscoverer discoverer handed to {@link Disk} on every monitoring run
   * @param dimensions       extra dimensions added to every emitted event
   * @param feed             feed name passed to the parent monitor
   */
  public CgroupDiskMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String, String[]> dimensions, String feed)
  {
    super(feed);
    this.dimensions = dimensions;
    this.cgroupDiscoverer = cgroupDiscoverer;
  }

  /**
   * Same as the three-argument constructor, using a new {@link ProcSelfCgroupDiscoverer}.
   */
  public CgroupDiskMonitor(final Map<String, String[]> dimensions, String feed)
  {
    this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
  }

  /**
   * Same as the two-argument constructor, using {@code DEFAULT_METRICS_FEED} as the feed.
   */
  public CgroupDiskMonitor(final Map<String, String[]> dimensions)
  {
    this(dimensions, DEFAULT_METRICS_FEED);
  }

  /**
   * Creates a monitor with no extra dimensions.
   */
  public CgroupDiskMonitor()
  {
    this(ImmutableMap.of());
  }

  /**
   * Takes a disk snapshot and emits the diffed counters of every disk.
   *
   * @param emitter destination for the metric events
   * @return always {@code true}
   */
  @Override
  public boolean doMonitor(ServiceEmitter emitter)
  {
    final Disk disk = new Disk(cgroupDiscoverer);
    for (Map.Entry<String, Disk.Metrics> perDisk : disk.snapshot().entrySet()) {
      final Disk.Metrics current = perDisk.getValue();
      final ImmutableMap<String, Long> counters =
          ImmutableMap.<String, Long>builder()
                      .put("cgroup/disk/read/bytes", current.getReadBytes())
                      .put("cgroup/disk/read/count", current.getReadCount())
                      .put("cgroup/disk/write/bytes", current.getWriteBytes())
                      .put("cgroup/disk/write/count", current.getWriteCount())
                      .build();

      final Map<String, Long> delta = diff.to(perDisk.getKey(), counters);
      if (delta == null) {
        // Nothing to report for this disk on this run.
        continue;
      }

      final ServiceMetricEvent.Builder eventBuilder = builder().setDimension("diskName", current.getDiskName());
      MonitorUtils.addDimensionsToBuilder(eventBuilder, dimensions);
      delta.forEach((metric, value) -> emitter.emit(eventBuilder.setMetric(metric, value)));
    }
    return true;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
import org.apache.druid.java.util.metrics.cgroups.Memory;
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;

import java.util.Map;

Expand All @@ -42,7 +43,7 @@ public CgroupMemoryMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String,

/**
 * Creates a memory monitor that locates cgroup data through a new {@link ProcSelfCgroupDiscoverer}.
 *
 * @param dimensions extra dimensions added to every emitted event
 * @param feed       feed name passed on to the three-argument constructor
 */
public CgroupMemoryMonitor(final Map<String, String[]> dimensions, String feed)
{
  // Exactly one explicit constructor call is allowed; the discoverer must not be null because
  // doMonitor hands it straight to Memory.
  this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
}

public CgroupMemoryMonitor(final Map<String, String[]> dimensions)
Expand All @@ -60,16 +61,18 @@ public boolean doMonitor(ServiceEmitter emitter)
{
final Memory memory = new Memory(cgroupDiscoverer);
final Memory.MemoryStat stat = memory.snapshot();
final ServiceMetricEvent.Builder builder = builder();
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
emitter.emit(builder.setMetric("cgroup/memory/usage/bytes", stat.getUsage()));
emitter.emit(builder.setMetric("cgroup/memory/limit/bytes", stat.getLimit()));

stat.getMemoryStats().forEach((key, value) -> {
final ServiceMetricEvent.Builder builder = builder();
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
// See https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
// There are inconsistent units for these. Most are bytes.
emitter.emit(builder.setMetric(StringUtils.format("cgroup/memory/%s", key), value));
});
// Emit the per-NUMA-zone page counters. Each zone gets its own builder so that the "numaZone"
// dimension is really attached to the events emitted for that zone and does not leak into
// events of other zones or of the non-NUMA metrics.
stat.getNumaMemoryStats().forEach((key, value) -> {
  final ServiceMetricEvent.Builder numaBuilder = builder().setDimension("numaZone", Long.toString(key));
  MonitorUtils.addDimensionsToBuilder(numaBuilder, dimensions);
  value.forEach(
      (k, v) -> emitter.emit(numaBuilder.setMetric(StringUtils.format("cgroup/memory_numa/%s/pages", k), v))
  );
});
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,32 @@

package org.apache.druid.java.util.metrics;

import com.google.common.primitives.Longs;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

/**
 * Static helpers shared by the cgroup metric readers.
 */
public class CgroupUtil
{
  private static final Logger LOG = new Logger(CgroupUtil.class);
  /** Regex-quoted single space. */
  public static final String SPACE_MATCH = Pattern.quote(" ");
  /** Regex-quoted comma. */
  public static final String COMMA_MATCH = Pattern.quote(",");

  /**
   * Reads a file from the directory the discoverer reports for {@code cgroup} and returns the
   * first line that parses as a long.
   *
   * @param discoverer   used to locate the directory of the given cgroup
   * @param cgroup       cgroup name passed to the discoverer
   * @param fileName     file to read, relative to the discovered directory
   * @param defaultValue value returned when no line parses as a long, or when discovery or
   *                     reading fails (the failure is logged as a warning)
   * @return the first parseable long in the file, or {@code defaultValue}
   */
  public static long readLongValue(CgroupDiscoverer discoverer, String cgroup, String fileName, long defaultValue)
  {
    try {
      final String cgroupDir = discoverer.discover(cgroup).toString();
      final List<String> lines = Files.readAllLines(Paths.get(cgroupDir, fileName));
      for (String line : lines) {
        final Long parsed = Longs.tryParse(line);
        if (Objects.nonNull(parsed)) {
          return parsed;
        }
      }
      return defaultValue;
    }
    catch (RuntimeException | IOException ex) {
      LOG.warn(ex, "Unable to fetch %s", fileName);
      return defaultValue;
    }
  }
}
Loading

0 comments on commit a9044ac

Please sign in to comment.