Skip to content

Commit 9b6913a

Browse files
authored
Auto switch cgroup monitors from v1 to v2 (#18705)
1 parent aec5c4a commit 9b6913a

34 files changed

+1595
-202
lines changed

docs/configuration/index.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,13 +1956,14 @@ The following table lists available monitors and the respective services where t
19561956
|`org.apache.druid.java.util.metrics.JvmCpuMonitor`|Reports statistics of CPU consumption by the JVM.|Any|
19571957
|`org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor`|Reports consumed CPU as per the cpuacct cgroup.|Any|
19581958
|`org.apache.druid.java.util.metrics.JvmThreadsMonitor`|Reports Thread statistics in the JVM, like numbers of total, daemon, started, died threads.|Any|
1959-
|`org.apache.druid.java.util.metrics.CgroupCpuMonitor`|Reports CPU shares and quotas as per the `cpu` cgroup.|Any|
1960-
|`org.apache.druid.java.util.metrics.CgroupCpuSetMonitor`|Reports CPU core/HT and memory node allocations as per the `cpuset` cgroup.|Any|
1961-
|`org.apache.druid.java.util.metrics.CgroupDiskMonitor`|Reports disk statistic as per the blkio cgroup.|Any|
1962-
|`org.apache.druid.java.util.metrics.CgroupMemoryMonitor`|Reports memory statistic as per the memory cgroup.|Any|
1963-
|`org.apache.druid.java.util.metrics.CgroupV2CpuMonitor`| **EXPERIMENTAL** Reports CPU usage from `cpu.stat` file. Only applicable to `cgroupv2`.|Any|
1964-
|`org.apache.druid.java.util.metrics.CgroupV2DiskMonitor`| **EXPERIMENTAL** Reports disk usage from `io.stat` file. Only applicable to `cgroupv2`.|Any|
1965-
|`org.apache.druid.java.util.metrics.CgroupV2MemoryMonitor`| **EXPERIMENTAL** Reports memory usage from `memory.current` and `memory.max` files. Only applicable to `cgroupv2`.|Any|
1959+
|`org.apache.druid.java.util.metrics.CgroupCpuMonitor`|Reports CPU shares and quotas as per the `cpu` cgroup. Automatically switches to `CgroupV2CpuMonitor` in case `cgroupv2` type is detected.|Any|
1960+
|`org.apache.druid.java.util.metrics.CgroupCpuSetMonitor`|Reports CPU core/HT and memory node allocations as per the `cpuset` cgroup. Automatically switches to `CgroupV2CpuSetMonitor` in case `cgroupv2` type is detected.|Any|
1961+
|`org.apache.druid.java.util.metrics.CgroupDiskMonitor`|Reports disk statistic as per the blkio cgroup. Automatically switches to `CgroupV2DiskMonitor` in case `cgroupv2` type is detected.|Any|
1962+
|`org.apache.druid.java.util.metrics.CgroupMemoryMonitor`|Reports memory statistic as per the memory cgroup. Automatically switches to `CgroupV2MemoryMonitor` in case `cgroupv2` type is detected.|Any|
1963+
|`org.apache.druid.java.util.metrics.CgroupV2CpuMonitor`| Reports CPU usage from `cpu.stat` file. Only applicable to `cgroupv2`.|Any|
1964+
|`org.apache.druid.java.util.metrics.CgroupV2CpuSetMonitor`|Reports CPU core/HT and memory node allocations as per the `cpuset` cgroup. Only applicable to `cgroupv2`.|Any|
1965+
|`org.apache.druid.java.util.metrics.CgroupV2DiskMonitor`| Reports disk usage from `io.stat` file. Only applicable to `cgroupv2`.|Any|
1966+
|`org.apache.druid.java.util.metrics.CgroupV2MemoryMonitor`| Reports memory usage from `memory.current` and `memory.max` files. Only applicable to `cgroupv2`.|Any|
19661967
|`org.apache.druid.server.metrics.HistoricalMetricsMonitor`|Reports statistics on Historical services.|Historical|
19671968
|`org.apache.druid.server.metrics.SegmentStatsMonitor` | **EXPERIMENTAL** Reports statistics about segments on Historical services. Not to be used when lazy loading is configured.|Historical|
19681969
|`org.apache.druid.server.metrics.QueryCountStatsMonitor`|Reports how many queries have been successful/failed/interrupted.|Broker, Historical, Router, Indexer, Peon|

processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuMonitor.java

Lines changed: 47 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
2525
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
2626
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
27+
import org.apache.druid.java.util.metrics.cgroups.CgroupVersion;
2728
import org.apache.druid.java.util.metrics.cgroups.Cpu;
2829
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;
2930

@@ -41,37 +42,32 @@ public class CgroupCpuMonitor extends FeedDefiningMonitor
4142
final CgroupDiscoverer cgroupDiscoverer;
4243
final Map<String, String[]> dimensions;
4344
private Long userHz;
44-
private KeyedDiff jiffies = new KeyedDiff();
45+
private final KeyedDiff jiffies = new KeyedDiff();
4546
private long prevJiffiesSnapshotAt = 0;
47+
private final boolean isRunningOnCgroupsV2;
48+
private final CgroupV2CpuMonitor cgroupV2CpuMonitor;
4649

4750
public CgroupCpuMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String, String[]> dimensions, String feed)
4851
{
4952
super(feed);
5053
this.cgroupDiscoverer = cgroupDiscoverer;
5154
this.dimensions = dimensions;
52-
try {
53-
Process p = new ProcessBuilder("getconf", "CLK_TCK").start();
54-
try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
55-
String line = in.readLine();
56-
if (line != null) {
57-
userHz = Long.valueOf(line.trim());
58-
}
59-
}
60-
}
61-
catch (IOException | NumberFormatException e) {
62-
LOG.warn(e, "Error getting the USER_HZ value");
63-
}
64-
finally {
65-
if (userHz == null) {
66-
LOG.warn("Using default value for USER_HZ");
67-
userHz = DEFAULT_USER_HZ;
68-
}
55+
56+
// Check if we're running on cgroups v2
57+
this.isRunningOnCgroupsV2 = cgroupDiscoverer.getCgroupVersion().equals(CgroupVersion.V2);
58+
if (isRunningOnCgroupsV2) {
59+
this.cgroupV2CpuMonitor = new CgroupV2CpuMonitor(cgroupDiscoverer, dimensions, feed);
60+
LOG.info("Detected cgroups v2, using CgroupV2CpuMonitor behavior for accurate metrics");
61+
} else {
62+
this.cgroupV2CpuMonitor = null;
63+
initUzerHz();
6964
}
65+
7066
}
7167

7268
public CgroupCpuMonitor(final Map<String, String[]> dimensions, String feed)
7369
{
74-
this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
70+
this(ProcSelfCgroupDiscoverer.autoCgroupDiscoverer(), dimensions, feed);
7571
}
7672

7773
public CgroupCpuMonitor(final Map<String, String[]> dimensions)
@@ -87,16 +83,26 @@ public CgroupCpuMonitor()
8783
@Override
8884
public boolean doMonitor(ServiceEmitter emitter)
8985
{
90-
final Cpu cpu = new Cpu(cgroupDiscoverer);
91-
final Cpu.CpuMetrics cpuSnapshot = cpu.snapshot();
86+
if (isRunningOnCgroupsV2) {
87+
return cgroupV2CpuMonitor.doMonitor(emitter);
88+
} else {
89+
return doMonitorV1(emitter);
90+
}
91+
}
92+
93+
private boolean doMonitorV1(ServiceEmitter emitter)
94+
{
95+
final Cpu.CpuMetrics cpuSnapshot = cgroupDiscoverer.getCpuMetrics();
9296
long now = Instant.now().getEpochSecond();
9397

9498
final ServiceMetricEvent.Builder builder = builder();
9599
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
100+
builder.setDimension("cgroupversion", cgroupDiscoverer.getCgroupVersion().name());
101+
96102
emitter.emit(builder.setMetric("cgroup/cpu/shares", cpuSnapshot.getShares()));
97103
emitter.emit(builder.setMetric(
98104
"cgroup/cpu/cores_quota",
99-
computeProcessorQuota(cpuSnapshot.getQuotaUs(), cpuSnapshot.getPeriodUs())
105+
CgroupUtil.computeProcessorQuota(cpuSnapshot.getQuotaUs(), cpuSnapshot.getPeriodUs())
100106
));
101107

102108
long elapsedJiffiesSnapshotSecs = now - prevJiffiesSnapshotAt;
@@ -122,18 +128,25 @@ public boolean doMonitor(ServiceEmitter emitter)
122128
return true;
123129
}
124130

125-
/**
126-
* Calculates the total cores allocated through quotas. A negative value indicates that no quota has been specified.
127-
* We use -1 because that's the default value used in the cgroup.
128-
*
129-
* @param quotaUs the cgroup quota value.
130-
* @param periodUs the cgroup period value.
131-
* @return the calculated processor quota, -1 if no quota or period set.
132-
*/
133-
public static double computeProcessorQuota(long quotaUs, long periodUs)
131+
private void initUzerHz()
134132
{
135-
return quotaUs < 0 || periodUs == 0
136-
? -1
137-
: (double) quotaUs / periodUs;
133+
try {
134+
Process p = new ProcessBuilder("getconf", "CLK_TCK").start();
135+
try (BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
136+
String line = in.readLine();
137+
if (line != null) {
138+
userHz = Long.valueOf(line.trim());
139+
}
140+
}
141+
}
142+
catch (IOException | NumberFormatException e) {
143+
LOG.warn(e, "Error getting the USER_HZ value");
144+
}
145+
finally {
146+
if (userHz == null) {
147+
LOG.warn("Using default value for USER_HZ");
148+
userHz = DEFAULT_USER_HZ;
149+
}
150+
}
138151
}
139152
}

processing/src/main/java/org/apache/druid/java/util/metrics/CgroupCpuSetMonitor.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828

2929
import java.util.Map;
3030

31+
/**
32+
* Monitor that reports CPU set metrics from cgroups both v1 and v2.
33+
*/
34+
3135
public class CgroupCpuSetMonitor extends FeedDefiningMonitor
3236
{
3337
final CgroupDiscoverer cgroupDiscoverer;
@@ -42,7 +46,7 @@ public CgroupCpuSetMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String,
4246

4347
public CgroupCpuSetMonitor(final Map<String, String[]> dimensions, String feed)
4448
{
45-
this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
49+
this(ProcSelfCgroupDiscoverer.autoCgroupDiscoverer(), dimensions, feed);
4650
}
4751

4852
public CgroupCpuSetMonitor(final Map<String, String[]> dimensions)
@@ -58,11 +62,10 @@ public CgroupCpuSetMonitor()
5862
@Override
5963
public boolean doMonitor(ServiceEmitter emitter)
6064
{
61-
final CpuSet cpuset = new CpuSet(cgroupDiscoverer);
62-
final CpuSet.CpuSetMetric cpusetSnapshot = cpuset.snapshot();
63-
65+
final CpuSet.CpuSetMetric cpusetSnapshot = cgroupDiscoverer.getCpuSetMetrics();
6466
final ServiceMetricEvent.Builder builder = builder();
6567
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
68+
builder.setDimension("cgroupversion", cgroupDiscoverer.getCgroupVersion().name());
6669
emitter.emit(builder.setMetric(
6770
"cgroup/cpuset/cpu_count",
6871
cpusetSnapshot.getCpuSetCpus().length

processing/src/main/java/org/apache/druid/java/util/metrics/CgroupDiskMonitor.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,44 @@
2020
package org.apache.druid.java.util.metrics;
2121

2222
import com.google.common.collect.ImmutableMap;
23+
import org.apache.druid.java.util.common.logger.Logger;
2324
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
2425
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
2526
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
27+
import org.apache.druid.java.util.metrics.cgroups.CgroupVersion;
2628
import org.apache.druid.java.util.metrics.cgroups.Disk;
2729
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;
2830

2931
import java.util.Map;
3032

3133
public class CgroupDiskMonitor extends FeedDefiningMonitor
3234
{
35+
private static final Logger LOG = new Logger(CgroupDiskMonitor.class);
3336
final CgroupDiscoverer cgroupDiscoverer;
3437
final Map<String, String[]> dimensions;
3538
private final KeyedDiff diff = new KeyedDiff();
39+
private final boolean isRunningOnCgroupsV2;
40+
private final CgroupV2DiskMonitor cgroupV2DiskMonitor;
3641

3742
public CgroupDiskMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String, String[]> dimensions, String feed)
3843
{
3944
super(feed);
4045
this.cgroupDiscoverer = cgroupDiscoverer;
4146
this.dimensions = dimensions;
47+
48+
// Check if we're running on cgroups v2
49+
this.isRunningOnCgroupsV2 = cgroupDiscoverer.getCgroupVersion().equals(CgroupVersion.V2);
50+
if (isRunningOnCgroupsV2) {
51+
this.cgroupV2DiskMonitor = new CgroupV2DiskMonitor(cgroupDiscoverer, dimensions, feed);
52+
LOG.info("Detected cgroups v2, using CgroupV2DiskMonitor behavior for accurate metrics");
53+
} else {
54+
this.cgroupV2DiskMonitor = null;
55+
}
4256
}
4357

4458
public CgroupDiskMonitor(final Map<String, String[]> dimensions, String feed)
4559
{
46-
this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
60+
this(ProcSelfCgroupDiscoverer.autoCgroupDiscoverer(), dimensions, feed);
4761
}
4862

4963
public CgroupDiskMonitor(final Map<String, String[]> dimensions)
@@ -58,6 +72,15 @@ public CgroupDiskMonitor()
5872

5973
@Override
6074
public boolean doMonitor(ServiceEmitter emitter)
75+
{
76+
if (isRunningOnCgroupsV2) {
77+
return cgroupV2DiskMonitor.doMonitor(emitter);
78+
} else {
79+
return doMonitorV1(emitter);
80+
}
81+
}
82+
83+
private boolean doMonitorV1(ServiceEmitter emitter)
6184
{
6285
Map<String, Disk.Metrics> snapshot = new Disk(cgroupDiscoverer).snapshot();
6386
for (Map.Entry<String, Disk.Metrics> entry : snapshot.entrySet()) {
@@ -75,6 +98,7 @@ public boolean doMonitor(ServiceEmitter emitter)
7598
final ServiceMetricEvent.Builder builder = builder()
7699
.setDimension("diskName", entry.getValue().getDiskName());
77100
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
101+
builder.setDimension("cgroupversion", cgroupDiscoverer.getCgroupVersion());
78102
for (Map.Entry<String, Long> stat : stats.entrySet()) {
79103
emitter.emit(builder.setMetric(stat.getKey(), stat.getValue()));
80104
}

processing/src/main/java/org/apache/druid/java/util/metrics/CgroupMemoryMonitor.java

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,47 @@
2121

2222
import com.google.common.collect.ImmutableMap;
2323
import org.apache.druid.java.util.common.StringUtils;
24+
import org.apache.druid.java.util.common.logger.Logger;
2425
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
2526
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
2627
import org.apache.druid.java.util.metrics.cgroups.CgroupDiscoverer;
28+
import org.apache.druid.java.util.metrics.cgroups.CgroupVersion;
2729
import org.apache.druid.java.util.metrics.cgroups.Memory;
2830
import org.apache.druid.java.util.metrics.cgroups.ProcSelfCgroupDiscoverer;
2931

3032
import java.util.Map;
3133

3234
public class CgroupMemoryMonitor extends FeedDefiningMonitor
3335
{
36+
private static final Logger LOG = new Logger(CgroupMemoryMonitor.class);
37+
private static final String MEMORY_USAGE_FILE = "memory.usage_in_bytes";
38+
private static final String MEMORY_LIMIT_FILE = "memory.limit_in_bytes";
39+
3440
final CgroupDiscoverer cgroupDiscoverer;
3541
final Map<String, String[]> dimensions;
42+
private final boolean isRunningOnCgroupsV2;
43+
private final CgroupV2MemoryMonitor cgroupV2MemoryMonitor;
44+
3645

3746
public CgroupMemoryMonitor(CgroupDiscoverer cgroupDiscoverer, final Map<String, String[]> dimensions, String feed)
3847
{
3948
super(feed);
4049
this.cgroupDiscoverer = cgroupDiscoverer;
4150
this.dimensions = dimensions;
51+
52+
// Check if we're running on cgroups v2
53+
this.isRunningOnCgroupsV2 = cgroupDiscoverer.getCgroupVersion().equals(CgroupVersion.V2);
54+
if (isRunningOnCgroupsV2) {
55+
this.cgroupV2MemoryMonitor = new CgroupV2MemoryMonitor(cgroupDiscoverer, dimensions, feed);
56+
LOG.info("Detected cgroups v2, using CgroupV2MemoryMonitor behavior for accurate metrics");
57+
} else {
58+
this.cgroupV2MemoryMonitor = null;
59+
}
4260
}
4361

4462
public CgroupMemoryMonitor(final Map<String, String[]> dimensions, String feed)
4563
{
46-
this(new ProcSelfCgroupDiscoverer(), dimensions, feed);
64+
this(ProcSelfCgroupDiscoverer.autoCgroupDiscoverer(), dimensions, feed);
4765
}
4866

4967
public CgroupMemoryMonitor(final Map<String, String[]> dimensions)
@@ -58,11 +76,31 @@ public CgroupMemoryMonitor()
5876

5977
@Override
6078
public boolean doMonitor(ServiceEmitter emitter)
79+
{
80+
if (isRunningOnCgroupsV2) {
81+
return cgroupV2MemoryMonitor.doMonitor(emitter);
82+
} else {
83+
return parseAndEmit(emitter, cgroupDiscoverer, dimensions, MEMORY_USAGE_FILE, MEMORY_LIMIT_FILE, this);
84+
}
85+
}
86+
87+
/**
88+
* Common metric parser and emitter for both v1 and v2 cgroupMemory monitors.
89+
*/
90+
public static boolean parseAndEmit(
91+
ServiceEmitter emitter,
92+
CgroupDiscoverer cgroupDiscoverer,
93+
Map<String, String[]> dimensions,
94+
String memoryUsageFile,
95+
String memoryLimitFile,
96+
FeedDefiningMonitor feedDefiningMonitor
97+
)
6198
{
6299
final Memory memory = new Memory(cgroupDiscoverer);
63-
final Memory.MemoryStat stat = memory.snapshot(memoryUsageFile(), memoryLimitFile());
64-
final ServiceMetricEvent.Builder builder = builder();
100+
final Memory.MemoryStat stat = memory.snapshot(memoryUsageFile, memoryLimitFile);
101+
final ServiceMetricEvent.Builder builder = feedDefiningMonitor.builder();
65102
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
103+
builder.setDimension("cgroupversion", cgroupDiscoverer.getCgroupVersion().name());
66104
emitter.emit(builder.setMetric("cgroup/memory/usage/bytes", stat.getUsage()));
67105
emitter.emit(builder.setMetric("cgroup/memory/limit/bytes", stat.getLimit()));
68106

@@ -72,19 +110,10 @@ public boolean doMonitor(ServiceEmitter emitter)
72110
emitter.emit(builder.setMetric(StringUtils.format("cgroup/memory/%s", key), value));
73111
});
74112
stat.getNumaMemoryStats().forEach((key, value) -> {
75-
builder().setDimension("numaZone", Long.toString(key));
113+
feedDefiningMonitor.builder().setDimension("numaZone", Long.toString(key));
76114
value.forEach((k, v) -> emitter.emit(builder.setMetric(StringUtils.format("cgroup/memory_numa/%s/pages", k), v)));
77115
});
78116
return true;
79117
}
80118

81-
public String memoryUsageFile()
82-
{
83-
return "memory.usage_in_bytes";
84-
}
85-
86-
public String memoryLimitFile()
87-
{
88-
return "memory.limit_in_bytes";
89-
}
90119
}

processing/src/main/java/org/apache/druid/java/util/metrics/CgroupUtil.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,20 @@ public static long readLongValue(CgroupDiscoverer discoverer, String cgroup, Str
5858
return defaultValue;
5959
}
6060
}
61+
62+
/**
63+
* Calculates the total cores allocated through quotas. A negative value indicates that no quota has been specified.
64+
* We use -1 because that's the default value used in the cgroup.
65+
*
66+
* @param quotaUs the cgroup quota value.
67+
* @param periodUs the cgroup period value.
68+
* @return the calculated processor quota, -1 if no quota or period set.
69+
*/
70+
public static double computeProcessorQuota(long quotaUs, long periodUs)
71+
{
72+
return quotaUs < 0 || periodUs == 0
73+
? -1
74+
: (double) quotaUs / periodUs;
75+
}
76+
6177
}

0 commit comments

Comments
 (0)