Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package metrics

import (
"strings"
"sync"
"unicode"

"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
Expand All @@ -29,6 +31,43 @@ const (
DeschedulerSubsystem = "descheduler"
)

// MetricsHandler provides a smart wrapper for metrics that handles export logic
type MetricsHandler interface {
// WithLabelValues is equivalent to metrics.WithLabelValues but respects ShouldExportMetrics
WithLabelValues(lvs ...string) MetricsHandler
// Set sets the gauge value (for GaugeVec metrics)
Set(val float64)
// Inc increments the counter (for CounterVec metrics)
Inc()
// Add adds a value to the counter (for CounterVec metrics)
Add(val float64)
// Observe observes a value for histogram (for HistogramVec metrics)
Observe(val float64)
}

// normalizePluginName converts a plugin name to a normalized form for use in metrics subsystems
// Examples: "LowNodeUtilization" -> "low_node_utilization", "RemovePodsViolatingNodeTaints" -> "remove_pods_violating_node_taints"
func normalizePluginName(pluginName string) string {
var result strings.Builder
for i, r := range pluginName {
if unicode.IsUpper(r) && i > 0 {
result.WriteRune('_')
}
result.WriteRune(unicode.ToLower(r))
}
return result.String()
}

// getPluginSubsystem returns the subsystem name for a plugin, optionally including profile name
func getPluginSubsystem(profileName, pluginName string) string {
normalizedPluginName := normalizePluginName(pluginName)
if profileName == "" {
return DeschedulerSubsystem + "_" + normalizedPluginName
}
normalizedProfileName := normalizePluginName(profileName)
return DeschedulerSubsystem + "_" + normalizedProfileName + "_" + normalizedPluginName
}

var (
PodsEvicted = metrics.NewCounterVec(
&metrics.CounterOpts{
Expand Down Expand Up @@ -105,6 +144,139 @@ var (

var registerMetrics sync.Once

type PluginMetricsRegistry struct {
pluginMetricsMap map[string]map[string]interface{} // plugin -> metric name -> metric object
shouldExport bool
mutex sync.RWMutex
}

func NewPluginMetricsRegistry() *PluginMetricsRegistry {
return &PluginMetricsRegistry{
pluginMetricsMap: make(map[string]map[string]interface{}),
shouldExport: true, // Default to true, can be updated later
}
}

func (r *PluginMetricsRegistry) SetShouldExport(shouldExport bool) {
r.mutex.Lock()
defer r.mutex.Unlock()
r.shouldExport = shouldExport
}

func (r *PluginMetricsRegistry) ShouldExport() bool {
r.mutex.RLock()
defer r.mutex.RUnlock()
return r.shouldExport
}

// RegisterMetricsWithNames registers metrics for a specific plugin with name mapping
func (r *PluginMetricsRegistry) RegisterNamedPluginMetrics(pluginName string, namedMetrics map[string]metrics.Registerable) {
r.mutex.Lock()
defer r.mutex.Unlock()

// Initialize the plugin metrics map if needed
if r.pluginMetricsMap[pluginName] == nil {
r.pluginMetricsMap[pluginName] = make(map[string]interface{})
}

for name, metric := range namedMetrics {
// Store in the metrics map
r.pluginMetricsMap[pluginName][name] = metric

// Register the metric
legacyregistry.MustRegister(metric)
}
}

func (r *PluginMetricsRegistry) GetPluginSubsystem(profileName, pluginName string) string {
return getPluginSubsystem(profileName, pluginName)
}

func (r *PluginMetricsRegistry) GetPluginMetric(pluginName, metricName string) interface{} {
r.mutex.RLock()
defer r.mutex.RUnlock()

if pluginMap, exists := r.pluginMetricsMap[pluginName]; exists {
if metric, exists := pluginMap[metricName]; exists {
return metric
}
}
return nil
}

func (r *PluginMetricsRegistry) HandlePluginMetric(pluginName, metricName string) MetricsHandler {
metric := r.GetPluginMetric(pluginName, metricName)
return newMetricsHandler(metric, r.ShouldExport())
}

type metricsHandler struct {
metric interface{} // Could be *metrics.GaugeVec, *metrics.CounterVec, etc.
shouldExport bool
currentLabels []string
}

func newMetricsHandler(metric interface{}, shouldExport bool) *metricsHandler {
return &metricsHandler{
metric: metric,
shouldExport: shouldExport,
}
}

func (h *metricsHandler) WithLabelValues(lvs ...string) MetricsHandler {
if !h.shouldExport || h.metric == nil {
return h // Return no-op handler
}

newHandler := &metricsHandler{
metric: h.metric,
shouldExport: h.shouldExport,
currentLabels: lvs,
}
return newHandler
}

func (h *metricsHandler) Set(val float64) {
if !h.shouldExport || h.metric == nil {
return
}

if gaugeVec, ok := h.metric.(*metrics.GaugeVec); ok {
gaugeVec.WithLabelValues(h.currentLabels...).Set(val)
}
}

func (h *metricsHandler) Inc() {
if !h.shouldExport || h.metric == nil {
return
}

if counterVec, ok := h.metric.(*metrics.CounterVec); ok {
counterVec.WithLabelValues(h.currentLabels...).Inc()
}
}

func (h *metricsHandler) Add(val float64) {
if !h.shouldExport || h.metric == nil {
return
}

if counterVec, ok := h.metric.(*metrics.CounterVec); ok {
counterVec.WithLabelValues(h.currentLabels...).Add(val)
}
}

func (h *metricsHandler) Observe(val float64) {
if !h.shouldExport || h.metric == nil {
return
}

if histogramVec, ok := h.metric.(*metrics.HistogramVec); ok {
histogramVec.WithLabelValues(h.currentLabels...).Observe(val)
}
}

var PluginRegistry = NewPluginMetricsRegistry()

// Register all metrics.
func Register() {
// Register the metrics.
Expand Down
84 changes: 84 additions & 0 deletions metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"testing"
)

func TestNormalizePluginName(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"LowNodeUtilization", "low_node_utilization"},
{"RemovePodsViolatingNodeTaints", "remove_pods_violating_node_taints"},
{"HighNodeUtilization", "high_node_utilization"},
{"RemoveDuplicates", "remove_duplicates"},
{"simple", "simple"},
{"SimplePlugin", "simple_plugin"},
{"ABC", "a_b_c"},
}

for _, test := range tests {
t.Run(test.input, func(t *testing.T) {
result := normalizePluginName(test.input)
if result != test.expected {
t.Errorf("normalizePluginName(%q) = %q, expected %q", test.input, result, test.expected)
}
})
}
}

func TestPluginMetricsRegistry_GetPluginSubsystem(t *testing.T) {
registry := NewPluginMetricsRegistry()

tests := []struct {
name string
profileName string
pluginName string
expected string
}{
{
name: "plugin without a profile",
profileName: "",
pluginName: "LowNodeUtilization",
expected: "descheduler_low_node_utilization",
},
{
name: "well known profile with a plugin",
profileName: "ProfileA",
pluginName: "LowNodeUtilization",
expected: "descheduler_profile_a_low_node_utilization",
},
{
name: "same plugin registered by a different profile",
profileName: "CustomProfile",
pluginName: "LowNodeUtilization",
expected: "descheduler_custom_profile_low_node_utilization",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := registry.GetPluginSubsystem(tt.profileName, tt.pluginName)
if result != tt.expected {
t.Errorf("GetPluginSubsystem(%s, %s) = %q, expected %q", tt.profileName, tt.pluginName, result, tt.expected)
}
})
}
}
1 change: 1 addition & 0 deletions pkg/descheduler/descheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
frameworkprofile.WithGetPodsAssignedToNodeFnc(d.getPodsAssignedToNode),
frameworkprofile.WithMetricsCollector(d.metricsCollector),
frameworkprofile.WithPrometheusClient(d.prometheusClient),
frameworkprofile.WithShouldExportMetrics(!d.rs.DisableMetrics),
)
if err != nil {
klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)
Expand Down
1 change: 1 addition & 0 deletions pkg/framework/fake/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type HandleImpl struct {
PodEvictorImpl *evictions.PodEvictor
MetricsCollectorImpl *metricscollector.MetricsCollector
PrometheusClientImpl promapi.Client
ShouldExportMetricsImpl bool
}

var _ frameworktypes.Handle = &HandleImpl{}
Expand Down
3 changes: 3 additions & 0 deletions pkg/framework/plugins/example/example.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,6 @@ func (d *Example) Deschedule(ctx context.Context, nodes []*v1.Node) *fwtypes.Sta
logger.Info("Example plugin finished descheduling")
return nil
}

// TODO: add an example metric
// document how to register metrics in README.md
29 changes: 27 additions & 2 deletions pkg/framework/plugins/nodeutilization/lownodeutilization.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ package nodeutilization
import (
"context"
"fmt"
"strconv"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"

"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
Expand All @@ -35,9 +37,10 @@ import (

const LowNodeUtilizationPluginName = "LowNodeUtilization"

// this lines makes sure that HighNodeUtilization implements the BalancePlugin
// interface.
// this lines makes sure that LowNodeUtilization implements the BalancePlugin
// and Metrics interfaces.
var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}
var _ frameworktypes.Metrics = &LowNodeUtilization{}

// LowNodeUtilization evicts pods from overutilized nodes to underutilized
// nodes. Note that CPU/Memory requests are used to calculate nodes'
Expand Down Expand Up @@ -134,6 +137,16 @@ func (l *LowNodeUtilization) Name() string {
return LowNodeUtilizationPluginName
}

// HandleMetric returns a metrics handler for the specified metric name
func (l *LowNodeUtilization) HandleMetric(metricName string) metrics.MetricsHandler {
return metrics.PluginRegistry.HandlePluginMetric(LowNodeUtilizationPluginName, metricName)
}

// RegisterMetrics registers the plugin's metrics
func (l *LowNodeUtilization) RegisterMetrics(profileName string) {
RegisterMetrics(metrics.PluginRegistry, profileName)
}

// Balance holds the main logic of the plugin. It evicts pods from over
// utilized nodes to under utilized nodes. The goal here is to evenly
// distribute pods across nodes.
Expand Down Expand Up @@ -179,6 +192,14 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
)
}

for key, rt := range thresholds {
for i, t := range rt {
for resource, value := range t {
l.HandleMetric("thresholds").WithLabelValues(key, strconv.Itoa(i), resource.String()).Set(float64(value))
}
}
}

// classify nodes in under and over utilized. we will later try to move
// pods from the overutilized nodes to the underutilized ones.
nodeGroups := classifier.Classify(
Expand Down Expand Up @@ -255,6 +276,10 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra
logger.V(1).Info("Criteria for a node above target utilization", l.overCriteria...)
logger.V(1).Info("Number of overutilized nodes", "totalNumber", len(highNodes))

// Export classification metrics
l.HandleMetric("classification").WithLabelValues("underutilized").Set(float64(len(lowNodes)))
l.HandleMetric("classification").WithLabelValues("overutilized").Set(float64(len(highNodes)))

if len(lowNodes) == 0 {
logger.V(1).Info(
"No node is underutilized, nothing to do here, you might tune your thresholds further",
Expand Down
Loading