diff --git a/metrics/metrics.go b/metrics/metrics.go index 2b8884939d..611928951e 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -17,7 +17,9 @@ limitations under the License. package metrics import ( + "strings" "sync" + "unicode" "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" @@ -29,6 +31,43 @@ const ( DeschedulerSubsystem = "descheduler" ) +// MetricsHandler provides a smart wrapper for metrics that handles export logic +type MetricsHandler interface { + // WithLabelValues is equivalent to metrics.WithLabelValues but respects ShouldExportMetrics + WithLabelValues(lvs ...string) MetricsHandler + // Set sets the gauge value (for GaugeVec metrics) + Set(val float64) + // Inc increments the counter (for CounterVec metrics) + Inc() + // Add adds a value to the counter (for CounterVec metrics) + Add(val float64) + // Observe observes a value for histogram (for HistogramVec metrics) + Observe(val float64) +} + +// normalizePluginName converts a plugin name to a normalized form for use in metrics subsystems +// Examples: "LowNodeUtilization" -> "low_node_utilization", "RemovePodsViolatingNodeTaints" -> "remove_pods_violating_node_taints" +func normalizePluginName(pluginName string) string { + var result strings.Builder + for i, r := range pluginName { + if unicode.IsUpper(r) && i > 0 { + result.WriteRune('_') + } + result.WriteRune(unicode.ToLower(r)) + } + return result.String() +} + +// getPluginSubsystem returns the subsystem name for a plugin, optionally including profile name +func getPluginSubsystem(profileName, pluginName string) string { + normalizedPluginName := normalizePluginName(pluginName) + if profileName == "" { + return DeschedulerSubsystem + "_" + normalizedPluginName + } + normalizedProfileName := normalizePluginName(profileName) + return DeschedulerSubsystem + "_" + normalizedProfileName + "_" + normalizedPluginName +} + var ( PodsEvicted = metrics.NewCounterVec( &metrics.CounterOpts{ @@ -105,6 +144,139 @@ var ( var registerMetrics sync.Once +type PluginMetricsRegistry struct { + pluginMetricsMap map[string]map[string]interface{} // plugin -> metric name -> metric object + shouldExport bool + mutex sync.RWMutex +} + +func NewPluginMetricsRegistry() *PluginMetricsRegistry { + return &PluginMetricsRegistry{ + pluginMetricsMap: make(map[string]map[string]interface{}), + shouldExport: true, // Default to true, can be updated later + } +} + +func (r *PluginMetricsRegistry) SetShouldExport(shouldExport bool) { + r.mutex.Lock() + defer r.mutex.Unlock() + r.shouldExport = shouldExport +} + +func (r *PluginMetricsRegistry) ShouldExport() bool { + r.mutex.RLock() + defer r.mutex.RUnlock() + return r.shouldExport +} + +// RegisterMetricsWithNames registers metrics for a specific plugin with name mapping +func (r *PluginMetricsRegistry) RegisterNamedPluginMetrics(pluginName string, namedMetrics map[string]metrics.Registerable) { + r.mutex.Lock() + defer r.mutex.Unlock() + + // Initialize the plugin metrics map if needed + if r.pluginMetricsMap[pluginName] == nil { + r.pluginMetricsMap[pluginName] = make(map[string]interface{}) + } + + for name, metric := range namedMetrics { + // Store in the metrics map + r.pluginMetricsMap[pluginName][name] = metric + + // Register the metric + legacyregistry.MustRegister(metric) + } +} + +func (r *PluginMetricsRegistry) GetPluginSubsystem(profileName, pluginName string) string { + return getPluginSubsystem(profileName, pluginName) +} + +func (r *PluginMetricsRegistry) GetPluginMetric(pluginName, metricName string) interface{} { + r.mutex.RLock() + defer r.mutex.RUnlock() + + if pluginMap, exists := r.pluginMetricsMap[pluginName]; exists { + if metric, exists := pluginMap[metricName]; exists { + return metric + } + } + return nil +} + +func (r *PluginMetricsRegistry) HandlePluginMetric(pluginName, metricName string) MetricsHandler { + metric := r.GetPluginMetric(pluginName, metricName) + return newMetricsHandler(metric, r.ShouldExport()) +} + +type metricsHandler struct { + metric interface{} // Could be *metrics.GaugeVec, *metrics.CounterVec, etc. + shouldExport bool + currentLabels []string +} + +func newMetricsHandler(metric interface{}, shouldExport bool) *metricsHandler { + return &metricsHandler{ + metric: metric, + shouldExport: shouldExport, + } +} + +func (h *metricsHandler) WithLabelValues(lvs ...string) MetricsHandler { + if !h.shouldExport || h.metric == nil { + return h // Return no-op handler + } + + newHandler := &metricsHandler{ + metric: h.metric, + shouldExport: h.shouldExport, + currentLabels: lvs, + } + return newHandler +} + +func (h *metricsHandler) Set(val float64) { + if !h.shouldExport || h.metric == nil { + return + } + + if gaugeVec, ok := h.metric.(*metrics.GaugeVec); ok { + gaugeVec.WithLabelValues(h.currentLabels...).Set(val) + } +} + +func (h *metricsHandler) Inc() { + if !h.shouldExport || h.metric == nil { + return + } + + if counterVec, ok := h.metric.(*metrics.CounterVec); ok { + counterVec.WithLabelValues(h.currentLabels...).Inc() + } +} + +func (h *metricsHandler) Add(val float64) { + if !h.shouldExport || h.metric == nil { + return + } + + if counterVec, ok := h.metric.(*metrics.CounterVec); ok { + counterVec.WithLabelValues(h.currentLabels...).Add(val) + } +} + +func (h *metricsHandler) Observe(val float64) { + if !h.shouldExport || h.metric == nil { + return + } + + if histogramVec, ok := h.metric.(*metrics.HistogramVec); ok { + histogramVec.WithLabelValues(h.currentLabels...).Observe(val) + } +} + +var PluginRegistry = NewPluginMetricsRegistry() + // Register all metrics. func Register() { // Register the metrics. diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go new file mode 100644 index 0000000000..0801734504 --- /dev/null +++ b/metrics/metrics_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" +) + +func TestNormalizePluginName(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"LowNodeUtilization", "low_node_utilization"}, + {"RemovePodsViolatingNodeTaints", "remove_pods_violating_node_taints"}, + {"HighNodeUtilization", "high_node_utilization"}, + {"RemoveDuplicates", "remove_duplicates"}, + {"simple", "simple"}, + {"SimplePlugin", "simple_plugin"}, + {"ABC", "a_b_c"}, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + result := normalizePluginName(test.input) + if result != test.expected { + t.Errorf("normalizePluginName(%q) = %q, expected %q", test.input, result, test.expected) + } + }) + } +} + +func TestPluginMetricsRegistry_GetPluginSubsystem(t *testing.T) { + registry := NewPluginMetricsRegistry() + + tests := []struct { + name string + profileName string + pluginName string + expected string + }{ + { + name: "plugin without a profile", + profileName: "", + pluginName: "LowNodeUtilization", + expected: "descheduler_low_node_utilization", + }, + { + name: "well known profile with a plugin", + profileName: "ProfileA", + pluginName: "LowNodeUtilization", + expected: "descheduler_profile_a_low_node_utilization", + }, + { + name: "same plugin registered by a different profile", + profileName: "CustomProfile", + pluginName: "LowNodeUtilization", + expected: "descheduler_custom_profile_low_node_utilization", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := registry.GetPluginSubsystem(tt.profileName, tt.pluginName) + if result != tt.expected { + t.Errorf("GetPluginSubsystem(%s, %s) = %q, expected %q", tt.profileName, tt.pluginName, result, tt.expected) + } + }) + } +} diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index b21d5c2227..b0406adce5 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -425,6 +425,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac frameworkprofile.WithGetPodsAssignedToNodeFnc(d.getPodsAssignedToNode), frameworkprofile.WithMetricsCollector(d.metricsCollector), frameworkprofile.WithPrometheusClient(d.prometheusClient), + frameworkprofile.WithShouldExportMetrics(!d.rs.DisableMetrics), ) if err != nil { klog.ErrorS(err, "unable to create a profile", "profile", profile.Name) diff --git a/pkg/framework/fake/fake.go b/pkg/framework/fake/fake.go index 681a9275bf..831886a9de 100644 --- a/pkg/framework/fake/fake.go +++ b/pkg/framework/fake/fake.go @@ -23,6 +23,7 @@ type HandleImpl struct { PodEvictorImpl *evictions.PodEvictor MetricsCollectorImpl *metricscollector.MetricsCollector PrometheusClientImpl promapi.Client + ShouldExportMetricsImpl bool } var _ frameworktypes.Handle = &HandleImpl{} diff --git a/pkg/framework/plugins/example/example.go b/pkg/framework/plugins/example/example.go index a883ce6493..6c645fc56e 100644 --- a/pkg/framework/plugins/example/example.go +++ b/pkg/framework/plugins/example/example.go @@ -184,3 +184,6 @@ func (d *Example) Deschedule(ctx context.Context, nodes []*v1.Node) *fwtypes.Sta logger.Info("Example plugin finished descheduling") return nil } + +// TODO: add an example metric +// document how to register metrics in README.md diff --git a/pkg/framework/plugins/nodeutilization/lownodeutilization.go b/pkg/framework/plugins/nodeutilization/lownodeutilization.go index 5748e73778..6f3db26264 100644 --- a/pkg/framework/plugins/nodeutilization/lownodeutilization.go +++ b/pkg/framework/plugins/nodeutilization/lownodeutilization.go @@ -19,11 +19,13 @@ package nodeutilization import ( "context" "fmt" + "strconv" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/klog/v2" + "sigs.k8s.io/descheduler/metrics" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node" @@ -35,9 +37,10 @@ import ( const LowNodeUtilizationPluginName = "LowNodeUtilization" -// this lines makes sure that HighNodeUtilization implements the BalancePlugin -// interface. +// this lines makes sure that LowNodeUtilization implements the BalancePlugin +// and Metrics interfaces. var _ frameworktypes.BalancePlugin = &LowNodeUtilization{} +var _ frameworktypes.Metrics = &LowNodeUtilization{} // LowNodeUtilization evicts pods from overutilized nodes to underutilized // nodes. Note that CPU/Memory requests are used to calculate nodes' @@ -134,6 +137,16 @@ func (l *LowNodeUtilization) Name() string { return LowNodeUtilizationPluginName } +// HandleMetric returns a metrics handler for the specified metric name +func (l *LowNodeUtilization) HandleMetric(metricName string) metrics.MetricsHandler { + return metrics.PluginRegistry.HandlePluginMetric(LowNodeUtilizationPluginName, metricName) +} + +// RegisterMetrics registers the plugin's metrics +func (l *LowNodeUtilization) RegisterMetrics(profileName string) { + RegisterMetrics(metrics.PluginRegistry, profileName) +} + // Balance holds the main logic of the plugin. It evicts pods from over // utilized nodes to under utilized nodes. The goal here is to evenly // distribute pods across nodes. @@ -179,6 +192,14 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra ) } + for key, rt := range thresholds { + for i, t := range rt { + for resource, value := range t { + l.HandleMetric("thresholds").WithLabelValues(key, strconv.Itoa(i), resource.String()).Set(float64(value)) + } + } + } + // classify nodes in under and over utilized. we will later try to move // pods from the overutilized nodes to the underutilized ones. nodeGroups := classifier.Classify( @@ -255,6 +276,10 @@ func (l *LowNodeUtilization) Balance(ctx context.Context, nodes []*v1.Node) *fra logger.V(1).Info("Criteria for a node above target utilization", l.overCriteria...) logger.V(1).Info("Number of overutilized nodes", "totalNumber", len(highNodes)) + // Export classification metrics + l.HandleMetric("classification").WithLabelValues("underutilized").Set(float64(len(lowNodes))) + l.HandleMetric("classification").WithLabelValues("overutilized").Set(float64(len(highNodes))) + if len(lowNodes) == 0 { logger.V(1).Info( "No node is underutilized, nothing to do here, you might tune your thresholds further", diff --git a/pkg/framework/plugins/nodeutilization/metrics.go b/pkg/framework/plugins/nodeutilization/metrics.go new file mode 100644 index 0000000000..07c3fda23e --- /dev/null +++ b/pkg/framework/plugins/nodeutilization/metrics.go @@ -0,0 +1,64 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeutilization + +import ( + "k8s.io/component-base/metrics" + frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types" +) + +var ( + // ThresholdsMetric tracks thresholds used by the LowNodeUtilization plugin + ThresholdsMetric *metrics.GaugeVec + + // ClassificationMetric tracks number of nodes by class + ClassificationMetric *metrics.GaugeVec +) + +// RegisterMetrics registers the plugin's metrics using the provided registry +func RegisterMetrics(registry frameworktypes.MetricsRegistry, profileName string) { + pluginSubsystem := registry.GetPluginSubsystem(profileName, LowNodeUtilizationPluginName) + + // Initialize metrics with the plugin-specific subsystem + ThresholdsMetric = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: pluginSubsystem, + Name: "thresholds", + Help: "Thresholds used by the LowNodeUtilization to classify nodes by node, by class, by resource", + StabilityLevel: metrics.ALPHA, + }, + []string{"node", "class", "resource"}, + ) + + ClassificationMetric = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: pluginSubsystem, + Name: "classification", + Help: "Number of nodes by class", + StabilityLevel: metrics.ALPHA, + }, + []string{"class"}, + ) + + // Register the metrics with names in the centralized registry + namedMetrics := map[string]metrics.Registerable{ + "thresholds": ThresholdsMetric, + "classification": ClassificationMetric, + } + + registry.RegisterNamedPluginMetrics(LowNodeUtilizationPluginName, namedMetrics) +} diff --git a/pkg/framework/profile/profile.go b/pkg/framework/profile/profile.go index 8cf1f82b04..83ce16117e 100644 --- a/pkg/framework/profile/profile.go +++ b/pkg/framework/profile/profile.go @@ -74,6 +74,7 @@ type handleImpl struct { getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc sharedInformerFactory informers.SharedInformerFactory evictor *evictorImpl + shouldExportMetrics bool } var _ frameworktypes.Handle = &handleImpl{} @@ -142,6 +143,7 @@ type handleImplOpts struct { getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc podEvictor *evictions.PodEvictor metricsCollector *metricscollector.MetricsCollector + shouldExportMetrics bool } // WithClientSet sets clientSet for the scheduling frameworkImpl. @@ -182,6 +184,12 @@ func WithMetricsCollector(metricsCollector *metricscollector.MetricsCollector) O } } +func WithShouldExportMetrics(shouldExportMetrics bool) Option { + return func(o *handleImplOpts) { + o.shouldExportMetrics = shouldExportMetrics + } +} + func getPluginConfig(pluginName string, pluginConfigs []api.PluginConfig) (*api.PluginConfig, int) { for idx, pluginConfig := range pluginConfigs { if pluginConfig.Name == pluginName { @@ -208,6 +216,13 @@ func buildPlugin(ctx context.Context, config api.DeschedulerProfile, pluginName klog.ErrorS(err, "unable to initialize a plugin", "pluginName", pluginName) return nil, fmt.Errorf("unable to initialize %q plugin: %v", pluginName, err) } + + // If the plugin implements Metrics, register its metrics + if metricsPlugin, ok := pg.(frameworktypes.Metrics); ok { + metrics.PluginRegistry.SetShouldExport(handle.shouldExportMetrics) + metricsPlugin.RegisterMetrics(config.Name) + } + return pg, nil } @@ -280,8 +295,9 @@ func NewProfile(ctx context.Context, config api.DeschedulerProfile, reg pluginre profileName: config.Name, podEvictor: hOpts.podEvictor, }, - metricsCollector: hOpts.metricsCollector, - prometheusClient: hOpts.prometheusClient, + metricsCollector: hOpts.metricsCollector, + prometheusClient: hOpts.prometheusClient, + shouldExportMetrics: hOpts.shouldExportMetrics, } pluginNames := append(config.Plugins.Deschedule.Enabled, config.Plugins.Balance.Enabled...) diff --git a/pkg/framework/types/types.go b/pkg/framework/types/types.go index 2480e06b02..9bcf3f1fee 100644 --- a/pkg/framework/types/types.go +++ b/pkg/framework/types/types.go @@ -22,7 +22,8 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/client-go/informers" clientset "k8s.io/client-go/kubernetes" - + "k8s.io/component-base/metrics" + deschedulermetrics "sigs.k8s.io/descheduler/metrics" "sigs.k8s.io/descheduler/pkg/descheduler/evictions" "sigs.k8s.io/descheduler/pkg/descheduler/metricscollector" podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod" @@ -43,6 +44,20 @@ type Handle interface { MetricsCollector() *metricscollector.MetricsCollector } +type MetricsRegistry interface { + RegisterNamedPluginMetrics(pluginName string, namedMetrics map[string]metrics.Registerable) + GetPluginSubsystem(profileName, pluginName string) string +} + +// Metrics is an optional interface that plugins can implement to register and expose their own metrics +type Metrics interface { + Plugin + // HandleMetric returns a metrics handler for the specified metric, by name + HandleMetric(metricName string) deschedulermetrics.MetricsHandler + // RegisterMetrics registers the plugin's metrics for the given profile + RegisterMetrics(profileName string) +} + // Evictor defines an interface for filtering and evicting pods // while abstracting away the specific pod evictor/evictor filter. type Evictor interface {