
Commit e795ac9

Add support for all-or-nothing scale-up strategy (kubernetes#6821)

* Add support for all-or-nothing scale-up strategy
* Review fixes

Parent: fa9969a

12 files changed: +185 additions, -25 deletions


cluster-autoscaler/core/scaleup/equivalence/groups.go

Lines changed: 4 additions & 3 deletions

@@ -30,9 +30,10 @@ import (

 // PodGroup contains a group of pods that are equivalent in terms of schedulability.
 type PodGroup struct {
-	Pods             []*apiv1.Pod
-	SchedulingErrors map[string]status.Reasons
-	Schedulable      bool
+	Pods              []*apiv1.Pod
+	SchedulingErrors  map[string]status.Reasons
+	SchedulableGroups []string
+	Schedulable       bool
 }

 // BuildPodGroups prepares pod groups with equivalent scheduling properties.

cluster-autoscaler/core/scaleup/orchestrator/executor.go

Lines changed: 20 additions & 5 deletions

@@ -61,18 +61,20 @@ func (e *scaleUpExecutor) ExecuteScaleUps(
 	scaleUpInfos []nodegroupset.ScaleUpInfo,
 	nodeInfos map[string]*schedulerframework.NodeInfo,
 	now time.Time,
+	atomic bool,
 ) (errors.AutoscalerError, []cloudprovider.NodeGroup) {
 	options := e.autoscalingContext.AutoscalingOptions
 	if options.ParallelScaleUp {
-		return e.executeScaleUpsParallel(scaleUpInfos, nodeInfos, now)
+		return e.executeScaleUpsParallel(scaleUpInfos, nodeInfos, now, atomic)
 	}
-	return e.executeScaleUpsSync(scaleUpInfos, nodeInfos, now)
+	return e.executeScaleUpsSync(scaleUpInfos, nodeInfos, now, atomic)
 }

 func (e *scaleUpExecutor) executeScaleUpsSync(
 	scaleUpInfos []nodegroupset.ScaleUpInfo,
 	nodeInfos map[string]*schedulerframework.NodeInfo,
 	now time.Time,
+	atomic bool,
 ) (errors.AutoscalerError, []cloudprovider.NodeGroup) {
 	availableGPUTypes := e.autoscalingContext.CloudProvider.GetAvailableGPUTypes()
 	for _, scaleUpInfo := range scaleUpInfos {

@@ -81,7 +83,7 @@ func (e *scaleUpExecutor) executeScaleUpsSync(
 			klog.Errorf("ExecuteScaleUp: failed to get node info for node group %s", scaleUpInfo.Group.Id())
 			continue
 		}
-		if aErr := e.executeScaleUp(scaleUpInfo, nodeInfo, availableGPUTypes, now); aErr != nil {
+		if aErr := e.executeScaleUp(scaleUpInfo, nodeInfo, availableGPUTypes, now, atomic); aErr != nil {
 			return aErr, []cloudprovider.NodeGroup{scaleUpInfo.Group}
 		}
 	}

@@ -92,6 +94,7 @@ func (e *scaleUpExecutor) executeScaleUpsParallel(
 	scaleUpInfos []nodegroupset.ScaleUpInfo,
 	nodeInfos map[string]*schedulerframework.NodeInfo,
 	now time.Time,
+	atomic bool,
 ) (errors.AutoscalerError, []cloudprovider.NodeGroup) {
 	if err := checkUniqueNodeGroups(scaleUpInfos); err != nil {
 		return err, extractNodeGroups(scaleUpInfos)

@@ -113,7 +116,7 @@ func (e *scaleUpExecutor) executeScaleUpsParallel(
 				klog.Errorf("ExecuteScaleUp: failed to get node info for node group %s", info.Group.Id())
 				return
 			}
-			if aErr := e.executeScaleUp(info, nodeInfo, availableGPUTypes, now); aErr != nil {
+			if aErr := e.executeScaleUp(info, nodeInfo, availableGPUTypes, now, atomic); aErr != nil {
 				errResults <- errResult{err: aErr, info: &info}
 			}
 		}(scaleUpInfo)

@@ -136,19 +139,31 @@ func (e *scaleUpExecutor) executeScaleUpsParallel(
 	return nil, nil
 }

+func (e *scaleUpExecutor) increaseSize(nodeGroup cloudprovider.NodeGroup, increase int, atomic bool) error {
+	if atomic {
+		if err := nodeGroup.AtomicIncreaseSize(increase); err != cloudprovider.ErrNotImplemented {
+			return err
+		}
+		// If error is cloudprovider.ErrNotImplemented, fall back to non-atomic
+		// increase - cloud provider doesn't support it.
+	}
+	return nodeGroup.IncreaseSize(increase)
+}
+
 func (e *scaleUpExecutor) executeScaleUp(
 	info nodegroupset.ScaleUpInfo,
 	nodeInfo *schedulerframework.NodeInfo,
 	availableGPUTypes map[string]struct{},
 	now time.Time,
+	atomic bool,
 ) errors.AutoscalerError {
 	gpuConfig := e.autoscalingContext.CloudProvider.GetNodeGpuConfig(nodeInfo.Node())
 	gpuResourceName, gpuType := gpu.GetGpuInfoForMetrics(gpuConfig, availableGPUTypes, nodeInfo.Node(), nil)
 	klog.V(0).Infof("Scale-up: setting group %s size to %d", info.Group.Id(), info.NewSize)
 	e.autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaledUpGroup",
 		"Scale-up: setting group %s size to %d instead of %d (max: %d)", info.Group.Id(), info.NewSize, info.CurrentSize, info.MaxSize)
 	increase := info.NewSize - info.CurrentSize
-	if err := info.Group.IncreaseSize(increase); err != nil {
+	if err := e.increaseSize(info.Group, increase, atomic); err != nil {
 		e.autoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err)
 		aerr := errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to increase node group size: ")
 		e.scaleStateNotifier.RegisterFailedScaleUp(info.Group, string(aerr.Type()), aerr.Error(), gpuResourceName, gpuType, now)
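
The new increaseSize helper is the heart of the atomic path: try the provider's AtomicIncreaseSize first, and fall back to the plain IncreaseSize only when the provider returns ErrNotImplemented. Below is a minimal, self-contained sketch of that fallback pattern; the nodeGroup interface, the errNotImplemented sentinel, and the fakeGroup type are stand-ins invented for illustration, not the real cloudprovider types.

package main

import (
	"errors"
	"fmt"
)

// errNotImplemented stands in for cloudprovider.ErrNotImplemented; the real
// sentinel lives in the cluster-autoscaler cloudprovider package.
var errNotImplemented = errors.New("not implemented")

// nodeGroup is a hypothetical slice of the cloudprovider.NodeGroup interface,
// reduced to the two methods the fallback logic needs.
type nodeGroup interface {
	AtomicIncreaseSize(delta int) error
	IncreaseSize(delta int) error
}

// increaseSize mirrors the pattern added in executor.go: prefer the atomic
// call when all-or-nothing semantics were requested, and fall back to the
// regular increase only if the provider does not implement it.
func increaseSize(ng nodeGroup, delta int, atomic bool) error {
	if atomic {
		if err := ng.AtomicIncreaseSize(delta); err != errNotImplemented {
			// Either success (nil) or a real failure; both are returned as-is.
			return err
		}
		// errNotImplemented: the provider has no atomic path, use the normal one.
	}
	return ng.IncreaseSize(delta)
}

// fakeGroup simulates a provider without atomic support.
type fakeGroup struct{ size int }

func (g *fakeGroup) AtomicIncreaseSize(delta int) error { return errNotImplemented }
func (g *fakeGroup) IncreaseSize(delta int) error       { g.size += delta; return nil }

func main() {
	g := &fakeGroup{size: 3}
	if err := increaseSize(g, 5, true); err != nil {
		fmt.Println("scale-up failed:", err)
		return
	}
	fmt.Println("new size:", g.size) // new size: 8
}

Returning the atomic call's result unchanged (including nil on success) keeps the fallback limited to the single "not implemented" case, which matches the intent of the committed helper.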

cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go

Lines changed: 75 additions & 4 deletions

@@ -89,6 +89,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	nodes []*apiv1.Node,
 	daemonSets []*appsv1.DaemonSet,
 	nodeInfos map[string]*schedulerframework.NodeInfo,
+	allOrNothing bool, // Either request enough capacity for all unschedulablePods, or don't request it at all.
 ) (*status.ScaleUpStatus, errors.AutoscalerError) {
 	if !o.initialized {
 		return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, "ScaleUpOrchestrator is not initialized"))

@@ -146,11 +147,13 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	}

 	for _, nodeGroup := range validNodeGroups {
-		option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now)
+		option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now, allOrNothing)
 		o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingContext, nodeGroup.Id())

 		if len(option.Pods) == 0 || option.NodeCount == 0 {
 			klog.V(4).Infof("No pod can fit to %s", nodeGroup.Id())
+		} else if allOrNothing && len(option.Pods) < len(unschedulablePods) {
+			klog.V(4).Infof("Some pods can't fit to %s, giving up due to all-or-nothing scale-up strategy", nodeGroup.Id())
 		} else {
 			options = append(options, option)
 		}

@@ -211,9 +214,26 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 			aErr)
 	}

+	if newNodes < bestOption.NodeCount {
+		klog.V(1).Infof("Only %d nodes can be added to %s due to cluster-wide limits", newNodes, bestOption.NodeGroup.Id())
+		if allOrNothing {
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
+	}
+
 	// If necessary, create the node group. This is no longer simulation, an empty node group will be created by cloud provider if supported.
 	createNodeGroupResults := make([]nodegroups.CreateNodeGroupResult, 0)
 	if !bestOption.NodeGroup.Exist() {
+		if allOrNothing && bestOption.NodeGroup.MaxSize() < newNodes {
+			klog.V(1).Infof("Can only create a new node group with max %d nodes, need %d nodes", bestOption.NodeGroup.MaxSize(), newNodes)
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
 		var scaleUpStatus *status.ScaleUpStatus
 		createNodeGroupResults, scaleUpStatus, aErr = o.CreateNodeGroup(bestOption, nodeInfos, schedulablePodGroups, podEquivalenceGroups, daemonSets)
 		if aErr != nil {

@@ -256,9 +276,24 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 			aErr)
 	}

+	// Last check before scale-up. Node group capacity (both due to max size limits & current size) is only checked when balancing.
+	totalCapacity := 0
+	for _, sui := range scaleUpInfos {
+		totalCapacity += sui.NewSize - sui.CurrentSize
+	}
+	if totalCapacity < newNodes {
+		klog.V(1).Infof("Can only add %d nodes due to node group limits, need %d nodes", totalCapacity, newNodes)
+		if allOrNothing {
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
+	}
+
 	// Execute scale up.
 	klog.V(1).Infof("Final scale-up plan: %v", scaleUpInfos)
-	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
+	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, allOrNothing)
 	if aErr != nil {
 		return status.UpdateScaleUpError(
 			&status.ScaleUpStatus{

@@ -364,7 +399,7 @@ func (o *ScaleUpOrchestrator) ScaleUpToNodeGroupMinSize(
 	}

 	klog.V(1).Infof("ScaleUpToNodeGroupMinSize: final scale-up plan: %v", scaleUpInfos)
-	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
+	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, false /* allOrNothing disabled */)
 	if aErr != nil {
 		return status.UpdateScaleUpError(
 			&status.ScaleUpStatus{

@@ -447,6 +482,7 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
 	nodeInfos map[string]*schedulerframework.NodeInfo,
 	currentNodeCount int,
 	now time.Time,
+	allOrNothing bool,
 ) expander.Option {
 	option := expander.Option{NodeGroup: nodeGroup}
 	podGroups := schedulablePodGroups[nodeGroup.Id()]

@@ -471,11 +507,22 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
 	if err != nil && err != cloudprovider.ErrNotImplemented {
 		klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
 	}
+
+	// Special handling for groups that only scale from zero to max.
 	if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
-		if option.NodeCount > 0 && option.NodeCount != nodeGroup.MaxSize() {
+		// For zero-or-max scaling groups, the only valid value of node count is node group's max size.
+		if allOrNothing && option.NodeCount > nodeGroup.MaxSize() {
+			// We would have to cap the node count, which means not all pods will be
+			// accommodated. This violates the principle of all-or-nothing strategy.
+			option.Pods = nil
+			option.NodeCount = 0
+		}
+		if option.NodeCount > 0 {
+			// Cap or increase the number of nodes to the only valid value - node group's max size.
 			option.NodeCount = nodeGroup.MaxSize()
 		}
 	}
+
 	return option
 }

@@ -564,6 +611,7 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups(
 			})
 			// Mark pod group as (theoretically) schedulable.
 			eg.Schedulable = true
+			eg.SchedulableGroups = append(eg.SchedulableGroups, nodeGroup.Id())
 		} else {
 			klog.V(2).Infof("Pod %s/%s can't be scheduled on %s, predicate checking error: %v", samplePod.Namespace, samplePod.Name, nodeGroup.Id(), err.VerboseMessage())
 			if podCount := len(eg.Pods); podCount > 1 {

@@ -709,6 +757,29 @@ func matchingSchedulablePodGroups(podGroups []estimator.PodEquivalenceGroup, sim
 	return true
 }

+func markAllGroupsAsUnschedulable(egs []*equivalence.PodGroup, reason status.Reasons) []*equivalence.PodGroup {
+	for _, eg := range egs {
+		if eg.Schedulable {
+			if eg.SchedulingErrors == nil {
+				eg.SchedulingErrors = map[string]status.Reasons{}
+			}
+			for _, sg := range eg.SchedulableGroups {
+				eg.SchedulingErrors[sg] = reason
+			}
+			eg.Schedulable = false
+		}
+	}
+	return egs
+}
+
+func buildNoOptionsAvailableStatus(egs []*equivalence.PodGroup, skipped map[string]status.Reasons, ngs []cloudprovider.NodeGroup) *status.ScaleUpStatus {
+	return &status.ScaleUpStatus{
+		Result:                  status.ScaleUpNoOptionsAvailable,
+		PodsRemainUnschedulable: GetRemainingPods(egs, skipped),
+		ConsideredNodeGroups:    ngs,
+	}
+}
+
 // GetRemainingPods returns information about pods which CA is unable to help
 // at this moment.
 func GetRemainingPods(egs []*equivalence.PodGroup, skipped map[string]status.Reasons) []status.NoScaleUpInfo {
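
The three new early-return blocks in ScaleUp share one shape: work out how much capacity the plan can actually deliver, and if that falls short of what all the pending pods need under all-or-nothing, mark every equivalence group unschedulable and report ScaleUpNoOptionsAvailable rather than scale up partially. A minimal sketch of that gate follows; scaleUpInfo, the group names, and the numbers are invented for illustration and stand in for nodegroupset.ScaleUpInfo and the real binpacking estimate.

package main

import "fmt"

// scaleUpInfo is a made-up stand-in for nodegroupset.ScaleUpInfo, keeping only
// the fields the capacity check reads.
type scaleUpInfo struct {
	group       string
	currentSize int
	newSize     int
}

// totalCapacity sums how many nodes the final plan is allowed to add, mirroring
// the "last check before scale-up" added to ScaleUp.
func totalCapacity(infos []scaleUpInfo) int {
	total := 0
	for _, sui := range infos {
		total += sui.newSize - sui.currentSize
	}
	return total
}

func main() {
	plan := []scaleUpInfo{
		{group: "ng-a", currentSize: 2, newSize: 4}, // +2 nodes
		{group: "ng-b", currentSize: 1, newSize: 2}, // +1 node (capped by max size)
	}
	needed := 5 // nodes binpacking estimated for all unschedulable pods
	allOrNothing := true

	if got := totalCapacity(plan); got < needed && allOrNothing {
		// Same outcome as markAllGroupsAsUnschedulable + buildNoOptionsAvailableStatus:
		// report no options available instead of executing a partial scale-up.
		fmt.Printf("giving up: can add %d nodes, need %d\n", got, needed)
		return
	}
	fmt.Println("executing scale-up plan:", plan)
}

Without the flag the orchestrator keeps its old behaviour and simply scales up as far as the limits allow; the gate only changes the outcome when allOrNothing is set.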

cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go

Lines changed: 38 additions & 7 deletions

@@ -861,6 +861,37 @@ func TestNoCreateNodeGroupMaxCoresLimitHit(t *testing.T) {
 	simpleNoScaleUpTest(t, config, results)
 }

+func TestAllOrNothing(t *testing.T) {
+	options := defaultOptions
+
+	extraPods := []PodConfig{}
+	extraPodNames := []string{}
+	for i := 0; i < 11; i++ {
+		podName := fmt.Sprintf("pod-%d", i)
+		extraPods = append(extraPods, PodConfig{Name: podName, Cpu: 1000, Memory: 100})
+		extraPodNames = append(extraPodNames, podName)
+	}
+
+	config := &ScaleUpTestConfig{
+		Nodes: []NodeConfig{
+			{Name: "n1", Cpu: 1000, Memory: 1000, Gpu: 0, Ready: true, Group: "ng"},
+		},
+		Pods:         []PodConfig{},
+		ExtraPods:    extraPods,
+		Options:      &options,
+		AllOrNothing: true,
+	}
+
+	result := &ScaleTestResults{
+		NoScaleUpReason: "all-or-nothing",
+		ScaleUpStatus: ScaleUpStatusInfo{
+			PodsRemainUnschedulable: extraPodNames,
+		},
+	}
+
+	simpleNoScaleUpTest(t, config, result)
+}
+
 func simpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig, expectedResults *ScaleTestResults) {
 	results := runSimpleScaleUpTest(t, config)
 	assert.NotNil(t, results.GroupSizeChanges, "Expected scale up event")

@@ -1032,7 +1063,7 @@ func runSimpleScaleUpTest(t *testing.T, config *ScaleUpTestConfig) *ScaleUpTestR
 	context.ExpanderStrategy = expander

 	// scale up
-	scaleUpStatus, scaleUpErr := orchestrator.ScaleUp(extraPods, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, scaleUpErr := orchestrator.ScaleUp(extraPods, nodes, []*appsv1.DaemonSet{}, nodeInfos, config.AllOrNothing)
 	processors.ScaleUpStatusProcessor.Process(&context, scaleUpStatus)

 	// aggregate group size changes

@@ -1131,7 +1162,7 @@ func TestScaleUpUnhealthy(t *testing.T) {
 	processors := NewTestProcessors(&context)
 	suOrchestrator := New()
 	suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{})
-	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)

 	assert.NoError(t, err)
 	// Node group is unhealthy.

@@ -1185,7 +1216,7 @@ func TestBinpackingLimiter(t *testing.T) {
 	expander := NewMockRepotingStrategy(t, nil)
 	context.ExpanderStrategy = expander

-	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{extraPod}, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{extraPod}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)
 	processors.ScaleUpStatusProcessor.Process(&context, scaleUpStatus)
 	assert.NoError(t, err)
 	assert.True(t, scaleUpStatus.WasSuccessful())

@@ -1231,7 +1262,7 @@ func TestScaleUpNoHelp(t *testing.T) {
 	processors := NewTestProcessors(&context)
 	suOrchestrator := New()
 	suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{})
-	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)
 	processors.ScaleUpStatusProcessor.Process(&context, scaleUpStatus)

 	assert.NoError(t, err)

@@ -1453,7 +1484,7 @@ func TestScaleUpBalanceGroups(t *testing.T) {
 	processors := NewTestProcessors(&context)
 	suOrchestrator := New()
 	suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{})
-	scaleUpStatus, typedErr := suOrchestrator.ScaleUp(pods, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, typedErr := suOrchestrator.ScaleUp(pods, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)

 	assert.NoError(t, typedErr)
 	assert.True(t, scaleUpStatus.WasSuccessful())

@@ -1515,7 +1546,7 @@ func TestScaleUpAutoprovisionedNodeGroup(t *testing.T) {

 	suOrchestrator := New()
 	suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{})
-	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p1}, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p1}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)
 	assert.NoError(t, err)
 	assert.True(t, scaleUpStatus.WasSuccessful())
 	assert.Equal(t, "autoprovisioned-T1", utils.GetStringFromChan(createdGroups))

@@ -1570,7 +1601,7 @@ func TestScaleUpBalanceAutoprovisionedNodeGroups(t *testing.T) {

 	suOrchestrator := New()
 	suOrchestrator.Initialize(&context, processors, clusterState, newEstimatorBuilder(), taints.TaintConfig{})
-	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p1, p2, p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos)
+	scaleUpStatus, err := suOrchestrator.ScaleUp([]*apiv1.Pod{p1, p2, p3}, nodes, []*appsv1.DaemonSet{}, nodeInfos, false)
 	assert.NoError(t, err)
 	assert.True(t, scaleUpStatus.WasSuccessful())
 	assert.Equal(t, "autoprovisioned-T1", utils.GetStringFromChan(createdGroups))

Lines changed: 37 additions & 0 deletions (new file)

@@ -0,0 +1,37 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package orchestrator
+
+// RejectedReasons contains information why given node group was rejected as a scale-up option.
+type RejectedReasons struct {
+	messages []string
+}
+
+// NewRejectedReasons creates new RejectedReason object.
+func NewRejectedReasons(m string) *RejectedReasons {
+	return &RejectedReasons{[]string{m}}
+}
+
+// Reasons returns a slice of reasons why the node group was not considered for scale up.
+func (sr *RejectedReasons) Reasons() []string {
+	return sr.messages
+}
+
+var (
+	// AllOrNothingReason means the node group was rejected because not all pods would fit it when using all-or-nothing strategy.
+	AllOrNothingReason = NewRejectedReasons("not all pods would fit and scale-up is using all-or-nothing strategy")
+)
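
AllOrNothingReason is the value markAllGroupsAsUnschedulable records for each previously schedulable node group, so the resulting NoScaleUp status can explain the rejection. The standalone sketch below shows how such a value is read back; the local reasons interface is assumed to mirror status.Reasons as a single Reasons() []string method, which is an assumption for illustration rather than the verified definition.

package main

import "fmt"

// reasons is assumed to mirror the status.Reasons interface that
// RejectedReasons is written against.
type reasons interface {
	Reasons() []string
}

// RejectedReasons is copied from the new file above: an immutable bag of
// human-readable messages explaining why a node group was rejected.
type RejectedReasons struct {
	messages []string
}

// NewRejectedReasons wraps a single message.
func NewRejectedReasons(m string) *RejectedReasons {
	return &RejectedReasons{[]string{m}}
}

// Reasons returns the stored messages.
func (sr *RejectedReasons) Reasons() []string {
	return sr.messages
}

func main() {
	var r reasons = NewRejectedReasons("not all pods would fit and scale-up is using all-or-nothing strategy")
	// markAllGroupsAsUnschedulable stores a value like this per node group, so the
	// NoScaleUp status can report the message for every group that was skipped.
	fmt.Println(r.Reasons())
}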

cluster-autoscaler/core/scaleup/scaleup.go

Lines changed: 1 addition & 0 deletions

@@ -48,6 +48,7 @@ type Orchestrator interface {
 		nodes []*apiv1.Node,
 		daemonSets []*appsv1.DaemonSet,
 		nodeInfos map[string]*schedulerframework.NodeInfo,
+		allOrNothing bool,
 	) (*status.ScaleUpStatus, errors.AutoscalerError)
 	// ScaleUpToNodeGroupMinSize tries to scale up node groups that have less nodes
 	// than the configured min size. The source of truth for the current node group
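
With the interface change, every caller of Orchestrator.ScaleUp must now pick a value for the flag; the diffs above pass false everywhere except where all-or-nothing semantics are explicitly requested. The sketch below is a hypothetical, heavily trimmed illustration of that choice; the orchestrator interface and fakeOrchestrator type are invented for this example and are not the real scaleup.Orchestrator.

package main

import "fmt"

// orchestrator is a made-up, trimmed-down version of the scaleup.Orchestrator
// interface, keeping only the shape that matters here: ScaleUp now takes an
// allOrNothing flag as its last argument.
type orchestrator interface {
	ScaleUp(pendingPods []string, allOrNothing bool) (string, error)
}

// fakeOrchestrator pretends it can only add capacity for a fixed number of pods.
type fakeOrchestrator struct{ capacity int }

func (f *fakeOrchestrator) ScaleUp(pendingPods []string, allOrNothing bool) (string, error) {
	if allOrNothing && len(pendingPods) > f.capacity {
		// Mirrors the orchestrator's behaviour: refuse a partial scale-up.
		return "ScaleUpNoOptionsAvailable", nil
	}
	return "ScaleUpSuccessful", nil
}

func main() {
	var o orchestrator = &fakeOrchestrator{capacity: 3}
	pods := []string{"p0", "p1", "p2", "p3", "p4"}

	// The regular scale-up path keeps the old behaviour by passing false.
	fmt.Println(o.ScaleUp(pods, false)) // ScaleUpSuccessful <nil>

	// A caller that needs capacity for the whole set of pods at once opts in.
	fmt.Println(o.ScaleUp(pods, true)) // ScaleUpNoOptionsAvailable <nil>
}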
