@@ -89,6 +89,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	nodes []*apiv1.Node,
 	daemonSets []*appsv1.DaemonSet,
 	nodeInfos map[string]*schedulerframework.NodeInfo,
+	allOrNothing bool, // Either request enough capacity for all unschedulablePods, or don't request it at all.
 ) (*status.ScaleUpStatus, errors.AutoscalerError) {
 	if !o.initialized {
 		return status.UpdateScaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(errors.InternalError, "ScaleUpOrchestrator is not initialized"))
@@ -146,11 +147,13 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 	}

 	for _, nodeGroup := range validNodeGroups {
-		option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now)
+		option := o.ComputeExpansionOption(nodeGroup, schedulablePodGroups, nodeInfos, len(nodes)+len(upcomingNodes), now, allOrNothing)
 		o.processors.BinpackingLimiter.MarkProcessed(o.autoscalingContext, nodeGroup.Id())

 		if len(option.Pods) == 0 || option.NodeCount == 0 {
 			klog.V(4).Infof("No pod can fit to %s", nodeGroup.Id())
+		} else if allOrNothing && len(option.Pods) < len(unschedulablePods) {
+			klog.V(4).Infof("Some pods can't fit to %s, giving up due to all-or-nothing scale-up strategy", nodeGroup.Id())
 		} else {
 			options = append(options, option)
 		}
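
With the new parameter, an expansion option is kept only if it is non-empty and, under all-or-nothing, covers every unschedulable pod. A minimal standalone sketch of that filter condition (the helper and its name are illustrative assumptions, not part of this change):

```go
package main

import "fmt"

// viable restates the option filter above in isolation: an option must fit at
// least one pod, and under all-or-nothing it must fit every unschedulable pod.
func viable(podsInOption, unschedulablePods, nodeCount int, allOrNothing bool) bool {
	if podsInOption == 0 || nodeCount == 0 {
		return false // nothing fits on this node group at all
	}
	if allOrNothing && podsInOption < unschedulablePods {
		return false // a partial fit is rejected under the all-or-nothing strategy
	}
	return true
}

func main() {
	fmt.Println(viable(3, 5, 2, false)) // true: partial fit is acceptable by default
	fmt.Println(viable(3, 5, 2, true))  // false: all-or-nothing needs all 5 pods placed
	fmt.Println(viable(5, 5, 2, true))  // true: every pod is accommodated
}
```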
@@ -211,9 +214,26 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 			aErr)
 	}

+	if newNodes < bestOption.NodeCount {
+		klog.V(1).Infof("Only %d nodes can be added to %s due to cluster-wide limits", newNodes, bestOption.NodeGroup.Id())
+		if allOrNothing {
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
+	}
+
 	// If necessary, create the node group. This is no longer simulation, an empty node group will be created by cloud provider if supported.
 	createNodeGroupResults := make([]nodegroups.CreateNodeGroupResult, 0)
 	if !bestOption.NodeGroup.Exist() {
+		if allOrNothing && bestOption.NodeGroup.MaxSize() < newNodes {
+			klog.V(1).Infof("Can only create a new node group with max %d nodes, need %d nodes", bestOption.NodeGroup.MaxSize(), newNodes)
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
 		var scaleUpStatus *status.ScaleUpStatus
 		createNodeGroupResults, scaleUpStatus, aErr = o.CreateNodeGroup(bestOption, nodeInfos, schedulablePodGroups, podEquivalenceGroups, daemonSets)
 		if aErr != nil {
@@ -256,9 +276,24 @@ func (o *ScaleUpOrchestrator) ScaleUp(
 			aErr)
 	}

+	// Last check before scale-up. Node group capacity (both due to max size limits & current size) is only checked when balancing.
+	totalCapacity := 0
+	for _, sui := range scaleUpInfos {
+		totalCapacity += sui.NewSize - sui.CurrentSize
+	}
+	if totalCapacity < newNodes {
+		klog.V(1).Infof("Can only add %d nodes due to node group limits, need %d nodes", totalCapacity, newNodes)
+		if allOrNothing {
+			// Can't execute a scale-up that will accommodate all pods, so nothing is considered schedulable.
+			klog.V(1).Info("Not attempting scale-up due to all-or-nothing strategy: not all pods would be accommodated")
+			markedEquivalenceGroups := markAllGroupsAsUnschedulable(podEquivalenceGroups, AllOrNothingReason)
+			return buildNoOptionsAvailableStatus(markedEquivalenceGroups, skippedNodeGroups, nodeGroups), nil
+		}
+	}
+
 	// Execute scale up.
 	klog.V(1).Infof("Final scale-up plan: %v", scaleUpInfos)
-	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
+	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, allOrNothing)
 	if aErr != nil {
 		return status.UpdateScaleUpError(
 			&status.ScaleUpStatus{
@@ -364,7 +399,7 @@ func (o *ScaleUpOrchestrator) ScaleUpToNodeGroupMinSize(
 	}

 	klog.V(1).Infof("ScaleUpToNodeGroupMinSize: final scale-up plan: %v", scaleUpInfos)
-	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now)
+	aErr, failedNodeGroups := o.scaleUpExecutor.ExecuteScaleUps(scaleUpInfos, nodeInfos, now, false /* allOrNothing disabled */)
 	if aErr != nil {
 		return status.UpdateScaleUpError(
 			&status.ScaleUpStatus{
@@ -447,6 +482,7 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
 	nodeInfos map[string]*schedulerframework.NodeInfo,
 	currentNodeCount int,
 	now time.Time,
+	allOrNothing bool,
 ) expander.Option {
 	option := expander.Option{NodeGroup: nodeGroup}
 	podGroups := schedulablePodGroups[nodeGroup.Id()]
@@ -471,11 +507,22 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
 	if err != nil && err != cloudprovider.ErrNotImplemented {
 		klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
 	}
+
+	// Special handling for groups that only scale from zero to max.
 	if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
-		if option.NodeCount > 0 && option.NodeCount != nodeGroup.MaxSize() {
+		// For zero-or-max scaling groups, the only valid value of node count is node group's max size.
+		if allOrNothing && option.NodeCount > nodeGroup.MaxSize() {
+			// We would have to cap the node count, which means not all pods will be
+			// accommodated. This violates the principle of all-or-nothing strategy.
+			option.Pods = nil
+			option.NodeCount = 0
+		}
+		if option.NodeCount > 0 {
+			// Cap or increase the number of nodes to the only valid value - node group's max size.
 			option.NodeCount = nodeGroup.MaxSize()
 		}
 	}
+
 	return option
 }

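The two branches above interact for zero-or-max node groups: under all-or-nothing, an option that would need more nodes than MaxSize() is discarded outright; any remaining non-zero count is then rounded to exactly the group's max size. A small sketch of that capping behaviour (the standalone function and its name are assumptions for illustration, not part of this change):

```go
package main

import "fmt"

// capZeroOrMax mirrors the zero-or-max capping above for a group whose only
// valid non-zero size is maxSize.
func capZeroOrMax(nodeCount, maxSize int, allOrNothing bool) int {
	if allOrNothing && nodeCount > maxSize {
		// Capping would leave some pods unscheduled, which all-or-nothing forbids.
		return 0
	}
	if nodeCount > 0 {
		// Cap or increase the count to the group's only valid value: its max size.
		return maxSize
	}
	return nodeCount
}

func main() {
	fmt.Println(capZeroOrMax(12, 10, true)) // 0: give up rather than strand pods
	fmt.Println(capZeroOrMax(7, 10, false)) // 10: rounded up to the group's max
	fmt.Println(capZeroOrMax(7, 10, true))  // 10: all pods still fit after rounding up
}
```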
@@ -564,6 +611,7 @@ func (o *ScaleUpOrchestrator) SchedulablePodGroups(
 			})
 			// Mark pod group as (theoretically) schedulable.
 			eg.Schedulable = true
+			eg.SchedulableGroups = append(eg.SchedulableGroups, nodeGroup.Id())
 		} else {
 			klog.V(2).Infof("Pod %s/%s can't be scheduled on %s, predicate checking error: %v", samplePod.Namespace, samplePod.Name, nodeGroup.Id(), err.VerboseMessage())
 			if podCount := len(eg.Pods); podCount > 1 {
@@ -709,6 +757,29 @@ func matchingSchedulablePodGroups(podGroups []estimator.PodEquivalenceGroup, sim
 	return true
 }

+func markAllGroupsAsUnschedulable(egs []*equivalence.PodGroup, reason status.Reasons) []*equivalence.PodGroup {
+	for _, eg := range egs {
+		if eg.Schedulable {
+			if eg.SchedulingErrors == nil {
+				eg.SchedulingErrors = map[string]status.Reasons{}
+			}
+			for _, sg := range eg.SchedulableGroups {
+				eg.SchedulingErrors[sg] = reason
+			}
+			eg.Schedulable = false
+		}
+	}
+	return egs
+}
+
+func buildNoOptionsAvailableStatus(egs []*equivalence.PodGroup, skipped map[string]status.Reasons, ngs []cloudprovider.NodeGroup) *status.ScaleUpStatus {
+	return &status.ScaleUpStatus{
+		Result:                  status.ScaleUpNoOptionsAvailable,
+		PodsRemainUnschedulable: GetRemainingPods(egs, skipped),
+		ConsideredNodeGroups:    ngs,
+	}
+}
+
 // GetRemainingPods returns information about pods which CA is unable to help
 // at this moment.
 func GetRemainingPods(egs []*equivalence.PodGroup, skipped map[string]status.Reasons) []status.NoScaleUpInfo {
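
These helpers back the all-or-nothing bail-outs earlier in ScaleUp: each equivalence group that was still considered schedulable gets the given reason recorded against every node group it could have landed on, and the result is wrapped into a ScaleUpNoOptionsAvailable status. A rough sketch of the marking effect, using simplified stand-in types in place of the real equivalence.PodGroup and status.Reasons:

```go
package main

import "fmt"

// Stand-ins for equivalence.PodGroup and status.Reasons, reduced to the fields
// the helper touches; field names follow the change above.
type reason string

type podGroup struct {
	Schedulable       bool
	SchedulableGroups []string
	SchedulingErrors  map[string]reason
}

// markAll is a simplified copy of markAllGroupsAsUnschedulable above.
func markAll(egs []*podGroup, r reason) []*podGroup {
	for _, eg := range egs {
		if !eg.Schedulable {
			continue
		}
		if eg.SchedulingErrors == nil {
			eg.SchedulingErrors = map[string]reason{}
		}
		for _, sg := range eg.SchedulableGroups {
			eg.SchedulingErrors[sg] = r
		}
		eg.Schedulable = false
	}
	return egs
}

func main() {
	eg := &podGroup{Schedulable: true, SchedulableGroups: []string{"ng-1", "ng-2"}}
	markAll([]*podGroup{eg}, "AllOrNothingReason")
	fmt.Println(eg.Schedulable, eg.SchedulingErrors)
	// Output: false map[ng-1:AllOrNothingReason ng-2:AllOrNothingReason]
}
```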