@@ -376,16 +376,27 @@ func getMachineSetFraction(ms clusterv1.MachineSet, md clusterv1.MachineDeployme
376376 return integer .RoundToInt32 (newMSsize ) - * (ms .Spec .Replicas )
377377}
378378
379+ // NotUpToDateResult is the result of calling the MachineTemplateUpToDate func for a MachineTemplateSpec.
380+ type NotUpToDateResult struct {
381+ LogMessages []string // consider if to make this private.
382+ ConditionMessages []string
383+ EligibleForInPlaceUpdate bool
384+ }
385+
379386// MachineTemplateUpToDate returns true if the current MachineTemplateSpec is up-to-date with a corresponding desired MachineTemplateSpec.
380387// Note: The comparison does not consider any in-place propagated fields, as well as the version from external references.
381- func MachineTemplateUpToDate (current , desired * clusterv1.MachineTemplateSpec ) (upToDate bool , logMessages , conditionMessages []string ) {
388+ func MachineTemplateUpToDate (current , desired * clusterv1.MachineTemplateSpec ) (bool , * NotUpToDateResult ) {
389+ res := & NotUpToDateResult {
390+ EligibleForInPlaceUpdate : true ,
391+ }
392+
382393 currentCopy := MachineTemplateDeepCopyRolloutFields (current )
383394 desiredCopy := MachineTemplateDeepCopyRolloutFields (desired )
384395
385396 if currentCopy .Spec .Version != desiredCopy .Spec .Version {
386- logMessages = append (logMessages , fmt .Sprintf ("spec.version %s, %s required" , currentCopy .Spec .Version , desiredCopy .Spec .Version ))
397+ res . LogMessages = append (res . LogMessages , fmt .Sprintf ("spec.version %s, %s required" , currentCopy .Spec .Version , desiredCopy .Spec .Version ))
387398 // Note: the code computing the message for MachineDeployment's RolloutOut condition is making assumptions on the format/content of this message.
388- conditionMessages = append (conditionMessages , fmt .Sprintf ("Version %s, %s required" , currentCopy .Spec .Version , desiredCopy .Spec .Version ))
399+ res . ConditionMessages = append (res . ConditionMessages , fmt .Sprintf ("Version %s, %s required" , currentCopy .Spec .Version , desiredCopy .Spec .Version ))
389400 }
390401
391402 // Note: we return a message based on desired.bootstrap.ConfigRef != nil, but we always compare the entire bootstrap
@@ -394,33 +405,33 @@ func MachineTemplateUpToDate(current, desired *clusterv1.MachineTemplateSpec) (u
394405 // common operation so it is acceptable to handle it in this way).
395406 if currentCopy .Spec .Bootstrap .ConfigRef .IsDefined () {
396407 if ! reflect .DeepEqual (currentCopy .Spec .Bootstrap , desiredCopy .Spec .Bootstrap ) {
397- logMessages = append (logMessages , fmt .Sprintf ("spec.bootstrap.configRef %s %s, %s %s required" , currentCopy .Spec .Bootstrap .ConfigRef .Kind , currentCopy .Spec .Bootstrap .ConfigRef .Name , desiredCopy .Spec .Bootstrap .ConfigRef .Kind , desiredCopy .Spec .Bootstrap .ConfigRef .Name ))
408+ res . LogMessages = append (res . LogMessages , fmt .Sprintf ("spec.bootstrap.configRef %s %s, %s %s required" , currentCopy .Spec .Bootstrap .ConfigRef .Kind , currentCopy .Spec .Bootstrap .ConfigRef .Name , desiredCopy .Spec .Bootstrap .ConfigRef .Kind , desiredCopy .Spec .Bootstrap .ConfigRef .Name ))
398409 // Note: dropping "Template" suffix because conditions message will surface on machine.
399- conditionMessages = append (conditionMessages , fmt .Sprintf ("%s is not up-to-date" , strings .TrimSuffix (currentCopy .Spec .Bootstrap .ConfigRef .Kind , clusterv1 .TemplateSuffix )))
410+ res . ConditionMessages = append (res . ConditionMessages , fmt .Sprintf ("%s is not up-to-date" , strings .TrimSuffix (currentCopy .Spec .Bootstrap .ConfigRef .Kind , clusterv1 .TemplateSuffix )))
400411 }
401412 } else {
402413 if ! reflect .DeepEqual (currentCopy .Spec .Bootstrap , desiredCopy .Spec .Bootstrap ) {
403- logMessages = append (logMessages , fmt .Sprintf ("spec.bootstrap.dataSecretName %s, %s required" , ptr .Deref (currentCopy .Spec .Bootstrap .DataSecretName , "nil" ), ptr .Deref (desiredCopy .Spec .Bootstrap .DataSecretName , "nil" )))
404- conditionMessages = append (conditionMessages , fmt .Sprintf ("spec.bootstrap.dataSecretName %s, %s required" , ptr .Deref (currentCopy .Spec .Bootstrap .DataSecretName , "nil" ), ptr .Deref (desiredCopy .Spec .Bootstrap .DataSecretName , "nil" )))
414+ res . LogMessages = append (res . LogMessages , fmt .Sprintf ("spec.bootstrap.dataSecretName %s, %s required" , ptr .Deref (currentCopy .Spec .Bootstrap .DataSecretName , "nil" ), ptr .Deref (desiredCopy .Spec .Bootstrap .DataSecretName , "nil" )))
415+ res . ConditionMessages = append (res . ConditionMessages , fmt .Sprintf ("spec.bootstrap.dataSecretName %s, %s required" , ptr .Deref (currentCopy .Spec .Bootstrap .DataSecretName , "nil" ), ptr .Deref (desiredCopy .Spec .Bootstrap .DataSecretName , "nil" )))
405416 }
406417 }
407418
408419 if ! reflect .DeepEqual (currentCopy .Spec .InfrastructureRef , desiredCopy .Spec .InfrastructureRef ) {
409- logMessages = append (logMessages , fmt .Sprintf ("spec.infrastructureRef %s %s, %s %s required" , currentCopy .Spec .InfrastructureRef .Kind , currentCopy .Spec .InfrastructureRef .Name , desiredCopy .Spec .InfrastructureRef .Kind , desiredCopy .Spec .InfrastructureRef .Name ))
420+ res . LogMessages = append (res . LogMessages , fmt .Sprintf ("spec.infrastructureRef %s %s, %s %s required" , currentCopy .Spec .InfrastructureRef .Kind , currentCopy .Spec .InfrastructureRef .Name , desiredCopy .Spec .InfrastructureRef .Kind , desiredCopy .Spec .InfrastructureRef .Name ))
410421 // Note: dropping "Template" suffix because conditions message will surface on machine.
411- conditionMessages = append (conditionMessages , fmt .Sprintf ("%s is not up-to-date" , strings .TrimSuffix (currentCopy .Spec .InfrastructureRef .Kind , clusterv1 .TemplateSuffix )))
422+ res . ConditionMessages = append (res . ConditionMessages , fmt .Sprintf ("%s is not up-to-date" , strings .TrimSuffix (currentCopy .Spec .InfrastructureRef .Kind , clusterv1 .TemplateSuffix )))
412423 }
413424
414425 if currentCopy .Spec .FailureDomain != desiredCopy .Spec .FailureDomain {
415- logMessages = append (logMessages , fmt .Sprintf ("spec.failureDomain %s, %s required" , currentCopy .Spec .FailureDomain , desiredCopy .Spec .FailureDomain ))
416- conditionMessages = append (conditionMessages , fmt .Sprintf ("Failure domain %s, %s required" , currentCopy .Spec .FailureDomain , desiredCopy .Spec .FailureDomain ))
426+ res . LogMessages = append (res . LogMessages , fmt .Sprintf ("spec.failureDomain %s, %s required" , currentCopy .Spec .FailureDomain , desiredCopy .Spec .FailureDomain ))
427+ res . ConditionMessages = append (res . ConditionMessages , fmt .Sprintf ("Failure domain %s, %s required" , currentCopy .Spec .FailureDomain , desiredCopy .Spec .FailureDomain ))
417428 }
418429
419- if len (logMessages ) > 0 || len (conditionMessages ) > 0 {
420- return false , logMessages , conditionMessages
430+ if len (res . LogMessages ) > 0 || len (res . ConditionMessages ) > 0 {
431+ return false , res
421432 }
422433
423- return true , nil , nil
434+ return true , nil
424435}
425436
426437// MachineTemplateDeepCopyRolloutFields copies a MachineTemplateSpec
@@ -446,81 +457,94 @@ func MachineTemplateDeepCopyRolloutFields(template *clusterv1.MachineTemplateSpe
446457 return templateCopy
447458}
448459
449- // FindNewMachineSet returns the new MS this given deployment targets (the one with the same machine template, ignoring
450- // in-place mutable fields).
460+ // FindNewAndOldMachineSets returns the newMS for a MachineDeployment (the one with the same machine template, ignoring
461+ // in-place mutable fields) as well as return oldMSs .
451462// Note: If the reconciliation time is after the deployment's `rolloutAfter` time, a MS has to be newer than
452463// `rolloutAfter` to be considered as matching the deployment's intent.
453464// NOTE: If we find a matching MachineSet which only differs in in-place mutable fields we can use it to
454465// fulfill the intent of the MachineDeployment by just updating the MachineSet to propagate in-place mutable fields.
455466// Thus we don't have to create a new MachineSet and we can avoid an unnecessary rollout.
456- // NOTE: Even after we changed MachineTemplateUpToDate to ignore fields that are propagated in-place we can guarantee that if there exists a "new machineset"
457- // using the old logic then a new machineset will definitely exist using the new logic. The new logic is looser. Therefore, we will
458- // not face a case where there exists a machine set matching the old logic but there does not exist a machineset matching the new logic.
459- // In fact previously not matching MS can now start matching the target. Since there could be multiple matches, lets choose the
460- // MS with the most replicas so that there is minimum machine churn.
461- func FindNewMachineSet (deployment * clusterv1.MachineDeployment , msList []* clusterv1.MachineSet , reconciliationTime * metav1.Time ) (* clusterv1.MachineSet , string , error ) {
467+ func FindNewAndOldMachineSets (deployment * clusterv1.MachineDeployment , msList []* clusterv1.MachineSet , reconciliationTime * metav1.Time ) (newMS * clusterv1.MachineSet , oldMSs []* clusterv1.MachineSet , oldMSNotUpToDateResults map [string ]NotUpToDateResult , createReason string ) {
462468 if len (msList ) == 0 {
463- return nil , "no MachineSets exist for the MachineDeployment" , nil
469+ return nil , nil , nil , "no MachineSets exist for the MachineDeployment"
464470 }
465471
466- // In rare cases, such as after cluster upgrades, Deployment may end up with
467- // having more than one new MachineSets that have the same template,
468- // see https://github.com/kubernetes/kubernetes/issues/40415
469- // We deterministically choose the oldest new MachineSet with matching template hash.
472+ // It could happen that there is more than one newMS candidate when reconciliationTime is > rolloutAfter; considering this, the
473+ // current implementation treats candidates that will be discarded as old machine sets not eligible for in-place updates.
474+ // NOTE: We could also have more than one MS candidate in the very unlikely case where
475+ // the diff logic MachineTemplateUpToDate is changed by dropping one of the existing criteria (version, failureDomain, infra/BootstrapRef).
476+ // NOTE: When dealing with more than one newMS candidate, deterministically choose the MS with the most replicas
477+ // so that there is minimum machine churn.
478+ var newMSCandidates []* clusterv1.MachineSet
470479 sort .Sort (MachineSetsByDecreasingReplicas (msList ))
471480
472- var matchingMachineSets []* clusterv1.MachineSet
481+ oldMSs = make ([]* clusterv1.MachineSet , 0 )
482+ oldMSNotUpToDateResults = make (map [string ]NotUpToDateResult )
473483 var diffs []string
474484 for _ , ms := range msList {
475- upToDate , logMessages , _ := MachineTemplateUpToDate (& ms .Spec .Template , & deployment .Spec .Template )
485+ upToDate , notUpToDateResult := MachineTemplateUpToDate (& ms .Spec .Template , & deployment .Spec .Template )
476486 if upToDate {
477- matchingMachineSets = append (matchingMachineSets , ms )
487+ newMSCandidates = append (newMSCandidates , ms )
478488 } else {
479- diffs = append (diffs , fmt .Sprintf ("MachineSet %s: diff: %s" , ms .Name , strings .Join (logMessages , ", " )))
489+ oldMSs = append (oldMSs , ms )
490+ // Override the EligibleForInPlaceUpdate decision if rollout after is expired.
491+ if ! deployment .Spec .Rollout .After .IsZero () && deployment .Spec .Rollout .After .Before (reconciliationTime ) && ! ms .CreationTimestamp .After (deployment .Spec .Rollout .After .Time ) {
492+ notUpToDateResult .EligibleForInPlaceUpdate = false
493+ notUpToDateResult .LogMessages = append (notUpToDateResult .LogMessages , "MachineDeployment spec.rolloutAfter expired" )
494+ // No need to set an additional condition message, it is not used anywhere.
495+ }
496+ oldMSNotUpToDateResults [ms .Name ] = * notUpToDateResult
497+ diffs = append (diffs , fmt .Sprintf ("MachineSet %s: diff: %s" , ms .Name , strings .Join (notUpToDateResult .LogMessages , ", " )))
480498 }
481499 }
482500
483- if len (matchingMachineSets ) == 0 {
484- return nil , fmt .Sprintf ("couldn't find MachineSet matching MachineDeployment spec template: %s" , strings .Join (diffs , "; " )), nil
501+ if len (newMSCandidates ) == 0 {
502+ return nil , oldMSs , oldMSNotUpToDateResults , fmt .Sprintf ("couldn't find MachineSet matching MachineDeployment spec template: %s" , strings .Join (diffs , "; " ))
485503 }
486504
487505 // If RolloutAfter is not set, pick the first matching MachineSet.
488506 if deployment .Spec .Rollout .After .IsZero () {
489- return matchingMachineSets [0 ], "" , nil
507+ for _ , ms := range newMSCandidates [1 :] {
508+ oldMSs = append (oldMSs , ms )
509+ oldMSNotUpToDateResults [ms .Name ] = NotUpToDateResult {
510+ // No need to set log or condition message for discarded candidates, it is not used anywhere.
511+ EligibleForInPlaceUpdate : false ,
512+ }
513+ }
514+ return newMSCandidates [0 ], oldMSs , oldMSNotUpToDateResults , ""
490515 }
491516
492517 // If reconciliation time is before RolloutAfter, pick the first matching MachineSet.
493518 if reconciliationTime .Before (& deployment .Spec .Rollout .After ) {
494- return matchingMachineSets [0 ], "" , nil
519+ for _ , ms := range newMSCandidates [1 :] {
520+ oldMSs = append (oldMSs , ms )
521+ oldMSNotUpToDateResults [ms .Name ] = NotUpToDateResult {
522+ // No need to set log or condition for discarded candidates, it is not used anywhere.
523+ EligibleForInPlaceUpdate : false ,
524+ }
525+ }
526+ return newMSCandidates [0 ], oldMSs , oldMSNotUpToDateResults , ""
495527 }
496528
497529 // Pick the first matching MachineSet that has been created at RolloutAfter or later.
498- for _ , ms := range matchingMachineSets {
499- if ms .CreationTimestamp .Sub (deployment .Spec .Rollout .After .Time ) >= 0 {
500- return ms , "" , nil
530+ for _ , ms := range newMSCandidates {
531+ if newMS == nil && ms .CreationTimestamp .Sub (deployment .Spec .Rollout .After .Time ) >= 0 {
532+ newMS = ms
533+ continue
534+ }
535+
536+ oldMSs = append (oldMSs , ms )
537+ oldMSNotUpToDateResults [ms .Name ] = NotUpToDateResult {
538+ // No need to set log or condition for discarded candidates, it is not used anywhere.
539+ EligibleForInPlaceUpdate : false ,
501540 }
502541 }
503542
504543 // If no matching MachineSet was created after RolloutAfter, trigger creation of a new MachineSet.
505- return nil , fmt .Sprintf ("spec.rollout.after on MachineDeployment set to %s, no MachineSet has been created afterwards" , deployment .Spec .Rollout .After .Format (time .RFC3339 )), nil
506- }
507-
508- // FindOldMachineSets returns the old machine sets targeted by the given Deployment, within the given slice of MSes.
509- // Returns a list of machine sets which contains all old machine sets.
510- func FindOldMachineSets (deployment * clusterv1.MachineDeployment , msList []* clusterv1.MachineSet , reconciliationTime * metav1.Time ) ([]* clusterv1.MachineSet , error ) {
511- allMSs := make ([]* clusterv1.MachineSet , 0 , len (msList ))
512- newMS , _ , err := FindNewMachineSet (deployment , msList , reconciliationTime )
513- if err != nil {
514- return nil , err
515- }
516- for _ , ms := range msList {
517- // Filter out new machine set
518- if newMS != nil && ms .UID == newMS .UID {
519- continue
520- }
521- allMSs = append (allMSs , ms )
544+ if newMS == nil {
545+ return nil , oldMSs , oldMSNotUpToDateResults , fmt .Sprintf ("spec.rollout.after on MachineDeployment set to %s, no MachineSet has been created afterwards" , deployment .Spec .Rollout .After .Format (time .RFC3339 ))
522546 }
523- return allMSs , nil
547+ return newMS , oldMSs , oldMSNotUpToDateResults , ""
524548}
525549
526550// GetReplicaCountForMachineSets returns the sum of Replicas of the given machine sets.
0 commit comments