Skip to content

Commit 9d575f1

Browse files
mweibel and k8s-infra-cherrypick-robot
authored and committed
fix(machinepool): clear VirtualMachineProfile if no model/custom data update
Scale up/down with MachinePool always updates the VM image model to use. This change sets the VirtualMachineProfile to nil when no change is necessary and therefore ensures less churn on scale up/downs, reducing model updates which may require manual fixing. It also fixes some linting errors related to unnecessary parameters.
1 parent 07a3ef6 commit 9d575f1

File tree

5 files changed

+254
-40
lines changed

5 files changed

+254
-40
lines changed

azure/scope/machinepool.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ type (
8080
capiMachinePoolPatchHelper *patch.Helper
8181
vmssState *azure.VMSS
8282
cache *MachinePoolCache
83+
skuCache *resourceskus.Cache
8384
}
8485

8586
// NodeStatus represents the status of a Kubernetes node.
@@ -163,12 +164,15 @@ func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error {
163164
return err
164165
}
165166

166-
skuCache, err := resourceskus.GetCache(m, m.Location())
167-
if err != nil {
168-
return err
167+
if m.skuCache == nil {
168+
skuCache, err := resourceskus.GetCache(m, m.Location())
169+
if err != nil {
170+
return errors.Wrap(err, "failed to init resourceskus cache")
171+
}
172+
m.skuCache = skuCache
169173
}
170174

171-
m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
175+
m.cache.VMSKU, err = m.skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
172176
if err != nil {
173177
return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize)
174178
}
@@ -221,10 +225,8 @@ func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecG
221225
}
222226

223227
if m.cache != nil {
224-
if m.HasReplicasExternallyManaged(ctx) {
225-
spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
226-
log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
227-
}
228+
spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
229+
log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
228230
spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs()
229231
spec.SKU = m.cache.VMSKU
230232
spec.VMImage = m.cache.VMImage
@@ -678,11 +680,9 @@ func (m *MachinePoolScope) Close(ctx context.Context) error {
678680
if err := m.updateReplicasAndProviderIDs(ctx); err != nil {
679681
return errors.Wrap(err, "failed to update replicas and providerIDs")
680682
}
681-
if m.HasReplicasExternallyManaged(ctx) {
682-
if err := m.updateCustomDataHash(ctx); err != nil {
683-
// ignore errors to calculating the custom data hash since it's not absolutely crucial.
684-
log.V(4).Error(err, "unable to update custom data hash, ignoring.")
685-
}
683+
if err := m.updateCustomDataHash(ctx); err != nil {
684+
// ignore errors to calculating the custom data hash since it's not absolutely crucial.
685+
log.V(4).Error(err, "unable to update custom data hash, ignoring.")
686686
}
687687
}
688688

azure/scope/machinepool_test.go

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@ package scope
1818

1919
import (
2020
"context"
21+
"crypto/sha256"
22+
"encoding/base64"
2123
"fmt"
24+
"io"
2225
"reflect"
2326
"testing"
2427

2528
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2629
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
30+
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
2731
azureautorest "github.com/Azure/go-autorest/autorest/azure"
2832
"github.com/Azure/go-autorest/autorest/azure/auth"
2933
. "github.com/onsi/gomega"
@@ -1515,3 +1519,148 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) {
15151519
})
15161520
}
15171521
}
1522+
1523+
// TestBootstrapDataChanges verifies that ScaleSetSpec reports
// ShouldPatchCustomData only when the bootstrap (custom) data hash stored in
// the AzureMachinePool's CustomDataHashAnnotation no longer matches the hash
// of the current bootstrap secret contents.
func TestBootstrapDataChanges(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Register all API types used by the fake client below.
	scheme := runtime.NewScheme()
	_ = clusterv1.AddToScheme(scheme)
	_ = infrav1.AddToScheme(scheme)
	_ = infrav1exp.AddToScheme(scheme)
	_ = corev1.AddToScheme(scheme)

	var (
		g        = NewWithT(t)
		mockCtrl = gomock.NewController(t)
		cb       = fake.NewClientBuilder().WithScheme(scheme)
		cluster  = &clusterv1.Cluster{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "cluster1",
				Namespace: "default",
			},
			Spec: clusterv1.ClusterSpec{
				InfrastructureRef: &corev1.ObjectReference{
					Name: "azCluster1",
				},
			},
			Status: clusterv1.ClusterStatus{
				InfrastructureReady: true,
			},
		}
		azureCluster = &infrav1.AzureCluster{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "azCluster1",
				Namespace: "default",
			},
			Spec: infrav1.AzureClusterSpec{
				AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
					Location: "test",
				},
			},
		}
		// MachinePool referencing the bootstrap secret created below.
		mp = &expv1.MachinePool{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mp1",
				Namespace: "default",
				OwnerReferences: []metav1.OwnerReference{
					{
						Name:       "cluster1",
						Kind:       "Cluster",
						APIVersion: clusterv1.GroupVersion.String(),
					},
				},
			},
			Spec: expv1.MachinePoolSpec{
				Template: clusterv1.MachineTemplateSpec{
					Spec: clusterv1.MachineSpec{
						Bootstrap: clusterv1.Bootstrap{
							DataSecretName: ptr.To("mp-secret"),
						},
						Version: ptr.To("v1.31.0"),
					},
				},
			},
		}
		bootstrapData = "test"
		// The annotation stores the hash of the base64-encoded secret value,
		// so hash the encoded form here to match.
		bootstrapDataHash = sha256Hash(base64.StdEncoding.EncodeToString([]byte(bootstrapData)))
		bootstrapSecret   = corev1.Secret{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "mp-secret",
				Namespace: "default",
			},
			Data: map[string][]byte{"value": []byte(bootstrapData)},
		}
		// AzureMachinePool pre-annotated with the up-to-date custom data hash,
		// i.e. the initial state has no pending bootstrap data change.
		amp = &infrav1exp.AzureMachinePool{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "amp1",
				Namespace: "default",
				OwnerReferences: []metav1.OwnerReference{
					{
						Name:       "mp1",
						Kind:       "MachinePool",
						APIVersion: expv1.GroupVersion.String(),
					},
				},
				Annotations: map[string]string{
					azure.CustomDataHashAnnotation: fmt.Sprintf("%x", bootstrapDataHash),
				},
			},
			Spec: infrav1exp.AzureMachinePoolSpec{
				Template: infrav1exp.AzureMachinePoolMachineTemplate{
					Image: &infrav1.Image{},
					NetworkInterfaces: []infrav1.NetworkInterface{
						{
							SubnetName: "test",
						},
					},
					VMSize: "VM_SIZE",
				},
			},
		}
		vmssState = &azure.VMSS{}
	)
	defer mockCtrl.Finish()

	// Static SKU cache avoids hitting the compute API for the VM size lookup
	// done by InitMachinePoolCache.
	s := &MachinePoolScope{
		client: cb.
			WithObjects(&bootstrapSecret).
			Build(),
		ClusterScoper: &ClusterScope{
			Cluster:      cluster,
			AzureCluster: azureCluster,
		},
		skuCache: resourceskus.NewStaticCache([]armcompute.ResourceSKU{
			{
				Name: ptr.To("VM_SIZE"),
			},
		}, "test"),
		MachinePool:      mp,
		AzureMachinePool: amp,
		vmssState:        vmssState,
	}

	g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())

	// Annotation hash matches the secret contents: no custom data patch needed.
	spec := s.ScaleSetSpec(ctx)
	sSpec := spec.(*scalesets.ScaleSetSpec)
	g.Expect(sSpec.ShouldPatchCustomData).To(Equal(false))

	// Simulate stale custom data by overwriting the annotation.
	amp.Annotations[azure.CustomDataHashAnnotation] = "old"

	// reset cache to be able to build up the cache again
	s.cache = nil
	g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())

	// Hash mismatch must now be reported as a pending custom data patch.
	spec = s.ScaleSetSpec(ctx)
	sSpec = spec.(*scalesets.ScaleSetSpec)
	g.Expect(sSpec.ShouldPatchCustomData).To(Equal(true))
}
1658+
1659+
// sha256Hash returns the raw SHA-256 digest of text as a byte slice.
func sha256Hash(text string) []byte {
	digest := sha256.Sum256([]byte(text))
	return digest[:]
}

azure/services/scalesets/scalesets_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,8 +742,8 @@ func newWindowsVMSSSpec() ScaleSetSpec {
742742
return vmss
743743
}
744744

745-
func newDefaultExistingVMSS(vmSize string) armcompute.VirtualMachineScaleSet {
746-
vmss := newDefaultVMSS(vmSize)
745+
func newDefaultExistingVMSS() armcompute.VirtualMachineScaleSet {
746+
vmss := newDefaultVMSS("VM_SIZE")
747747
vmss.ID = ptr.To("subscriptions/1234/resourceGroups/my_resource_group/providers/Microsoft.Compute/virtualMachines/my-vm")
748748
return vmss
749749
}

azure/services/scalesets/spec.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ func (s *ScaleSetSpec) OwnerResourceName() string {
9292
}
9393

9494
func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interface{}) (parameters interface{}, err error) {
95+
ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.ScaleSetSpec.existingParameters")
96+
defer done()
97+
9598
existingVMSS, ok := existing.(armcompute.VirtualMachineScaleSet)
9699
if !ok {
97100
return nil, errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", existing)
@@ -131,6 +134,15 @@ func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interfac
131134
return nil, nil
132135
}
133136

137+
// if there are no model changes and no change in custom data, remove VirtualMachineProfile to avoid unnecessary VMSS model
138+
// updates.
139+
if !hasModelChanges && !s.ShouldPatchCustomData {
140+
log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
141+
vmss.Properties.VirtualMachineProfile = nil
142+
} else {
143+
log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
144+
}
145+
134146
return vmss, nil
135147
}
136148

0 commit comments

Comments (0)