Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apis/v1beta1/vspheremachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ const (
// Note: This reason is used only in supervisor mode.
VSphereMachineVirtualMachinePoweringOnV1Beta2Reason = "PoweringOn"

// VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason surfaces that the VirtualMachine
// is waiting for its corresponding VirtualMachineGroup to be created and to include this VM as a member.
VSphereMachineVirtualMachineWaitingForVirtualMachineGroupV1Beta2Reason = "WaitingForVirtualMachineGroup"

// VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason surfaces when the VirtualMachine that is controlled
// by the VSphereMachine is waiting for the machine network settings to be reported after the machine has been powered on.
VSphereMachineVirtualMachineWaitingForNetworkAddressV1Beta2Reason = "WaitingForNetworkAddress"
Expand Down
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
- --v=4
- "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}"
- "--feature-gates=MultiNetworks=${EXP_MULTI_NETWORKS:=false},NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false},NodeAutoPlacement=${EXP_NODE_AUTO_PLACEMENT:=false},PriorityQueue=${EXP_PRIORITY_QUEUE:=false}"
image: controller:latest
imagePullPolicy: IfNotPresent
name: manager
Expand Down
7 changes: 7 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ rules:
- apiGroups:
- vmoperator.vmware.com
resources:
- virtualmachinegroups
- virtualmachineimages
- virtualmachineimages/status
- virtualmachines
Expand All @@ -264,6 +265,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- vmoperator.vmware.com
resources:
- virtualmachinegroups/status
verbs:
- get
- apiGroups:
- vmware.com
resources:
Expand Down
2 changes: 2 additions & 0 deletions controllers/vmware/controllers_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/types"
. "github.com/onsi/gomega"
vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand Down Expand Up @@ -71,6 +72,7 @@ func setup(ctx context.Context) (*helpers.TestEnvironment, clustercache.ClusterC
utilruntime.Must(infrav1.AddToScheme(scheme.Scheme))
utilruntime.Must(clusterv1.AddToScheme(scheme.Scheme))
utilruntime.Must(vmwarev1.AddToScheme(scheme.Scheme))
utilruntime.Must(vmoprv1.AddToScheme(scheme.Scheme))

testEnv := helpers.NewTestEnvironment(ctx)

Expand Down
109 changes: 109 additions & 0 deletions controllers/vmware/virtualmachinegroup_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vmware

import (
"context"

vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
apitypes "k8s.io/apimachinery/pkg/types"
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
"sigs.k8s.io/cluster-api/util/predicates"
ctrl "sigs.k8s.io/controller-runtime"
ctrlbldr "sigs.k8s.io/controller-runtime/pkg/builder"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
)

// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get;list;watch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get
// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=vmoperator.vmware.com,resources=virtualmachinegroups/status,verbs=get
// +kubebuilder:rbac:groups=vmware.infrastructure.cluster.x-k8s.io,resources=vspheremachines,verbs=get;list;watch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch

// AddVirtualMachineGroupControllerToManager adds the VirtualMachineGroup controller to the provided manager.
func AddVirtualMachineGroupControllerToManager(ctx context.Context, controllerManagerCtx *capvcontext.ControllerManagerContext, mgr manager.Manager, options controller.Options) error {
predicateLog := ctrl.LoggerFrom(ctx).WithValues("controller", "virtualmachinegroup")

reconciler := &VirtualMachineGroupReconciler{
Client: controllerManagerCtx.Client,
Recorder: mgr.GetEventRecorderFor("virtualmachinegroup-controller"),
}

builder := ctrl.NewControllerManagedBy(mgr).
For(&clusterv1.Cluster{}).
WithOptions(options).
// Set the controller's name explicitly to virtualmachinegroup.
Named("virtualmachinegroup").
Watches(
&vmoprv1.VirtualMachineGroup{},
handler.EnqueueRequestForOwner(mgr.GetScheme(), reconciler.Client.RESTMapper(), &clusterv1.Cluster{}),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
handler.EnqueueRequestForOwner(mgr.GetScheme(), reconciler.Client.RESTMapper(), &clusterv1.Cluster{}),
handler.EnqueueRequestForOwner(mgr.GetScheme(), reconciler.Client.RESTMapper(), &clusterv1.Cluster{}),
ctrlbldr.WithPredicates(predicates.ResourceIsChanged(mgr.GetScheme(), predicateLog)),

We have not been using this in CAPV up until now, but let's use it here to already improve this new reconciler (I have a task to follow up for the others soon-ish).

By default the reconciler will get resyncs from all informers, i.e. every 10m everything gets reconciled because of the Cluster resync, and additionally every 10m everything gets reconciled because of the VirtualMachineGroup resync.

In core CAPI we drop events from resyncs of secondary watches to avoid this issue

(it's not needed for VSphereMachine below as we already drop all updates)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. This helps to reduce the resync noise.

).
Watches(
&vmwarev1.VSphereMachine{},
handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster),
ctrlbldr.WithPredicates(
predicate.Funcs{
UpdateFunc: func(event.UpdateEvent) bool { return false },
CreateFunc: func(event.CreateEvent) bool { return true },
DeleteFunc: func(event.DeleteEvent) bool { return true },
GenericFunc: func(event.GenericEvent) bool { return false },
}),
).
WithEventFilter(predicates.ResourceHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue))

return builder.Complete(reconciler)
}

// VSphereMachineToCluster maps VSphereMachine events to Cluster reconcile requests.
// This handler only processes VSphereMachine objects for Day-2 operations when a VMG can be found, keeping the
// VMG member list in sync with VSphereMachines. If no corresponding VMG is found, this is a no-op.
func (r *VirtualMachineGroupReconciler) VSphereMachineToCluster(ctx context.Context, a ctrlclient.Object) []reconcile.Request {
vSphereMachine, ok := a.(*vmwarev1.VSphereMachine)
if !ok {
return nil
}

clusterName, ok := vSphereMachine.Labels[clusterv1.ClusterNameLabel]
if !ok || clusterName == "" {
return nil
}

vmg := &vmoprv1.VirtualMachineGroup{}
err := r.Client.Get(ctx, apitypes.NamespacedName{Namespace: vSphereMachine.Namespace, Name: clusterName}, vmg)

if err != nil {
return nil
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should not have this kind of logic in a mapper.

Let's deal with this only in the controller.

It's much more obvious there and the cache here can be out of sync.

Let's then use vSphereMachine.Namespace and clusterName for the request below

(please also update the godoc of this func)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Fixed it, and also added a predicate to the VSphereMachine watch so that it only handles VSphereMachines which have the MachineDeployment label.


return []reconcile.Request{{
NamespacedName: apitypes.NamespacedName{
Namespace: vmg.Namespace,
Name: vmg.Name,
},
}}
}
Loading