Skip to content

Commit e150b72

Browse files
authored
Update Kubernetes Secret synchronization logic (#650)
1 parent 80e6d6d commit e150b72

14 files changed

+117
-122
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
kind: ENHANCEMENTS
2+
body: '`AgentPool`: Update Kubernetes Secret synchronization to perform batch updates at the end of agent token reconciliation, reducing API calls and preventing race conditions.'
3+
time: 2025-10-28T12:56:28.082375+01:00
4+
custom:
5+
PR: "650"

internal/controller/agentpool_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (r *AgentPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
6868
return requeueAfter(requeueInterval)
6969
}
7070

71-
if a, ok := ap.instance.GetAnnotations()[annotationPaused]; ok && a == annotationTrue {
71+
if a, ok := ap.instance.GetAnnotations()[annotationPaused]; ok && a == metaTrue {
7272
ap.log.Info("Agent Pool Controller", "msg", "reconciliation is paused for this resource")
7373
return doNotRequeue()
7474
}

internal/controller/agentpool_controller_deletion_policy.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,7 @@ func (r *AgentPoolReconciler) deleteAgentPool(ctx context.Context, ap *agentPool
7272
ap.log.Error(err, "Reconcile Agent Pool", "msg", fmt.Sprintf("failed to remove token %s", t.ID))
7373
return err
7474
}
75-
err = r.removeToken(ctx, ap, t.ID)
76-
if err != nil {
77-
return err
78-
}
75+
ap.deleteTokenStatus(t.ID)
7976
}
8077
ap.log.Info("Reconcile Agent Pool", "msg", "successfully deleted tokens")
8178
}

internal/controller/agentpool_controller_tokens.go

Lines changed: 85 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package controller
66
import (
77
"context"
88
"fmt"
9+
"slices"
910

1011
tfc "github.com/hashicorp/go-tfe"
1112
corev1 "k8s.io/api/core/v1"
@@ -16,7 +17,6 @@ import (
1617

1718
appv1alpha2 "github.com/hashicorp/hcp-terraform-operator/api/v1alpha2"
1819
"github.com/hashicorp/hcp-terraform-operator/internal/pointer"
19-
"github.com/hashicorp/hcp-terraform-operator/internal/slice"
2020
)
2121

2222
func (ap *agentPoolInstance) getTokens(ctx context.Context) (map[string]string, error) {
@@ -33,86 +33,32 @@ func (ap *agentPoolInstance) getTokens(ctx context.Context) (map[string]string,
3333
return tokens, nil
3434
}
3535

36-
func (r *AgentPoolReconciler) createToken(ctx context.Context, ap *agentPoolInstance, token string) error {
37-
nn := getAgentPoolNamespacedName(&ap.instance)
36+
func (r *AgentPoolReconciler) createToken(ctx context.Context, ap *agentPoolInstance, token string) (*tfc.AgentToken, error) {
37+
// nn := getAgentPoolNamespacedName(&ap.instance)
3838
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("creating a new agent token %q", token))
39-
at, err := ap.tfClient.Client.AgentTokens.Create(ctx, ap.instance.Status.AgentPoolID, tfc.AgentTokenCreateOptions{
39+
t, err := ap.tfClient.Client.AgentTokens.Create(ctx, ap.instance.Status.AgentPoolID, tfc.AgentTokenCreateOptions{
4040
Description: &token,
4141
})
4242
if err != nil {
4343
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to create a new token %q", token))
44-
return err
45-
}
46-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully created a new agent token %q %q", token, at.ID))
47-
// UPDATE SECRET
48-
s := &corev1.Secret{}
49-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("update Kubernets Secret %q with token %q", s.Name, token))
50-
if err := r.Client.Get(ctx, nn, s); err != nil {
51-
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to get Kubernets Secret %q", s.Name))
52-
return err
53-
}
54-
d := make(map[string][]byte)
55-
if s.Data != nil {
56-
d = s.DeepCopy().Data
57-
}
58-
d[at.Description] = []byte(at.Token)
59-
_, err = controllerutil.CreateOrUpdate(ctx, r.Client, s, func() error {
60-
s.Data = d
61-
s.Labels = map[string]string{
62-
"agentPoolID": ap.instance.Status.AgentPoolID,
63-
}
64-
return nil
65-
})
66-
if err != nil {
67-
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to update Kubernets Secret %q with token %q", s.Name, token))
68-
return err
44+
return nil, err
6945
}
70-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully updated Kubernets Secret %q with token %q", s.Name, token))
46+
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully created a new agent token %q %q", token, t.ID))
7147

7248
ap.instance.Status.AgentTokens = append(ap.instance.Status.AgentTokens, &appv1alpha2.AgentAPIToken{
73-
Name: at.Description,
74-
ID: at.ID,
75-
CreatedAt: pointer.PointerOf(at.CreatedAt.Unix()),
76-
LastUsedAt: pointer.PointerOf(at.LastUsedAt.Unix()),
49+
Name: t.Description,
50+
ID: t.ID,
51+
CreatedAt: pointer.PointerOf(t.CreatedAt.Unix()),
52+
LastUsedAt: pointer.PointerOf(t.LastUsedAt.Unix()),
7753
})
7854

79-
return nil
55+
return t, nil
8056
}
8157

82-
func (r *AgentPoolReconciler) removeToken(ctx context.Context, ap *agentPoolInstance, tokenID string) error {
83-
nn := getAgentPoolNamespacedName(&ap.instance)
84-
for i, token := range ap.instance.Status.AgentTokens {
85-
if token.ID == tokenID {
86-
// UPDATE SECRET
87-
s := &corev1.Secret{}
88-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("remove token %q from Kubernets Secret %q", tokenID, nn.Name))
89-
if err := r.Client.Get(ctx, nn, s); err != nil {
90-
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to get Kubernets Secret %q", nn.Name))
91-
return err
92-
}
93-
d := make(map[string][]byte)
94-
if s.Data != nil {
95-
d = s.DeepCopy().Data
96-
}
97-
delete(d, token.Name)
98-
_, err := controllerutil.CreateOrUpdate(ctx, r.Client, s, func() error {
99-
s.Data = d
100-
s.Labels = map[string]string{
101-
"agentPoolID": ap.instance.Status.AgentPoolID,
102-
}
103-
return nil
104-
})
105-
if err != nil {
106-
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to remove token %q from Kubernets Secret %q", tokenID, s.Name))
107-
return err
108-
}
109-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully removed token %q from Kubernets Secret %q", tokenID, s.Name))
110-
// UPDATE STATUS
111-
ap.instance.Status.AgentTokens = slice.RemoveFromSlice(ap.instance.Status.AgentTokens, i)
112-
return nil
113-
}
114-
}
115-
return nil
58+
func (ap *agentPoolInstance) deleteTokenStatus(id string) {
59+
ap.instance.Status.AgentTokens = slices.DeleteFunc(ap.instance.Status.AgentTokens, func(vs *appv1alpha2.AgentAPIToken) bool {
60+
return vs.ID == id
61+
})
11662
}
11763

11864
func agentPoolOutputObjectName(name string) string {
@@ -126,7 +72,7 @@ func getAgentPoolNamespacedName(instance *appv1alpha2.AgentPool) types.Namespace
12672
}
12773
}
12874

129-
func (r *AgentPoolReconciler) createSecret(ctx context.Context, ap *agentPoolInstance) error {
75+
func (r *AgentPoolReconciler) createOrGetSecret(ctx context.Context, ap *agentPoolInstance) (*corev1.Secret, error) {
13076
s := &corev1.Secret{
13177
ObjectMeta: metav1.ObjectMeta{
13278
Name: agentPoolOutputObjectName(ap.instance.Name),
@@ -138,32 +84,56 @@ func (r *AgentPoolReconciler) createSecret(ctx context.Context, ap *agentPoolIns
13884
}
13985
if err := controllerutil.SetControllerReference(&ap.instance, s, r.Scheme); err != nil {
14086
ap.log.Error(err, "Reconcile Agent Tokens", "msg", "failed to set controller reference")
141-
return err
87+
return nil, err
14288
}
14389
if err := r.Client.Get(ctx, getAgentPoolNamespacedName(&ap.instance), s); err != nil {
14490
if errors.IsNotFound(err) {
14591
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("creating a new Kubernetes Secret %q", s.Name))
14692
if err = r.Client.Create(ctx, s); err != nil {
14793
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to create a new Kubernetes Secret %q", s.Name))
148-
return err
94+
return nil, err
14995
}
15096
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully created a new Kubernetes Secret %q", s.Name))
151-
return nil
97+
return s, nil
15298
}
15399
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to get Kubernetes Secret %q", s.Name))
154-
return err
100+
return nil, err
155101
}
156102

157-
return nil
103+
return s, nil
104+
}
105+
106+
func deleteSecretKey(s *corev1.Secret, key string) {
107+
delete(s.Data, key)
108+
if s.Labels == nil {
109+
return
110+
}
111+
s.Labels[labelHasChanged] = metaTrue
112+
}
113+
114+
func setSecretKey(s *corev1.Secret, key, value string) {
115+
s.Data[key] = []byte(value)
116+
if s.Labels == nil {
117+
return
118+
}
119+
s.Labels[labelHasChanged] = metaTrue
158120
}
159121

160122
func (r *AgentPoolReconciler) reconcileAgentTokens(ctx context.Context, ap *agentPoolInstance) error {
161123
ap.log.Info("Reconcile Agent Tokens", "msg", "new reconciliation event")
162124

163-
if err := r.createSecret(ctx, ap); err != nil {
125+
s, err := r.createOrGetSecret(ctx, ap)
126+
if err != nil {
164127
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to create a new Kubernetes Secret %s", agentPoolOutputObjectName(ap.instance.Name)))
165128
return err
166129
}
130+
if s.Data == nil {
131+
s.Data = make(map[string][]byte)
132+
}
133+
if s.Labels == nil {
134+
s.Labels = make(map[string]string)
135+
}
136+
s.Labels[labelHasChanged] = metaFalse
167137

168138
agentTokens, err := ap.getTokens(ctx)
169139
if err != nil {
@@ -176,39 +146,60 @@ func (r *AgentPoolReconciler) reconcileAgentTokens(ctx context.Context, ap *agen
176146
}
177147

178148
for _, token := range ap.instance.Spec.AgentTokens {
179-
if tokenID, ok := statusTokens[token.Name]; ok {
149+
if id, ok := statusTokens[token.Name]; ok {
180150
delete(statusTokens, token.Name)
181-
if _, ok := agentTokens[tokenID]; ok {
182-
delete(agentTokens, tokenID)
151+
if _, ok := agentTokens[id]; ok {
152+
delete(agentTokens, id)
183153
continue
184154
}
185-
if err := r.removeToken(ctx, ap, tokenID); err != nil {
186-
return err
187-
}
155+
deleteSecretKey(s, token.Name)
156+
ap.deleteTokenStatus(id)
188157
}
189-
if err := r.createToken(ctx, ap, token.Name); err != nil {
158+
t, err := r.createToken(ctx, ap, token.Name)
159+
if err != nil {
190160
return err
191161
}
162+
setSecretKey(s, t.Description, t.Token)
192163
}
193164

194165
// Clean up.
195-
for _, tokenID := range statusTokens {
196-
if err := r.removeToken(ctx, ap, tokenID); err != nil {
197-
return err
198-
}
166+
for name, id := range statusTokens {
167+
deleteSecretKey(s, name)
168+
ap.deleteTokenStatus(id)
199169
}
200170

201-
for tokenID := range agentTokens {
202-
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("removing agent token %q", tokenID))
203-
err := ap.tfClient.Client.AgentTokens.Delete(ctx, tokenID)
171+
for id, name := range agentTokens {
172+
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("removing agent token name=%q id=%q", name, id))
173+
err := ap.tfClient.Client.AgentTokens.Delete(ctx, id)
204174
if err != nil && err != tfc.ErrResourceNotFound {
205-
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to remove agent token %q", tokenID))
206-
return err
207-
}
208-
if err := r.removeToken(ctx, ap, tokenID); err != nil {
175+
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to remove agent token name=%q id=%q", name, id))
209176
return err
210177
}
178+
deleteSecretKey(s, name)
179+
ap.deleteTokenStatus(id)
211180
}
212181

182+
// Use defer to ensure the Secret is always updated, even if token creation or deletion fails.
183+
// This reduces the number of (Kubernetes) API calls and preserves the intermediate token state,
184+
// minimizing unnecessary updates during retries.
185+
defer func() {
186+
// Handle unexpected nil Secret, e.g. failed to retrieve it (should not happen here).
187+
if s == nil {
188+
return
189+
}
190+
// Do not update if there are no changes.
191+
if s.GetLabels()[labelHasChanged] == metaFalse {
192+
delete(s.Labels, labelHasChanged)
193+
ap.log.Info("Reconcile Agent Tokens", "msg", "no changes detected in Kubernetes Secret")
194+
return
195+
}
196+
delete(s.Labels, labelHasChanged)
197+
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("updating Kubernetes Secret %q", s.Name))
198+
if err := r.Client.Update(ctx, s); err != nil {
199+
ap.log.Error(err, "Reconcile Agent Tokens", "msg", fmt.Sprintf("failed to update Kubernetes Secret %q", s.Name))
200+
}
201+
ap.log.Info("Reconcile Agent Tokens", "msg", fmt.Sprintf("successfully updated Kubernetes Secret %q", s.Name))
202+
}()
203+
213204
return nil
214205
}

internal/controller/agenttoken_controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ var _ = Describe("AgentToken Controller", Ordered, func() {
9292
It("should successfully manage tokens with merge policy", func() {
9393
// CREATE AGENT POOL WITH ONE TOKEN
9494
pool := createAgentPoolWithToken(poolName)
95-
// CREATE KUBERNETS ITEM
95+
// CREATE KUBERNETES ITEM
9696
instance.Spec.AgentPool = appv1alpha2.AgentPoolRef{
9797
ID: pool.ID,
9898
}
@@ -126,7 +126,7 @@ var _ = Describe("AgentToken Controller", Ordered, func() {
126126
It("should successfully manage tokens with owner policy", func() {
127127
// CREATE AGENT POOL WITH ONE TOKEN
128128
pool := createAgentPoolWithToken(poolName)
129-
// CREATE KUBERNETS ITEM
129+
// CREATE KUBERNETES ITEM
130130
instance.Spec.AgentPool = appv1alpha2.AgentPoolRef{
131131
ID: pool.ID,
132132
}

internal/controller/consts.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ import (
1010
// SHARED CONSTANTS
1111
const (
1212
annotationPaused = "app.terraform.io/paused"
13-
annotationTrue = "true"
14-
annotationFalse = "false"
15-
initPageNumber = 1
16-
maxPageSize = 100
17-
requeueInterval = 15 * time.Second
18-
runMessage = "Triggered by HCP Terraform Operator"
13+
labelHasChanged = "app.terraform.io/has-changed"
14+
metaTrue = "true"
15+
metaFalse = "false"
16+
17+
initPageNumber = 1
18+
maxPageSize = 100
19+
requeueInterval = 15 * time.Second
20+
runMessage = "Triggered by HCP Terraform Operator"
1921
)
2022

2123
// AGENT POOL CONTROLLER'S CONSTANTS

internal/controller/module_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ func (r *ModuleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
8383
return requeueAfter(requeueInterval)
8484
}
8585

86-
if a, ok := m.instance.GetAnnotations()[annotationPaused]; ok && a == annotationTrue {
86+
if a, ok := m.instance.GetAnnotations()[annotationPaused]; ok && a == metaTrue {
8787
m.log.Info("Module Controller", "msg", "reconciliation is paused for this resource")
8888
return doNotRequeue()
8989
}

internal/controller/predicates.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func workspacePredicates() predicate.Predicate {
8181
}
8282
// Validate if a certain annotation persists in a new object and does not match the old one.
8383
// In that case, it is a new or updated annotation and we need to trigger a reconciliation cycle.
84-
if a, ok := e.ObjectNew.GetAnnotations()[workspaceAnnotationRunNew]; ok && a == annotationTrue {
84+
if a, ok := e.ObjectNew.GetAnnotations()[workspaceAnnotationRunNew]; ok && a == metaTrue {
8585
return true
8686
}
8787

internal/controller/project_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func (r *ProjectReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
6565
return requeueAfter(requeueInterval)
6666
}
6767

68-
if a, ok := p.instance.GetAnnotations()[annotationPaused]; ok && a == annotationTrue {
68+
if a, ok := p.instance.GetAnnotations()[annotationPaused]; ok && a == metaTrue {
6969
p.log.Info("Project Controller", "msg", "reconciliation is paused for this resource")
7070
return doNotRequeue()
7171
}

internal/controller/runscollector_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func (r *RunsCollectorReconciler) Reconcile(ctx context.Context, req ctrl.Reques
105105
return doNotRequeue()
106106
}
107107

108-
if a, ok := rc.instance.GetAnnotations()[annotationPaused]; ok && a == annotationTrue {
108+
if a, ok := rc.instance.GetAnnotations()[annotationPaused]; ok && a == metaTrue {
109109
rc.log.Info("Runs Collector Controller", "msg", "reconciliation is paused for this resource")
110110
return doNotRequeue()
111111
}

0 commit comments

Comments
 (0)