Skip to content

Commit 91f3838

Browse files
committed
Add Redis Cluster auto-scaling via Alertmanager webhooks
Signed-off-by: jaehanbyun <[email protected]>
1 parent 67c9c31 commit 91f3838

File tree

2 files changed

+180
-0
lines changed

2 files changed

+180
-0
lines changed

cmd/main.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,20 @@ func main() {
160160
}
161161
}()
162162

163+
setupLog.Info("HTTP server started on port 9090")
164+
setupLog.Info("Available endpoints:")
165+
setupLog.Info("- GET /cluster/nodes: Get Redis cluster nodes information")
166+
setupLog.Info("- POST /webhooks/alertmanager: Receive Alertmanager alerts for auto-scaling Redis clusters")
167+
setupLog.Info("For auto-scaling, Alertmanager should send alerts with the following labels:")
168+
setupLog.Info(" - redis_cluster: Name of the Redis cluster")
169+
setupLog.Info(" - namespace: Namespace of the Redis cluster (optional, default: 'default')")
170+
setupLog.Info(" - alertname: Alert name that indicates the scaling action")
171+
setupLog.Info(" * HighMemoryUsage: Scale up masters")
172+
setupLog.Info(" * HighThroughput: Scale up replicas")
173+
setupLog.Info(" * LowMemoryUsage: Scale down masters")
174+
setupLog.Info(" * LowThroughput: Scale down replicas")
175+
setupLog.Info(" - scale_count: Number of nodes to scale (optional annotation, default: 1)")
176+
163177
setupLog.Info("starting manager")
164178
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
165179
setupLog.Error(err, "problem running manager")

util/util.go

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"io"
8+
"log"
79
"net/http"
10+
"strconv"
11+
"strings"
812

913
"github.com/gorilla/mux"
1014
redisv1beta1 "github.com/jaehanbyun/redis-operator/api/v1beta1"
1115
"github.com/jaehanbyun/redis-operator/k8sutils"
1216
corev1 "k8s.io/api/core/v1"
17+
"k8s.io/apimachinery/pkg/types"
1318
"sigs.k8s.io/controller-runtime/pkg/client"
1419
)
1520

@@ -18,15 +23,48 @@ type node struct {
1823
Port int32 `json:"port"`
1924
}
2025

26+
type AlertManagerPayload struct {
27+
Receiver string `json:"receiver"`
28+
Status string `json:"status"`
29+
Alerts []Alert `json:"alerts"`
30+
GroupLabels map[string]string `json:"groupLabels"`
31+
CommonLabels map[string]string `json:"commonLabels"`
32+
CommonAnnotations map[string]string `json:"commonAnnotations"`
33+
ExternalURL string `json:"externalURL"`
34+
}
35+
36+
type Alert struct {
37+
Status string `json:"status"`
38+
Labels map[string]string `json:"labels"`
39+
Annotations map[string]string `json:"annotations"`
40+
StartsAt string `json:"startsAt"`
41+
EndsAt string `json:"endsAt"`
42+
GeneratorURL string `json:"generatorURL"`
43+
Fingerprint string `json:"fingerprint"`
44+
}
45+
46+
type AutoScalingAction struct {
47+
Type string `json:"type"` // "master" or "replica"
48+
ClusterName string `json:"clusterName"`
49+
Namespace string `json:"namespace"`
50+
Count int32 `json:"count"`
51+
ScalingAction string `json:"scalingAction"` // "up" or "down"
52+
}
53+
2154
func StartHTTPServer(cl client.Client) error {
2255
r := mux.NewRouter()
2356
r.HandleFunc("/cluster/nodes", func(w http.ResponseWriter, r *http.Request) {
2457
handleClusterNodesRequest(w, r, cl)
2558
}).Methods("GET")
2659

60+
r.HandleFunc("/webhooks/alertmanager", func(w http.ResponseWriter, r *http.Request) {
61+
handleAlertManagerWebhook(w, r, cl)
62+
}).Methods("POST")
63+
2764
return http.ListenAndServe(":9090", r)
2865
}
2966

67+
// handleClusterNodesRequest is a function that handles the cluster nodes request.
3068
func handleClusterNodesRequest(w http.ResponseWriter, r *http.Request, cl client.Client) {
3169
clusterName := r.URL.Query().Get("clusterName")
3270
if clusterName == "" {
@@ -79,3 +117,131 @@ func handleClusterNodesRequest(w http.ResponseWriter, r *http.Request, cl client
79117
return
80118
}
81119
}
120+
121+
// handleAlertManagerWebhook is a function that handles Alertmanager webhook.
122+
func handleAlertManagerWebhook(w http.ResponseWriter, r *http.Request, cl client.Client) {
123+
body, err := io.ReadAll(r.Body)
124+
if err != nil {
125+
http.Error(w, "Failed to read request body", http.StatusBadRequest)
126+
return
127+
}
128+
defer r.Body.Close()
129+
130+
var payload AlertManagerPayload
131+
if err := json.Unmarshal(body, &payload); err != nil {
132+
http.Error(w, "Invalid JSON format", http.StatusBadRequest)
133+
return
134+
}
135+
136+
for _, alert := range payload.Alerts {
137+
if alert.Status == "firing" {
138+
action, err := parseAlertAction(alert)
139+
if err != nil {
140+
log.Printf("Failed to parse alert action: %v", err)
141+
continue
142+
}
143+
144+
if err := applyAutoScaling(cl, action); err != nil {
145+
log.Printf("Failed to apply auto-scaling: %v", err)
146+
continue
147+
}
148+
149+
log.Printf("Successfully applied auto-scaling: %+v", action)
150+
}
151+
}
152+
153+
w.WriteHeader(http.StatusOK)
154+
w.Write([]byte("Alerts processed"))
155+
}
156+
157+
func parseAlertAction(alert Alert) (*AutoScalingAction, error) {
158+
clusterName, ok := alert.Labels["redis_cluster"]
159+
if !ok {
160+
return nil, fmt.Errorf("missing required label: redis_cluster")
161+
}
162+
163+
namespace, ok := alert.Labels["namespace"]
164+
if !ok {
165+
namespace = "default"
166+
}
167+
168+
var action AutoScalingAction
169+
action.ClusterName = clusterName
170+
action.Namespace = namespace
171+
172+
alertName, ok := alert.Labels["alertname"]
173+
if !ok {
174+
return nil, fmt.Errorf("missing required label: alertname")
175+
}
176+
177+
switch {
178+
case strings.Contains(alertName, "HighMemoryUsage"):
179+
action.Type = "master"
180+
action.ScalingAction = "up"
181+
action.Count = 1
182+
case strings.Contains(alertName, "HighThroughput"):
183+
action.Type = "replica"
184+
action.ScalingAction = "up"
185+
action.Count = 1
186+
case strings.Contains(alertName, "LowMemoryUsage"):
187+
action.Type = "master"
188+
action.ScalingAction = "down"
189+
action.Count = 1
190+
case strings.Contains(alertName, "LowThroughput"):
191+
action.Type = "replica"
192+
action.ScalingAction = "down"
193+
action.Count = 1
194+
default:
195+
return nil, fmt.Errorf("unknown alert name: %s", alertName)
196+
}
197+
198+
if countStr, ok := alert.Annotations["scale_count"]; ok {
199+
count, err := strconv.ParseInt(countStr, 10, 32)
200+
if err == nil && count > 0 {
201+
action.Count = int32(count)
202+
}
203+
}
204+
205+
return &action, nil
206+
}
207+
208+
// applyAutoScaling is a function that applies auto-scaling to a Redis cluster.
209+
func applyAutoScaling(cl client.Client, action *AutoScalingAction) error {
210+
var redisCluster redisv1beta1.RedisCluster
211+
if err := cl.Get(context.Background(), types.NamespacedName{
212+
Name: action.ClusterName,
213+
Namespace: action.Namespace,
214+
}, &redisCluster); err != nil {
215+
return fmt.Errorf("failed to get RedisCluster: %v", err)
216+
}
217+
218+
// 타입에 따라 마스터 또는 레플리카 수 조정
219+
switch action.Type {
220+
case "master":
221+
if action.ScalingAction == "up" {
222+
redisCluster.Spec.Masters += action.Count
223+
} else if action.ScalingAction == "down" && redisCluster.Spec.Masters > action.Count {
224+
redisCluster.Spec.Masters -= action.Count
225+
} else {
226+
return fmt.Errorf("invalid scaling down action: current masters %d, scaling down by %d",
227+
redisCluster.Spec.Masters, action.Count)
228+
}
229+
case "replica":
230+
if action.ScalingAction == "up" {
231+
redisCluster.Spec.Replicas += action.Count
232+
} else if action.ScalingAction == "down" && redisCluster.Spec.Replicas > action.Count {
233+
redisCluster.Spec.Replicas -= action.Count
234+
} else {
235+
return fmt.Errorf("invalid scaling down action: current replicas %d, scaling down by %d",
236+
redisCluster.Spec.Replicas, action.Count)
237+
}
238+
default:
239+
return fmt.Errorf("unknown scaling type: %s", action.Type)
240+
}
241+
242+
if err := cl.Update(context.Background(), &redisCluster); err != nil {
243+
return fmt.Errorf("failed to update RedisCluster: %v", err)
244+
}
245+
246+
return nil
247+
}

0 commit comments

Comments
 (0)