Skip to content

Commit ec69c65

Browse files
committed
tide: add more metrics
1 parent 8026e80 commit ec69c65

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

pkg/tide/tide.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ var (
216216

217217
// Per controller
218218
syncHeartbeat *prometheus.CounterVec
219+
220+
// Retesting counters and per-pool PR test-state gauges
221+
retests *prometheus.CounterVec
222+
retestsByAction *prometheus.CounterVec
223+
batchFailures *prometheus.CounterVec
224+
poolMissingPRs *prometheus.GaugeVec
225+
poolPendingPRs *prometheus.GaugeVec
226+
poolSuccessfulPRs *prometheus.GaugeVec
227+
poolBatchPendingPRs *prometheus.GaugeVec
219228
}{
220229
pooledPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
221230
Name: "pooledprs",
@@ -280,6 +289,63 @@ var (
280289
}, []string{
281290
"controller",
282291
}),
292+
retests: prometheus.NewCounterVec(prometheus.CounterOpts{
293+
Name: "tide_retests_total",
294+
Help: "Total number of test retriggers by org, repo, and branch. Incremented when Tide triggers tests for PRs that need retesting.",
295+
}, []string{
296+
"org",
297+
"repo",
298+
"branch",
299+
}),
300+
retestsByAction: prometheus.NewCounterVec(prometheus.CounterOpts{
301+
Name: "tide_retests_by_action_total",
302+
Help: "Total number of retests by action type (TRIGGER for serial, TRIGGER_BATCH for batch). Helps identify whether batch or serial testing is causing more retests.",
303+
}, []string{
304+
"org",
305+
"repo",
306+
"branch",
307+
"action",
308+
}),
309+
batchFailures: prometheus.NewCounterVec(prometheus.CounterOpts{
310+
Name: "tide_batch_failures_total",
311+
Help: "Total number of times a batch test completes but PRs move back to missing state (indicating batch failure). This is expensive as all PRs in the batch need retesting.",
312+
}, []string{
313+
"org",
314+
"repo",
315+
"branch",
316+
}),
317+
poolMissingPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
318+
Name: "tide_pool_missing_prs",
319+
Help: "Number of PRs with missing or failed tests in each pool. High values indicate testing bottlenecks.",
320+
}, []string{
321+
"org",
322+
"repo",
323+
"branch",
324+
}),
325+
poolPendingPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
326+
Name: "tide_pool_pending_prs",
327+
Help: "Number of PRs with pending tests in each pool.",
328+
}, []string{
329+
"org",
330+
"repo",
331+
"branch",
332+
}),
333+
poolSuccessfulPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
334+
Name: "tide_pool_successful_prs",
335+
Help: "Number of PRs with all tests passing in each pool.",
336+
}, []string{
337+
"org",
338+
"repo",
339+
"branch",
340+
}),
341+
poolBatchPendingPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
342+
Name: "tide_pool_batch_pending_prs",
343+
Help: "Number of PRs in a pending batch test in each pool.",
344+
}, []string{
345+
"org",
346+
"repo",
347+
"branch",
348+
}),
283349
}
284350
)
285351

@@ -292,6 +358,13 @@ func init() {
292358
prometheus.MustRegister(tideMetrics.syncHeartbeat)
293359
prometheus.MustRegister(tideMetrics.poolErrors)
294360
prometheus.MustRegister(tideMetrics.queryResults)
361+
prometheus.MustRegister(tideMetrics.retests)
362+
prometheus.MustRegister(tideMetrics.retestsByAction)
363+
prometheus.MustRegister(tideMetrics.batchFailures)
364+
prometheus.MustRegister(tideMetrics.poolMissingPRs)
365+
prometheus.MustRegister(tideMetrics.poolPendingPRs)
366+
prometheus.MustRegister(tideMetrics.poolSuccessfulPRs)
367+
prometheus.MustRegister(tideMetrics.poolBatchPendingPRs)
295368
}
296369

297370
type manager interface {
@@ -1705,8 +1778,22 @@ func (c *syncController) syncSubpool(sp subpool, blocks []blockers.Blocker) (Poo
17051778
"action": string(act),
17061779
"targets": prNumbers(targets),
17071780
}).Info("Subpool synced.")
1781+
17081782
tideMetrics.pooledPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(sp.prs)))
17091783
tideMetrics.updateTime.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(time.Now().Unix()))
1784+
tideMetrics.poolMissingPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(missings)))
1785+
tideMetrics.poolPendingPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(pendings)))
1786+
tideMetrics.poolSuccessfulPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(successes)))
1787+
tideMetrics.poolBatchPendingPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(batchPending)))
1788+
if act == Trigger || act == TriggerBatch {
1789+
tideMetrics.retests.WithLabelValues(sp.org, sp.repo, sp.branch).Add(float64(len(targets)))
1790+
tideMetrics.retestsByAction.WithLabelValues(sp.org, sp.repo, sp.branch, string(act)).Add(float64(len(targets)))
1791+
}
1792+
if len(batchPending) == 0 && len(missings) > 0 && len(pendings) == 0 {
1793+
if act == Trigger {
1794+
tideMetrics.batchFailures.WithLabelValues(sp.org, sp.repo, sp.branch).Inc()
1795+
}
1796+
}
17101797
return Pool{
17111798
Org: sp.org,
17121799
Repo: sp.repo,

0 commit comments

Comments
 (0)