@@ -216,6 +216,15 @@ var (
216216
217217 // Per controller
218218 syncHeartbeat * prometheus.CounterVec
219+
220+ // Retest counters and per-pool PR test-state gauges
221+ retests * prometheus.CounterVec
222+ retestsByAction * prometheus.CounterVec
223+ batchFailures * prometheus.CounterVec
224+ poolMissingPRs * prometheus.GaugeVec
225+ poolPendingPRs * prometheus.GaugeVec
226+ poolSuccessfulPRs * prometheus.GaugeVec
227+ poolBatchPendingPRs * prometheus.GaugeVec
219228 }{
220229 pooledPRs : prometheus .NewGaugeVec (prometheus.GaugeOpts {
221230 Name : "pooledprs" ,
@@ -280,6 +289,63 @@ var (
280289 }, []string {
281290 "controller" ,
282291 }),
292+ retests : prometheus .NewCounterVec (prometheus.CounterOpts {
293+ Name : "tide_retests_total" ,
294+ Help : "Total number of test retriggers by org, repo, and branch. Incremented when Tide triggers tests for PRs that need retesting." ,
295+ }, []string {
296+ "org" ,
297+ "repo" ,
298+ "branch" ,
299+ }),
300+ retestsByAction : prometheus .NewCounterVec (prometheus.CounterOpts {
301+ Name : "tide_retests_by_action_total" ,
302+ Help : "Total number of retests by action type (TRIGGER for serial, TRIGGER_BATCH for batch). Helps identify whether batch or serial testing is causing more retests." ,
303+ }, []string {
304+ "org" ,
305+ "repo" ,
306+ "branch" ,
307+ "action" ,
308+ }),
309+ batchFailures : prometheus .NewCounterVec (prometheus.CounterOpts {
310+ Name : "tide_batch_failures_total" ,
311+ Help : "Total number of times a batch test completes but PRs move back to missing state (indicating batch failure). This is expensive as all PRs in the batch need retesting." ,
312+ }, []string {
313+ "org" ,
314+ "repo" ,
315+ "branch" ,
316+ }),
317+ poolMissingPRs : prometheus .NewGaugeVec (prometheus.GaugeOpts {
318+ Name : "tide_pool_missing_prs" ,
319+ Help : "Number of PRs with missing or failed tests in each pool. High values indicate testing bottlenecks." ,
320+ }, []string {
321+ "org" ,
322+ "repo" ,
323+ "branch" ,
324+ }),
325+ poolPendingPRs : prometheus .NewGaugeVec (prometheus.GaugeOpts {
326+ Name : "tide_pool_pending_prs" ,
327+ Help : "Number of PRs with pending tests in each pool." ,
328+ }, []string {
329+ "org" ,
330+ "repo" ,
331+ "branch" ,
332+ }),
333+ poolSuccessfulPRs : prometheus .NewGaugeVec (prometheus.GaugeOpts {
334+ Name : "tide_pool_successful_prs" ,
335+ Help : "Number of PRs with all tests passing in each pool." ,
336+ }, []string {
337+ "org" ,
338+ "repo" ,
339+ "branch" ,
340+ }),
341+ poolBatchPendingPRs : prometheus .NewGaugeVec (prometheus.GaugeOpts {
342+ Name : "tide_pool_batch_pending_prs" ,
343+ Help : "Number of PRs in a pending batch test in each pool." ,
344+ }, []string {
345+ "org" ,
346+ "repo" ,
347+ "branch" ,
348+ }),
283349 }
284350)
285351
@@ -292,6 +358,13 @@ func init() {
292358 prometheus .MustRegister (tideMetrics .syncHeartbeat )
293359 prometheus .MustRegister (tideMetrics .poolErrors )
294360 prometheus .MustRegister (tideMetrics .queryResults )
361+ prometheus .MustRegister (tideMetrics .retests )
362+ prometheus .MustRegister (tideMetrics .retestsByAction )
363+ prometheus .MustRegister (tideMetrics .batchFailures )
364+ prometheus .MustRegister (tideMetrics .poolMissingPRs )
365+ prometheus .MustRegister (tideMetrics .poolPendingPRs )
366+ prometheus .MustRegister (tideMetrics .poolSuccessfulPRs )
367+ prometheus .MustRegister (tideMetrics .poolBatchPendingPRs )
295368}
296369
297370type manager interface {
@@ -1705,8 +1778,22 @@ func (c *syncController) syncSubpool(sp subpool, blocks []blockers.Blocker) (Poo
17051778 "action" : string (act ),
17061779 "targets" : prNumbers (targets ),
17071780 }).Info ("Subpool synced." )
1781+
17081782 tideMetrics .pooledPRs .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (len (sp .prs )))
17091783 tideMetrics .updateTime .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (time .Now ().Unix ()))
1784+ tideMetrics .poolMissingPRs .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (len (missings )))
1785+ tideMetrics .poolPendingPRs .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (len (pendings )))
1786+ tideMetrics .poolSuccessfulPRs .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (len (successes )))
1787+ tideMetrics .poolBatchPendingPRs .WithLabelValues (sp .org , sp .repo , sp .branch ).Set (float64 (len (batchPending )))
1788+ if act == Trigger || act == TriggerBatch {
1789+ tideMetrics .retests .WithLabelValues (sp .org , sp .repo , sp .branch ).Add (float64 (len (targets )))
1790+ tideMetrics .retestsByAction .WithLabelValues (sp .org , sp .repo , sp .branch , string (act )).Add (float64 (len (targets )))
1791+ }
1792+ if len (batchPending ) == 0 && len (missings ) > 0 && len (pendings ) == 0 {
1793+ if act == Trigger {
1794+ tideMetrics .batchFailures .WithLabelValues (sp .org , sp .repo , sp .branch ).Inc ()
1795+ }
1796+ }
17101797 return Pool {
17111798 Org : sp .org ,
17121799 Repo : sp .repo ,
0 commit comments