Skip to content

Commit 90bc7f3

Browse files
committed
[AUTO-COMMIT] Backup grafana dashboards
Files changed: M grafana/prod/dashboards/Ai Monitoring/ai-orchestrator-summary.json M grafana/prod/dashboards/Ai Monitoring/ai-overview-livestream.json M grafana/prod/dashboards/Ai Monitoring/ai-public-os.json M grafana/prod/dashboards/catalyst-multi-node.json M grafana/prod/dashboards/external-dns.json
1 parent 892a6a0 commit 90bc7f3

File tree

5 files changed

+54
-88
lines changed

5 files changed

+54
-88
lines changed

grafana/prod/dashboards/Ai Monitoring/ai-orchestrator-summary.json

Lines changed: 8 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -35,31 +35,6 @@
3535
}
3636
],
3737
"panels": [
38-
{
39-
"fieldConfig": {
40-
"defaults": {},
41-
"overrides": []
42-
},
43-
"gridPos": {
44-
"h": 4,
45-
"w": 24,
46-
"x": 0,
47-
"y": 0
48-
},
49-
"id": 5,
50-
"options": {
51-
"code": {
52-
"language": "plaintext",
53-
"showLineNumbers": false,
54-
"showMiniMap": false
55-
},
56-
"content": "## Please don't change the interval/refresh settings.\n\nThe dahsboard does not rely on either the\ntime interval or the auto-refresh settings.\n\nIt gets generated everytime you visit the dashboard and\ndoesn't store historical data anywhere.",
57-
"mode": "markdown"
58-
},
59-
"pluginVersion": "12.1.1",
60-
"title": "",
61-
"type": "text"
62-
},
6338
{
6439
"datasource": {
6540
"type": "yesoreyeram-infinity-datasource",
@@ -99,7 +74,7 @@
9974
"h": 8,
10075
"w": 4,
10176
"x": 0,
102-
"y": 4
77+
"y": 0
10378
},
10479
"id": 3,
10580
"options": {
@@ -188,7 +163,7 @@
188163
"h": 8,
189164
"w": 12,
190165
"x": 6,
191-
"y": 4
166+
"y": 0
192167
},
193168
"id": 2,
194169
"options": {
@@ -270,7 +245,7 @@
270245
"h": 8,
271246
"w": 4,
272247
"x": 20,
273-
"y": 4
248+
"y": 0
274249
},
275250
"id": 4,
276251
"options": {
@@ -358,7 +333,7 @@
358333
"h": 10,
359334
"w": 24,
360335
"x": 0,
361-
"y": 12
336+
"y": 8
362337
},
363338
"id": 1,
364339
"options": {
@@ -419,9 +394,11 @@
419394
"from": "now-30m",
420395
"to": "now"
421396
},
422-
"timepicker": {},
397+
"timepicker": {
398+
"hidden": true
399+
},
423400
"timezone": "utc",
424401
"title": "AI Orchestrator summary",
425402
"uid": "7f2d281c-ef2e-410e-9f96-eda535967ba9",
426-
"version": 4
403+
"version": 5
427404
}

grafana/prod/dashboards/Ai Monitoring/ai-overview-livestream.json

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@
128128
},
129129
"direction": "backward",
130130
"editorMode": "code",
131-
"expr": "100 * (\n sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"PASS: TestFeatures/Start_a_Stream\" [10m]))\n) / (\n sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"PASS: TestFeatures/Start_a_Stream\" [10m])) +\n (sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"FAIL: TestFeatures/Start_a_Stream\" [10m])) or vector(0))\n)",
131+
"expr": "100 * (\n sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"PASS: TestFeatures/\" [10m]))\n) / (\n sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"PASS: TestFeatures/\" [10m])) +\n (sum (count_over_time({app=~\".*prod-ai-load-test.*\"} |= \"FAIL: TestFeatures/\" [10m])) or vector(0))\n)",
132132
"queryType": "range",
133133
"refId": "A"
134134
}
@@ -228,7 +228,7 @@
228228
},
229229
"direction": "backward",
230230
"editorMode": "code",
231-
"expr": "100 * (\n sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"PASS: TestFeatures/Start_a_Stream\" [10m]))\n) / (\n sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"PASS: TestFeatures/Start_a_Stream\" [10m])) +\n (sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"FAIL: TestFeatures/Start_a_Stream\" [10m])) or vector(0))\n)\n",
231+
"expr": "100 * (\n sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"PASS: TestFeatures/\" [10m]))\n) / (\n sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"PASS: TestFeatures/\" [10m])) +\n (sum (count_over_time({app=~\".*staging-ai-load-test.*\"} |= \"FAIL: TestFeatures/\" [10m])) or vector(0))\n)\n",
232232
"queryType": "range",
233233
"refId": "A"
234234
}
@@ -596,6 +596,7 @@
596596
"axisColorMode": "text",
597597
"axisLabel": "",
598598
"axisPlacement": "auto",
599+
"axisSoftMin": 0,
599600
"barAlignment": 0,
600601
"barWidthFactor": 0.6,
601602
"drawStyle": "bars",
@@ -639,33 +640,7 @@
639640
},
640641
"unit": "GPUs"
641642
},
642-
"overrides": [
643-
{
644-
"__systemRef": "hideSeriesFrom",
645-
"matcher": {
646-
"id": "byNames",
647-
"options": {
648-
"mode": "exclude",
649-
"names": [
650-
" ai3.ad-astra.live:9988 (streamdiffusion-sdxl)",
651-
" ai3.ad-astra.live:9988 (streamdiffusion-sdxl-faceid)"
652-
],
653-
"prefix": "All except:",
654-
"readOnly": true
655-
}
656-
},
657-
"properties": [
658-
{
659-
"id": "custom.hideFrom",
660-
"value": {
661-
"legend": false,
662-
"tooltip": false,
663-
"viz": true
664-
}
665-
}
666-
]
667-
}
668-
]
643+
"overrides": []
669644
},
670645
"gridPos": {
671646
"h": 8,
@@ -835,7 +810,7 @@
835810
"axisSoftMin": 0,
836811
"barAlignment": 0,
837812
"barWidthFactor": 0.6,
838-
"drawStyle": "line",
813+
"drawStyle": "bars",
839814
"fillOpacity": 0,
840815
"gradientMode": "none",
841816
"hideFrom": {
@@ -854,7 +829,7 @@
854829
"spanNulls": false,
855830
"stacking": {
856831
"group": "A",
857-
"mode": "none"
832+
"mode": "normal"
858833
},
859834
"thresholdsStyle": {
860835
"mode": "off"
@@ -887,13 +862,12 @@
887862
"options": {
888863
"legend": {
889864
"calcs": [
890-
"p99",
891865
"lastNotNull"
892866
],
893867
"displayMode": "table",
894868
"placement": "right",
895869
"showLegend": true,
896-
"sortBy": "99th %",
870+
"sortBy": "Last *",
897871
"sortDesc": true
898872
},
899873
"tooltip": {
@@ -907,7 +881,7 @@
907881
{
908882
"disableTextWrap": false,
909883
"editorMode": "code",
910-
"expr": "sum by(region) (max by(region, node_id, pipeline) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\"prod-livepeer-ai-gateway.*\", region=~\"${gateway:pipe}\"}))",
884+
"expr": "sum by(region) (max by(region, node_id, pipeline) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\".*-livepeer-ai-gateway.*\", region=~\"${gateway:pipe}\"}))",
911885
"fullMetaSearch": false,
912886
"includeNullMetadata": true,
913887
"legendFormat": "__auto",
@@ -1044,7 +1018,7 @@
10441018
"axisSoftMin": 0,
10451019
"barAlignment": 0,
10461020
"barWidthFactor": 0.6,
1047-
"drawStyle": "line",
1021+
"drawStyle": "bars",
10481022
"fillOpacity": 0,
10491023
"gradientMode": "none",
10501024
"hideFrom": {
@@ -1063,7 +1037,7 @@
10631037
"spanNulls": false,
10641038
"stacking": {
10651039
"group": "A",
1066-
"mode": "none"
1040+
"mode": "normal"
10671041
},
10681042
"thresholdsStyle": {
10691043
"mode": "off"
@@ -1096,13 +1070,12 @@
10961070
"options": {
10971071
"legend": {
10981072
"calcs": [
1099-
"p99",
11001073
"lastNotNull"
11011074
],
11021075
"displayMode": "table",
11031076
"placement": "right",
11041077
"showLegend": true,
1105-
"sortBy": "99th %",
1078+
"sortBy": "Last *",
11061079
"sortDesc": true
11071080
},
11081081
"tooltip": {
@@ -1116,7 +1089,7 @@
11161089
{
11171090
"disableTextWrap": false,
11181091
"editorMode": "code",
1119-
"expr": "sum by(pipeline) (max by(pipeline, region, node_id) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\".*-livepeer-ai-gateway.*\"}))\n\n",
1092+
"expr": "sum by(pipeline) (max by(region, node_id, pipeline) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\".*-livepeer-ai-gateway.*\", region=~\"${gateway:pipe}\"}))\n\n",
11201093
"fullMetaSearch": false,
11211094
"includeNullMetadata": true,
11221095
"legendFormat": "__auto",
@@ -1199,7 +1172,10 @@
11991172
"interval": "1m",
12001173
"options": {
12011174
"legend": {
1202-
"calcs": [],
1175+
"calcs": [
1176+
"last",
1177+
"max"
1178+
],
12031179
"displayMode": "list",
12041180
"placement": "right",
12051181
"showLegend": true
@@ -1237,6 +1213,7 @@
12371213
"type": "prometheus",
12381214
"uid": "PBFA97CFB590B2093"
12391215
},
1216+
"description": "orchestrator-based metric",
12401217
"fieldConfig": {
12411218
"defaults": {
12421219
"color": {
@@ -1306,7 +1283,9 @@
13061283
],
13071284
"displayMode": "table",
13081285
"placement": "right",
1309-
"showLegend": true
1286+
"showLegend": true,
1287+
"sortBy": "Last *",
1288+
"sortDesc": true
13101289
},
13111290
"tooltip": {
13121291
"hideZeros": false,
@@ -1317,11 +1296,15 @@
13171296
"pluginVersion": "12.1.1",
13181297
"targets": [
13191298
{
1320-
"editorMode": "code",
1321-
"expr": "sum by(model_name) (livepeer_ai_container_in_use{model_name=~\"${model:pipe}\"}[1h])",
1299+
"disableTextWrap": false,
1300+
"editorMode": "builder",
1301+
"expr": "sum by(model_name) (livepeer_ai_container_in_use{model_name=~\"${model:pipe}\"})",
1302+
"fullMetaSearch": false,
1303+
"includeNullMetadata": true,
13221304
"legendFormat": "{{label_name}}",
13231305
"range": true,
1324-
"refId": "A"
1306+
"refId": "A",
1307+
"useBackend": false
13251308
}
13261309
],
13271310
"title": "First-party In Use Runners",
@@ -3362,14 +3345,14 @@
33623345
"options": {
33633346
"legend": {
33643347
"calcs": [
3365-
"median",
3348+
"mean",
33663349
"max"
33673350
],
33683351
"displayMode": "table",
33693352
"placement": "right",
33703353
"showLegend": true,
3371-
"sortBy": "Median",
3372-
"sortDesc": true
3354+
"sortBy": "Mean",
3355+
"sortDesc": false
33733356
},
33743357
"tooltip": {
33753358
"hideZeros": false,
@@ -4053,5 +4036,5 @@
40534036
"timezone": "utc",
40544037
"title": "AI Overview (Livestream)",
40554038
"uid": "be6llteqebk00b",
4056-
"version": 203
4039+
"version": 217
40574040
}

grafana/prod/dashboards/Ai Monitoring/ai-public-os.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,12 +1980,12 @@
19801980
"options": {
19811981
"legend": {
19821982
"calcs": [
1983-
"p99"
1983+
"lastNotNull"
19841984
],
19851985
"displayMode": "table",
19861986
"placement": "right",
19871987
"showLegend": true,
1988-
"sortBy": "99th %",
1988+
"sortBy": "Last *",
19891989
"sortDesc": true
19901990
},
19911991
"tooltip": {
@@ -1999,7 +1999,7 @@
19991999
{
20002000
"disableTextWrap": false,
20012001
"editorMode": "builder",
2002-
"expr": "sum by(pipeline) (max by(node_id, pipeline) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\".*-livepeer-ai-gateway.*\", region=~\".*-ai\"}))",
2002+
"expr": "sum by(pipeline) (max by(node_id, pipeline, region) (livepeer_ai_current_live_pipelines{livepeer_node_type=~\".*-livepeer-ai-gateway.*\"}))",
20032003
"fullMetaSearch": false,
20042004
"includeNullMetadata": true,
20052005
"legendFormat": "__auto",
@@ -2251,5 +2251,5 @@
22512251
"timezone": "utc",
22522252
"title": "AI Public Os",
22532253
"uid": "45346781-4b93-4865-9b10-ac4fc3ab0334",
2254-
"version": 104
2254+
"version": 108
22552255
}

grafana/prod/dashboards/catalyst-multi-node.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1616,6 +1616,7 @@
16161616
"axisColorMode": "text",
16171617
"axisLabel": "",
16181618
"axisPlacement": "auto",
1619+
"axisSoftMin": 0,
16191620
"barAlignment": 0,
16201621
"barWidthFactor": 0.6,
16211622
"drawStyle": "line",
@@ -3382,5 +3383,5 @@
33823383
"timezone": "utc",
33833384
"title": "Catalyst Multi-Node",
33843385
"uid": "EWXkaj7Vz",
3385-
"version": 17
3386+
"version": 18
33863387
}

grafana/prod/dashboards/external-dns.json

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@
434434
"y": 18
435435
},
436436
"id": 37,
437+
"interval": "1m",
437438
"options": {
438439
"minVizHeight": 75,
439440
"minVizWidth": 75,
@@ -455,13 +456,17 @@
455456
"datasource": {
456457
"uid": "$datasource"
457458
},
459+
"disableTextWrap": false,
458460
"editorMode": "code",
459461
"exemplar": true,
460-
"expr": "external_dns_controller_no_op_runs_total {pod=~\".*external-dns.*\",pod=~\"$pod\"}",
462+
"expr": "avg by(region) (external_dns_controller_no_op_runs_total{pod=~\".*external-dns.*\", pod=~\"$pod\", region=~\"$region\"})",
463+
"fullMetaSearch": false,
464+
"includeNullMetadata": true,
461465
"interval": "",
462-
"legendFormat": "{{pod}}",
466+
"legendFormat": "{{region}}",
463467
"range": true,
464-
"refId": "A"
468+
"refId": "A",
469+
"useBackend": false
465470
}
466471
],
467472
"title": "External-dns controller no_op runs total",
@@ -2272,5 +2277,5 @@
22722277
"timezone": "utc",
22732278
"title": "External DNS",
22742279
"uid": "eea5u_I7z",
2275-
"version": 6
2280+
"version": 8
22762281
}

0 commit comments

Comments
 (0)