CrunchyData · keithf4 · Jun 30, 2025 · Jun 27, 2025
@@ -0,0 +1,4 @@
+---
+minor_changes:
+ - prometheus - Remove unnecessary absence alerts. The general ExporterDown alert covers these scenarios.
+ - prometheus - Move the ExporterDown alert to its own common alerts file and enable it by default (no .example extension on the file name)
@@ -0,0 +1,21 @@
+###
+#
+# Copyright © 2017-2025 Crunchy Data Solutions, Inc. All Rights Reserved.
+#
+###
+
+groups:
+- name: alert-rules
+  rules:
+
+########## COMMON RULES ##########
+  - alert: ExporterDown
+    expr: avg_over_time(up[5m]) < 0.5
+    for: 10s
+    labels:
+      service: system
+      severity: critical
+      severity_num: 300
+    annotations:
+      description: 'Metrics exporter service for {{ $labels.job }} running on {{ $labels.instance }} has been down at least 50% of the time for the last 5 minutes. Service may be flapping or down.'
+      summary: 'Prometheus Exporter Service Down'
@@ -56,36 +56,3 @@ groups:
 #      severity_num: 300
 #    annotations:
 #      description: 'The expected minimum count of etcd nodes was not found. Current count {{ $value }}'
-
-# Absence alerts must be configured per named job, otherwise there's no way to know which job is down
-# Below is are some examples using the leader metric for a targets called "etcd#" for a 3 node etcd cluster
-
-#  - alert: ETCDAbsent_etcd1
-#    expr: absent(etcd_server_has_leader{job="ip11_etcd1"})
-#    for: 10s
-#    labels:
-#      service: etcd
-#      severity: critical
-#      severity_num: 300
-#    annotations:
-#      description: 'Leader metric is absent from target {{ $labels.job }}. Check that etcd is running on target host.'
-
-#  - alert: ETCDAbsent_etcd2
-#    expr: absent(etcd_server_has_leader{job="ip21_etcd2"})
-#    for: 10s
-#    labels:
-#      service: etcd
-#      severity: critical
-#      severity_num: 300
-#    annotations:
-#      description: 'Leader metric is absent from target {{ $labels.job }}. Check that etcd is running on target host.'
-
-#  - alert: ETCDAbsent_etcd3
-#    expr: absent(etcd_server_has_leader{job="ip31_etcd3"})
-#    for: 10s
-#    labels:
-#      service: etcd
-#      severity: critical
-#      severity_num: 300
-#    annotations:
-#      description: 'Leader metric is absent from target {{ $labels.job }}. Check that etcd is running on target host.'
@@ -164,18 +164,6 @@ groups:
 #      summary: '{{ $labels.job }} has changed from replica to primary'
 
 
-## Absence alerts must be configured per named job, otherwise there's no way to know which job is down
-## Below is an example for a target job called "Prod"
-#  - alert: PGConnectionAbsent_Prod
-#    expr: absent(ccp_connection_stats_max_connections{job="Prod"})
-#    for: 10s
-#    labels:
-#      service: postgresql
-#      severity: critical
-#      severity_num: 300
-#    annotations:
-#      description: 'Connection metric is absent from target (Prod). Check that postgres_exporter can connect to PostgreSQL.'
-
 
 ## Optional monitor for changes to pg_settings (postgresql.conf) system catalog.
 ## A similar metric is available for monitoring pg_hba.conf. See ccp_hba_settings_checksum.