diff --git a/infra/k8s/monitoring/grafana/overlays/production/alerting/values.yaml b/infra/k8s/monitoring/grafana/overlays/production/alerting/values.yaml index ce93a5bc89..1e2da38f73 100644 --- a/infra/k8s/monitoring/grafana/overlays/production/alerting/values.yaml +++ b/infra/k8s/monitoring/grafana/overlays/production/alerting/values.yaml @@ -25,22 +25,28 @@ alerting: period: '300' region: ap-northeast-2 statistic: Average + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 2147483648 type: lt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -66,22 +72,28 @@ alerting: period: '300' region: ap-northeast-2 statistic: Maximum + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 150 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -107,22 +119,28 @@ alerting: expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 85 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -143,22 +161,28 @@ alerting: expr: (1 - node_filesystem_avail_bytes{mountpoint="/",fstype!="tmpfs"} / node_filesystem_size_bytes{mountpoint="/",fstype!="tmpfs"}) * 100 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 85 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -179,22 +203,28 @@ alerting: expr: kube_node_status_condition{condition="Ready",status="true"} == 0 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 0 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: critical @@ -220,22 +250,28 @@ alerting: expr: rabbitmq_queue_messages_ready intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 100 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -256,25 +292,31 @@ alerting: expr: rabbitmq_queue_messages_unacked intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 50 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning annotations: summary: 'RabbitMQ unacked messages > 50' - description: '큐 {{ `{{ $labels.queue }}` }}에 미확인 메시지 {{ `{{ $values.A }}` }}개' + description: '큐 {{ `{{ $labels.queue }}` }}에 미확인 메시지 {{ `{{ $values.A }}` }}개' \ No newline at end of file diff --git a/infra/k8s/monitoring/grafana/overlays/stage/alerting/values.yaml b/infra/k8s/monitoring/grafana/overlays/stage/alerting/values.yaml index b80c520ef0..5651d63583 100644 --- a/infra/k8s/monitoring/grafana/overlays/stage/alerting/values.yaml +++ b/infra/k8s/monitoring/grafana/overlays/stage/alerting/values.yaml @@ -21,22 +21,28 @@ alerting: expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 85 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -57,22 +63,28 @@ alerting: expr: (1 - node_filesystem_avail_bytes{mountpoint="/",fstype!="tmpfs"} / node_filesystem_size_bytes{mountpoint="/",fstype!="tmpfs"}) * 100 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 85 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -93,22 +105,28 @@ alerting: expr: kube_node_status_condition{condition="Ready",status="true"} == 0 intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 0 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: critical @@ -134,22 +152,28 @@ alerting: expr: rabbitmq_queue_messages_ready intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 100 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning @@ -170,25 +194,31 @@ alerting: expr: rabbitmq_queue_messages_unacked intervalMs: 60000 maxDataPoints: 43200 + - refId: B + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + type: reduce + expression: A + reducer: last - refId: C relativeTimeRange: from: 600 to: 0 datasourceUid: __expr__ model: + type: threshold + expression: B conditions: - evaluator: params: - 50 type: gt - operator: - type: and - reducer: - type: last - type: classic_condition for: 5m labels: severity: warning annotations: summary: 'RabbitMQ unacked messages > 50' - description: '큐 {{ `{{ $labels.queue }}` }}에 미확인 메시지 {{ `{{ $values.A }}` }}개' + description: '큐 {{ `{{ $labels.queue }}` }}에 미확인 메시지 {{ `{{ $values.A }}` }}개' \ No newline at end of file