# devops/prometheus/aserver/aserver-offline-alert.yaml

# 23 lines
# 674 B
# YAML

---
# ConfigMap that ships a Prometheus alerting-rule file for the AServer service.
# The single key under `data` holds the complete rule file as a literal block
# scalar; its content is passed through verbatim, so comments are kept outside
# the block to avoid altering the stored value.
apiVersion: v1
kind: ConfigMap
metadata:
  name: aserver-offline-alert
  namespace: default
data:
  # Rule group `aserver-offline-alerts` with one alert, `AServerOffline`:
  #   - expr: matches `up` series whose `kubernetes_pod_name` label matches the
  #     regex `aserver.*` and whose value is 0.
  #   - for: the condition must hold for 1m before the alert fires.
  #   - labels: severity=critical, service=aserver, alert_type=offline.
  #   - annotations: summary/description/runbook_url/action; the description
  #     interpolates the `kubernetes_pod_name` label of the firing series.
  # NOTE(review): `up == 0` only matches targets that still exist in service
  # discovery; a pod that is removed entirely yields no `up` series. Consider
  # an additional `absent(...)` rule if that case must alert -- confirm
  # against how these targets are discovered.
  aserver-offline-alert.yml: |
    groups:
      - name: aserver-offline-alerts
        rules:
          - alert: AServerOffline
            expr: up{kubernetes_pod_name=~"aserver.*"} == 0
            for: 1m
            labels:
              severity: critical
              service: aserver
              alert_type: offline
            annotations:
              summary: "AServer 服务离线"
              description: "AServer Pod {{ $labels.kubernetes_pod_name }} 已离线超过1分钟"
              runbook_url: "https://docs.example.com/aserver-troubleshooting"
              action: "请检查 Pod 状态和日志"