#!/bin/bash
#
# Deploys Prometheus with kubectl and then layers two follow-up phases on top:
#   1. Base install: configmap, RBAC, deployment, service, ingress.
#   2. Alertmanager integration: alert rules + full Prometheus config.
#   3. K3s monitoring add-ons: kube-state-metrics and node-exporter, followed
#      by a listing of the scrape jobs Prometheus currently knows about.
#
# Requirements: kubectl configured for the target cluster, sudo rights on this
# host, and all referenced manifests present in the current working directory.
#
# NOTE(review): phase 2 is documented (in its own banner) as requiring
# Alertmanager to be installed first, yet it runs unconditionally right after
# phase 1 -- confirm that running everything in one pass is intended.

set -euo pipefail

readonly SEPARATOR="=========================================="

#######################################
# Blocks until the prometheus Deployment reports the Available condition.
# Arguments: none
# Returns:   kubectl's exit status (non-zero on timeout after 300s).
# NOTE(review): `kubectl rollout status` may be a better fit after a
# re-apply, since the Available condition can already be true -- confirm.
#######################################
wait_for_prometheus() {
  kubectl wait --for=condition=available --timeout=300s deployment/prometheus
}

# --------------------------------------------------------------------------
# Phase 1: base Prometheus install
# --------------------------------------------------------------------------
echo "开始安装 Prometheus..."

# Hand the data directory to UID/GID 65534 before the pod starts.
# Presumably this is the user the Prometheus container runs as -- verify
# against prometheus-deployment.yaml.
sudo chown -R 65534:65534 /opt/prometheus/data

kubectl apply -f prometheus-configmap-basic.yaml
kubectl apply -f prometheus-rbac.yaml
kubectl apply -f prometheus-deployment.yaml
kubectl apply -f prometheus-service.yaml
kubectl apply -f prometheus-ingress.yaml

echo "等待 Prometheus 启动..."
wait_for_prometheus

echo "Prometheus 安装完成!"
echo "查看状态: kubectl get pods -l app=prometheus"

# --------------------------------------------------------------------------
# Phase 2: Alertmanager integration
# --------------------------------------------------------------------------
echo ""
echo "${SEPARATOR}"
echo "Prometheus 与 Alertmanager 集成配置"
echo "注意:此部分需要在安装完 Alertmanager 后执行"
echo "${SEPARATOR}"

echo "应用告警规则配置..."
kubectl apply -f prometheus-rules-configmap.yaml

echo "更新 Prometheus 配置以集成 Alertmanager..."
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml

echo "等待 Prometheus 重新启动..."
wait_for_prometheus

echo "${SEPARATOR}"
echo "Prometheus 与 Alertmanager 集成完成!"
echo "${SEPARATOR}"

# --------------------------------------------------------------------------
# Phase 3: K3s monitoring components
# --------------------------------------------------------------------------
echo ""
echo "${SEPARATOR}"
echo "K3s 监控组件安装"
echo "${SEPARATOR}"

echo "检查 K3s 环境..."
kubectl get nodes
# Informational listing only: grep exits 1 when nothing matches, which would
# abort the script under `set -e -o pipefail`. Only grep's status is
# tolerated; a failing kubectl still stops the script.
kubectl get pods -A | { grep metrics || true; }

echo ""
echo "安装 kube-state-metrics (可选,提供更丰富的K8s对象指标)..."
kubectl apply -f kube-state-metrics-deployment.yaml

echo ""
echo "安装 node-exporter (可选,提供节点硬件指标)..."
kubectl apply -f node-exporter-daemonset.yaml

echo ""
echo "等待组件启动..."
# Both components are optional, so a failed wait is reported and the script
# carries on. stderr is intentionally discarded in favour of the short notice.
kubectl wait --for=condition=available --timeout=300s \
  deployment/kube-state-metrics 2>/dev/null \
  || echo "kube-state-metrics 未安装或启动失败"
kubectl wait --for=condition=ready --timeout=300s \
  pod -l app=node-exporter 2>/dev/null \
  || echo "node-exporter 未安装或启动失败"

echo ""
echo "更新 Prometheus 配置以适配 K3s..."
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml

echo ""
echo "等待 Prometheus 重新启动..."
wait_for_prometheus

echo ""
echo "检查 Prometheus targets..."
# Fixed pause before querying the targets API.
sleep 10

# Resolve the pod name in its own assignment so a lookup failure stops the
# script here instead of handing an empty name to `kubectl exec`.
prometheus_pod=$(kubectl get pods -l app=prometheus \
  -o jsonpath="{.items[0].metadata.name}")

# No -i/-t: the command is non-interactive and its output is piped, so a TTY
# is neither available nor wanted. An empty job list is not treated as fatal.
kubectl exec "${prometheus_pod}" -- \
  wget -qO- http://localhost:9090/api/v1/targets \
  | { grep -o '"job":"[^"]*"' || true; } \
  | sort -u

# --------------------------------------------------------------------------
# Summary
# --------------------------------------------------------------------------
echo ""
echo "${SEPARATOR}"
echo "K3s 监控组件安装完成!"
echo "${SEPARATOR}"
echo "K3s 内置监控组件:"
echo "- metrics-server: 已存在"
echo "- kubelet metrics: 通过 API 代理访问"
echo "- cAdvisor: 通过 API 代理访问"
echo ""
echo "可选组件:"
# Instance counts are the number of pod rows returned for each label selector.
echo "- kube-state-metrics: $(kubectl get pods -l app=kube-state-metrics --no-headers 2>/dev/null | wc -l) 个实例"
echo "- node-exporter: $(kubectl get pods -l app=node-exporter --no-headers 2>/dev/null | wc -l) 个实例"
echo ""
echo "访问地址:"
echo "- Prometheus: https://prom-ops.t-aaron.com/targets"
echo "- Grafana: https://grafana-ops.t-aaron.com"
echo "- Alertmanager: https://alertmanager-ops.t-aaron.com"
echo ""
echo "现在可以在 Grafana 中看到 K3s 容器监控数据了!"
echo "${SEPARATOR}"