2025-09-10 15:34:13 +08:00
|
|
|
|
#!/bin/bash
#
# Base Prometheus installation: prepares the host data directory, applies the
# Prometheus manifests from the current working directory, and waits for the
# deployment to become available.

# Abort on the first failing command, on unset variables, and on a failure in
# any stage of a pipeline.
set -euo pipefail

echo "开始安装 Prometheus..."

# Make sure the data directory exists before changing its ownership; without
# this, chown fails on a fresh host and `set -e` aborts the whole installation.
# 65534:65534 is presumably the UID/GID the Prometheus container runs as --
# TODO confirm against prometheus-deployment.yaml.
sudo mkdir -p /opt/prometheus/data
sudo chown -R 65534:65534 /opt/prometheus/data

# Apply the manifests in the same order as before: config first, then RBAC,
# then the workload and the objects that expose it.
for manifest in \
  prometheus-configmap-basic.yaml \
  prometheus-rbac.yaml \
  prometheus-deployment.yaml \
  prometheus-service.yaml \
  prometheus-ingress.yaml; do
  kubectl apply -f "${manifest}"
done

echo "等待 Prometheus 启动..."
kubectl wait --for=condition=available --timeout=300s deployment/prometheus

echo "Prometheus 安装完成!"
echo "查看状态: kubectl get pods -l app=prometheus"
|
|
|
|
|
|
|
2025-09-16 10:10:25 +08:00
|
|
|
|
# ---------------------------------------------------------------------------
# Prometheus <-> Alertmanager integration: applies the alert rules ConfigMap
# and the full Prometheus ConfigMap, then restarts Prometheus so that it runs
# with the new configuration.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo "Prometheus 与 Alertmanager 集成配置"
echo "注意:此部分需要在安装完 Alertmanager 后执行"
echo "=========================================="

echo "应用告警规则配置..."
kubectl apply -f prometheus-rules-configmap.yaml

echo "更新 Prometheus 配置以集成 Alertmanager..."
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml
# Applying an unchanged Deployment manifest does not create new pods, so the
# updated ConfigMaps would not be picked up by a restart. Trigger the rollout
# explicitly.
kubectl rollout restart deployment/prometheus

echo "等待 Prometheus 重新启动..."
# Block until the new rollout has finished; the plain availability check alone
# can succeed against the pod that was running before the restart.
kubectl rollout status deployment/prometheus --timeout=300s
kubectl wait --for=condition=available --timeout=300s deployment/prometheus

echo "=========================================="
echo "Prometheus 与 Alertmanager 集成完成!"
echo "=========================================="
|
2025-09-16 10:51:23 +08:00
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# K3s monitoring components: banner and a quick look at the cluster.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo "K3s 监控组件安装"
echo "=========================================="

echo "检查 K3s 环境..."
kubectl get nodes

# List metrics-related pods for information only. The pod list is captured
# first so that a kubectl failure still aborts the script, while an empty grep
# result (exit status 1) no longer terminates the run under `set -e` /
# `pipefail`.
all_pods=$(kubectl get pods -A)
grep metrics <<<"${all_pods}" || echo "未发现 metrics 相关的 Pod"
|
|
|
|
|
|
|
|
|
|
|
|
# Install the optional exporters (kube-state-metrics and node-exporter) and
# wait for them; a failed or timed-out wait is reported but is not fatal.
printf '\n%s\n' "安装 kube-state-metrics (可选,提供更丰富的K8s对象指标)..."
kubectl apply -f kube-state-metrics-deployment.yaml

printf '\n%s\n' "安装 node-exporter (可选,提供节点硬件指标)..."
kubectl apply -f node-exporter-daemonset.yaml

printf '\n%s\n' "等待组件启动..."
# stderr of the wait commands is discarded on purpose: only the short status
# message below is shown when a component is missing or not ready in time.
if ! kubectl wait --for=condition=available --timeout=300s deployment/kube-state-metrics 2>/dev/null; then
  echo "kube-state-metrics 未安装或启动失败"
fi
if ! kubectl wait --for=condition=ready --timeout=300s pod -l app=node-exporter 2>/dev/null; then
  echo "node-exporter 未安装或启动失败"
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Re-apply the Prometheus configuration for K3s and restart Prometheus so that
# it runs with the updated ConfigMap.
echo ""
echo "更新 Prometheus 配置以适配 K3s..."
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml
# Applying an unchanged Deployment manifest does not create new pods; trigger
# the rollout explicitly so the restart announced below really happens.
kubectl rollout restart deployment/prometheus

echo ""
echo "等待 Prometheus 重新启动..."
# Wait for the new rollout to complete before checking availability, so the
# check is not satisfied by the pod that was running before the restart.
kubectl rollout status deployment/prometheus --timeout=300s
kubectl wait --for=condition=available --timeout=300s deployment/prometheus
|
|
|
|
|
|
|
|
|
|
|
|
# Print the distinct scrape job names that Prometheus reports on its targets
# API, queried from inside the Prometheus pod.
echo ""
echo "检查 Prometheus targets..."
# Fixed pause before querying the targets endpoint.
sleep 10

# Resolve the pod name on its own line: a lookup failure now stops the script
# here instead of passing an empty name to `kubectl exec`.
prometheus_pod=$(kubectl get pods -l app=prometheus -o jsonpath="{.items[0].metadata.name}")

# No -i/-t: this runs non-interactively and its output is processed by other
# tools, so neither stdin forwarding nor a TTY is wanted.
targets_json=$(kubectl exec "${prometheus_pod}" -- wget -qO- http://localhost:9090/api/v1/targets)

# An empty match makes grep exit with status 1, which under `pipefail` would
# abort the script; report it instead, since this step is informational.
grep -o '"job":"[^"]*"' <<<"${targets_json}" | sort -u \
  || echo "未获取到任何 Prometheus target"
|
|
|
|
|
|
|
|
|
|
|
|
# Final summary: built-in K3s components, number of optional exporter pods,
# and the access URLs.

# Pod counts for the optional components. kubectl errors are silenced and must
# not stop the script, hence `|| true`; the count printed by wc is still kept.
ksm_instances=$(kubectl get pods -l app=kube-state-metrics --no-headers 2>/dev/null | wc -l) || true
node_exporter_instances=$(kubectl get pods -l app=node-exporter --no-headers 2>/dev/null | wc -l) || true

cat <<EOF

==========================================
K3s 监控组件安装完成!
==========================================
K3s 内置监控组件:
- metrics-server: 已存在
- kubelet metrics: 通过 API 代理访问
- cAdvisor: 通过 API 代理访问

可选组件:
- kube-state-metrics: ${ksm_instances} 个实例
- node-exporter: ${node_exporter_instances} 个实例

访问地址:
- Prometheus: https://prom-ops.t-aaron.com/targets
- Grafana: https://grafana-ops.t-aaron.com
- Alertmanager: https://alertmanager-ops.t-aaron.com

现在可以在 Grafana 中看到 K3s 容器监控数据了!
==========================================
EOF
|
|
|
|
|
|
|
2025-09-10 15:34:13 +08:00
|
|
|
|
|