devops/prometheus/install-prometheus.sh

96 lines
3.4 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Installs Prometheus with kubectl: prepares the data directory, applies the
# base manifests, and waits for the deployment to become available.
# The manifest paths are relative, so run this from the directory that
# contains the YAML files.
set -euo pipefail

echo "开始安装 Prometheus..."

# chown fails (and `set -e` aborts the script) when the path does not exist,
# e.g. on a fresh host. mkdir -p is a no-op when the directory is present.
sudo mkdir -p /opt/prometheus/data
# NOTE(review): 65534:65534 is presumably the UID/GID the Prometheus container
# runs as -- confirm against prometheus-deployment.yaml.
sudo chown -R 65534:65534 /opt/prometheus/data

kubectl apply -f prometheus-configmap-basic.yaml
kubectl apply -f prometheus-rbac.yaml
kubectl apply -f prometheus-deployment.yaml
kubectl apply -f prometheus-service.yaml
kubectl apply -f prometheus-ingress.yaml

echo "等待 Prometheus 启动..."
# Block for up to 300s until the deployment reports the Available condition.
kubectl wait --for=condition=available --timeout=300s deployment/prometheus

echo "Prometheus 安装完成!"
echo "查看状态: kubectl get pods -l app=prometheus"
# --- Prometheus / Alertmanager integration ---------------------------------
# Applies the alert-rules ConfigMap, then the full Prometheus ConfigMap and the
# deployment manifest, and waits for the deployment to be available again.
# The banner printed below states that this part is meant to run after
# Alertmanager has been installed.
printf '%s\n' \
  "" \
  "==========================================" \
  "Prometheus 与 Alertmanager 集成配置" \
  "注意:此部分需要在安装完 Alertmanager 后执行" \
  "==========================================" \
  "应用告警规则配置..."
kubectl apply -f prometheus-rules-configmap.yaml

printf '%s\n' "更新 Prometheus 配置以集成 Alertmanager..."
for manifest in prometheus-configmap.yaml prometheus-deployment.yaml; do
  kubectl apply -f "${manifest}"
done

printf '%s\n' "等待 Prometheus 重新启动..."
kubectl wait --for=condition=available --timeout=300s deployment/prometheus

printf '%s\n' \
  "==========================================" \
  "Prometheus 与 Alertmanager 集成完成!" \
  "=========================================="
# --- K3s monitoring components ---------------------------------------------
# Shows the cluster state, installs kube-state-metrics and node-exporter,
# then re-applies the Prometheus config and deployment and waits for it.
echo ""
echo "=========================================="
echo "K3s 监控组件安装"
echo "=========================================="
echo "检查 K3s 环境..."
kubectl get nodes
# Informational only. grep exits 1 when nothing matches, and with
# `set -euo pipefail` that would abort the whole installation, so fall back
# to a message instead.
kubectl get pods -A | grep metrics || echo "未发现名称包含 metrics 的 Pod"
echo ""
echo "安装 kube-state-metrics (可选提供更丰富的K8s对象指标)..."
kubectl apply -f kube-state-metrics-deployment.yaml
echo ""
echo "安装 node-exporter (可选,提供节点硬件指标)..."
kubectl apply -f node-exporter-daemonset.yaml
echo ""
echo "等待组件启动..."
# Both components are announced as optional above, so these waits are
# deliberately best-effort: kubectl's stderr is discarded and a failure or
# timeout only prints a notice instead of aborting the script.
kubectl wait --for=condition=available --timeout=300s deployment/kube-state-metrics 2>/dev/null || echo "kube-state-metrics 未安装或启动失败"
kubectl wait --for=condition=ready --timeout=300s pod -l app=node-exporter 2>/dev/null || echo "node-exporter 未安装或启动失败"
echo ""
echo "更新 Prometheus 配置以适配 K3s..."
# NOTE(review): these are the same two manifests applied in the Alertmanager
# integration step; if they are unchanged, `kubectl apply` leaves the
# deployment as-is and no restart occurs -- confirm whether a rollout restart
# is intended here.
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml
echo ""
echo "等待 Prometheus 重新启动..."
kubectl wait --for=condition=available --timeout=300s deployment/prometheus
echo ""
# Post-install diagnostic: list the distinct scrape job names that Prometheus
# reports through its /api/v1/targets endpoint.
echo "检查 Prometheus targets..."
# Presumably gives Prometheus a moment to load its targets before the query.
sleep 10
# Resolve the pod name first so it can be quoted when passed to `kubectl exec`.
prometheus_pod=$(kubectl get pods -l app=prometheus -o jsonpath='{.items[0].metadata.name}')
# No -i/-t: this is a non-interactive pipeline, so no TTY or stdin forwarding
# is wanted. The check is informational, so a failed exec or an empty match
# (grep exits 1, which `pipefail` propagates) prints a warning instead of
# aborting the script before the final summary.
if ! kubectl exec "${prometheus_pod}" -- wget -qO- http://localhost:9090/api/v1/targets \
  | grep -o '"job":"[^"]*"' \
  | sort -u; then
  echo "警告: 无法获取 Prometheus targets, 请稍后手动检查" >&2
fi
# --- Final summary -----------------------------------------------------------
# Prints the completion banner, the built-in K3s metric sources, the number of
# pods found for each optional component, and the service URLs.
printf '%s\n' \
  "" \
  "==========================================" \
  "K3s 监控组件安装完成!" \
  "==========================================" \
  "K3s 内置监控组件:" \
  "- metrics-server: 已存在" \
  "- kubelet metrics: 通过 API 代理访问" \
  "- cAdvisor: 通过 API 代理访问" \
  "" \
  "可选组件:"

# The pod count is computed inline inside the printf argument: kubectl's
# stderr is discarded and the substitution's exit status is not checked, so a
# failing lookup just reports whatever `wc -l` counted rather than stopping
# the script.
for component in kube-state-metrics node-exporter; do
  printf '%s\n' "- ${component}: $(kubectl get pods -l app=${component} --no-headers 2>/dev/null | wc -l) 个实例"
done

printf '%s\n' \
  "" \
  "访问地址:" \
  "- Prometheus: https://prom-ops.t-aaron.com/targets" \
  "- Grafana: https://grafana-ops.t-aaron.com" \
  "- Alertmanager: https://alertmanager-ops.t-aaron.com" \
  "" \
  "现在可以在 Grafana 中看到 K3s 容器监控数据了!" \
  "=========================================="