This commit is contained in:
孙小云 2025-09-16 10:51:23 +08:00
parent 33f210b602
commit b6072d51c7
5 changed files with 463 additions and 5 deletions

View File

@ -3,7 +3,7 @@ set -euo pipefail
echo "开始安装 Prometheus..."
sudo chown -R 65534:65534 /opt/prometheus/data
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-configmap-basic.yaml
kubectl apply -f prometheus-rbac.yaml
kubectl apply -f prometheus-deployment.yaml
kubectl apply -f prometheus-service.yaml
@ -34,9 +34,62 @@ kubectl wait --for=condition=available --timeout=300s deployment/prometheus
echo "=========================================="
echo "Prometheus 与 Alertmanager 集成完成!"
echo "=========================================="
echo "访问地址: https://prometheus-ops.t-aaron.com"
echo "告警管理: https://alertmanager-ops.t-aaron.com"
echo "已集成 Alertmanager告警规则已加载"
echo ""
# ---------------------------------------------------------------------------
# K3s monitoring components: installs kube-state-metrics and node-exporter,
# re-applies the K3s-aware Prometheus configuration, then prints a summary.
# The script runs under `set -euo pipefail`, so every purely informational
# pipeline below is guarded to keep a harmless "no match" from aborting it.
# ---------------------------------------------------------------------------
echo "=========================================="
echo "K3s 监控组件安装"
echo "=========================================="
echo "检查 K3s 环境..."
kubectl get nodes
# grep exits 1 when nothing matches; with pipefail that would kill the whole
# installation even though this listing is only a sanity check.
kubectl get pods -A | grep metrics || echo "未发现名称包含 metrics 的 Pod"
echo ""
echo "安装 kube-state-metrics (可选提供更丰富的K8s对象指标)..."
kubectl apply -f kube-state-metrics-deployment.yaml
echo ""
echo "安装 node-exporter (可选，提供节点硬件指标)..."
kubectl apply -f node-exporter-daemonset.yaml
echo ""
echo "等待组件启动..."
# Both components are optional, so a failed wait is reported but not fatal.
kubectl wait --for=condition=available --timeout=300s deployment/kube-state-metrics 2>/dev/null || echo "kube-state-metrics 未安装或启动失败"
kubectl wait --for=condition=ready --timeout=300s pod -l app=node-exporter 2>/dev/null || echo "node-exporter 未安装或启动失败"
echo ""
echo "更新 Prometheus 配置以适配 K3s..."
# NOTE(review): re-applying an unchanged Deployment manifest does not restart
# its pods. Confirm that Prometheus actually reloads the updated ConfigMap
# (reload sidecar, /-/reload, or an explicit rollout restart).
kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-deployment.yaml
echo ""
echo "等待 Prometheus 重新启动..."
kubectl wait --for=condition=available --timeout=300s deployment/prometheus
echo ""
echo "检查 Prometheus targets..."
sleep 10
# No `-it` here: the script is non-interactive, and allocating a TTY either
# fails without a terminal or injects carriage returns into the piped output.
# The trailing `|| echo` keeps an empty or failed query from aborting the
# script, since this listing is diagnostic only.
prometheus_pod="$(kubectl get pods -l app=prometheus -o jsonpath="{.items[0].metadata.name}")"
kubectl exec "${prometheus_pod}" -- wget -qO- http://localhost:9090/api/v1/targets \
  | grep -o '"job":"[^"]*"' \
  | sort -u \
  || echo "无法获取 Prometheus targets"
echo ""
echo "=========================================="
echo "K3s 监控组件安装完成!"
echo "=========================================="
echo "K3s 内置监控组件:"
echo "- metrics-server: 已存在"
echo "- kubelet metrics: 通过 API 代理访问"
echo "- cAdvisor: 通过 API 代理访问"
echo ""
echo "可选组件:"
# The pod counts come from the `app` labels set in the two manifests applied above.
echo "- kube-state-metrics: $(kubectl get pods -l app=kube-state-metrics --no-headers 2>/dev/null | wc -l) 个实例"
echo "- node-exporter: $(kubectl get pods -l app=node-exporter --no-headers 2>/dev/null | wc -l) 个实例"
echo ""
echo "访问地址:"
# NOTE(review): the summary earlier in this script prints
# https://prometheus-ops.t-aaron.com for Prometheus while this one prints
# prom-ops; confirm which hostname is correct.
echo "- Prometheus: https://prom-ops.t-aaron.com/targets"
echo "- Grafana: https://grafana-ops.t-aaron.com"
echo "- Alertmanager: https://alertmanager-ops.t-aaron.com"
echo ""
echo "现在可以在 Grafana 中看到 K3s 容器监控数据了!"
echo "=========================================="

View File

@ -0,0 +1,114 @@
# kube-state-metrics Deployment: a single replica that serves Kubernetes
# object metrics on port 8080 and its own telemetry on port 8081.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: default
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      # Runs under the ServiceAccount that the ClusterRoleBinding in this
      # manifest grants list/watch access to.
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: registry.t-aaron.com/k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.5.0
          ports:
            # Port names mirror the ones used by the Service in this manifest.
            - name: http-metrics
              containerPort: 8080
            - name: telemetry
              containerPort: 8081
          # Without probes the Deployment reports "available" as soon as the
          # container starts, so `kubectl wait --for=condition=available`
          # could succeed before the exporter is actually serving.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: 8081
            initialDelaySeconds: 5
            timeoutSeconds: 5
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "100m"
---
# Cluster-internal Service in front of the kube-state-metrics pods.
apiVersion: v1
kind: Service
metadata:
  namespace: default
  name: kube-state-metrics
  labels:
    app: kube-state-metrics
spec:
  type: ClusterIP
  # Selects the pods labelled by the kube-state-metrics Deployment template.
  selector:
    app: kube-state-metrics
  ports:
    # Kubernetes object metrics.
    - name: http-metrics
      port: 8080
      targetPort: 8080
    # Metrics about kube-state-metrics itself.
    - name: telemetry
      port: 8081
      targetPort: 8081
---
# Identity the kube-state-metrics pods run as; bound to the ClusterRole of
# the same name by the ClusterRoleBinding in this manifest.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: default
  name: kube-state-metrics
---
# Read-only cluster-wide permissions for kube-state-metrics.
# Every resource kind the exporter collects needs `list` and `watch`;
# a missing entry shows up as "forbidden" errors in the exporter's log and
# as absent metrics for that kind.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
  # Core API group. `configmaps` and `secrets` are collected by default in
  # kube-state-metrics v2.x and were missing from the original rule.
  - apiGroups: [""]
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  # Legacy group kept for backward compatibility; presumably harmless on
  # clusters that no longer serve these resources under `extensions`.
  - apiGroups: ["extensions"]
    resources: ["daemonsets", "deployments", "replicasets", "ingresses"]
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources: ["statefulsets", "daemonsets", "deployments", "replicasets"]
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources: ["cronjobs", "jobs"]
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources: ["horizontalpodautoscalers"]
    verbs: ["list", "watch"]
  # Token and access reviews are created, not watched.
  - apiGroups: ["authentication.k8s.io"]
    resources: ["tokenreviews"]
    verbs: ["create"]
  - apiGroups: ["authorization.k8s.io"]
    resources: ["subjectaccessreviews"]
    verbs: ["create"]
  - apiGroups: ["policy"]
    resources: ["poddisruptionbudgets"]
    verbs: ["list", "watch"]
  - apiGroups: ["certificates.k8s.io"]
    resources: ["certificatesigningrequests"]
    verbs: ["list", "watch"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses", "volumeattachments"]
    verbs: ["list", "watch"]
  - apiGroups: ["admissionregistration.k8s.io"]
    resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"]
    verbs: ["list", "watch"]
  # `ingresses` lives in networking.k8s.io on current clusters; it was only
  # granted through the legacy `extensions` group before.
  - apiGroups: ["networking.k8s.io"]
    resources: ["networkpolicies", "ingresses", "ingressclasses"]
    verbs: ["list", "watch"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["list", "watch"]
---
# Grants the kube-state-metrics ClusterRole to the ServiceAccount of the
# same name in the `default` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: default
    name: kube-state-metrics
roleRef:
  kind: ClusterRole
  name: kube-state-metrics
  apiGroup: rbac.authorization.k8s.io

View File

@ -0,0 +1,69 @@
# node-exporter DaemonSet: one pod per node, running in the host network and
# PID namespaces and reading host /proc, /sys and / through read-only mounts.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: default
  labels:
    app: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      hostNetwork: true
      hostPID: true
      containers:
        - name: node-exporter
          # NOTE(review): `latest` is a moving tag; consider pinning a fixed
          # version that is mirrored in this registry.
          image: registry.t-aaron.com/prom/node-exporter:latest
          ports:
            - containerPort: 9100
              hostPort: 9100
              name: metrics
          args:
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
            # Point the filesystem collector at the host's root filesystem;
            # without this it inspects the container's own mount namespace.
            - --path.rootfs=/host/root
            # With the host root visible, also skip the per-container and
            # per-pod mounts created by K3s containerd and the kubelet.
            - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/k3s/containerd/.+|var/lib/docker/.+|var/lib/kubelet/.+)($|/)'
            - '--collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$'
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "100m"
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
            - name: root
              mountPath: /host/root
              # Lets mounts made on the host after pod start become visible.
              mountPropagation: HostToContainer
              readOnly: true
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
        - name: root
          hostPath:
            path: /
      # Tolerate every taint so the exporter is scheduled on all nodes.
      tolerations:
        - operator: Exists
---
# Cluster-internal Service covering the node-exporter DaemonSet pods.
apiVersion: v1
kind: Service
metadata:
  namespace: default
  name: node-exporter
  labels:
    app: node-exporter
spec:
  type: ClusterIP
  # Matches the `app` label set by the DaemonSet's pod template.
  selector:
    app: node-exporter
  ports:
    - name: metrics
      port: 9100
      targetPort: 9100

View File

@ -0,0 +1,123 @@
# Prometheus configuration for a K3s cluster, delivered as the
# `prometheus-config` ConfigMap in the `default` namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: default
  name: prometheus-config
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    rule_files:
      - "/etc/prometheus/rules/*.yml"

    scrape_configs:
      # Prometheus scraping its own metrics endpoint.
      - job_name: 'prometheus'
        static_configs:
          - targets:
              - 'localhost:9090'

      # K3s API server metrics.
      - job_name: 'k3s-apiserver'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # Keep only the `https` port of the `kubernetes` Service in `default`.
          - action: keep
            source_labels:
              - __meta_kubernetes_namespace
              - __meta_kubernetes_service_name
              - __meta_kubernetes_endpoint_port_name
            regex: default;kubernetes;https

      # K3s kubelet metrics, reached through the API server's node proxy.
      - job_name: 'k3s-kubelet'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          # Copy every node label onto the target.
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          # Send the request to the API server instead of the node itself...
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          # ...and address the node through its proxy path.
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      # K3s cAdvisor (container) metrics, also via the API server's node proxy.
      - job_name: 'k3s-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Service endpoints that opt in through `prometheus.io/*` annotations.
      - job_name: 'k3s-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          # Only Services annotated with prometheus.io/scrape: "true".
          - action: keep
            source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            regex: true
          # Optional scheme override (http or https).
          - action: replace
            source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            regex: (https?)
            target_label: __scheme__
          # Optional metrics path override.
          - action: replace
            source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            regex: (.+)
            target_label: __metrics_path__
          # Optional port override: swap the port part of the address.
          - action: replace
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - action: replace
            source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # Pods that opt in through `prometheus.io/*` annotations.
      - job_name: 'k3s-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: keep
            source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            regex: true
          - action: replace
            source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            regex: (.+)
            target_label: __metrics_path__
          - action: replace
            source_labels:
              - __address__
              - __meta_kubernetes_pod_annotation_prometheus_io_port
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - action: replace
            source_labels: [__meta_kubernetes_pod_name]
            target_label: kubernetes_pod_name

View File

@ -22,7 +22,88 @@ data:
# Prometheus scraping its own metrics endpoint.
- job_name: 'prometheus'
  static_configs:
    - targets:
        - 'localhost:9090'
- job_name: 'kubernetes-pods'
# K3s API server metrics.
- job_name: 'k3s-apiserver'
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
    - role: endpoints
  relabel_configs:
    # Keep only the `https` port of the `kubernetes` Service in `default`.
    - action: keep
      source_labels:
        - __meta_kubernetes_namespace
        - __meta_kubernetes_service_name
        - __meta_kubernetes_endpoint_port_name
      regex: default;kubernetes;https
# K3s kubelet metrics, reached through the API server's node proxy.
- job_name: 'k3s-kubelet'
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
    - role: node
  relabel_configs:
    # Copy every node label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
    # Send the request to the API server instead of the node itself...
    - target_label: __address__
      replacement: kubernetes.default.svc:443
    # ...and address the node through its proxy path.
    - source_labels: [__meta_kubernetes_node_name]
      regex: (.+)
      target_label: __metrics_path__
      replacement: /api/v1/nodes/${1}/proxy/metrics
# K3s cAdvisor (container) metrics, fetched via the API server's node proxy.
- job_name: 'k3s-cadvisor'
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
    - role: node
  relabel_configs:
    # Copy every node label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
    # All requests go to the API server...
    - target_label: __address__
      replacement: kubernetes.default.svc:443
    # ...using the per-node cAdvisor proxy path.
    - source_labels: [__meta_kubernetes_node_name]
      regex: (.+)
      target_label: __metrics_path__
      replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# Service endpoints that opt in through `prometheus.io/*` annotations.
- job_name: 'k3s-service-endpoints'
  kubernetes_sd_configs:
    - role: endpoints
  relabel_configs:
    # Only Services annotated with prometheus.io/scrape: "true".
    - action: keep
      source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      regex: true
    # Optional scheme override (http or https).
    - action: replace
      source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
      regex: (https?)
      target_label: __scheme__
    # Optional metrics path override.
    - action: replace
      source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
      regex: (.+)
      target_label: __metrics_path__
    # Optional port override: swap the port part of the address.
    - action: replace
      source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
      target_label: __address__
    # Copy Service labels, then record namespace and Service name.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - action: replace
      source_labels: [__meta_kubernetes_namespace]
      target_label: kubernetes_namespace
    - action: replace
      source_labels: [__meta_kubernetes_service_name]
      target_label: kubernetes_name
# K3s pods
- job_name: 'k3s-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
@ -38,5 +119,23 @@ data:
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
# kube-state-metrics (only reachable when it has been installed).
# The target is the Service name and its object-metrics port.
- job_name: 'kube-state-metrics'
  static_configs:
    - targets:
        - 'kube-state-metrics:8080'
# node-exporter (yields targets only when the DaemonSet and Service exist).
# Each pod behind the Service is discovered individually. A single static
# target on the ClusterIP would be load-balanced to an arbitrary node on
# every scrape, mixing all nodes' metrics under one `instance`.
# Assumes the Prometheus ServiceAccount may list/watch endpoints, as the
# other `role: endpoints` jobs in this config already require.
- job_name: 'node-exporter'
  kubernetes_sd_configs:
    - role: endpoints
      namespaces:
        names:
          - default
  relabel_configs:
    # Keep only the `metrics` port of the `node-exporter` Service.
    - action: keep
      source_labels:
        - __meta_kubernetes_service_name
        - __meta_kubernetes_endpoint_port_name
      regex: node-exporter;metrics
    # Record which node each exporter pod runs on.
    - action: replace
      source_labels: [__meta_kubernetes_endpoint_node_name]
      target_label: node