diff --git a/prometheus/install-prometheus.sh b/prometheus/install-prometheus.sh index 120b77a..bbf1c51 100644 --- a/prometheus/install-prometheus.sh +++ b/prometheus/install-prometheus.sh @@ -3,7 +3,7 @@ set -euo pipefail echo "开始安装 Prometheus..." sudo chown -R 65534:65534 /opt/prometheus/data -kubectl apply -f prometheus-configmap.yaml +kubectl apply -f prometheus-configmap-basic.yaml kubectl apply -f prometheus-rbac.yaml kubectl apply -f prometheus-deployment.yaml kubectl apply -f prometheus-service.yaml @@ -34,9 +34,64 @@ kubectl wait --for=condition=available --timeout=300s deployment/prometheus echo "==========================================" echo "Prometheus 与 Alertmanager 集成完成!" echo "==========================================" -echo "访问地址: https://prometheus-ops.t-aaron.com" -echo "告警管理: https://alertmanager-ops.t-aaron.com" -echo "已集成 Alertmanager,告警规则已加载" + +echo "" +echo "==========================================" +echo "K3s 监控组件安装" +echo "==========================================" + +echo "检查 K3s 环境..." +kubectl get nodes +# grep exits 1 when nothing matches; under "set -euo pipefail" that would abort the install, so a miss is reported instead. +kubectl get pods -A | grep metrics || echo "未发现 metrics 相关 Pod" + +echo "" +echo "安装 kube-state-metrics (可选,提供更丰富的K8s对象指标)..." +kubectl apply -f kube-state-metrics-deployment.yaml + +echo "" +echo "安装 node-exporter (可选,提供节点硬件指标)..." +kubectl apply -f node-exporter-daemonset.yaml + +echo "" +echo "等待组件启动..." +kubectl wait --for=condition=available --timeout=300s deployment/kube-state-metrics 2>/dev/null || echo "kube-state-metrics 未安装或启动失败" +kubectl wait --for=condition=ready --timeout=300s pod -l app=node-exporter 2>/dev/null || echo "node-exporter 未安装或启动失败" + +echo "" +echo "更新 Prometheus 配置以适配 K3s..." +kubectl apply -f prometheus-configmap.yaml +kubectl apply -f prometheus-deployment.yaml + +echo "" +echo "等待 Prometheus 重新启动..." +kubectl wait --for=condition=available --timeout=300s deployment/prometheus + +echo "" +echo "检查 Prometheus targets..." 
+sleep 10 +# No -it: the script may run without a TTY. The target listing is informational only, so a failure here must not abort the script under pipefail. +kubectl exec "$(kubectl get pods -l app=prometheus -o jsonpath="{.items[0].metadata.name}")" -- wget -qO- http://localhost:9090/api/v1/targets | grep -o '"job":"[^"]*"' | sort -u || echo "无法获取 Prometheus targets" + +echo "" +echo "==========================================" +echo "K3s 监控组件安装完成!" +echo "==========================================" +echo "K3s 内置监控组件:" +echo "- metrics-server: 已存在" +echo "- kubelet metrics: 通过 API 代理访问" +echo "- cAdvisor: 通过 API 代理访问" +echo "" +echo "可选组件:" +echo "- kube-state-metrics: $(kubectl get pods -l app=kube-state-metrics --no-headers 2>/dev/null | wc -l) 个实例" +echo "- node-exporter: $(kubectl get pods -l app=node-exporter --no-headers 2>/dev/null | wc -l) 个实例" +echo "" +echo "访问地址:" +echo "- Prometheus: https://prom-ops.t-aaron.com/targets" +echo "- Grafana: https://grafana-ops.t-aaron.com" +echo "- Alertmanager: https://alertmanager-ops.t-aaron.com" +echo "" +echo "现在可以在 Grafana 中看到 K3s 容器监控数据了!" echo "==========================================" diff --git a/prometheus/kube-state-metrics-deployment.yaml b/prometheus/kube-state-metrics-deployment.yaml new file mode 100644 index 0000000..59127eb --- /dev/null +++ b/prometheus/kube-state-metrics-deployment.yaml @@ -0,0 +1,114 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: default + labels: + app: kube-state-metrics +spec: + replicas: 1 + selector: + matchLabels: + app: kube-state-metrics + template: + metadata: + labels: + app: kube-state-metrics + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: registry.t-aaron.com/k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.5.0 + ports: + - containerPort: 8080 + - containerPort: 8081 + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: default + labels: + app: kube-state-metrics +spec: + selector: + app: kube-state-metrics +
ports: + - name: http-metrics + port: 8080 + targetPort: 8080 + - name: telemetry + port: 8081 + targetPort: 8081 + type: ClusterIP +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kube-state-metrics +rules: +- apiGroups: [""] + resources: ["configmaps", "secrets", "nodes", "pods", "services", "resourcequotas", "replicationcontrollers", "limitranges", "persistentvolumeclaims", "persistentvolumes", "namespaces", "endpoints"] + verbs: ["list", "watch"] +- apiGroups: ["extensions"] + resources: ["daemonsets", "deployments", "replicasets", "ingresses"] + verbs: ["list", "watch"] +- apiGroups: ["apps"] + resources: ["statefulsets", "daemonsets", "deployments", "replicasets"] + verbs: ["list", "watch"] +- apiGroups: ["batch"] + resources: ["cronjobs", "jobs"] + verbs: ["list", "watch"] +- apiGroups: ["autoscaling"] + resources: ["horizontalpodautoscalers"] + verbs: ["list", "watch"] +- apiGroups: ["authentication.k8s.io"] + resources: ["tokenreviews"] + verbs: ["create"] +- apiGroups: ["authorization.k8s.io"] + resources: ["subjectaccessreviews"] + verbs: ["create"] +- apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["list", "watch"] +- apiGroups: ["certificates.k8s.io"] + resources: ["certificatesigningrequests"] + verbs: ["list", "watch"] +- apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "volumeattachments"] + verbs: ["list", "watch"] +- apiGroups: ["admissionregistration.k8s.io"] + resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"] + verbs: ["list", "watch"] +- apiGroups: ["networking.k8s.io"] + resources: ["networkpolicies", "ingressclasses", "ingresses"] + verbs: ["list", "watch"] +- apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-state-metrics +roleRef: + apiGroup:
rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: default diff --git a/prometheus/node-exporter-daemonset.yaml b/prometheus/node-exporter-daemonset.yaml new file mode 100644 index 0000000..919292f --- /dev/null +++ b/prometheus/node-exporter-daemonset.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: default + labels: + app: node-exporter +spec: + selector: + matchLabels: + app: node-exporter + template: + metadata: + labels: + app: node-exporter + spec: + hostNetwork: true + hostPID: true + containers: + - name: node-exporter + image: registry.t-aaron.com/prom/node-exporter:latest + ports: + - containerPort: 9100 + hostPort: 9100 + name: metrics + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/) + - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$ + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + tolerations: + - operator: Exists +--- +apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: default + labels: + app: node-exporter +spec: + selector: + app: node-exporter + ports: + - name: metrics + port: 9100 + targetPort: 9100 + type: ClusterIP diff --git a/prometheus/prometheus-configmap-basic.yaml b/prometheus/prometheus-configmap-basic.yaml new file mode 100644 index 0000000..c20d66a --- /dev/null +++ b/prometheus/prometheus-configmap-basic.yaml @@ -0,0 
+1,123 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config + namespace: default +data: + prometheus.yml: | + global: + scrape_interval: 15s + evaluation_interval: 15s + + rule_files: + - "/etc/prometheus/rules/*.yml" + + scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + # K3s API Server metrics + - job_name: 'k3s-apiserver' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + # K3s kubelet metrics (通过API代理访问) + - job_name: 'k3s-kubelet' + kubernetes_sd_configs: + - role: node + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + # K3s cAdvisor metrics (容器指标) + - job_name: 'k3s-cadvisor' + kubernetes_sd_configs: + - role: node + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + + # K3s service endpoints + - job_name: 
'k3s-service-endpoints' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + + # K3s pods + - job_name: 'k3s-pods' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name diff --git a/prometheus/prometheus-configmap.yaml b/prometheus/prometheus-configmap.yaml index 11984b0..cc18eba 100644 --- a/prometheus/prometheus-configmap.yaml +++ b/prometheus/prometheus-configmap.yaml @@ -22,7 +22,88 @@ data: - job_name: 'prometheus' 
static_configs: - targets: ['localhost:9090'] - - job_name: 'kubernetes-pods' + + # K3s API Server metrics + - job_name: 'k3s-apiserver' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: default;kubernetes;https + + # K3s kubelet metrics (通过API代理访问) + - job_name: 'k3s-kubelet' + kubernetes_sd_configs: + - role: node + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics + + # K3s cAdvisor metrics (容器指标) + - job_name: 'k3s-cadvisor' + kubernetes_sd_configs: + - role: node + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + + # K3s service endpoints + - job_name: 'k3s-service-endpoints' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: 
[__meta_kubernetes_service_annotation_prometheus_io_scheme] + action: replace + target_label: __scheme__ + regex: (https?) + - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_service_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_service_name] + action: replace + target_label: kubernetes_name + + # K3s pods + - job_name: 'k3s-pods' kubernetes_sd_configs: - role: pod relabel_configs: @@ -38,5 +119,32 @@ data: regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 target_label: __address__ + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: kubernetes_pod_name + + # kube-state-metrics (如果安装了) + - job_name: 'kube-state-metrics' + static_configs: + - targets: ['kube-state-metrics:8080'] + + # node-exporter (如果安装了) + # Discover each endpoint behind the node-exporter Service so every node is scraped; + # the ClusterIP name 'node-exporter:9100' would reach one arbitrary pod per scrape. + - job_name: 'node-exporter' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name] + action: keep + regex: default;node-exporter + - source_labels: [__meta_kubernetes_endpoint_node_name] + action: replace + target_label: node