From 6316bb8b6f24594b4d114327dcf441274e18eee4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=AD=99=E5=B0=8F=E4=BA=91?=
Date: Wed, 10 Sep 2025 13:11:51 +0800
Subject: [PATCH] sky

---
 skywalking/README.md                          | 148 ++++++++++++++++++
 .../elasticsearch-deployment-persistent.yaml  |  74 +++++++++
 skywalking/elasticsearch-deployment.yaml      |  77 +++++++++
 skywalking/elasticsearch-service.yaml         |  18 +++
 skywalking/install-skywalking.sh              |  72 +++++++++
 skywalking/setup-storage.sh                   |  33 ++++
 skywalking/skywalking-ingress.yaml            |  24 +++
 skywalking/skywalking-oap-deployment.yaml     |  87 ++++++++++
 skywalking/skywalking-oap-service.yaml        |  18 +++
 skywalking/skywalking-ui-deployment.yaml      |  65 ++++++++
 skywalking/skywalking-ui-service.yaml         |  15 ++
 skywalking/storage-pvc.yaml                   |  27 ++++
 12 files changed, 658 insertions(+)
 create mode 100644 skywalking/README.md
 create mode 100644 skywalking/elasticsearch-deployment-persistent.yaml
 create mode 100644 skywalking/elasticsearch-deployment.yaml
 create mode 100644 skywalking/elasticsearch-service.yaml
 create mode 100755 skywalking/install-skywalking.sh
 create mode 100755 skywalking/setup-storage.sh
 create mode 100644 skywalking/skywalking-ingress.yaml
 create mode 100644 skywalking/skywalking-oap-deployment.yaml
 create mode 100644 skywalking/skywalking-oap-service.yaml
 create mode 100644 skywalking/skywalking-ui-deployment.yaml
 create mode 100644 skywalking/skywalking-ui-service.yaml
 create mode 100644 skywalking/storage-pvc.yaml

diff --git a/skywalking/README.md b/skywalking/README.md
new file mode 100644
index 0000000..796ca29
--- /dev/null
+++ b/skywalking/README.md
@@ -0,0 +1,148 @@
+# SkyWalking 安装指南
+
+## 概述
+
+SkyWalking 是一个开源的 APM(应用性能监控)系统,用于监控、追踪和诊断分布式系统。
+
+## 组件
+
+- **Elasticsearch 7.9.0**: 数据存储后端
+- **SkyWalking OAP Server 8.9.1**: 后端分析处理服务
+- **SkyWalking UI 8.9.1**: Web 管理界面
+
+## 安装
+
+### 快速安装
+
+```bash
+# 进入 skywalking 目录
+cd newinstall/skywalking
+
+# 1. 在 master 节点上设置存储目录
+sudo ./setup-storage.sh
+
+# 2. 执行安装脚本
+./install-skywalking.sh
+```
+
+### 手动安装
+
+```bash
+# 1. 部署 Elasticsearch
+kubectl apply -f elasticsearch-deployment.yaml
+kubectl apply -f elasticsearch-service.yaml
+
+# 2. 等待 Elasticsearch 就绪
+kubectl rollout status deployment/elasticsearch --timeout=300s
+
+# 3. 部署 SkyWalking OAP
+kubectl apply -f skywalking-oap-deployment.yaml
+kubectl apply -f skywalking-oap-service.yaml
+
+# 4. 等待 OAP 就绪
+kubectl rollout status deployment/skywalking-oap --timeout=300s
+
+# 5. 部署 SkyWalking UI
+kubectl apply -f skywalking-ui-deployment.yaml
+kubectl apply -f skywalking-ui-service.yaml
+
+# 6. 配置 Ingress
+kubectl apply -f skywalking-ingress.yaml
+```
+
+## 访问
+
+- **Web UI**: https://skywalking-ops.t-aaron.com
+- **OAP REST API**: http://skywalking-oap:12800
+- **OAP gRPC**: skywalking-oap:11800
+
+## 配置说明
+
+### 存储配置
+- **存储类型**: HostPath (单节点)
+- **存储位置**: Master 节点的 `/opt` 目录
+- **Elasticsearch 数据**: `/opt/elasticsearch`
+- **SkyWalking OAP 数据**: `/opt/skywalking-oap`
+- **数据持久性**: ✅ 持久化存储,Pod 重启后数据保留
+
+### Elasticsearch
+- 端口: 9200 (HTTP), 9300 (Transport)
+- 内存: 1-2GB
+- 存储: 单节点模式,数据存储在 `/opt/elasticsearch`
+
+### SkyWalking OAP
+- 端口: 11800 (gRPC), 12800 (REST)
+- 内存: 1-2GB
+- 存储后端: Elasticsearch
+- 数据存储: `/opt/skywalking-oap`
+
+### SkyWalking UI
+- 端口: 8080
+- 内存: 256-512MB
+- 后端: SkyWalking OAP
+
+## 监控应用
+
+### Java 应用
+
+在 Java 应用启动时添加以下 JVM 参数:
+
+```bash
+-javaagent:/path/to/skywalking-agent.jar
+-Dskywalking.agent.service_name=your-service-name
+-Dskywalking.collector.backend_service=skywalking-oap:11800
+```
+
+### Spring Boot 应用
+
+Spring Boot 应用同样通过 `-javaagent` 加载探针;探针不会读取 `application.properties`,可改用环境变量配置:
+
+```bash
+# SkyWalking 配置
+export SW_AGENT_NAME=your-service-name
+export SW_AGENT_COLLECTOR_BACKEND_SERVICES=skywalking-oap:11800
+```
+
+## 故障排除
+
+### 检查服务状态
+
+```bash
+# 查看所有 Pod 状态
+kubectl get pods | grep -E "(elasticsearch|skywalking)"
+
+# 查看服务状态
+kubectl get svc | grep -E "(elasticsearch|skywalking)"
+
+# 查看日志
+kubectl logs -l app=elasticsearch
+kubectl logs -l app=skywalking-oap
+kubectl logs -l app=skywalking-ui
+```
+
+### 常见问题
+
+1. **Elasticsearch 启动失败**
+   - 检查内存限制
+   - 检查存储权限
+
+2. **OAP 连接 Elasticsearch 失败**
+   - 检查 Elasticsearch 是否就绪
+   - 检查网络连接
+
+3. **UI 无法访问**
+   - 检查 Ingress 配置
+   - 检查 OAP 服务状态
+
+## 卸载
+
+```bash
+# 删除所有资源
+kubectl delete -f skywalking-ingress.yaml
+kubectl delete -f skywalking-ui-deployment.yaml
+kubectl delete -f skywalking-ui-service.yaml
+kubectl delete -f skywalking-oap-deployment.yaml
+kubectl delete -f skywalking-oap-service.yaml
+kubectl delete -f elasticsearch-deployment.yaml
+kubectl delete -f elasticsearch-service.yaml
+```
diff --git a/skywalking/elasticsearch-deployment-persistent.yaml b/skywalking/elasticsearch-deployment-persistent.yaml
new file mode 100644
index 0000000..7eb7fdb
--- /dev/null
+++ b/skywalking/elasticsearch-deployment-persistent.yaml
@@ -0,0 +1,74 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: elasticsearch
+  namespace: default
+  labels:
+    app: elasticsearch
+spec:
+  replicas: 1
+  # Recreate: the running pod holds the lock on the Elasticsearch data
+  # directory, so it must stop before its replacement can start.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: elasticsearch
+  template:
+    metadata:
+      labels:
+        app: elasticsearch
+    spec:
+      containers:
+        - name: elasticsearch
+          image: registry.t-aaron.com/elasticsearch:7.9.0
+          ports:
+            - containerPort: 9200
+              name: http
+            - containerPort: 9300
+              name: transport
+          env:
+            - name: discovery.type
+              value: "single-node"
+            - name: bootstrap.memory_lock
+              value: "true"
+            - name: TZ
+              value: "Asia/Shanghai"
+            - name: "ES_JAVA_OPTS"
+              value: "-Xms512m -Xmx512m"
+          resources:
+            requests:
+              memory: "1Gi"
+              cpu: "500m"
+            limits:
+              memory: "2Gi"
+              cpu: "1000m"
+          livenessProbe:
+            httpGet:
+              path: /_cluster/health
+              port: 9200
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /_cluster/health
+              port: 9200
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          securityContext:
+            capabilities:
+              add:
+                - IPC_LOCK
+          volumeMounts:
+            - name: data
+              mountPath: /usr/share/elasticsearch/data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: elasticsearch-pvc
+      securityContext:
+        fsGroup: 1000
diff --git a/skywalking/elasticsearch-deployment.yaml b/skywalking/elasticsearch-deployment.yaml
new file mode 100644
index 0000000..da6aca6
--- /dev/null
+++ b/skywalking/elasticsearch-deployment.yaml
@@ -0,0 +1,77 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: elasticsearch
+  namespace: default
+  labels:
+    app: elasticsearch
+spec:
+  replicas: 1
+  # Recreate: the running pod holds the lock on the Elasticsearch data
+  # directory, so it must stop before its replacement can start.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: elasticsearch
+  template:
+    metadata:
+      labels:
+        app: elasticsearch
+    spec:
+      nodeSelector:
+        node-role.kubernetes.io/control-plane: "true"
+      containers:
+        - name: elasticsearch
+          image: registry.t-aaron.com/elasticsearch:7.9.0
+          ports:
+            - containerPort: 9200
+              name: http
+            - containerPort: 9300
+              name: transport
+          env:
+            - name: discovery.type
+              value: "single-node"
+            - name: bootstrap.memory_lock
+              value: "true"
+            - name: TZ
+              value: "Asia/Shanghai"
+            - name: "ES_JAVA_OPTS"
+              value: "-Xms512m -Xmx512m"
+          resources:
+            requests:
+              memory: "1Gi"
+              cpu: "500m"
+            limits:
+              memory: "2Gi"
+              cpu: "1000m"
+          livenessProbe:
+            httpGet:
+              path: /_cluster/health
+              port: 9200
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /_cluster/health
+              port: 9200
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          securityContext:
+            capabilities:
+              add:
+                - IPC_LOCK
+          volumeMounts:
+            - name: data
+              mountPath: /usr/share/elasticsearch/data
+      volumes:
+        - name: data
+          hostPath:
+            path: /opt/elasticsearch
+            type: DirectoryOrCreate
+      securityContext:
+        fsGroup: 1000
diff --git a/skywalking/elasticsearch-service.yaml b/skywalking/elasticsearch-service.yaml
new file mode 100644
index 0000000..352d0d9
--- /dev/null
+++ b/skywalking/elasticsearch-service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: elasticsearch
+  namespace: default
+  labels:
+    app: elasticsearch
+spec:
+  selector:
+    app: elasticsearch
+  ports:
+    - name: http
+      port: 9200
+      targetPort: 9200
+    - name: transport
+      port: 9300
+      targetPort: 9300
+  type: ClusterIP
diff --git a/skywalking/install-skywalking.sh b/skywalking/install-skywalking.sh
new file mode 100755
index 0000000..60ddb63
--- /dev/null
+++ b/skywalking/install-skywalking.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# SkyWalking installation script.
+# Fail fast: stop at the first failing command, unset variable, or failed
+# pipeline stage so a failed rollout is never reported as a completed install.
+set -euo pipefail
+
+# Resolve the manifests relative to this script so it can run from any directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+echo "开始安装 SkyWalking..."
+
+# Check that kubectl is available.
+if ! command -v kubectl &> /dev/null; then
+    echo "错误: kubectl 未安装或不在 PATH 中"
+    exit 1
+fi
+
+# Remind the operator which storage directories must exist (see setup-storage.sh).
+echo "创建存储目录..."
+echo "在 master 节点上创建以下目录:"
+echo " - /opt/elasticsearch"
+echo " - /opt/skywalking-oap"
+echo ""
+echo "请确保 master 节点有足够的磁盘空间(建议至少 100GB)"
+echo ""
+
+# Create the namespace if it does not exist.
+echo "创建命名空间..."
+kubectl create namespace default --dry-run=client -o yaml | kubectl apply -f -
+
+# Deploy the components in dependency order.
+echo "1. 部署 Elasticsearch..."
+kubectl apply -f elasticsearch-deployment.yaml
+kubectl apply -f elasticsearch-service.yaml
+
+# rollout status waits on the Deployment itself, so it cannot fail with
+# "no matching resources found" when the pod has not been created yet.
+echo "等待 Elasticsearch 启动..."
+kubectl rollout status deployment/elasticsearch --timeout=300s
+
+echo "2. 部署 SkyWalking OAP..."
+kubectl apply -f skywalking-oap-deployment.yaml
+kubectl apply -f skywalking-oap-service.yaml
+
+echo "等待 SkyWalking OAP 启动..."
+kubectl rollout status deployment/skywalking-oap --timeout=300s
+
+echo "3. 部署 SkyWalking UI..."
+kubectl apply -f skywalking-ui-deployment.yaml
+kubectl apply -f skywalking-ui-service.yaml
+
+echo "等待 SkyWalking UI 启动..."
+kubectl rollout status deployment/skywalking-ui --timeout=300s
+
+echo "4. 配置 Ingress..."
+kubectl apply -f skywalking-ingress.yaml
+
+echo "SkyWalking 安装完成!"
+echo ""
+echo "访问地址: https://skywalking-ops.t-aaron.com"
+echo ""
+echo "检查服务状态:"
+kubectl get pods -l app=elasticsearch
+kubectl get pods -l app=skywalking-oap
+kubectl get pods -l app=skywalking-ui
+echo ""
+echo "检查服务:"
+kubectl get svc | grep -E "(elasticsearch|skywalking)"
+echo ""
+echo "检查 Ingress:"
+kubectl get ingress skywalking-ingress
diff --git a/skywalking/setup-storage.sh b/skywalking/setup-storage.sh
new file mode 100755
index 0000000..89ad8fe
--- /dev/null
+++ b/skywalking/setup-storage.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# SkyWalking storage directory setup script.
+# Run this script on the master node.
+
+# Fail fast so a failed mkdir/chown is never reported as a completed setup.
+set -euo pipefail
+
+echo "设置 SkyWalking 存储目录..."
+
+# Create the Elasticsearch data directory.
+echo "创建 Elasticsearch 数据目录..."
+sudo mkdir -p /opt/elasticsearch
+sudo chown -R 1000:1000 /opt/elasticsearch
+sudo chmod -R 755 /opt/elasticsearch
+
+# Create the SkyWalking OAP data directory.
+echo "创建 SkyWalking OAP 数据目录..."
+sudo mkdir -p /opt/skywalking-oap
+sudo chown -R 1000:1000 /opt/skywalking-oap
+sudo chmod -R 755 /opt/skywalking-oap
+
+# Show the free disk space under /opt.
+echo "检查磁盘空间..."
+df -h /opt
+
+echo "存储目录设置完成!"
+echo ""
+echo "目录信息:"
+echo " Elasticsearch: /opt/elasticsearch (所有者: 1000:1000)"
+echo " SkyWalking OAP: /opt/skywalking-oap (所有者: 1000:1000)"
+echo ""
+echo "现在可以运行 install-skywalking.sh 安装 SkyWalking"
diff --git a/skywalking/skywalking-ingress.yaml b/skywalking/skywalking-ingress.yaml
new file mode 100644
index 0000000..dfa13fb
--- /dev/null
+++ b/skywalking/skywalking-ingress.yaml
@@ -0,0 +1,24 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: skywalking-ingress
+  namespace: default
+  annotations:
+    traefik.ingress.kubernetes.io/router.entrypoints: web,websecure
+    traefik.ingress.kubernetes.io/router.tls: "true"
+spec:
+  tls:
+    - hosts:
+        - skywalking-ops.t-aaron.com
+      secretName: tls
+  rules:
+    - host: skywalking-ops.t-aaron.com
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: skywalking-ui
+                port:
+                  number: 8080
diff --git a/skywalking/skywalking-oap-deployment.yaml b/skywalking/skywalking-oap-deployment.yaml
new file mode 100644
index 0000000..7180747
--- /dev/null
+++ b/skywalking/skywalking-oap-deployment.yaml
@@ -0,0 +1,87 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: skywalking-oap
+  namespace: default
+  labels:
+    app: skywalking-oap
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: skywalking-oap
+  template:
+    metadata:
+      labels:
+        app: skywalking-oap
+    spec:
+      nodeSelector:
+        node-role.kubernetes.io/control-plane: "true"
+      containers:
+        - name: skywalking-oap
+          image: registry.t-aaron.com/apache/skywalking-oap-server:8.9.1
+          ports:
+            - containerPort: 11800
+              name: grpc
+            - containerPort: 12800
+              name: rest
+          env:
+            - name: SW_STORAGE
+              value: "elasticsearch"
+            - name: SW_STORAGE_ES_CLUSTER_NODES
+              value: "elasticsearch:9200"
+            - name: SW_HEALTH_CHECKER
+              value: "default"
+            - name: SW_TELEMETRY
+              value: "prometheus"
+            - name: TZ
+              value: "Asia/Shanghai"
+            - name: JAVA_OPTS
+              value: "-Xms1024m -Xmx1024m"
+          resources:
+            requests:
+              memory: "1Gi"
+              cpu: "500m"
+            limits:
+              memory: "2Gi"
+              cpu: "1000m"
+          livenessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - "/skywalking/bin/swctl ch"
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          readinessProbe:
+            exec:
+              command:
+                - /bin/sh
+                - -c
+                - "/skywalking/bin/swctl ch"
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          volumeMounts:
+            - name: data
+              mountPath: /skywalking/data
+      volumes:
+        - name: data
+          hostPath:
+            path: /opt/skywalking-oap
+            type: DirectoryOrCreate
+      initContainers:
+        - name: wait-for-elasticsearch
+          image: registry.t-aaron.com/busybox:1.35
+          command:
+            - sh
+            - -c
+            - |
+              until wget --spider -q 'http://elasticsearch:9200/_cluster/health'; do
+                echo "Waiting for Elasticsearch to be ready..."
+                sleep 5
+              done
+              echo "Elasticsearch is ready!"
diff --git a/skywalking/skywalking-oap-service.yaml b/skywalking/skywalking-oap-service.yaml
new file mode 100644
index 0000000..bac7ed4
--- /dev/null
+++ b/skywalking/skywalking-oap-service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: skywalking-oap
+  namespace: default
+  labels:
+    app: skywalking-oap
+spec:
+  selector:
+    app: skywalking-oap
+  ports:
+    - name: grpc
+      port: 11800
+      targetPort: 11800
+    - name: rest
+      port: 12800
+      targetPort: 12800
+  type: ClusterIP
diff --git a/skywalking/skywalking-ui-deployment.yaml b/skywalking/skywalking-ui-deployment.yaml
new file mode 100644
index 0000000..835477c
--- /dev/null
+++ b/skywalking/skywalking-ui-deployment.yaml
@@ -0,0 +1,65 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: skywalking-ui
+  namespace: default
+  labels:
+    app: skywalking-ui
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: skywalking-ui
+  template:
+    metadata:
+      labels:
+        app: skywalking-ui
+    spec:
+      nodeSelector:
+        node-role.kubernetes.io/control-plane: "true"
+      containers:
+        - name: skywalking-ui
+          image: registry.t-aaron.com/apache/skywalking-ui:8.9.1
+          ports:
+            - containerPort: 8080
+              name: http
+          env:
+            - name: SW_OAP_ADDRESS
+              value: "http://skywalking-oap:12800"
+            - name: TZ
+              value: "Asia/Shanghai"
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "100m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          livenessProbe:
+            httpGet:
+              path: /
+              port: 8080
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /
+              port: 8080
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 10
+            failureThreshold: 3
+      initContainers:
+        - name: wait-for-oap
+          image: registry.t-aaron.com/busybox:1.35
+          command:
+            - sh
+            - -c
+            - |
+              until nc -z skywalking-oap 12800; do
+                echo "Waiting for SkyWalking OAP to be ready..."
+                sleep 5
+              done
+              echo "SkyWalking OAP is ready!"
diff --git a/skywalking/skywalking-ui-service.yaml b/skywalking/skywalking-ui-service.yaml
new file mode 100644
index 0000000..792e252
--- /dev/null
+++ b/skywalking/skywalking-ui-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: skywalking-ui
+  namespace: default
+  labels:
+    app: skywalking-ui
+spec:
+  selector:
+    app: skywalking-ui
+  ports:
+    - name: http
+      port: 8080
+      targetPort: 8080
+  type: ClusterIP
diff --git a/skywalking/storage-pvc.yaml b/skywalking/storage-pvc.yaml
new file mode 100644
index 0000000..7ccac7e
--- /dev/null
+++ b/skywalking/storage-pvc.yaml
@@ -0,0 +1,27 @@
+# Storage configuration for production environments
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: elasticsearch-pvc
+  namespace: default
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi
+  storageClassName: local-storage
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: skywalking-oap-pvc
+  namespace: default
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
+  storageClassName: local-storage