This commit is contained in:
孙小云 2025-09-10 13:11:51 +08:00
parent 16d226bd52
commit 6316bb8b6f
12 changed files with 638 additions and 0 deletions

148
skywalking/README.md Normal file
View File

@ -0,0 +1,148 @@
# SkyWalking 安装指南
## 概述
SkyWalking 是一个开源的 APM(应用性能监控)系统,用于监控、追踪和诊断分布式系统。
## 组件
- **Elasticsearch 7.9.0**: 数据存储后端
- **SkyWalking OAP Server 8.9.1**: 后端分析处理服务
- **SkyWalking UI 8.9.1**: Web 管理界面
## 安装
### 快速安装
```bash
# 进入 skywalking 目录
cd newinstall/skywalking
# 1. 在 master 节点上设置存储目录
sudo ./setup-storage.sh
# 2. 执行安装脚本
./install-skywalking.sh
```
### 手动安装
```bash
# 1. 部署 Elasticsearch
kubectl apply -f elasticsearch-deployment.yaml
kubectl apply -f elasticsearch-service.yaml
# 2. 等待 Elasticsearch 就绪
kubectl wait --for=condition=ready pod -l app=elasticsearch --timeout=300s
# 3. 部署 SkyWalking OAP
kubectl apply -f skywalking-oap-deployment.yaml
kubectl apply -f skywalking-oap-service.yaml
# 4. 等待 OAP 就绪
kubectl wait --for=condition=ready pod -l app=skywalking-oap --timeout=300s
# 5. 部署 SkyWalking UI
kubectl apply -f skywalking-ui-deployment.yaml
kubectl apply -f skywalking-ui-service.yaml
# 6. 配置 Ingress
kubectl apply -f skywalking-ingress.yaml
```
## 访问
- **Web UI**: https://skywalking-ops.t-aaron.com
- **OAP REST API**: http://skywalking-oap:12800
- **OAP gRPC**: skywalking-oap:11800(gRPC 地址,不带 `http://` 前缀)
## 配置说明
### 存储配置
- **存储类型**: HostPath (单节点)
- **存储位置**: Master 节点的 `/opt` 目录
- **Elasticsearch 数据**: `/opt/elasticsearch`
- **SkyWalking OAP 数据**: `/opt/skywalking-oap`
- **数据持久性**: ✅ 持久化存储,Pod 重启后数据保留
### Elasticsearch
- 端口: 9200 (HTTP), 9300 (Transport)
- 内存: 1-2GB
- 存储: 单节点模式,数据存储在 `/opt/elasticsearch`
### SkyWalking OAP
- 端口: 11800 (gRPC), 12800 (REST)
- 内存: 1-2GB
- 存储后端: Elasticsearch
- 数据存储: `/opt/skywalking-oap`
### SkyWalking UI
- 端口: 8080
- 内存: 256-512MB
- 后端: SkyWalking OAP
## 监控应用
### Java 应用
在 Java 应用启动时添加以下 JVM 参数:
```bash
-javaagent:/path/to/skywalking-agent.jar
-Dskywalking.agent.service_name=your-service-name
-Dskywalking.collector.backend_service=skywalking-oap:11800
```
### Spring Boot 应用
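在 `application.properties` 中添加(注意:SkyWalking Java Agent 本身不会读取 Spring 的配置文件;以下配置项要生效,需要写入 Agent 的 `config/agent.config`,或通过上文的 `-D` JVM 参数 / 环境变量传入):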
```properties
# SkyWalking 配置
skywalking.agent.service_name=your-service-name
skywalking.collector.backend_service=skywalking-oap:11800
```
## 故障排除
### 检查服务状态
```bash
# 查看所有 Pod 状态
kubectl get pods | grep -E "(elasticsearch|skywalking)"
# 查看服务状态
kubectl get svc | grep -E "(elasticsearch|skywalking)"
# 查看日志
kubectl logs -l app=elasticsearch
kubectl logs -l app=skywalking-oap
kubectl logs -l app=skywalking-ui
```
### 常见问题
1. **Elasticsearch 启动失败**
   - 检查内存限制
   - 检查存储权限
2. **OAP 连接 Elasticsearch 失败**
   - 检查 Elasticsearch 是否就绪
   - 检查网络连接
3. **UI 无法访问**
   - 检查 Ingress 配置
   - 检查 OAP 服务状态
## 卸载
```bash
# 删除所有资源
kubectl delete -f skywalking-ingress.yaml
kubectl delete -f skywalking-ui-deployment.yaml
kubectl delete -f skywalking-ui-service.yaml
kubectl delete -f skywalking-oap-deployment.yaml
kubectl delete -f skywalking-oap-service.yaml
kubectl delete -f elasticsearch-deployment.yaml
kubectl delete -f elasticsearch-service.yaml
```

View File

@ -0,0 +1,70 @@
---
# Single-node Elasticsearch Deployment whose data directory is backed by the
# `elasticsearch-pvc` PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: elasticsearch
  namespace: default
  labels:
    app: elasticsearch
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a rolling update starts the
  # replacement pod while the old one is still running, and both would mount
  # the same data directory. Elasticsearch locks its data path, so the new pod
  # could never become ready and the rollout would hang.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      containers:
        - name: elasticsearch
          image: registry.t-aaron.com/elasticsearch:7.9.0
          ports:
            - containerPort: 9200
              name: http
            - containerPort: 9300
              name: transport
          env:
            # Run as a standalone node; no cluster discovery.
            - name: discovery.type
              value: "single-node"
            # Ask Elasticsearch to lock its memory; paired with the IPC_LOCK
            # capability granted in securityContext below.
            - name: bootstrap.memory_lock
              value: "true"
            - name: TZ
              value: "Asia/Shanghai"
            # Fixed 512 MiB JVM heap, below the 1Gi memory request.
            - name: ES_JAVA_OPTS
              value: "-Xms512m -Xmx512m"
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Both probes hit the cluster health endpoint on the HTTP port.
          livenessProbe:
            httpGet:
              path: /_cluster/health
              port: 9200
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /_cluster/health
              port: 9200
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          volumeMounts:
            - name: data
              mountPath: /usr/share/elasticsearch/data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: elasticsearch-pvc
      # NOTE(review): gid 1000 is presumably the elasticsearch user's group in
      # the image - confirm against the image.
      securityContext:
        fsGroup: 1000

View File

@ -0,0 +1,73 @@
---
# Single-node Elasticsearch Deployment pinned to the control-plane node, with
# its data directory stored on that node's filesystem at /opt/elasticsearch.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: elasticsearch
  namespace: default
  labels:
    app: elasticsearch
spec:
  replicas: 1
  # Recreate instead of the default RollingUpdate: a rolling update starts the
  # replacement pod while the old one is still running, and both would use the
  # same hostPath directory. Elasticsearch locks its data path, so the new pod
  # could never become ready and the rollout would hang.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: elasticsearch
  template:
    metadata:
      labels:
        app: elasticsearch
    spec:
      # Schedule only on nodes carrying this label with the value "true".
      nodeSelector:
        node-role.kubernetes.io/control-plane: "true"
      containers:
        - name: elasticsearch
          image: registry.t-aaron.com/elasticsearch:7.9.0
          ports:
            - containerPort: 9200
              name: http
            - containerPort: 9300
              name: transport
          env:
            # Run as a standalone node; no cluster discovery.
            - name: discovery.type
              value: "single-node"
            # Ask Elasticsearch to lock its memory; paired with the IPC_LOCK
            # capability granted in securityContext below.
            - name: bootstrap.memory_lock
              value: "true"
            - name: TZ
              value: "Asia/Shanghai"
            # Fixed 512 MiB JVM heap, below the 1Gi memory request.
            - name: ES_JAVA_OPTS
              value: "-Xms512m -Xmx512m"
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Both probes hit the cluster health endpoint on the HTTP port.
          livenessProbe:
            httpGet:
              path: /_cluster/health
              port: 9200
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /_cluster/health
              port: 9200
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          securityContext:
            capabilities:
              add:
                - IPC_LOCK
          volumeMounts:
            - name: data
              mountPath: /usr/share/elasticsearch/data
      volumes:
        # Host directory; created on the node if it does not exist yet.
        - name: data
          hostPath:
            path: /opt/elasticsearch
            type: DirectoryOrCreate
      # NOTE(review): gid 1000 is presumably the elasticsearch user's group in
      # the image - confirm; setup-storage.sh chowns the host directory to
      # 1000:1000 as well.
      securityContext:
        fsGroup: 1000

View File

@ -0,0 +1,18 @@
---
# Cluster-internal Service in front of the pods labelled app=elasticsearch.
apiVersion: v1
kind: Service
metadata:
  name: elasticsearch
  namespace: default
  labels:
    app: elasticsearch
spec:
  type: ClusterIP
  selector:
    app: elasticsearch
  ports:
    # HTTP port; the OAP server and the health probes connect here.
    - name: http
      port: 9200
      targetPort: 9200
    # Transport port.
    - name: transport
      port: 9300
      targetPort: 9300

View File

@ -0,0 +1,63 @@
#!/bin/bash
# SkyWalking install script.
#
# Deploys Elasticsearch, the SkyWalking OAP server, the SkyWalking UI and the
# Ingress, in that order, from the manifests located next to this script.
# Each stage waits (up to 300s) for its Deployment to roll out before the next
# stage starts. The script aborts on the first failing command instead of
# going on to report a successful install.
set -euo pipefail

# Namespace that every manifest in this directory is pinned to.
NAMESPACE="default"

# Manifests are referenced by relative path; resolve them against the script's
# own directory so the script works from any working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "开始安装 SkyWalking..."

# Make sure kubectl is available.
if ! command -v kubectl &> /dev/null; then
    echo "错误: kubectl 未安装或不在 PATH 中"
    exit 1
fi

# Storage directories: this script only prints a reminder; the directories
# themselves are created by setup-storage.sh on the master node.
echo "创建存储目录..."
echo "在 master 节点上创建以下目录:"
echo " - /opt/elasticsearch"
echo " - /opt/skywalking-oap"
echo ""
echo "请确保 master 节点有足够的磁盘空间(建议至少 100GB)"
echo ""

# Create the namespace if it does not exist yet.
echo "创建命名空间..."
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Deploy the components in dependency order.
echo "1. 部署 Elasticsearch..."
kubectl apply -f elasticsearch-deployment.yaml
kubectl apply -f elasticsearch-service.yaml

# `rollout status` is used rather than `kubectl wait pod -l ...` because the
# latter fails immediately when no pod matching the selector exists yet, which
# can happen right after `apply`.
echo "等待 Elasticsearch 启动..."
kubectl rollout status deployment/elasticsearch -n "$NAMESPACE" --timeout=300s

echo "2. 部署 SkyWalking OAP..."
kubectl apply -f skywalking-oap-deployment.yaml
kubectl apply -f skywalking-oap-service.yaml

echo "等待 SkyWalking OAP 启动..."
kubectl rollout status deployment/skywalking-oap -n "$NAMESPACE" --timeout=300s

echo "3. 部署 SkyWalking UI..."
kubectl apply -f skywalking-ui-deployment.yaml
kubectl apply -f skywalking-ui-service.yaml

echo "等待 SkyWalking UI 启动..."
kubectl rollout status deployment/skywalking-ui -n "$NAMESPACE" --timeout=300s

echo "4. 配置 Ingress..."
kubectl apply -f skywalking-ingress.yaml

echo "SkyWalking 安装完成!"
echo ""
echo "访问地址: https://skywalking-ops.t-aaron.com"
echo ""

# Post-install summary.
echo "检查服务状态:"
kubectl get pods -n "$NAMESPACE" -l app=elasticsearch
kubectl get pods -n "$NAMESPACE" -l app=skywalking-oap
kubectl get pods -n "$NAMESPACE" -l app=skywalking-ui
echo ""
echo "检查服务:"
# grep exits non-zero when nothing matches; the summary must not abort the script.
kubectl get svc -n "$NAMESPACE" | grep -E "(elasticsearch|skywalking)" || true
echo ""
echo "检查 Ingress:"
kubectl get ingress skywalking-ingress -n "$NAMESPACE"

30
skywalking/setup-storage.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/bash
# SkyWalking storage directory setup script.
# Run this script on the master node.
#
# Creates /opt/elasticsearch and /opt/skywalking-oap, hands them to uid/gid
# 1000:1000 and sets mode 755 recursively. Aborts on the first failing command
# so that a failed mkdir/chown is not followed by a success message.
set -euo pipefail

# Create one data directory and set its ownership and permissions.
#   $1 - component name used in the progress message
#   $2 - absolute path of the directory
prepare_data_dir() {
    local component="$1"
    local dir="$2"

    echo "创建 ${component} 数据目录..."
    sudo mkdir -p "$dir"
    sudo chown -R 1000:1000 "$dir"
    sudo chmod -R 755 "$dir"
}

echo "设置 SkyWalking 存储目录..."

prepare_data_dir "Elasticsearch" /opt/elasticsearch
prepare_data_dir "SkyWalking OAP" /opt/skywalking-oap

# Show the free space on the filesystem holding /opt.
echo "检查磁盘空间..."
df -h /opt

echo "存储目录设置完成!"
echo ""
echo "目录信息:"
echo " Elasticsearch: /opt/elasticsearch (所有者: 1000:1000)"
echo " SkyWalking OAP: /opt/skywalking-oap (所有者: 1000:1000)"
echo ""
echo "现在可以运行 install-skywalking.sh 安装 SkyWalking"

View File

@ -0,0 +1,24 @@
---
# Ingress that routes skywalking-ops.t-aaron.com to the skywalking-ui Service
# on port 8080.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: skywalking-ingress
  namespace: default
  annotations:
    # Traefik-specific router settings.
    # NOTE(review): TLS is enabled on a router bound to both the `web` and
    # `websecure` entrypoints - confirm plain-HTTP access behaves as intended.
    traefik.ingress.kubernetes.io/router.entrypoints: web,websecure
    traefik.ingress.kubernetes.io/router.tls: 'true'
spec:
  rules:
    - host: skywalking-ops.t-aaron.com
      http:
        paths:
          # Every path under the host goes to the UI.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: skywalking-ui
                port:
                  number: 8080
  tls:
    # References a Secret named `tls`; no manifest in this directory creates it.
    - secretName: tls
      hosts:
        - skywalking-ops.t-aaron.com

View File

@ -0,0 +1,87 @@
---
# SkyWalking OAP server Deployment, pinned to the control-plane node and using
# the `elasticsearch` Service as its storage backend.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: skywalking-oap
  namespace: default
  labels:
    app: skywalking-oap
spec:
  replicas: 1
  selector:
    matchLabels:
      app: skywalking-oap
  template:
    metadata:
      labels:
        app: skywalking-oap
    spec:
      # Schedule only on nodes carrying this label with the value "true".
      nodeSelector:
        node-role.kubernetes.io/control-plane: 'true'
      initContainers:
        # Block pod start-up until the Elasticsearch health endpoint answers,
        # polling every 5 seconds.
        - name: wait-for-elasticsearch
          image: registry.t-aaron.com/busybox:1.35
          command:
            - sh
            - -c
            - |
              until wget --spider -q 'http://elasticsearch:9200/_cluster/health'; do
                echo "Waiting for Elasticsearch to be ready..."
                sleep 5
              done
              echo "Elasticsearch is ready!"
      containers:
        - name: skywalking-oap
          image: registry.t-aaron.com/apache/skywalking-oap-server:8.9.1
          ports:
            - name: grpc
              containerPort: 11800
            - name: rest
              containerPort: 12800
          env:
            # Storage backend selection and its address.
            - name: SW_STORAGE
              value: 'elasticsearch'
            - name: SW_STORAGE_ES_CLUSTER_NODES
              value: 'elasticsearch:9200'
            # NOTE(review): presumably required for the `swctl ch` probes
            # below - confirm.
            - name: SW_HEALTH_CHECKER
              value: 'default'
            - name: SW_TELEMETRY
              value: 'prometheus'
            - name: TZ
              value: 'Asia/Shanghai'
            # Fixed 1 GiB JVM heap, below the 2Gi memory limit.
            - name: JAVA_OPTS
              value: '-Xms1024m -Xmx1024m'
          resources:
            requests:
              cpu: '500m'
              memory: '1Gi'
            limits:
              cpu: '1000m'
              memory: '2Gi'
          # Liveness and readiness both run the same swctl command inside the
          # container; they differ only in the initial delay.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - '/skywalking/bin/swctl ch'
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - '/skywalking/bin/swctl ch'
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          volumeMounts:
            - name: data
              mountPath: /skywalking/data
      volumes:
        # Host directory; created on the node if it does not exist yet.
        - name: data
          hostPath:
            path: /opt/skywalking-oap
            type: DirectoryOrCreate

View File

@ -0,0 +1,18 @@
---
# Cluster-internal Service in front of the pods labelled app=skywalking-oap.
apiVersion: v1
kind: Service
metadata:
  name: skywalking-oap
  namespace: default
  labels:
    app: skywalking-oap
spec:
  type: ClusterIP
  selector:
    app: skywalking-oap
  ports:
    # gRPC port (container port named `grpc` in the OAP Deployment).
    - name: grpc
      port: 11800
      targetPort: 11800
    # REST port; the UI's SW_OAP_ADDRESS points here.
    - name: rest
      port: 12800
      targetPort: 12800

View File

@ -0,0 +1,65 @@
---
# SkyWalking web UI Deployment, pinned to the control-plane node and talking
# to the OAP server through the `skywalking-oap` Service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: skywalking-ui
  namespace: default
  labels:
    app: skywalking-ui
spec:
  replicas: 1
  selector:
    matchLabels:
      app: skywalking-ui
  template:
    metadata:
      labels:
        app: skywalking-ui
    spec:
      # Schedule only on nodes carrying this label with the value "true".
      nodeSelector:
        node-role.kubernetes.io/control-plane: 'true'
      initContainers:
        # Block pod start-up until the OAP REST port accepts TCP connections,
        # polling every 5 seconds.
        - name: wait-for-oap
          image: registry.t-aaron.com/busybox:1.35
          command:
            - sh
            - -c
            - |
              until nc -z skywalking-oap 12800; do
                echo "Waiting for SkyWalking OAP to be ready..."
                sleep 5
              done
              echo "SkyWalking OAP is ready!"
      containers:
        - name: skywalking-ui
          image: registry.t-aaron.com/apache/skywalking-ui:8.9.1
          ports:
            - name: http
              containerPort: 8080
          env:
            # OAP REST endpoint the UI queries.
            - name: SW_OAP_ADDRESS
              value: 'http://skywalking-oap:12800'
            - name: TZ
              value: 'Asia/Shanghai'
          resources:
            requests:
              cpu: '100m'
              memory: '256Mi'
            limits:
              cpu: '500m'
              memory: '512Mi'
          # Both probes request the UI root page; they differ only in the
          # initial delay.
          livenessProbe:
            httpGet:
              path: /
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3

View File

@ -0,0 +1,15 @@
---
# Cluster-internal Service in front of the pods labelled app=skywalking-ui;
# this is the backend the skywalking-ingress Ingress routes to.
apiVersion: v1
kind: Service
metadata:
  name: skywalking-ui
  namespace: default
  labels:
    app: skywalking-ui
spec:
  type: ClusterIP
  selector:
    app: skywalking-ui
  ports:
    - name: http
      port: 8080
      targetPort: 8080

View File

@ -0,0 +1,27 @@
# Storage configuration for production environments.
---
# 50Gi claim; referenced by the PVC-backed Elasticsearch Deployment as
# `elasticsearch-pvc`.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: elasticsearch-pvc
  namespace: default
spec:
  storageClassName: local-storage
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 50Gi
---
# 10Gi claim for the OAP server.
# NOTE(review): the OAP Deployment in this directory mounts a hostPath volume,
# so nothing shown here consumes this claim - confirm it is still needed.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: skywalking-oap-pvc
  namespace: default
spec:
  storageClassName: local-storage
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi