153 lines
5.2 KiB
Python
153 lines
5.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
from threading import Thread
|
||
from time import sleep, time
|
||
from traceback import format_exc
|
||
|
||
#from common.Constant import init_progess
|
||
import json,os,psutil,GPUtil,platform,socket
|
||
from kafka import KafkaProducer, KafkaConsumer
|
||
#from util.KafkaUtils import CustomerKafkaProducer
|
||
from common.YmlConstant import service_yml_path, kafka_yml_path
|
||
class uploadGPUinfos(Thread):
    """Background thread that periodically publishes host metrics to Kafka.

    Each cycle collects a snapshot with ``get_system_info()``, serialises it
    to JSON, sends it to the topic named by ``kafka_config['topicGPU']`` and
    then sleeps for ``context['GPUpollInterval']`` seconds.

    Positional arguments (exactly two, in this order):
        context: mapping that provides ``'GPUpollInterval'`` (seconds to
            sleep between uploads).
        kafka_config: mapping that provides ``'bootstrap_servers'`` and
            ``'topicGPU'``.
    """

    # Double-underscore names are mangled per class, both here and in the
    # assignments below, so these entries match the attributes actually set.
    __slots__ = ('__context', '__kafka_config', '__uploadInterval',
                 '__producer', '__topic')

    def __init__(self, *args):
        super().__init__()
        self.__context, self.__kafka_config = args
        self.__uploadInterval = self.__context['GPUpollInterval']
        # Messages are handed to the producer as JSON strings; the serializer
        # turns them into UTF-8 bytes.
        self.__producer = KafkaProducer(
            bootstrap_servers=self.__kafka_config['bootstrap_servers'],
            value_serializer=lambda v: v.encode('utf-8'))
        self.__topic = self.__kafka_config["topicGPU"]

    def run(self):
        """Collect and publish one snapshot per poll interval, forever.

        Errors are printed and never propagate, so a transient failure does
        not terminate the thread.
        """
        # Local import: the module only imports KafkaProducer/KafkaConsumer
        # at file level, and the exception base class is needed below.
        from kafka.errors import KafkaError

        while True:
            try:
                # Take the current system/GPU snapshot and serialise it.
                msg = json.dumps(get_system_info())
                future = self.__producer.send(self.__topic, msg)
                try:
                    # Block until the broker acknowledges or the wait expires.
                    future.get(timeout=10)
                except KafkaError:
                    print(format_exc())
            except Exception as e:
                print(e)
            # Sleep on both the success and the failure path so a persistent
            # error cannot turn the loop into a back-to-back retry.
            sleep(self.__uploadInterval)
|
||
|
||
|
||
|
||
|
||
def get_system_info():
    """Collect a snapshot of CPU, memory, GPU, OS and network information.

    Returns:
        dict with the keys ``'CPU'``, ``'Memory'``, ``'GPU'`` (a list with one
        dict per GPU returned by ``GPUtil.getGPUs()``) and ``'System'``.
        ``'System'`` always contains ``'Local IP Address'``: the first
        non-loopback IPv4 address found, or a placeholder string when none
        exists or the lookup fails.

    Note:
        The two ``psutil.cpu_percent(interval=1)`` calls each block for one
        second, so a call takes at least two seconds.
    """
    system_info = {}

    # CPU. cpu_freq() may return None when the frequency cannot be
    # determined; report None instead of failing the whole snapshot.
    cpu_freq = psutil.cpu_freq()
    system_info['CPU'] = {
        'Physical Cores': psutil.cpu_count(logical=False),
        'Logical Cores': psutil.cpu_count(logical=True),
        'Current Frequency': cpu_freq.current if cpu_freq is not None else None,
        'Usage Per Core': psutil.cpu_percent(interval=1, percpu=True),
        'Total Usage': psutil.cpu_percent(interval=1),
    }

    # Memory: byte counts divided by 1024 ** 3 (reported as GB).
    memory = psutil.virtual_memory()
    gib = 1024 ** 3
    system_info['Memory'] = {
        'Total': memory.total / gib,
        'Available': memory.available / gib,
        'Used': memory.used / gib,
        'Usage Percentage': memory.percent,
    }

    # GPUs, one entry per device reported by GPUtil.
    system_info['GPU'] = [
        {
            'ID': gpu.id,
            'Name': gpu.name,
            'Load': gpu.load * 100,            # load scaled to a percentage
            'Memory Total': gpu.memoryTotal,   # memory figures as reported by GPUtil
            'Memory Used': gpu.memoryUsed,
            'Memory Free': gpu.memoryFree,
            'Temperature': gpu.temperature,
        }
        for gpu in GPUtil.getGPUs()
    ]

    # Operating system / host description.
    system_info['System'] = {
        'Platform': platform.system(),
        'Platform Version': platform.version(),
        'Platform Release': platform.release(),
        'Platform Node': platform.node(),
        'Machine': platform.machine(),
        'Processor': platform.processor(),
    }

    # First IPv4 address that is not a loopback (127.x.x.x) address.
    try:
        system_info['System']['Local IP Address'] = next(
            (
                addr.address
                for addrs in psutil.net_if_addrs().values()
                for addr in addrs
                if addr.family == socket.AF_INET
                and not addr.address.startswith("127.")
            ),
            "No local IP found",
        )
    except Exception:
        # Best effort: the address is informational, so never fail the snapshot.
        system_info['System']['Local IP Address'] = "Unable to retrieve local IP address"

    return system_info
|
||
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: start the uploader against a fixed broker and keep
    # the process alive until interrupted.
    context = {
        'GPUpollInterval': 1,
        'topic': 'server-status',
    }
    kafka_config = {
        'bootstrap_servers': ['192.168.10.66:9092'],
        # uploadGPUinfos reads its destination topic from this key; without
        # it the constructor raises KeyError.
        'topicGPU': context['topic'],
    }

    # Not used below; kept from the original script.
    base_dir, env = '/home/thsw2/WJ/test/tuoheng_algN', 'test'

    upload_thread = uploadGPUinfos(context, kafka_config)
    # Daemon thread: the worker loops forever, so a non-daemon thread would
    # keep the process alive after Ctrl-C. Thread.setDaemon() is deprecated
    # in favour of the attribute.
    upload_thread.daemon = True
    upload_thread.start()

    # The main thread idles while the daemon thread runs.
    try:
        while True:
            sleep(1)
    except KeyboardInterrupt:
        print("主线程退出")
|
||
|
||
|
||
|
||
|