hxf/backend/th_agenter/models/excel_file.py

85 lines
4.1 KiB
Python
Raw Normal View History

2025-12-04 14:48:38 +08:00
"""Excel file models for smart query."""
2025-12-16 13:55:16 +08:00
from datetime import datetime

from sqlalchemy import String, Integer, Text, Boolean, JSON, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.sql import func

from ..db.base import BaseModel
class ExcelFile(BaseModel):
    """Metadata record for an uploaded spreadsheet file used by smart query.

    Each row stores where the file lives, which sheets it contains, and
    per-sheet preview/statistics payloads held in JSON columns keyed by
    sheet name.
    """

    __tablename__ = "excel_files"

    # Basic file information
    # Disabled in the original source.
    # NOTE(review): presumably supplied by BaseModel now -- confirm.
    # user_id: Mapped[int] = mapped_column(Integer, nullable=False)  # user ID
    original_filename: Mapped[str] = mapped_column(String(255), nullable=False)  # original file name
    file_path: Mapped[str] = mapped_column(String(500), nullable=False)  # storage path of the file
    file_size: Mapped[int] = mapped_column(Integer, nullable=False)  # file size in bytes
    file_type: Mapped[str] = mapped_column(String(50), nullable=False)  # file type (.xlsx, .xls, .csv)

    # Excel specific information
    sheet_names: Mapped[list] = mapped_column(JSON, nullable=False)  # list of all sheet names
    default_sheet: Mapped[str | None] = mapped_column(String(100), nullable=True)  # default sheet name

    # Data preview information
    columns_info: Mapped[dict] = mapped_column(JSON, nullable=False)  # columns: {sheet_name: [column_names]}
    preview_data: Mapped[dict] = mapped_column(JSON, nullable=False)  # first 5 rows: {sheet_name: [[row1], [row2], ...]}
    data_types: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # dtypes: {sheet_name: {column: dtype}}

    # Statistics
    total_rows: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # row count per sheet: {sheet_name: row_count}
    total_columns: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # column count per sheet: {sheet_name: column_count}

    # Processing status
    is_processed: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)  # whether the file has been processed
    processing_error: Mapped[str | None] = mapped_column(Text, nullable=True)  # processing error message

    # Upload information
    # Disabled in the original source.
    # NOTE(review): presumably replaced by a BaseModel timestamp -- confirm.
    # upload_time: Mapped[DateTime] = mapped_column(DateTime, default=func.now(), nullable=False)  # upload time
    # The annotation is the Python value type; the column type stays DateTime.
    last_accessed: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)  # last access time

    def __repr__(self) -> str:
        return f"<ExcelFile(id={self.id}, filename='{self.original_filename}')>"

    @property
    def file_size_mb(self) -> float:
        """Return the file size in MB, rounded to two decimals."""
        return round(self.file_size / (1024 * 1024), 2)

    @property
    def sheet_count(self) -> int:
        """Return the number of sheets (0 when ``sheet_names`` is empty or unset)."""
        return len(self.sheet_names) if self.sheet_names else 0

    def get_sheet_info(self, sheet_name: str | None = None) -> dict | None:
        """Return the stored information for one sheet.

        Args:
            sheet_name: Sheet to look up. When falsy, ``default_sheet`` is
                used, falling back to the first entry of ``sheet_names``.

        Returns:
            A dict with the keys ``sheet_name``, ``columns``,
            ``preview_data``, ``data_types``, ``total_rows`` and
            ``total_columns``, or ``None`` when no sheet can be resolved or
            the resolved name is not listed in ``sheet_names``.
        """
        # Treat an unset sheet list as empty so the membership test below
        # cannot raise TypeError on a not-yet-populated instance.
        known_sheets = self.sheet_names or []

        if not sheet_name:
            sheet_name = self.default_sheet or (known_sheets[0] if known_sheets else None)

        if not sheet_name or sheet_name not in known_sheets:
            return None

        return {
            'sheet_name': sheet_name,
            'columns': self.columns_info.get(sheet_name, []) if self.columns_info else [],
            'preview_data': self.preview_data.get(sheet_name, []) if self.preview_data else [],
            'data_types': self.data_types.get(sheet_name, {}) if self.data_types else {},
            'total_rows': self.total_rows.get(sheet_name, 0) if self.total_rows else 0,
            'total_columns': self.total_columns.get(sheet_name, 0) if self.total_columns else 0,
        }

    def get_all_sheets_summary(self) -> list[dict]:
        """Return a short summary for every sheet listed in ``sheet_names``.

        Returns:
            One dict per sheet with ``sheet_name``, ``columns_count``,
            ``rows_count`` and ``columns`` (at most the first 10 column
            names). Empty list when there are no sheets.
        """
        if not self.sheet_names:
            return []

        summary = []
        for sheet_name in self.sheet_names:
            sheet_info = self.get_sheet_info(sheet_name)
            if not sheet_info:
                continue
            summary.append({
                'sheet_name': sheet_name,
                'columns_count': len(sheet_info['columns']),
                'rows_count': sheet_info['total_rows'],
                'columns': sheet_info['columns'][:10],  # only show the first 10 columns
            })
        return summary