85 lines
4.1 KiB
Python
85 lines
4.1 KiB
Python
"""Excel file models for smart query."""
|
||
|
||
from datetime import datetime

from sqlalchemy import String, Integer, Text, Boolean, JSON, DateTime
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.sql import func

from ..db.base import BaseModel
|
||
|
||
class ExcelFile(BaseModel):
    """Excel file model for storing file metadata.

    Each row describes one uploaded spreadsheet file: where it is stored,
    which sheets it contains, and per-sheet preview/statistics payloads kept
    in JSON columns keyed by sheet name.
    """

    __tablename__ = "excel_files"

    # Basic file information
    # NOTE(review): disabled column kept for reference.
    # user_id: Mapped[int] = mapped_column(Integer, nullable=False)  # user ID
    original_filename: Mapped[str] = mapped_column(String(255), nullable=False)  # original file name
    file_path: Mapped[str] = mapped_column(String(500), nullable=False)  # storage path of the file
    file_size: Mapped[int] = mapped_column(Integer, nullable=False)  # file size in bytes
    file_type: Mapped[str] = mapped_column(String(50), nullable=False)  # file type (.xlsx, .xls, .csv)

    # Excel specific information
    sheet_names: Mapped[list] = mapped_column(JSON, nullable=False)  # list of all sheet names
    default_sheet: Mapped[str | None] = mapped_column(String(100), nullable=True)  # default sheet name

    # Data preview information
    columns_info: Mapped[dict] = mapped_column(JSON, nullable=False)  # column info: {sheet_name: [column_names]}
    preview_data: Mapped[dict] = mapped_column(JSON, nullable=False)  # first 5 rows: {sheet_name: [[row1], [row2], ...]}
    data_types: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # dtype info: {sheet_name: {column: dtype}}

    # Statistics
    total_rows: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # row count per sheet: {sheet_name: row_count}
    total_columns: Mapped[dict | None] = mapped_column(JSON, nullable=True)  # column count per sheet: {sheet_name: column_count}

    # Processing status
    is_processed: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)  # whether the file has been processed
    processing_error: Mapped[str | None] = mapped_column(Text, nullable=True)  # processing error message

    # Upload information
    # NOTE(review): disabled column kept for reference.
    # upload_time: Mapped[DateTime] = mapped_column(DateTime, default=func.now(), nullable=False)  # upload time
    # The Mapped[] annotation names the Python value type (datetime); the SQL
    # column type (DateTime) is given explicitly to mapped_column().
    last_accessed: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)  # last access time

    def __repr__(self) -> str:
        return f"<ExcelFile(id={self.id}, filename='{self.original_filename}')>"

    @property
    def file_size_mb(self) -> float:
        """Get file size in MB, rounded to two decimal places."""
        return round(self.file_size / (1024 * 1024), 2)

    @property
    def sheet_count(self) -> int:
        """Get number of sheets (0 when no sheet names are recorded)."""
        return len(self.sheet_names) if self.sheet_names else 0

    def get_sheet_info(self, sheet_name: str | None = None) -> dict | None:
        """Get information for a specific sheet or the default sheet.

        Args:
            sheet_name: Sheet to look up. When omitted or empty, falls back
                to ``default_sheet`` and then to the first entry of
                ``sheet_names``.

        Returns:
            A dict with the keys ``sheet_name``, ``columns``,
            ``preview_data``, ``data_types``, ``total_rows`` and
            ``total_columns``; or ``None`` when no sheet can be resolved or
            the resolved name is not listed in ``sheet_names``.
        """
        known_sheets = self.sheet_names or []

        if not sheet_name:
            sheet_name = self.default_sheet or (known_sheets[0] if known_sheets else None)

        # ``known_sheets`` is always a list here, so an explicit name on an
        # instance whose ``sheet_names`` is still unset yields None instead
        # of raising TypeError on the membership test.
        if not sheet_name or sheet_name not in known_sheets:
            return None

        return {
            'sheet_name': sheet_name,
            'columns': self.columns_info.get(sheet_name, []) if self.columns_info else [],
            'preview_data': self.preview_data.get(sheet_name, []) if self.preview_data else [],
            'data_types': self.data_types.get(sheet_name, {}) if self.data_types else {},
            'total_rows': self.total_rows.get(sheet_name, 0) if self.total_rows else 0,
            'total_columns': self.total_columns.get(sheet_name, 0) if self.total_columns else 0,
        }

    def get_all_sheets_summary(self) -> list[dict]:
        """Get summary information for all sheets.

        Returns:
            One dict per resolvable sheet, in ``sheet_names`` order, with the
            keys ``sheet_name``, ``columns_count``, ``rows_count`` and
            ``columns``. Returns an empty list when there are no sheets.
        """
        if not self.sheet_names:
            return []

        summary = []
        for sheet_name in self.sheet_names:
            sheet_info = self.get_sheet_info(sheet_name)
            if not sheet_info:
                # get_sheet_info() returns None for names it cannot resolve
                # (e.g. an empty string); such entries are skipped.
                continue
            summary.append({
                'sheet_name': sheet_name,
                'columns_count': len(sheet_info['columns']),
                'rows_count': sheet_info['total_rows'],
                'columns': sheet_info['columns'][:10],  # only the first 10 columns
            })
        return summary