87 lines
4.1 KiB
Python
87 lines
4.1 KiB
Python
|
|
"""Excel file models for smart query."""
|
|||
|
|
|
|||
|
|
from datetime import datetime
|
|||
|
|
from typing import Optional
|
|||
|
|
from sqlalchemy.orm import Mapped, mapped_column
|
|||
|
|
from sqlalchemy import String, Integer, Text, Boolean, JSON, DateTime
|
|||
|
|
from sqlalchemy.sql import func
|
|||
|
|
|
|||
|
|
from ..db.base import BaseModel
|
|||
|
|
|
|||
|
|
class ExcelFile(BaseModel):
|
|||
|
|
"""Excel file model for storing file metadata."""
|
|||
|
|
__tablename__ = "excel_files"
|
|||
|
|
# Basic file information
|
|||
|
|
# user_id: Mapped[int] = mapped_column(Integer, nullable=False) # 用户ID
|
|||
|
|
original_filename: Mapped[str] = mapped_column(String(255), nullable=False) # 原始文件名
|
|||
|
|
file_path: Mapped[str] = mapped_column(String(500), nullable=False) # 文件存储路径
|
|||
|
|
file_size: Mapped[int] = mapped_column(Integer, nullable=False) # 文件大小(字节)
|
|||
|
|
file_type: Mapped[str] = mapped_column(String(50), nullable=False) # 文件类型 (.xlsx, .xls, .csv)
|
|||
|
|
|
|||
|
|
# Excel specific information
|
|||
|
|
sheet_names: Mapped[list] = mapped_column(JSON, nullable=False) # 所有sheet名称列表
|
|||
|
|
default_sheet: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) # 默认sheet名称
|
|||
|
|
|
|||
|
|
# Data preview information
|
|||
|
|
columns_info: Mapped[dict] = mapped_column(JSON, nullable=False) # 列信息:{sheet_name: [column_names]}
|
|||
|
|
preview_data: Mapped[dict] = mapped_column(JSON, nullable=False) # 前5行数据:{sheet_name: [[row1], [row2], ...]}
|
|||
|
|
data_types: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # 数据类型信息:{sheet_name: {column: dtype}}
|
|||
|
|
|
|||
|
|
# Statistics
|
|||
|
|
total_rows: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # 每个sheet的总行数:{sheet_name: row_count}
|
|||
|
|
total_columns: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # 每个sheet的总列数:{sheet_name: column_count}
|
|||
|
|
|
|||
|
|
# Processing status
|
|||
|
|
is_processed: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False) # 是否已处理
|
|||
|
|
processing_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # 处理错误信息
|
|||
|
|
|
|||
|
|
# Upload information
|
|||
|
|
# upload_time: Mapped[DateTime] = mapped_column(DateTime, default=func.now(), nullable=False) # 上传时间
|
|||
|
|
last_accessed: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) # 最后访问时间
|
|||
|
|
|
|||
|
|
def __repr__(self):
|
|||
|
|
return f"<ExcelFile(id={self.id}, filename='{self.original_filename}')>"
|
|||
|
|
|
|||
|
|
@property
|
|||
|
|
def file_size_mb(self):
|
|||
|
|
"""Get file size in MB."""
|
|||
|
|
return round(self.file_size / (1024 * 1024), 2)
|
|||
|
|
|
|||
|
|
@property
|
|||
|
|
def sheet_count(self):
|
|||
|
|
"""Get number of sheets."""
|
|||
|
|
return len(self.sheet_names) if self.sheet_names else 0
|
|||
|
|
|
|||
|
|
def get_sheet_info(self, sheet_name: str = None):
|
|||
|
|
"""Get information for a specific sheet or default sheet."""
|
|||
|
|
if not sheet_name:
|
|||
|
|
sheet_name = self.default_sheet or (self.sheet_names[0] if self.sheet_names else None)
|
|||
|
|
|
|||
|
|
if not sheet_name or sheet_name not in self.sheet_names:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
return {
|
|||
|
|
'sheet_name': sheet_name,
|
|||
|
|
'columns': self.columns_info.get(sheet_name, []) if self.columns_info else [],
|
|||
|
|
'preview_data': self.preview_data.get(sheet_name, []) if self.preview_data else [],
|
|||
|
|
'data_types': self.data_types.get(sheet_name, {}) if self.data_types else {},
|
|||
|
|
'total_rows': self.total_rows.get(sheet_name, 0) if self.total_rows else 0,
|
|||
|
|
'total_columns': self.total_columns.get(sheet_name, 0) if self.total_columns else 0
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
def get_all_sheets_summary(self):
|
|||
|
|
"""Get summary information for all sheets."""
|
|||
|
|
if not self.sheet_names:
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
summary = []
|
|||
|
|
for sheet_name in self.sheet_names:
|
|||
|
|
sheet_info = self.get_sheet_info(sheet_name)
|
|||
|
|
if sheet_info:
|
|||
|
|
summary.append({
|
|||
|
|
'sheet_name': sheet_name,
|
|||
|
|
'columns_count': len(sheet_info['columns']),
|
|||
|
|
'rows_count': sheet_info['total_rows'],
|
|||
|
|
'columns': sheet_info['columns'][:10] # 只显示前10列
|
|||
|
|
})
|
|||
|
|
return summary
|