"""Knowledge base models.""" from typing import Optional from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy import String, Integer, Text, Boolean, JSON from ..db.base import BaseModel class KnowledgeBase(BaseModel): """Knowledge base model.""" __tablename__ = "knowledge_bases" name: Mapped[str] = mapped_column(String(100), unique=False, index=True, nullable=False) description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) embedding_model: Mapped[str] = mapped_column(String(100), nullable=False, default="sentence-transformers/all-MiniLM-L6-v2") chunk_size: Mapped[int] = mapped_column(Integer, nullable=False, default=1000) chunk_overlap: Mapped[int] = mapped_column(Integer, nullable=False, default=200) is_active: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False) # Vector database settings vector_db_type: Mapped[str] = mapped_column(String(50), nullable=False, default="chroma") collection_name: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) # For vector DB collection # Relationships removed to eliminate foreign key constraints def __repr__(self): return f"" # Relationships are commented out to remove foreign key constraints, so these properties should be updated # @property # def document_count(self): # """Get the number of documents in this knowledge base.""" # return len(self.documents) # @property # def active_document_count(self): # """Get the number of active documents in this knowledge base.""" # return len([doc for doc in self.documents if doc.is_processed]) class Document(BaseModel): """Document model.""" __tablename__ = "documents" knowledge_base_id: Mapped[int] = mapped_column(Integer, nullable=False) # Removed ForeignKey("knowledge_bases.id") filename: Mapped[str] = mapped_column(String(255), nullable=False) original_filename: Mapped[str] = mapped_column(String(255), nullable=False) file_path: Mapped[str] = mapped_column(String(500), nullable=False) file_size: Mapped[int] = mapped_column(Integer, nullable=False) # in bytes file_type: Mapped[str] = mapped_column(String(50), nullable=False) # .pdf, .txt, .docx, etc. mime_type: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) # Processing status is_processed: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) processing_error: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Content and metadata content: Mapped[Optional[str]] = mapped_column(Text, nullable=True) # Extracted text content doc_metadata: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) # Additional metadata # Chunking information chunk_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False) # Embedding information embedding_model: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) vector_ids: Mapped[Optional[list]] = mapped_column(JSON, nullable=True) # Store vector database IDs for chunks # Relationships removed to eliminate foreign key constraints def __repr__(self): return f"" @property def file_size_mb(self): """Get file size in MB.""" return round(self.file_size / (1024 * 1024), 2) @property def is_text_file(self): """Check if document is a text file.""" return self.file_type.lower() in ['.txt', '.md', '.csv'] @property def is_pdf_file(self): """Check if document is a PDF file.""" return self.file_type.lower() == '.pdf' @property def is_office_file(self): """Check if document is an Office file.""" return self.file_type.lower() in ['.docx', '.xlsx', '.pptx']