# app/models/article.py

from __future__ import annotations

from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from bson import ObjectId
from motor.motor_asyncio import AsyncIOMotorClient

class Article:
    """Async data-access layer for scraped HTML pages stored in MongoDB.

    Wraps the ``web_scraper.html_pages`` collection of a Motor client and
    exposes CRUD operations plus a few query helpers.  All read methods
    return plain dicts in which the BSON ``_id`` has been replaced by a
    JSON-friendly string ``id`` key.
    """

    def __init__(self, db_client: AsyncIOMotorClient):
        self.db = db_client.web_scraper
        self.collection = self.db.html_pages  # Collection for storing HTML pages

    @staticmethod
    def _serialize(page: Dict[str, Any]) -> Dict[str, Any]:
        """Replace the BSON ``_id`` with a string ``id`` key, in place.

        Assumes *page* came from MongoDB and therefore carries ``_id``
        (raises KeyError otherwise, matching the previous inline code).
        Returns the same dict for convenient chaining.
        """
        page["id"] = str(page.pop("_id"))
        return page

    async def create_html_page(self, page_data: Dict[str, Any]) -> str:
        """Insert a new HTML page record and return its id as a string.

        Note: mutates *page_data* by stamping a UTC ``created_at``.
        (``datetime.utcnow()`` is deprecated; an aware UTC datetime is
        stored identically by BSON.)
        """
        page_data["created_at"] = datetime.now(timezone.utc)
        result = await self.collection.insert_one(page_data)
        return str(result.inserted_id)

    async def get_html_page(self, page_id: str) -> Optional[Dict[str, Any]]:
        """Fetch a single page by its string id.

        Returns None when *page_id* is not a valid ObjectId or no
        document matches.
        """
        if not ObjectId.is_valid(page_id):
            return None
        page = await self.collection.find_one({"_id": ObjectId(page_id)})
        if page is None:
            return None
        return self._serialize(page)

    async def get_pages_by_url(self, url: str) -> List[Dict[str, Any]]:
        """Return every stored snapshot of *url*, newest first."""
        cursor = self.collection.find({"url": url}).sort("created_at", -1)
        return [self._serialize(page) async for page in cursor]

    async def get_all_pages(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Return all scraped pages, newest first, with pagination."""
        # Same query as get_pages_with_filter with an empty filter.
        return await self.get_pages_with_filter({}, limit=limit, skip=skip)

    async def get_pages_with_filter(self, filter_query: Dict[str, Any], limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Return pages matching *filter_query*, newest first, paginated."""
        # MongoDB always applies skip before limit regardless of chaining
        # order; written skip-first to match the actual semantics.
        cursor = (
            self.collection.find(filter_query)
            .sort("created_at", -1)
            .skip(skip)
            .limit(limit)
        )
        return [self._serialize(page) async for page in cursor]

    async def update_html_page(self, page_id: str, update_data: Dict[str, Any]) -> bool:
        """Apply *update_data* via ``$set`` and stamp ``updated_at``.

        Returns True only when a document was actually modified.
        NOTE(review): ``modified_count`` is 0 when the filter matched but
        nothing changed; switch to ``matched_count`` if "found" semantics
        are wanted by callers.
        """
        if not ObjectId.is_valid(page_id):
            return False
        update_data["updated_at"] = datetime.now(timezone.utc)
        result = await self.collection.update_one(
            {"_id": ObjectId(page_id)},
            {"$set": update_data}
        )
        return result.modified_count > 0

    async def delete_html_page(self, page_id: str) -> bool:
        """Delete a page by id; True if a document was removed."""
        if not ObjectId.is_valid(page_id):
            return False
        result = await self.collection.delete_one({"_id": ObjectId(page_id)})
        return result.deleted_count > 0

    async def get_pages_count(self, filter_query: Optional[Dict[str, Any]] = None) -> int:
        """Count pages matching *filter_query* (all pages when None)."""
        return await self.collection.count_documents(filter_query or {})

    async def get_unprocessed_pages(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Get successfully scraped pages not yet processed with the LLM.

        Matches documents where ``processed_with_llm`` is absent or False.
        """
        filter_query = {
            "status": "success",
            "$or": [
                {"processed_with_llm": {"$exists": False}},
                {"processed_with_llm": False}
            ]
        }
        return await self.get_pages_with_filter(filter_query, limit=limit)

    async def get_llm_processed_pages(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Get pages that have been processed with the LLM."""
        return await self.get_pages_with_filter(
            {"processed_with_llm": True}, limit=limit, skip=skip
        )

    async def search_by_content(self, search_term: str, limit: int = 20) -> List[Dict[str, Any]]:
        """Full-text search over LLM-extracted content fields.

        Best-effort index creation on every call: ``create_index`` is a
        no-op for an identical existing index, and any conflict (e.g. a
        differing text index already present) is deliberately ignored so
        the search itself still runs.
        """
        try:
            await self.collection.create_index([
                ("llm_extraction.extracted_content.title", "text"),
                ("llm_extraction.extracted_content.body", "text"),
                ("llm_extraction.extracted_content.summary", "text")
            ])
        except Exception:
            # Narrowed from a bare except: never swallow SystemExit /
            # KeyboardInterrupt; index conflicts are the expected failure.
            pass

        cursor = self.collection.find(
            {"$text": {"$search": search_term}}
        ).limit(limit)
        return [self._serialize(page) async for page in cursor]