"""
API endpoints for article management and retrieval
"""
import logging
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
from bson import ObjectId

from fastapi import APIRouter, HTTPException, Query, Path
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field

from app.models.article import Article, ArticleResponse, SearchEngine, ArticleStatus
from app.config.settings import get_settings

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/articles", tags=["articles"])


class ArticleFilter(BaseModel):
    """Filter model for article queries"""
    search_query: Optional[str] = None
    search_engine: Optional[SearchEngine] = None
    source_domain: Optional[str] = None
    author: Optional[str] = None
    min_word_count: Optional[int] = None
    max_word_count: Optional[int] = None
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    is_duplicate: Optional[bool] = None


class ArticleListResponse(BaseModel):
    """Response model for article list"""
    articles: List[ArticleResponse]
    total_count: int
    page: int
    page_size: int
    total_pages: int


class ArticleStatsResponse(BaseModel):
    """Response model for article statistics"""
    total_articles: int
    articles_by_engine: Dict[str, int]
    articles_by_domain: Dict[str, int]
    articles_by_status: Dict[str, int]
    duplicates_count: int
    avg_word_count: float
    total_word_count: int
    articles_today: int
    articles_this_week: int
    articles_this_month: int
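

# Shared helper used by the list and detail endpoints below: maps an Article
# document onto the ArticleResponse schema so the field-by-field copy is
# written only once.
def _article_to_response(article: Article) -> ArticleResponse:
    """Convert an Article document into an ArticleResponse."""
    return ArticleResponse(
        id=str(article.id),
        title=article.title,
        url=article.url,
        content=article.content,
        snippet=article.snippet,
        author=article.author,
        published_date=article.published_date,
        scraped_date=article.scraped_date,
        source_domain=article.source_domain,
        search_query=article.search_query,
        search_engine=article.search_engine,
        search_rank=article.search_rank,
        word_count=article.word_count,
        reading_time=article.reading_time,
        images=article.images,
        status=article.status,
        is_duplicate=article.is_duplicate,
        duplicate_of=article.duplicate_of,
        content_hash=article.content_hash,
        quality_score=article.quality_score,
        sentiment_score=article.sentiment_score,
        language=article.language,
        tags=article.tags,
        social_shares=article.social_shares,
        social_comments=article.social_comments,
        social_likes=article.social_likes
    )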


@router.get("/", response_model=ArticleListResponse)
async def list_articles(
    page: int = Query(1, ge=1, description="Page number"),
    page_size: int = Query(20, ge=1, le=100, description="Number of articles per page"),
    search_query: Optional[str] = Query(None, description="Filter by search query"),
    search_engine: Optional[SearchEngine] = Query(None, description="Filter by search engine"),
    source_domain: Optional[str] = Query(None, description="Filter by source domain"),
    author: Optional[str] = Query(None, description="Filter by author"),
    min_word_count: Optional[int] = Query(None, ge=0, description="Minimum word count"),
    max_word_count: Optional[int] = Query(None, ge=0, description="Maximum word count"),
    date_from: Optional[datetime] = Query(None, description="Filter articles from this date"),
    date_to: Optional[datetime] = Query(None, description="Filter articles to this date"),
    is_duplicate: Optional[bool] = Query(None, description="Filter duplicates"),
    sort_by: str = Query("scraped_date", description="Sort field"),
    sort_order: str = Query("desc", regex="^(asc|desc)$", description="Sort order")
):
    """
    Get a paginated list of articles with optional filtering
    """
    try:
        # Build query filter
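        # MongoEngine keyword filters: `field__icontains` does a case-insensitive
        # substring match, while `field__gte` / `field__lte` map to MongoDB's
        # $gte / $lte range operators.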
        query_filter = {}
        
        if search_query:
            query_filter['search_query__icontains'] = search_query
        
        if search_engine:
            query_filter['search_engine'] = search_engine.value
        
        if source_domain:
            query_filter['source_domain__icontains'] = source_domain
        
        if author:
            query_filter['author__icontains'] = author
        
        if min_word_count is not None:
            query_filter['word_count__gte'] = min_word_count
        
        if max_word_count is not None:
            query_filter['word_count__lte'] = max_word_count
        
        if date_from:
            query_filter['scraped_date__gte'] = date_from
        
        if date_to:
            query_filter['scraped_date__lte'] = date_to
        
        if is_duplicate is not None:
            query_filter['is_duplicate'] = is_duplicate
        
        # Calculate pagination
        skip = (page - 1) * page_size
        
        # Build sort order
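        # MongoEngine's order_by() interprets a leading "-" as descending order.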
        sort_field = sort_by
        if sort_order == "desc":
            sort_field = f"-{sort_by}"
        
        # Get articles
        articles_query = Article.objects(**query_filter)
        total_count = articles_query.count()
        articles = articles_query.order_by(sort_field).skip(skip).limit(page_size)
        
        # Convert to response format
        article_responses = [_article_to_response(article) for article in articles]
        
        # Calculate total pages
        total_pages = (total_count + page_size - 1) // page_size
        
        return ArticleListResponse(
            articles=article_responses,
            total_count=total_count,
            page=page,
            page_size=page_size,
            total_pages=total_pages
        )
        
    except Exception as e:
        logger.error(f"Error listing articles: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to list articles: {str(e)}")


@router.get("/{article_id}", response_model=ArticleResponse)
async def get_article(
    article_id: str = Path(..., description="Article ID")
):
    """
    Get a specific article by ID
    """
    try:
        # Validate ObjectId
        if not ObjectId.is_valid(article_id):
            raise HTTPException(status_code=400, detail="Invalid article ID format")
        
        article = Article.objects(id=article_id).first()
        
        if not article:
            raise HTTPException(status_code=404, detail="Article not found")
        
        return _article_to_response(article)
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting article: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get article: {str(e)}")


@router.delete("/{article_id}")
async def delete_article(
    article_id: str = Path(..., description="Article ID")
):
    """
    Delete a specific article by ID
    """
    try:
        # Validate ObjectId
        if not ObjectId.is_valid(article_id):
            raise HTTPException(status_code=400, detail="Invalid article ID format")
        
        article = Article.objects(id=article_id).first()
        
        if not article:
            raise HTTPException(status_code=404, detail="Article not found")
        
        article.delete()
        
        logger.info(f"Deleted article {article_id}")
        
        return {"message": "Article deleted successfully", "article_id": article_id}
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error deleting article: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to delete article: {str(e)}")


@router.get("/search/similar/{article_id}")
async def find_similar_articles(
    article_id: str = Path(..., description="Article ID"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of similar articles")
):
    """
    Find articles similar to the given article
    """
    try:
        # Validate ObjectId
        if not ObjectId.is_valid(article_id):
            raise HTTPException(status_code=400, detail="Invalid article ID format")
        
        article = Article.objects(id=article_id).first()
        
        if not article:
            raise HTTPException(status_code=404, detail="Article not found")
        
        # Find similar articles based on various criteria
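        # (a cheap heuristic: the union of same-query, same-domain and same-author
        # matches, deduplicated below; no content-based scoring is performed)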
        similar_articles = []
        
        # 1. Same search query
        if article.search_query:
            query_similar = Article.objects(
                search_query=article.search_query,
                id__ne=article.id
            ).limit(max(1, limit // 2))  # limit(0) would mean "no limit" in MongoDB
            similar_articles.extend(query_similar)
        
        # 2. Same domain
        if article.source_domain:
            domain_similar = Article.objects(
                source_domain=article.source_domain,
                id__ne=article.id
            ).limit(max(1, limit // 2))
            similar_articles.extend(domain_similar)
        
        # 3. Same author
        if article.author:
            author_similar = Article.objects(
                author=article.author,
                id__ne=article.id
            ).limit(max(1, limit // 4))
            similar_articles.extend(author_similar)
        
        # Remove duplicates and limit results
        seen_ids = set()
        unique_similar = []
        
        for similar in similar_articles:
            if str(similar.id) not in seen_ids:
                unique_similar.append(similar)
                seen_ids.add(str(similar.id))
                
                if len(unique_similar) >= limit:
                    break
        
        # Convert to response format
        similar_responses = []
        for similar in unique_similar:
            similar_responses.append({
                "id": str(similar.id),
                "title": similar.title,
                "url": similar.url,
                "snippet": similar.snippet,
                "source_domain": similar.source_domain,
                "author": similar.author,
                "scraped_date": similar.scraped_date,
                "word_count": similar.word_count,
                "similarity_reason": "same_query" if similar.search_query == article.search_query else
                                  "same_domain" if similar.source_domain == article.source_domain else
                                  "same_author" if similar.author == article.author else "other"
            })
        
        return {
            "article_id": article_id,
            "similar_articles": similar_responses,
            "total_found": len(similar_responses)
        }
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error finding similar articles: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to find similar articles: {str(e)}")


@router.get("/stats/overview", response_model=ArticleStatsResponse)
async def get_article_stats():
    """
    Get comprehensive article statistics
    """
    try:
        # Basic counts
        total_articles = Article.objects.count()
        duplicates_count = Article.objects(is_duplicate=True).count()
        
        # Articles by search engine
        articles_by_engine = {}
        for engine in SearchEngine:
            count = Article.objects(search_engine=engine.value).count()
            if count > 0:
                articles_by_engine[engine.value] = count
        
        # Articles by status
        articles_by_status = {}
        for status in ArticleStatus:
            count = Article.objects(status=status.value).count()
            if count > 0:
                articles_by_status[status.value] = count
        
        # Top domains
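        # Aggregate in MongoDB: group by source_domain, count each group, and keep
        # the ten largest.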
        pipeline = [
            {"$group": {"_id": "$source_domain", "count": {"$sum": 1}}},
            {"$sort": {"count": -1}},
            {"$limit": 10}
        ]
        domain_results = Article.objects.aggregate(pipeline)
        articles_by_domain = {result["_id"]: result["count"] for result in domain_results if result["_id"]}
        
        # Word count statistics
        word_count_pipeline = [
            {"$group": {
                "_id": None,
                "avg_word_count": {"$avg": "$word_count"},
                "total_word_count": {"$sum": "$word_count"}
            }}
        ]
        word_stats = list(Article.objects.aggregate(word_count_pipeline))
        avg_word_count = word_stats[0]["avg_word_count"] if word_stats else 0
        total_word_count = word_stats[0]["total_word_count"] if word_stats else 0
        
        # Time-based statistics
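        # "this week" / "this month" are rolling 7- and 30-day windows rather than
        # calendar periods.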
        now = datetime.utcnow()
        today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
        week_start = today_start - timedelta(days=7)
        month_start = today_start - timedelta(days=30)
        
        articles_today = Article.objects(scraped_date__gte=today_start).count()
        articles_this_week = Article.objects(scraped_date__gte=week_start).count()
        articles_this_month = Article.objects(scraped_date__gte=month_start).count()
        
        return ArticleStatsResponse(
            total_articles=total_articles,
            articles_by_engine=articles_by_engine,
            articles_by_domain=articles_by_domain,
            articles_by_status=articles_by_status,
            duplicates_count=duplicates_count,
            avg_word_count=round(avg_word_count, 2),
            total_word_count=total_word_count,
            articles_today=articles_today,
            articles_this_week=articles_this_week,
            articles_this_month=articles_this_month
        )
        
    except Exception as e:
        logger.error(f"Error getting article stats: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get article stats: {str(e)}")


@router.post("/cleanup/duplicates")
async def cleanup_duplicates():
    """
    Remove duplicate articles based on content hash
    """
    try:
        # Find articles with duplicate content hashes
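        # Group by content_hash and keep only hashes shared by more than one
        # article; each group lists the candidate duplicate documents.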
        pipeline = [
            {"$match": {"content_hash": {"$exists": True, "$ne": None}}},
            {"$group": {
                "_id": "$content_hash",
                "articles": {"$push": {"id": "$_id", "scraped_date": "$scraped_date"}},
                "count": {"$sum": 1}
            }},
            {"$match": {"count": {"$gt": 1}}}
        ]
        
        duplicate_groups = list(Article.objects.aggregate(pipeline))
        
        deleted_count = 0
        
        for group in duplicate_groups:
            articles = group["articles"]
            # Sort by scraped_date and keep the earliest copy; articles without a
            # scraped_date sort last so a dated copy is preferred when available
            articles.sort(key=lambda x: x["scraped_date"] or datetime.max)
            
            # Delete all but the first article
            for article_info in articles[1:]:
                Article.objects(id=article_info["id"]).delete()
                deleted_count += 1
        
        logger.info(f"Cleaned up {deleted_count} duplicate articles")
        
        return {
            "message": "Duplicate cleanup completed",
            "deleted_count": deleted_count,
            "duplicate_groups": len(duplicate_groups)
        }
        
    except Exception as e:
        logger.error(f"Error cleaning up duplicates: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to cleanup duplicates: {str(e)}")


@router.get("/export/csv")
async def export_articles_csv(
    search_query: Optional[str] = Query(None, description="Filter by search query"),
    search_engine: Optional[SearchEngine] = Query(None, description="Filter by search engine"),
    date_from: Optional[datetime] = Query(None, description="Filter articles from this date"),
    date_to: Optional[datetime] = Query(None, description="Filter articles to this date"),
    limit: int = Query(1000, ge=1, le=10000, description="Maximum number of articles to export")
):
    """
    Export articles to CSV format
    """
    try:
        import csv
        import io
        
        # Build query filter
        query_filter = {}
        
        if search_query:
            query_filter['search_query__icontains'] = search_query
        
        if search_engine:
            query_filter['search_engine'] = search_engine.value
        
        if date_from:
            query_filter['scraped_date__gte'] = date_from
        
        if date_to:
            query_filter['scraped_date__lte'] = date_to
        
        # Get articles
        articles = Article.objects(**query_filter).limit(limit)
        
        # Create CSV content
        output = io.StringIO()
        writer = csv.writer(output)
        
        # Write header
        writer.writerow([
            'ID', 'Title', 'URL', 'Author', 'Source Domain', 'Search Query',
            'Search Engine', 'Search Rank', 'Word Count', 'Reading Time',
            'Scraped Date', 'Published Date', 'Status', 'Is Duplicate'
        ])
        
        # Write data rows, counting how many are actually exported
        exported_count = 0
        for article in articles:
            exported_count += 1
            writer.writerow([
                str(article.id),
                article.title or '',
                article.url or '',
                article.author or '',
                article.source_domain or '',
                article.search_query or '',
                article.search_engine or '',
                article.search_rank or '',
                article.word_count or 0,
                article.reading_time or 0,
                article.scraped_date.isoformat() if article.scraped_date else '',
                article.published_date.isoformat() if article.published_date else '',
                article.status or '',
                article.is_duplicate or False
            ])
        
        # Return the CSV text embedded in a JSON payload
        csv_content = output.getvalue()
        output.close()
        
        return JSONResponse(
            content={"csv_data": csv_content, "total_exported": exported_count}
        )
        
    except Exception as e:
        logger.error(f"Error exporting articles: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to export articles: {str(e)}")