"""Monitoring service for tracking scraper performance and metrics."""

import logging
import threading
import time
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

@dataclass
class ScraperMetrics:
    """Per-engine scraper statistics accumulated by the monitoring service."""
    engine: str
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    total_results: int = 0
    average_response_time: float = 0.0
    last_used: datetime = field(default_factory=datetime.now)
    error_rate: float = 0.0

    def success_rate(self) -> float:
        """Return the percentage of successful requests (0.0 when none recorded)."""
        if not self.total_requests:
            return 0.0
        return (self.successful_requests / self.total_requests) * 100

@dataclass
class SystemMetrics:
    """System-wide task and cache statistics."""
    total_tasks: int = 0
    completed_tasks: int = 0
    failed_tasks: int = 0
    pending_tasks: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    average_task_time: float = 0.0
    uptime: float = 0.0

    def completion_rate(self) -> float:
        """Return the percentage of tasks completed (0.0 when none recorded)."""
        if not self.total_tasks:
            return 0.0
        return (self.completed_tasks / self.total_tasks) * 100

    def cache_hit_rate(self) -> float:
        """Return the cache hit percentage (0.0 when the cache is untouched)."""
        attempts = self.cache_hits + self.cache_misses
        return 0.0 if attempts == 0 else (self.cache_hits / attempts) * 100

class MonitoringService:
    """Thread-safe service for monitoring scraper performance and system metrics.

    Aggregates per-engine request statistics, task outcomes and cache
    activity behind a single re-entrant lock, and runs a background daemon
    thread that logs a warning whenever the computed health status degrades.
    """

    # Seconds between periodic background health checks.
    _MONITOR_INTERVAL = 60.0
    # Back-off after an unexpected error inside the monitor loop.
    _ERROR_BACKOFF = 5.0

    def __init__(self):
        self.scraper_metrics: Dict[str, ScraperMetrics] = {}
        self.system_metrics = SystemMetrics()
        # Rolling window of the most recent 100 response times per engine.
        self.response_times: Dict[str, deque] = defaultdict(lambda: deque(maxlen=100))
        self.error_logs: deque = deque(maxlen=1000)
        self.start_time = datetime.now()
        # BUGFIX: must be re-entrant. get_performance_report() holds the
        # lock while calling get_system_metrics()/get_scraper_metrics()/
        # get_error_logs(), each of which acquires it again; a plain
        # threading.Lock deadlocked on that re-entry.
        self.lock = threading.RLock()

        # Real-time monitoring. _stop_event lets stop_monitoring() wake the
        # sleeping thread instead of waiting out the full interval.
        self.monitoring_enabled = True
        self._stop_event = threading.Event()
        self.monitoring_thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self.monitoring_thread.start()

    def record_request(self, engine: str, success: bool, response_time: float,
                       result_count: int = 0, error: Optional[str] = None):
        """Record a scraper request.

        Args:
            engine: Name of the scraper engine used.
            success: Whether the request succeeded.
            response_time: Latency in seconds; only folded into the rolling
                average on success.
            result_count: Number of results returned; only counted on success.
            error: Optional error description, stored in the bounded error
                log when the request failed.
        """
        with self.lock:
            if engine not in self.scraper_metrics:
                self.scraper_metrics[engine] = ScraperMetrics(engine=engine)

            metrics = self.scraper_metrics[engine]
            metrics.total_requests += 1
            metrics.last_used = datetime.now()

            if success:
                metrics.successful_requests += 1
                metrics.total_results += result_count
                # Average over the bounded (maxlen=100) response-time window.
                window = self.response_times[engine]
                window.append(response_time)
                metrics.average_response_time = sum(window) / len(window)
            else:
                metrics.failed_requests += 1
                if error:
                    self.error_logs.append({
                        'timestamp': datetime.now(),
                        'engine': engine,
                        'error': error
                    })

            metrics.error_rate = (metrics.failed_requests / metrics.total_requests) * 100

    def record_task(self, status: str, task_time: float = 0.0):
        """Record a task outcome.

        Args:
            status: One of 'completed', 'failed' or 'pending'; any other
                value only increments the total counter.
            task_time: Duration in seconds, folded into the running average
                for completed tasks.
        """
        with self.lock:
            self.system_metrics.total_tasks += 1

            if status == 'completed':
                self.system_metrics.completed_tasks += 1
                # Incremental running mean: avoids storing every duration.
                current_avg = self.system_metrics.average_task_time
                completed = self.system_metrics.completed_tasks
                self.system_metrics.average_task_time = (
                    (current_avg * (completed - 1)) + task_time
                ) / completed
            elif status == 'failed':
                self.system_metrics.failed_tasks += 1
            elif status == 'pending':
                self.system_metrics.pending_tasks += 1

    def record_cache_hit(self, hit: bool):
        """Record a cache hit (hit=True) or miss (hit=False)."""
        with self.lock:
            if hit:
                self.system_metrics.cache_hits += 1
            else:
                self.system_metrics.cache_misses += 1

    @staticmethod
    def _scraper_snapshot(metrics: "ScraperMetrics") -> Dict[str, Any]:
        """Serialize one ScraperMetrics object into a plain dict."""
        return {
            'engine': metrics.engine,
            'total_requests': metrics.total_requests,
            'successful_requests': metrics.successful_requests,
            'failed_requests': metrics.failed_requests,
            'success_rate': metrics.success_rate(),
            'error_rate': metrics.error_rate,
            'total_results': metrics.total_results,
            'average_response_time': metrics.average_response_time,
            'last_used': metrics.last_used.isoformat()
        }

    def get_scraper_metrics(self, engine: Optional[str] = None) -> Dict[str, Any]:
        """Get metrics for one scraper ({} if unknown) or for all scrapers."""
        with self.lock:
            if engine:
                metrics = self.scraper_metrics.get(engine)
                return self._scraper_snapshot(metrics) if metrics else {}

            return {
                name: self._scraper_snapshot(metrics)
                for name, metrics in self.scraper_metrics.items()
            }

    def get_system_metrics(self) -> Dict[str, Any]:
        """Get system-wide metrics, refreshing uptime as a side effect."""
        with self.lock:
            self.system_metrics.uptime = (datetime.now() - self.start_time).total_seconds()

            return {
                'total_tasks': self.system_metrics.total_tasks,
                'completed_tasks': self.system_metrics.completed_tasks,
                'failed_tasks': self.system_metrics.failed_tasks,
                'pending_tasks': self.system_metrics.pending_tasks,
                'completion_rate': self.system_metrics.completion_rate(),
                'cache_hits': self.system_metrics.cache_hits,
                'cache_misses': self.system_metrics.cache_misses,
                'cache_hit_rate': self.system_metrics.cache_hit_rate(),
                'average_task_time': self.system_metrics.average_task_time,
                'uptime_seconds': self.system_metrics.uptime,
                'uptime_human': str(timedelta(seconds=int(self.system_metrics.uptime)))
            }

    def get_error_logs(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Get up to `limit` most recent error log entries (oldest first)."""
        with self.lock:
            return list(self.error_logs)[-limit:]

    def get_performance_report(self) -> Dict[str, Any]:
        """Get a comprehensive performance report.

        The lock is re-entrant, so the nested getter calls below can
        re-acquire it without deadlocking.
        """
        with self.lock:
            return {
                'timestamp': datetime.now().isoformat(),
                'system_metrics': self.get_system_metrics(),
                'scraper_metrics': self.get_scraper_metrics(),
                'top_performers': self._get_top_performers(),
                'health_status': self._get_health_status(),
                'recent_errors': self.get_error_logs(50)
            }

    def _get_top_performers(self) -> Dict[str, Any]:
        """Identify the best scrapers by success rate, volume and latency."""
        with self.lock:
            if not self.scraper_metrics:
                return {}

            items = list(self.scraper_metrics.items())
            # Success rate first, total results as the tie-breaker.
            best = max(items, key=lambda x: (x[1].success_rate(), x[1].total_results))
            return {
                'best_success_rate': best[0],
                'most_results': max(items, key=lambda x: x[1].total_results)[0],
                'fastest_response': min(items, key=lambda x: x[1].average_response_time)[0]
            }

    def _get_health_status(self) -> Dict[str, Any]:
        """Compute an overall health score (0-100), status label and issues.

        Deductions: <80% completion rate -20, <50% cache hit rate -10,
        each engine with >30% error rate -15.
        """
        # Hold the lock for the whole computation so the scraper_metrics
        # iteration is safe when called from the monitor thread.
        with self.lock:
            system_metrics = self.get_system_metrics()

            health_score = 100.0
            issues = []

            if system_metrics['completion_rate'] < 80:
                health_score -= 20
                issues.append(f"Low completion rate: {system_metrics['completion_rate']:.1f}%")

            if system_metrics['cache_hit_rate'] < 50:
                health_score -= 10
                issues.append(f"Low cache hit rate: {system_metrics['cache_hit_rate']:.1f}%")

            for engine, metrics in self.scraper_metrics.items():
                if metrics.error_rate > 30:
                    health_score -= 15
                    issues.append(f"{engine} high error rate: {metrics.error_rate:.1f}%")

            if health_score >= 90:
                status = "healthy"
            elif health_score >= 70:
                status = "warning"
            else:
                status = "critical"

            return {
                'status': status,
                'health_score': max(0, health_score),
                'issues': issues
            }

    def reset_metrics(self):
        """Reset all metrics and restart the uptime clock."""
        with self.lock:
            self.scraper_metrics.clear()
            self.system_metrics = SystemMetrics()
            self.response_times.clear()
            self.error_logs.clear()
            self.start_time = datetime.now()
            logger.info("Metrics reset")

    def _monitor_loop(self):
        """Background loop: log a warning whenever health degrades."""
        while self.monitoring_enabled and not self._stop_event.is_set():
            try:
                health = self._get_health_status()
                if health['status'] != 'healthy':
                    logger.warning(f"Health check: {health['status']} - {health['issues']}")

                # Event.wait() returns early once stop_monitoring() fires,
                # so shutdown is not delayed by the full interval.
                self._stop_event.wait(self._MONITOR_INTERVAL)
            except Exception as e:
                logger.error(f"Error in monitoring loop: {e}")
                self._stop_event.wait(self._ERROR_BACKOFF)

    def stop_monitoring(self):
        """Stop the background monitoring thread and wait for it to exit."""
        self.monitoring_enabled = False
        # Wake the monitor thread immediately instead of waiting out its sleep.
        self._stop_event.set()
        if self.monitoring_thread.is_alive():
            self.monitoring_thread.join()
        logger.info("Monitoring service stopped")

# Global monitoring service instance.
# NOTE(review): constructing this at import time starts the background
# monitoring daemon thread as a side effect of importing the module.
monitoring_service = MonitoringService()
