"""High-performance multi-threaded Selenium scraper manager with enhanced error handling."""

import logging
import asyncio
import time
import concurrent.futures
from typing import List, Dict, Optional, Any
from threading import Lock
import multiprocessing as mp
import traceback

from .google_fast import GoogleFastScraper
from .bing_fast import BingFastScraper
from .duckduckgo_fast import DuckDuckGoFastScraper
from .yandex_fast import YandexFastScraper
from .baidu_fast import BaiduFastScraper

from src.models.schemas import SearchResult, ScrapeRequest
from src.config.settings import MAX_WORKERS
from src.services.cache_service import cache_service
from src.services.monitoring_service import monitoring_service

logger = logging.getLogger(__name__)

class MultiEngineManager:
    """High-performance manager for parallel Selenium scrapers with enhanced error handling."""
    
    def __init__(self, use_cache: bool = True, headless: bool = True, max_workers: Optional[int] = None):
        self.use_cache = use_cache
        self.headless = headless
        self.max_workers = max_workers or min(MAX_WORKERS, mp.cpu_count())
        self._lock = Lock()
        
        # Engine classes
        self.engine_classes = {
            "google": GoogleFastScraper,
            "bing": BingFastScraper,
            "duckduckgo": DuckDuckGoFastScraper,
            "yandex": YandexFastScraper,
            "baidu": BaiduFastScraper,
        }
        
        # Performance tracking
        self.stats = {
            'total_searches': 0,
            'total_results': 0,
            'total_time': 0,
            'total_errors': 0,
            'engine_stats': {}
        }
        
        # Thread pool for concurrent scraping
        self.executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.max_workers,
            thread_name_prefix="selenium-scraper"
        )
        
        # Cache service
        self.cache_service = cache_service if use_cache else None
        self.monitoring_service = monitoring_service
        
        logger.info(f"Initialized MultiEngineManager with {self.max_workers} workers")
    
    def get_available_engines(self) -> List[str]:
        """Get list of available engines."""
        return list(self.engine_classes.keys())
    
    def get_engine_descriptions(self) -> Dict[str, str]:
        """Get engine descriptions."""
        return {
            "google": "Google - Most popular search engine (Fast Selenium)",
            "bing": "Microsoft Bing (Fast Selenium)",
            "duckduckgo": "DuckDuckGo - Privacy focused (Fast Selenium)",
            "yandex": "Yandex - Russian search engine (Fast Selenium)",
            "baidu": "Baidu - Chinese search engine (Fast Selenium)",
            "all": "Search across all available engines"
        }
    
    def _resolve_engines(self, engines: List[str]) -> List[str]:
        """Resolve engine list."""
        if "all" in engines:
            return list(self.engine_classes.keys())
        return [e for e in engines if e in self.engine_classes]
    
    def _scrape_single_engine_worker(self, engine_name: str, query: str, max_results: int) -> Dict[str, Any]:
        """Worker function for single engine scraping with enhanced error handling."""
        start_time = time.time()
        results = []
        error = None
        error_type = None
        
        try:
            # Create scraper instance
            scraper_class = self.engine_classes[engine_name]
            scraper = scraper_class(headless=self.headless)
            logger.debug(f"Created {engine_name} scraper instance")
            
            try:
                # Perform the search
                results = scraper.search(query, max_results)
                
                logger.info(f"{engine_name}: Found {len(results)} results in {time.time() - start_time:.2f}s")
                
            except Exception as search_error:
                error_type = "search_error"
                error = f"Search failed: {str(search_error)}"
                logger.error(f"Search error in {engine_name}: {error}")
                
            finally:
                # Always close the scraper, even when the search failed
                try:
                    scraper.close()
                except Exception as close_error:
                    logger.warning(f"Error closing {engine_name} scraper: {close_error}")
                
        except Exception as init_error:
            # Constructor failures (e.g. WebDriver startup) are classified separately
            error_type = "initialization_error"
            error = f"Failed to initialize scraper: {str(init_error)}"
            logger.error(f"Initialization error for {engine_name}: {error}")
            logger.debug(f"Full traceback: {traceback.format_exc()}")
        
        execution_time = time.time() - start_time
        
        return {
            'engine': engine_name,
            'results': results,
            'error': error,
            'error_type': error_type,
            'execution_time': execution_time,
            'result_count': len(results),
            'success': error is None
        }
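
    # Illustrative shape of the dict returned by _scrape_single_engine_worker.
    # The keys match the return statement above; the values shown are hypothetical:
    #
    #   {
    #       'engine': 'google',
    #       'results': [<SearchResult>, ...],
    #       'error': None,
    #       'error_type': None,
    #       'execution_time': 3.42,
    #       'result_count': 10,
    #       'success': True,
    #   }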
    
    async def scrape_multiple_engines_parallel(self, request: ScrapeRequest) -> List[SearchResult]:
        """Scrape multiple engines in parallel using a thread pool, with enhanced error handling."""
        """Scrape multiple engines in parallel using thread pool with enhanced error handling."""
        start_time = time.time()
        engines = self._resolve_engines(request.engines)
        
        if not engines:
            logger.warning("No valid engines specified")
            return []
        
        # Check cache first
        if self.use_cache and self.cache_service:
            try:
                cached_results = self.cache_service.get_cached_results(
                    request.query, engines, request.max_results
                )
                if cached_results:
                    logger.info(f"Cache hit for query: '{request.query}'")
                    return cached_results
            except Exception as e:
                logger.warning(f"Cache check failed: {e}")
        
        logger.info(f"Scraping {len(engines)} engines in parallel: {engines}")
        
        # Submit all scraping tasks to thread pool
        loop = asyncio.get_running_loop()
        tasks = []
        
        for engine in engines:
            task = loop.run_in_executor(
                self.executor,
                self._scrape_single_engine_worker,
                engine,
                request.query,
                request.max_results
            )
            tasks.append(task)
        
        # Wait for all tasks to complete
        all_results = []
        engine_results = []
        successful_engines = []
        failed_engines = []
        
        try:
            completed_results = await asyncio.gather(*tasks, return_exceptions=True)
            
            for result in completed_results:
                if isinstance(result, Exception):
                    logger.error(f"Task failed with exception: {result}")
                    self.stats['total_errors'] += 1
                    continue
                
                engine_results.append(result)
                
                if result['error']:
                    failed_engines.append({
                        'engine': result['engine'],
                        'error': result['error'],
                        'error_type': result['error_type']
                    })
                    logger.error(f"Engine {result['engine']} failed: {result['error']}")
                else:
                    successful_engines.append(result['engine'])
                    all_results.extend(result['results'])
                    
                    # Update stats
                    with self._lock:
                        engine_name = result['engine']
                        if engine_name not in self.stats['engine_stats']:
                            self.stats['engine_stats'][engine_name] = {
                                'searches': 0, 'results': 0, 'total_time': 0, 'errors': 0
                            }
                        
                        self.stats['engine_stats'][engine_name]['searches'] += 1
                        self.stats['engine_stats'][engine_name]['results'] += result['result_count']
                        self.stats['engine_stats'][engine_name]['total_time'] += result['execution_time']
                        
                        if not result['success']:
                            self.stats['engine_stats'][engine_name]['errors'] += 1
                
        except Exception as e:
            logger.error(f"Error in parallel scraping: {e}")
            self.stats['total_errors'] += 1
        
        # Sort results by engine and position
        all_results.sort(key=lambda x: (x.engine, x.position))
        
        # Cache results if enabled and we have results
        if self.use_cache and self.cache_service and all_results:
            try:
                self.cache_service.cache_results(
                    request.query, engines, request.max_results, all_results
                )
            except Exception as e:
                logger.warning(f"Cache storage failed: {e}")
        
        # Update global stats
        total_time = time.time() - start_time
        with self._lock:
            self.stats['total_searches'] += 1
            self.stats['total_results'] += len(all_results)
            self.stats['total_time'] += total_time
        
        # Log summary
        if successful_engines:
            logger.info(f"Parallel scraping completed: {len(all_results)} results from {len(successful_engines)} engines in {total_time:.2f}s")
        if failed_engines:
            logger.warning(f"Failed engines: {[e['engine'] for e in failed_engines]}")
        
        return all_results
    
    async def scrape_multiple_engines_sequential(self, request: ScrapeRequest) -> List[SearchResult]:
        """Scrape multiple engines sequentially (fallback method)."""
        start_time = time.time()
        engines = self._resolve_engines(request.engines)
        all_results = []
        
        for engine in engines:
            try:
                result = self._scrape_single_engine_worker(engine, request.query, request.max_results)
                if not result['error']:
                    all_results.extend(result['results'])
                else:
                    logger.warning(f"Sequential scraping failed for {engine}: {result['error']}")
            except Exception as e:
                logger.error(f"Sequential scraping failed for {engine}: {e}")
        
        logger.info(f"Sequential scraping completed: {len(all_results)} results in {time.time() - start_time:.2f}s")
        return all_results
    
    def get_performance_stats(self) -> Dict[str, Any]:
        """Get performance statistics with error tracking."""
        with self._lock:
            stats = self.stats.copy()
        
        # Calculate averages
        if stats['total_searches'] > 0:
            stats['avg_results_per_search'] = stats['total_results'] / stats['total_searches']
            stats['avg_time_per_search'] = stats['total_time'] / stats['total_searches']
            stats['error_rate'] = stats['total_errors'] / stats['total_searches']
        else:
            stats['avg_results_per_search'] = 0
            stats['avg_time_per_search'] = 0
            stats['error_rate'] = 0
        
        # Engine-specific averages
        for engine, engine_stats in stats['engine_stats'].items():
            if engine_stats['searches'] > 0:
                engine_stats['avg_results'] = engine_stats['results'] / engine_stats['searches']
                engine_stats['avg_time'] = engine_stats['total_time'] / engine_stats['searches']
                engine_stats['error_rate'] = engine_stats.get('errors', 0) / engine_stats['searches']
            else:
                engine_stats['avg_results'] = 0
                engine_stats['avg_time'] = 0
                engine_stats['error_rate'] = 0
        
        return stats
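
    # Illustrative shape of the stats returned by get_performance_stats.
    # The keys match the fields populated above; the values shown are hypothetical:
    #
    #   {
    #       'total_searches': 12,
    #       'total_results': 240,
    #       'total_time': 58.3,
    #       'total_errors': 1,
    #       'avg_results_per_search': 20.0,
    #       'avg_time_per_search': 4.86,
    #       'error_rate': 0.083,
    #       'engine_stats': {
    #           'google': {'searches': 12, 'results': 120, 'total_time': 30.1, 'errors': 0,
    #                      'avg_results': 10.0, 'avg_time': 2.51, 'error_rate': 0.0},
    #       },
    #   }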
    
    def reset_stats(self):
        """Reset performance statistics."""
        with self._lock:
            self.stats = {
                'total_searches': 0,
                'total_results': 0,
                'total_time': 0,
                'total_errors': 0,
                'engine_stats': {}
            }
    
    def test_engine_performance(self, test_query: str = "test search") -> Dict[str, Any]:
        """Test performance of all engines with detailed error reporting."""
        logger.info("Starting engine performance test...")
        
        results = {}
        
        for engine_name in self.get_available_engines():
            logger.info(f"Testing {engine_name}...")
            
            try:
                result = self._scrape_single_engine_worker(engine_name, test_query, 5)
                results[engine_name] = {
                    'success': result['success'],
                    'execution_time': result['execution_time'],
                    'result_count': result['result_count'],
                    'error': result['error'],
                    'error_type': result.get('error_type')
                }
            except Exception as e:
                results[engine_name] = {
                    'success': False,
                    'execution_time': 0,
                    'result_count': 0,
                    'error': str(e),
                    'error_type': 'unexpected_error'
                }
        
        logger.info("Engine performance test completed")
        return results
    
    def get_engine_health(self) -> Dict[str, Dict[str, Any]]:
        """Get detailed health status of all engines."""
        health = {}
        
        for engine_name in self.get_available_engines():
            try:
                # Quick health check
                start_time = time.time()
                result = self._scrape_single_engine_worker(engine_name, "health check", 1)
                response_time = time.time() - start_time
                
                health[engine_name] = {
                    'healthy': result['success'],
                    'response_time': response_time,
                    'last_error': result['error'],
                    'error_type': result.get('error_type')
                }
                
            except Exception as e:
                health[engine_name] = {
                    'healthy': False,
                    'response_time': None,
                    'last_error': str(e),
                    'error_type': 'health_check_failed'
                }
        
        return health
    
    def close(self):
        """Close the manager and all resources."""
        try:
            # Shut down the executor and wait for any in-flight scrapes to finish
            # (ThreadPoolExecutor.shutdown has no timeout parameter)
            self.executor.shutdown(wait=True)
            logger.info("MultiEngineManager closed successfully")
        except Exception as e:
            logger.error(f"Error closing MultiEngineManager: {e}")

# Global instance
multi_engine_manager = MultiEngineManager(use_cache=True, headless=True)
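
# Minimal usage sketch (only runs when the module is executed directly). It assumes
# ScrapeRequest accepts `query`, `engines`, and `max_results` as keyword arguments,
# matching the attributes accessed in scrape_multiple_engines_parallel; adjust to the
# actual schema if it differs.
if __name__ == "__main__":
    async def _demo():
        request = ScrapeRequest(
            query="python web scraping",
            engines=["google", "bing"],
            max_results=10,
        )
        results = await multi_engine_manager.scrape_multiple_engines_parallel(request)
        print(f"Collected {len(results)} results")
        print(multi_engine_manager.get_performance_stats())
        multi_engine_manager.close()

    asyncio.run(_demo())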
