"""Enhanced scraper manager with Google and all search engines."""

import logging
import asyncio
from typing import Any, Dict, List
from concurrent.futures import ThreadPoolExecutor
import time

from .google_scraper import GoogleScraper
from .duckduckgo_scraper import DuckDuckGoScraper
from .bing_scraper import BingScraper
from .yandex_scraper import YandexScraper
from .startpage_scraper import StartpageScraper
from .brave_scraper import BraveScraper
from .searx_scraper import SearxScraper
from .mock_scraper import MockScraper
from ..models.schemas import SearchResult, ScrapeRequest
from ..config.settings import AVAILABLE_ENGINES, MAX_WORKERS
from ..services.cache_service import cache_service
from ..services.monitoring_service import monitoring_service

logger = logging.getLogger(__name__)

class ScraperManager:
    """Enhanced manager for coordinating multiple search engine scrapers."""
    
    def __init__(self, use_mock: bool = False, use_cache: bool = True):
        self.use_cache = use_cache
        self.cache_service = cache_service
        self.monitoring_service = monitoring_service
        
        if use_mock:
            # Use mock scrapers for testing
            self.scrapers = {
                "google": MockScraper("google"),
                "duckduckgo": MockScraper("duckduckgo"),
                "bing": MockScraper("bing"),
                "yandex": MockScraper("yandex"),
                "startpage": MockScraper("startpage"),
                "searx": MockScraper("searx"),
                "brave": MockScraper("brave"),
            }
        else:
            # Use real scrapers
            self.scrapers = {
                "google": GoogleScraper(),
                "duckduckgo": DuckDuckGoScraper(),
                "bing": BingScraper(),
                "yandex": YandexScraper(),
                "startpage": StartpageScraper(),
                "brave": BraveScraper(),
                "searx": SearxScraper(),
            }
        
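        # Shared thread pool; the synchronous scraper.search() calls are dispatched
        # here so multiple engines can be queried concurrently (see scrape_multiple_engines).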
        self.executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
        
        # Initialize cache if enabled
        if self.use_cache:
            try:
                self.cache_service.connect()
            except Exception as e:
                logger.warning(f"Cache connection failed: {e}")
                self.use_cache = False
    
    def get_available_engines(self) -> List[str]:
        """Get list of available search engines."""
        return list(self.scrapers.keys())
    
    def get_engine_descriptions(self) -> Dict[str, str]:
        """Get descriptions of available engines."""
        return {
            "google": "Google search engine - Most popular search engine",
            "duckduckgo": "DuckDuckGo search engine - Privacy focused",
            "bing": "Microsoft Bing search engine",
            "yandex": "Yandex search engine - Russian search",
            "startpage": "Startpage search engine - Privacy focused",
            "searx": "SearX meta search engine",
            "brave": "Brave search engine",
            "all": "Search across all available engines"
        }
    
    def _resolve_engines(self, engines: List[str]) -> List[str]:
        """Resolve engine list, handling 'all' keyword."""
        if "all" in engines:
            return list(self.scrapers.keys())
        
        # Filter to only available engines
        available_engines = self.get_available_engines()
        return [engine for engine in engines if engine in available_engines]
    
    def _scrape_single_engine(self, engine: str, query: str, max_results: int) -> List[SearchResult]:
        """Scrape a single search engine with monitoring."""
        start_time = time.time()
        
        try:
            if engine not in self.scrapers:
                logger.warning(f"Engine {engine} not available")
                self.monitoring_service.record_request(engine, False, 0, 0, "Engine not available")
                return []
            
            scraper = self.scrapers[engine]
            results = scraper.search(query, max_results)
            
            # Record successful request
            response_time = time.time() - start_time
            self.monitoring_service.record_request(
                engine, True, response_time, len(results)
            )
            
            logger.info(f"Scraped {len(results)} results from {engine} in {response_time:.2f}s")
            return results
            
        except Exception as e:
            response_time = time.time() - start_time
            self.monitoring_service.record_request(
                engine, False, response_time, 0, str(e)
            )
            logger.error(f"Error scraping {engine}: {e}")
            return []
    
    async def scrape_multiple_engines(self, request: ScrapeRequest) -> List[SearchResult]:
        """Scrape multiple search engines with caching and monitoring."""
        start_time = time.time()
        engines = self._resolve_engines(request.engines)
        
        if not engines:
            logger.warning("No valid engines specified")
            return []
        
        # Check cache first
        cached_results = None
        if self.use_cache:
            cached_results = self.cache_service.get_cached_results(
                request.query, engines, request.max_results
            )
            
            if cached_results:
                self.monitoring_service.record_cache_hit(True)
                logger.info(f"Cache hit for query: '{request.query}'")
                return cached_results
            else:
                self.monitoring_service.record_cache_hit(False)
        
        logger.info(f"Scraping engines: {engines} for query: '{request.query}'")
        
        # Run the blocking scrapers concurrently on the shared thread pool while
        # this coroutine awaits their completion.
        loop = asyncio.get_running_loop()
        
        # Submit all scraping tasks
        futures = []
        for engine in engines:
            future = loop.run_in_executor(
                self.executor,
                self._scrape_single_engine,
                engine,
                request.query,
                request.max_results
            )
            futures.append(future)
        
        # Wait for all tasks to complete
        all_results = []
        try:
            completed_results = await asyncio.gather(*futures, return_exceptions=True)
            
            for i, result in enumerate(completed_results):
                if isinstance(result, Exception):
                    logger.error(f"Engine {engines[i]} failed: {result}")
                else:
                    all_results.extend(result)
                    
        except Exception as e:
            logger.error(f"Error in concurrent scraping: {e}")
        
        # Sort results by position within each engine
        all_results.sort(key=lambda x: (x.engine, x.position))
        
        # Cache results if enabled
        if self.use_cache and all_results:
            self.cache_service.cache_results(
                request.query, engines, request.max_results, all_results
            )
        
        # Record task completion
        task_time = time.time() - start_time
        self.monitoring_service.record_task('completed', task_time)
        
        logger.info(f"Total results collected: {len(all_results)} in {task_time:.2f}s")
        return all_results
    
    def get_scraper_stats(self) -> Dict[str, Any]:
        """Get statistics for all scrapers."""
        stats = {
            'scrapers': self.monitoring_service.get_scraper_metrics(),
            'system': self.monitoring_service.get_system_metrics(),
            'cache': self.cache_service.get_cache_stats() if self.use_cache else {'enabled': False},
            'health': self.monitoring_service._get_health_status()
        }
        
        return stats
    
    def get_performance_report(self) -> Dict[str, Any]:
        """Get comprehensive performance report."""
        return self.monitoring_service.get_performance_report()
    
    def clear_cache(self) -> int:
        """Clear all cached results."""
        if self.use_cache:
            return self.cache_service.invalidate_cache()
        return 0
    
    def reset_metrics(self):
        """Reset all monitoring metrics."""
        self.monitoring_service.reset_metrics()
    
    def add_proxy_to_scraper(self, engine: str, proxy: str):
        """Add proxy to specific scraper (if supported)."""
        if engine in self.scrapers:
            scraper = self.scrapers[engine]
            if hasattr(scraper, 'add_proxy'):
                scraper.add_proxy(proxy)
                logger.info(f"Added proxy to {engine}: {proxy}")
            else:
                logger.warning(f"Scraper {engine} does not support proxy rotation")
        else:
            logger.warning(f"Engine {engine} not found")
    
    def close(self):
        """Close the scraper manager and all resources."""
        self.executor.shutdown(wait=True)
        
        if self.use_cache:
            self.cache_service.disconnect()
        
        self.monitoring_service.stop_monitoring()
        
        logger.info("Scraper manager closed")

# Global scraper manager instance
# Set use_mock=True to exercise the mock scrapers in tests; use_mock=False for production
scraper_manager = ScraperManager(use_mock=False, use_cache=True)
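
# Minimal usage sketch (illustration only, not part of the module's API; it assumes
# ScrapeRequest accepts the `query`, `engines`, and `max_results` fields that the
# manager reads above):
#
#     request = ScrapeRequest(
#         query="python web scraping",
#         engines=["google", "duckduckgo"],
#         max_results=10,
#     )
#     results = asyncio.run(scraper_manager.scrape_multiple_engines(request))
#     for result in results:
#         print(result.engine, result.position)
#     scraper_manager.close()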
