#!/usr/bin/env python3
"""Comprehensive test script for the fast multi-threaded Selenium system."""

import asyncio
import logging
import sys
import os
import time
import json

# Add the src directory to the Python path
# NOTE(review): this inserts `<script_dir>/src`, yet the imports below are
# `from src....` — which would require `<script_dir>` (the parent of `src/`)
# on sys.path instead. Presumably imports currently resolve via the CWD;
# verify the intended layout before relying on this insert.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))

from src.scrapers.selenium_engines.multi_engine_manager import MultiEngineManager
from src.models.schemas import ScrapeRequest

# Configure logging
# Root logger at INFO with timestamped, module-tagged records; every test
# function below logs through this module-level logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

async def test_single_engines():
    """Exercise each search engine individually with a fixed query.

    Runs one 5-result scrape per engine, logging timing and a preview of
    the first three results. A failure in one engine is logged and does
    not stop the remaining engines.
    """
    logger.info("=== Testing Individual Engines ===")
    
    manager = MultiEngineManager(use_cache=False, headless=True, max_workers=8)
    
    test_query = "artificial intelligence"
    engines_to_test = ["google", "bing", "duckduckgo", "yandex", "baidu"]
    
    try:
        for engine in engines_to_test:
            logger.info(f"\n--- Testing {engine.upper()} ---")
            
            request = ScrapeRequest(
                query=test_query,
                engines=[engine],
                max_results=5
            )
            
            start_time = time.time()
            try:
                results = await manager.scrape_multiple_engines_parallel(request)
                execution_time = time.time() - start_time
                
                logger.info(f"{engine}: {len(results)} results in {execution_time:.2f}s")
                
                # Preview only the top three hits to keep the log readable.
                for i, result in enumerate(results[:3]):
                    logger.info(f"  {i+1}. {result.title[:60]}...")
                    logger.info(f"     URL: {result.url}")
                    
            except Exception as e:
                logger.error(f"Error testing {engine}: {e}")
    finally:
        # Release browser/driver resources even if an unexpected error
        # escapes the per-engine handler (e.g. in ScrapeRequest itself).
        manager.close()

async def test_parallel_performance():
    """Compare parallel vs sequential execution over the same request.

    Runs one parallel and one sequential multi-engine scrape with the
    same query and logs the wall-clock speedup (0 if the parallel run
    took no measurable time).
    """
    logger.info("\n=== Testing Parallel vs Sequential Performance ===")
    
    manager = MultiEngineManager(use_cache=False, headless=True, max_workers=8)
    
    test_query = "machine learning python"
    engines = ["google", "bing", "duckduckgo"]
    
    request = ScrapeRequest(
        query=test_query,
        engines=engines,
        max_results=5
    )
    
    try:
        # Test parallel
        logger.info("\n--- Parallel Execution ---")
        start_time = time.time()
        parallel_results = await manager.scrape_multiple_engines_parallel(request)
        parallel_time = time.time() - start_time
        
        logger.info(f"Parallel: {len(parallel_results)} results in {parallel_time:.2f}s")
        
        # Test sequential
        logger.info("\n--- Sequential Execution ---")
        start_time = time.time()
        sequential_results = await manager.scrape_multiple_engines_sequential(request)
        sequential_time = time.time() - start_time
        
        logger.info(f"Sequential: {len(sequential_results)} results in {sequential_time:.2f}s")
        
        # Performance comparison — guard against a zero-length parallel run.
        speedup = sequential_time / parallel_time if parallel_time > 0 else 0
        logger.info(f"\nPerformance Improvement: {speedup:.2f}x faster with parallel execution")
    finally:
        # Always close the manager, even if one of the scrapes raised;
        # the original leaked drivers on failure.
        manager.close()
async def test_high_load():
    """Stress the system with several concurrent multi-engine queries.

    Launches one parallel scrape task per query, awaits them all with
    asyncio.gather, and logs total/average timing plus result counts.
    """
    logger.info("\n=== Testing High Load Performance ===")
    
    manager = MultiEngineManager(use_cache=False, headless=True, max_workers=12)
    
    # Multiple queries to test concurrency
    test_queries = [
        "python programming",
        "web development",
        "data science",
        "machine learning",
        "artificial intelligence"
    ]
    
    try:
        start_time = time.time()
        
        # Fan out one scrape task per query; they run concurrently.
        tasks = [
            manager.scrape_multiple_engines_parallel(
                ScrapeRequest(
                    query=query,
                    engines=["google", "bing", "duckduckgo"],
                    max_results=3
                )
            )
            for query in test_queries
        ]
        
        # Wait for all to complete (a single failure propagates here).
        all_results = await asyncio.gather(*tasks)
        
        total_time = time.time() - start_time
        total_results = sum(len(results) for results in all_results)
        
        logger.info(f"High Load Test: {len(test_queries)} queries completed in {total_time:.2f}s")
        logger.info(f"Total results: {total_results}")
        logger.info(f"Average time per query: {total_time / len(test_queries):.2f}s")
    finally:
        # Ensure driver cleanup even when gather() raises; the original
        # skipped close() on any task failure.
        manager.close()

async def test_performance_monitoring():
    """Generate traffic and log the manager's aggregated performance stats.

    Issues a few scrapes, then reads `get_performance_stats()` and logs
    both the overall counters and the per-engine breakdown.
    """
    logger.info("\n=== Testing Performance Monitoring ===")
    
    manager = MultiEngineManager(use_cache=False, headless=True)
    
    # Run some searches to generate stats
    queries = ["test query 1", "test query 2", "test query 3"]
    
    try:
        for query in queries:
            request = ScrapeRequest(
                query=query,
                engines=["google", "bing"],
                max_results=3
            )
            await manager.scrape_multiple_engines_parallel(request)
        
        # Get performance stats
        stats = manager.get_performance_stats()
        
        logger.info("Performance Statistics:")
        logger.info(f"  Total searches: {stats['total_searches']}")
        logger.info(f"  Total results: {stats['total_results']}")
        logger.info(f"  Average results per search: {stats['avg_results_per_search']:.2f}")
        logger.info(f"  Average time per search: {stats['avg_time_per_search']:.2f}s")
        
        logger.info("\nEngine-specific stats:")
        for engine, engine_stats in stats['engine_stats'].items():
            logger.info(f"  {engine}:")
            logger.info(f"    Searches: {engine_stats['searches']}")
            logger.info(f"    Results: {engine_stats['results']}")
            logger.info(f"    Avg time: {engine_stats['avg_time']:.2f}s")
            logger.info(f"    Avg results: {engine_stats['avg_results']:.2f}")
    finally:
        # Close drivers even if a scrape raises or a stats key is missing;
        # the original leaked the manager on any failure above.
        manager.close()

async def test_engine_reliability():
    """Probe error handling with edge-case queries.

    Feeds the manager normal, special-character, unicode, very long, and
    empty queries; each case logs success or the raised error without
    aborting the remaining cases.
    """
    logger.info("\n=== Testing Engine Reliability ===")
    
    manager = MultiEngineManager(use_cache=False, headless=True)
    
    # Test with various queries
    test_cases = [
        "normal query",
        "special chars !@#$%^&*()",
        "unicode query 测试 тест",
        "very long query " * 20,
        ""  # Empty query
    ]
    
    try:
        for i, query in enumerate(test_cases):
            logger.info(f"\nTest case {i+1}: '{query[:50]}{'...' if len(query) > 50 else ''}'")
            
            try:
                # Construct inside the try: ScrapeRequest may itself reject
                # edge-case input (e.g. the empty query) via validation.
                request = ScrapeRequest(
                    query=query,
                    engines=["google", "bing", "duckduckgo"],
                    max_results=3
                )
                results = await manager.scrape_multiple_engines_parallel(request)
                logger.info(f"  Success: {len(results)} results")
            except Exception as e:
                logger.error(f"  Error: {e}")
    finally:
        # Guarantee driver cleanup; the original skipped close() if
        # request construction raised outside its inner try block.
        manager.close()

async def run_comprehensive_test():
    """Run all test suites in sequence and report total wall-clock time.

    A failure in any suite aborts the remaining ones; the error is
    logged with a full traceback rather than swallowed silently.
    """
    logger.info("Starting comprehensive fast Selenium system test...")
    
    total_start_time = time.time()
    
    try:
        # Run all test suites
        await test_single_engines()
        await test_parallel_performance()
        await test_high_load()
        await test_performance_monitoring()
        await test_engine_reliability()
        
        total_time = time.time() - total_start_time
        logger.info(f"\n=== All Tests Completed Successfully in {total_time:.2f}s ===")
        
    except Exception as e:
        # logger.exception preserves the traceback; plain error() hid it.
        logger.exception(f"Test suite failed: {e}")

# Script entry point: drive the full async test suite on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(run_comprehensive_test())
