"""SearX meta search engine scraper."""

import logging
from typing import List
from urllib.parse import urlencode
import json

from .base_scraper import BaseScraper
from ..models.schemas import SearchResult

logger = logging.getLogger(__name__)

class SearxScraper(BaseScraper):
    """SearX meta search engine scraper.

    Rotates round-robin through a list of public SearX instances. Each
    attempt prefers the instance's JSON API and falls back to scraping the
    HTML results page when the response is not valid JSON.
    """

    def __init__(self):
        super().__init__("searx")
        # List of public SearX instances, tried in round-robin order.
        # NOTE(review): public instances come and go — verify these are still live.
        self.instances = [
            "https://searx.be",
            "https://search.privacytools.io",
            "https://searx.info",
            "https://searx.me",
            "https://searx.xyz"
        ]
        # Index of the next instance to hand out.
        self.current_instance = 0

    def _get_next_instance(self) -> str:
        """Return the next SearX instance URL, advancing the round-robin index."""
        instance = self.instances[self.current_instance]
        self.current_instance = (self.current_instance + 1) % len(self.instances)
        return instance

    def _parse_json_results(self, data: dict, max_results: int) -> List[SearchResult]:
        """Build SearchResult objects from a SearX JSON API payload.

        Args:
            data: Decoded JSON body; results are read from its 'results' key.
            max_results: Maximum number of results to return.

        Returns:
            SearchResult objects for every item that has both a title and a URL.
        """
        results: List[SearchResult] = []
        for i, item in enumerate(data.get('results', [])[:max_results]):
            try:
                title = item.get('title', '')
                url = item.get('url', '')
                description = item.get('content', '')

                # Skip items missing either field rather than emitting blanks.
                if title and url:
                    results.append(self._create_search_result(
                        title=title,
                        url=url,
                        description=description,
                        position=i + 1
                    ))
            except Exception as e:
                # Best-effort: one malformed item must not abort the batch.
                logger.error(f"Error parsing SearX result {i}: {e}")
                continue
        return results

    def _parse_html_results(self, soup, max_results: int) -> List[SearchResult]:
        """Build SearchResult objects by scraping a SearX HTML results page.

        Args:
            soup: Parsed HTML document (as returned by self._parse_html).
            max_results: Maximum number of results to return.

        Returns:
            SearchResult objects for every container with both a title and a URL.
        """
        results: List[SearchResult] = []
        result_containers = soup.select('.result, .result-default')

        for i, container in enumerate(result_containers[:max_results]):
            try:
                # Selectors cover the markup variants used by different SearX themes.
                title_elem = container.select_one('h3 a, h4 a, .result-title a')
                title = title_elem.get_text(strip=True) if title_elem else ""
                url = title_elem.get('href', '') if title_elem else ""

                desc_elem = container.select_one('.result-content, .content')
                description = desc_elem.get_text(strip=True) if desc_elem else ""

                if title and url:
                    results.append(self._create_search_result(
                        title=title,
                        url=url,
                        description=description,
                        position=i + 1
                    ))
            except Exception as e:
                # Best-effort: one malformed container must not abort the batch.
                logger.error(f"Error parsing SearX HTML result {i}: {e}")
                continue
        return results

    def search(self, query: str, max_results: int = 10) -> List[SearchResult]:
        """Search SearX for results.

        Tries each configured instance at most once, stopping at the first
        instance that yields at least one result.

        Args:
            query: Search query string.
            max_results: Maximum number of results to return (default 10).

        Returns:
            A list of SearchResult objects; empty if every instance failed.
        """
        results: List[SearchResult] = []

        for _ in range(len(self.instances)):
            # Bind outside the try so the except handler below can always
            # reference instance_url (original code risked a NameError there).
            instance_url = self._get_next_instance()
            try:
                search_url = f"{instance_url}/search"

                # SearX supports JSON API
                params = {
                    'q': query,
                    'format': 'json',
                    'categories': 'general',
                    'engines': 'google,bing,duckduckgo',
                    'language': 'en-US'
                }

                response = self._make_request(search_url, params)
                if not response:
                    continue

                # Keep the try narrow: only the JSON decode can raise here.
                try:
                    data = response.json()
                except json.JSONDecodeError:
                    # Fallback to HTML parsing
                    soup = self._parse_html(response.text)
                    if not soup:
                        continue
                    results = self._parse_html_results(soup, max_results)
                else:
                    results = self._parse_json_results(data, max_results)

                if results:
                    logger.info(f"SearX: Found {len(results)} results for '{query}' from {instance_url}")
                    break

            except Exception as e:
                logger.error(f"SearX search error with {instance_url}: {e}")
                continue

        return results
