"""Google search engine scraper with advanced anti-detection."""

import logging
from typing import List, Optional
from urllib.parse import parse_qs, urlparse
import time
import random

from .base_scraper import BaseScraper
from ..models.schemas import SearchResult

logger = logging.getLogger(__name__)


class GoogleScraper(BaseScraper):
    """Google search engine scraper with anti-detection measures."""
    
    def __init__(self):
        super().__init__("google")
        self.base_url = "https://www.google.com/search"
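        # Randomly alternate between mobile and desktop profiles so repeated
        # runs do not present an identical fingerprint.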
        self.use_mobile = random.choice([True, False])
        
        # Enhanced headers for Google
        self.session.headers.update({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Cache-Control': 'max-age=0',
            'DNT': '1'
        })
        
        if self.use_mobile:
            self.session.headers.update({
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1'
            })
    
    def search(self, query: str, max_results: int = 10) -> List[SearchResult]:
        """Search Google for results."""
        results = []
        
        try:
            # Random delay before starting
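            # (jittered so the request cadence doesn't look machine-regular)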
            time.sleep(random.uniform(2.0, 4.0))
            
            # Prepare search parameters
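            # hl/gl pin the interface language and region so the result markup
            # stays predictable; filter=0 asks Google not to collapse
            # near-duplicate results.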
            params = {
                'q': query,
                'num': min(max_results, 100),  # Google allows max 100 results
                'hl': 'en',
                'gl': 'us',
                'safe': 'off',
                'filter': '0'
            }
            
            # Add mobile-specific parameters
            if self.use_mobile:
                params['pccc'] = '1'
                params['pws'] = '0'
            
            # Make the request
            response = self._make_request(self.base_url, params)
            if not response:
                return results
            
            # Check for CAPTCHA or blocking
            if self._is_google_blocked(response):
                logger.warning("Google blocked the request, trying alternative approach")
                return self._try_alternative_search(query, max_results)
            
            soup = self._parse_html(response.text)
            if not soup:
                return results
            
            # Try multiple selectors for Google results
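            # (class names are obfuscated and rotate often, so the list runs
            # from the most- to the least-common layout)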
            selectors = [
                'div.g',                    # Standard results
                'div[data-sokoban-container]', # New layout
                'div.rc',                   # Classic results
                'div.MjjYud',              # Recent layout
                'div.kvH3mc',              # Mobile results
                'div.kCrYT',               # Alternative mobile
                'div.ZINbBC',              # News results
                'div.hlcw0c'               # Mixed results
            ]
            
            result_containers = []
            for selector in selectors:
                containers = soup.select(selector)
                if containers:
                    result_containers = containers
                    logger.info(f"Found {len(containers)} results with selector: {selector}")
                    break
            
            if not result_containers:
                logger.warning("No result containers found, trying fallback extraction")
                # The fallback builds SearchResult objects directly (there are
                # no DOM containers left to re-parse), so return them as-is.
                return self._extract_results_fallback(soup)[:max_results]
            
            # Extract results
            for i, container in enumerate(result_containers[:max_results]):
                try:
                    result = self._extract_single_result(container, i + 1)
                    if result:
                        results.append(result)
                        
                except Exception as e:
                    logger.error(f"Error parsing Google result {i}: {e}")
                    continue
            
            logger.info(f"Google: Found {len(results)} results for '{query}'")
            
        except Exception as e:
            logger.error(f"Google search error: {e}")
        
        return results
    
    def _extract_single_result(self, container, position: int) -> Optional[SearchResult]:
        """Extract a single result from a container; returns None if nothing usable."""
        title = ""
        url = ""
        description = ""
        
        # Extract title and URL
        title_selectors = [
            'h3',
            'h3 a',
            'a h3',
            '.LC20lb',
            '.DKV0Md',
            '.BNeawe.vvjwJb.AP7Wnd'
        ]
        
        title_elem = None
        for selector in title_selectors:
            title_elem = container.select_one(selector)
            if title_elem:
                break
        
        if title_elem:
            title = title_elem.get_text(strip=True)
            
            # Find the URL
            url_elem = title_elem.find_parent('a') or title_elem.find('a') or container.select_one('a')
            if url_elem:
                url = url_elem.get('href', '')
                
                # Clean Google redirect URLs
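                # Organic links often point at Google's click-tracking endpoint
                # (/url?q=<target>&...) instead of the target site.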
                if url.startswith('/url?'):
                    url = self._clean_google_url(url)
                elif url.startswith('/search?'):
                    url = ""  # Skip internal search URLs
        
        # Extract description
        desc_selectors = [
            '.VwiC3b',
            '.s3v9rd',
            '.st',
            '.BNeawe.s3v9rd.AP7Wnd',
            '.hgKElc',
            '.IsZvec'
        ]
        
        for selector in desc_selectors:
            desc_elem = container.select_one(selector)
            if desc_elem:
                description = desc_elem.get_text(strip=True)
                break
        
        # Skip if no valid data
        if not title or not url or not url.startswith('http'):
            return None
        
        return self._create_search_result(
            title=title,
            url=url,
            description=description,
            position=position
        )
    
    def _clean_google_url(self, url: str) -> str:
        """Resolve a Google redirect link to its destination.

        e.g. '/url?q=https://example.com/&sa=U' -> 'https://example.com/'
        """
        try:
            query = parse_qs(urlparse(url).query)
            # The destination sits in 'q' on most redirects and in 'url' on
            # some older variants; parse_qs percent-decodes it for us.
            for key in ('q', 'url'):
                if key in query and query[key]:
                    return query[key][0]
            return url
        except Exception:
            return url
    
    def _is_google_blocked(self, response) -> bool:
        """Check whether Google served a block/CAPTCHA page instead of results."""
        # Google's "sorry" interstitial usually arrives as HTTP 429 or 503
        if response.status_code in (429, 503):
            return True
        
        # Check for common Google blocking patterns (kept narrow: generic
        # words like 'blocked' or 'robots.txt' appear in ordinary result
        # snippets too and would trigger false positives)
        blocking_patterns = [
            'detected unusual traffic',
            'captcha',
            'automated queries',
            '/sorry/'
        ]
        
        text = response.text.lower()
        return any(pattern in text for pattern in blocking_patterns)
    
    def _try_alternative_search(self, query: str, max_results: int) -> List[SearchResult]:
        """Try alternative search approach when blocked."""
        logger.info("Trying alternative Google search approach")
        
        # Back off longer, switch device profile, and retry with minimal parameters
        time.sleep(random.uniform(5.0, 10.0))
        
        # Switch to the opposite device profile
        mobile_ua = ('Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) '
                     'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 '
                     'Mobile/15E148 Safari/604.1')
        desktop_ua = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/120.0.0.0 Safari/537.36')
        
        self.session.headers['User-Agent'] = desktop_ua if self.use_mobile else mobile_ua
        self.use_mobile = not self.use_mobile
        
        # Try with minimal parameters
        params = {
            'q': query,
            'num': min(max_results, 50)
        }
        
        response = self._make_request(self.base_url, params)
        if response and not self._is_google_blocked(response):
            soup = self._parse_html(response.text)
            if soup:
                return self._extract_results_fallback(soup)[:max_results]
        
        return []
    
    def _extract_results_fallback(self, soup) -> List[SearchResult]:
        """Fallback extraction: harvest plausible organic links from the raw page."""
        results = []
        
        # Look for any external links that might be results
        for link in soup.find_all('a'):
            href = link.get('href', '')
            if href.startswith('/url?'):
                href = self._clean_google_url(href)
            if not href.startswith('http') or any(domain in href for domain in [
                'google.com', 'youtube.com', 'maps.google.com', 'images.google.com'
            ]):
                continue
            
            title = link.get_text(strip=True)
            if not title or len(title) <= 10:
                continue
            
            # Try to find a description in the surrounding block
            description = ""
            parent = link.find_parent('div')
            if parent:
                desc_text = parent.get_text(strip=True)
                if len(desc_text) > len(title):
                    description = desc_text[len(title):].strip()[:200]
            
            results.append(self._create_search_result(
                title=title,
                url=href,
                description=description,
                position=len(results) + 1
            ))
            
            if len(results) >= 10:
                break
        
        return results
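

if __name__ == "__main__":
    # Minimal smoke test: a sketch assuming BaseScraper wires up a
    # requests.Session plus the _make_request/_parse_html/
    # _create_search_result helpers used above, and that SearchResult
    # exposes position/title/url fields (both live outside this module).
    # Run via `python -m` from the package root so the relative imports
    # resolve.
    import sys

    logging.basicConfig(level=logging.INFO)
    scraper = GoogleScraper()
    query = " ".join(sys.argv[1:]) or "python web scraping"
    for item in scraper.search(query, max_results=5):
        print(f"{item.position}. {item.title}\n   {item.url}")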
