# app/services/llm_processor.py

import os
import logging
from typing import Dict, Any, Optional
import asyncio
import httpx
import json
import re

# Configure logging
logger = logging.getLogger(__name__)

class OllamaQwenProcessor:
    """
    A service for HTML content extraction using Ollama with a Qwen model.

    All inference happens inside the Ollama server over its HTTP API, so
    this process never loads model weights itself — much more stable and
    production-ready than loading models directly.
    """

    def __init__(
        self,
        ollama_url: str = "http://ollama:11434",
        model_name: str = "qwen2.5vl:7b"
    ):
        """
        Args:
            ollama_url: Base URL of the Ollama HTTP API.
            model_name: Model name (including tag) used for extraction.
        """
        self.ollama_url = ollama_url
        self.model_name = model_name
        # Generous client-wide timeout: CPU inference on a 7B model can
        # take several minutes per request.
        self.client = httpx.AsyncClient(timeout=600.0)  # 10 minutes for CPU inference

    async def ensure_model_loaded(self) -> bool:
        """Ensure the Qwen model is pulled and available in Ollama.

        Returns:
            True if the model is available (or was pulled successfully),
            False on any connection or pull failure. Never raises.
        """
        try:
            # Check if model is available
            response = await self.client.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                models = response.json().get("models", [])
                model_names = [model["name"] for model in models]

                # NOTE(review): exact-name match — Ollama lists untagged
                # models as "<name>:latest", so configure model_name with an
                # explicit tag (as the default does).
                if self.model_name in model_names:
                    logger.info(f"Model {self.model_name} is already available")
                    return True
                logger.info(f"Model {self.model_name} not found, pulling...")
                return await self._pull_model()

            logger.error(f"Failed to connect to Ollama: {response.status_code}")
            return False

        except Exception as e:
            # Boundary handler: callers only need a boolean, so log and
            # report failure instead of propagating transport errors.
            logger.error(f"Error checking Ollama models: {e}")
            return False

    async def _pull_model(self) -> bool:
        """Pull the Qwen model from the Ollama registry.

        Returns:
            True if the pull completed with HTTP 200, False otherwise.
        """
        try:
            logger.info(f"Pulling model {self.model_name}...")

            # "stream": False makes /api/pull return one final JSON object
            # instead of an NDJSON progress stream. Without it, Ollama
            # returns HTTP 200 at stream start even if the pull fails later,
            # so this method could wrongly report success.
            response = await self.client.post(
                f"{self.ollama_url}/api/pull",
                json={"name": self.model_name, "stream": False},
                timeout=1200.0  # 20 minutes for model pull
            )

            if response.status_code == 200:
                logger.info(f"Model {self.model_name} pulled successfully")
                return True
            logger.error(f"Failed to pull model: {response.status_code}")
            return False

        except Exception as e:
            logger.error(f"Error pulling model: {e}")
            return False

    async def extract_content_from_html(self, html_source: str, url: str) -> Dict[str, Any]:
        """Extract structured content from HTML using Ollama Qwen.

        Args:
            html_source: Raw HTML of the page (truncated internally before
                prompting).
            url: Page URL, embedded in the prompt for context.

        Returns:
            Dict with "status" ("success" or "error") and
            "extracted_content" (parsed fields, or None on error); success
            responses also carry a truncated "raw_llm_output", error
            responses an "error" message. Never raises.
        """
        logger.info(f"Starting content extraction for URL: {url}")

        try:
            # Ensure model is loaded before spending time building a prompt.
            if not await self.ensure_model_loaded():
                return {
                    "status": "error",
                    "error": "Failed to load Qwen model in Ollama",
                    "extracted_content": None
                }

            prompt = self._create_extraction_prompt(html_source, url)
            logger.info(f"Created prompt with {len(prompt)} characters")

            logger.info("Sending request to Ollama...")
            response = await self.client.post(
                f"{self.ollama_url}/api/generate",
                json={
                    "model": self.model_name,
                    "prompt": prompt,
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "top_p": 0.8,
                        "num_predict": 1000,  # Ollama's name for max new tokens
                        "num_ctx": 8192,      # context window size
                        "repeat_penalty": 1.1,
                        # Stop sequences keep the model from rambling past
                        # the JSON object.
                        "stop": ["\n\n---", "```"]
                    }
                },
                timeout=600.0  # per-request timeout for slow CPU inference
            )

            if response.status_code == 200:
                result = response.json()
                ollama_response = result.get("response", "")
                logger.info(f"Received response from Ollama: {len(ollama_response)} characters")

                extracted_content = self._parse_response(ollama_response)

                return {
                    "status": "success",
                    "extracted_content": extracted_content,
                    # Truncated raw model output kept for debugging.
                    "raw_llm_output": ollama_response[:1000]
                }

            logger.error(f"Ollama request failed: {response.status_code}")
            error_text = response.text if response.text else "Unknown error"
            return {
                "status": "error",
                "error": f"Ollama request failed: {response.status_code} - {error_text}",
                "extracted_content": None
            }

        except (httpx.TimeoutException, asyncio.TimeoutError):
            # Fixed: httpx raises httpx.TimeoutException on request
            # timeouts, not asyncio.TimeoutError — the old handler never
            # fired and timeouts fell through to the generic handler below,
            # losing the actionable message.
            logger.error("Request to Ollama timed out")
            return {
                "status": "error",
                "error": "Request timed out - consider using a smaller model or GPU acceleration",
                "extracted_content": None
            }
        except Exception as e:
            logger.error(f"Error in Ollama content extraction: {e}")
            return {
                "status": "error",
                "error": str(e),
                "extracted_content": None
            }

    def _create_extraction_prompt(self, html_source: str, url: str) -> str:
        """Create the extraction prompt for Ollama Qwen, requesting JSON output.

        The HTML is truncated to keep prompt size (and CPU inference time)
        bounded within the 8192-token context configured in the request.
        """
        max_html_length = 4000
        if len(html_source) > max_html_length:
            html_source = html_source[:max_html_length] + "..."

        return f"""You are an information extraction assistant specialized in processing HTML content.

Your task is to extract the following details from the provided HTML page content and return them as a valid JSON object with the exact structure shown below:

{{
    "title": "main page title",
    "authors": ["author1", "author2"],
    "subject": "main topic or theme", 
    "summary": "2-3 sentence summary of the content",
    "body": "main content text without HTML tags",
    "keywords": ["keyword1", "keyword2", "keyword3"],
    "publish_date": "publication date if available (YYYY-MM-DD format preferred)",
    "language": "detected language code (e.g., en, fr, ar)",
    "content_type": "type of content (article, blog, news, etc.)",
    "meta_description": "page meta description",
    "comments": "any comment content found"
}}

IMPORTANT INSTRUCTIONS:
- Return ONLY the JSON object, no additional text or explanations
- Use exactly the field names shown above
- If a field cannot be found, use null as the value
- For arrays (authors, keywords), return empty array [] if none found
- Do not modify, interpret, or add anything to the HTML content provided
- Extract text content exactly as it appears, removing only HTML tags
- Ensure the JSON is valid and properly formatted
- Do not include any markdown formatting or code blocks in your response

Here is the HTML content of the page at: {url}

---
{html_source}
---

Extract the information and return as valid JSON only."""

    def _extract_json_from_text(self, text: str) -> Optional[Dict[str, Any]]:
        """Best-effort field scrape when the response is not valid JSON.

        Scans ``text`` for the expected fields with per-field regexes and
        returns whatever subset was found, or None when nothing matched.
        """
        # Explicit key -> pattern mapping replaces the old trick of
        # recovering the key via pattern.split('"')[1], which was fragile.
        field_patterns = {
            "title": r'"title":\s*"([^"]*)"',
            "authors": r'"authors":\s*(\[[^\]]*\])',
            "subject": r'"subject":\s*"([^"]*)"',
            "summary": r'"summary":\s*"([^"]*)"',
            "body": r'"body":\s*"([^"]*)"',
            "keywords": r'"keywords":\s*(\[[^\]]*\])',
            "publish_date": r'"publish_date":\s*"([^"]*)"',
            "language": r'"language":\s*"([^"]*)"',
            "content_type": r'"content_type":\s*"([^"]*)"',
            "meta_description": r'"meta_description":\s*"([^"]*)"',
            "comments": r'"comments":\s*"([^"]*)"'
        }

        extracted = {}
        for key, pattern in field_patterns.items():
            match = re.search(pattern, text)
            if not match:
                continue
            value = match.group(1)

            if key in ("authors", "keywords"):
                # Array fields: the captured text should itself be JSON.
                # Fixed: narrowed the old bare `except:` to the decode error
                # actually expected here.
                try:
                    extracted[key] = json.loads(value)
                except json.JSONDecodeError:
                    extracted[key] = []
            else:
                extracted[key] = value

        return extracted if extracted else None

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse the Ollama response text into the extraction dict.

        Tries, in order: a raw JSON object, a JSON object inside a markdown
        code block, a per-field regex scrape, and finally a fallback dict
        that wraps the raw response. Never raises.
        """
        logger.info("Parsing Ollama response...")

        try:
            # First try to find a complete JSON object. Non-greedy match is
            # fine here because the expected schema has no nested objects.
            json_match = re.search(r'\{[\s\S]*?\}', response, re.DOTALL)

            if json_match:
                json_str = json_match.group(0).strip()
                try:
                    parsed_json = json.loads(json_str)
                    logger.info("Successfully parsed JSON from Ollama response")
                    return parsed_json
                except json.JSONDecodeError:
                    logger.warning("Found JSON-like content but failed to parse it")
                    extracted = self._extract_json_from_text(json_str)
                    if extracted:
                        return extracted

            # Try to find JSON within a markdown code block.
            code_block_match = re.search(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response, re.DOTALL)
            if code_block_match:
                json_str = code_block_match.group(1).strip()
                try:
                    parsed_json = json.loads(json_str)
                    logger.info("Successfully parsed JSON from code block")
                    return parsed_json
                except json.JSONDecodeError:
                    logger.warning("Found JSON-like content in code block but failed to parse it")
                    extracted = self._extract_json_from_text(json_str)
                    if extracted:
                        return extracted

            # Try to extract JSON-like fields from the raw response.
            logger.warning("No valid JSON found, attempting to extract structured content")
            extracted = self._extract_json_from_text(response)
            if extracted:
                return extracted

            # Final fallback: wrap the raw response in the expected schema.
            # NOTE(review): "comments" is a list here but the prompt asks
            # for a string — downstream consumers should tolerate both.
            logger.warning("No structured content found, using fallback")
            return {
                "title": "Extracted from Ollama response",
                "authors": [],
                "subject": "Content extraction",
                "summary": response[:200] if response else "No content",
                "body": response,
                "keywords": [],
                "publish_date": None,
                "language": "en",
                "content_type": "article",
                "meta_description": None,
                "comments": []
            }

        except json.JSONDecodeError as e:
            # Defensive: every json.loads above is already wrapped, so this
            # is not expected to fire; kept so a parse error can never
            # escape this method.
            logger.error(f"JSON parse error from Ollama: {e}")
            return {
                "title": "Parse Error",
                "authors": [],
                "subject": "Parse Error",
                "summary": "Failed to parse Ollama response",
                "body": response[:500] if response else "No response",
                "keywords": [],
                "publish_date": None,
                "language": "unknown",
                "content_type": "unknown",
                "meta_description": None,
                "comments": [],
                "extraction_error": str(e)
            }

    async def process_batch(self, html_pages: list) -> list:
        """Process multiple pages sequentially through Ollama.

        Args:
            html_pages: Dicts with "id", "url", and "html_source" keys.

        Returns:
            One result dict per page: the page id/url merged with the
            extraction result from extract_content_from_html.
        """
        logger.info(f"Starting batch processing of {len(html_pages)} pages with Ollama")
        results = []

        for i, page in enumerate(html_pages):
            logger.info(f"Processing page {i+1}/{len(html_pages)} with Ollama")
            result = await self.extract_content_from_html(
                page.get("html_source", ""),
                page.get("url", "")
            )
            results.append({
                "page_id": page.get("id"),
                "url": page.get("url"),
                **result
            })

            # Small delay between requests so a large batch does not
            # overload the Ollama server.
            await asyncio.sleep(1)

        logger.info("Ollama batch processing completed")
        return results

    async def get_model_info(self) -> Dict[str, Any]:
        """Report Ollama connectivity and model availability.

        Returns:
            A status dict; on failure it carries an "error" key instead of
            the model fields. Never raises.
        """
        try:
            response = await self.client.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                models = response.json().get("models", [])
                current_model = next((m for m in models if m["name"] == self.model_name), None)

                return {
                    "service": "Ollama",
                    "ollama_url": self.ollama_url,
                    "model_name": self.model_name,
                    "model_loaded": current_model is not None,
                    "model_size": current_model.get("size") if current_model else None,
                    "available_models": [m["name"] for m in models],
                    "performance_mode": "CPU Optimized" if current_model else "Model Not Loaded"
                }
            return {
                "service": "Ollama",
                "ollama_url": self.ollama_url,
                "model_name": self.model_name,
                "error": f"Failed to connect to Ollama: {response.status_code}"
            }
        except Exception as e:
            return {
                "service": "Ollama",
                "ollama_url": self.ollama_url,
                "model_name": self.model_name,
                "error": str(e)
            }

    async def cleanup(self):
        """Close the underlying HTTP client and release its connections."""
        await self.client.aclose()

# Lazily-created module-level singleton.
_ollama_processor = None

def get_ollama_processor():
    """Return the process-wide OllamaQwenProcessor, creating it on first use.

    Connection settings come from the OLLAMA_URL and OLLAMA_MODEL
    environment variables, falling back to the in-cluster defaults.
    """
    global _ollama_processor
    if _ollama_processor is not None:
        return _ollama_processor
    _ollama_processor = OllamaQwenProcessor(
        os.getenv("OLLAMA_URL", "http://ollama:11434"),
        os.getenv("OLLAMA_MODEL", "qwen2.5vl:7b"),
    )
    return _ollama_processor