"""
Celery Tasks for System Monitoring and Metrics Collection

This module contains background tasks for collecting system metrics,
monitoring performance, and generating health reports.
"""

import logging
import psutil
import redis
from typing import Dict, Any
from celery import shared_task
from django.utils import timezone
from django.conf import settings
from django.db import connection
from django.core.cache import cache
from datetime import timedelta

from apps.streams.models import Channel, StreamSession, HLSSegment
from apps.jingles.models import JingleDetection, AdBreak
from apps.notifications.models import Notification, NotificationChannel
from apps.core.backup import create_scheduled_backup, cleanup_old_backups

# Set up logging for monitoring tasks
logger = logging.getLogger('stream_processor.monitoring_tasks')
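
# A possible Celery beat schedule wiring these tasks up. This is an illustrative
# sketch only -- the real schedule lives in the project's Celery/Django settings,
# and the module path 'apps.monitoring.tasks' used below is an assumption:
#
# CELERY_BEAT_SCHEDULE = {
#     'collect-system-metrics': {
#         'task': 'apps.monitoring.tasks.collect_system_metrics',
#         'schedule': 60.0,                # assumed: every minute
#     },
#     'cleanup-metrics-cache': {
#         'task': 'apps.monitoring.tasks.cleanup_metrics_cache',
#         'schedule': 60 * 60.0,           # hourly, per the task docstring
#     },
#     'cleanup-backups': {
#         'task': 'apps.monitoring.tasks.cleanup_backups',
#         'schedule': 7 * 24 * 60 * 60.0,  # weekly, per the task docstring
#     },
# }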


@shared_task
def collect_system_metrics():
    """
    Collect system performance metrics and store in cache.
    
    This task gathers CPU, memory, disk, and network metrics
    for monitoring dashboard display.
    
    Returns:
        dict: System metrics data
    """
    try:
        logger.debug("Collecting system metrics")
        
        # CPU metrics
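        # Note: cpu_percent(interval=1) blocks for the full one-second sampling
        # window, which is acceptable inside a background Celery worker.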
        cpu_percent = psutil.cpu_percent(interval=1)
        cpu_count = psutil.cpu_count()
        load_avg = psutil.getloadavg() if hasattr(psutil, 'getloadavg') else [0, 0, 0]
        
        # Memory metrics
        memory = psutil.virtual_memory()
        memory_metrics = {
            'total': memory.total,
            'available': memory.available,
            'used': memory.used,
            'percent': memory.percent
        }
        
        # Disk metrics
        disk = psutil.disk_usage('/')
        disk_metrics = {
            'total': disk.total,
            'used': disk.used,
            'free': disk.free,
            'percent': (disk.used / disk.total) * 100
        }
        
        # Network metrics
        network = psutil.net_io_counters()
        network_metrics = {
            'bytes_sent': network.bytes_sent,
            'bytes_recv': network.bytes_recv,
            'packets_sent': network.packets_sent,
            'packets_recv': network.packets_recv
        }
        
        # Process metrics for important services
        process_metrics = {}
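        # Per-process cpu_percent values from process_iter() are measured since the
        # previous call, so the very first sample for a process may read as 0.0.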
        for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']):
            try:
                if any(name in (proc.info['name'] or '').lower() for name in ['python', 'celery', 'redis', 'postgres']):
                    process_metrics[proc.info['pid']] = {
                        'name': proc.info['name'],
                        'cpu_percent': proc.info['cpu_percent'],
                        'memory_percent': proc.info['memory_percent']
                    }
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue
        
        metrics_data = {
            'timestamp': timezone.now().isoformat(),
            'cpu': {
                'percent': cpu_percent,
                'count': cpu_count,
                'load_avg': load_avg
            },
            'memory': memory_metrics,
            'disk': disk_metrics,
            'network': network_metrics,
            'processes': process_metrics
        }
        
        # Store in cache for dashboard access
        cache.set('system_metrics', metrics_data, timeout=300)  # 5 minutes
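        # Dashboard views can read this back with cache.get('system_metrics');
        # it returns None once the 5-minute TTL expires.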
        
        logger.info(f"System metrics collected: CPU {cpu_percent}%, Memory {memory.percent}%")
        
        return {
            'success': True,
            'metrics_collected': len(metrics_data),  # number of top-level metric groups
            'cpu_usage': cpu_percent,
            'memory_usage': memory.percent
        }
        
    except Exception as e:
        logger.error(f"System metrics collection failed: {e}")
        raise


@shared_task
def collect_application_metrics():
    """
    Collect application-specific metrics and performance data.
    
    This task gathers metrics about stream processing, detection
    performance, and notification delivery rates.
    
    Returns:
        dict: Application metrics data
    """
    try:
        logger.debug("Collecting application metrics")
        
        # Time ranges
        now = timezone.now()
        last_hour = now - timedelta(hours=1)
        last_24h = now - timedelta(hours=24)
        
        # Stream metrics
        active_streams = StreamSession.objects.filter(
            status__in=['active', 'processing']
        ).count()
        
        streams_last_hour = StreamSession.objects.filter(
            started_at__gte=last_hour
        ).count()
        
        total_segments = HLSSegment.objects.filter(
            processed_at__gte=last_24h
        ).count()
        
        # Detection metrics
        detections_last_hour = JingleDetection.objects.filter(
            detection_time__gte=last_hour
        ).count()
        
        detection_accuracy = 0
        total_detections = JingleDetection.objects.filter(
            detection_time__gte=last_24h
        ).count()
        
        if total_detections > 0:
            confirmed_detections = JingleDetection.objects.filter(
                detection_time__gte=last_24h,
                is_confirmed=True
            ).count()
            detection_accuracy = (confirmed_detections / total_detections) * 100
        
        # Ad break metrics
        ad_breaks_last_hour = AdBreak.objects.filter(
            start_time__gte=last_hour
        ).count()
        
        active_ad_breaks = AdBreak.objects.filter(
            status='active'
        ).count()
        
        # Notification metrics
        notifications_last_hour = Notification.objects.filter(
            created_at__gte=last_hour
        ).count()
        
        notification_success_rate = 0
        total_notifications = Notification.objects.filter(
            created_at__gte=last_24h
        ).count()
        
        if total_notifications > 0:
            sent_notifications = Notification.objects.filter(
                created_at__gte=last_24h,
                status='completed'
            ).count()
            notification_success_rate = (sent_notifications / total_notifications) * 100
        
        # Database metrics
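        # PostgreSQL-specific: pg_stat_user_tables and pg_total_relation_size are
        # not available on other database backends.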
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT 
                    schemaname,
                    tablename,
                    n_tup_ins + n_tup_upd + n_tup_del as total_operations,
                    pg_total_relation_size(schemaname||'.'||tablename) as table_size
                FROM pg_stat_user_tables 
                WHERE schemaname = 'public'
                ORDER BY total_operations DESC
                LIMIT 10
            """)
            db_stats = cursor.fetchall()
        
        # Redis metrics
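        # Assumes the Celery broker is Redis; if it is not, the except clause
        # below simply records empty Redis metrics.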
        try:
            r = redis.from_url(settings.CELERY_BROKER_URL)
            redis_info = r.info()
            redis_metrics = {
                'used_memory': redis_info.get('used_memory', 0),
                'connected_clients': redis_info.get('connected_clients', 0),
                'total_commands_processed': redis_info.get('total_commands_processed', 0),
                'keyspace_hits': redis_info.get('keyspace_hits', 0),
                'keyspace_misses': redis_info.get('keyspace_misses', 0)
            }
        except Exception as e:
            logger.warning(f"Failed to collect Redis metrics: {e}")
            redis_metrics = {}
        
        app_metrics = {
            'timestamp': now.isoformat(),
            'streams': {
                'active_streams': active_streams,
                'streams_last_hour': streams_last_hour,
                'total_segments_24h': total_segments
            },
            'detections': {
                'detections_last_hour': detections_last_hour,
                'total_detections_24h': total_detections,
                'accuracy_rate': round(detection_accuracy, 2)
            },
            'ad_breaks': {
                'ad_breaks_last_hour': ad_breaks_last_hour,
                'active_ad_breaks': active_ad_breaks
            },
            'notifications': {
                'notifications_last_hour': notifications_last_hour,
                'total_notifications_24h': total_notifications,
                'success_rate': round(notification_success_rate, 2)
            },
            'database': {
                'table_stats': [
                    {
                        'table': f"{row[0]}.{row[1]}",
                        'operations': row[2],
                        'size_bytes': row[3]
                    } for row in db_stats
                ]
            },
            'redis': redis_metrics
        }
        
        # Store in cache
        cache.set('application_metrics', app_metrics, timeout=300)
        
        logger.info(f"Application metrics collected: {active_streams} active streams, {detections_last_hour} detections/hour")
        
        return {
            'success': True,
            'active_streams': active_streams,
            'detections_last_hour': detections_last_hour,
            'notification_success_rate': notification_success_rate
        }
        
    except Exception as e:
        logger.error(f"Application metrics collection failed: {e}")
        raise


@shared_task
def generate_performance_report():
    """
    Generate comprehensive performance report.
    
    This task creates detailed performance analysis including
    trends, bottlenecks, and recommendations.
    
    Returns:
        dict: Performance report data
    """
    try:
        logger.debug("Generating performance report")
        
        # Time ranges for analysis
        now = timezone.now()
        last_24h = now - timedelta(hours=24)
        last_7d = now - timedelta(days=7)
        
        # Stream performance analysis
        stream_sessions = StreamSession.objects.filter(started_at__gte=last_7d)
        
        stream_performance = {
            'total_sessions': stream_sessions.count(),
            'successful_sessions': stream_sessions.filter(
                status__in=['completed', 'active']
            ).count(),
            'failed_sessions': stream_sessions.filter(status='failed').count(),
            'avg_session_duration': 0,
            'total_segments': 0
        }
        
        # Calculate average session duration
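        # session.duration() is assumed to return a timedelta, or a falsy value
        # while the session has no recorded end time.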
        completed_sessions = stream_sessions.filter(status='completed')
        if completed_sessions.exists():
            durations = []
            for session in completed_sessions:
                duration = session.duration()
                if duration:
                    durations.append(duration.total_seconds())
            
            if durations:
                stream_performance['avg_session_duration'] = sum(durations) / len(durations)
        
        # Total segments processed
        stream_performance['total_segments'] = HLSSegment.objects.filter(
            processed_at__gte=last_7d
        ).count()
        
        # Detection performance analysis
        detections = JingleDetection.objects.filter(detection_time__gte=last_7d)
        
        detection_performance = {
            'total_detections': detections.count(),
            'confirmed_detections': detections.filter(is_confirmed=True).count(),
            'false_positives': detections.filter(is_confirmed=False).count(),
            'accuracy_rate': 0,
            'avg_confidence': 0
        }
        
        if detections.exists():
            confirmed_count = detection_performance['confirmed_detections']
            total_count = detection_performance['total_detections']
            
            if total_count > 0:
                detection_performance['accuracy_rate'] = (confirmed_count / total_count) * 100
            
            # Calculate average confidence
            confidence_scores = detections.values_list('confidence_score', flat=True)
            if confidence_scores:
                detection_performance['avg_confidence'] = sum(confidence_scores) / len(confidence_scores)
        
        # Notification performance analysis
        notifications = Notification.objects.filter(created_at__gte=last_7d)
        
        notification_performance = {
            'total_notifications': notifications.count(),
            'sent_notifications': notifications.filter(status='completed').count(),
            'failed_notifications': notifications.filter(status='failed').count(),
            'pending_notifications': notifications.filter(status='pending').count(),
            'avg_delivery_time': 0
        }
        
        # Calculate average delivery time
        completed_notifications = notifications.filter(
            status='completed',
            sent_at__isnull=False
        )
        
        if completed_notifications.exists():
            delivery_times = []
            for notification in completed_notifications:
                if notification.sent_at and notification.created_at:
                    delta = notification.sent_at - notification.created_at
                    delivery_times.append(delta.total_seconds())
            
            if delivery_times:
                notification_performance['avg_delivery_time'] = sum(delivery_times) / len(delivery_times)
        
        # System health indicators
        health_indicators = {
            'database_connections': 0,
            'redis_memory_usage': 0,
            'celery_active_tasks': 0,
            'disk_usage_percent': 0
        }
        
        try:
            # Database connection count
            with connection.cursor() as cursor:
                cursor.execute("SELECT count(*) FROM pg_stat_activity WHERE state = 'active'")
                health_indicators['database_connections'] = cursor.fetchone()[0]
        except Exception as e:
            logger.warning(f"Failed to get database metrics: {e}")
        
        try:
            # Redis memory usage
            r = redis.from_url(settings.CELERY_BROKER_URL)
            redis_info = r.info()
            health_indicators['redis_memory_usage'] = redis_info.get('used_memory', 0)
        except Exception as e:
            logger.warning(f"Failed to get Redis metrics: {e}")
        
        try:
            # Disk usage
            disk_usage = psutil.disk_usage('/')
            health_indicators['disk_usage_percent'] = (disk_usage.used / disk_usage.total) * 100
        except Exception as e:
            logger.warning(f"Failed to get disk metrics: {e}")
        
        # Performance recommendations
        recommendations = []
        
        if stream_performance['failed_sessions'] > stream_performance['successful_sessions'] * 0.1:
            recommendations.append("High stream failure rate detected - check FFmpeg configuration")
        
        if detection_performance['accuracy_rate'] < 80:
            recommendations.append("Low detection accuracy - review jingle templates and thresholds")
        
        if notification_performance['avg_delivery_time'] > 30:  # seconds
            recommendations.append("Slow notification delivery - check channel configurations")
        
        if health_indicators['disk_usage_percent'] > 85:
            recommendations.append("High disk usage - consider cleanup of old segments")
        
        report_data = {
            'timestamp': now.isoformat(),
            'period_days': 7,
            'stream_performance': stream_performance,
            'detection_performance': detection_performance,
            'notification_performance': notification_performance,
            'health_indicators': health_indicators,
            'recommendations': recommendations
        }
        
        # Store report in cache
        cache.set('performance_report', report_data, timeout=3600)  # 1 hour
        
        logger.info(f"Performance report generated with {len(recommendations)} recommendations")
        
        return {
            'success': True,
            'recommendations_count': len(recommendations),
            'report_timestamp': now.isoformat()
        }
        
    except Exception as e:
        logger.error(f"Performance report generation failed: {e}")
        raise


@shared_task
def monitor_resource_usage():
    """
    Monitor resource usage and alert on thresholds.
    
    This task checks system resources and triggers alerts
    when usage exceeds defined thresholds.
    
    Returns:
        dict: Monitoring results
    """
    try:
        logger.debug("Monitoring resource usage")
        
        alerts = []
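        # Each alert is a dict with 'type', 'message', and 'severity'
        # ('warning' or 'critical'); critical alerts are forwarded below.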
        
        # CPU usage check
        cpu_percent = psutil.cpu_percent(interval=1)
        if cpu_percent > 80:
            alerts.append({
                'type': 'cpu_high',
                'message': f'High CPU usage: {cpu_percent}%',
                'severity': 'warning' if cpu_percent < 90 else 'critical'
            })
        
        # Memory usage check
        memory = psutil.virtual_memory()
        if memory.percent > 85:
            alerts.append({
                'type': 'memory_high',
                'message': f'High memory usage: {memory.percent}%',
                'severity': 'warning' if memory.percent < 95 else 'critical'
            })
        
        # Disk usage check
        disk = psutil.disk_usage('/')
        disk_percent = (disk.used / disk.total) * 100
        if disk_percent > 80:
            alerts.append({
                'type': 'disk_high',
                'message': f'High disk usage: {disk_percent:.1f}%',
                'severity': 'warning' if disk_percent < 90 else 'critical'
            })
        
        # Database connection check
        try:
            with connection.cursor() as cursor:
                cursor.execute("SELECT count(*) FROM pg_stat_activity")
                active_connections = cursor.fetchone()[0]
                
                if active_connections > 150:  # Assuming max_connections = 200
                    alerts.append({
                        'type': 'db_connections_high',
                        'message': f'High database connections: {active_connections}',
                        'severity': 'warning'
                    })
        except Exception as e:
            alerts.append({
                'type': 'db_connection_failed',
                'message': f'Database connection check failed: {e}',
                'severity': 'critical'
            })
        
        # Redis connection check
        try:
            r = redis.from_url(settings.CELERY_BROKER_URL)
            r.ping()
            
            # Check Redis memory usage
            redis_info = r.info()
            redis_memory_mb = redis_info.get('used_memory', 0) / 1024 / 1024
            
            if redis_memory_mb > 500:  # 500MB threshold
                alerts.append({
                    'type': 'redis_memory_high',
                    'message': f'High Redis memory usage: {redis_memory_mb:.1f}MB',
                    'severity': 'warning'
                })
                
        except Exception as e:
            alerts.append({
                'type': 'redis_connection_failed',
                'message': f'Redis connection check failed: {e}',
                'severity': 'critical'
            })
        
        # Stream health check
        failed_streams = StreamSession.objects.filter(
            status='failed',
            updated_at__gte=timezone.now() - timedelta(minutes=10)
        ).count()
        
        if failed_streams > 3:
            alerts.append({
                'type': 'stream_failures_high',
                'message': f'Multiple stream failures: {failed_streams} in last 10 minutes',
                'severity': 'critical'
            })
        
        # Send critical alerts via notification system
        critical_alerts = [alert for alert in alerts if alert['severity'] == 'critical']
        
        if critical_alerts:
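            # Imported here rather than at module level, presumably to avoid a
            # circular import between the monitoring and notification task modules.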
            from apps.notifications.tasks import send_notification_via_rule
            
            for alert in critical_alerts:
                send_notification_via_rule.delay(
                    event_type='system_alert',
                    context={
                        'alert_type': alert['type'],
                        'message': alert['message'],
                        'severity': alert['severity'],
                        'timestamp': timezone.now().isoformat()
                    }
                )
        
        logger.info(f"Resource monitoring completed: {len(alerts)} alerts ({len(critical_alerts)} critical)")
        
        return {
            'success': True,
            'total_alerts': len(alerts),
            'critical_alerts': len(critical_alerts),
            'alerts': alerts
        }
        
    except Exception as e:
        logger.error(f"Resource monitoring failed: {e}")
        raise


@shared_task
def cleanup_metrics_cache():
    """
    Clean up old metrics data from cache.
    
    This task deletes old cached metrics data to maintain cache performance
    and prevent memory bloat. It drops the accumulated historical metrics
    entries that are no longer needed by the monitoring dashboards.
    
    Cache Keys Cleaned:
    - system_metrics_history: Historical system performance data
    - application_metrics_history: Application-specific metrics
    - performance_trends: Calculated performance trend data
    - alert_history: Historical alert and notification data
    
    This cleanup is essential for:
    - Preventing Redis/cache memory exhaustion
    - Ensuring fresh metrics data is prioritized
    - Maintaining cache performance for active monitoring
    
    Scheduled to run every hour via Celery beat.
    
    Returns:
        dict: Cleanup results with count of cleaned keys
    """
    try:
        logger.debug("Starting metrics cache cleanup - removing old monitoring data")
        
        # List of cache keys to clean up - these store historical metrics data
        # that can accumulate over time and consume significant cache memory
        cache_keys = [
            'system_metrics_history',      # CPU, memory, disk usage history
            'application_metrics_history', # Stream processing metrics
            'performance_trends',          # Calculated trend analysis data
            'alert_history'               # Historical alert data
        ]
        
        cleaned_keys = 0
        
        # Delete each cache key to free up memory
        for key in cache_keys:
            try:
                cache.delete(key)
                cleaned_keys += 1
                logger.debug(f"Cleaned cache key: {key}")
            except Exception as e:
                logger.warning(f"Failed to clean cache key {key}: {e}")
        
        logger.info(f"Metrics cache cleanup completed: {cleaned_keys}/{len(cache_keys)} keys cleaned")
        
        return {
            'success': True,
            'cleaned_keys': cleaned_keys,
            'total_keys': len(cache_keys)
        }
        
    except Exception as e:
        logger.error(f"Metrics cache cleanup failed: {e}")
        raise


@shared_task
def health_check_external_services():
    """
    Check health of external services and dependencies.
    
    This task verifies connectivity and performance of external
    services like DAI API, stream sources, and notification services.
    
    Returns:
        dict: Health check results
    """
    try:
        logger.debug("Checking external services health")
        
        health_results = {}
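        # Maps each checked service to a dict with a 'status' of 'healthy' or
        # 'unhealthy', plus response details or an 'error' message.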
        
        # Check DAI API if configured
        dai_api_url = settings.EXTERNAL_APIS.get('DAI_API_URL')
        if dai_api_url:
            try:
                import requests
                response = requests.get(dai_api_url, timeout=10)
                health_results['dai_api'] = {
                    'status': 'healthy' if response.status_code == 200 else 'unhealthy',
                    'response_time': response.elapsed.total_seconds(),
                    'status_code': response.status_code
                }
            except Exception as e:
                health_results['dai_api'] = {
                    'status': 'unhealthy',
                    'error': str(e)
                }
        
        # Check active stream URLs
        active_channels = Channel.objects.filter(
            is_active=True,
            sessions__status__in=['active', 'processing']
        ).distinct()
        
        stream_health = {}
        import requests

        for channel in active_channels:
            try:
                response = requests.head(channel.hls_url, timeout=10)
                stream_health[channel.name] = {
                    'status': 'healthy' if response.status_code == 200 else 'unhealthy',
                    'response_time': response.elapsed.total_seconds(),
                    'status_code': response.status_code
                }
            except Exception as e:
                stream_health[channel.name] = {
                    'status': 'unhealthy',
                    'error': str(e)
                }
        
        health_results['streams'] = stream_health
        
        # Check Telegram bot if configured
        telegram_config = settings.TELEGRAM_CONFIG
        if telegram_config.get('BOT_TOKEN'):
            try:
                from apps.notifications.services import TelegramService
                telegram_service = TelegramService()
                
                # Test bot connection
                bot_info = telegram_service.get_bot_info()
                health_results['telegram'] = {
                    'status': 'healthy' if bot_info else 'unhealthy',
                    'bot_username': bot_info.get('username') if bot_info else None
                }
            except Exception as e:
                health_results['telegram'] = {
                    'status': 'unhealthy',
                    'error': str(e)
                }
        
        # Store results
        cache.set('external_services_health', health_results, timeout=600)  # 10 minutes
        
        # Count unhealthy services
        unhealthy_count = sum(
            1 for service_data in health_results.values()
            if isinstance(service_data, dict) and service_data.get('status') == 'unhealthy'
        )
        
        # Add stream health
        unhealthy_streams = sum(
            1 for stream_data in health_results.get('streams', {}).values()
            if stream_data.get('status') == 'unhealthy'
        )
        
        total_unhealthy = unhealthy_count + unhealthy_streams
        
        logger.info(f"External services health check completed: {total_unhealthy} unhealthy services")
        
        return {
            'success': True,
            'total_services_checked': len(health_results),
            'unhealthy_services': total_unhealthy,
            'health_results': health_results
        }
        
    except Exception as e:
        logger.error(f"External services health check failed: {e}")
        raise


@shared_task
def create_backup():
    """
    Create a scheduled backup of database, media, and configuration files.
    
    Returns:
        dict: Backup results with file paths and status
    """
    logger.info("Starting scheduled backup")
    
    try:
        backup_results = create_scheduled_backup()
        
        if backup_results['success']:
            logger.info(f"Backup completed successfully: {backup_results}")
            
            # Cache backup status for monitoring
            cache.set('last_backup_status', backup_results, timeout=86400)  # 24 hours
            
        else:
            logger.error(f"Backup failed: {backup_results}")
        
        return backup_results
        
    except Exception as e:
        logger.error(f"Backup task failed: {str(e)}")
        return {
            'success': False,
            'error': str(e),
            'timestamp': timezone.now().isoformat()
        }


@shared_task
def cleanup_backups():
    """
    Clean up old backup files based on retention policy.
    
    This task handles deleting old backup files to manage disk space and
    maintain an organized backup directory. It removes backup files that
    exceed the configured retention period to prevent unlimited storage growth.
    
    Backup Retention Policy:
    - Database backups: Configurable retention period (default: 30 days)
    - Media file backups: Configurable retention period (default: 7 days)
    - Configuration backups: Configurable retention period (default: 90 days)
    
    This cleanup is essential for:
    - Preventing disk space exhaustion from old backups
    - Maintaining backup directory organization
    - Ensuring backup storage costs remain manageable
    - Complying with data retention policies
    
    Scheduled to run weekly via Celery beat to maintain storage efficiency.
    
    Returns:
        dict: Cleanup results with count of deleted files
    """
    logger.info("Starting backup cleanup - removing old backup files based on retention policy")
    
    try:
        # Call the core backup cleanup function which handles the actual file deletion
        # based on configured retention periods for different backup types
        cleaned_count = cleanup_old_backups()
        
        result = {
            'success': True,
            'cleaned_files': cleaned_count,
            'timestamp': timezone.now().isoformat()
        }
        
        logger.info(f"Backup cleanup completed successfully: removed {cleaned_count} old backup files")
        return result
        
    except Exception as e:
        logger.error(f"Backup cleanup failed: {str(e)}")
        return {
            'success': False,
            'error': str(e),
            'timestamp': timezone.now().isoformat()
        }

