# -*- coding: utf-8 -*-
"""
Adtlas Activities Utilities

This module contains utility functions and helper classes for the activities app,
providing common functionality for activity logging, data processing, and
analysis.

Features:
    - IP address and user agent extraction
    - Activity filtering and validation
    - Data export utilities
    - Performance monitoring
    - Security helpers
    - Analytics calculations

Author: Adtlas Development Team
Version: 1.0.0
Last Updated: 2025-01-27
"""

import re
import json
import hashlib
import ipaddress
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Union, Any
from urllib.parse import urlparse

from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.contenttypes.models import ContentType
from django.core.cache import cache
from django.core.exceptions import ValidationError
from django.db.models import Avg, Count, Q, Sum
from django.db.models.functions import ExtractHour, TruncDate
from django.http import HttpRequest
from django.utils import timezone
from django.utils.text import slugify

# Get the user model
User = get_user_model()


def get_client_ip(request: HttpRequest) -> str:
    """
    Return the originating client IP address for a request.

    Walks a preference-ordered list of proxy headers and returns the
    first value that parses as a valid IP address, falling back to
    REMOTE_ADDR (or loopback) when nothing valid is found.

    Args:
        request: Django HttpRequest object

    Returns:
        str: Client IP address
    """
    # Headers consulted in order of trustworthiness
    candidate_headers = (
        'HTTP_X_FORWARDED_FOR',
        'HTTP_X_REAL_IP',
        'HTTP_X_FORWARDED',
        'HTTP_X_CLUSTER_CLIENT_IP',
        'HTTP_FORWARDED_FOR',
        'HTTP_FORWARDED',
        'REMOTE_ADDR',
    )

    for header in candidate_headers:
        raw_value = request.META.get(header)
        if not raw_value:
            continue

        # X-Forwarded-For may be "client, proxy1, proxy2" — the client is first
        candidate = raw_value.split(',')[0].strip() if ',' in raw_value else raw_value

        if is_valid_ip(candidate):
            return candidate

    # Nothing validated; fall back to REMOTE_ADDR or loopback
    return request.META.get('REMOTE_ADDR', '127.0.0.1')


def get_user_agent(request: HttpRequest) -> str:
    """
    Return the request's User-Agent header, capped at 500 characters.

    Args:
        request: Django HttpRequest object

    Returns:
        str: User agent string (truncated to 500 characters), or
        'Unknown' when the header is absent or empty
    """
    raw = request.META.get('HTTP_USER_AGENT', '')
    if not raw:
        return 'Unknown'
    # Cap the length so oversized headers cannot overflow the DB column
    return raw[:500]


def is_valid_ip(ip: str) -> bool:
    """
    Check whether a string parses as an IPv4 or IPv6 address.

    Args:
        ip: IP address string to validate

    Returns:
        bool: True if valid IP address, False otherwise
    """
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        # Not parseable as either IPv4 or IPv6
        return False
    return True


def should_log_activity(model_class) -> bool:
    """
    Determine if activities should be logged for a given model.

    Decision order:
        1. Global ``ACTIVITIES_AUTO_LOG_ENABLED`` kill-switch.
        2. ``ACTIVITIES_EXCLUDED_APPS`` / ``ACTIVITIES_EXCLUDED_MODELS`` deny lists.
        3. A per-model ``_activity_logging`` override (bool, or a dict with
           an ``'enabled'`` key).
        4. ``ACTIVITIES_INCLUDED_APPS`` allow list.

    Args:
        model_class: Django model class

    Returns:
        bool: True if activities should be logged, False otherwise
    """
    app_label = model_class._meta.app_label
    model_name = model_class._meta.model_name

    # Global kill-switch for automatic activity logging
    if not getattr(settings, 'ACTIVITIES_AUTO_LOG_ENABLED', True):
        return False

    # Apps that should never be auto-logged (framework bookkeeping apps)
    excluded_apps = getattr(settings, 'ACTIVITIES_EXCLUDED_APPS', [
        'sessions', 'admin', 'contenttypes', 'auth'
    ])
    if app_label in excluded_apps:
        return False

    # Specific models excluded to avoid logging the log itself (recursion)
    excluded_models = getattr(settings, 'ACTIVITIES_EXCLUDED_MODELS', [
        'activities.activity',
        'activities.activitysummary',
        'core.activitylog'
    ])
    if f'{app_label}.{model_name}' in excluded_models:
        return False

    # BUG FIX: the previous default of {} matched the dict branch below and
    # unconditionally returned True, which made the ACTIVITIES_INCLUDED_APPS
    # allow list unreachable for any model without an explicit
    # _activity_logging attribute. Use None as the "not configured" sentinel
    # so such models fall through to the allow-list check.
    model_config = getattr(model_class, '_activity_logging', None)

    if isinstance(model_config, dict):
        return model_config.get('enabled', True)
    if isinstance(model_config, bool):
        return model_config

    # No per-model override: log only for explicitly included apps
    included_apps = getattr(settings, 'ACTIVITIES_INCLUDED_APPS', [
        'accounts', 'campaigns', 'channels', 'analytics', 'vast', 'playlists'
    ])
    return app_label in included_apps


def get_activity_context(request: HttpRequest) -> Dict[str, Any]:
    """
    Build a context dictionary describing a request for activity logging.

    Captures client IP, user agent, HTTP method/path and a timestamp,
    plus the referer and any non-sensitive query parameters when present.

    Args:
        request: Django HttpRequest object

    Returns:
        dict: Context information for activity logging
    """
    context: Dict[str, Any] = {
        'ip_address': get_client_ip(request),
        'user_agent': get_user_agent(request),
        'method': request.method,
        'path': request.path,
        'timestamp': timezone.now().isoformat(),
    }

    referer = request.META.get('HTTP_REFERER')
    if referer:
        context['referer'] = referer

    if request.GET:
        # Drop parameters whose names suggest credentials or secrets
        blocked_words = ('password', 'token', 'key', 'secret')
        safe_params = {
            name: value
            for name, value in request.GET.items()
            if not any(word in name.lower() for word in blocked_words)
        }
        if safe_params:
            context['query_params'] = safe_params

    return context


def sanitize_metadata(metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of ``metadata`` with sensitive values redacted.

    Any key whose name contains a credential-like marker is replaced with
    '[REDACTED]'; nested dicts (including dicts inside lists) are
    sanitized recursively. Non-dict input yields an empty dict.

    Args:
        metadata: Raw metadata dictionary

    Returns:
        dict: Sanitized metadata
    """
    if not isinstance(metadata, dict):
        return {}

    sensitive_markers = (
        'password', 'passwd', 'pwd', 'secret', 'key', 'token',
        'api_key', 'access_token', 'refresh_token', 'session_key',
        'csrf_token', 'auth_token', 'private_key', 'public_key',
    )

    def _clean_value(key: str, value: Any) -> Any:
        # Redact by key name; otherwise recurse into containers
        if any(marker in key.lower() for marker in sensitive_markers):
            return '[REDACTED]'
        if isinstance(value, dict):
            return sanitize_metadata(value)
        if isinstance(value, list):
            return [
                sanitize_metadata(item) if isinstance(item, dict) else item
                for item in value
            ]
        return value

    return {key: _clean_value(key, value) for key, value in metadata.items()}


def generate_activity_hash(user_id: Optional[int], action: str,
                          content_type_id: Optional[int], object_id: Optional[str],
                          timestamp: datetime) -> str:
    """
    Derive a deterministic fingerprint for an activity.

    The timestamp is truncated to the minute so near-simultaneous
    repetitions of the same action collapse onto one hash, which lets
    callers detect duplicates.

    Args:
        user_id: User ID
        action: Activity action
        content_type_id: Content type ID
        object_id: Object ID
        timestamp: Activity timestamp (rounded to minute)

    Returns:
        str: SHA-256 hash of the activity
    """
    # Drop seconds/microseconds so activities within the same minute match
    minute = timestamp.replace(second=0, microsecond=0)

    fingerprint = ':'.join((
        str(user_id),
        action,
        str(content_type_id),
        str(object_id),
        minute.isoformat(),
    ))

    return hashlib.sha256(fingerprint.encode('utf-8')).hexdigest()


def is_duplicate_activity(user_id: Optional[int], action: str,
                         content_type_id: Optional[int], object_id: Optional[str],
                         timestamp: datetime, window_minutes: int = 1) -> bool:
    """
    Report whether the same activity was already seen within a window.

    Uses the cache as a short-lived registry of activity fingerprints:
    a hit means a duplicate; a miss records this activity so later
    repeats inside the window are flagged.

    Args:
        user_id: User ID
        action: Activity action
        content_type_id: Content type ID
        object_id: Object ID
        timestamp: Activity timestamp
        window_minutes: Time window in minutes to check for duplicates

    Returns:
        bool: True if duplicate activity exists, False otherwise
    """
    fingerprint = generate_activity_hash(
        user_id, action, content_type_id, object_id, timestamp
    )
    cache_key = f'activity_hash_{fingerprint}'

    if cache.get(cache_key):
        return True

    # First sighting: remember it until the window expires
    cache.set(cache_key, True, window_minutes * 60)
    return False


def get_activity_statistics(user_id: Optional[int] = None,
                           category_id: Optional[int] = None,
                           start_date: Optional[datetime] = None,
                           end_date: Optional[datetime] = None) -> Dict[str, Any]:
    """
    Compute aggregate activity metrics for an optional set of filters.

    Args:
        user_id: User ID to filter by (optional)
        category_id: Category ID to filter by (optional)
        start_date: Start date for filtering (optional)
        end_date: End date for filtering (optional)

    Returns:
        dict: Activity statistics (totals, success rate, average
        duration, top actions, unique user/IP counts)
    """
    from .models import Activity

    # Assemble all requested filters into one Q object
    filters = Q()
    if user_id:
        filters &= Q(user_id=user_id)
    if category_id:
        filters &= Q(category_id=category_id)
    if start_date:
        filters &= Q(created_at__gte=start_date)
    if end_date:
        filters &= Q(created_at__lte=end_date)

    queryset = Activity.objects.filter(filters)

    total = queryset.count()
    succeeded = queryset.filter(is_successful=True).count()
    failed = queryset.filter(is_successful=False).count()
    success_rate = (succeeded / total * 100) if total > 0 else 0

    # Average duration over activities that recorded one
    avg_duration = queryset.filter(
        duration_ms__isnull=False
    ).aggregate(avg_duration=Avg('duration_ms'))['avg_duration'] or 0

    top_actions = list(
        queryset.values('action')
        .annotate(count=Count('action'))
        .order_by('-count')[:10]
    )

    unique_users = queryset.filter(user__isnull=False).values('user').distinct().count()
    unique_ips = queryset.values('ip_address').distinct().count()

    return {
        'total_activities': total,
        'successful_activities': succeeded,
        'failed_activities': failed,
        'success_rate': round(success_rate, 2),
        'avg_duration_ms': round(avg_duration, 2),
        'top_actions': top_actions,
        'unique_users': unique_users,
        'unique_ips': unique_ips
    }


def export_activities_to_csv(queryset, filename: Optional[str] = None) -> str:
    """
    Export activities to CSV format.

    Args:
        queryset: Activity queryset to export
        filename: Accepted for backward compatibility but currently
            unused — the CSV is returned as a string, not written to disk.

    Returns:
        str: CSV content as string
    """
    import csv
    from io import StringIO

    output = StringIO()
    writer = csv.writer(output)

    # Write header
    headers = [
        'ID', 'User', 'Action', 'Description', 'Category',
        'Content Type', 'Object ID', 'IP Address', 'User Agent',
        'Is Successful', 'Duration (ms)', 'Error Message',
        'Created At', 'Metadata'
    ]
    writer.writerow(headers)

    # Write data; select_related avoids one query per related row
    for activity in queryset.select_related('user', 'category', 'content_type'):
        row = [
            str(activity.id),
            activity.user.email if activity.user else 'System',
            activity.get_action_display(),
            activity.description,
            activity.category.name if activity.category else '',
            str(activity.content_type) if activity.content_type else '',
            activity.object_id or '',
            activity.ip_address or '',
            activity.user_agent or '',
            'Yes' if activity.is_successful else 'No',
            # BUG FIX: `duration_ms or ''` dropped a legitimate 0 ms
            # duration; only missing (None) durations become blank
            '' if activity.duration_ms is None else activity.duration_ms,
            activity.error_message or '',
            activity.created_at.isoformat(),
            json.dumps(activity.metadata) if activity.metadata else '{}'
        ]
        writer.writerow(row)

    return output.getvalue()


def export_activities_to_json(queryset) -> str:
    """
    Serialize activities into a pretty-printed JSON array.

    Args:
        queryset: Activity queryset to export

    Returns:
        str: JSON content as string
    """
    serialized = []

    # select_related keeps this to a single query for related rows
    for activity in queryset.select_related('user', 'category', 'content_type'):
        user = activity.user
        category = activity.category
        content_type = activity.content_type

        serialized.append({
            'id': str(activity.id),
            'user': {
                'id': user.id if user else None,
                'email': user.email if user else None,
                'full_name': user.get_full_name() if user else None
            },
            'action': activity.action,
            'action_display': activity.get_action_display(),
            'description': activity.description,
            'category': {
                'id': category.id if category else None,
                'name': category.name if category else None,
                'code': category.code if category else None
            },
            'content_type': {
                'id': content_type.id if content_type else None,
                'app_label': content_type.app_label if content_type else None,
                'model': content_type.model if content_type else None
            },
            'object_id': activity.object_id,
            'ip_address': activity.ip_address,
            'user_agent': activity.user_agent,
            'is_successful': activity.is_successful,
            'duration_ms': activity.duration_ms,
            'error_message': activity.error_message,
            'metadata': activity.metadata,
            'created_at': activity.created_at.isoformat(),
            'updated_at': activity.updated_at.isoformat()
        })

    return json.dumps(serialized, indent=2, ensure_ascii=False)


def validate_activity_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate and clean activity data prior to logging.

    Checks required fields, action format, IP address validity,
    duration bounds and metadata shape/size; on success returns a copy
    with sensitive metadata redacted and long strings truncated.

    Args:
        data: Activity data dictionary

    Returns:
        dict: Validated and cleaned activity data

    Raises:
        ValidationError: If data is invalid
    """
    errors: Dict[str, str] = {}

    # Required fields and action format
    action = data.get('action', '')
    if not action:
        errors['action'] = 'Action is required'
    elif not re.match(r'^[a-z_]+$', action):
        errors['action'] = 'Action must contain only lowercase letters and underscores'

    if not data.get('description'):
        errors['description'] = 'Description is required'

    ip_address = data.get('ip_address')
    if ip_address and not is_valid_ip(ip_address):
        errors['ip_address'] = 'Invalid IP address format'

    # Duration must be numeric and within [0, 1 hour]
    duration_ms = data.get('duration_ms')
    if duration_ms is not None:
        try:
            numeric_duration = float(duration_ms)
        except (ValueError, TypeError):
            errors['duration_ms'] = 'Duration must be a valid number'
        else:
            if numeric_duration < 0:
                errors['duration_ms'] = 'Duration cannot be negative'
            elif numeric_duration > 3600000:  # 1 hour in milliseconds
                errors['duration_ms'] = 'Duration cannot exceed 1 hour'

    # Metadata must be a dict of bounded serialized size
    metadata = data.get('metadata')
    if metadata is not None:
        if not isinstance(metadata, dict):
            errors['metadata'] = 'Metadata must be a dictionary'
        elif len(json.dumps(metadata)) > 10000:  # 10KB limit
            errors['metadata'] = 'Metadata is too large (max 10KB)'

    if errors:
        raise ValidationError(errors)

    cleaned_data = data.copy()

    # Strip credentials/secrets out of metadata before it is stored
    if metadata:
        cleaned_data['metadata'] = sanitize_metadata(metadata)

    # Truncate free-text fields to their storage limits
    for field, limit in (('description', 1000), ('error_message', 1000), ('user_agent', 500)):
        if cleaned_data.get(field):
            cleaned_data[field] = cleaned_data[field][:limit]

    return cleaned_data


def get_user_activity_summary(user_id: int, days: int = 30) -> Dict[str, Any]:
    """
    Get activity summary for a specific user.

    Builds totals, category/action breakdowns, a zero-filled day-by-day
    series and an hourly distribution for the trailing ``days`` window.

    Args:
        user_id: User ID
        days: Number of days to include in summary

    Returns:
        dict: User activity summary
    """
    from .models import Activity

    end_date = timezone.now()
    start_date = end_date - timedelta(days=days)

    # Get user activities in the window
    activities = Activity.objects.filter(
        user_id=user_id,
        created_at__gte=start_date,
        created_at__lte=end_date
    )

    # Calculate statistics
    total_activities = activities.count()
    successful_activities = activities.filter(is_successful=True).count()
    failed_activities = activities.filter(is_successful=False).count()

    # Get activity breakdown by category
    category_breakdown = list(activities.values(
        'category__name', 'category__code'
    ).annotate(
        count=Count('id')
    ).order_by('-count'))

    # Get activity breakdown by action
    action_breakdown = list(activities.values(
        'action'
    ).annotate(
        count=Count('id')
    ).order_by('-count')[:10])

    # PERF FIX: the previous implementation issued one COUNT query per day
    # (N+1). A single TruncDate group-by fetches all daily counts at once;
    # days with no activity are zero-filled below.
    per_day = {
        row['day']: row['count']
        for row in activities.annotate(
            day=TruncDate('created_at')
        ).values('day').annotate(count=Count('id')).order_by()
    }
    daily_activities = []
    for offset in range(days):
        day = (start_date + timedelta(days=offset)).date()
        daily_activities.append({
            'date': day.isoformat(),
            'count': per_day.get(day, 0)
        })

    # FIX: ExtractHour replaces the deprecated, PostgreSQL-specific
    # .extra(select={'hour': 'EXTRACT(hour FROM created_at)'}) call
    hourly_activities = list(activities.annotate(
        hour=ExtractHour('created_at')
    ).values('hour').annotate(
        count=Count('id')
    ).order_by('hour'))

    return {
        'user_id': user_id,
        'period_days': days,
        'total_activities': total_activities,
        'successful_activities': successful_activities,
        'failed_activities': failed_activities,
        'success_rate': (successful_activities / total_activities * 100) if total_activities > 0 else 0,
        'category_breakdown': category_breakdown,
        'action_breakdown': action_breakdown,
        'daily_activities': daily_activities,
        'hourly_activities': hourly_activities
    }


def cleanup_old_activities(days_to_keep: int = 90) -> int:
    """
    Delete activities older than the retention window.

    Args:
        days_to_keep: Number of days to keep activities

    Returns:
        int: Number of activities deleted
    """
    from .models import Activity

    cutoff = timezone.now() - timedelta(days=days_to_keep)

    # delete() returns (total_deleted, {model_label: count}); we only
    # need the total
    removed, _per_model = Activity.objects.filter(created_at__lt=cutoff).delete()
    return removed


def get_security_alerts(hours: int = 24) -> List[Dict[str, Any]]:
    """
    Scan recent activity for security-relevant patterns.

    Flags IPs with repeated failed logins and IPs generating a high
    volume of activity spread across many user accounts.

    Args:
        hours: Number of hours to check for alerts

    Returns:
        list: List of security alerts
    """
    from .models import Activity

    window_end = timezone.now()
    window_start = window_end - timedelta(hours=hours)

    recent = Activity.objects.filter(
        created_at__gte=window_start,
        created_at__lte=window_end
    )

    alerts: List[Dict[str, Any]] = []

    # IPs with 5+ failed logins inside the window → likely brute force
    brute_force_candidates = recent.filter(
        action='login_failed'
    ).values('ip_address').annotate(
        count=Count('id')
    ).filter(count__gte=5)

    for row in brute_force_candidates:
        alerts.append({
            'type': 'multiple_failed_logins',
            'severity': 'high',
            'ip_address': row['ip_address'],
            'count': row['count'],
            'message': f"Multiple failed login attempts ({row['count']}) from IP {row['ip_address']}"
        })

    # IPs with unusually high volume touching many distinct accounts
    high_volume = recent.values('ip_address').annotate(
        count=Count('id'),
        user_count=Count('user', distinct=True)
    ).filter(count__gte=100, user_count__gte=5)

    for row in high_volume:
        alerts.append({
            'type': 'suspicious_activity',
            'severity': 'medium',
            'ip_address': row['ip_address'],
            'activity_count': row['count'],
            'user_count': row['user_count'],
            'message': f"Suspicious activity from IP {row['ip_address']}: {row['count']} activities across {row['user_count']} users"
        })

    return alerts