# ALwrity/ToBeMigrated/content_scheduler/core/schedule_optimizer.py

"""
Schedule optimization system for content scheduling.
"""

import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import numpy as np
from collections import defaultdict

# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus, ContentType, Platform, get_session

logger = logging.getLogger(__name__)

@dataclass
class OptimizationResult:
    """Result of schedule optimization.

    Built by ``ScheduleOptimizer.optimize_schedule`` for a single schedule.
    The fallback paths there (content item not found, unexpected error)
    return the schedule's own time with a zero score and zero confidence.
    """
    # The schedule that was passed in for optimization.
    original_schedule: Schedule
    # Proposed publishing time; equals the schedule's scheduled_time when no
    # better slot was found or optimization failed.
    optimized_time: datetime
    # Score of the proposed time minus the score of the current time, in the
    # heuristic points produced by _calculate_engagement_score.
    improvement_score: float
    # Human-readable explanation of the change, or an error description.
    optimization_reason: str
    # improvement_score relative to the current score, capped at 1.0;
    # 0.0 when the current score is not positive.
    confidence: float

class ScheduleOptimizer:
    """Optimize content scheduling for maximum engagement."""

    def __init__(self) -> None:
        """Initialize the schedule optimizer.

        Binds the module-level logger, obtains a database session from
        ``get_session()`` and sets up the static lookup tables consulted by
        the scoring heuristics.
        """
        self.logger = logger
        # One session is obtained here and reused by every query issued
        # through this instance.
        self.session = get_session()

        # Platform-specific optimal times (can be made configurable)
        # NOTE(review): no method visible in this class reads this table;
        # scoring only consults content_type_patterns — confirm whether it
        # is used elsewhere.
        self.platform_optimal_times = {
            Platform.TWITTER: [9, 12, 15, 18],  # Hours of day
            Platform.FACEBOOK: [9, 13, 15],
            Platform.LINKEDIN: [8, 12, 17],
            Platform.INSTAGRAM: [11, 14, 17, 19],
            Platform.YOUTUBE: [14, 16, 18, 20]
        }

        # Content type engagement patterns
        # 'peak_hours' are hours of the day (0-23) that earn the full hour
        # bonus in _calculate_engagement_score.
        # NOTE(review): 'duration' is not read by any method visible here;
        # its unit is presumably hours — confirm.
        self.content_type_patterns = {
            ContentType.ARTICLE: {'peak_hours': [9, 14, 16], 'duration': 2},
            ContentType.VIDEO: {'peak_hours': [12, 18, 20], 'duration': 3},
            ContentType.IMAGE: {'peak_hours': [11, 15, 19], 'duration': 1},
            ContentType.SOCIAL_POST: {'peak_hours': [8, 12, 17, 21], 'duration': 1}
        }

    def optimize_schedule(self, schedule: Schedule) -> OptimizationResult:
        """Optimize a single schedule for better engagement.

        Args:
            schedule: Schedule to optimize

        Returns:
            OptimizationResult with optimization details
        """
        try:
            # Get content item details
            content_item = self.session.query(ContentItem).filter(
                ContentItem.id == schedule.content_item_id
            ).first()

            if not content_item:
                return OptimizationResult(
                    original_schedule=schedule,
                    optimized_time=schedule.scheduled_time,
                    improvement_score=0.0,
                    optimization_reason="Content item not found",
                    confidence=0.0
                )

            # Calculate current engagement score
            current_score = self._calculate_engagement_score(
                schedule.scheduled_time,
                content_item.content_type,
                schedule.priority
            )

            # Find optimal time
            optimal_time, optimal_score = self._find_optimal_time(
                schedule,
                content_item
            )

            # Calculate improvement
            improvement_score = optimal_score - current_score
            confidence = min(improvement_score / current_score, 1.0) if current_score > 0 else 0.0

            # Generate optimization reason
            reason = self._generate_optimization_reason(
                schedule.scheduled_time,
                optimal_time,
                content_item.content_type,
                improvement_score
            )

            return OptimizationResult(
                original_schedule=schedule,
                optimized_time=optimal_time,
                improvement_score=improvement_score,
                optimization_reason=reason,
                confidence=confidence
            )

        except Exception as e:
            self.logger.error(f"Error optimizing schedule: {str(e)}")
            return OptimizationResult(
                original_schedule=schedule,
                optimized_time=schedule.scheduled_time,
                improvement_score=0.0,
                optimization_reason=f"Optimization error: {str(e)}",
                confidence=0.0
            )

    def optimize_multiple_schedules(
        self,
        schedules: List[Schedule],
        avoid_conflicts: bool = True
    ) -> List[OptimizationResult]:
        """Optimize multiple schedules considering conflicts.

        Args:
            schedules: List of schedules to optimize
            avoid_conflicts: Whether to avoid scheduling conflicts

        Returns:
            List of optimization results
        """
        try:
            results = []
            optimized_times = []

            # Sort schedules by priority (high priority first)
            sorted_schedules = sorted(schedules, key=lambda x: x.priority, reverse=True)

            for schedule in sorted_schedules:
                # Optimize individual schedule
                result = self.optimize_schedule(schedule)

                if avoid_conflicts:
                    # Check for conflicts with already optimized schedules
                    conflict_free_time = self._find_conflict_free_time(
                        result.optimized_time,
                        optimized_times,
                        schedule
                    )

                    if conflict_free_time != result.optimized_time:
                        # Recalculate scores for conflict-free time
                        content_item = self.session.query(ContentItem).filter(
                            ContentItem.id == schedule.content_item_id
                        ).first()

                        if content_item:
                            new_score = self._calculate_engagement_score(
                                conflict_free_time,
                                content_item.content_type,
                                schedule.priority
                            )

                            original_score = self._calculate_engagement_score(
                                schedule.scheduled_time,
                                content_item.content_type,
                                schedule.priority
                            )

                            result.optimized_time = conflict_free_time
                            result.improvement_score = new_score - original_score
                            result.optimization_reason += " (adjusted to avoid conflicts)"

                results.append(result)
                optimized_times.append(result.optimized_time)

            return results

        except Exception as e:
            self.logger.error(f"Error optimizing multiple schedules: {str(e)}")
            return []

    def suggest_optimal_times(
        self,
        content_type: ContentType,
        date_range: Tuple[datetime, datetime],
        count: int = 5
    ) -> List[Dict[str, Any]]:
        """Suggest optimal times for new content.

        Args:
            content_type: Type of content to schedule
            date_range: Date range to consider
            count: Number of suggestions to return

        Returns:
            List of suggested optimal times with scores
        """
        try:
            suggestions = []
            start_date, end_date = date_range

            # Generate candidate times
            current_date = start_date
            while current_date <= end_date:
                # Get optimal hours for this content type
                if content_type in self.content_type_patterns:
                    optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                else:
                    optimal_hours = [9, 12, 15, 18]  # Default hours

                for hour in optimal_hours:
                    candidate_time = current_date.replace(
                        hour=hour,
                        minute=0,
                        second=0,
                        microsecond=0
                    )

                    if start_date <= candidate_time <= end_date:
                        score = self._calculate_engagement_score(
                            candidate_time,
                            content_type,
                            priority=5  # Default priority
                        )

                        suggestions.append({
                            'time': candidate_time,
                            'score': score,
                            'day_of_week': candidate_time.strftime('%A'),
                            'hour': hour,
                            'reason': self._get_time_suggestion_reason(candidate_time, content_type)
                        })

                current_date += timedelta(days=1)

            # Sort by score and return top suggestions
            suggestions.sort(key=lambda x: x['score'], reverse=True)
            return suggestions[:count]

        except Exception as e:
            self.logger.error(f"Error suggesting optimal times: {str(e)}")
            return []

    def _calculate_engagement_score(
        self,
        scheduled_time: datetime,
        content_type: ContentType,
        priority: int
    ) -> float:
        """Calculate engagement score for a given time and content type."""
        try:
            score = 0.0

            # Base score from priority
            score += priority * 10

            # Hour of day factor
            hour = scheduled_time.hour
            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if hour in optimal_hours:
                    score += 50
                else:
                    # Penalty for non-optimal hours
                    min_distance = min(abs(hour - oh) for oh in optimal_hours)
                    score += max(0, 30 - min_distance * 5)

            # Day of week factor
            day_of_week = scheduled_time.weekday()  # 0 = Monday, 6 = Sunday

            if content_type == ContentType.ARTICLE:
                # Articles perform better on weekdays
                if day_of_week < 5:  # Monday to Friday
                    score += 20
                else:
                    score += 5
            elif content_type == ContentType.VIDEO:
                # Videos perform better on weekends and evenings
                if day_of_week >= 5 or hour >= 18:
                    score += 25
                else:
                    score += 10
            elif content_type == ContentType.SOCIAL_POST:
                # Social posts are consistent throughout the week
                score += 15

            # Time spacing factor (avoid clustering)
            existing_schedules = self.session.query(Schedule).filter(
                Schedule.scheduled_time.between(
                    scheduled_time - timedelta(hours=2),
                    scheduled_time + timedelta(hours=2)
                )
            ).all()

            if len(existing_schedules) > 3:
                score -= len(existing_schedules) * 5

            return max(score, 0.0)

        except Exception as e:
            self.logger.error(f"Error calculating engagement score: {str(e)}")
            return 0.0

    def _find_optimal_time(
        self,
        schedule: Schedule,
        content_item: ContentItem
    ) -> Tuple[datetime, float]:
        """Find the optimal time for a schedule."""
        try:
            best_time = schedule.scheduled_time
            best_score = self._calculate_engagement_score(
                schedule.scheduled_time,
                content_item.content_type,
                schedule.priority
            )

            # Search within a week of the original time
            base_date = schedule.scheduled_time.date()

            for day_offset in range(-3, 4):  # ±3 days
                candidate_date = base_date + timedelta(days=day_offset)

                # Get optimal hours for this content type
                if content_item.content_type in self.content_type_patterns:
                    optimal_hours = self.content_type_patterns[content_item.content_type]['peak_hours']
                else:
                    optimal_hours = [9, 12, 15, 18]

                for hour in optimal_hours:
                    candidate_time = datetime.combine(candidate_date, datetime.min.time()).replace(hour=hour)

                    score = self._calculate_engagement_score(
                        candidate_time,
                        content_item.content_type,
                        schedule.priority
                    )

                    if score > best_score:
                        best_time = candidate_time
                        best_score = score

            return best_time, best_score

        except Exception as e:
            self.logger.error(f"Error finding optimal time: {str(e)}")
            return schedule.scheduled_time, 0.0

    def _find_conflict_free_time(
        self,
        preferred_time: datetime,
        existing_times: List[datetime],
        schedule: Schedule,
        min_gap: timedelta = timedelta(minutes=30)
    ) -> datetime:
        """Find a conflict-free time close to the preferred time."""
        try:
            # Check if preferred time has conflicts
            has_conflict = any(
                abs((preferred_time - existing_time).total_seconds()) < min_gap.total_seconds()
                for existing_time in existing_times
            )

            if not has_conflict:
                return preferred_time

            # Search for nearby conflict-free times
            for offset_minutes in [30, 60, 90, 120, -30, -60, -90, -120]:
                candidate_time = preferred_time + timedelta(minutes=offset_minutes)

                has_conflict = any(
                    abs((candidate_time - existing_time).total_seconds()) < min_gap.total_seconds()
                    for existing_time in existing_times
                )

                if not has_conflict:
                    return candidate_time

            # If no conflict-free time found nearby, return preferred time
            return preferred_time

        except Exception as e:
            self.logger.error(f"Error finding conflict-free time: {str(e)}")
            return preferred_time

    def _generate_optimization_reason(
        self,
        original_time: datetime,
        optimized_time: datetime,
        content_type: ContentType,
        improvement_score: float
    ) -> str:
        """Generate a human-readable optimization reason."""
        try:
            if improvement_score <= 0:
                return "Current time is already optimal"

            reasons = []

            # Time difference
            time_diff = optimized_time - original_time
            if abs(time_diff.total_seconds()) > 3600:  # More than 1 hour
                if time_diff.total_seconds() > 0:
                    reasons.append(f"Moved {time_diff.total_seconds() / 3600:.1f} hours later")
                else:
                    reasons.append(f"Moved {abs(time_diff.total_seconds()) / 3600:.1f} hours earlier")

            # Hour optimization
            original_hour = original_time.hour
            optimized_hour = optimized_time.hour

            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if optimized_hour in optimal_hours and original_hour not in optimal_hours:
                    reasons.append(f"Moved to peak engagement hour ({optimized_hour}:00)")

            # Day optimization
            original_day = original_time.strftime('%A')
            optimized_day = optimized_time.strftime('%A')

            if original_day != optimized_day:
                reasons.append(f"Moved from {original_day} to {optimized_day}")

            # Improvement score
            reasons.append(f"Expected {improvement_score:.1f}% engagement improvement")

            return "; ".join(reasons) if reasons else "Optimized for better engagement"

        except Exception as e:
            self.logger.error(f"Error generating optimization reason: {str(e)}")
            return "Optimized for better engagement"

    def _get_time_suggestion_reason(self, time: datetime, content_type: ContentType) -> str:
        """Get reason for suggesting a specific time."""
        try:
            reasons = []

            hour = time.hour
            day_name = time.strftime('%A')

            # Hour-based reasons
            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if hour in optimal_hours:
                    reasons.append(f"Peak engagement hour for {content_type.value}")

            # Day-based reasons
            if content_type == ContentType.ARTICLE and time.weekday() < 5:
                reasons.append("Weekday optimal for articles")
            elif content_type == ContentType.VIDEO and (time.weekday() >= 5 or hour >= 18):
                reasons.append("Evening/weekend optimal for videos")

            return "; ".join(reasons) if reasons else f"Good time for {content_type.value}"

        except Exception as e:
            self.logger.error(f"Error getting suggestion reason: {str(e)}")
            return "Recommended time"

    def analyze_schedule_performance(self, days_back: int = 30) -> Dict[str, Any]:
        """Analyze historical schedule performance."""
        try:
            # Get schedules from the last N days
            cutoff_date = datetime.now() - timedelta(days=days_back)

            schedules = self.session.query(Schedule).filter(
                Schedule.created_at >= cutoff_date
            ).all()

            if not schedules:
                return {'error': 'No schedules found for analysis'}

            # Analyze by hour
            hour_performance = defaultdict(list)
            day_performance = defaultdict(list)
            content_type_performance = defaultdict(list)

            for schedule in schedules:
                content_item = self.session.query(ContentItem).filter(
                    ContentItem.id == schedule.content_item_id
                ).first()

                if content_item:
                    hour = schedule.scheduled_time.hour
                    day = schedule.scheduled_time.strftime('%A')

                    # Calculate performance score (simplified)
                    performance_score = self._calculate_performance_score(schedule)

                    hour_performance[hour].append(performance_score)
                    day_performance[day].append(performance_score)
                    content_type_performance[content_item.content_type.value].append(performance_score)

            # Calculate averages
            analysis = {
                'total_schedules': len(schedules),
                'analysis_period_days': days_back,
                'best_hours': self._get_top_performers(hour_performance),
                'best_days': self._get_top_performers(day_performance),
                'content_type_performance': self._get_top_performers(content_type_performance),
                'recommendations': self._generate_performance_recommendations(
                    hour_performance,
                    day_performance,
                    content_type_performance
                )
            }

            return analysis

        except Exception as e:
            self.logger.error(f"Error analyzing schedule performance: {str(e)}")
            return {'error': str(e)}

    def _calculate_performance_score(self, schedule: Schedule) -> float:
        """Calculate a performance score for a schedule (simplified)."""
        try:
            # This is a simplified performance calculation
            # In a real implementation, this would use actual engagement metrics

            base_score = 50.0  # Base performance

            # Status-based scoring
            if schedule.status == ScheduleStatus.COMPLETED:
                base_score += 30
            elif schedule.status == ScheduleStatus.RUNNING:
                base_score += 15
            elif schedule.status == ScheduleStatus.FAILED:
                base_score -= 20

            # Priority-based scoring
            base_score += schedule.priority * 2

            return max(base_score, 0.0)

        except Exception as e:
            self.logger.error(f"Error calculating performance score: {str(e)}")
            return 0.0

    def _get_top_performers(self, performance_data: Dict[str, List[float]]) -> List[Dict[str, Any]]:
        """Get top performing items from performance data."""
        try:
            performers = []

            for key, scores in performance_data.items():
                if scores:
                    avg_score = np.mean(scores)
                    performers.append({
                        'key': key,
                        'average_score': avg_score,
                        'sample_count': len(scores)
                    })

            # Sort by average score
            performers.sort(key=lambda x: x['average_score'], reverse=True)

            return performers[:5]  # Top 5

        except Exception as e:
            self.logger.error(f"Error getting top performers: {str(e)}")
            return []

    def _generate_performance_recommendations(
        self,
        hour_performance: Dict[int, List[float]],
        day_performance: Dict[str, List[float]],
        content_type_performance: Dict[str, List[float]]
    ) -> List[str]:
        """Generate performance-based recommendations."""
        try:
            recommendations = []

            # Hour recommendations
            if hour_performance:
                best_hours = self._get_top_performers(hour_performance)
                if best_hours:
                    best_hour = best_hours[0]['key']
                    recommendations.append(f"Schedule more content around {best_hour}:00 for better performance")

            # Day recommendations
            if day_performance:
                best_days = self._get_top_performers(day_performance)
                if best_days:
                    best_day = best_days[0]['key']
                    recommendations.append(f"Consider scheduling more content on {best_day}s")

            # Content type recommendations
            if content_type_performance:
                best_types = self._get_top_performers(content_type_performance)
                if best_types:
                    best_type = best_types[0]['key']
                    recommendations.append(f"{best_type} content shows the best performance")

            return recommendations

        except Exception as e:
            self.logger.error(f"Error generating recommendations: {str(e)}")
            return []