"""
Schedule optimization system for content scheduling.
"""

import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import numpy as np
from collections import defaultdict

# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus, ContentType, Platform, get_session

logger = logging.getLogger(__name__)


@dataclass
class OptimizationResult:
    """Result of schedule optimization."""

    # The schedule that was analysed (not modified by the optimizer).
    original_schedule: Schedule
    # Proposed publication time; equals the original time when no better
    # slot was found or optimization failed.
    optimized_time: datetime
    # Score of the proposed time minus score of the original time.
    improvement_score: float
    # Human-readable explanation of the proposal.
    optimization_reason: str
    # Relative improvement clamped to the range [0.0, 1.0].
    confidence: float


class ScheduleOptimizer:
    """Optimize content scheduling for maximum engagement."""

    # Hours used when a content type has no entry in ``content_type_patterns``.
    DEFAULT_PEAK_HOURS = [9, 12, 15, 18]

    # Candidate shifts (minutes) tried when the preferred slot is taken.
    # NOTE(review): all later offsets are tried before any earlier one, so a
    # +120 minute shift wins over a -30 minute shift; confirm this is intended.
    CONFLICT_OFFSETS_MINUTES = (30, 60, 90, 120, -30, -60, -90, -120)

    def __init__(self, session: Optional[Any] = None) -> None:
        """Initialize the schedule optimizer.

        Args:
            session: Database session to use. When omitted, a new session is
                obtained from ``get_session()`` (the original behavior).
        """
        self.logger = logger
        self.session = session if session is not None else get_session()

        # Platform-specific optimal times (can be made configurable)
        self.platform_optimal_times = {
            Platform.TWITTER: [9, 12, 15, 18],  # Hours of day
            Platform.FACEBOOK: [9, 13, 15],
            Platform.LINKEDIN: [8, 12, 17],
            Platform.INSTAGRAM: [11, 14, 17, 19],
            Platform.YOUTUBE: [14, 16, 18, 20]
        }

        # Content type engagement patterns
        self.content_type_patterns = {
            ContentType.ARTICLE: {'peak_hours': [9, 14, 16], 'duration': 2},
            ContentType.VIDEO: {'peak_hours': [12, 18, 20], 'duration': 3},
            ContentType.IMAGE: {'peak_hours': [11, 15, 19], 'duration': 1},
            ContentType.SOCIAL_POST: {'peak_hours': [8, 12, 17, 21], 'duration': 1}
        }

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    def _get_content_item(self, schedule: Schedule) -> Optional[ContentItem]:
        """Return the content item referenced by ``schedule``, or None."""
        return self.session.query(ContentItem).filter(
            ContentItem.id == schedule.content_item_id
        ).first()

    def _peak_hours_for(self, content_type: ContentType) -> List[int]:
        """Return the peak hours for a content type, or the default hours."""
        pattern = self.content_type_patterns.get(content_type)
        if pattern is None:
            return self.DEFAULT_PEAK_HOURS
        return pattern['peak_hours']

    @staticmethod
    def _compute_confidence(improvement_score: float, baseline_score: float) -> float:
        """Return the relative improvement clamped to [0.0, 1.0].

        A non-positive baseline yields 0.0 because the ratio is undefined.
        """
        if baseline_score <= 0:
            return 0.0
        return max(0.0, min(improvement_score / baseline_score, 1.0))

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def optimize_schedule(self, schedule: Schedule) -> OptimizationResult:
        """Optimize a single schedule for better engagement.

        Args:
            schedule: Schedule to optimize

        Returns:
            OptimizationResult with optimization details. On any error, or
            when the content item cannot be found, the result keeps the
            original time with zero improvement and zero confidence.
        """
        try:
            # Get content item details
            content_item = self._get_content_item(schedule)

            if not content_item:
                return OptimizationResult(
                    original_schedule=schedule,
                    optimized_time=schedule.scheduled_time,
                    improvement_score=0.0,
                    optimization_reason="Content item not found",
                    confidence=0.0
                )

            # Calculate current engagement score
            current_score = self._calculate_engagement_score(
                schedule.scheduled_time,
                content_item.content_type,
                schedule.priority
            )

            # Find optimal time
            optimal_time, optimal_score = self._find_optimal_time(schedule, content_item)

            # Calculate improvement. The improvement can be negative when the
            # search itself failed (it then reports a score of 0.0), so the
            # confidence is clamped at zero instead of going negative.
            improvement_score = optimal_score - current_score
            confidence = self._compute_confidence(improvement_score, current_score)

            # Generate optimization reason
            reason = self._generate_optimization_reason(
                schedule.scheduled_time,
                optimal_time,
                content_item.content_type,
                improvement_score
            )

            return OptimizationResult(
                original_schedule=schedule,
                optimized_time=optimal_time,
                improvement_score=improvement_score,
                optimization_reason=reason,
                confidence=confidence
            )

        except Exception as e:
            self.logger.error("Error optimizing schedule: %s", e, exc_info=True)
            return OptimizationResult(
                original_schedule=schedule,
                optimized_time=schedule.scheduled_time,
                improvement_score=0.0,
                optimization_reason=f"Optimization error: {e}",
                confidence=0.0
            )

    def optimize_multiple_schedules(
        self,
        schedules: List[Schedule],
        avoid_conflicts: bool = True
    ) -> List[OptimizationResult]:
        """Optimize multiple schedules considering conflicts.

        Schedules are processed from highest to lowest priority, so
        higher-priority items get first pick of a time slot.

        Args:
            schedules: List of schedules to optimize
            avoid_conflicts: Whether to avoid scheduling conflicts

        Returns:
            List of optimization results (in priority order), or an empty
            list if an unexpected error occurs.
        """
        try:
            results = []
            optimized_times = []

            # Sort schedules by priority (high priority first)
            sorted_schedules = sorted(schedules, key=lambda x: x.priority, reverse=True)

            for schedule in sorted_schedules:
                # Optimize individual schedule
                result = self.optimize_schedule(schedule)

                if avoid_conflicts:
                    # Check for conflicts with already optimized schedules
                    conflict_free_time = self._find_conflict_free_time(
                        result.optimized_time,
                        optimized_times,
                        schedule
                    )

                    if conflict_free_time != result.optimized_time:
                        # Recalculate scores for conflict-free time
                        content_item = self._get_content_item(schedule)

                        if content_item:
                            new_score = self._calculate_engagement_score(
                                conflict_free_time,
                                content_item.content_type,
                                schedule.priority
                            )
                            original_score = self._calculate_engagement_score(
                                schedule.scheduled_time,
                                content_item.content_type,
                                schedule.priority
                            )

                            result.optimized_time = conflict_free_time
                            result.improvement_score = new_score - original_score
                            # Keep confidence in sync with the adjusted score;
                            # otherwise it would describe the rejected slot.
                            result.confidence = self._compute_confidence(
                                result.improvement_score, original_score
                            )
                            result.optimization_reason += " (adjusted to avoid conflicts)"

                results.append(result)
                optimized_times.append(result.optimized_time)

            return results

        except Exception as e:
            self.logger.error("Error optimizing multiple schedules: %s", e, exc_info=True)
            return []

    def suggest_optimal_times(
        self,
        content_type: ContentType,
        date_range: Tuple[datetime, datetime],
        count: int = 5
    ) -> List[Dict[str, Any]]:
        """Suggest optimal times for new content.

        Args:
            content_type: Type of content to schedule
            date_range: Date range to consider (inclusive start and end)
            count: Number of suggestions to return

        Returns:
            List of suggested optimal times with scores, best first. Each
            entry has the keys 'time', 'score', 'day_of_week', 'hour' and
            'reason'. Empty when ``count`` is not positive or on error.
        """
        try:
            if count <= 0:
                return []

            suggestions = []
            start_date, end_date = date_range
            optimal_hours = self._peak_hours_for(content_type)

            # Walk calendar days starting at midnight of the first day. Using
            # the start's own time of day as the cursor would end the loop
            # before the final day whenever the start's clock time is later
            # than the end's, silently dropping that day's candidates.
            current_day = start_date.replace(hour=0, minute=0, second=0, microsecond=0)

            while current_day <= end_date:
                for hour in optimal_hours:
                    candidate_time = current_day.replace(hour=hour)

                    if start_date <= candidate_time <= end_date:
                        score = self._calculate_engagement_score(
                            candidate_time,
                            content_type,
                            priority=5  # Default priority
                        )

                        suggestions.append({
                            'time': candidate_time,
                            'score': score,
                            'day_of_week': candidate_time.strftime('%A'),
                            'hour': hour,
                            'reason': self._get_time_suggestion_reason(candidate_time, content_type)
                        })

                current_day += timedelta(days=1)

            # Sort by score and return top suggestions (stable sort keeps
            # chronological order among equal scores)
            suggestions.sort(key=lambda x: x['score'], reverse=True)
            return suggestions[:count]

        except Exception as e:
            self.logger.error("Error suggesting optimal times: %s", e, exc_info=True)
            return []

    # ------------------------------------------------------------------
    # Scoring and search
    # ------------------------------------------------------------------

    def _calculate_engagement_score(
        self,
        scheduled_time: datetime,
        content_type: ContentType,
        priority: int
    ) -> float:
        """Calculate engagement score for a given time and content type.

        The score is the sum of a priority term, an hour-of-day term, a
        day-of-week term and a clustering penalty, floored at 0.0. Returns
        0.0 if any step raises.
        """
        try:
            score = 0.0

            # Base score from priority
            score += priority * 10

            # Hour of day factor
            hour = scheduled_time.hour
            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if hour in optimal_hours:
                    score += 50
                else:
                    # Penalty for non-optimal hours: the bonus shrinks by 5 per
                    # hour of distance from the nearest peak hour.
                    min_distance = min(abs(hour - oh) for oh in optimal_hours)
                    score += max(0, 30 - min_distance * 5)

            # Day of week factor
            day_of_week = scheduled_time.weekday()  # 0 = Monday, 6 = Sunday

            if content_type == ContentType.ARTICLE:
                # Articles perform better on weekdays
                if day_of_week < 5:  # Monday to Friday
                    score += 20
                else:
                    score += 5
            elif content_type == ContentType.VIDEO:
                # Videos perform better on weekends and evenings
                if day_of_week >= 5 or hour >= 18:
                    score += 25
                else:
                    score += 10
            elif content_type == ContentType.SOCIAL_POST:
                # Social posts are consistent throughout the week
                score += 15

            # Time spacing factor (avoid clustering): count every schedule
            # within two hours on either side of the candidate time.
            existing_schedules = self.session.query(Schedule).filter(
                Schedule.scheduled_time.between(
                    scheduled_time - timedelta(hours=2),
                    scheduled_time + timedelta(hours=2)
                )
            ).all()

            if len(existing_schedules) > 3:
                score -= len(existing_schedules) * 5

            return max(score, 0.0)

        except Exception as e:
            self.logger.error("Error calculating engagement score: %s", e, exc_info=True)
            return 0.0

    def _find_optimal_time(
        self,
        schedule: Schedule,
        content_item: ContentItem
    ) -> Tuple[datetime, float]:
        """Find the optimal time for a schedule.

        Evaluates every peak hour of the content type on each day within
        three days of the original time and keeps the best-scoring candidate.
        The original time wins ties.

        Returns:
            ``(best_time, best_score)``; on error ``(original_time, 0.0)``.
        """
        try:
            best_time = schedule.scheduled_time
            best_score = self._calculate_engagement_score(
                schedule.scheduled_time,
                content_item.content_type,
                schedule.priority
            )

            # Derive candidates from the original datetime (instead of
            # rebuilding them from a bare date) so any tzinfo is preserved;
            # mixing naive candidates with aware times fails on subtraction.
            base_midnight = schedule.scheduled_time.replace(
                hour=0, minute=0, second=0, microsecond=0
            )
            optimal_hours = self._peak_hours_for(content_item.content_type)

            # Search within a week of the original time
            for day_offset in range(-3, 4):  # ±3 days
                candidate_day = base_midnight + timedelta(days=day_offset)

                for hour in optimal_hours:
                    candidate_time = candidate_day.replace(hour=hour)

                    score = self._calculate_engagement_score(
                        candidate_time,
                        content_item.content_type,
                        schedule.priority
                    )

                    if score > best_score:
                        best_time = candidate_time
                        best_score = score

            return best_time, best_score

        except Exception as e:
            self.logger.error("Error finding optimal time: %s", e, exc_info=True)
            return schedule.scheduled_time, 0.0

    def _find_conflict_free_time(
        self,
        preferred_time: datetime,
        existing_times: List[datetime],
        schedule: Schedule,
        min_gap: timedelta = timedelta(minutes=30)
    ) -> datetime:
        """Find a conflict-free time close to the preferred time.

        A time conflicts when it lies strictly closer than ``min_gap`` to any
        entry of ``existing_times``.

        Args:
            preferred_time: The time to keep if possible
            existing_times: Times that are already taken
            schedule: The schedule being placed (currently unused)
            min_gap: Minimum distance required between two scheduled times

        Returns:
            ``preferred_time`` if it is free, otherwise the first free
            candidate from ``CONFLICT_OFFSETS_MINUTES``; falls back to
            ``preferred_time`` when every candidate conflicts or on error.
        """
        try:
            gap_seconds = min_gap.total_seconds()

            def conflicts(candidate: datetime) -> bool:
                return any(
                    abs((candidate - existing_time).total_seconds()) < gap_seconds
                    for existing_time in existing_times
                )

            # Check if preferred time has conflicts
            if not conflicts(preferred_time):
                return preferred_time

            # Search for nearby conflict-free times
            for offset_minutes in self.CONFLICT_OFFSETS_MINUTES:
                candidate_time = preferred_time + timedelta(minutes=offset_minutes)
                if not conflicts(candidate_time):
                    return candidate_time

            # If no conflict-free time found nearby, return preferred time
            return preferred_time

        except Exception as e:
            self.logger.error("Error finding conflict-free time: %s", e, exc_info=True)
            return preferred_time

    # ------------------------------------------------------------------
    # Explanations
    # ------------------------------------------------------------------

    def _generate_optimization_reason(
        self,
        original_time: datetime,
        optimized_time: datetime,
        content_type: ContentType,
        improvement_score: float
    ) -> str:
        """Generate a human-readable optimization reason.

        Returns a fixed "already optimal" message when the improvement is not
        positive; otherwise a "; "-joined list of the changes made.
        """
        try:
            if improvement_score <= 0:
                return "Current time is already optimal"

            reasons = []

            # Time difference
            shift_seconds = (optimized_time - original_time).total_seconds()

            if abs(shift_seconds) > 3600:  # More than 1 hour
                if shift_seconds > 0:
                    reasons.append(f"Moved {shift_seconds / 3600:.1f} hours later")
                else:
                    reasons.append(f"Moved {abs(shift_seconds) / 3600:.1f} hours earlier")

            # Hour optimization
            original_hour = original_time.hour
            optimized_hour = optimized_time.hour

            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if optimized_hour in optimal_hours and original_hour not in optimal_hours:
                    reasons.append(f"Moved to peak engagement hour ({optimized_hour}:00)")

            # Day optimization
            original_day = original_time.strftime('%A')
            optimized_day = optimized_time.strftime('%A')

            if original_day != optimized_day:
                reasons.append(f"Moved from {original_day} to {optimized_day}")

            # Improvement score
            # NOTE(review): improvement_score is a difference of score points,
            # not a percentage; the wording is kept for output compatibility.
            reasons.append(f"Expected {improvement_score:.1f}% engagement improvement")

            return "; ".join(reasons) if reasons else "Optimized for better engagement"

        except Exception as e:
            self.logger.error("Error generating optimization reason: %s", e, exc_info=True)
            return "Optimized for better engagement"

    def _get_time_suggestion_reason(self, time: datetime, content_type: ContentType) -> str:
        """Get reason for suggesting a specific time."""
        try:
            reasons = []

            hour = time.hour

            # Hour-based reasons
            if content_type in self.content_type_patterns:
                optimal_hours = self.content_type_patterns[content_type]['peak_hours']
                if hour in optimal_hours:
                    reasons.append(f"Peak engagement hour for {content_type.value}")

            # Day-based reasons
            if content_type == ContentType.ARTICLE and time.weekday() < 5:
                reasons.append("Weekday optimal for articles")
            elif content_type == ContentType.VIDEO and (time.weekday() >= 5 or hour >= 18):
                reasons.append("Evening/weekend optimal for videos")

            return "; ".join(reasons) if reasons else f"Good time for {content_type.value}"

        except Exception as e:
            self.logger.error("Error getting suggestion reason: %s", e, exc_info=True)
            return "Recommended time"

    # ------------------------------------------------------------------
    # Historical analysis
    # ------------------------------------------------------------------

    def analyze_schedule_performance(self, days_back: int = 30) -> Dict[str, Any]:
        """Analyze historical schedule performance.

        Args:
            days_back: Only schedules created within this many days are used

        Returns:
            A dict with totals, the top hours / days / content types and a
            list of recommendations, or ``{'error': message}`` when there is
            nothing to analyse or an error occurs.
        """
        try:
            # Get schedules from the last N days
            cutoff_date = datetime.now() - timedelta(days=days_back)

            schedules = self.session.query(Schedule).filter(
                Schedule.created_at >= cutoff_date
            ).all()

            if not schedules:
                return {'error': 'No schedules found for analysis'}

            # Analyze by hour, weekday name and content type
            hour_performance = defaultdict(list)
            day_performance = defaultdict(list)
            content_type_performance = defaultdict(list)

            for schedule in schedules:
                content_item = self._get_content_item(schedule)

                # Schedules whose content item is missing are skipped
                if content_item:
                    hour = schedule.scheduled_time.hour
                    day = schedule.scheduled_time.strftime('%A')

                    # Calculate performance score (simplified)
                    performance_score = self._calculate_performance_score(schedule)

                    hour_performance[hour].append(performance_score)
                    day_performance[day].append(performance_score)
                    content_type_performance[content_item.content_type.value].append(performance_score)

            # Calculate averages
            analysis = {
                'total_schedules': len(schedules),
                'analysis_period_days': days_back,
                'best_hours': self._get_top_performers(hour_performance),
                'best_days': self._get_top_performers(day_performance),
                'content_type_performance': self._get_top_performers(content_type_performance),
                'recommendations': self._generate_performance_recommendations(
                    hour_performance, day_performance, content_type_performance
                )
            }

            return analysis

        except Exception as e:
            self.logger.error("Error analyzing schedule performance: %s", e, exc_info=True)
            return {'error': str(e)}

    def _calculate_performance_score(self, schedule: Schedule) -> float:
        """Calculate a performance score for a schedule (simplified)."""
        try:
            # This is a simplified performance calculation
            # In a real implementation, this would use actual engagement metrics

            base_score = 50.0  # Base performance

            # Status-based scoring
            if schedule.status == ScheduleStatus.COMPLETED:
                base_score += 30
            elif schedule.status == ScheduleStatus.RUNNING:
                base_score += 15
            elif schedule.status == ScheduleStatus.FAILED:
                base_score -= 20

            # Priority-based scoring
            base_score += schedule.priority * 2

            return max(base_score, 0.0)

        except Exception as e:
            self.logger.error("Error calculating performance score: %s", e, exc_info=True)
            return 0.0

    def _get_top_performers(self, performance_data: Dict[Any, List[float]]) -> List[Dict[str, Any]]:
        """Get top performing items from performance data.

        Args:
            performance_data: Mapping of a group key (hour, weekday name or
                content type value) to the scores recorded for that group

        Returns:
            Up to five entries with 'key', 'average_score' and
            'sample_count', ordered by descending average. Groups without
            scores are omitted.
        """
        try:
            performers = [
                {
                    'key': key,
                    # Cast to a built-in float: np.mean returns a NumPy scalar,
                    # which the json module cannot serialize.
                    'average_score': float(np.mean(scores)),
                    'sample_count': len(scores)
                }
                for key, scores in performance_data.items()
                if scores
            ]

            # Sort by average score
            performers.sort(key=lambda x: x['average_score'], reverse=True)
            return performers[:5]  # Top 5

        except Exception as e:
            self.logger.error("Error getting top performers: %s", e, exc_info=True)
            return []

    def _generate_performance_recommendations(
        self,
        hour_performance: Dict[int, List[float]],
        day_performance: Dict[str, List[float]],
        content_type_performance: Dict[str, List[float]]
    ) -> List[str]:
        """Generate performance-based recommendations.

        Produces at most one sentence each for the best hour, best weekday
        and best content type; returns an empty list on error.
        """
        try:
            recommendations = []

            # Hour recommendations
            if hour_performance:
                best_hours = self._get_top_performers(hour_performance)
                if best_hours:
                    best_hour = best_hours[0]['key']
                    recommendations.append(f"Schedule more content around {best_hour}:00 for better performance")

            # Day recommendations
            if day_performance:
                best_days = self._get_top_performers(day_performance)
                if best_days:
                    best_day = best_days[0]['key']
                    recommendations.append(f"Consider scheduling more content on {best_day}s")

            # Content type recommendations
            if content_type_performance:
                best_types = self._get_top_performers(content_type_performance)
                if best_types:
                    best_type = best_types[0]['key']
                    recommendations.append(f"{best_type} content shows the best performance")

            return recommendations

        except Exception as e:
            self.logger.error("Error generating recommendations: %s", e, exc_info=True)
            return []