alwrity chatbot assistant, content scheduler, and content repurposing

This commit is contained in:
ajaysi
2025-06-02 00:00:18 +05:30
parent 889021c078
commit 5ca2fd5977
69 changed files with 13952 additions and 3279 deletions

View File

@@ -0,0 +1,403 @@
"""
Conflict resolution system for content scheduling.
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus
logger = logging.getLogger(__name__)
@dataclass
class ConflictInfo:
    """Information about a scheduling conflict.

    One record describes a single conflict found between a pair of
    schedules, plus human-readable text explaining it and hinting at a fix.
    """
    # The two schedules involved in the conflict.
    schedule_1: Schedule
    schedule_2: Schedule
    # Conflict category. The detectors in this module emit 'time_overlap',
    # 'platform_conflict' and 'priority_conflict'.
    conflict_type: str
    # Severity label. The detectors in this module emit 'low', 'medium'
    # and 'high'.
    severity: str
    # Human-readable explanation of what is wrong.
    description: str
    # Human-readable hint on how the conflict could be resolved.
    suggested_resolution: str
class ConflictResolver:
"""Resolve scheduling conflicts automatically."""
def __init__(self):
    """Set up the resolver's logger and its conflict-type dispatch table."""
    self.logger = logger
    # Every supported conflict_type label is served by the method named
    # _resolve_<label>; resolve_conflicts() looks handlers up in this table.
    kinds = (
        'time_overlap',
        'platform_conflict',
        'resource_conflict',
        'priority_conflict',
    )
    self.resolution_strategies = {
        kind: getattr(self, f'_resolve_{kind}') for kind in kinds
    }
def detect_conflicts(self, schedules: List[Schedule]) -> List[ConflictInfo]:
    """Run every pairwise conflict check over a collection of schedules.

    Args:
        schedules: Schedule objects to compare against each other.

    Returns:
        Every conflict reported by the time-overlap, platform and priority
        checks, or an empty list if detection raised.
    """
    try:
        # Pairs are always handed to the checks in chronological order.
        ordered = sorted(schedules, key=lambda item: item.scheduled_time)
        checks = (
            self._check_time_overlap,
            self._check_platform_conflict,
            self._check_priority_conflict,
        )
        found = []
        for position, earlier in enumerate(ordered):
            for later in ordered[position + 1:]:
                for check in checks:
                    found.extend(check(earlier, later))
        return found
    except Exception as e:
        self.logger.error(f"Error detecting conflicts: {str(e)}")
        return []
def _check_time_overlap(self, schedule_1: Schedule, schedule_2: Schedule) -> List[ConflictInfo]:
"""Check for time overlap conflicts."""
conflicts = []
try:
# Assume each schedule takes 1 hour (can be made configurable)
duration = timedelta(hours=1)
end_1 = schedule_1.scheduled_time + duration
end_2 = schedule_2.scheduled_time + duration
# Check for overlap
if (schedule_1.scheduled_time < end_2 and end_1 > schedule_2.scheduled_time):
time_diff = abs((schedule_2.scheduled_time - schedule_1.scheduled_time).total_seconds() / 60)
severity = 'high' if time_diff < 30 else 'medium'
conflicts.append(ConflictInfo(
schedule_1=schedule_1,
schedule_2=schedule_2,
conflict_type='time_overlap',
severity=severity,
description=f"Schedules overlap by {60 - time_diff:.0f} minutes",
suggested_resolution=f"Move one schedule by at least {60 - time_diff + 15:.0f} minutes"
))
except Exception as e:
self.logger.error(f"Error checking time overlap: {str(e)}")
return conflicts
def _check_platform_conflict(self, schedule_1: Schedule, schedule_2: Schedule) -> List[ConflictInfo]:
"""Check for platform conflicts."""
conflicts = []
try:
# This is a placeholder - platform conflicts would depend on specific platform limitations
# For now, we'll check if schedules are too close on the same platform
time_diff = abs((schedule_2.scheduled_time - schedule_1.scheduled_time).total_seconds() / 60)
# If schedules are within 15 minutes, it might be a platform conflict
if time_diff < 15:
conflicts.append(ConflictInfo(
schedule_1=schedule_1,
schedule_2=schedule_2,
conflict_type='platform_conflict',
severity='medium',
description=f"Schedules too close for optimal platform performance",
suggested_resolution="Space schedules at least 15 minutes apart"
))
except Exception as e:
self.logger.error(f"Error checking platform conflict: {str(e)}")
return conflicts
def _check_priority_conflict(self, schedule_1: Schedule, schedule_2: Schedule) -> List[ConflictInfo]:
"""Check for priority conflicts."""
conflicts = []
try:
# Check if high priority items are scheduled too close to low priority items
if schedule_1.priority > 7 and schedule_2.priority < 4:
time_diff = abs((schedule_2.scheduled_time - schedule_1.scheduled_time).total_seconds() / 60)
if time_diff < 60: # Within 1 hour
conflicts.append(ConflictInfo(
schedule_1=schedule_1,
schedule_2=schedule_2,
conflict_type='priority_conflict',
severity='low',
description="High priority content scheduled close to low priority content",
suggested_resolution="Consider spacing high and low priority content further apart"
))
except Exception as e:
self.logger.error(f"Error checking priority conflict: {str(e)}")
return conflicts
def resolve_conflicts(self, conflicts: List[ConflictInfo]) -> Dict[str, Any]:
    """Apply the registered resolution strategy to each conflict.

    Args:
        conflicts: Conflicts to resolve.

    Returns:
        Dictionary with 'resolved_conflicts' (conflict/resolution pairs),
        'unresolved_conflicts', 'schedule_adjustments' (merged per schedule
        id; later resolutions overwrite earlier keys) and 'success_rate'
        (1.0 when there was nothing to resolve).
    """
    try:
        resolved = []
        unresolved = []
        adjustments_by_schedule = {}
        for conflict in conflicts:
            try:
                handler = self.resolution_strategies.get(conflict.conflict_type)
                if not handler:
                    # No strategy registered for this conflict type.
                    unresolved.append(conflict)
                    continue
                outcome = handler(conflict)
                if not outcome['success']:
                    unresolved.append(conflict)
                    continue
                resolved.append({
                    'conflict': conflict,
                    'resolution': outcome
                })
                # Fold this resolution's changes into the per-schedule map.
                for schedule_id, changes in outcome.get('adjustments', {}).items():
                    adjustments_by_schedule.setdefault(schedule_id, {}).update(changes)
            except Exception as e:
                self.logger.error(f"Error resolving conflict: {str(e)}")
                unresolved.append(conflict)
        if conflicts:
            rate = len(resolved) / len(conflicts)
        else:
            rate = 1.0
        return {
            'resolved_conflicts': resolved,
            'unresolved_conflicts': unresolved,
            'schedule_adjustments': adjustments_by_schedule,
            'success_rate': rate
        }
    except Exception as e:
        self.logger.error(f"Error resolving conflicts: {str(e)}")
        return {
            'resolved_conflicts': [],
            'unresolved_conflicts': conflicts,
            'schedule_adjustments': {},
            'success_rate': 0.0
        }
def _resolve_time_overlap(self, conflict: ConflictInfo) -> Dict[str, Any]:
"""Resolve time overlap conflicts."""
try:
# Strategy: Move the lower priority schedule
schedule_1 = conflict.schedule_1
schedule_2 = conflict.schedule_2
# Determine which schedule to move
if schedule_1.priority >= schedule_2.priority:
schedule_to_move = schedule_2
anchor_schedule = schedule_1
else:
schedule_to_move = schedule_1
anchor_schedule = schedule_2
# Calculate new time (move 1.5 hours after anchor)
new_time = anchor_schedule.scheduled_time + timedelta(hours=1.5)
return {
'success': True,
'strategy': 'move_lower_priority',
'adjustments': {
str(schedule_to_move.id): {
'new_scheduled_time': new_time,
'reason': 'Resolved time overlap conflict'
}
},
'description': f"Moved schedule {schedule_to_move.id} to {new_time}"
}
except Exception as e:
self.logger.error(f"Error resolving time overlap: {str(e)}")
return {'success': False, 'error': str(e)}
def _resolve_platform_conflict(self, conflict: ConflictInfo) -> Dict[str, Any]:
"""Resolve platform conflicts."""
try:
# Strategy: Space schedules 20 minutes apart
schedule_1 = conflict.schedule_1
schedule_2 = conflict.schedule_2
# Move the later schedule
if schedule_1.scheduled_time < schedule_2.scheduled_time:
schedule_to_move = schedule_2
anchor_time = schedule_1.scheduled_time
else:
schedule_to_move = schedule_1
anchor_time = schedule_2.scheduled_time
new_time = anchor_time + timedelta(minutes=20)
return {
'success': True,
'strategy': 'space_schedules',
'adjustments': {
str(schedule_to_move.id): {
'new_scheduled_time': new_time,
'reason': 'Resolved platform conflict'
}
},
'description': f"Spaced schedule {schedule_to_move.id} to {new_time}"
}
except Exception as e:
self.logger.error(f"Error resolving platform conflict: {str(e)}")
return {'success': False, 'error': str(e)}
def _resolve_resource_conflict(self, conflict: ConflictInfo) -> Dict[str, Any]:
"""Resolve resource conflicts."""
try:
# This is a placeholder for resource conflict resolution
return {
'success': False,
'reason': 'Resource conflict resolution not implemented'
}
except Exception as e:
self.logger.error(f"Error resolving resource conflict: {str(e)}")
return {'success': False, 'error': str(e)}
def _resolve_priority_conflict(self, conflict: ConflictInfo) -> Dict[str, Any]:
"""Resolve priority conflicts."""
try:
# Strategy: Move low priority content away from high priority content
schedule_1 = conflict.schedule_1
schedule_2 = conflict.schedule_2
# Identify high and low priority schedules
if schedule_1.priority > schedule_2.priority:
high_priority = schedule_1
low_priority = schedule_2
else:
high_priority = schedule_2
low_priority = schedule_1
# Move low priority content 2 hours away
new_time = high_priority.scheduled_time + timedelta(hours=2)
return {
'success': True,
'strategy': 'separate_priorities',
'adjustments': {
str(low_priority.id): {
'new_scheduled_time': new_time,
'reason': 'Resolved priority conflict'
}
},
'description': f"Moved low priority schedule {low_priority.id} to {new_time}"
}
except Exception as e:
self.logger.error(f"Error resolving priority conflict: {str(e)}")
return {'success': False, 'error': str(e)}
def suggest_optimal_schedule(
    self,
    new_schedule: Schedule,
    existing_schedules: List[Schedule]
) -> Dict[str, Any]:
    """Suggest optimal scheduling for new content.

    If the proposed time is conflict-free it is returned as optimal.
    Otherwise the slots at +1, +2, +3, -1, -2 and -3 hours are evaluated and
    ranked by conflict count, then by the worst conflict severity.

    Args:
        new_schedule: New schedule to optimize
        existing_schedules: List of existing schedules

    Returns:
        Dictionary with 'optimal_time', 'conflicts' (for the proposed time)
        and 'suggestions'; when alternatives were evaluated, also
        'alternatives' holding the three best candidate slots.
    """
    # Severity labels ordered from mildest to worst. Comparing the raw
    # strings would sort alphabetically ('high' < 'low' < 'medium' < 'none')
    # and rank the worst conflicts as the best. Unknown labels are treated
    # as worse than any known one.
    severity_rank = {'none': 0, 'low': 1, 'medium': 2, 'high': 3}

    def rank(label):
        return severity_rank.get(label, len(severity_rank))

    try:
        # Check for conflicts with the proposed time
        conflicts = self.detect_conflicts([*existing_schedules, new_schedule])
        if not conflicts:
            return {
                'optimal_time': new_schedule.scheduled_time,
                'conflicts': [],
                'suggestions': ['Current time is optimal']
            }
        base_time = new_schedule.scheduled_time
        alternative_times = []
        # Probe nearby slots, later ones first
        for hours_offset in (1, 2, 3, -1, -2, -3):
            alt_time = base_time + timedelta(hours=hours_offset)
            alt_schedule = Schedule(
                content_item_id=new_schedule.content_item_id,
                scheduled_time=alt_time,
                status=new_schedule.status,
                recurrence=new_schedule.recurrence,
                priority=new_schedule.priority
            )
            alt_conflicts = self.detect_conflicts([*existing_schedules, alt_schedule])
            worst = max(
                (c.severity for c in alt_conflicts),
                key=rank,
                default='none'
            )
            alternative_times.append({
                'time': alt_time,
                'conflicts': len(alt_conflicts),
                'severity': worst
            })
        # Fewest conflicts first; ties broken by the mildest worst-severity.
        alternative_times.sort(key=lambda alt: (alt['conflicts'], rank(alt['severity'])))
        optimal_time = alternative_times[0]['time'] if alternative_times else new_schedule.scheduled_time
        return {
            'optimal_time': optimal_time,
            'conflicts': conflicts,
            'alternatives': alternative_times[:3],  # Top 3 alternatives
            'suggestions': [
                f"Consider scheduling at {optimal_time}",
                f"Current time has {len(conflicts)} conflicts",
                "Review alternative times for better optimization"
            ]
        }
    except Exception as e:
        self.logger.error(f"Error suggesting optimal schedule: {str(e)}")
        return {
            'optimal_time': new_schedule.scheduled_time,
            'conflicts': [],
            'suggestions': ['Error occurred during optimization']
        }

View File

@@ -0,0 +1,584 @@
"""
Schedule health monitoring system.
"""
import logging
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
from enum import Enum
from ..utils.error_handling import SchedulingError
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class HealthStatus(Enum):
    """Health check status.

    Outcome levels attached to each HealthCheck and to the overall summary.
    """
    # Component is operating normally.
    HEALTHY = "healthy"
    # Component is degraded but no recovery is attempted.
    WARNING = "warning"
    # Component failed; the monitor loop passes these to its recovery handler.
    CRITICAL = "critical"
    # Status could not be determined (used when building the summary fails).
    UNKNOWN = "unknown"
@dataclass
class HealthCheck:
    """Health check result.

    One record is produced per component each time the checks run.
    """
    # Name of the checked component, e.g. "scheduler" or "database".
    component: str
    # Outcome level of the check.
    status: HealthStatus
    # Human-readable summary of the outcome.
    message: str
    # Check-specific data (counts, percentages, or an 'error' string).
    details: Dict[str, Any]
    # When the result was produced; the checks use datetime.utcnow().
    timestamp: datetime
class ScheduleHealthChecker:
"""Schedule health monitoring system."""
def __init__(
    self,
    scheduler,
    check_interval: int = 300,  # 5 minutes
    warning_threshold: int = 3,
    critical_threshold: int = 5
):
    """Create a health checker bound to a scheduler.

    Args:
        scheduler: ContentScheduler instance
        check_interval: Seconds to wait between health check rounds
        warning_threshold: Number of failures before warning
        critical_threshold: Number of failures before critical
    """
    self.logger = logger
    self.scheduler = scheduler
    self.check_interval = check_interval
    self.warning_threshold = warning_threshold
    self.critical_threshold = critical_threshold
    # Results of past check rounds, oldest first.
    self.health_history = []
    # Per-category failure tallies, all starting at zero.
    categories = (
        'job_execution',
        'platform_publish',
        'schedule_conflicts',
        'resource_usage',
    )
    self.failure_counts = dict.fromkeys(categories, 0)
    # Background asyncio task; None while monitoring is not running.
    self.monitoring_task = None
async def start_monitoring(self):
"""Start the health monitoring system."""
try:
if not self.monitoring_task:
self.monitoring_task = asyncio.create_task(self._monitor_health())
self.logger.info("Health monitoring started")
except Exception as e:
self.logger.error(f"Failed to start health monitoring: {str(e)}")
raise SchedulingError(f"Health monitoring start failed: {str(e)}")
async def stop_monitoring(self):
    """Cancel the background monitoring task, if one exists.

    Raises:
        SchedulingError: If cancelling the task fails.
    """
    try:
        active = self.monitoring_task
        if not active:
            # Nothing to stop.
            return
        active.cancel()
        self.monitoring_task = None
        self.logger.info("Health monitoring stopped")
    except Exception as e:
        self.logger.error(f"Failed to stop health monitoring: {str(e)}")
        raise SchedulingError(f"Health monitoring stop failed: {str(e)}")
async def _monitor_health(self):
    """Run health check rounds forever, sleeping check_interval between them.

    Each round's results are appended to health_history (capped at the most
    recent 1000 entries) and any CRITICAL results are passed to
    _handle_critical_issues. The loop ends only when the task is cancelled;
    any other error is logged and the loop continues after the usual sleep.
    """
    history_limit = 1000
    while True:
        try:
            round_results = await self._perform_health_checks()
            self.health_history.extend(round_results)
            if len(self.health_history) > history_limit:
                self.health_history = self.health_history[-history_limit:]
            failing = [
                result for result in round_results
                if result.status == HealthStatus.CRITICAL
            ]
            if failing:
                await self._handle_critical_issues(failing)
            await asyncio.sleep(self.check_interval)
        except asyncio.CancelledError:
            break
        except Exception as e:
            self.logger.error(f"Health monitoring error: {str(e)}")
            await asyncio.sleep(self.check_interval)
async def _perform_health_checks(self) -> List[HealthCheck]:
"""Perform all health checks.
Returns:
List of health check results
"""
checks = []
try:
# Check scheduler status
checks.append(await self._check_scheduler_status())
# Check job execution
checks.append(await self._check_job_execution())
# Check platform connectivity
checks.append(await self._check_platform_connectivity())
# Check resource usage
checks.append(await self._check_resource_usage())
# Check schedule conflicts
checks.append(await self._check_schedule_conflicts())
# Check database connection
checks.append(await self._check_database_connection())
# Check job store
checks.append(await self._check_job_store())
return checks
except Exception as e:
self.logger.error(f"Health check failed: {str(e)}")
return [
HealthCheck(
component="health_checker",
status=HealthStatus.CRITICAL,
message=f"Health check system error: {str(e)}",
details={'error': str(e)},
timestamp=datetime.utcnow()
)
]
async def _check_scheduler_status(self) -> HealthCheck:
    """Report whether the underlying scheduler is running.

    Returns:
        HEALTHY when the scheduler reports running, CRITICAL when it does
        not or when it could not be queried. The job count is included in
        the details when available.
    """
    try:
        running = self.scheduler.scheduler.running
        jobs = len(self.scheduler.scheduler.get_jobs())
        if running:
            level, text = HealthStatus.HEALTHY, "Scheduler is running"
        else:
            level, text = HealthStatus.CRITICAL, "Scheduler is not running"
        return HealthCheck(
            component="scheduler",
            status=level,
            message=text,
            details={'job_count': jobs},
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="scheduler",
            status=HealthStatus.CRITICAL,
            message=f"Scheduler check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_job_execution(self) -> HealthCheck:
    """Grade job execution by the failure rate over the last 24 hours.

    Jobs are taken from scheduler.job_status, filtered by 'created_at'. The
    number of failed jobs is also stored in failure_counts['job_execution'].

    Returns:
        CRITICAL at a failure rate of 20% or more, WARNING at 10% or more,
        HEALTHY otherwise; CRITICAL as well if the check itself raised.
    """
    try:
        window = timedelta(hours=24)
        recent = [
            entry for entry in self.scheduler.job_status.values()
            if datetime.utcnow() - entry['created_at'] < window
        ]
        total = len(recent)
        failed = sum(1 for entry in recent if entry['status'] == 'FAILED')
        # No recent jobs counts as a zero failure rate.
        rate = failed / total if total > 0 else 0
        self.failure_counts['job_execution'] = failed
        if rate >= 0.2:  # 20% failure rate
            level, text = HealthStatus.CRITICAL, "High job failure rate detected"
        elif rate >= 0.1:  # 10% failure rate
            level, text = HealthStatus.WARNING, "Elevated job failure rate"
        else:
            level, text = HealthStatus.HEALTHY, "Job execution is healthy"
        return HealthCheck(
            component="job_execution",
            status=level,
            message=text,
            details={
                'total_jobs': total,
                'failed_jobs': failed,
                'failure_rate': rate
            },
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="job_execution",
            status=HealthStatus.CRITICAL,
            message=f"Job execution check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_platform_connectivity(self) -> HealthCheck:
    """Query the status of every platform referenced by tracked jobs.

    Platforms are collected from the 'schedule' entry of each job in
    scheduler.job_status. A platform whose adapter lookup or status call
    raises is recorded as 'error' and bumps
    failure_counts['platform_publish'].

    Returns:
        CRITICAL when any platform is in 'error' or the check itself
        raised, HEALTHY otherwise. The per-platform status map is included
        in the details.
    """
    try:
        known_platforms = set()
        for entry in self.scheduler.job_status.values():
            if 'schedule' not in entry:
                continue
            known_platforms.update(entry['schedule'].platforms)
        results = {}
        for name in known_platforms:
            try:
                adapter = self.scheduler._get_platform_adapter(name)
                reply = await adapter.get_platform_status()
                results[name] = reply['status']
            except Exception:
                results[name] = 'error'
                self.failure_counts['platform_publish'] += 1
        if any(state == 'error' for state in results.values()):
            level, text = HealthStatus.CRITICAL, "Platform connectivity issues detected"
        else:
            level, text = HealthStatus.HEALTHY, "Platform connectivity is healthy"
        return HealthCheck(
            component="platform_connectivity",
            status=level,
            message=text,
            details={'platform_status': results},
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="platform_connectivity",
            status=HealthStatus.CRITICAL,
            message=f"Platform connectivity check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_resource_usage(self) -> HealthCheck:
    """Grade CPU, memory and root-disk usage as reported by psutil.

    Returns:
        CRITICAL when any reading exceeds 90% (also bumping
        failure_counts['resource_usage']), WARNING when any exceeds 70%,
        HEALTHY otherwise; CRITICAL as well if the check itself raised,
        for example when psutil is not installed.
    """
    try:
        import psutil
        readings = {
            'cpu_percent': psutil.cpu_percent(),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent
        }
        if any(value > 90 for value in readings.values()):
            self.failure_counts['resource_usage'] += 1
            level, text = HealthStatus.CRITICAL, "High resource usage detected"
        elif any(value > 70 for value in readings.values()):
            level, text = HealthStatus.WARNING, "Elevated resource usage"
        else:
            level, text = HealthStatus.HEALTHY, "Resource usage is healthy"
        return HealthCheck(
            component="resource_usage",
            status=level,
            message=text,
            details=readings,
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="resource_usage",
            status=HealthStatus.CRITICAL,
            message=f"Resource usage check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_schedule_conflicts(self) -> HealthCheck:
    """Check pending schedules for conflicts.

    Pending schedules are taken from scheduler.job_status and handed to the
    scheduler's conflict resolver. Each conflict found is added to
    failure_counts['schedule_conflicts'].

    Returns:
        WARNING with a serialisable summary of each conflict when any are
        found, HEALTHY when none are, CRITICAL if the check itself raised.
    """
    import inspect

    def summarize(conflict):
        # Conflict objects that provide their own dict() are used as-is;
        # otherwise the descriptive fields are read directly, because a
        # plain dataclass conflict has no dict() method.
        to_dict = getattr(conflict, 'dict', None)
        if callable(to_dict):
            return to_dict()
        return {
            'conflict_type': getattr(conflict, 'conflict_type', None),
            'severity': getattr(conflict, 'severity', None),
            'description': getattr(conflict, 'description', None),
            'suggested_resolution': getattr(conflict, 'suggested_resolution', None),
            'schedule_ids': [
                getattr(getattr(conflict, 'schedule_1', None), 'id', None),
                getattr(getattr(conflict, 'schedule_2', None), 'id', None),
            ],
        }

    try:
        # Jobs without a 'schedule' entry are skipped, matching the guard
        # used by the platform connectivity check.
        pending_schedules = [
            job['schedule'] for job in self.scheduler.job_status.values()
            if job['status'] == 'PENDING' and 'schedule' in job
        ]
        # detect_conflicts may be a plain method returning a list or a
        # coroutine function; only await when the result is awaitable.
        conflicts = self.scheduler.conflict_resolver.detect_conflicts(
            pending_schedules
        )
        if inspect.isawaitable(conflicts):
            conflicts = await conflicts
        if conflicts:
            self.failure_counts['schedule_conflicts'] += len(conflicts)
            return HealthCheck(
                component="schedule_conflicts",
                status=HealthStatus.WARNING,
                message="Schedule conflicts detected",
                details={
                    'conflict_count': len(conflicts),
                    'conflicts': [summarize(c) for c in conflicts]
                },
                timestamp=datetime.utcnow()
            )
        return HealthCheck(
            component="schedule_conflicts",
            status=HealthStatus.HEALTHY,
            message="No schedule conflicts detected",
            details={'conflict_count': 0},
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="schedule_conflicts",
            status=HealthStatus.CRITICAL,
            message=f"Schedule conflict check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_database_connection(self) -> HealthCheck:
    """Check database connection health.

    Opens a session from scheduler.Session, runs a trivial query and always
    closes the session again, even when the query fails.

    Returns:
        HEALTHY when the query succeeds, CRITICAL with the error text
        otherwise.
    """
    try:
        session = self.scheduler.Session()
        try:
            # NOTE(review): if this is a SQLAlchemy 2.x session, raw SQL
            # strings must be wrapped in sqlalchemy.text() - confirm the
            # installed version and adjust at the import level if needed.
            session.execute("SELECT 1")
        finally:
            # Close unconditionally so a failed probe does not leak the
            # session (and its connection) on every monitoring round.
            session.close()
        return HealthCheck(
            component="database",
            status=HealthStatus.HEALTHY,
            message="Database connection is healthy",
            details={},
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="database",
            status=HealthStatus.CRITICAL,
            message=f"Database connection failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _check_job_store(self) -> HealthCheck:
    """Compare the scheduler's job list with the tracked job_status map.

    Returns:
        WARNING when the number of scheduler jobs differs from the number
        of job_status entries, HEALTHY when they match, CRITICAL if the
        check itself raised. Both counts are included in the details.
    """
    try:
        sizes = {
            'job_count': len(self.scheduler.scheduler.get_jobs()),
            'store_size': len(self.scheduler.job_status)
        }
        if sizes['job_count'] != sizes['store_size']:
            level, text = HealthStatus.WARNING, "Job store inconsistency detected"
        else:
            level, text = HealthStatus.HEALTHY, "Job store is healthy"
        return HealthCheck(
            component="job_store",
            status=level,
            message=text,
            details=sizes,
            timestamp=datetime.utcnow()
        )
    except Exception as e:
        return HealthCheck(
            component="job_store",
            status=HealthStatus.CRITICAL,
            message=f"Job store check failed: {str(e)}",
            details={'error': str(e)},
            timestamp=datetime.utcnow()
        )
async def _handle_critical_issues(self, critical_checks: List[HealthCheck]):
"""Handle critical health issues.
Args:
critical_checks: List of critical health checks
"""
try:
# Log critical issues
for check in critical_checks:
self.logger.error(
f"Critical health issue in {check.component}: {check.message}"
)
# Attempt recovery actions
for check in critical_checks:
if check.component == "scheduler" and not self.scheduler.scheduler.running:
await self.scheduler.start()
elif check.component == "database":
# Attempt to reconnect
self.scheduler.engine.dispose()
self.scheduler.engine = create_engine(self.scheduler.db_url)
self.scheduler.Session = sessionmaker(bind=self.scheduler.engine)
elif check.component == "job_store":
# Attempt to recover job store
await self.scheduler._recover_jobs()
# Reset failure counters if recovery successful
self.failure_counts = {k: 0 for k in self.failure_counts}
except Exception as e:
self.logger.error(f"Failed to handle critical issues: {str(e)}")
def get_health_summary(self) -> Dict[str, Any]:
    """Get health check summary.

    The most recent recorded result of each component is used. The overall
    status is CRITICAL if any component is critical, WARNING if any is in
    warning, HEALTHY otherwise, and UNKNOWN when no check has been recorded
    yet (no data is not evidence of health).

    Returns:
        Dictionary with the overall 'status', per-component 'components', a
        copy of the 'failure_counts' and 'last_check' (the time this
        summary was built, ISO formatted). If building the summary fails,
        'status' is UNKNOWN and an 'error' string is included instead.
    """
    try:
        # History is oldest-first, so later entries overwrite earlier ones
        # and the map ends up holding the latest check per component.
        latest_checks = {}
        for check in self.health_history:
            latest_checks[check.component] = check
        statuses = [check.status for check in latest_checks.values()]
        if not statuses:
            overall_status = HealthStatus.UNKNOWN
        elif HealthStatus.CRITICAL in statuses:
            overall_status = HealthStatus.CRITICAL
        elif HealthStatus.WARNING in statuses:
            overall_status = HealthStatus.WARNING
        else:
            overall_status = HealthStatus.HEALTHY
        return {
            'status': overall_status.value,
            'components': {
                component: {
                    'status': check.status.value,
                    'message': check.message,
                    'details': check.details,
                    'timestamp': check.timestamp.isoformat()
                }
                for component, check in latest_checks.items()
            },
            # Copy so callers cannot mutate the checker's internal counters.
            'failure_counts': dict(self.failure_counts),
            'last_check': datetime.utcnow().isoformat()
        }
    except Exception as e:
        self.logger.error(f"Failed to get health summary: {str(e)}")
        return {
            'status': HealthStatus.UNKNOWN.value,
            'error': str(e),
            'last_check': datetime.utcnow().isoformat()
        }

View File

@@ -0,0 +1,597 @@
"""
Schedule optimization system for content scheduling.
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import numpy as np
from collections import defaultdict
# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus, ContentType, Platform, get_session
logger = logging.getLogger(__name__)
@dataclass
class OptimizationResult:
    """Result of schedule optimization.

    Pairs the schedule that was analysed with the proposed time and the
    scores that justify the proposal.
    """
    # The schedule that was analysed.
    original_schedule: Schedule
    # Proposed time; equals the original time when nothing better was found
    # or optimization failed.
    optimized_time: datetime
    # Engagement score of the proposed time minus that of the original time.
    improvement_score: float
    # Human-readable explanation of the proposal (or of the failure).
    optimization_reason: str
    # Improvement relative to the original score, capped at 1.0; 0.0 when
    # the original score is not positive or optimization failed.
    confidence: float
class ScheduleOptimizer:
"""Optimize content scheduling for maximum engagement."""
def __init__(self):
    """Create the optimizer with a database session and built-in timing tables."""
    self.logger = logger
    self.session = get_session()
    # Preferred posting hours (hour of day) per platform; hard-coded for
    # now and could be made configurable.
    platform_hours = (
        (Platform.TWITTER, [9, 12, 15, 18]),
        (Platform.FACEBOOK, [9, 13, 15]),
        (Platform.LINKEDIN, [8, 12, 17]),
        (Platform.INSTAGRAM, [11, 14, 17, 19]),
        (Platform.YOUTUBE, [14, 16, 18, 20]),
    )
    self.platform_optimal_times = dict(platform_hours)
    # Engagement pattern per content type: peak hours of day and a duration.
    type_patterns = (
        (ContentType.ARTICLE, [9, 14, 16], 2),
        (ContentType.VIDEO, [12, 18, 20], 3),
        (ContentType.IMAGE, [11, 15, 19], 1),
        (ContentType.SOCIAL_POST, [8, 12, 17, 21], 1),
    )
    self.content_type_patterns = {
        kind: {'peak_hours': hours, 'duration': duration}
        for kind, hours, duration in type_patterns
    }
def optimize_schedule(self, schedule: Schedule) -> OptimizationResult:
    """Propose a better time for one schedule.

    Args:
        schedule: Schedule to optimize

    Returns:
        OptimizationResult holding the proposed time, the score gain over
        the current time, a textual reason and a confidence value. When the
        content item cannot be found or an error occurs, the original time
        is returned with zero improvement and zero confidence.
    """
    def unchanged(reason):
        # Fallback result that keeps the schedule where it is.
        return OptimizationResult(
            original_schedule=schedule,
            optimized_time=schedule.scheduled_time,
            improvement_score=0.0,
            optimization_reason=reason,
            confidence=0.0
        )

    try:
        item = self.session.query(ContentItem).filter(
            ContentItem.id == schedule.content_item_id
        ).first()
        if not item:
            return unchanged("Content item not found")
        baseline = self._calculate_engagement_score(
            schedule.scheduled_time,
            item.content_type,
            schedule.priority
        )
        best_time, best_score = self._find_optimal_time(schedule, item)
        gain = best_score - baseline
        # Confidence is the relative gain, capped at 1.0.
        if baseline > 0:
            certainty = min(gain / baseline, 1.0)
        else:
            certainty = 0.0
        explanation = self._generate_optimization_reason(
            schedule.scheduled_time,
            best_time,
            item.content_type,
            gain
        )
        return OptimizationResult(
            original_schedule=schedule,
            optimized_time=best_time,
            improvement_score=gain,
            optimization_reason=explanation,
            confidence=certainty
        )
    except Exception as e:
        self.logger.error(f"Error optimizing schedule: {str(e)}")
        return unchanged(f"Optimization error: {str(e)}")
def optimize_multiple_schedules(
    self,
    schedules: List[Schedule],
    avoid_conflicts: bool = True
) -> List[OptimizationResult]:
    """Optimize multiple schedules considering conflicts.

    Schedules are processed from highest to lowest priority, so the results
    come back in that order. With avoid_conflicts enabled, a proposed time
    that collides with an already accepted one is replaced by a
    conflict-free time, and the improvement score and confidence are
    recomputed for the new time.

    Args:
        schedules: List of schedules to optimize
        avoid_conflicts: Whether to avoid scheduling conflicts

    Returns:
        List of optimization results, or an empty list if an error occurred
    """
    try:
        results = []
        optimized_times = []
        # Sort schedules by priority (high priority first)
        sorted_schedules = sorted(schedules, key=lambda x: x.priority, reverse=True)
        for schedule in sorted_schedules:
            result = self.optimize_schedule(schedule)
            if avoid_conflicts:
                conflict_free_time = self._find_conflict_free_time(
                    result.optimized_time,
                    optimized_times,
                    schedule
                )
                if conflict_free_time != result.optimized_time:
                    content_item = self.session.query(ContentItem).filter(
                        ContentItem.id == schedule.content_item_id
                    ).first()
                    if content_item:
                        new_score = self._calculate_engagement_score(
                            conflict_free_time,
                            content_item.content_type,
                            schedule.priority
                        )
                        original_score = self._calculate_engagement_score(
                            schedule.scheduled_time,
                            content_item.content_type,
                            schedule.priority
                        )
                        improvement = new_score - original_score
                        result.optimized_time = conflict_free_time
                        result.improvement_score = improvement
                        # Confidence was derived from the pre-adjustment
                        # score; recompute it with the same formula as
                        # optimize_schedule. The adjusted slot may score
                        # below the original, so clamp at 0.
                        if original_score > 0:
                            result.confidence = max(
                                0.0, min(improvement / original_score, 1.0)
                            )
                        else:
                            result.confidence = 0.0
                        result.optimization_reason += " (adjusted to avoid conflicts)"
            results.append(result)
            optimized_times.append(result.optimized_time)
        return results
    except Exception as e:
        self.logger.error(f"Error optimizing multiple schedules: {str(e)}")
        return []
def suggest_optimal_times(
    self,
    content_type: ContentType,
    date_range: Tuple[datetime, datetime],
    count: int = 5
) -> List[Dict[str, Any]]:
    """Suggest optimal times for new content.

    Every peak hour of the content type (or 9, 12, 15 and 18 when the type
    has no known pattern) on every calendar day touched by the range is
    considered. Candidates inside the range are scored with the default
    priority of 5.

    Args:
        content_type: Type of content to schedule
        date_range: (start, end) datetimes, both inclusive
        count: Number of suggestions to return

    Returns:
        Up to `count` suggestions sorted by descending score, each with
        'time', 'score', 'day_of_week', 'hour' and 'reason'; an empty list
        if an error occurred.
    """
    try:
        suggestions = []
        start_date, end_date = date_range
        # The peak hours do not depend on the day, so look them up once.
        if content_type in self.content_type_patterns:
            optimal_hours = self.content_type_patterns[content_type]['peak_hours']
        else:
            optimal_hours = [9, 12, 15, 18]  # Default hours
        # Step through calendar days from midnight of the start day. Stepping
        # from start_date itself would stop early whenever the end's time of
        # day is earlier than the start's, skipping the final day entirely.
        current_day = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
        while current_day <= end_date:
            for hour in optimal_hours:
                candidate_time = current_day.replace(hour=hour)
                if start_date <= candidate_time <= end_date:
                    score = self._calculate_engagement_score(
                        candidate_time,
                        content_type,
                        priority=5  # Default priority
                    )
                    suggestions.append({
                        'time': candidate_time,
                        'score': score,
                        'day_of_week': candidate_time.strftime('%A'),
                        'hour': hour,
                        'reason': self._get_time_suggestion_reason(candidate_time, content_type)
                    })
            current_day += timedelta(days=1)
        # Sort by score and return top suggestions
        suggestions.sort(key=lambda x: x['score'], reverse=True)
        return suggestions[:count]
    except Exception as e:
        self.logger.error(f"Error suggesting optimal times: {str(e)}")
        return []
def _calculate_engagement_score(
    self,
    scheduled_time: datetime,
    content_type: ContentType,
    priority: int
) -> float:
    """Calculate engagement score for a given time and content type.

    The score is the sum of a priority component, an hour-of-day
    component, a day-of-week component, and a penalty applied when more
    than three schedules already exist within two hours of the slot.

    Args:
        scheduled_time: Candidate publishing time
        content_type: Type of the content being scheduled
        priority: Schedule priority; contributes ``priority * 10``

    Returns:
        Non-negative score, or 0.0 when an error occurs.
    """
    try:
        # Base score from priority
        score = priority * 10.0

        # Hour of day factor
        hour = scheduled_time.hour
        if content_type in self.content_type_patterns:
            optimal_hours = self.content_type_patterns[content_type]['peak_hours']
            if hour in optimal_hours:
                score += 50
            elif optimal_hours:
                # Partial credit that shrinks with distance from the
                # nearest peak hour. The guard keeps an empty peak list
                # from raising in min() and zeroing the whole score.
                min_distance = min(abs(hour - oh) for oh in optimal_hours)
                score += max(0, 30 - min_distance * 5)

        # Day of week factor
        day_of_week = scheduled_time.weekday()  # 0 = Monday, 6 = Sunday
        if content_type == ContentType.ARTICLE:
            # Articles perform better on weekdays
            if day_of_week < 5:  # Monday to Friday
                score += 20
            else:
                score += 5
        elif content_type == ContentType.VIDEO:
            # Videos perform better on weekends and evenings
            if day_of_week >= 5 or hour >= 18:
                score += 25
            else:
                score += 10
        elif content_type == ContentType.SOCIAL_POST:
            # Social posts are consistent throughout the week
            score += 15

        # Time spacing factor (avoid clustering). Only the number of
        # neighbours matters, so let the database count them instead of
        # loading every row.
        window = timedelta(hours=2)
        nearby_count = self.session.query(Schedule).filter(
            Schedule.scheduled_time.between(
                scheduled_time - window,
                scheduled_time + window
            )
        ).count()
        if nearby_count > 3:
            score -= nearby_count * 5

        return max(score, 0.0)
    except Exception as e:
        self.logger.error(f"Error calculating engagement score: {str(e)}")
        return 0.0
def _find_optimal_time(
self,
schedule: Schedule,
content_item: ContentItem
) -> Tuple[datetime, float]:
"""Find the optimal time for a schedule."""
try:
best_time = schedule.scheduled_time
best_score = self._calculate_engagement_score(
schedule.scheduled_time,
content_item.content_type,
schedule.priority
)
# Search within a week of the original time
base_date = schedule.scheduled_time.date()
for day_offset in range(-3, 4): # ±3 days
candidate_date = base_date + timedelta(days=day_offset)
# Get optimal hours for this content type
if content_item.content_type in self.content_type_patterns:
optimal_hours = self.content_type_patterns[content_item.content_type]['peak_hours']
else:
optimal_hours = [9, 12, 15, 18]
for hour in optimal_hours:
candidate_time = datetime.combine(candidate_date, datetime.min.time()).replace(hour=hour)
score = self._calculate_engagement_score(
candidate_time,
content_item.content_type,
schedule.priority
)
if score > best_score:
best_time = candidate_time
best_score = score
return best_time, best_score
except Exception as e:
self.logger.error(f"Error finding optimal time: {str(e)}")
return schedule.scheduled_time, 0.0
def _find_conflict_free_time(
self,
preferred_time: datetime,
existing_times: List[datetime],
schedule: Schedule,
min_gap: timedelta = timedelta(minutes=30)
) -> datetime:
"""Find a conflict-free time close to the preferred time."""
try:
# Check if preferred time has conflicts
has_conflict = any(
abs((preferred_time - existing_time).total_seconds()) < min_gap.total_seconds()
for existing_time in existing_times
)
if not has_conflict:
return preferred_time
# Search for nearby conflict-free times
for offset_minutes in [30, 60, 90, 120, -30, -60, -90, -120]:
candidate_time = preferred_time + timedelta(minutes=offset_minutes)
has_conflict = any(
abs((candidate_time - existing_time).total_seconds()) < min_gap.total_seconds()
for existing_time in existing_times
)
if not has_conflict:
return candidate_time
# If no conflict-free time found nearby, return preferred time
return preferred_time
except Exception as e:
self.logger.error(f"Error finding conflict-free time: {str(e)}")
return preferred_time
def _generate_optimization_reason(
self,
original_time: datetime,
optimized_time: datetime,
content_type: ContentType,
improvement_score: float
) -> str:
"""Generate a human-readable optimization reason."""
try:
if improvement_score <= 0:
return "Current time is already optimal"
reasons = []
# Time difference
time_diff = optimized_time - original_time
if abs(time_diff.total_seconds()) > 3600: # More than 1 hour
if time_diff.total_seconds() > 0:
reasons.append(f"Moved {time_diff.total_seconds() / 3600:.1f} hours later")
else:
reasons.append(f"Moved {abs(time_diff.total_seconds()) / 3600:.1f} hours earlier")
# Hour optimization
original_hour = original_time.hour
optimized_hour = optimized_time.hour
if content_type in self.content_type_patterns:
optimal_hours = self.content_type_patterns[content_type]['peak_hours']
if optimized_hour in optimal_hours and original_hour not in optimal_hours:
reasons.append(f"Moved to peak engagement hour ({optimized_hour}:00)")
# Day optimization
original_day = original_time.strftime('%A')
optimized_day = optimized_time.strftime('%A')
if original_day != optimized_day:
reasons.append(f"Moved from {original_day} to {optimized_day}")
# Improvement score
reasons.append(f"Expected {improvement_score:.1f}% engagement improvement")
return "; ".join(reasons) if reasons else "Optimized for better engagement"
except Exception as e:
self.logger.error(f"Error generating optimization reason: {str(e)}")
return "Optimized for better engagement"
def _get_time_suggestion_reason(self, time: datetime, content_type: ContentType) -> str:
    """Explain why ``time`` is a reasonable slot for ``content_type``.

    Args:
        time: Suggested publishing time
        content_type: Type of content the suggestion is for

    Returns:
        A ``"; "``-separated list of reasons, a generic "good time"
        message when no specific reason applies, or "Recommended time"
        if an error occurs.
    """
    try:
        hour = time.hour
        found = []

        # Hour-based reason: the slot is one of the type's peak hours.
        if content_type in self.content_type_patterns:
            if hour in self.content_type_patterns[content_type]['peak_hours']:
                found.append(f"Peak engagement hour for {content_type.value}")

        # Day-based reason: weekdays for articles, evenings/weekends for videos.
        if content_type == ContentType.ARTICLE and time.weekday() < 5:
            found.append("Weekday optimal for articles")
        elif content_type == ContentType.VIDEO and (time.weekday() >= 5 or hour >= 18):
            found.append("Evening/weekend optimal for videos")

        if not found:
            return f"Good time for {content_type.value}"
        return "; ".join(found)
    except Exception as e:
        self.logger.error(f"Error getting suggestion reason: {str(e)}")
        return "Recommended time"
def analyze_schedule_performance(self, days_back: int = 30) -> Dict[str, Any]:
    """Summarize how recently created schedules performed.

    Schedules created within the last ``days_back`` days are scored with
    ``_calculate_performance_score`` and grouped by hour, weekday name
    and content type. Schedules whose content item cannot be found are
    left out of the groups.

    Args:
        days_back: Size of the look-back window in days

    Returns:
        Dict with totals, the top performers per grouping and textual
        recommendations; or ``{'error': ...}`` when nothing was found or
        an error occurred.
    """
    try:
        window_start = datetime.now() - timedelta(days=days_back)
        recent = self.session.query(Schedule).filter(
            Schedule.created_at >= window_start
        ).all()

        if not recent:
            return {'error': 'No schedules found for analysis'}

        by_hour = defaultdict(list)
        by_day = defaultdict(list)
        by_type = defaultdict(list)

        for entry in recent:
            item = self.session.query(ContentItem).filter(
                ContentItem.id == entry.content_item_id
            ).first()
            if not item:
                continue

            slot_hour = entry.scheduled_time.hour
            slot_day = entry.scheduled_time.strftime('%A')

            # Calculate performance score (simplified)
            points = self._calculate_performance_score(entry)

            by_hour[slot_hour].append(points)
            by_day[slot_day].append(points)
            by_type[item.content_type.value].append(points)

        # Rank each grouping by its average score
        return {
            'total_schedules': len(recent),
            'analysis_period_days': days_back,
            'best_hours': self._get_top_performers(by_hour),
            'best_days': self._get_top_performers(by_day),
            'content_type_performance': self._get_top_performers(by_type),
            'recommendations': self._generate_performance_recommendations(
                by_hour,
                by_day,
                by_type
            )
        }
    except Exception as e:
        self.logger.error(f"Error analyzing schedule performance: {str(e)}")
        return {'error': str(e)}
def _calculate_performance_score(self, schedule: Schedule) -> float:
    """Score a schedule from its status and priority (simplified).

    This is a stand-in for real engagement metrics: it starts from a
    base of 50, adjusts for the schedule status and adds twice the
    priority.

    Args:
        schedule: Schedule to score

    Returns:
        Non-negative score, or 0.0 when an error occurs.
    """
    try:
        state = schedule.status

        # Status-based adjustment
        if state == ScheduleStatus.COMPLETED:
            status_bonus = 30
        elif state == ScheduleStatus.RUNNING:
            status_bonus = 15
        elif state == ScheduleStatus.FAILED:
            status_bonus = -20
        else:
            status_bonus = 0

        # Base performance + status adjustment + priority weight
        total = 50.0 + status_bonus + schedule.priority * 2
        return max(total, 0.0)
    except Exception as e:
        self.logger.error(f"Error calculating performance score: {str(e)}")
        return 0.0
def _get_top_performers(self, performance_data: Dict[str, List[float]]) -> List[Dict[str, Any]]:
"""Get top performing items from performance data."""
try:
performers = []
for key, scores in performance_data.items():
if scores:
avg_score = np.mean(scores)
performers.append({
'key': key,
'average_score': avg_score,
'sample_count': len(scores)
})
# Sort by average score
performers.sort(key=lambda x: x['average_score'], reverse=True)
return performers[:5] # Top 5
except Exception as e:
self.logger.error(f"Error getting top performers: {str(e)}")
return []
def _generate_performance_recommendations(
self,
hour_performance: Dict[int, List[float]],
day_performance: Dict[str, List[float]],
content_type_performance: Dict[str, List[float]]
) -> List[str]:
"""Generate performance-based recommendations."""
try:
recommendations = []
# Hour recommendations
if hour_performance:
best_hours = self._get_top_performers(hour_performance)
if best_hours:
best_hour = best_hours[0]['key']
recommendations.append(f"Schedule more content around {best_hour}:00 for better performance")
# Day recommendations
if day_performance:
best_days = self._get_top_performers(day_performance)
if best_days:
best_day = best_days[0]['key']
recommendations.append(f"Consider scheduling more content on {best_day}s")
# Content type recommendations
if content_type_performance:
best_types = self._get_top_performers(content_type_performance)
if best_types:
best_type = best_types[0]['key']
recommendations.append(f"{best_type} content shows the best performance")
return recommendations
except Exception as e:
self.logger.error(f"Error generating recommendations: {str(e)}")
return []

View File

@@ -0,0 +1,611 @@
"""
Schedule validation system for content scheduling.
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
import re
# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus, ContentType, Platform, get_session
logger = logging.getLogger(__name__)
@dataclass
class ValidationResult:
    """Result of schedule validation.

    Bundles the verdict with the messages that explain it, so callers
    can both gate on ``is_valid`` and show details to the user.
    """
    # Overall verdict of the validation run.
    is_valid: bool
    # Messages describing blocking problems.
    errors: List[str]
    # Messages describing non-blocking concerns.
    warnings: List[str]
    # Hints on how the schedule or content could be improved.
    suggestions: List[str]
    # Confidence in the result, expressed as a float.
    confidence: float
class ScheduleValidator:
"""Validate content schedules for compliance and optimization."""
def __init__(self, validation_config: Optional[Dict[str, Any]] = None):
    """Initialize the schedule validator.

    Args:
        validation_config: Optional validation settings. Accepted so the
            validator can be constructed with a configuration mapping
            (as the content scheduler does); a copy is kept on
            ``self.validation_config``. Omitting it keeps the previous
            no-argument behaviour.
    """
    self.logger = logger
    self.session = get_session()

    # Keep a private copy so later changes by the caller do not leak in.
    self.validation_config = dict(validation_config) if validation_config else {}

    # Platform-specific validation rules
    self.platform_rules = {
        Platform.TWITTER: {
            'max_text_length': 280,
            'max_images': 4,
            'max_videos': 1,
            'allowed_formats': ['jpg', 'png', 'gif', 'mp4'],
            'max_file_size_mb': 5,
            'posting_frequency_limit': {'per_hour': 10, 'per_day': 100}
        },
        Platform.FACEBOOK: {
            'max_text_length': 63206,
            'max_images': 10,
            'max_videos': 1,
            'allowed_formats': ['jpg', 'png', 'gif', 'mp4', 'mov'],
            'max_file_size_mb': 100,
            'posting_frequency_limit': {'per_hour': 5, 'per_day': 25}
        },
        Platform.LINKEDIN: {
            'max_text_length': 3000,
            'max_images': 9,
            'max_videos': 1,
            'allowed_formats': ['jpg', 'png', 'gif', 'mp4'],
            'max_file_size_mb': 200,
            'posting_frequency_limit': {'per_hour': 3, 'per_day': 20}
        },
        Platform.INSTAGRAM: {
            'max_text_length': 2200,
            'max_images': 10,
            'max_videos': 1,
            'allowed_formats': ['jpg', 'png', 'mp4'],
            'max_file_size_mb': 100,
            'posting_frequency_limit': {'per_hour': 2, 'per_day': 10}
        }
    }

    # Content type validation rules
    self.content_type_rules = {
        ContentType.ARTICLE: {
            'min_title_length': 10,
            'max_title_length': 200,
            'min_content_length': 100,
            'required_fields': ['title', 'content', 'summary']
        },
        ContentType.VIDEO: {
            'min_duration_sec': 5,
            'max_duration_sec': 3600,
            'required_fields': ['title', 'description'],
            'recommended_formats': ['mp4', 'mov']
        },
        ContentType.IMAGE: {
            'min_width': 400,
            'min_height': 400,
            'max_width': 4096,
            'max_height': 4096,
            'required_fields': ['title', 'alt_text']
        },
        ContentType.SOCIAL_POST: {
            'min_length': 10,
            'max_length': 500,
            'required_fields': ['content']
        }
    }
def validate_schedule(self, schedule: Schedule) -> ValidationResult:
    """Validate a single schedule.

    Runs the basic-property, content, timing and conflict checks in that
    order and merges their messages into one result.

    Args:
        schedule: Schedule to validate

    Returns:
        ValidationResult with validation details. The result is invalid
        with zero confidence when the content item cannot be found or
        when validation itself raises.
    """
    try:
        # Get content item details
        content_item = self.session.query(ContentItem).filter(
            ContentItem.id == schedule.content_item_id
        ).first()

        if not content_item:
            return ValidationResult(
                is_valid=False,
                errors=["Content item not found"],
                warnings=[],
                suggestions=[],
                confidence=0.0
            )

        errors, warnings, suggestions = [], [], []

        # Each check returns a dict with 'errors', 'warnings' and
        # 'suggestions'; the order below fixes the order of the messages.
        checks = (
            (self._validate_basic_properties, schedule),
            (self._validate_content_properties, content_item),
            (self._validate_timing, schedule),
            (self._validate_conflicts, schedule),
        )
        for check, subject in checks:
            report = check(subject)
            errors.extend(report['errors'])
            warnings.extend(report['warnings'])
            suggestions.extend(report['suggestions'])

        return ValidationResult(
            is_valid=len(errors) == 0,
            errors=errors,
            warnings=warnings,
            suggestions=suggestions,
            confidence=self._calculate_validation_confidence(errors, warnings)
        )
    except Exception as e:
        self.logger.error(f"Error validating schedule: {str(e)}")
        return ValidationResult(
            is_valid=False,
            errors=[f"Validation error: {str(e)}"],
            warnings=[],
            suggestions=[],
            confidence=0.0
        )
def validate_multiple_schedules(self, schedules: List[Schedule]) -> Dict[str, ValidationResult]:
    """Validate several schedules, including checks between them.

    Every schedule is validated on its own first; warnings and
    suggestions from the cross-schedule check are then appended to the
    matching individual results.

    Args:
        schedules: List of schedules to validate

    Returns:
        Dictionary mapping schedule IDs (as strings) to validation
        results, or an empty dict when an error occurs.
    """
    try:
        per_schedule = {}
        for item in schedules:
            outcome = self.validate_schedule(item)
            per_schedule[str(item.id)] = outcome

        # Merge cross-schedule findings into the individual results;
        # findings for IDs that were not validated are ignored.
        shared_findings = self._validate_cross_schedule_conflicts(schedules)
        for key, findings in shared_findings.items():
            if key not in per_schedule:
                continue
            target = per_schedule[key]
            target.warnings.extend(findings.get('warnings', []))
            target.suggestions.extend(findings.get('suggestions', []))

        return per_schedule
    except Exception as e:
        self.logger.error(f"Error validating multiple schedules: {str(e)}")
        return {}
def _validate_basic_properties(self, schedule: Schedule) -> Dict[str, List[str]]:
"""Validate basic schedule properties."""
errors = []
warnings = []
suggestions = []
try:
# Check required fields
if not schedule.content_item_id:
errors.append("Content item ID is required")
if not schedule.scheduled_time:
errors.append("Scheduled time is required")
if not schedule.status:
errors.append("Schedule status is required")
# Check priority range
if schedule.priority < 1 or schedule.priority > 10:
warnings.append(f"Priority {schedule.priority} is outside recommended range (1-10)")
# Check if schedule is in the past
if schedule.scheduled_time < datetime.now():
if schedule.status == ScheduleStatus.PENDING:
errors.append("Cannot schedule content in the past")
else:
warnings.append("Schedule time is in the past")
# Check if schedule is too far in the future
max_future_days = 365 # 1 year
if schedule.scheduled_time > datetime.now() + timedelta(days=max_future_days):
warnings.append(f"Schedule is more than {max_future_days} days in the future")
suggestions.append("Consider scheduling closer to the current date for better relevance")
# Validate recurrence pattern
if schedule.recurrence:
recurrence_validation = self._validate_recurrence_pattern(schedule.recurrence)
errors.extend(recurrence_validation['errors'])
warnings.extend(recurrence_validation['warnings'])
suggestions.extend(recurrence_validation['suggestions'])
except Exception as e:
self.logger.error(f"Error validating basic properties: {str(e)}")
errors.append(f"Basic validation error: {str(e)}")
return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _validate_content_properties(self, content_item: ContentItem) -> Dict[str, List[str]]:
"""Validate content item properties."""
errors = []
warnings = []
suggestions = []
try:
# Check required fields
if not content_item.title or len(content_item.title.strip()) == 0:
errors.append("Content title is required")
if not content_item.content or len(content_item.content.strip()) == 0:
errors.append("Content body is required")
# Validate based on content type
if content_item.content_type:
type_rules = self.content_type_rules.get(content_item.content_type)
if type_rules:
type_validation = self._validate_content_type_rules(content_item, type_rules)
errors.extend(type_validation['errors'])
warnings.extend(type_validation['warnings'])
suggestions.extend(type_validation['suggestions'])
# Check for potentially problematic content
content_check = self._check_content_quality(content_item)
warnings.extend(content_check['warnings'])
suggestions.extend(content_check['suggestions'])
except Exception as e:
self.logger.error(f"Error validating content properties: {str(e)}")
errors.append(f"Content validation error: {str(e)}")
return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _validate_timing(self, schedule: Schedule) -> Dict[str, List[str]]:
    """Validate schedule timing.

    Warns about posting outside 6 AM - 11 PM, about articles scheduled
    on weekends, about holidays, and about high posting frequency.

    Args:
        schedule: Schedule to validate

    Returns:
        Dict with ``errors``, ``warnings`` and ``suggestions`` lists.
    """
    errors = []
    warnings = []
    suggestions = []

    try:
        scheduled_time = schedule.scheduled_time

        # Check if it's a reasonable time to post
        hour = scheduled_time.hour
        day_of_week = scheduled_time.weekday()  # 0 = Monday, 6 = Sunday

        # Check for very early or very late hours. An hour is never
        # greater than 23, so the late bound has to include 23 itself for
        # slots after 11 PM to be flagged as the message promises.
        if hour < 6 or hour >= 23:
            warnings.append(f"Scheduled for {hour}:00 - consider posting during peak hours (6 AM - 11 PM)")
            suggestions.append("Peak engagement typically occurs between 9 AM and 9 PM")

        # Check for weekend posting (depending on content type)
        content_item = self.session.query(ContentItem).filter(
            ContentItem.id == schedule.content_item_id
        ).first()

        if content_item and content_item.content_type == ContentType.ARTICLE:
            if day_of_week >= 5:  # Weekend
                warnings.append("Business content typically performs better on weekdays")
                suggestions.append("Consider rescheduling to Monday-Friday for better engagement")

        # Check for holidays or special dates (simplified)
        if self._is_holiday(scheduled_time.date()):
            warnings.append("Scheduled for a holiday - engagement may be lower")
            suggestions.append("Consider rescheduling to avoid holidays for better reach")

        # Check frequency limits
        frequency_check = self._check_posting_frequency(schedule)
        warnings.extend(frequency_check['warnings'])
        suggestions.extend(frequency_check['suggestions'])
    except Exception as e:
        self.logger.error(f"Error validating timing: {str(e)}")
        errors.append(f"Timing validation error: {str(e)}")

    return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _validate_conflicts(self, schedule: Schedule) -> Dict[str, List[str]]:
    """Validate for scheduling conflicts.

    Warns when other schedules lie within 30 minutes of this one, and
    when more than five other schedules fall on the same calendar day.

    Args:
        schedule: Schedule to validate

    Returns:
        Dict with ``errors``, ``warnings`` and ``suggestions`` lists.
    """
    errors = []
    warnings = []
    suggestions = []

    try:
        scheduled_time = schedule.scheduled_time

        # Check for nearby schedules. Only the number of matches is
        # reported, so count in the database instead of loading rows.
        time_window = timedelta(minutes=30)
        nearby_count = self.session.query(Schedule).filter(
            Schedule.id != schedule.id,
            Schedule.scheduled_time.between(
                scheduled_time - time_window,
                scheduled_time + time_window
            )
        ).count()

        if nearby_count:
            warnings.append(f"Found {nearby_count} other schedule(s) within 30 minutes")
            suggestions.append("Consider spacing schedules at least 30 minutes apart for better visibility")

        # Check for same-day content overload. Microseconds are zeroed as
        # well so the window starts exactly at midnight.
        day_start = scheduled_time.replace(hour=0, minute=0, second=0, microsecond=0)
        day_end = day_start + timedelta(days=1)
        same_day_count = self.session.query(Schedule).filter(
            Schedule.id != schedule.id,
            Schedule.scheduled_time >= day_start,
            Schedule.scheduled_time < day_end
        ).count()

        if same_day_count > 5:
            warnings.append(f"Found {same_day_count} other schedules on the same day")
            suggestions.append("Consider distributing content across multiple days to avoid overwhelming your audience")
    except Exception as e:
        self.logger.error(f"Error validating conflicts: {str(e)}")
        errors.append(f"Conflict validation error: {str(e)}")

    return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _validate_recurrence_pattern(self, recurrence: str) -> Dict[str, List[str]]:
"""Validate recurrence pattern."""
errors = []
warnings = []
suggestions = []
try:
# Define valid recurrence patterns
valid_patterns = [
'daily', 'weekly', 'monthly', 'yearly',
'weekdays', 'weekends',
'every 2 days', 'every 3 days', 'every 7 days',
'every 2 weeks', 'every 2 months'
]
if recurrence.lower() not in valid_patterns:
# Check if it's a cron-like pattern
if not self._is_valid_cron_pattern(recurrence):
errors.append(f"Invalid recurrence pattern: {recurrence}")
suggestions.append(f"Valid patterns include: {', '.join(valid_patterns[:5])}")
# Check for overly frequent recurrence
if 'hour' in recurrence.lower():
warnings.append("Hourly recurrence may overwhelm your audience")
suggestions.append("Consider daily or weekly recurrence for better engagement")
except Exception as e:
self.logger.error(f"Error validating recurrence: {str(e)}")
errors.append(f"Recurrence validation error: {str(e)}")
return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _validate_content_type_rules(self, content_item: ContentItem, rules: Dict[str, Any]) -> Dict[str, List[str]]:
"""Validate content against type-specific rules."""
errors = []
warnings = []
suggestions = []
try:
# Check title length
if 'min_title_length' in rules and len(content_item.title) < rules['min_title_length']:
errors.append(f"Title too short (minimum {rules['min_title_length']} characters)")
if 'max_title_length' in rules and len(content_item.title) > rules['max_title_length']:
errors.append(f"Title too long (maximum {rules['max_title_length']} characters)")
# Check content length
if 'min_content_length' in rules and len(content_item.content) < rules['min_content_length']:
errors.append(f"Content too short (minimum {rules['min_content_length']} characters)")
if 'max_length' in rules and len(content_item.content) > rules['max_length']:
errors.append(f"Content too long (maximum {rules['max_length']} characters)")
# Check required fields
if 'required_fields' in rules:
for field in rules['required_fields']:
if not hasattr(content_item, field) or not getattr(content_item, field):
errors.append(f"Required field missing: {field}")
except Exception as e:
self.logger.error(f"Error validating content type rules: {str(e)}")
errors.append(f"Content type validation error: {str(e)}")
return {'errors': errors, 'warnings': warnings, 'suggestions': suggestions}
def _check_content_quality(self, content_item: ContentItem) -> Dict[str, List[str]]:
"""Check content quality and provide suggestions."""
warnings = []
suggestions = []
try:
content = content_item.content
title = content_item.title
# Check for excessive capitalization
if title and title.isupper():
warnings.append("Title is in all caps")
suggestions.append("Consider using proper capitalization for better readability")
# Check for excessive punctuation
if content and content.count('!') > 3:
warnings.append("Excessive exclamation marks detected")
suggestions.append("Reduce exclamation marks for more professional tone")
# Check for spelling/grammar (simplified)
if content:
# Simple checks for common issues
if ' ' in content: # Double spaces
suggestions.append("Remove extra spaces for cleaner formatting")
if content.count('?') > 5:
warnings.append("Many question marks detected")
suggestions.append("Consider reducing questions for clearer messaging")
# Check for hashtag usage
hashtag_count = len(re.findall(r'#\w+', content)) if content else 0
if hashtag_count > 10:
warnings.append(f"High number of hashtags ({hashtag_count})")
suggestions.append("Consider using 3-5 relevant hashtags for optimal reach")
# Check for URL presence
url_count = len(re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', content)) if content else 0
if url_count > 2:
warnings.append(f"Multiple URLs detected ({url_count})")
suggestions.append("Consider limiting to 1-2 URLs to avoid appearing spammy")
except Exception as e:
self.logger.error(f"Error checking content quality: {str(e)}")
return {'warnings': warnings, 'suggestions': suggestions}
def _check_posting_frequency(self, schedule: Schedule) -> Dict[str, List[str]]:
    """Warn when the schedule's hour or day is already crowded.

    Counts all schedules in the same clock hour and on the same calendar
    day as ``schedule`` and warns above 3 per hour or 10 per day.

    Args:
        schedule: Schedule whose time slot is inspected

    Returns:
        Dict with ``warnings`` and ``suggestions`` lists. Errors are
        logged and leave the findings collected so far in place.
    """
    warnings, suggestions = [], []

    try:
        moment = schedule.scheduled_time

        def scheduled_between(lower, upper):
            # Number of schedules with lower <= scheduled_time < upper.
            return self.session.query(Schedule).filter(
                Schedule.scheduled_time >= lower,
                Schedule.scheduled_time < upper
            ).count()

        # Check hourly frequency
        hour_floor = moment.replace(minute=0, second=0, microsecond=0)
        per_hour = scheduled_between(hour_floor, hour_floor + timedelta(hours=1))
        if per_hour > 3:
            warnings.append(f"High posting frequency: {per_hour} posts in the same hour")
            suggestions.append("Consider spacing posts throughout the day for better engagement")

        # Check daily frequency
        day_floor = moment.replace(hour=0, minute=0, second=0, microsecond=0)
        per_day = scheduled_between(day_floor, day_floor + timedelta(days=1))
        if per_day > 10:
            warnings.append(f"High daily posting frequency: {per_day} posts")
            suggestions.append("Consider reducing daily posts to 3-5 for optimal audience engagement")
    except Exception as e:
        self.logger.error(f"Error checking posting frequency: {str(e)}")

    return {'warnings': warnings, 'suggestions': suggestions}
def _validate_cross_schedule_conflicts(self, schedules: List[Schedule]) -> Dict[str, Dict[str, List[str]]]:
"""Validate conflicts across multiple schedules."""
conflicts = {}
try:
# Sort schedules by time
sorted_schedules = sorted(schedules, key=lambda x: x.scheduled_time)
for i, schedule in enumerate(sorted_schedules):
schedule_id = str(schedule.id)
conflicts[schedule_id] = {'warnings': [], 'suggestions': []}
# Check with subsequent schedules
for j in range(i + 1, len(sorted_schedules)):
other_schedule = sorted_schedules[j]
time_diff = other_schedule.scheduled_time - schedule.scheduled_time
# Check if schedules are too close
if time_diff < timedelta(minutes=15):
conflicts[schedule_id]['warnings'].append(
f"Schedule conflicts with another schedule {time_diff.total_seconds() / 60:.0f} minutes later"
)
conflicts[schedule_id]['suggestions'].append(
"Consider spacing schedules at least 15 minutes apart"
)
# Stop checking if schedules are more than 2 hours apart
if time_diff > timedelta(hours=2):
break
except Exception as e:
self.logger.error(f"Error validating cross-schedule conflicts: {str(e)}")
return conflicts
def _calculate_validation_confidence(self, errors: List[str], warnings: List[str]) -> float:
"""Calculate confidence in validation results."""
try:
# Start with full confidence
confidence = 1.0
# Reduce confidence based on errors and warnings
confidence -= len(errors) * 0.2 # Each error reduces confidence by 20%
confidence -= len(warnings) * 0.05 # Each warning reduces confidence by 5%
# Ensure confidence is between 0 and 1
return max(0.0, min(1.0, confidence))
except Exception as e:
self.logger.error(f"Error calculating validation confidence: {str(e)}")
return 0.0
def _is_holiday(self, date) -> bool:
"""Check if a date is a holiday (simplified implementation)."""
try:
# This is a simplified implementation
# In a real system, you would use a proper holiday library
# Check for some common holidays
month = date.month
day = date.day
# New Year's Day
if month == 1 and day == 1:
return True
# Christmas
if month == 12 and day == 25:
return True
# Independence Day (US)
if month == 7 and day == 4:
return True
return False
except Exception as e:
self.logger.error(f"Error checking holiday: {str(e)}")
return False
def _is_valid_cron_pattern(self, pattern: str) -> bool:
"""Check if a string is a valid cron pattern (simplified)."""
try:
# This is a very simplified cron validation
# A proper implementation would use a cron parsing library
parts = pattern.split()
if len(parts) != 5:
return False
# Basic validation for each part
for part in parts:
if not (part.isdigit() or part == '*' or '/' in part or '-' in part or ',' in part):
return False
return True
except Exception as e:
self.logger.error(f"Error validating cron pattern: {str(e)}")
return False

View File

@@ -0,0 +1,402 @@
"""
Core scheduler implementation using APScheduler.
"""
import logging
import asyncio
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timedelta
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
from apscheduler.triggers.date import DateTrigger
from apscheduler.triggers.cron import CronTrigger
from apscheduler.events import EVENT_JOB_ERROR, EVENT_JOB_EXECUTED, EVENT_JOB_MISSED
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Use unified database models
from lib.database.models import ContentItem, Schedule, ScheduleStatus, get_engine, get_session, init_db
from ..utils.error_handling import SchedulingError
from .conflict_resolver import ConflictResolver
from .health_checker import ScheduleHealthChecker
from .schedule_validator import ScheduleValidator
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class ContentScheduler:
"""Core content scheduler implementation."""
def __init__(
    self,
    db_url: str = "sqlite:///content_scheduler.db",
    max_workers: int = 10,
    job_timeout: int = 300,
    max_retries: int = 3,
    retry_delay: int = 300,
    health_check_interval: int = 300,
    validation_config: Dict[str, Any] = None
):
    """Set up the scheduler, its storage and its helper components.

    Args:
        db_url: Database URL, used for both the schedule tables and the
            persistent job store
        max_workers: Size given to both the thread and the process pool
        job_timeout: Job execution timeout in seconds
        max_retries: Maximum number of retry attempts
        retry_delay: Delay between retries in seconds
        health_check_interval: Health check interval in seconds, passed
            to the health checker
        validation_config: Configuration passed to the schedule
            validator; ``None`` is replaced by an empty dict
    """
    # Plain settings
    self.logger = logger
    self.db_url = db_url
    self.max_workers = max_workers
    self.job_timeout = job_timeout
    self.max_retries = max_retries
    self.retry_delay = retry_delay

    # Bookkeeping for jobs handled by this instance
    self.active_jobs = {}
    self.job_stats = dict.fromkeys(
        ('total_scheduled', 'successful', 'failed', 'retries'), 0
    )

    # Database access through the unified models
    self.engine = get_engine(db_url)
    init_db(self.engine)
    self.Session = sessionmaker(bind=self.engine)

    # Persistent job store plus thread/process executors
    self.jobstores = {'default': SQLAlchemyJobStore(url=db_url)}
    self.executors = {
        'default': ThreadPoolExecutor(max_workers),
        'processpool': ProcessPoolExecutor(max_workers)
    }

    # The scheduler itself runs in UTC
    job_defaults = {
        'coalesce': True,
        'max_instances': 1,
        'misfire_grace_time': 60
    }
    self.scheduler = AsyncIOScheduler(
        jobstores=self.jobstores,
        executors=self.executors,
        timezone='UTC',
        job_defaults=job_defaults
    )

    # Helper components
    self.conflict_resolver = ConflictResolver()
    self.health_checker = ScheduleHealthChecker(
        scheduler=self,
        check_interval=health_check_interval
    )
    self.validator = ScheduleValidator(validation_config or {})

    # React to finished, failed and missed jobs
    watched_events = EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED
    self.scheduler.add_listener(self._handle_job_event, watched_events)
async def start(self):
    """Start the scheduler, recover pending jobs and start health checks.

    Calling this while the underlying scheduler is already running is a
    no-op, so job recovery is not repeated. If recovery or the health
    checker fails after the scheduler has come up, the scheduler is shut
    down again so that a later ``start()`` can retry from a clean state.

    Raises:
        SchedulingError: If any startup step fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        if self.scheduler.running:
            return

        self.scheduler.start()
        await self._recover_jobs()
        await self.health_checker.start()
        self.logger.info("Content scheduler started successfully")
    except Exception as e:
        self.logger.error(f"Failed to start scheduler: {str(e)}")
        # Undo a partial startup; otherwise the `running` guard above would
        # turn every retry into a no-op with recovery never completed.
        try:
            if self.scheduler.running:
                self.scheduler.shutdown(wait=False)
        except Exception as shutdown_error:
            self.logger.error(
                f"Failed to roll back scheduler startup: {str(shutdown_error)}"
            )
        raise SchedulingError(f"Scheduler startup failed: {str(e)}") from e
async def stop(self):
    """Stop the scheduler and its health checker.

    Does nothing when the underlying scheduler is not running. Otherwise
    the scheduler is shut down with ``wait=True`` and the health checker
    is stopped afterwards.

    Raises:
        SchedulingError: If shutdown fails; the original exception is
            attached as ``__cause__``.
    """
    try:
        if not self.scheduler.running:
            return

        self.scheduler.shutdown(wait=True)
        await self.health_checker.stop()
        self.logger.info("Content scheduler stopped successfully")
    except Exception as e:
        self.logger.error(f"Failed to stop scheduler: {str(e)}")
        raise SchedulingError(f"Scheduler shutdown failed: {str(e)}") from e
async def schedule_content(self, content_item: ContentItem, schedule_time: datetime,
                           platforms: List[str], recurrence: str = None,
                           validate: bool = True) -> str:
    """Schedule content for publishing.

    A ``Schedule`` row is stored first, then a recurring or one-time job
    is registered for it and the resulting job id is written back to the
    row as ``"job_id:<id>"``.

    Args:
        content_item: ContentItem to schedule
        schedule_time: When to publish
        platforms: List of platforms to publish to; handed to the job,
            not stored on the Schedule row
        recurrence: Recurrence pattern (optional); any truthy value
            selects a recurring job
        validate: Accepted for interface compatibility; this method does
            not read it

    Returns:
        Schedule ID (as a string)

    Raises:
        SchedulingError: If the row cannot be stored or the job cannot be
            registered; the original exception is attached as ``__cause__``.
    """
    session = None
    try:
        session = self.Session()

        # Create schedule record
        schedule = Schedule(
            content_item_id=content_item.id,
            scheduled_time=schedule_time,
            status=ScheduleStatus.SCHEDULED,
            recurrence=recurrence,
            priority=1
        )
        session.add(schedule)
        # Stored before the job is registered, as in the original flow.
        session.commit()

        # Schedule the job
        if recurrence:
            job_id = await self._schedule_recurring(schedule, platforms)
        else:
            job_id = await self._schedule_one_time(schedule, platforms)

        # Update schedule with job ID
        schedule.result = f"job_id:{job_id}"
        session.commit()

        # Read the id while the row is still attached: the session factory
        # uses the default expire-on-commit behaviour, so after close() the
        # attribute would need a refresh that a detached instance cannot do.
        schedule_id = str(schedule.id)

        self.job_stats['total_scheduled'] += 1
        self.logger.info(f"Scheduled content {content_item.id} for {schedule_time}")
        return schedule_id
    except Exception as e:
        self.logger.error(f"Failed to schedule content: {str(e)}")
        if session is not None:
            session.rollback()
        raise SchedulingError(f"Content scheduling failed: {str(e)}") from e
    finally:
        # Always release the connection, on success and on every error path.
        if session is not None:
            session.close()
async def _schedule_one_time(self, schedule: Schedule, platforms: List[str]) -> str:
    """Register a single-shot publish job for ``schedule``.

    Args:
        schedule: Schedule whose ``scheduled_time`` becomes the run date
        platforms: Platforms forwarded to the job

    Returns:
        Job ID, built from the content item id and the run time's epoch second

    Raises:
        SchedulingError: If the job cannot be registered
    """
    try:
        job_id = f"one_time_{schedule.content_item_id}_{int(schedule.scheduled_time.timestamp())}"

        job_options = {
            'trigger': DateTrigger(run_date=schedule.scheduled_time),
            'args': [schedule, platforms],
            'id': job_id,
            # An existing job with the same id is overwritten, not duplicated
            'replace_existing': True,
            'misfire_grace_time': self.job_timeout,
        }
        self.scheduler.add_job(self._run_async_job, **job_options)
    except Exception as e:
        self.logger.error(f"Failed to schedule one-time job: {str(e)}")
        raise SchedulingError(f"One-time scheduling failed: {str(e)}")
    else:
        return job_id
async def _schedule_recurring(self, schedule: Schedule, platforms: List[str]) -> str:
    """Schedule a recurring content publish.

    ``"weekly"`` repeats on the weekday, hour and minute of
    ``schedule.scheduled_time``; ``"daily"`` repeats on its hour and
    minute. Any other pattern is treated as daily and a warning is logged.

    Args:
        schedule: Schedule object
        platforms: List of platforms

    Returns:
        Job ID

    Raises:
        SchedulingError: If the job cannot be registered; the original
            exception is attached as ``__cause__``.
    """
    try:
        # datetime.now().timestamp() is the true epoch second. The previous
        # utcnow() value is naive, so timestamp() read it as local time
        # (and utcnow() is deprecated on recent Python versions).
        job_id = f"recurring_{schedule.content_item_id}_{int(datetime.now().timestamp())}"

        # Parse recurrence pattern (simplified)
        run_at = schedule.scheduled_time
        cron_fields = {'hour': run_at.hour, 'minute': run_at.minute}
        if schedule.recurrence == "weekly":
            # weekday() counts from Monday = 0
            cron_fields['day_of_week'] = run_at.weekday()
        elif schedule.recurrence != "daily":
            self.logger.warning(
                f"Unknown recurrence pattern '{schedule.recurrence}', defaulting to daily"
            )

        # NOTE(review): no explicit timezone is given to the trigger; confirm
        # it matches the timezone scheduled_time is expressed in.
        trigger = CronTrigger(**cron_fields)

        self.scheduler.add_job(
            self._run_async_job,
            trigger=trigger,
            args=[schedule, platforms],
            id=job_id,
            replace_existing=True,
            misfire_grace_time=self.job_timeout
        )
        return job_id
    except Exception as e:
        self.logger.error(f"Failed to schedule recurring job: {str(e)}")
        raise SchedulingError(f"Recurring scheduling failed: {str(e)}") from e
async def _run_async_job(self, schedule: Schedule, platforms: List[str]):
    """Job entry point: publish, and report any failure instead of raising.

    Args:
        schedule: Schedule object handed to the publish step
        platforms: List of platforms handed to the publish step
    """
    try:
        await self._publish_content(schedule, platforms)
    except Exception as e:
        # The error is logged and passed on to the failure handler;
        # it is not re-raised to whoever invoked the job.
        failure_reason = str(e)
        self.logger.error(f"Job execution failed: {failure_reason}")
        await self._handle_job_failure(schedule, failure_reason)
async def _publish_content(self, schedule: Schedule, platforms: List[str]):
    """Publish content to specified platforms.

    The schedule is merged into a fresh session before it is touched, so
    the RUNNING / COMPLETED / FAILED transitions are written to the
    database rather than only set on an object no session is tracking.
    The final status and result are also mirrored onto the ``schedule``
    argument for in-memory callers.

    The ``failed`` counter is not incremented here: the exception is
    re-raised, and ``_run_async_job`` hands it to ``_handle_job_failure``,
    which does the counting. Incrementing in both places counted every
    failure twice.

    Args:
        schedule: Schedule object
        platforms: List of platforms

    Raises:
        SchedulingError: If the referenced content item does not exist
        Exception: Any other error is re-raised after the schedule has
            been marked as failed
    """
    session = self.Session()
    try:
        # Re-attach the schedule to this session; the instance handed to
        # the job is not tracked by it.
        record = session.merge(schedule)

        content_item = session.query(ContentItem).get(record.content_item_id)
        if not content_item:
            raise SchedulingError(f"Content item {record.content_item_id} not found")

        # Update schedule status
        record.status = ScheduleStatus.RUNNING
        session.commit()

        # Simulate content publishing (replace with actual platform publishing logic)
        self.logger.info(f"Publishing content '{content_item.title}' to platforms: {platforms}")

        # Mark as completed
        outcome = f"Published to {', '.join(platforms)} at {datetime.utcnow()}"
        record.status = ScheduleStatus.COMPLETED
        record.result = outcome
        session.commit()

        schedule.status = ScheduleStatus.COMPLETED
        schedule.result = outcome
        self.job_stats['successful'] += 1
    except Exception as e:
        failure = f"Failed: {str(e)}"
        # Reuse the same session (after discarding the broken transaction)
        # instead of opening a second one and leaking the first.
        session.rollback()
        try:
            record = session.merge(schedule)
            record.status = ScheduleStatus.FAILED
            record.result = failure
            session.commit()
        except Exception as persist_error:
            session.rollback()
            self.logger.error(f"Failed to record publish failure: {str(persist_error)}")
        schedule.status = ScheduleStatus.FAILED
        schedule.result = failure
        raise
    finally:
        session.close()
async def _handle_job_failure(self, schedule: Schedule, error: str):
    """Record a failed job on its schedule.

    The schedule is merged into a fresh session so the FAILED status and
    the error text are actually written to the database; the same values
    are mirrored onto the ``schedule`` argument. No retry is attempted
    here. Errors raised while recording the failure are logged and
    swallowed, so this method does not raise.

    Args:
        schedule: Schedule object
        error: Error message
    """
    session = None
    try:
        session = self.Session()
        failure = f"Failed: {error}"

        # Re-attach the schedule; the instance handed to the job is not
        # tracked by this session.
        record = session.merge(schedule)
        record.status = ScheduleStatus.FAILED
        record.result = failure
        session.commit()
        # Read the id while the row is still attached to the session.
        schedule_id = record.id

        schedule.status = ScheduleStatus.FAILED
        schedule.result = failure

        self.job_stats['failed'] += 1
        self.logger.error(f"Job failed for schedule {schedule_id}: {error}")
    except Exception as e:
        self.logger.error(f"Error handling job failure: {str(e)}")
    finally:
        # Closing also discards any transaction left open by an error above.
        if session is not None:
            session.close()
def _handle_job_event(self, event):
    """Log the outcome reported by a scheduler event.

    Executed jobs are logged at info level, failed jobs at error level
    (with the event's exception) and missed jobs at warning level. Events
    with any other code are ignored.

    Args:
        event: Scheduler event providing ``job_id`` and ``code``
    """
    try:
        job_id = event.job_id
        code = event.code

        if code == EVENT_JOB_EXECUTED:
            self.logger.info(f"Job {job_id} executed successfully")
            return
        if code == EVENT_JOB_ERROR:
            self.logger.error(f"Job {job_id} failed: {str(event.exception)}")
            return
        if code == EVENT_JOB_MISSED:
            self.logger.warning(f"Job {job_id} missed execution time")
    except Exception as e:
        # Problems inside the listener are logged, never propagated.
        self.logger.error(f"Error handling job event: {str(e)}")
async def _recover_jobs(self):
    """Recover pending jobs from the database.

    Every schedule still marked SCHEDULED gets a job again, registered
    against the existing row. A schedule is skipped when the job id
    recorded in its ``result`` (``"job_id:<id>"``) is still known to the
    scheduler, or when its content item no longer exists. A failure on
    one schedule is logged and does not stop the others.

    Raises:
        SchedulingError: If the pending schedules cannot be loaded; the
            original exception is attached as ``__cause__``.
    """
    job_prefix = "job_id:"
    session = None
    try:
        session = self.Session()

        # Get all scheduled jobs
        pending_schedules = session.query(Schedule).filter(
            Schedule.status == ScheduleStatus.SCHEDULED
        ).all()

        for schedule in pending_schedules:
            schedule_id = None
            try:
                schedule_id = schedule.id

                # Nothing to do when the job recorded for this schedule
                # is still present in the scheduler.
                recorded = schedule.result or ""
                known_job_id = recorded[len(job_prefix):] if recorded.startswith(job_prefix) else None
                if known_job_id and self.scheduler.get_job(known_job_id):
                    continue

                content_item = session.query(ContentItem).get(schedule.content_item_id)
                if not content_item:
                    continue

                platforms = content_item.platforms if isinstance(content_item.platforms, list) else []

                # Register the job for the existing row. Going through
                # schedule_content() would insert another Schedule row
                # for every pending schedule on each start.
                if schedule.recurrence:
                    job_id = await self._schedule_recurring(schedule, platforms)
                else:
                    job_id = await self._schedule_one_time(schedule, platforms)

                schedule.result = f"{job_prefix}{job_id}"
                session.commit()
            except Exception as e:
                session.rollback()
                self.logger.error(f"Failed to recover schedule {schedule_id}: {str(e)}")
    except Exception as e:
        self.logger.error(f"Job recovery failed: {str(e)}")
        raise SchedulingError(f"Job recovery failed: {str(e)}") from e
    finally:
        # Release the connection on every path, including errors.
        if session is not None:
            session.close()
def get_job_stats(self) -> Dict[str, int]:
    """Return a snapshot of the job counters.

    Returns:
        A new dictionary holding the current statistics; changing it does
        not affect the scheduler's own counters
    """
    snapshot = dict(self.job_stats)
    return snapshot
def get_active_jobs(self) -> List[Dict[str, Any]]:
    """Describe the jobs currently known to the scheduler.

    Returns:
        One dictionary per job with its ``id``, its ``next_run_time`` as
        an ISO-8601 string (``None`` when the job has no next run time)
        and the string form of its ``trigger``. An empty list is returned
        if the jobs cannot be read; the error is logged.
    """
    try:
        return [
            {
                'id': job.id,
                'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None,
                'trigger': str(job.trigger),
            }
            for job in self.scheduler.get_jobs()
        ]
    except Exception as e:
        self.logger.error(f"Error getting active jobs: {str(e)}")
        return []