""" Core scheduler implementation using APScheduler. """ import logging import asyncio from typing import Dict, Any, List, Optional, Union from datetime import datetime, timedelta from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor from apscheduler.triggers.date import DateTrigger from apscheduler.triggers.cron import CronTrigger from apscheduler.events import EVENT_JOB_ERROR, EVENT_JOB_EXECUTED, EVENT_JOB_MISSED from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker # Use unified database models from lib.database.models import ContentItem, Schedule, ScheduleStatus, get_engine, get_session, init_db from ..utils.error_handling import SchedulingError from .conflict_resolver import ConflictResolver from .health_checker import ScheduleHealthChecker from .schedule_validator import ScheduleValidator logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) class ContentScheduler: """Core content scheduler implementation.""" def __init__( self, db_url: str = "sqlite:///content_scheduler.db", max_workers: int = 10, job_timeout: int = 300, max_retries: int = 3, retry_delay: int = 300, health_check_interval: int = 300, validation_config: Dict[str, Any] = None ): """Initialize the content scheduler. Args: db_url: Database URL for job persistence max_workers: Maximum number of worker threads job_timeout: Job execution timeout in seconds max_retries: Maximum number of retry attempts retry_delay: Delay between retries in seconds health_check_interval: Health check interval in seconds validation_config: Configuration for schedule validation """ self.logger = logger self.db_url = db_url self.max_workers = max_workers self.job_timeout = job_timeout self.max_retries = max_retries self.retry_delay = retry_delay # Use unified database connection self.engine = get_engine(db_url) init_db(self.engine) self.Session = sessionmaker(bind=self.engine) # Initialize job stores self.jobstores = { 'default': SQLAlchemyJobStore(url=db_url) } # Initialize executors self.executors = { 'default': ThreadPoolExecutor(max_workers), 'processpool': ProcessPoolExecutor(max_workers) } # Initialize scheduler self.scheduler = AsyncIOScheduler( jobstores=self.jobstores, executors=self.executors, timezone='UTC', job_defaults={ 'coalesce': True, 'max_instances': 1, 'misfire_grace_time': 60 } ) # Initialize conflict resolver self.conflict_resolver = ConflictResolver() # Initialize health checker self.health_checker = ScheduleHealthChecker( scheduler=self, check_interval=health_check_interval ) # Initialize validator self.validator = ScheduleValidator(validation_config or {}) # Add event listeners self.scheduler.add_listener( self._handle_job_event, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED ) # Track active jobs self.active_jobs = {} self.job_stats = { 'total_scheduled': 0, 'successful': 0, 'failed': 0, 'retries': 0 } async def start(self): """Start the scheduler.""" try: if not self.scheduler.running: self.scheduler.start() await self._recover_jobs() await self.health_checker.start() self.logger.info("Content scheduler started successfully") except Exception as e: self.logger.error(f"Failed to start scheduler: {str(e)}") raise SchedulingError(f"Scheduler startup failed: {str(e)}") async def stop(self): """Stop the scheduler.""" try: if self.scheduler.running: self.scheduler.shutdown(wait=True) await self.health_checker.stop() self.logger.info("Content scheduler stopped successfully") except Exception as e: self.logger.error(f"Failed to stop scheduler: {str(e)}") raise SchedulingError(f"Scheduler shutdown failed: {str(e)}") async def schedule_content(self, content_item: ContentItem, schedule_time: datetime, platforms: List[str], recurrence: str = None, validate: bool = True) -> str: """Schedule content for publishing. Args: content_item: ContentItem to schedule schedule_time: When to publish platforms: List of platforms to publish to recurrence: Recurrence pattern (optional) validate: Whether to validate the schedule Returns: Schedule ID """ try: session = self.Session() # Create schedule record schedule = Schedule( content_item_id=content_item.id, scheduled_time=schedule_time, status=ScheduleStatus.SCHEDULED, recurrence=recurrence, priority=1 ) session.add(schedule) session.commit() # Schedule the job if recurrence: job_id = await self._schedule_recurring(schedule, platforms) else: job_id = await self._schedule_one_time(schedule, platforms) # Update schedule with job ID schedule.result = f"job_id:{job_id}" session.commit() session.close() self.job_stats['total_scheduled'] += 1 self.logger.info(f"Scheduled content {content_item.id} for {schedule_time}") return str(schedule.id) except Exception as e: self.logger.error(f"Failed to schedule content: {str(e)}") if 'session' in locals(): session.rollback() session.close() raise SchedulingError(f"Content scheduling failed: {str(e)}") async def _schedule_one_time(self, schedule: Schedule, platforms: List[str]) -> str: """Schedule a one-time content publish. Args: schedule: Schedule object platforms: List of platforms Returns: Job ID """ try: job_id = f"one_time_{schedule.content_item_id}_{int(schedule.scheduled_time.timestamp())}" self.scheduler.add_job( self._run_async_job, trigger=DateTrigger(run_date=schedule.scheduled_time), args=[schedule, platforms], id=job_id, replace_existing=True, misfire_grace_time=self.job_timeout ) return job_id except Exception as e: self.logger.error(f"Failed to schedule one-time job: {str(e)}") raise SchedulingError(f"One-time scheduling failed: {str(e)}") async def _schedule_recurring(self, schedule: Schedule, platforms: List[str]) -> str: """Schedule a recurring content publish. Args: schedule: Schedule object platforms: List of platforms Returns: Job ID """ try: job_id = f"recurring_{schedule.content_item_id}_{int(datetime.utcnow().timestamp())}" # Parse recurrence pattern (simplified) if schedule.recurrence == "daily": trigger = CronTrigger(hour=schedule.scheduled_time.hour, minute=schedule.scheduled_time.minute) elif schedule.recurrence == "weekly": trigger = CronTrigger(day_of_week=schedule.scheduled_time.weekday(), hour=schedule.scheduled_time.hour, minute=schedule.scheduled_time.minute) else: # Default to daily trigger = CronTrigger(hour=schedule.scheduled_time.hour, minute=schedule.scheduled_time.minute) self.scheduler.add_job( self._run_async_job, trigger=trigger, args=[schedule, platforms], id=job_id, replace_existing=True, misfire_grace_time=self.job_timeout ) return job_id except Exception as e: self.logger.error(f"Failed to schedule recurring job: {str(e)}") raise SchedulingError(f"Recurring scheduling failed: {str(e)}") async def _run_async_job(self, schedule: Schedule, platforms: List[str]): """Run an async job in the event loop. Args: schedule: Schedule object platforms: List of platforms """ try: await self._publish_content(schedule, platforms) except Exception as e: self.logger.error(f"Job execution failed: {str(e)}") await self._handle_job_failure(schedule, str(e)) async def _publish_content(self, schedule: Schedule, platforms: List[str]): """Publish content to specified platforms. Args: schedule: Schedule object platforms: List of platforms """ try: session = self.Session() content_item = session.query(ContentItem).get(schedule.content_item_id) if not content_item: raise SchedulingError(f"Content item {schedule.content_item_id} not found") # Update schedule status schedule.status = ScheduleStatus.RUNNING session.commit() # Simulate content publishing (replace with actual platform publishing logic) self.logger.info(f"Publishing content '{content_item.title}' to platforms: {platforms}") # Mark as completed schedule.status = ScheduleStatus.COMPLETED schedule.result = f"Published to {', '.join(platforms)} at {datetime.utcnow()}" session.commit() session.close() self.job_stats['successful'] += 1 except Exception as e: session = self.Session() schedule.status = ScheduleStatus.FAILED schedule.result = f"Failed: {str(e)}" session.commit() session.close() self.job_stats['failed'] += 1 raise async def _handle_job_failure(self, schedule: Schedule, error: str): """Handle job failure and retry logic. Args: schedule: Schedule object error: Error message """ try: session = self.Session() schedule.status = ScheduleStatus.FAILED schedule.result = f"Failed: {error}" session.commit() session.close() self.job_stats['failed'] += 1 self.logger.error(f"Job failed for schedule {schedule.id}: {error}") except Exception as e: self.logger.error(f"Error handling job failure: {str(e)}") def _handle_job_event(self, event): """Handle scheduler events. Args: event: Scheduler event """ try: job_id = event.job_id if event.code == EVENT_JOB_EXECUTED: self.logger.info(f"Job {job_id} executed successfully") elif event.code == EVENT_JOB_ERROR: self.logger.error(f"Job {job_id} failed: {str(event.exception)}") elif event.code == EVENT_JOB_MISSED: self.logger.warning(f"Job {job_id} missed execution time") except Exception as e: self.logger.error(f"Error handling job event: {str(e)}") async def _recover_jobs(self): """Recover pending jobs from the database.""" try: session = self.Session() # Get all scheduled jobs pending_schedules = session.query(Schedule).filter( Schedule.status == ScheduleStatus.SCHEDULED ).all() # Reschedule each job for schedule in pending_schedules: try: content_item = session.query(ContentItem).get(schedule.content_item_id) if content_item: platforms = content_item.platforms if isinstance(content_item.platforms, list) else [] await self.schedule_content(content_item, schedule.scheduled_time, platforms, schedule.recurrence, validate=False) except Exception as e: self.logger.error(f"Failed to recover schedule {schedule.id}: {str(e)}") session.close() except Exception as e: self.logger.error(f"Job recovery failed: {str(e)}") raise SchedulingError(f"Job recovery failed: {str(e)}") def get_job_stats(self) -> Dict[str, int]: """Get job statistics. Returns: Dictionary with job statistics """ return self.job_stats.copy() def get_active_jobs(self) -> List[Dict[str, Any]]: """Get list of active jobs. Returns: List of active job information """ try: jobs = [] for job in self.scheduler.get_jobs(): jobs.append({ 'id': job.id, 'next_run_time': job.next_run_time.isoformat() if job.next_run_time else None, 'trigger': str(job.trigger) }) return jobs except Exception as e: self.logger.error(f"Error getting active jobs: {str(e)}") return []