1030 lines
49 KiB
Python
1030 lines
49 KiB
Python
"""
|
|
Core Task Scheduler Service
|
|
Pluggable task scheduler that can work with any task model.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Dict, Any, Optional, List, Callable
|
|
from datetime import datetime
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
from apscheduler.triggers.cron import CronTrigger
|
|
from apscheduler.triggers.interval import IntervalTrigger
|
|
from apscheduler.triggers.date import DateTrigger
|
|
from sqlalchemy.orm import Session
|
|
|
|
from .executor_interface import TaskExecutor, TaskExecutionResult
|
|
from .task_registry import TaskRegistry
|
|
from .exception_handler import (
|
|
SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
|
|
TaskLoaderError, SchedulerConfigError
|
|
)
|
|
from services.database import get_db_session
|
|
from utils.logger_utils import get_service_logger
|
|
from ..utils.user_job_store import get_user_job_store_name
|
|
from models.scheduler_models import SchedulerEventLog
|
|
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
|
|
from .job_restoration import restore_persona_jobs
|
|
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
|
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
|
from .platform_insights_task_restoration import restore_platform_insights_tasks
|
|
from .check_cycle_handler import check_and_execute_due_tasks
|
|
from .task_execution_handler import execute_task_async
|
|
|
|
logger = get_service_logger("task_scheduler")
|
|
|
|
|
|
class TaskScheduler:
|
|
"""
|
|
Pluggable task scheduler that can work with any task model.
|
|
|
|
Features:
|
|
- Async task execution
|
|
- Plugin-based executor system
|
|
- Database-backed task persistence
|
|
- Configurable check intervals
|
|
- Automatic retry logic
|
|
- User isolation: All tasks are filtered by user_id for isolation
|
|
- Per-user job store context: Logs show user's website root for debugging
|
|
|
|
User Isolation:
|
|
- Tasks are filtered by user_id in task loaders
|
|
- Execution logs include user_id for tracking
|
|
- Per-user statistics are maintained
|
|
- Job store names (based on website root) are logged for debugging
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
check_interval_minutes: int = 15,
|
|
max_concurrent_executions: int = 10,
|
|
enable_retries: bool = True,
|
|
max_retries: int = 3
|
|
):
|
|
"""
|
|
Initialize the task scheduler.
|
|
|
|
Args:
|
|
check_interval_minutes: How often to check for due tasks
|
|
max_concurrent_executions: Maximum concurrent task executions
|
|
enable_retries: Whether to retry failed tasks
|
|
max_retries: Maximum retry attempts
|
|
"""
|
|
self.check_interval_minutes = check_interval_minutes
|
|
self.max_concurrent_executions = max_concurrent_executions
|
|
self.enable_retries = enable_retries
|
|
self.max_retries = max_retries
|
|
|
|
# Initialize APScheduler
|
|
self.scheduler = AsyncIOScheduler(
|
|
timezone='UTC',
|
|
job_defaults={
|
|
'coalesce': True,
|
|
'max_instances': 1,
|
|
'misfire_grace_time': 3600 # 1 hour grace period for missed jobs
|
|
}
|
|
)
|
|
|
|
# Task executor registry
|
|
self.registry = TaskRegistry()
|
|
|
|
# Track running executions
|
|
self.active_executions: Dict[str, asyncio.Task] = {}
|
|
|
|
# Exception handler for robust error handling
|
|
self.exception_handler = SchedulerExceptionHandler()
|
|
|
|
# Intelligent scheduling configuration
|
|
self.min_check_interval_minutes = 15 # Check every 15min when active strategies exist
|
|
self.max_check_interval_minutes = 60 # Check every 60min when no active strategies
|
|
self.current_check_interval_minutes = check_interval_minutes # Current interval
|
|
|
|
# Statistics
|
|
self.stats = {
|
|
'total_checks': 0,
|
|
'tasks_found': 0,
|
|
'tasks_executed': 0,
|
|
'tasks_failed': 0,
|
|
'tasks_skipped': 0,
|
|
'last_check': None,
|
|
'last_update': datetime.utcnow().isoformat(), # Timestamp for frontend polling
|
|
'per_user_stats': {}, # Track metrics per user for user isolation
|
|
'active_strategies_count': 0, # Track active strategies with tasks
|
|
'last_interval_adjustment': None # Track when interval was last adjusted
|
|
}
|
|
|
|
self._running = False
|
|
|
|
def _get_trigger_for_interval(self, interval_minutes: int):
|
|
"""
|
|
Get the appropriate trigger for the given interval.
|
|
|
|
For intervals >= 60 minutes, use IntervalTrigger.
|
|
For intervals < 60 minutes, use CronTrigger.
|
|
|
|
Args:
|
|
interval_minutes: Interval in minutes
|
|
|
|
Returns:
|
|
Appropriate APScheduler trigger
|
|
"""
|
|
if interval_minutes >= 60:
|
|
# Use IntervalTrigger for intervals >= 60 minutes
|
|
return IntervalTrigger(minutes=interval_minutes)
|
|
else:
|
|
# Use CronTrigger for intervals < 60 minutes (valid range: 0-59)
|
|
return CronTrigger(minute=f'*/{interval_minutes}')
|
|
|
|
def register_executor(
|
|
self,
|
|
task_type: str,
|
|
executor: TaskExecutor,
|
|
task_loader: Callable[[Session], List[Any]]
|
|
):
|
|
"""
|
|
Register a task executor for a specific task type.
|
|
|
|
Args:
|
|
task_type: Unique identifier for task type (e.g., 'monitoring_task')
|
|
executor: TaskExecutor instance that handles execution
|
|
task_loader: Function that loads due tasks from database
|
|
"""
|
|
self.registry.register(task_type, executor, task_loader)
|
|
logger.info(f"Registered executor for task type: {task_type}")
|
|
|
|
async def start(self):
|
|
"""Start the scheduler with intelligent interval adjustment."""
|
|
if self._running:
|
|
logger.warning("Scheduler is already running")
|
|
return
|
|
|
|
try:
|
|
# Determine initial check interval based on active strategies
|
|
initial_interval = await determine_optimal_interval(
|
|
self,
|
|
self.min_check_interval_minutes,
|
|
self.max_check_interval_minutes
|
|
)
|
|
self.current_check_interval_minutes = initial_interval
|
|
|
|
# Add periodic job to check for due tasks
|
|
self.scheduler.add_job(
|
|
self._check_and_execute_due_tasks,
|
|
trigger=self._get_trigger_for_interval(initial_interval),
|
|
id='check_due_tasks',
|
|
replace_existing=True
|
|
)
|
|
|
|
self.scheduler.start()
|
|
self._running = True
|
|
|
|
# Check for and execute any missed jobs that are still within grace period
|
|
await self._execute_missed_jobs()
|
|
|
|
# Restore one-time persona generation jobs for users who completed onboarding
|
|
await restore_persona_jobs(self)
|
|
|
|
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
|
await restore_oauth_monitoring_tasks(self)
|
|
|
|
# Restore/create missing website analysis tasks for users who completed onboarding
|
|
await restore_website_analysis_tasks(self)
|
|
|
|
# Restore/create missing platform insights tasks for users with connected GSC/Bing
|
|
await restore_platform_insights_tasks(self)
|
|
|
|
# Validate and rebuild cumulative stats if needed
|
|
await self._validate_and_rebuild_cumulative_stats()
|
|
|
|
# Get all scheduled APScheduler jobs (including one-time tasks)
|
|
all_jobs = self.scheduler.get_jobs()
|
|
registered_types = self.registry.get_registered_types()
|
|
active_strategies = self.stats.get('active_strategies_count', 0)
|
|
|
|
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
|
|
oauth_tasks_count = 0
|
|
oauth_tasks_details = []
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
|
# Count active tasks
|
|
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
|
|
OAuthTokenMonitoringTask.status == 'active'
|
|
).count()
|
|
|
|
# Get all tasks (for detailed logging)
|
|
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
|
total_oauth_tasks = len(all_oauth_tasks)
|
|
|
|
# Show platform breakdown for ALL tasks (active and inactive)
|
|
all_platforms = {}
|
|
active_platforms = {}
|
|
for task in all_oauth_tasks:
|
|
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
|
|
if task.status == 'active':
|
|
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
|
|
|
|
if total_oauth_tasks > 0:
|
|
# Log details about all tasks (not just active)
|
|
for task in all_oauth_tasks:
|
|
oauth_tasks_details.append(
|
|
f"user={task.user_id}, platform={task.platform}, status={task.status}"
|
|
)
|
|
|
|
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
|
|
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
|
logger.warning(
|
|
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
|
|
f"but {oauth_tasks_count} are active. "
|
|
f"All platforms: {all_platform_summary}. "
|
|
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
|
|
)
|
|
elif oauth_tasks_count > 0:
|
|
# Show platform breakdown for active tasks
|
|
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
|
|
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
|
|
|
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
|
|
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
|
|
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
|
|
|
|
if missing_in_db:
|
|
logger.warning(
|
|
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
|
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
|
f"All platforms: {all_platform_summary}. "
|
|
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
|
|
)
|
|
else:
|
|
logger.warning(
|
|
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
|
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
|
f"All platforms: {all_platform_summary}"
|
|
)
|
|
|
|
db.close()
|
|
except Exception as e:
|
|
logger.warning(
|
|
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
|
|
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
|
|
f"tasks haven't been created. Error type: {type(e).__name__}"
|
|
)
|
|
|
|
# Get website analysis tasks count
|
|
website_analysis_tasks_count = 0
|
|
try:
|
|
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
|
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
|
|
WebsiteAnalysisTask.status == 'active'
|
|
).count()
|
|
except Exception as e:
|
|
logger.debug(f"Could not get website analysis tasks count: {e}")
|
|
|
|
# Get platform insights tasks count
|
|
platform_insights_tasks_count = 0
|
|
try:
|
|
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
|
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
|
|
PlatformInsightsTask.status == 'active'
|
|
).count()
|
|
except Exception as e:
|
|
logger.debug(f"Could not get platform insights tasks count: {e}")
|
|
|
|
# Calculate job counts
|
|
apscheduler_recurring = 1 # check_due_tasks
|
|
apscheduler_one_time = len(all_jobs) - 1
|
|
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
|
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
|
|
|
# Build comprehensive startup log message
|
|
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
|
|
if oauth_tasks_count > 0:
|
|
recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}"
|
|
if website_analysis_tasks_count > 0:
|
|
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
|
|
if platform_insights_tasks_count > 0:
|
|
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
|
|
|
|
startup_lines = [
|
|
f"[Scheduler] ✅ Task Scheduler Started",
|
|
f" ├─ Check Interval: {initial_interval} minutes",
|
|
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
|
|
f" ├─ Active Strategies: {active_strategies}",
|
|
f" ├─ Total Scheduled Jobs: {total_jobs}",
|
|
f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})",
|
|
f" └─ One-Time Jobs: {apscheduler_one_time}"
|
|
]
|
|
|
|
# Add APScheduler job details
|
|
if all_jobs:
|
|
for idx, job in enumerate(all_jobs):
|
|
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0
|
|
prefix = " └─" if is_last else " ├─"
|
|
next_run = job.next_run_time
|
|
trigger_type = type(job.trigger).__name__
|
|
|
|
# Try to extract user_id from job ID or kwargs for context
|
|
user_context = ""
|
|
user_id_from_job = None
|
|
|
|
# First try to get from kwargs
|
|
if hasattr(job, 'kwargs') and job.kwargs and job.kwargs.get('user_id'):
|
|
user_id_from_job = job.kwargs.get('user_id')
|
|
# Otherwise, try to extract from job ID (e.g., "research_persona_user_123..." or "research_persona_user123")
|
|
elif job.id and ('research_persona_' in job.id or 'facebook_persona_' in job.id):
|
|
# Job ID format: research_persona_{user_id} or facebook_persona_{user_id}
|
|
# where user_id is Clerk format (e.g., "user_33Gz1FPI86VDXhRY8QN4ragRFGN")
|
|
if job.id.startswith('research_persona_'):
|
|
user_id_from_job = job.id.replace('research_persona_', '')
|
|
elif job.id.startswith('facebook_persona_'):
|
|
user_id_from_job = job.id.replace('facebook_persona_', '')
|
|
else:
|
|
# Fallback: try to extract from parts (old format with timestamp)
|
|
parts = job.id.split('_')
|
|
if len(parts) >= 3:
|
|
user_id_from_job = parts[2] # Extract user_id from job ID
|
|
|
|
if user_id_from_job:
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
user_job_store = get_user_job_store_name(user_id_from_job, db)
|
|
if user_job_store == 'default':
|
|
logger.debug(
|
|
f"[Scheduler] Job store extraction returned 'default' for user {user_id_from_job}. "
|
|
f"This may indicate no onboarding data or website URL not found."
|
|
)
|
|
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
|
|
db.close()
|
|
except Exception as e:
|
|
logger.warning(
|
|
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
|
|
f"Error type: {type(e).__name__}"
|
|
)
|
|
user_context = f" | User: {user_id_from_job}"
|
|
|
|
startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
|
|
|
|
# Add OAuth token monitoring tasks details
|
|
# Show ALL OAuth tasks (active and inactive) for complete visibility
|
|
if total_oauth_tasks > 0:
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
|
# Get ALL tasks, not just active ones
|
|
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
|
|
|
for idx, task in enumerate(oauth_tasks):
|
|
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
|
|
prefix = " └─" if is_last else " ├─"
|
|
|
|
try:
|
|
user_job_store = get_user_job_store_name(task.user_id, db)
|
|
if user_job_store == 'default':
|
|
logger.debug(
|
|
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
|
f"This may indicate no onboarding data or website URL not found."
|
|
)
|
|
except Exception as e:
|
|
logger.warning(
|
|
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
|
f"Using 'default'. Error type: {type(e).__name__}"
|
|
)
|
|
user_job_store = 'default'
|
|
|
|
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
|
# Include status in the log line for visibility
|
|
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
|
startup_lines.append(
|
|
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
|
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
|
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
|
)
|
|
db.close()
|
|
except Exception as e:
|
|
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
|
|
|
# Add website analysis tasks details
|
|
if website_analysis_tasks_count > 0:
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
|
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
|
|
|
for idx, task in enumerate(website_analysis_tasks):
|
|
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
|
|
prefix = " └─" if is_last else " ├─"
|
|
|
|
try:
|
|
user_job_store = get_user_job_store_name(task.user_id, db)
|
|
except Exception as e:
|
|
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
|
user_job_store = 'default'
|
|
|
|
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
|
frequency = f"Every {task.frequency_days} days"
|
|
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
|
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
|
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
|
|
|
startup_lines.append(
|
|
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
|
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
|
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
|
)
|
|
db.close()
|
|
except Exception as e:
|
|
logger.debug(f"Could not get website analysis task details: {e}")
|
|
|
|
# Add platform insights tasks details
|
|
if platform_insights_tasks_count > 0:
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
|
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
|
|
|
for idx, task in enumerate(platform_insights_tasks):
|
|
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
|
|
prefix = " └─" if is_last else " ├─"
|
|
|
|
try:
|
|
user_job_store = get_user_job_store_name(task.user_id, db)
|
|
except Exception as e:
|
|
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
|
user_job_store = 'default'
|
|
|
|
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
|
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
|
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
|
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
|
|
|
startup_lines.append(
|
|
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
|
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
|
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
|
)
|
|
db.close()
|
|
except Exception as e:
|
|
logger.debug(f"Could not get platform insights task details: {e}")
|
|
|
|
# Log comprehensive startup information in single message
|
|
logger.warning("\n".join(startup_lines))
|
|
|
|
# Save scheduler start event to database
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
event_log = SchedulerEventLog(
|
|
event_type='start',
|
|
event_date=datetime.utcnow(),
|
|
check_interval_minutes=initial_interval,
|
|
active_strategies_count=active_strategies,
|
|
event_data={
|
|
'registered_types': registered_types,
|
|
'total_jobs': total_jobs,
|
|
'recurring_jobs': total_recurring,
|
|
'one_time_jobs': apscheduler_one_time,
|
|
'oauth_monitoring_tasks': oauth_tasks_count,
|
|
'website_analysis_tasks': website_analysis_tasks_count,
|
|
'platform_insights_tasks': platform_insights_tasks_count
|
|
}
|
|
)
|
|
db.add(event_log)
|
|
db.commit()
|
|
db.close()
|
|
except Exception as e:
|
|
logger.warning(f"Failed to save scheduler start event log: {e}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Failed to start scheduler: {e}")
|
|
raise
|
|
|
|
|
|
async def stop(self):
|
|
"""Stop the scheduler gracefully."""
|
|
if not self._running:
|
|
return
|
|
|
|
try:
|
|
# Cancel all active executions
|
|
for task_id, execution_task in self.active_executions.items():
|
|
execution_task.cancel()
|
|
|
|
# Wait for active executions to complete (with timeout)
|
|
if self.active_executions:
|
|
await asyncio.wait(
|
|
self.active_executions.values(),
|
|
timeout=30
|
|
)
|
|
|
|
# Get final job count before shutdown
|
|
all_jobs_before = self.scheduler.get_jobs()
|
|
|
|
# Shutdown scheduler
|
|
self.scheduler.shutdown(wait=True)
|
|
self._running = False
|
|
|
|
# Log comprehensive shutdown information (use WARNING level for visibility)
|
|
total_checks = self.stats.get('total_checks', 0)
|
|
total_executed = self.stats.get('tasks_executed', 0)
|
|
total_failed = self.stats.get('tasks_failed', 0)
|
|
|
|
shutdown_message = (
|
|
f"[Scheduler] 🛑 Task Scheduler Stopped\n"
|
|
f" ├─ Total Check Cycles: {total_checks}\n"
|
|
f" ├─ Total Tasks Executed: {total_executed}\n"
|
|
f" ├─ Total Tasks Failed: {total_failed}\n"
|
|
f" ├─ Jobs Cancelled: {len(all_jobs_before)}\n"
|
|
f" └─ Shutdown: Graceful"
|
|
)
|
|
logger.warning(shutdown_message)
|
|
|
|
# Save scheduler stop event to database
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
event_log = SchedulerEventLog(
|
|
event_type='stop',
|
|
event_date=datetime.utcnow(),
|
|
check_interval_minutes=self.current_check_interval_minutes,
|
|
event_data={
|
|
'total_checks': total_checks,
|
|
'total_executed': total_executed,
|
|
'total_failed': total_failed,
|
|
'jobs_cancelled': len(all_jobs_before)
|
|
}
|
|
)
|
|
db.add(event_log)
|
|
db.commit()
|
|
db.close()
|
|
except Exception as e:
|
|
logger.warning(f"Failed to save scheduler stop event log: {e}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error stopping scheduler: {e}")
|
|
raise
|
|
|
|
async def _check_and_execute_due_tasks(self):
|
|
"""
|
|
Main scheduler loop: check for due tasks and execute them.
|
|
This runs periodically with intelligent interval adjustment based on active strategies.
|
|
"""
|
|
await check_and_execute_due_tasks(self)
|
|
|
|
async def _adjust_check_interval_if_needed(self, db: Session):
|
|
"""
|
|
Intelligently adjust check interval based on active strategies.
|
|
|
|
Args:
|
|
db: Database session
|
|
"""
|
|
await adjust_check_interval_if_needed(self, db)
|
|
|
|
async def _execute_missed_jobs(self):
|
|
"""
|
|
Check for and execute any missed DateTrigger jobs that are still within grace period.
|
|
APScheduler marks jobs as 'missed' if they were scheduled to run while the scheduler wasn't running.
|
|
"""
|
|
try:
|
|
all_jobs = self.scheduler.get_jobs()
|
|
now = datetime.utcnow().replace(tzinfo=self.scheduler.timezone)
|
|
|
|
missed_jobs = []
|
|
for job in all_jobs:
|
|
# Only check DateTrigger jobs (one-time tasks)
|
|
if hasattr(job, 'trigger') and isinstance(job.trigger, DateTrigger):
|
|
if job.next_run_time and job.next_run_time < now:
|
|
# Job's scheduled time has passed
|
|
time_since_scheduled = (now - job.next_run_time).total_seconds()
|
|
# Check if still within grace period (1 hour = 3600 seconds)
|
|
if time_since_scheduled <= 3600:
|
|
missed_jobs.append(job)
|
|
|
|
if missed_jobs:
|
|
logger.warning(
|
|
f"[Scheduler] Found {len(missed_jobs)} missed job(s) within grace period, executing now..."
|
|
)
|
|
for job in missed_jobs:
|
|
try:
|
|
# Execute the job immediately
|
|
logger.info(f"[Scheduler] Executing missed job: {job.id}")
|
|
await job.func(*job.args, **job.kwargs)
|
|
except Exception as e:
|
|
logger.error(f"[Scheduler] Error executing missed job {job.id}: {e}")
|
|
except Exception as e:
|
|
logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
|
|
|
|
async def trigger_interval_adjustment(self):
|
|
"""
|
|
Trigger immediate interval adjustment check.
|
|
|
|
This should be called when a strategy is activated or deactivated
|
|
to immediately adjust the scheduler interval based on current active strategies.
|
|
"""
|
|
if not self._running:
|
|
logger.debug("Scheduler not running, skipping interval adjustment")
|
|
return
|
|
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
await adjust_check_interval_if_needed(self, db)
|
|
db.close()
|
|
else:
|
|
logger.warning("Could not get database session for interval adjustment")
|
|
except Exception as e:
|
|
logger.warning(f"Error triggering interval adjustment: {e}")
|
|
|
|
async def _validate_and_rebuild_cumulative_stats(self):
|
|
"""
|
|
Validate cumulative stats on scheduler startup and rebuild if needed.
|
|
This ensures cumulative stats are accurate after restarts.
|
|
"""
|
|
db = None
|
|
try:
|
|
db = get_db_session()
|
|
if not db:
|
|
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
|
|
return
|
|
|
|
try:
|
|
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
|
from models.scheduler_models import SchedulerEventLog
|
|
from sqlalchemy import func
|
|
|
|
# Get cumulative stats from persistent table
|
|
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
|
|
SchedulerCumulativeStats.id == 1
|
|
).first()
|
|
|
|
# Count check_cycle events in database
|
|
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
|
SchedulerEventLog.event_type == 'check_cycle'
|
|
).scalar() or 0
|
|
|
|
if cumulative_stats:
|
|
# Validate: cumulative stats should match event log count
|
|
if cumulative_stats.total_check_cycles != check_cycle_count:
|
|
logger.warning(
|
|
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
|
|
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
|
|
f"vs event_logs.count={check_cycle_count}. "
|
|
f"Rebuilding cumulative stats from event logs..."
|
|
)
|
|
|
|
# Rebuild from event logs
|
|
result = db.query(
|
|
func.count(SchedulerEventLog.id),
|
|
func.sum(SchedulerEventLog.tasks_found),
|
|
func.sum(SchedulerEventLog.tasks_executed),
|
|
func.sum(SchedulerEventLog.tasks_failed)
|
|
).filter(
|
|
SchedulerEventLog.event_type == 'check_cycle'
|
|
).first()
|
|
|
|
if result:
|
|
total_cycles = result[0] if result[0] is not None else 0
|
|
total_found = result[1] if result[1] is not None else 0
|
|
total_executed = result[2] if result[2] is not None else 0
|
|
total_failed = result[3] if result[3] is not None else 0
|
|
|
|
# Update cumulative stats
|
|
cumulative_stats.total_check_cycles = int(total_cycles)
|
|
cumulative_stats.cumulative_tasks_found = int(total_found)
|
|
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
|
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
|
cumulative_stats.last_updated = datetime.utcnow()
|
|
cumulative_stats.updated_at = datetime.utcnow()
|
|
|
|
db.commit()
|
|
logger.warning(
|
|
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
|
|
f"cycles={total_cycles}, found={total_found}, "
|
|
f"executed={total_executed}, failed={total_failed}"
|
|
)
|
|
else:
|
|
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
|
|
else:
|
|
logger.warning(
|
|
f"[Scheduler] ✅ Cumulative stats validated: "
|
|
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
|
|
)
|
|
else:
|
|
# Cumulative stats table doesn't exist, create it from event logs
|
|
logger.warning(
|
|
"[Scheduler] Cumulative stats table not found. "
|
|
"Creating from event logs..."
|
|
)
|
|
|
|
result = db.query(
|
|
func.count(SchedulerEventLog.id),
|
|
func.sum(SchedulerEventLog.tasks_found),
|
|
func.sum(SchedulerEventLog.tasks_executed),
|
|
func.sum(SchedulerEventLog.tasks_failed)
|
|
).filter(
|
|
SchedulerEventLog.event_type == 'check_cycle'
|
|
).first()
|
|
|
|
if result:
|
|
total_cycles = result[0] if result[0] is not None else 0
|
|
total_found = result[1] if result[1] is not None else 0
|
|
total_executed = result[2] if result[2] is not None else 0
|
|
total_failed = result[3] if result[3] is not None else 0
|
|
|
|
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
|
cumulative_stats.total_check_cycles = int(total_cycles)
|
|
cumulative_stats.cumulative_tasks_found = int(total_found)
|
|
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
|
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
|
cumulative_stats.last_updated = datetime.utcnow()
|
|
cumulative_stats.updated_at = datetime.utcnow()
|
|
|
|
db.commit()
|
|
logger.warning(
|
|
f"[Scheduler] ✅ Created cumulative stats from event logs: "
|
|
f"cycles={total_cycles}, found={total_found}, "
|
|
f"executed={total_executed}, failed={total_failed}"
|
|
)
|
|
except ImportError:
|
|
logger.warning(
|
|
"[Scheduler] Cumulative stats model not available. "
|
|
"Migration may not have been run yet. "
|
|
"Run: python backend/scripts/run_cumulative_stats_migration.py"
|
|
)
|
|
except Exception as e:
|
|
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
|
|
finally:
|
|
if db:
|
|
db.close()
|
|
|
|
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Process due tasks for a specific task type.
|
|
|
|
Returns:
|
|
Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
|
|
"""
|
|
summary = {'found': 0, 'executed': 0, 'failed': 0}
|
|
|
|
try:
|
|
# Get task loader for this type
|
|
try:
|
|
task_loader = self.registry.get_task_loader(task_type)
|
|
except Exception as e:
|
|
error = TaskLoaderError(
|
|
message=f"Failed to get task loader for type {task_type}: {str(e)}",
|
|
task_type=task_type,
|
|
original_error=e
|
|
)
|
|
self.exception_handler.handle_exception(error)
|
|
return None
|
|
|
|
# Load due tasks (with error handling)
|
|
try:
|
|
due_tasks = task_loader(db)
|
|
except Exception as e:
|
|
error = TaskLoaderError(
|
|
message=f"Failed to load due tasks for type {task_type}: {str(e)}",
|
|
task_type=task_type,
|
|
original_error=e
|
|
)
|
|
self.exception_handler.handle_exception(error)
|
|
return None
|
|
|
|
if not due_tasks:
|
|
return None
|
|
|
|
summary['found'] = len(due_tasks)
|
|
self.stats['tasks_found'] += len(due_tasks)
|
|
|
|
# Execute tasks (with concurrency limit)
|
|
execution_tasks = []
|
|
skipped_count = 0
|
|
for task in due_tasks:
|
|
if len(self.active_executions) >= self.max_concurrent_executions:
|
|
skipped_count = len(due_tasks) - len(execution_tasks)
|
|
logger.warning(
|
|
f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
|
|
f"skipping {skipped_count} tasks for {task_type}"
|
|
)
|
|
break
|
|
|
|
# Execute task asynchronously
|
|
# Note: Each task gets its own database session to prevent concurrent access issues
|
|
execution_task = asyncio.create_task(
|
|
execute_task_async(self, task_type, task, summary)
|
|
)
|
|
|
|
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
|
self.active_executions[task_id] = execution_task
|
|
|
|
execution_tasks.append(execution_task)
|
|
|
|
# Wait for executions to complete (with timeout per task)
|
|
if execution_tasks:
|
|
await asyncio.wait(execution_tasks, timeout=300)
|
|
|
|
return summary
|
|
|
|
except Exception as e:
|
|
error = TaskLoaderError(
|
|
message=f"Error processing task type {task_type}: {str(e)}",
|
|
task_type=task_type,
|
|
original_error=e
|
|
)
|
|
self.exception_handler.handle_exception(error)
|
|
return summary
|
|
|
|
|
|
def _update_user_stats(self, user_id: Optional[int], success: bool):
|
|
"""
|
|
Update per-user statistics for user isolation tracking.
|
|
|
|
Args:
|
|
user_id: User ID (None if user context not available)
|
|
success: Whether task execution was successful
|
|
"""
|
|
if user_id is None:
|
|
return
|
|
|
|
if user_id not in self.stats['per_user_stats']:
|
|
self.stats['per_user_stats'][user_id] = {
|
|
'executed': 0,
|
|
'failed': 0,
|
|
'success_rate': 0.0
|
|
}
|
|
|
|
user_stats = self.stats['per_user_stats'][user_id]
|
|
if success:
|
|
user_stats['executed'] += 1
|
|
else:
|
|
user_stats['failed'] += 1
|
|
|
|
# Calculate success rate
|
|
total = user_stats['executed'] + user_stats['failed']
|
|
if total > 0:
|
|
user_stats['success_rate'] = (user_stats['executed'] / total) * 100.0
|
|
|
|
async def _schedule_retry(self, task: Any, delay_seconds: int):
|
|
"""Schedule a retry for a failed task."""
|
|
# This would update the task's next_execution time
|
|
# For now, just log - could be enhanced to update next_execution
|
|
logger.debug(f"Scheduling retry for task in {delay_seconds}s")
|
|
|
|
def get_stats(self, user_id: Optional[int] = None) -> Dict[str, Any]:
|
|
"""
|
|
Get scheduler statistics with optional user filtering.
|
|
|
|
Args:
|
|
user_id: Optional user ID to filter statistics for specific user
|
|
|
|
Returns:
|
|
Dictionary with scheduler statistics
|
|
"""
|
|
base_stats = {
|
|
**{k: v for k, v in self.stats.items() if k not in ['per_user_stats']},
|
|
'active_executions': len(self.active_executions),
|
|
'registered_types': self.registry.get_registered_types(),
|
|
'running': self._running,
|
|
'check_interval_minutes': self.current_check_interval_minutes,
|
|
'min_check_interval_minutes': self.min_check_interval_minutes,
|
|
'max_check_interval_minutes': self.max_check_interval_minutes,
|
|
'intelligent_scheduling': True
|
|
}
|
|
|
|
# Include per-user stats (all users or filtered)
|
|
if user_id is not None:
|
|
if user_id in self.stats['per_user_stats']:
|
|
base_stats['user_stats'] = self.stats['per_user_stats'][user_id]
|
|
else:
|
|
base_stats['user_stats'] = {
|
|
'executed': 0,
|
|
'failed': 0,
|
|
'success_rate': 0.0
|
|
}
|
|
else:
|
|
# Include all per-user stats (for admin/debugging)
|
|
base_stats['per_user_stats'] = self.stats['per_user_stats']
|
|
|
|
return base_stats
|
|
|
|
def schedule_one_time_task(
|
|
self,
|
|
func: Callable,
|
|
run_date: datetime,
|
|
job_id: str,
|
|
args: tuple = (),
|
|
kwargs: Dict[str, Any] = None,
|
|
replace_existing: bool = True
|
|
) -> str:
|
|
"""
|
|
Schedule a one-time task to run at a specific datetime.
|
|
|
|
Args:
|
|
func: Async function to execute
|
|
run_date: Datetime when the task should run (must be timezone-aware UTC)
|
|
job_id: Unique identifier for this job
|
|
args: Positional arguments to pass to func
|
|
kwargs: Keyword arguments to pass to func
|
|
replace_existing: If True, replace existing job with same ID
|
|
|
|
Returns:
|
|
Job ID
|
|
"""
|
|
if not self._running:
|
|
logger.warning(
|
|
f"Scheduler not running, but scheduling job {job_id} anyway. "
|
|
"APScheduler will start automatically when needed."
|
|
)
|
|
|
|
try:
|
|
# Ensure run_date is timezone-aware (UTC)
|
|
if run_date.tzinfo is None:
|
|
from datetime import timezone
|
|
run_date = run_date.replace(tzinfo=timezone.utc)
|
|
logger.debug(f"Added UTC timezone to run_date: {run_date}")
|
|
|
|
self.scheduler.add_job(
|
|
func,
|
|
trigger=DateTrigger(run_date=run_date),
|
|
args=args,
|
|
kwargs=kwargs or {},
|
|
id=job_id,
|
|
replace_existing=replace_existing,
|
|
misfire_grace_time=3600 # 1 hour grace period for missed jobs
|
|
)
|
|
|
|
# Get updated job count
|
|
all_jobs = self.scheduler.get_jobs()
|
|
one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']
|
|
|
|
# Extract user_id from kwargs if available for logging and job store
|
|
user_id = kwargs.get('user_id', None) if kwargs else None
|
|
func_name = func.__name__ if hasattr(func, '__name__') else str(func)
|
|
|
|
# Get job store name for user (if user_id provided)
|
|
job_store_name = 'default'
|
|
if user_id:
|
|
try:
|
|
db = get_db_session()
|
|
if db:
|
|
job_store_name = get_user_job_store_name(user_id, db)
|
|
db.close()
|
|
except Exception as e:
|
|
logger.warning(f"Could not determine job store for user {user_id}: {e}")
|
|
|
|
# Note: APScheduler doesn't support dynamic job store creation
|
|
# We use 'default' for all jobs but log the user's job store name for debugging
|
|
# The actual user isolation is handled through task filtering by user_id
|
|
|
|
# Log detailed one-time task scheduling information (use WARNING level for visibility)
|
|
log_message = (
|
|
f"[Scheduler] 📅 Scheduled One-Time Task\n"
|
|
f" ├─ Job ID: {job_id}\n"
|
|
f" ├─ Function: {func_name}\n"
|
|
f" ├─ User ID: {user_id or 'system'}\n"
|
|
f" ├─ Job Store: {job_store_name} (user context)\n"
|
|
f" ├─ Scheduled For: {run_date}\n"
|
|
f" ├─ Replace Existing: {replace_existing}\n"
|
|
f" ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
|
|
f" └─ Total Scheduled Jobs: {len(all_jobs)}"
|
|
)
|
|
logger.warning(log_message)
|
|
|
|
# Log job scheduling to event log for dashboard
|
|
try:
|
|
event_db = get_db_session()
|
|
if event_db:
|
|
event_log = SchedulerEventLog(
|
|
event_type='job_scheduled',
|
|
event_date=datetime.utcnow(),
|
|
job_id=job_id,
|
|
job_type='one_time',
|
|
user_id=user_id,
|
|
event_data={
|
|
'function_name': func_name,
|
|
'job_store': job_store_name,
|
|
'scheduled_for': run_date.isoformat(),
|
|
'replace_existing': replace_existing
|
|
}
|
|
)
|
|
event_db.add(event_log)
|
|
event_db.commit()
|
|
event_db.close()
|
|
except Exception as e:
|
|
logger.debug(f"Failed to log job scheduling event: {e}")
|
|
|
|
return job_id
|
|
except Exception as e:
|
|
logger.error(f"Failed to schedule one-time task {job_id}: {e}")
|
|
raise
|
|
|
|
def is_running(self) -> bool:
|
|
"""Check if scheduler is running."""
|
|
return self._running
|
|
|