Release Candidate: Production Release with Multi-Tenant & Onboarding Enhancements

2026-02-28 20:06:26 +05:30
parent 08a1f4a1d8
commit 4828274cbf
162 changed files with 19489 additions and 4300 deletions
--- a/backend/services/scheduler/core/scheduler.py
+++ b/backend/services/scheduler/core/scheduler.py
@@ -5,13 +5,15 @@ Pluggable task scheduler that can work with any task model.

 import asyncio
 import logging
+import os
 from typing import Dict, Any, Optional, List, Callable
-from datetime import datetime
+from datetime import datetime, timedelta
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.cron import CronTrigger
 from apscheduler.triggers.interval import IntervalTrigger
 from apscheduler.triggers.date import DateTrigger
 from sqlalchemy.orm import Session
+from sqlalchemy import text

 from .executor_interface import TaskExecutor, TaskExecutionResult
 from .task_registry import TaskRegistry
@@ -19,8 +21,10 @@ from .exception_handler import (
    SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
    TaskLoaderError, SchedulerConfigError
 )
+
 from services.database import get_all_user_ids, get_session_for_user
 from utils.logger_utils import get_service_logger
+
 from ..utils.user_job_store import get_user_job_store_name
 from models.scheduler_models import SchedulerEventLog
 from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
@@ -86,6 +90,9 @@ class TaskScheduler:
            }
        )
        
+        # Configure APScheduler to use unified logging system
+        self._configure_apscheduler_logging()
+        
        # Task executor registry
        self.registry = TaskRegistry()
        
@@ -115,6 +122,21 @@ class TaskScheduler:
        }
        
        self._running = False
+
+        # Local Desktop App: Always leader, no advisory locks needed
+        self._leader_lock_key = int(os.getenv("SCHEDULER_LEADER_LOCK_KEY", "84321017"))
+        self._leadership_check_interval_seconds = int(os.getenv("SCHEDULER_LEADERSHIP_CHECK_INTERVAL", "15"))
+        self._leader_session = None
+        self._is_leader = True  # Always leader in local desktop app
+        self._execution_enabled = True # Always enabled
+        self._leader_since = datetime.utcnow().isoformat()
+        self._last_leadership_check = None
+        self._last_leadership_error = None
+
+
+        # Execution lease registry (prevents duplicate redispatch across check cycles)
+        self._task_leases: Dict[str, str] = {}
+        self._task_lease_ttl_seconds = int(os.getenv("SCHEDULER_TASK_LEASE_TTL_SECONDS", "900"))
    
    def _get_trigger_for_interval(self, interval_minutes: int):
        """
@@ -153,6 +175,144 @@ class TaskScheduler:
        self.registry.register(task_type, executor, task_loader)
        logger.info(f"Registered executor for task type: {task_type}")
    
+    def _configure_apscheduler_logging(self):
+        """Configure APScheduler to use unified logging system."""
+        import logging
+        
+        # Get APScheduler loggers and redirect them to unified logging
+        apscheduler_logger = logging.getLogger("apscheduler")
+        apscheduler_scheduler_logger = logging.getLogger("apscheduler.scheduler")
+        apscheduler_executors_logger = logging.getLogger("apscheduler.executors")
+        apscheduler_jobstores_logger = logging.getLogger("apscheduler.jobstores")
+        
+        # Create a custom handler that redirects to unified logger
+        class APSchedulerUnifiedHandler(logging.Handler):
+            def __init__(self, service_logger):
+                super().__init__()
+                self.service_logger = service_logger
+            
+            def emit(self, record):
+                try:
+                    # Format the message
+                    msg = self.format(record)
+                    
+                    # Map APScheduler log levels to unified logger
+                    if record.levelno >= logging.ERROR:
+                        self.service_logger.error(f"[APScheduler] {msg}")
+                    elif record.levelno >= logging.WARNING:
+                        self.service_logger.warning(f"[APScheduler] {msg}")
+                    elif record.levelno >= logging.INFO:
+                        self.service_logger.info(f"[APScheduler] {msg}")
+                    else:
+                        self.service_logger.debug(f"[APScheduler] {msg}")
+                except Exception:
+                    # Don't let logging errors break the scheduler
+                    pass
+        
+        # Create and add the handler
+        unified_handler = APSchedulerUnifiedHandler(logger)
+        unified_handler.setLevel(logging.DEBUG)
+        
+        # Add handler to all APScheduler loggers
+        apscheduler_logger.addHandler(unified_handler)
+        apscheduler_scheduler_logger.addHandler(unified_handler)
+        apscheduler_executors_logger.addHandler(unified_handler)
+        apscheduler_jobstores_logger.addHandler(unified_handler)
+        
+        # Set levels to capture all logs
+        apscheduler_logger.setLevel(logging.DEBUG)
+        apscheduler_scheduler_logger.setLevel(logging.DEBUG)
+        apscheduler_executors_logger.setLevel(logging.DEBUG)
+        apscheduler_jobstores_logger.setLevel(logging.DEBUG)
+        
+        # Prevent propagation to avoid duplicate logs
+        apscheduler_logger.propagate = False
+        apscheduler_scheduler_logger.propagate = False
+        apscheduler_executors_logger.propagate = False
+        apscheduler_jobstores_logger.propagate = False
+        
+        logger.info("APScheduler logging configured to use unified logging system")
+    
+
+    def _scheduler_identity(self) -> str:
+        return f"{os.getenv('HOSTNAME', 'local')}-{os.getpid()}"
+
+    def _acquire_leadership(self) -> bool:
+        """Always return True for local desktop app (no HA needed)."""
+        self._is_leader = True
+        self._execution_enabled = True
+        if not self._leader_since:
+            self._leader_since = datetime.utcnow().isoformat()
+        self._last_leadership_check = datetime.utcnow().isoformat()
+        return True
+
+    def _release_leadership(self):
+        """No-op for local desktop app."""
+        pass
+
+    def _sync_check_due_tasks_job(self):
+        """Ensure check_due_tasks job exists only for leader."""
+        job = self.scheduler.get_job('check_due_tasks')
+        if self._is_leader and self._execution_enabled:
+            if job is None:
+                self.scheduler.add_job(
+                    self._check_and_execute_due_tasks,
+                    trigger=self._get_trigger_for_interval(self.current_check_interval_minutes),
+                    id='check_due_tasks',
+                    replace_existing=True
+                )
+        else:
+            if job is not None:
+                self.scheduler.remove_job('check_due_tasks')
+
+    async def _leadership_tick(self):
+        """Periodic leadership check/renewal (Stub for local)."""
+        if not self._running:
+            return
+
+        self._acquire_leadership()
+        self._sync_check_due_tasks_job()
+
+    def _acquire_task_lease(self, task_key: str) -> bool:
+        """Acquire in-memory lease for a task key if available/expired."""
+        now = datetime.utcnow()
+        expiry_str = self._task_leases.get(task_key)
+
+        if expiry_str:
+            try:
+                expiry = datetime.fromisoformat(expiry_str)
+                if expiry > now:
+                    return False
+            except Exception:
+                # Corrupted lease value: overwrite safely
+                pass
+
+        expiry = now + timedelta(seconds=self._task_lease_ttl_seconds)
+        self._task_leases[task_key] = expiry.isoformat()
+        return True
+
+    def _release_task_lease(self, task_key: str):
+        """Release lease for task key."""
+        if task_key in self._task_leases:
+            del self._task_leases[task_key]
+
+    def _is_task_leased(self, task_key: str) -> bool:
+        """Check whether task key is currently leased and not expired."""
+        expiry_str = self._task_leases.get(task_key)
+        if not expiry_str:
+            return False
+
+        try:
+            expiry = datetime.fromisoformat(expiry_str)
+            if expiry > datetime.utcnow():
+                return True
+        except Exception:
+            pass
+
+        # Expired/corrupt lease gets cleaned up lazily
+        self._release_task_lease(task_key)
+        return False
+
    async def start(self):
        """Start the scheduler with intelligent interval adjustment."""
        if self._running:
@@ -168,16 +328,21 @@ class TaskScheduler:
            )
            self.current_check_interval_minutes = initial_interval
            
-            # Add periodic job to check for due tasks
-            self.scheduler.add_job(
-                self._check_and_execute_due_tasks,
-                trigger=self._get_trigger_for_interval(initial_interval),
-                id='check_due_tasks',
-                replace_existing=True
-            )
-            
            self.scheduler.start()
            self._running = True
+
+            # Leadership monitor runs on all replicas; only leader executes due-task loop.
+            self.scheduler.add_job(
+                self._leadership_tick,
+                trigger=IntervalTrigger(seconds=self._leadership_check_interval_seconds),
+                id='leadership_monitor',
+                replace_existing=True,
+                max_instances=1,
+                coalesce=True
+            )
+
+            # Initial leader election
+            await self._leadership_tick()
            
            # Check for and execute any missed jobs that are still within grace period
            await self._execute_missed_jobs()
@@ -206,7 +371,7 @@ class TaskScheduler:
            registered_types = self.registry.get_registered_types()
            active_strategies = self.stats.get('active_strategies_count', 0)
            
-            # Count OAuth token monitoring tasks from database (recurring weekly tasks)
+            # Count tasks per user (Multi-tenant SQLite)
            oauth_tasks_count = 0
            website_analysis_tasks_count = 0
            platform_insights_tasks_count = 0
@@ -323,126 +488,6 @@ class TaskScheduler:
                    
                    startup_lines.append(f"{prefix} Job: {job.id} | Trigger: {trigger_type} | Next Run: {next_run}{user_context}")
            
-            # Add OAuth token monitoring tasks details
-            # Show ALL OAuth tasks (active and inactive) for complete visibility
-            if total_oauth_tasks > 0:
-                try:
-                    user_ids = get_all_user_ids()
-                    for user_id in user_ids:
-                        try:
-                            db = get_session_for_user(user_id)
-                            if db:
-                                from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
-                                # Get ALL tasks for this user
-                                oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
-                                
-                                for idx, task in enumerate(oauth_tasks):
-                                    is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and user_id == user_ids[-1]
-                                    prefix = "   ├─" # Simplified prefix logic for multi-user list
-                                    
-                                    try:
-                                        user_job_store = get_user_job_store_name(task.user_id, db)
-                                        if user_job_store == 'default':
-                                            logger.debug(
-                                                f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
-                                                f"This may indicate no onboarding data or website URL not found."
-                                            )
-                                    except Exception as e:
-                                        logger.warning(
-                                            f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
-                                            f"Using 'default'. Error type: {type(e).__name__}"
-                                        )
-                                        user_job_store = 'default'
-                                    
-                                    next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
-                                    # Include status in the log line for visibility
-                                    status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
-                                    startup_lines.append(
-                                        f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
-                                        f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
-                                        f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
-                                    )
-                                db.close()
-                        except Exception as e:
-                             logger.warning(f"Error checking OAuth tasks for user {user_id}: {e}")
-                except Exception as e:
-                    logger.debug(f"Could not get OAuth token monitoring task details: {e}")
-            
-            # Add website analysis tasks details
-            if website_analysis_tasks_count > 0:
-                try:
-                    user_ids = get_all_user_ids()
-                    for user_id in user_ids:
-                        try:
-                            db = get_session_for_user(user_id)
-                            if db:
-                                from models.website_analysis_monitoring_models import WebsiteAnalysisTask
-                                website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
-                                
-                                for idx, task in enumerate(website_analysis_tasks):
-                                    is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 and user_id == user_ids[-1]
-                                    prefix = "   ├─" # Simplified
-                                    
-                                    try:
-                                        user_job_store = get_user_job_store_name(task.user_id, db)
-                                    except Exception as e:
-                                        logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
-                                        user_job_store = 'default'
-                                    
-                                    next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
-                                    frequency = f"Every {task.frequency_days} days"
-                                    task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
-                                    status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
-                                    website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
-                                    
-                                    startup_lines.append(
-                                        f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
-                                        f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
-                                        f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
-                                    )
-                                db.close()
-                        except Exception as e:
-                            logger.warning(f"Error checking website analysis tasks for user {user_id}: {e}")
-                except Exception as e:
-                    logger.debug(f"Could not get website analysis task details: {e}")
-            
-            # Add platform insights tasks details
-            if platform_insights_tasks_count > 0:
-                try:
-                    user_ids = get_all_user_ids()
-                    for user_id in user_ids:
-                        try:
-                            db = get_session_for_user(user_id)
-                            if db:
-                                from models.platform_insights_monitoring_models import PlatformInsightsTask
-                                platform_insights_tasks = db.query(PlatformInsightsTask).all()
-                                
-                                for idx, task in enumerate(platform_insights_tasks):
-                                    is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and user_id == user_ids[-1]
-                                    prefix = "   ├─" # Simplified
-                                    
-                                    try:
-                                        user_job_store = get_user_job_store_name(task.user_id, db)
-                                    except Exception as e:
-                                        logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
-                                        user_job_store = 'default'
-                                    
-                                    next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
-                                    platform_label = task.platform.upper() if task.platform else 'Unknown'
-                                    site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
-                                    status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
-                                    
-                                    startup_lines.append(
-                                        f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
-                                        f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
-                                        f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
-                                    )
-                                db.close()
-                        except Exception as e:
-                            logger.warning(f"Error checking platform insights tasks for user {user_id}: {e}")
-                except Exception as e:
-                    logger.debug(f"Could not get platform insights task details: {e}")
-            
            # Add Advertools tasks details
            if advertools_tasks_count > 0:
                try:
@@ -518,7 +563,15 @@ class TaskScheduler:
            
            # Get final job count before shutdown
            all_jobs_before = self.scheduler.get_jobs()
-            
+
+            # Release leadership lock and stop leadership monitor
+            try:
+                if self.scheduler.get_job('leadership_monitor') is not None:
+                    self.scheduler.remove_job('leadership_monitor')
+            except Exception:
+                pass
+            self._release_leadership()
+
            # Shutdown scheduler
            self.scheduler.shutdown(wait=True)
            self._running = False
@@ -569,6 +622,10 @@ class TaskScheduler:
        Main scheduler loop: check for due tasks and execute them.
        This runs periodically with intelligent interval adjustment based on active strategies.
        """
+        if not self._execution_enabled or not self._is_leader:
+            logger.debug("[Scheduler] Skipping due-task loop on standby replica")
+            return
+
        await check_and_execute_due_tasks(self)
    
    async def _adjust_check_interval_if_needed(self, db: Session):
@@ -614,309 +671,156 @@ class TaskScheduler:
        except Exception as e:
            logger.warning(f"[Scheduler] Error checking for missed jobs: {e}")
    
-    async def trigger_interval_adjustment(self):
-        """
-        Trigger immediate interval adjustment check.
-        
-        This should be called when a strategy is activated or deactivated
-        to immediately adjust the scheduler interval based on current active strategies.
-        """
-        if not self._running:
-            logger.debug("Scheduler not running, skipping interval adjustment")
-            return
-        
-        try:
-            # Multi-tenant aware adjustment (iterates all users internally)
-            await adjust_check_interval_if_needed(self)
-        except Exception as e:
-            logger.warning(f"Error triggering interval adjustment: {e}")
-    
    async def _validate_and_rebuild_cumulative_stats(self):
        """
-        Validate cumulative stats on scheduler startup and rebuild if needed.
-        This ensures cumulative stats are accurate after restarts.
-        
-        NOTE: Disabled in multi-tenant mode as there is no global database for cumulative stats.
-        TODO: Implement per-user cumulative stats or a global admin database.
+        Validate and rebuild cumulative stats if needed.
+        Currently a placeholder for future implementation.
        """
-        logger.info("[Scheduler] Cumulative stats validation skipped (multi-tenant mode)")
-        return
-    
-    async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None, user_id: str = None) -> Optional[Dict[str, Any]]:
-        """
-        Process due tasks for a specific task type.
-        
-        Returns:
-            Summary dict with 'found', 'executed', 'failed' counts, or None if no tasks
-        """
-        summary = {'found': 0, 'executed': 0, 'failed': 0}
-        
+        pass
+
+    async def _process_task_type(
+        self,
+        task_type: str,
+        db: Session,
+        cycle_summary: Dict[str, Any],
+        user_id: Optional[str] = None
+    ) -> Dict[str, int]:
+        summary = {"found": 0, "executed": 0, "failed": 0}
        try:
-            # Get task loader for this type
-            try:
-                task_loader = self.registry.get_task_loader(task_type)
-            except Exception as e:
-                error = TaskLoaderError(
-                    message=f"Failed to get task loader for type {task_type}: {str(e)}",
-                    task_type=task_type,
-                    original_error=e
-                )
-                self.exception_handler.handle_exception(error)
-                return None
-            
-            # Load due tasks (with error handling)
-            try:
-                due_tasks = task_loader(db)
-            except Exception as e:
-                error = TaskLoaderError(
-                    message=f"Failed to load due tasks for type {task_type}: {str(e)}",
-                    task_type=task_type,
-                    original_error=e
-                )
-                self.exception_handler.handle_exception(error)
-                return None
-            
-            if not due_tasks:
-                return None
-            
-            summary['found'] = len(due_tasks)
-            self.stats['tasks_found'] += len(due_tasks)
-            
-            # Execute tasks (with concurrency limit)
-            execution_tasks = []
-            skipped_count = 0
-            for task in due_tasks:
-                if len(self.active_executions) >= self.max_concurrent_executions:
-                    skipped_count = len(due_tasks) - len(execution_tasks)
-                    logger.warning(
-                        f"[Scheduler] ⚠️ Max concurrent executions reached ({self.max_concurrent_executions}), "
-                        f"skipping {skipped_count} tasks for {task_type}"
-                    )
-                    break
-                
-                # Execute task asynchronously
-                # Note: Each task gets its own database session to prevent concurrent access issues
-                execution_task = asyncio.create_task(
-                    execute_task_async(self, task_type, task, summary, user_id=user_id)
-                )
-                
-                task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
-                self.active_executions[task_id] = execution_task
-                
-                execution_tasks.append(execution_task)
-            
-            # Wait for executions to complete (with timeout per task)
-            if execution_tasks:
-                await asyncio.wait(execution_tasks, timeout=300)
-            
-            return summary
-            
+            task_loader = self.registry.get_task_loader(task_type)
        except Exception as e:
            error = TaskLoaderError(
-                message=f"Error processing task type {task_type}: {str(e)}",
-                task_type=task_type,
+                message=f"Failed to get task loader for type {task_type}: {str(e)}",
+                user_id=user_id,
+                context={"task_type": task_type},
                original_error=e
            )
            self.exception_handler.handle_exception(error)
+            self.stats["tasks_failed"] += 1
            return summary
-    
-    
-    def _update_user_stats(self, user_id: Optional[int], success: bool):
-        """
-        Update per-user statistics for user isolation tracking.
-        
-        Args:
-            user_id: User ID (None if user context not available)
-            success: Whether task execution was successful
-        """
-        if user_id is None:
+
+        try:
+            tasks = task_loader(db)
+            if not tasks:
+                return summary
+
+            summary["found"] = len(tasks)
+            max_concurrent = self.max_concurrent_executions
+
+            for task in tasks:
+                task_id = getattr(task, "id", None)
+                lease_key = f"{task_type}_{task_id or id(task)}"
+
+                if self._is_task_leased(lease_key):
+                    continue
+
+                if len(self.active_executions) >= max_concurrent:
+                    break
+
+                if not self._acquire_task_lease(lease_key):
+                    continue
+
+                execution_task = asyncio.create_task(
+                    execute_task_async(
+                        self,
+                        task_type,
+                        task,
+                        summary,
+                        execution_source="scheduler",
+                        user_id=user_id,
+                    )
+                )
+                self.active_executions[lease_key] = execution_task
+
+            cycle_summary.setdefault("tasks_found_by_type", {})
+            cycle_summary.setdefault("tasks_executed_by_type", {})
+            cycle_summary.setdefault("tasks_failed_by_type", {})
+
+            cycle_summary["tasks_found_by_type"][task_type] = (
+                cycle_summary["tasks_found_by_type"].get(task_type, 0)
+                + summary["found"]
+            )
+            cycle_summary["tasks_executed_by_type"][task_type] = (
+                cycle_summary["tasks_executed_by_type"].get(task_type, 0)
+                + summary["executed"]
+            )
+            cycle_summary["tasks_failed_by_type"][task_type] = (
+                cycle_summary["tasks_failed_by_type"].get(task_type, 0)
+                + summary["failed"]
+            )
+
+            return summary
+        except Exception as e:
+            error = TaskLoaderError(
+                message=f"Error processing task type {task_type}: {str(e)}",
+                user_id=user_id,
+                context={"task_type": task_type},
+                original_error=e
+            )
+            self.exception_handler.handle_exception(error)
+            self.stats["tasks_failed"] += 1
+            return summary
+
+    def _update_user_stats(self, user_id: Optional[str], success: bool):
+        if not user_id:
            return
-        
-        if user_id not in self.stats['per_user_stats']:
-            self.stats['per_user_stats'][user_id] = {
-                'executed': 0,
-                'failed': 0,
-                'success_rate': 0.0
-            }
-        
-        user_stats = self.stats['per_user_stats'][user_id]
+        per_user = self.stats.setdefault("per_user_stats", {})
+        user_stats = per_user.setdefault(
+            user_id,
+            {
+                "tasks_executed": 0,
+                "tasks_failed": 0,
+                "last_update": None,
+            },
+        )
        if success:
-            user_stats['executed'] += 1
+            user_stats["tasks_executed"] += 1
        else:
-            user_stats['failed'] += 1
-        
-        # Calculate success rate
-        total = user_stats['executed'] + user_stats['failed']
-        if total > 0:
-            user_stats['success_rate'] = (user_stats['executed'] / total) * 100.0
-    
-    async def _schedule_retry(self, task: Any, delay_seconds: int):
-        """Schedule a retry for a failed task."""
-        # This would update the task's next_execution time
-        # For now, just log - could be enhanced to update next_execution
-        logger.debug(f"Scheduling retry for task in {delay_seconds}s")
-    
-    def get_stats(self, user_id: Optional[int] = None) -> Dict[str, Any]:
-        """
-        Get scheduler statistics with optional user filtering.
-        
-        Args:
-            user_id: Optional user ID to filter statistics for specific user
-            
-        Returns:
-            Dictionary with scheduler statistics
-        """
-        base_stats = {
-            **{k: v for k, v in self.stats.items() if k not in ['per_user_stats']},
-            'active_executions': len(self.active_executions),
-            'registered_types': self.registry.get_registered_types(),
-            'running': self._running,
-            'check_interval_minutes': self.current_check_interval_minutes,
-            'min_check_interval_minutes': self.min_check_interval_minutes,
-            'max_check_interval_minutes': self.max_check_interval_minutes,
-            'intelligent_scheduling': True
-        }
-        
-        # Include per-user stats (all users or filtered)
-        if user_id is not None:
-            if user_id in self.stats['per_user_stats']:
-                base_stats['user_stats'] = self.stats['per_user_stats'][user_id]
-            else:
-                base_stats['user_stats'] = {
-                    'executed': 0,
-                    'failed': 0,
-                    'success_rate': 0.0
-                }
-        else:
-            # Include all per-user stats (for admin/debugging)
-            base_stats['per_user_stats'] = self.stats['per_user_stats']
-        
-        return base_stats
-    
+            user_stats["tasks_failed"] += 1
+        user_stats["last_update"] = datetime.utcnow().isoformat()
+
+    async def _schedule_retry(self, task: Any, retry_delay: int):
+        try:
+            task_id = getattr(task, "id", None)
+            logger.warning(
+                f"[Scheduler] Retry requested for task {task_id} in {retry_delay}s, "
+                f"using loader-based retry semantics."
+            )
+        except Exception:
+            pass
+
    def schedule_one_time_task(
        self,
        func: Callable,
        run_date: datetime,
        job_id: str,
-        args: tuple = (),
-        kwargs: Dict[str, Any] = None,
+        kwargs: Optional[Dict[str, Any]] = None,
        replace_existing: bool = True
    ) -> str:
        """
-        Schedule a one-time task to run at a specific datetime.
+        Schedule a one-time task execution.
        
        Args:
-            func: Async function to execute
-            run_date: Datetime when the task should run (must be timezone-aware UTC)
-            job_id: Unique identifier for this job
-            args: Positional arguments to pass to func
-            kwargs: Keyword arguments to pass to func
-            replace_existing: If True, replace existing job with same ID
+            func: Function to execute
+            run_date: Date/time to run the task
+            job_id: Unique job ID
+            kwargs: Keyword arguments for the function
+            replace_existing: Whether to replace existing job with same ID
            
        Returns:
            Job ID
        """
-        if not self._running:
-            logger.warning(
-                f"Scheduler not running, but scheduling job {job_id} anyway. "
-                "APScheduler will start automatically when needed."
-            )
-        
        try:
-            # Ensure run_date is timezone-aware (UTC)
-            if run_date.tzinfo is None:
-                from datetime import timezone
-                run_date = run_date.replace(tzinfo=timezone.utc)
-                logger.debug(f"Added UTC timezone to run_date: {run_date}")
-            
            self.scheduler.add_job(
                func,
                trigger=DateTrigger(run_date=run_date),
-                args=args,
-                kwargs=kwargs or {},
                id=job_id,
+                kwargs=kwargs or {},
                replace_existing=replace_existing,
-                misfire_grace_time=3600  # 1 hour grace period for missed jobs
+                misfire_grace_time=3600  # 1 hour grace period
            )
-            
-            # Get updated job count
-            all_jobs = self.scheduler.get_jobs()
-            one_time_jobs = [j for j in all_jobs if j.id != 'check_due_tasks']
-            
-            # Extract user_id from kwargs if available for logging and job store
-            user_id = kwargs.get('user_id', None) if kwargs else None
-            func_name = func.__name__ if hasattr(func, '__name__') else str(func)
-            
-            # Get job store name for user (if user_id provided)
-            job_store_name = 'default'
-            if user_id:
-                try:
-                    db = get_session_for_user(user_id)
-                    if db:
-                        job_store_name = get_user_job_store_name(user_id, db)
-                        db.close()
-                except Exception as e:
-                    logger.warning(f"Could not determine job store for user {user_id}: {e}")
-            
-            # Note: APScheduler doesn't support dynamic job store creation
-            # We use 'default' for all jobs but log the user's job store name for debugging
-            # The actual user isolation is handled through task filtering by user_id
-            
-            # Log detailed one-time task scheduling information (use WARNING level for visibility)
-            log_message = (
-                f"[Scheduler] 📅 Scheduled One-Time Task\n"
-                f"   ├─ Job ID: {job_id}\n"
-                f"   ├─ Function: {func_name}\n"
-                f"   ├─ User ID: {user_id or 'system'}\n"
-                f"   ├─ Job Store: {job_store_name} (user context)\n"
-                f"   ├─ Scheduled For: {run_date}\n"
-                f"   ├─ Replace Existing: {replace_existing}\n"
-                f"   ├─ Total One-Time Jobs: {len(one_time_jobs)}\n"
-                f"   └─ Total Scheduled Jobs: {len(all_jobs)}"
-            )
-            logger.warning(log_message)
-            
-            # Log job scheduling to event log for dashboard
-            if user_id:
-                try:
-                    event_db = get_session_for_user(user_id)
-                    if event_db:
-                        event_log = SchedulerEventLog(
-                            event_type='job_scheduled',
-                            event_date=datetime.utcnow(),
-                            job_id=job_id,
-                            job_type='one_time',
-                            user_id=user_id,
-                            event_data={
-                                'function_name': func_name,
-                                'job_store': job_store_name,
-                                'scheduled_for': run_date.isoformat(),
-                                'replace_existing': replace_existing
-                            }
-                        )
-                        event_db.add(event_log)
-                        event_db.commit()
-                        event_db.close()
-                except Exception as e:
-                    logger.debug(f"Failed to log job scheduling event: {e}")
-            
+            logger.info(f"Scheduled one-time task {job_id} at {run_date}")
            return job_id
        except Exception as e:
            logger.error(f"Failed to schedule one-time task {job_id}: {e}")
            raise
-    
-    def is_running(self) -> bool:
-        """Check if scheduler is running."""
-        return self._running
-
-    async def execute_task_by_type(self, task_type: str, user_id: str, payload: Dict[str, Any]):
-        """
-        Execute a task by type and payload immediately.
-        Used for one-time tasks triggered by system events.
-        """
-        from collections import namedtuple
-        TaskStub = namedtuple('TaskStub', ['user_id', 'payload', 'id'])
-        task_stub = TaskStub(user_id=user_id, payload=payload, id=f"manual_{datetime.utcnow().timestamp()}")
-        
-        await execute_task_async(self, task_type, task_stub, execution_source="manual")
-