feat: image generation overhaul (model-aware text, dim clamping, $0.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions
--- a/backend/services/scheduler/core/check_cycle_handler.py
+++ b/backend/services/scheduler/core/check_cycle_handler.py
@@ -3,25 +3,67 @@ Check Cycle Handler
 Handles the main scheduler check cycle that finds and executes due tasks.
 """

+import json
+import os
 from typing import TYPE_CHECKING, Dict, Any
 from datetime import datetime
 from sqlalchemy.orm import Session

 from services.database import get_all_user_ids, get_session_for_user
 from utils.logger_utils import get_service_logger
-from .interval_manager import adjust_check_interval_if_needed
-
-# Import semantic monitoring for Phase 2B integration
-from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor

 if TYPE_CHECKING:
    from .scheduler import TaskScheduler

 logger = get_service_logger("check_cycle_handler")

-# Track last semantic check per user to enforce 24-hour interval
-# In-memory cache is sufficient as it resets on restart (which is fine)
-LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
+# RealTimeSemanticMonitor instances are cached per user by the global SemanticDashboardAPI
+# singleton (get-or-create), avoiding expensive re-instantiation; no monitor cache is kept in this module.
+from services.intelligence.monitoring.semantic_dashboard import semantic_dashboard_api
+
+# Persisted last-check timestamps for semantic health monitoring (24-hour cadence).
+# Survives scheduler restarts via a JSON file under ~/.alwrity/scheduler_state.
+_SEMANTIC_STATE_DIR = os.path.join(
+    os.path.expanduser("~"), ".alwrity", "scheduler_state"
+)
+_SEMANTIC_STATE_FILE = os.path.join(_SEMANTIC_STATE_DIR, "semantic_last_checks.json")
+
+
+def _load_semantic_check_timestamps() -> Dict[str, datetime]:
+    """Load persisted check timestamps from disk. Returns empty dict on any failure."""
+    try:
+        if not os.path.exists(_SEMANTIC_STATE_FILE):
+            return {}
+        with open(_SEMANTIC_STATE_FILE, "r") as f:
+            raw = json.load(f)
+        return {
+            uid: datetime.fromisoformat(ts)
+            for uid, ts in raw.items() if ts
+        }
+    except Exception as e:
+        logger.warning(f"Failed to load semantic check timestamps: {e}")
+        return {}
+
+
+def _save_semantic_check_timestamps(checks: Dict[str, datetime]):
+    """Persist check timestamps to disk (best-effort: failures are logged, not raised)."""
+    try:
+        os.makedirs(_SEMANTIC_STATE_DIR, exist_ok=True)
+        serializable = {
+            uid: ts.isoformat() if isinstance(ts, datetime) else ts
+            for uid, ts in checks.items()
+        }
+        with open(_SEMANTIC_STATE_FILE, "w") as f:
+            json.dump(serializable, f)
+    except Exception as e:
+        logger.warning(f"Failed to save semantic check timestamps: {e}")
+
+
+# Load persisted timestamps on startup so the 24-hour cadence survives restarts.
+# If the file is missing (first start) or unreadable, the loader returns {} and every
+# user gets an immediate check — acceptable because monitor instances are cached via
+# SemanticDashboardAPI, so heavy model initialisation is not repeated on every check.
+LAST_SEMANTIC_CHECKS: Dict[str, datetime] = _load_semantic_check_timestamps()

 async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
@@ -48,7 +90,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    # Iterate through all users (Multi-tenancy support)
    user_ids = get_all_user_ids()
    total_active_strategies = 0
-    
+
+    # Evict stale semantic monitor instances to prevent unbounded memory growth
+    semantic_dashboard_api.evict_stale_monitors()
+
    for user_id in user_ids:
        db = get_session_for_user(user_id)
        if not db:
@@ -76,30 +121,25 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
                except Exception as e:
                    logger.warning(f"Error counting active strategies for user {user_id}: {e}")

-            # Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
-            # Check if 24 hours have passed since last check
-            should_run_semantic = False
+            # Phase 2B: Semantic health monitoring (24-hour cadence)
+            # Uses cached monitor instances via SemanticDashboardAPI singleton
+            # to avoid re-initializing TxtaiIntelligenceService and SIFIntegrationService.
            now = datetime.utcnow()
            last_check = LAST_SEMANTIC_CHECKS.get(user_id)
-            
-            if not last_check or (now - last_check).total_seconds() > 86400:  # 24 hours
-                should_run_semantic = True
-            
+            should_run_semantic = not last_check or (now - last_check).total_seconds() > 86400  # 24h
+
            if should_run_semantic:
                try:
-                    semantic_monitor = RealTimeSemanticMonitor(user_id)
-                    # Use public wrapper method which aggregates metrics
-                    # Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
+                    semantic_monitor = semantic_dashboard_api.get_monitor(user_id)
                    semantic_health = await semantic_monitor.check_semantic_health(user_id)
-                    logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
-                    
-                    # Update timestamp only on success/attempt to prevent spamming retries
+                    logger.info(
+                        f"[Semantic Monitor] User {user_id} health check: "
+                        f"{semantic_health.status} (score: {semantic_health.value:.2f})"
+                    )
                    LAST_SEMANTIC_CHECKS[user_id] = now
-                         
+                    _save_semantic_check_timestamps(LAST_SEMANTIC_CHECKS)
                except Exception as e:
                    logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
-            else:
-                pass


            # Check each registered task type for this user
@@ -113,11 +153,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
        finally:
            db.close()
    
-    # Adjust interval based on TOTAL active strategies across all users
-    # We manually update the stats and check interval, skipping adjust_check_interval_if_needed 
-    # because it's not multi-tenant aware yet.
+    # Adjust interval based on active strategy presence across all users.
+    # Only one strategy can be active per user at a time, so > 0 check is sufficient.
    scheduler.stats['active_strategies_count'] = total_active_strategies
-    
+
    if total_active_strategies > 0:
        optimal_interval = scheduler.min_check_interval_minutes
    else:
--- a/backend/services/scheduler/core/interval_manager.py
+++ b/backend/services/scheduler/core/interval_manager.py
@@ -1,10 +1,9 @@
 """
 Interval Manager
-Handles intelligent scheduling interval adjustment based on active strategies.
+Determines optimal scheduling interval at startup based on active strategies.
 """

 from typing import TYPE_CHECKING
-from datetime import datetime
 from sqlalchemy.orm import Session

 from services.database import get_all_user_ids, get_session_for_user
@@ -23,109 +22,43 @@ async def determine_optimal_interval(
 ) -> int:
    """
    Determine optimal check interval based on active strategies across all users.
-    
+
+    Only one strategy can be active per user at a time, so this is a simple
+    exists/not-exists check: does any user have an active strategy?
+
    Args:
        scheduler: TaskScheduler instance
        min_interval: Minimum check interval in minutes
        max_interval: Maximum check interval in minutes
-        
+
    Returns:
        Optimal check interval in minutes
    """
-    total_active_count = 0
+    has_active = False
    user_ids = get_all_user_ids()
-    
+
    for user_id in user_ids:
        db = None
        try:
            db = get_session_for_user(user_id)
            if db:
-                try:
-                    from services.active_strategy_service import ActiveStrategyService
-                    active_strategy_service = ActiveStrategyService(db_session=db)
-                    user_active_count = active_strategy_service.count_active_strategies_with_tasks()
-                    total_active_count += user_active_count
-                    
-                    # Optimization: If we found at least one active strategy, we can stop and return min_interval
-                    # (unless we want accurate stats)
-                    # For stats accuracy, we should continue.
-                except Exception as e:
-                    logger.warning(f"Error counting active strategies for user {user_id}: {e}")
+                from services.active_strategy_service import ActiveStrategyService
+                active_strategy_service = ActiveStrategyService(db_session=db)
+                if active_strategy_service.has_active_strategies_with_tasks():
+                    has_active = True
+                    break
        except Exception as e:
-            logger.warning(f"Error checking user {user_id} for strategies: {e}")
+            logger.warning(f"Error checking active strategies for user {user_id}: {e}")
        finally:
            if db:
                db.close()
-                
-    scheduler.stats['active_strategies_count'] = total_active_count
-    
-    if total_active_count > 0:
-        logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
+
+    # Note: stats['active_strategies_count'] is not set here; check_cycle_handler sets it
+    # to the total number of active strategies summed across all users.
+
+    if has_active:
+        logger.info(f"Active strategies found - using {min_interval}min interval")
        return min_interval
    else:
        logger.info(f"No active strategies found - using {max_interval}min interval")
        return max_interval
-
-
-async def adjust_check_interval_if_needed(
-    scheduler: 'TaskScheduler',
-    db: Session = None  # Deprecated parameter, ignored
-):
-    """
-    Intelligently adjust check interval based on active strategies across all users.
-    
-    If there are active strategies with tasks, check more frequently.
-    If there are no active strategies, check less frequently.
-    
-    Args:
-        scheduler: TaskScheduler instance
-        db: Deprecated/Ignored
-    """
-    total_active_count = 0
-    user_ids = get_all_user_ids()
-    
-    for user_id in user_ids:
-        user_db = None
-        try:
-            user_db = get_session_for_user(user_id)
-            if user_db:
-                try:
-                    from services.active_strategy_service import ActiveStrategyService
-                    active_strategy_service = ActiveStrategyService(db_session=user_db)
-                    user_active_count = active_strategy_service.count_active_strategies_with_tasks()
-                    total_active_count += user_active_count
-                except Exception as e:
-                    logger.warning(f"Error counting active strategies for user {user_id}: {e}")
-        except Exception as e:
-            logger.warning(f"Error checking user {user_id} for strategies: {e}")
-        finally:
-            if user_db:
-                user_db.close()
-    
-    scheduler.stats['active_strategies_count'] = total_active_count
-    
-    # Determine optimal interval
-    if total_active_count > 0:
-        optimal_interval = scheduler.min_check_interval_minutes
-    else:
-        optimal_interval = scheduler.max_check_interval_minutes
-    
-    # Only reschedule if interval needs to change
-    if optimal_interval != scheduler.current_check_interval_minutes:
-        interval_message = (
-            f"[Scheduler] ⚙️ Adjusting Check Interval\n"
-            f"   ├─ Current: {scheduler.current_check_interval_minutes}min\n"
-            f"   ├─ Optimal: {optimal_interval}min\n"
-            f"   ├─ Active Strategies: {total_active_count}\n"
-            f"   └─ Reason: {'Active strategies detected' if total_active_count > 0 else 'No active strategies'}"
-        )
-        logger.warning(interval_message)
-        
-        # Reschedule the job with new interval
-        scheduler.scheduler.modify_job(
-            job_id='check_due_tasks',  # Fixed job_id from check_cycle to check_due_tasks to match scheduler.py
-            trigger=scheduler._get_trigger_for_interval(optimal_interval)
-        )
-        scheduler.current_check_interval_minutes = optimal_interval
-        scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
-
--- a/backend/services/scheduler/core/scheduler.py
+++ b/backend/services/scheduler/core/scheduler.py
@@ -27,7 +27,7 @@ from utils.logger_utils import get_service_logger

 from ..utils.user_job_store import get_user_job_store_name
 from models.scheduler_models import SchedulerEventLog
-from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
+from .interval_manager import determine_optimal_interval
 from .job_restoration import restore_persona_jobs
 from .oauth_task_restoration import restore_oauth_monitoring_tasks
 from .website_analysis_task_restoration import restore_website_analysis_tasks
@@ -628,15 +628,6 @@ class TaskScheduler:

        await check_and_execute_due_tasks(self)
    
-    async def _adjust_check_interval_if_needed(self, db: Session):
-        """
-        Intelligently adjust check interval based on active strategies.
-        
-        Args:
-            db: Database session
-        """
-        await adjust_check_interval_if_needed(self, db)
-    
    async def _execute_missed_jobs(self):
        """
        Check for and execute any missed DateTrigger jobs that are still within grace period.