feat: image generation overhaul (model-aware text, dim clamping, \.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates

This commit is contained in:
ajaysi
2026-05-30 07:58:22 +05:30
parent aaf94049da
commit 64f1f88cdd
129 changed files with 8796 additions and 8755 deletions

View File

@@ -3,25 +3,67 @@ Check Cycle Handler
Handles the main scheduler check cycle that finds and executes due tasks.
"""
import json
import os
from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from .interval_manager import adjust_check_interval_if_needed
# Import semantic monitoring for Phase 2B integration
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
# Track last semantic check per user to enforce 24-hour interval
# In-memory cache is sufficient as it resets on restart (which is fine)
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
# Cache for RealTimeSemanticMonitor instances per user (avoids expensive re-instantiation)
# Uses the global SemanticDashboardAPI singleton which provides get-or-create caching.
from services.intelligence.monitoring.semantic_dashboard import semantic_dashboard_api
# Persisted last-check timestamps for semantic health monitoring (24-hour cadence).
# Survives scheduler restarts via a JSON file in the app state directory.
_SEMANTIC_STATE_DIR = os.path.join(
os.path.expanduser("~"), ".alwrity", "scheduler_state"
)
_SEMANTIC_STATE_FILE = os.path.join(_SEMANTIC_STATE_DIR, "semantic_last_checks.json")
def _load_semantic_check_timestamps() -> Dict[str, datetime]:
    """Load the persisted semantic-check timestamps from disk.

    Reads ``_SEMANTIC_STATE_FILE`` (a JSON object mapping user id to an
    ISO-8601 string) and converts every value with
    ``datetime.fromisoformat``.

    Returns:
        A mapping of user id to the parsed ``datetime``. An empty dict is
        returned when the file does not exist, cannot be read, is not valid
        JSON, or does not contain a JSON object. Entries whose value is
        empty are skipped; entries whose value cannot be parsed are skipped
        with a warning so that one corrupt entry does not discard the
        timestamps of every other user.

    This function never raises; all failures are logged as warnings.
    """
    try:
        with open(_SEMANTIC_STATE_FILE, "r", encoding="utf-8") as f:
            raw = json.load(f)
    except FileNotFoundError:
        # No state file yet: nothing has been persisted, not an error.
        return {}
    except Exception as e:
        logger.warning(f"Failed to load semantic check timestamps: {e}")
        return {}

    if not isinstance(raw, dict):
        logger.warning(
            "Failed to load semantic check timestamps: "
            f"expected a JSON object, got {type(raw).__name__}"
        )
        return {}

    checks: Dict[str, datetime] = {}
    for uid, ts in raw.items():
        if not ts:
            continue
        try:
            checks[uid] = datetime.fromisoformat(ts)
        except (TypeError, ValueError) as e:
            # Keep the remaining entries; only this user loses its timestamp.
            logger.warning(
                f"Ignoring invalid semantic check timestamp for user {uid}: {e}"
            )
    return checks
def _save_semantic_check_timestamps(checks: Dict[str, datetime]):
    """Persist semantic-check timestamps to disk as JSON.

    The payload is first written to a temporary file next to
    ``_SEMANTIC_STATE_FILE`` and then moved over it with ``os.replace``.
    An interrupted or failed write therefore leaves the previous state file
    intact instead of a truncated one that the loader would have to discard.

    Args:
        checks: Mapping of user id to the time of that user's last check.
            ``datetime`` values are stored as ISO-8601 strings; any other
            value is written unchanged.

    This is best effort: failures are logged as warnings and never raised.
    """
    # Per-process suffix so two processes never write the same temp file.
    tmp_path = f"{_SEMANTIC_STATE_FILE}.{os.getpid()}.tmp"
    try:
        os.makedirs(_SEMANTIC_STATE_DIR, exist_ok=True)
        serializable = {
            uid: ts.isoformat() if isinstance(ts, datetime) else ts
            for uid, ts in checks.items()
        }
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(serializable, f)
        # Swap the fully written file into place in a single step.
        os.replace(tmp_path, _SEMANTIC_STATE_FILE)
    except Exception as e:
        logger.warning(f"Failed to save semantic check timestamps: {e}")
        # Do not leave a half-written temp file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
# Load persisted timestamps on startup so the 24-hour cadence survives restarts.
# If the file is missing (first start), all users will get an immediate check —
# that is acceptable because monitor instances are now cached via SemanticDashboardAPI,
# meaning heavy model initialisation happens at most once per user.
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = _load_semantic_check_timestamps()
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
"""
@@ -48,7 +90,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
# Iterate through all users (Multi-tenancy support)
user_ids = get_all_user_ids()
total_active_strategies = 0
# Evict stale semantic monitor instances to prevent unbounded memory growth
semantic_dashboard_api.evict_stale_monitors()
for user_id in user_ids:
db = get_session_for_user(user_id)
if not db:
@@ -76,30 +121,25 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
# Check if 24 hours have passed since last check
should_run_semantic = False
# Phase 2B: Semantic health monitoring (24-hour cadence)
# Uses cached monitor instances via SemanticDashboardAPI singleton
# to avoid re-initializing TxtaiIntelligenceService and SIFIntegrationService.
now = datetime.utcnow()
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
should_run_semantic = True
should_run_semantic = not last_check or (now - last_check).total_seconds() > 86400 # 24h
if should_run_semantic:
try:
semantic_monitor = RealTimeSemanticMonitor(user_id)
# Use public wrapper method which aggregates metrics
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
semantic_monitor = semantic_dashboard_api.get_monitor(user_id)
semantic_health = await semantic_monitor.check_semantic_health(user_id)
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
# Update timestamp only on success/attempt to prevent spamming retries
logger.info(
f"[Semantic Monitor] User {user_id} health check: "
f"{semantic_health.status} (score: {semantic_health.value:.2f})"
)
LAST_SEMANTIC_CHECKS[user_id] = now
_save_semantic_check_timestamps(LAST_SEMANTIC_CHECKS)
except Exception as e:
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
else:
pass
# Check each registered task type for this user
@@ -113,11 +153,10 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
finally:
db.close()
# Adjust interval based on TOTAL active strategies across all users
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
# because it's not multi-tenant aware yet.
# Adjust interval based on active strategy presence across all users.
# Only one strategy can be active per user at a time, so > 0 check is sufficient.
scheduler.stats['active_strategies_count'] = total_active_strategies
if total_active_strategies > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:

View File

@@ -1,10 +1,9 @@
"""
Interval Manager
Handles intelligent scheduling interval adjustment based on active strategies.
Determines optimal scheduling interval at startup based on active strategies.
"""
from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_all_user_ids, get_session_for_user
@@ -23,109 +22,43 @@ async def determine_optimal_interval(
) -> int:
"""
Determine optimal check interval based on active strategies across all users.
Only one strategy can be active per user at a time, so this is a simple
exists/not-exists check: does any user have an active strategy?
Args:
scheduler: TaskScheduler instance
min_interval: Minimum check interval in minutes
max_interval: Maximum check interval in minutes
Returns:
Optimal check interval in minutes
"""
total_active_count = 0
has_active = False
user_ids = get_all_user_ids()
for user_id in user_ids:
db = None
try:
db = get_session_for_user(user_id)
if db:
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
total_active_count += user_active_count
# Optimization: If we found at least one active strategy, we can stop and return min_interval
# (unless we want accurate stats)
# For stats accuracy, we should continue.
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
if active_strategy_service.has_active_strategies_with_tasks():
has_active = True
break
except Exception as e:
logger.warning(f"Error checking user {user_id} for strategies: {e}")
logger.warning(f"Error checking active strategies for user {user_id}: {e}")
finally:
if db:
db.close()
scheduler.stats['active_strategies_count'] = total_active_count
if total_active_count > 0:
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
# Note: stats['active_strategies_count'] is set by check_cycle_handler
# with the actual per-user count for accurate logging.
if has_active:
logger.info(f"Active strategies found - using {min_interval}min interval")
return min_interval
else:
logger.info(f"No active strategies found - using {max_interval}min interval")
return max_interval
async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session = None  # Deprecated parameter, ignored
):
    """
    Re-evaluate the scheduler's check interval from the number of active
    strategies found across all users.

    Every user database is queried for its count of active strategies with
    tasks. The sum is stored in ``scheduler.stats['active_strategies_count']``.
    When the sum is positive the minimum interval is selected, otherwise the
    maximum; the ``check_due_tasks`` job is only modified when the selected
    interval differs from the current one.

    Args:
        scheduler: TaskScheduler instance
        db: Deprecated/Ignored
    """
    active_total = 0

    for user_id in get_all_user_ids():
        user_db = None
        try:
            user_db = get_session_for_user(user_id)
            if user_db:
                try:
                    from services.active_strategy_service import ActiveStrategyService
                    strategy_service = ActiveStrategyService(db_session=user_db)
                    active_total += strategy_service.count_active_strategies_with_tasks()
                except Exception as e:
                    logger.warning(f"Error counting active strategies for user {user_id}: {e}")
        except Exception as e:
            logger.warning(f"Error checking user {user_id} for strategies: {e}")
        finally:
            # Always release the per-user session, even after a failure.
            if user_db:
                user_db.close()

    scheduler.stats['active_strategies_count'] = active_total

    # Any active strategy means frequent checks; none means relaxed checks.
    optimal_interval = (
        scheduler.min_check_interval_minutes
        if active_total > 0
        else scheduler.max_check_interval_minutes
    )

    # Nothing to do when the job already runs at the selected interval.
    if optimal_interval == scheduler.current_check_interval_minutes:
        return

    reason = 'Active strategies detected' if active_total > 0 else 'No active strategies'
    interval_message = (
        f"[Scheduler] ⚙️ Adjusting Check Interval\n"
        f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
        f" ├─ Optimal: {optimal_interval}min\n"
        f" ├─ Active Strategies: {active_total}\n"
        f" └─ Reason: {reason}"
    )
    logger.warning(interval_message)

    # The job id must be 'check_due_tasks' (not 'check_cycle') to match scheduler.py.
    new_trigger = scheduler._get_trigger_for_interval(optimal_interval)
    scheduler.scheduler.modify_job(
        job_id='check_due_tasks',
        trigger=new_trigger
    )

    scheduler.current_check_interval_minutes = optimal_interval
    scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

View File

@@ -27,7 +27,7 @@ from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .interval_manager import determine_optimal_interval
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .website_analysis_task_restoration import restore_website_analysis_tasks
@@ -628,15 +628,6 @@ class TaskScheduler:
await check_and_execute_due_tasks(self)
async def _adjust_check_interval_if_needed(self, db: Session):
    """
    Delegate check-interval adjustment to the interval manager.

    Forwards this scheduler instance and ``db`` to the module-level
    ``adjust_check_interval_if_needed`` helper and awaits its completion.

    Args:
        db: Database session, passed through unchanged
    """
    await adjust_check_interval_if_needed(self, db)
async def _execute_missed_jobs(self):
"""
Check for and execute any missed DateTrigger jobs that are still within grace period.