Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

This commit is contained in:
ajaysi
2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions

View File

@@ -0,0 +1,94 @@
"""
Advertools Task Restoration Utility
Handles creation and restoration of Advertools intelligence tasks for users.
"""
from datetime import datetime, timedelta
from typing import Any
from loguru import logger
from sqlalchemy import func
from sqlalchemy.orm import Session
from models.onboarding import WebsiteAnalysis, OnboardingSession
from models.advertools_monitoring_models import AdvertoolsTask
from services.database import get_all_user_ids, get_session_for_user
async def restore_advertools_tasks(scheduler: Any) -> int:
"""
Restore/create Advertools tasks for all users who have completed Step 2.
Returns:
Number of tasks created/restored
"""
logger.info("Restoring Advertools intelligence tasks...")
total_created = 0
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if not db:
continue
try:
# Check if user has completed Step 2 (has WebsiteAnalysis)
session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
if not session:
continue
analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
if not analysis or not analysis.website_url:
continue
# Check for existing Advertools tasks
existing_audit = db.query(AdvertoolsTask).filter(
AdvertoolsTask.user_id == user_id,
func.json_extract(AdvertoolsTask.payload, '$.type') == 'content_audit'
).first()
if not existing_audit:
# Create weekly content audit task
new_audit = AdvertoolsTask(
user_id=user_id,
website_url=analysis.website_url,
status='active',
next_execution=datetime.utcnow() + timedelta(days=1), # Start tomorrow
frequency_days=7,
payload={
"type": "content_audit",
"website_url": analysis.website_url
}
)
db.add(new_audit)
total_created += 1
logger.info(f"Created weekly content audit task for user {user_id}")
existing_health = db.query(AdvertoolsTask).filter(
AdvertoolsTask.user_id == user_id,
func.json_extract(AdvertoolsTask.payload, '$.type') == 'site_health'
).first()
if not existing_health:
# Create weekly site health task
new_health = AdvertoolsTask(
user_id=user_id,
website_url=analysis.website_url,
status='active',
next_execution=datetime.utcnow() + timedelta(days=2), # Start in 2 days
frequency_days=7,
payload={
"type": "site_health",
"website_url": analysis.website_url
}
)
db.add(new_health)
total_created += 1
logger.info(f"Created weekly site health task for user {user_id}")
db.commit()
finally:
db.close()
except Exception as e:
logger.error(f"Error restoring Advertools tasks for user {user_id}: {e}")
return total_created

View File

@@ -7,18 +7,21 @@ from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
# Import semantic monitoring for Phase 2B integration
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
# Track last semantic check per user to enforce 24-hour interval
# In-memory cache is sufficient as it resets on restart (which is fine)
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
"""
@@ -42,154 +45,133 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
'total_failed': 0
}
db = None
try:
db = get_db_session()
if db is None:
logger.error("[Scheduler Check] ❌ Failed to get database session")
return
# Check for active strategies and adjust interval intelligently
await adjust_check_interval_if_needed(scheduler, db)
# Check each registered task type
registered_types = scheduler.registry.get_registered_types()
for task_type in registered_types:
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
if type_summary:
cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)
# Calculate totals
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
# Log comprehensive check cycle summary
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
active_strategies = scheduler.stats.get('active_strategies_count', 0)
active_executions = len(scheduler.active_executions)
# Build comprehensive check cycle summary log message
check_lines = [
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
f" ├─ Duration: {check_duration:.2f}s",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
]
if cycle_summary['tasks_found_by_type']:
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
for idx, (task_type, count) in enumerate(task_types_list):
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
prefix = " └─" if is_last_task_type else " ├─"
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
if cycle_summary['total_found'] > 0:
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
else:
check_lines.append(f" └─ No tasks found - scheduler idle")
# Log comprehensive check cycle summary in single message
logger.warning("\n".join(check_lines))
# Save check cycle event to database for historical tracking
event_log_id = None
# Iterate through all users (Multi-tenancy support)
user_ids = get_all_user_ids()
total_active_strategies = 0
for user_id in user_ids:
db = get_session_for_user(user_id)
if not db:
logger.warning(f"[Scheduler Check] Could not get database session for user {user_id}")
continue
try:
event_log = SchedulerEventLog(
event_type='check_cycle',
event_date=check_start_time,
check_cycle_number=scheduler.stats['total_checks'],
check_interval_minutes=scheduler.current_check_interval_minutes,
tasks_found=cycle_summary.get('total_found', 0),
tasks_executed=cycle_summary.get('total_executed', 0),
tasks_failed=cycle_summary.get('total_failed', 0),
tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
check_duration_seconds=check_duration,
active_strategies_count=active_strategies,
active_executions=active_executions,
event_data={
'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
}
)
db.add(event_log)
db.flush() # Flush to get the ID without committing
event_log_id = event_log.id
db.commit()
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
if db:
db.rollback()
# Continue execution even if event log save fails
# Update cumulative stats table (persistent across restarts)
try:
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
# Update cumulative metrics by adding this cycle's values
# Get current cycle values (incremental, not total)
cycle_tasks_found = cycle_summary.get('total_found', 0)
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
# Update cumulative totals (additive)
cumulative_stats.total_check_cycles += 1
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
# We track it as-is from scheduler.stats (it's already cumulative)
# This ensures we don't double-count skipped tasks
if cumulative_stats.cumulative_tasks_skipped is None:
cumulative_stats.cumulative_tasks_skipped = 0
# Update to current total from scheduler (which is already cumulative)
current_skipped = scheduler.stats.get('tasks_skipped', 0)
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
cumulative_stats.cumulative_tasks_skipped = current_skipped
cumulative_stats.last_check_cycle_id = event_log_id
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
# Log at DEBUG level to avoid noise during normal operation
# This is expected behavior, not a warning
logger.debug(
f"[Check Cycle] Updated cumulative stats: "
f"cycles={cumulative_stats.total_check_cycles}, "
f"found={cumulative_stats.cumulative_tasks_found}, "
f"executed={cumulative_stats.cumulative_tasks_executed}, "
f"failed={cumulative_stats.cumulative_tasks_failed}"
)
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
if db:
db.rollback()
# Log warning but continue - cumulative stats can be rebuilt from event logs
logger.warning(
"[Check Cycle] ⚠️ Cumulative stats update failed. "
"Stats can be rebuilt from event logs on next dashboard load."
)
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
except Exception as e:
error = DatabaseError(
message=f"Error checking for due tasks: {str(e)}",
original_error=e
)
scheduler.exception_handler.handle_exception(error)
logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
finally:
if db:
db.close()
# Check active strategies for this user (for interval adjustment)
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
user_active_strategies = active_strategy_service.count_active_strategies_with_tasks()
total_active_strategies += user_active_strategies
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
# Check if 24 hours have passed since last check
should_run_semantic = False
now = datetime.utcnow()
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
should_run_semantic = True
if should_run_semantic:
try:
semantic_monitor = RealTimeSemanticMonitor(user_id)
# Use public wrapper method which aggregates metrics
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
semantic_health = await semantic_monitor.check_semantic_health(user_id)
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
# Update timestamp only on success/attempt to prevent spamming retries
LAST_SEMANTIC_CHECKS[user_id] = now
except Exception as e:
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
else:
pass
# Check each registered task type for this user
registered_types = scheduler.registry.get_registered_types()
for task_type in registered_types:
# Pass the user-specific session
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary, user_id=user_id)
if type_summary:
cycle_summary['tasks_found_by_type'][task_type] = cycle_summary['tasks_found_by_type'].get(task_type, 0) + type_summary.get('found', 0)
cycle_summary['tasks_executed_by_type'][task_type] = cycle_summary['tasks_executed_by_type'].get(task_type, 0) + type_summary.get('executed', 0)
cycle_summary['tasks_failed_by_type'][task_type] = cycle_summary['tasks_failed_by_type'].get(task_type, 0) + type_summary.get('failed', 0)
except Exception as e:
logger.error(f"[Scheduler Check] Error processing user {user_id}: {e}")
finally:
db.close()
# Adjust interval based on TOTAL active strategies across all users
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
# because it's not multi-tenant aware yet.
scheduler.stats['active_strategies_count'] = total_active_strategies
if total_active_strategies > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {total_active_strategies}\n"
f" └─ Reason: {'Active strategies detected' if total_active_strategies > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
job_id='check_due_tasks',
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
scheduler.current_check_interval_minutes = optimal_interval
# Calculate totals
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
# Log comprehensive check cycle summary
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
active_executions = len(scheduler.active_executions)
# Build comprehensive check cycle summary log message
check_lines = [
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
f" ├─ Duration: {check_duration:.2f}s",
f" ├─ Active Strategies: {total_active_strategies}",
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
f" ├─ User Isolation: Enabled (Scanned {len(user_ids)} users)",
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
]
if cycle_summary['tasks_found_by_type']:
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
for idx, (task_type, count) in enumerate(task_types_list):
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
prefix = " └─" if is_last_task_type else " ├─"
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
if cycle_summary['total_found'] > 0:
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
else:
check_lines.append(f" └─ No tasks found - scheduler idle")
# Log comprehensive check cycle summary in single message
logger.warning("\n".join(check_lines))
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()

View File

@@ -106,6 +106,7 @@ class DatabaseError(SchedulerException):
message: str,
user_id: Optional[int] = None,
task_id: Optional[int] = None,
task_type: Optional[str] = None,
context: Dict[str, Any] = None,
original_error: Exception = None
):
@@ -115,6 +116,7 @@ class DatabaseError(SchedulerException):
severity=SchedulerErrorSeverity.CRITICAL,
user_id=user_id,
task_id=task_id,
task_type=task_type,
context=context or {},
original_error=original_error
)
@@ -180,6 +182,9 @@ class SchedulerConfigError(SchedulerException):
def __init__(
self,
message: str,
user_id: Optional[int] = None,
task_id: Optional[int] = None,
task_type: Optional[str] = None,
context: Dict[str, Any] = None,
original_error: Exception = None
):
@@ -187,6 +192,9 @@ class SchedulerConfigError(SchedulerException):
message=message,
error_type=SchedulerErrorType.SCHEDULER_CONFIG_ERROR,
severity=SchedulerErrorSeverity.CRITICAL,
user_id=user_id,
task_id=task_id,
task_type=task_type,
context=context or {},
original_error=original_error
)

View File

@@ -7,9 +7,8 @@ from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
@@ -23,7 +22,7 @@ async def determine_optimal_interval(
max_interval: int
) -> int:
"""
Determine optimal check interval based on active strategies.
Determine optimal check interval based on active strategies across all users.
Args:
scheduler: TaskScheduler instance
@@ -33,107 +32,100 @@ async def determine_optimal_interval(
Returns:
Optimal check interval in minutes
"""
db = None
try:
db = get_db_session()
if db:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
scheduler.stats['active_strategies_count'] = active_count
if active_count > 0:
logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
return min_interval
else:
logger.info(f"No active strategies with tasks - using {max_interval}min interval")
return max_interval
except Exception as e:
logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
finally:
if db:
db.close()
total_active_count = 0
user_ids = get_all_user_ids()
# Default to shorter interval on error (safer)
return min_interval
for user_id in user_ids:
db = None
try:
db = get_session_for_user(user_id)
if db:
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
total_active_count += user_active_count
# Optimization: If we found at least one active strategy, we can stop and return min_interval
# (unless we want accurate stats)
# For stats accuracy, we should continue.
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
except Exception as e:
logger.warning(f"Error checking user {user_id} for strategies: {e}")
finally:
if db:
db.close()
scheduler.stats['active_strategies_count'] = total_active_count
if total_active_count > 0:
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
return min_interval
else:
logger.info(f"No active strategies found - using {max_interval}min interval")
return max_interval
async def adjust_check_interval_if_needed(
scheduler: 'TaskScheduler',
db: Session
db: Session = None # Deprecated parameter, ignored
):
"""
Intelligently adjust check interval based on active strategies.
Intelligently adjust check interval based on active strategies across all users.
If there are active strategies with tasks, check more frequently.
If there are no active strategies, check less frequently.
Args:
scheduler: TaskScheduler instance
db: Database session
db: Deprecated/Ignored
"""
try:
from services.active_strategy_service import ActiveStrategyService
total_active_count = 0
user_ids = get_all_user_ids()
for user_id in user_ids:
user_db = None
try:
user_db = get_session_for_user(user_id)
if user_db:
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=user_db)
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
total_active_count += user_active_count
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
except Exception as e:
logger.warning(f"Error checking user {user_id} for strategies: {e}")
finally:
if user_db:
user_db.close()
scheduler.stats['active_strategies_count'] = total_active_count
# Determine optimal interval
if total_active_count > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
# Only reschedule if interval needs to change
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {total_active_count}\n"
f" └─ Reason: {'Active strategies detected' if total_active_count > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
scheduler.stats['active_strategies_count'] = active_count
# Determine optimal interval
if active_count > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
# Only reschedule if interval needs to change
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {active_count}\n"
f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
'check_due_tasks',
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
# Save previous interval before updating
previous_interval = scheduler.current_check_interval_minutes
# Update current interval
scheduler.current_check_interval_minutes = optimal_interval
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
# Save interval adjustment event to database
try:
event_db = get_db_session()
if event_db:
event_log = SchedulerEventLog(
event_type='interval_adjustment',
event_date=datetime.utcnow(),
previous_interval_minutes=previous_interval,
new_interval_minutes=optimal_interval,
check_interval_minutes=optimal_interval,
active_strategies_count=active_count,
event_data={
'reason': 'intelligent_scheduling',
'min_interval': scheduler.min_check_interval_minutes,
'max_interval': scheduler.max_check_interval_minutes
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.warning(f"Failed to save interval adjustment event log: {e}")
logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
except Exception as e:
logger.warning(f"Error adjusting check interval: {e}")
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
job_id='check_due_tasks', # Fixed job_id from check_cycle to check_due_tasks to match scheduler.py
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
scheduler.current_check_interval_minutes = optimal_interval
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

View File

@@ -7,7 +7,7 @@ Preserves original scheduled times from database to avoid rescheduling on server
from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_db_session, get_all_user_ids, get_session_for_user
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
@@ -28,35 +28,39 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
scheduler: TaskScheduler instance
"""
try:
db = get_db_session()
if not db:
logger.warning("Could not get database session to restore persona jobs")
return
user_ids = get_all_user_ids()
logger.info(f"[Restoration] Found {len(user_ids)} users to check for persona jobs")
try:
from models.onboarding import OnboardingSession
from services.research.research_persona_scheduler import (
schedule_research_persona_generation,
generate_research_persona_task
)
from services.persona.facebook.facebook_persona_scheduler import (
schedule_facebook_persona_generation,
generate_facebook_persona_task
)
from services.research.research_persona_service import ResearchPersonaService
from services.persona_data_service import PersonaDataService
for user_id in user_ids:
db = get_session_for_user(user_id)
if not db:
logger.warning(f"Could not get database session for user {user_id}")
continue
# Get all users who completed onboarding
completed_sessions = db.query(OnboardingSession).filter(
OnboardingSession.progress == 100.0
).all()
restored_count = 0
skipped_count = 0
now = datetime.utcnow().replace(tzinfo=timezone.utc)
for session in completed_sessions:
user_id = session.user_id
try:
from models.onboarding import OnboardingSession
from services.research.research_persona_scheduler import (
schedule_research_persona_generation,
generate_research_persona_task
)
from services.persona.facebook.facebook_persona_scheduler import (
schedule_facebook_persona_generation,
generate_facebook_persona_task
)
from services.research.research_persona_service import ResearchPersonaService
from services.persona_data_service import PersonaDataService
# Check if user completed onboarding
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).order_by(OnboardingSession.updated_at.desc()).first()
if not session or session.progress < 100.0:
continue
restored_count = 0
skipped_count = 0
now = datetime.utcnow().replace(tzinfo=timezone.utc)
# Restore research persona job
try:
@@ -69,7 +73,7 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
research_persona_exists = bool(research_persona_data)
if not research_persona_exists:
# Note: Clerk user_id already includes "user_" prefix
# Note: Clerk user_id already includes "user_" prefix if applicable, or we use the string as is
job_id = f"research_persona_{user_id}"
# Check if job already exists in scheduler (just started, so unlikely)
@@ -256,13 +260,13 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
except Exception as e:
logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")
if restored_count > 0:
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
if skipped_count > 0:
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")
finally:
db.close()
if restored_count > 0:
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) for user {user_id}")
if skipped_count > 0:
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) for user {user_id}")
finally:
db.close()
except Exception as e:
logger.warning(f"Error restoring persona jobs: {e}")

View File

@@ -9,7 +9,7 @@ from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_session_for_user, get_all_user_ids
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
@@ -31,98 +31,41 @@ async def restore_oauth_monitoring_tasks(scheduler):
"""
try:
logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
db = get_db_session()
if not db:
logger.warning("[OAuth Task Restoration] Could not get database session")
return
try:
# Get all existing OAuth tasks to find unique user_ids
existing_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Log existing tasks breakdown by platform
existing_by_platform = {}
for task in existing_tasks:
existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1
platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
logger.warning(
f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
)
# Check users who already have at least one OAuth task
users_to_check = list(user_ids_with_tasks)
# Also query all users from onboarding who completed step 5 (integrations)
# to catch users who connected platforms but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
restoration_summary = []
for user_id in user_ids:
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_
db = get_session_for_user(user_id)
if not db:
logger.debug(f"[OAuth Task Restoration] Could not get database session for user {user_id}")
continue
# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()
# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()
# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()
# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)
users_to_check = all_user_ids
logger.warning(
f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
total_created = 0
restoration_summary = [] # Collect summary for single log
for user_id in users_to_check:
try:
users_processed += 1
# Get existing tasks for this user
try:
existing_tasks = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.user_id == user_id
).all()
total_existing_tasks += len(existing_tasks)
except Exception as table_error:
# Table might not exist for this user yet
continue
# Get connected platforms for this user (silent - no logging)
connected_platforms = get_connected_platforms(user_id)
if not connected_platforms:
logger.debug(
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
)
continue
# Check which platforms are missing tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}
existing_platforms = {task.platform for task in existing_tasks}
missing_platforms = [
platform
@@ -138,53 +81,44 @@ async def restore_oauth_monitoring_tasks(scheduler):
platforms=missing_platforms
)
total_created += len(created)
# Collect summary info instead of logging immediately
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
if created:
total_created += len(created)
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
except Exception as e:
logger.warning(
f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue
finally:
db.close()
except Exception as e:
logger.warning(f"[OAuth Task Restoration] Error processing user {user_id}: {e}")
continue
# Log summary
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5])
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
# Final summary log with platform breakdown
final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
final_by_platform = {}
for task in final_existing_tasks:
final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1
logger.warning(
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Users Processed: {users_processed}\n"
f" ├─ Existing Tasks: {total_existing_tasks}\n"
f" ├─ New Tasks Created: {total_created}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Processed {users_processed} users."
)
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
# Single formatted summary log (similar to scheduler startup)
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
logger.warning(
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Tasks Created: {total_created}\n"
f" ├─ Users Processed: {len(users_to_check)}\n"
f" ├─ Platform Breakdown: {final_platform_summary}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
)
finally:
db.close()
return total_existing_tasks + total_created
except Exception as e:
logger.error(
f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
exc_info=True
)
return 0

View File

@@ -9,7 +9,7 @@ from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_session_for_user, get_all_user_ids
from models.platform_insights_monitoring_models import PlatformInsightsTask
from services.platform_insights_monitoring_service import create_platform_insights_task
from services.oauth_token_monitoring_service import get_connected_platforms
@@ -32,44 +32,36 @@ async def restore_platform_insights_tasks(scheduler):
"""
try:
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
db = get_db_session()
if not db:
logger.warning("[Platform Insights Restoration] Could not get database session")
return
try:
# Get all existing insights tasks to find unique user_ids
existing_tasks = db.query(PlatformInsightsTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Get all OAuth tasks to find users with connected platforms
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)
# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']
# Get users who have OAuth tasks for GSC or Bing
users_to_check = set()
for task in oauth_tasks:
if task.platform in insights_platforms:
users_to_check.add(task.user_id)
logger.warning(
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
f"with GSC/Bing OAuth connections."
)
if not users_to_check:
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
return
total_created = 0
restoration_summary = []
for user_id in users_to_check:
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
restoration_summary = []
# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if not db:
logger.debug(f"[Platform Insights Restoration] Could not get database session for user {user_id}")
continue
try:
users_processed += 1
# Get existing insights tasks
try:
existing_tasks = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.user_id == user_id
).all()
total_existing_tasks += len(existing_tasks)
except Exception as table_error:
# Table might not exist
continue
# Get connected platforms for this user
connected_platforms = get_connected_platforms(user_id)
@@ -77,17 +69,10 @@ async def restore_platform_insights_tasks(scheduler):
insights_connected = [p for p in connected_platforms if p in insights_platforms]
if not insights_connected:
logger.debug(
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
)
continue
# Check which platforms are missing insights tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}
existing_platforms = {task.platform for task in existing_tasks}
missing_platforms = [
platform
@@ -101,11 +86,10 @@ async def restore_platform_insights_tasks(scheduler):
try:
# Don't fetch site_url here - it requires API calls
# The executor will fetch it when the task runs (weekly)
# This avoids API calls during restoration
result = create_platform_insights_task(
user_id=user_id,
platform=platform,
site_url=None, # Will be fetched by executor when task runs
site_url=None,
db=db
)
@@ -125,28 +109,28 @@ async def restore_platform_insights_tasks(scheduler):
f"for user {user_id}: {e}"
)
continue
finally:
db.close()
except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
)
continue
except Exception as e:
logger.warning(f"[Platform Insights Restoration] Error processing user {user_id}: {e}")
continue
# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Processed {users_processed} users."
)
# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
)
finally:
db.close()
return total_existing_tasks + total_created
except Exception as e:
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
return 0

View File

@@ -19,7 +19,7 @@ from .exception_handler import (
SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
TaskLoaderError, SchedulerConfigError
)
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
@@ -28,6 +28,7 @@ from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .website_analysis_task_restoration import restore_website_analysis_tasks
from .platform_insights_task_restoration import restore_platform_insights_tasks
from .advertools_task_restoration import restore_advertools_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
@@ -185,13 +186,17 @@ class TaskScheduler:
await restore_persona_jobs(self)
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
total_oauth_tasks = await restore_oauth_monitoring_tasks(self)
oauth_tasks_count = total_oauth_tasks
# Restore/create missing website analysis tasks for users who completed onboarding
await restore_website_analysis_tasks(self)
website_analysis_tasks_count = await restore_website_analysis_tasks(self)
# Restore/create missing platform insights tasks for users with connected GSC/Bing
await restore_platform_insights_tasks(self)
platform_insights_tasks_count = await restore_platform_insights_tasks(self)
# Restore/create missing Advertools intelligence tasks
advertools_tasks_count = await restore_advertools_tasks(self)
# Validate and rebuild cumulative stats if needed
await self._validate_and_rebuild_cumulative_stats()
@@ -203,99 +208,47 @@ class TaskScheduler:
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
oauth_tasks_count = 0
oauth_tasks_details = []
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Count active tasks
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
# Get all tasks (for detailed logging)
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
total_oauth_tasks = len(all_oauth_tasks)
# Show platform breakdown for ALL tasks (active and inactive)
all_platforms = {}
active_platforms = {}
for task in all_oauth_tasks:
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
if task.status == 'active':
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
if total_oauth_tasks > 0:
# Log details about all tasks (not just active)
for task in all_oauth_tasks:
oauth_tasks_details.append(
f"user={task.user_id}, platform={task.platform}, status={task.status}"
)
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
logger.warning(
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
f"but {oauth_tasks_count} are active. "
f"All platforms: {all_platform_summary}. "
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
)
elif oauth_tasks_count > 0:
# Show platform breakdown for active tasks
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
if missing_in_db:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}. "
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
)
else:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}"
)
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Get website analysis tasks count
website_analysis_tasks_count = 0
try:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get website analysis tasks count: {e}")
# Get platform insights tasks count
platform_insights_tasks_count = 0
try:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get platform insights tasks count: {e}")
advertools_tasks_count = 0
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if not db:
continue
try:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
oauth_tasks_count += db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count += db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count += db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
from models.advertools_monitoring_models import AdvertoolsTask
advertools_tasks_count += db.query(AdvertoolsTask).filter(
AdvertoolsTask.status == 'active'
).count()
finally:
db.close()
except Exception as e:
logger.debug(f"Error counting tasks for user {user_id}: {e}")
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
# Build comprehensive startup log message
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
@@ -305,6 +258,8 @@ class TaskScheduler:
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
if platform_insights_tasks_count > 0:
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
if advertools_tasks_count > 0:
recurring_breakdown += f", Advertools: {advertools_tasks_count}"
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
@@ -347,7 +302,7 @@ class TaskScheduler:
if user_id_from_job:
try:
db = get_db_session()
db = get_session_for_user(user_id_from_job)
if db:
user_job_store = get_user_job_store_name(user_id_from_job, db)
if user_job_store == 'default':
@@ -357,6 +312,8 @@ class TaskScheduler:
)
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
db.close()
else:
user_context = f" | User: {user_id_from_job} | DB: Not Found"
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
@@ -370,134 +327,172 @@ class TaskScheduler:
# Show ALL OAuth tasks (active and inactive) for complete visibility
if total_oauth_tasks > 0:
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks, not just active ones
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks for this user
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified prefix logic for multi-user list
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
db.close()
db.close()
except Exception as e:
logger.warning(f"Error checking OAuth tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Add website analysis tasks details
if website_analysis_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking website analysis tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get website analysis task details: {e}")
# Add platform insights tasks details
if platform_insights_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking platform insights tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get platform insights task details: {e}")
# Add Advertools tasks details
if advertools_tasks_count > 0:
try:
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.advertools_monitoring_models import AdvertoolsTask
advertools_tasks = db.query(AdvertoolsTask).all()
for idx, task in enumerate(advertools_tasks):
is_last = idx == len(advertools_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and user_id == user_ids[-1]
prefix = " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_execution.isoformat() if task.next_execution else 'Not scheduled'
task_type = task.payload.get('type') if task.payload else 'unknown'
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: advertools_{task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking Advertools tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get Advertools task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
# Save scheduler start event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='start',
event_date=datetime.utcnow(),
check_interval_minutes=initial_interval,
active_strategies_count=active_strategies,
event_data={
'registered_types': registered_types,
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count,
'website_analysis_tasks': website_analysis_tasks_count,
'platform_insights_tasks': platform_insights_tasks_count
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler start event log: {e}")
# Disabled in multi-tenant mode as there is no global DB
# try:
# db = get_db_session()
# if db:
# event_log = SchedulerEventLog(...)
# db.add(event_log)
# db.commit()
# db.close()
# except Exception as e:
# logger.warning(f"Failed to save scheduler start event log: {e}")
except Exception as e:
logger.error(f"Failed to start scheduler: {e}")
@@ -544,25 +539,26 @@ class TaskScheduler:
logger.warning(shutdown_message)
# Save scheduler stop event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='stop',
event_date=datetime.utcnow(),
check_interval_minutes=self.current_check_interval_minutes,
event_data={
'total_checks': total_checks,
'total_executed': total_executed,
'total_failed': total_failed,
'jobs_cancelled': len(all_jobs_before)
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler stop event log: {e}")
# Disabled in multi-tenant mode as there is no global DB
# try:
# db = get_db_session()
# if db:
# event_log = SchedulerEventLog(
# event_type='stop',
# event_date=datetime.utcnow(),
# check_interval_minutes=self.current_check_interval_minutes,
# event_data={
# 'total_checks': total_checks,
# 'total_executed': total_executed,
# 'total_failed': total_failed,
# 'jobs_cancelled': len(all_jobs_before)
# }
# )
# db.add(event_log)
# db.commit()
# db.close()
# except Exception as e:
# logger.warning(f"Failed to save scheduler stop event log: {e}")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
@@ -630,12 +626,8 @@ class TaskScheduler:
return
try:
db = get_db_session()
if db:
await adjust_check_interval_if_needed(self, db)
db.close()
else:
logger.warning("Could not get database session for interval adjustment")
# Multi-tenant aware adjustment (iterates all users internally)
await adjust_check_interval_if_needed(self)
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
@@ -643,125 +635,14 @@ class TaskScheduler:
"""
Validate cumulative stats on scheduler startup and rebuild if needed.
This ensures cumulative stats are accurate after restarts.
NOTE: Disabled in multi-tenant mode as there is no global database for cumulative stats.
TODO: Implement per-user cumulative stats or a global admin database.
"""
db = None
try:
db = get_db_session()
if not db:
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
return
try:
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from models.scheduler_models import SchedulerEventLog
from sqlalchemy import func
# Get cumulative stats from persistent table
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
SchedulerCumulativeStats.id == 1
).first()
# Count check_cycle events in database
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'check_cycle'
).scalar() or 0
if cumulative_stats:
# Validate: cumulative stats should match event log count
if cumulative_stats.total_check_cycles != check_cycle_count:
logger.warning(
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
f"vs event_logs.count={check_cycle_count}. "
f"Rebuilding cumulative stats from event logs..."
)
# Rebuild from event logs
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
# Update cumulative stats
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
else:
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
else:
logger.warning(
f"[Scheduler] ✅ Cumulative stats validated: "
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
)
else:
# Cumulative stats table doesn't exist, create it from event logs
logger.warning(
"[Scheduler] Cumulative stats table not found. "
"Creating from event logs..."
)
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Created cumulative stats from event logs: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
except ImportError:
logger.warning(
"[Scheduler] Cumulative stats model not available. "
"Migration may not have been run yet. "
"Run: python backend/scripts/run_cumulative_stats_migration.py"
)
except Exception as e:
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
finally:
if db:
db.close()
logger.info("[Scheduler] Cumulative stats validation skipped (multi-tenant mode)")
return
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None, user_id: str = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.
@@ -816,7 +697,7 @@ class TaskScheduler:
# Execute task asynchronously
# Note: Each task gets its own database session to prevent concurrent access issues
execution_task = asyncio.create_task(
execute_task_async(self, task_type, task, summary)
execute_task_async(self, task_type, task, summary, user_id=user_id)
)
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
@@ -970,7 +851,7 @@ class TaskScheduler:
job_store_name = 'default'
if user_id:
try:
db = get_db_session()
db = get_session_for_user(user_id)
if db:
job_store_name = get_user_job_store_name(user_id, db)
db.close()
@@ -996,27 +877,28 @@ class TaskScheduler:
logger.warning(log_message)
# Log job scheduling to event log for dashboard
try:
event_db = get_db_session()
if event_db:
event_log = SchedulerEventLog(
event_type='job_scheduled',
event_date=datetime.utcnow(),
job_id=job_id,
job_type='one_time',
user_id=user_id,
event_data={
'function_name': func_name,
'job_store': job_store_name,
'scheduled_for': run_date.isoformat(),
'replace_existing': replace_existing
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.debug(f"Failed to log job scheduling event: {e}")
if user_id:
try:
event_db = get_session_for_user(user_id)
if event_db:
event_log = SchedulerEventLog(
event_type='job_scheduled',
event_date=datetime.utcnow(),
job_id=job_id,
job_type='one_time',
user_id=user_id,
event_data={
'function_name': func_name,
'job_store': job_store_name,
'scheduled_for': run_date.isoformat(),
'replace_existing': replace_existing
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.debug(f"Failed to log job scheduling event: {e}")
return job_id
except Exception as e:
@@ -1027,3 +909,14 @@ class TaskScheduler:
"""Check if scheduler is running."""
return self._running
async def execute_task_by_type(self, task_type: str, user_id: str, payload: Dict[str, Any]):
"""
Execute a task by type and payload immediately.
Used for one-time tasks triggered by system events.
"""
from collections import namedtuple
TaskStub = namedtuple('TaskStub', ['user_id', 'payload', 'id'])
task_stub = TaskStub(user_id=user_id, payload=payload, id=f"manual_{datetime.utcnow().timestamp()}")
await execute_task_async(self, task_type, task_stub, execution_source="manual")

View File

@@ -23,7 +23,8 @@ async def execute_task_async(
task_type: str,
task: Any,
summary: Optional[Dict[str, Any]] = None,
execution_source: str = "scheduler" # "scheduler" or "manual"
execution_source: str = "scheduler", # "scheduler" or "manual"
user_id: Optional[str] = None
):
"""
Execute a single task asynchronously with user isolation.
@@ -38,21 +39,25 @@ async def execute_task_async(
task_type: Type of task
task: Task instance from database (detached from original session)
summary: Optional summary dict to update with execution results
user_id: Optional user ID for user isolation (overrides extraction from task)
"""
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
db = None
user_id = None
try:
# Extract user context if available (for user isolation tracking)
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
except Exception as e:
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
if user_id is None:
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
elif hasattr(task, 'user_id') and task.user_id:
# Direct user_id on task object
user_id = task.user_id
except Exception as e:
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
# Log task execution start (detailed for important tasks)
task_db_id = getattr(task, 'id', None)
@@ -61,7 +66,7 @@ async def execute_task_async(
# Create a new database session for this async task
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
db = get_db_session()
db = get_db_session(user_id)
if db is None:
error = DatabaseError(
message=f"Failed to get database session for task {task_id}",
@@ -79,7 +84,15 @@ async def execute_task_async(
# Merge the detached task object into this session
# The task object was loaded in a different session and is now detached
if object_session(task) is None:
from sqlalchemy.inspection import inspect
is_model = False
try:
inspect(task)
is_model = True
except:
pass
if is_model and object_session(task) is None:
# Task is detached, need to merge it into this session
task = db.merge(task)

View File

@@ -4,15 +4,13 @@ Automatically creates missing website analysis tasks for users who completed onb
but don't have monitoring tasks created yet.
"""
from typing import List
from sqlalchemy.orm import Session
from datetime import datetime, timedelta, timezone
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
from services.website_analysis_monitoring_service import create_website_analysis_tasks
from services.website_analysis_monitoring_service import generate_website_analysis_tasks_task
from models.onboarding import OnboardingSession
from sqlalchemy import or_
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("website_analysis_restoration")
@@ -32,162 +30,103 @@ async def restore_website_analysis_tasks(scheduler):
"""
try:
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
db = get_db_session()
if not db:
logger.warning("[Website Analysis Restoration] Could not get database session")
return
try:
# Check if table exists (may not exist if migration hasn't run)
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
for user_id in user_ids:
try:
existing_tasks = db.query(WebsiteAnalysisTask).all()
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
)
return
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Log existing tasks breakdown by type
existing_by_type = {}
for task in existing_tasks:
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
logger.warning(
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
)
# Check users who already have at least one website analysis task
users_to_check = list(user_ids_with_tasks)
# Also query all users from onboarding who completed step 2 (website analysis)
# to catch users who completed onboarding but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_
db = get_session_for_user(user_id)
if not db:
logger.warning(f"[Website Analysis Restoration] Could not get database session for user {user_id}")
continue
# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()
# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()
# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()
# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)
users_to_check = all_user_ids
logger.warning(
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
users_to_check = list(user_ids_with_tasks)
total_created = 0
users_processed = 0
for user_id in users_to_check:
try:
users_processed += 1
# Check if user already has tasks
existing_user_tasks = [
task for task in existing_tasks
if task.user_id == user_id
]
if existing_user_tasks:
logger.debug(
f"[Website Analysis Restoration] User {user_id} already has "
f"{len(existing_user_tasks)} website analysis tasks, skipping"
# Check if table exists
try:
existing_user_tasks = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.user_id == user_id
).all()
total_existing_tasks += len(existing_user_tasks)
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist for user {user_id}: {table_error}"
)
continue
logger.warning(
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)
# Create missing tasks
result = create_website_analysis_tasks(user_id=user_id, db=db)
if result.get('success'):
tasks_count = result.get('tasks_created', 0)
total_created += tasks_count
if existing_user_tasks:
# User has tasks, we assume they are fine for now
continue
# Check onboarding status
try:
from services.onboarding.progress_service import OnboardingProgressService
# Use a local instance or static logic if service expects global DB (it shouldn't anymore)
# We can query OnboardingSession directly
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).order_by(OnboardingSession.updated_at.desc()).first()
if not session:
continue
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
if not is_completed:
continue
logger.warning(
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
f"for user {user_id}"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)
except Exception as e:
logger.warning(
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue
# Final summary log
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
final_by_type = {}
for task in final_existing_tasks:
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1
final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])
if total_created > 0:
logger.warning(
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
)
else:
logger.warning(
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
f"Type breakdown: {final_type_summary}"
)
finally:
db.close()
job_id = f"website_analysis_tasks_{user_id}"
existing_jobs = [j for j in scheduler.scheduler.get_jobs() if j.id == job_id]
if existing_jobs:
continue
run_date = datetime.now(timezone.utc) + timedelta(minutes=5)
scheduler.schedule_one_time_task(
func=generate_website_analysis_tasks_task,
run_date=run_date,
job_id=job_id,
kwargs={"user_id": user_id},
replace_existing=True,
)
total_created += 1
logger.warning(
f"[Website Analysis Restoration] ✅ Scheduled website analysis task creation "
f"for user {user_id} at {run_date.isoformat()}"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not check onboarding for user {user_id}: {e}")
finally:
db.close()
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Error processing user {user_id}: {e}")
logger.warning(
f"[Website Analysis Restoration] ✅ Completed. "
f"Processed {users_processed} users. "
f"Found {total_existing_tasks} existing tasks. "
f"Created {total_created} new tasks."
)
return total_existing_tasks + total_created
except Exception as e:
logger.error(
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
exc_info=True
)
return 0