Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Advertools Task Restoration Utility
|
||||
Handles creation and restoration of Advertools intelligence tasks for users.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any
|
||||
from loguru import logger
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.onboarding import WebsiteAnalysis, OnboardingSession
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
|
||||
async def restore_advertools_tasks(scheduler: Any) -> int:
    """
    Restore/create Advertools tasks for all users who have completed Step 2.

    For each user ID from ``get_all_user_ids()``, a per-user session is opened
    and the user's ``OnboardingSession`` and ``WebsiteAnalysis`` are looked up.
    Users without a session, an analysis, or a ``website_url`` are skipped.
    For the remaining users, a weekly ``content_audit`` task and a weekly
    ``site_health`` task are created when no ``AdvertoolsTask`` with that
    payload type exists yet.

    Failures are isolated per user: an error is logged, that user's pending
    changes are rolled back, and processing continues with the next user.

    Args:
        scheduler: Accepted for call-signature compatibility; not referenced
            by this function.

    Returns:
        Number of tasks created/restored. Only tasks whose transaction was
        committed successfully are counted.
    """
    logger.info("Restoring Advertools intelligence tasks...")
    total_created = 0

    # (payload type, days until first execution, label used in the log line)
    task_specs = (
        ("content_audit", 1, "content audit"),  # Start tomorrow
        ("site_health", 2, "site health"),      # Start in 2 days
    )

    for user_id in get_all_user_ids():
        try:
            db = get_session_for_user(user_id)
            if not db:
                continue

            try:
                # Check if user has completed Step 2 (has WebsiteAnalysis)
                session = db.query(OnboardingSession).filter(
                    OnboardingSession.user_id == user_id
                ).first()
                if not session:
                    continue

                analysis = db.query(WebsiteAnalysis).filter(
                    WebsiteAnalysis.session_id == session.id
                ).first()
                if not analysis or not analysis.website_url:
                    continue

                # Labels of tasks staged in this transaction; they are only
                # counted and logged once the commit below has succeeded.
                pending_labels = []

                for task_type, start_in_days, label in task_specs:
                    existing_task = db.query(AdvertoolsTask).filter(
                        AdvertoolsTask.user_id == user_id,
                        func.json_extract(AdvertoolsTask.payload, '$.type') == task_type
                    ).first()
                    if existing_task:
                        continue

                    db.add(
                        AdvertoolsTask(
                            user_id=user_id,
                            website_url=analysis.website_url,
                            status='active',
                            next_execution=datetime.utcnow() + timedelta(days=start_in_days),
                            frequency_days=7,
                            payload={
                                "type": task_type,
                                "website_url": analysis.website_url
                            }
                        )
                    )
                    pending_labels.append(label)

                db.commit()

                # The commit succeeded, so the staged tasks really exist now.
                total_created += len(pending_labels)
                for label in pending_labels:
                    logger.info(f"Created weekly {label} task for user {user_id}")
            except Exception:
                # Discard anything staged for this user before the session is
                # closed; the outer handler logs the error.
                db.rollback()
                raise
            finally:
                db.close()
        except Exception as e:
            logger.error(f"Error restoring Advertools tasks for user {user_id}: {e}")

    return total_created
|
||||
@@ -7,18 +7,21 @@ from typing import TYPE_CHECKING, Dict, Any
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from .exception_handler import DatabaseError
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
# Import semantic monitoring for Phase 2B integration
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("check_cycle_handler")
|
||||
|
||||
# Track last semantic check per user to enforce 24-hour interval
|
||||
# In-memory cache is sufficient as it resets on restart (which is fine)
|
||||
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
|
||||
|
||||
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
"""
|
||||
@@ -42,154 +45,133 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
'total_failed': 0
|
||||
}
|
||||
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db is None:
|
||||
logger.error("[Scheduler Check] ❌ Failed to get database session")
|
||||
return
|
||||
|
||||
# Check for active strategies and adjust interval intelligently
|
||||
await adjust_check_interval_if_needed(scheduler, db)
|
||||
|
||||
# Check each registered task type
|
||||
registered_types = scheduler.registry.get_registered_types()
|
||||
for task_type in registered_types:
|
||||
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
|
||||
if type_summary:
|
||||
cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
|
||||
cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
|
||||
cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)
|
||||
|
||||
# Calculate totals
|
||||
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
|
||||
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
|
||||
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
|
||||
|
||||
# Log comprehensive check cycle summary
|
||||
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
|
||||
active_strategies = scheduler.stats.get('active_strategies_count', 0)
|
||||
active_executions = len(scheduler.active_executions)
|
||||
|
||||
# Build comprehensive check cycle summary log message
|
||||
check_lines = [
|
||||
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
|
||||
f" ├─ Duration: {check_duration:.2f}s",
|
||||
f" ├─ Active Strategies: {active_strategies}",
|
||||
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
|
||||
f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
|
||||
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
|
||||
]
|
||||
|
||||
if cycle_summary['tasks_found_by_type']:
|
||||
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
|
||||
for idx, (task_type, count) in enumerate(task_types_list):
|
||||
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
|
||||
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
|
||||
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
|
||||
prefix = " └─" if is_last_task_type else " ├─"
|
||||
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
|
||||
|
||||
if cycle_summary['total_found'] > 0:
|
||||
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
|
||||
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
|
||||
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
|
||||
else:
|
||||
check_lines.append(f" └─ No tasks found - scheduler idle")
|
||||
|
||||
# Log comprehensive check cycle summary in single message
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Save check cycle event to database for historical tracking
|
||||
event_log_id = None
|
||||
# Iterate through all users (Multi-tenancy support)
|
||||
user_ids = get_all_user_ids()
|
||||
total_active_strategies = 0
|
||||
|
||||
for user_id in user_ids:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"[Scheduler Check] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
try:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='check_cycle',
|
||||
event_date=check_start_time,
|
||||
check_cycle_number=scheduler.stats['total_checks'],
|
||||
check_interval_minutes=scheduler.current_check_interval_minutes,
|
||||
tasks_found=cycle_summary.get('total_found', 0),
|
||||
tasks_executed=cycle_summary.get('total_executed', 0),
|
||||
tasks_failed=cycle_summary.get('total_failed', 0),
|
||||
tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
|
||||
check_duration_seconds=check_duration,
|
||||
active_strategies_count=active_strategies,
|
||||
active_executions=active_executions,
|
||||
event_data={
|
||||
'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
|
||||
'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.flush() # Flush to get the ID without committing
|
||||
event_log_id = event_log.id
|
||||
db.commit()
|
||||
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Continue execution even if event log save fails
|
||||
|
||||
# Update cumulative stats table (persistent across restarts)
|
||||
try:
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
|
||||
# Update cumulative metrics by adding this cycle's values
|
||||
# Get current cycle values (incremental, not total)
|
||||
cycle_tasks_found = cycle_summary.get('total_found', 0)
|
||||
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
|
||||
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
|
||||
|
||||
# Update cumulative totals (additive)
|
||||
cumulative_stats.total_check_cycles += 1
|
||||
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
|
||||
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
|
||||
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
|
||||
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
|
||||
# We track it as-is from scheduler.stats (it's already cumulative)
|
||||
# This ensures we don't double-count skipped tasks
|
||||
if cumulative_stats.cumulative_tasks_skipped is None:
|
||||
cumulative_stats.cumulative_tasks_skipped = 0
|
||||
# Update to current total from scheduler (which is already cumulative)
|
||||
current_skipped = scheduler.stats.get('tasks_skipped', 0)
|
||||
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
|
||||
cumulative_stats.cumulative_tasks_skipped = current_skipped
|
||||
cumulative_stats.last_check_cycle_id = event_log_id
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
# Log at DEBUG level to avoid noise during normal operation
|
||||
# This is expected behavior, not a warning
|
||||
logger.debug(
|
||||
f"[Check Cycle] Updated cumulative stats: "
|
||||
f"cycles={cumulative_stats.total_check_cycles}, "
|
||||
f"found={cumulative_stats.cumulative_tasks_found}, "
|
||||
f"executed={cumulative_stats.cumulative_tasks_executed}, "
|
||||
f"failed={cumulative_stats.cumulative_tasks_failed}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Log warning but continue - cumulative stats can be rebuilt from event logs
|
||||
logger.warning(
|
||||
"[Check Cycle] ⚠️ Cumulative stats update failed. "
|
||||
"Stats can be rebuilt from event logs on next dashboard load."
|
||||
)
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
except Exception as e:
|
||||
error = DatabaseError(
|
||||
message=f"Error checking for due tasks: {str(e)}",
|
||||
original_error=e
|
||||
)
|
||||
scheduler.exception_handler.handle_exception(error)
|
||||
logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
# Check active strategies for this user (for interval adjustment)
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
user_active_strategies = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_strategies += user_active_strategies
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
|
||||
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
|
||||
# Check if 24 hours have passed since last check
|
||||
should_run_semantic = False
|
||||
now = datetime.utcnow()
|
||||
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
|
||||
|
||||
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
|
||||
should_run_semantic = True
|
||||
|
||||
if should_run_semantic:
|
||||
try:
|
||||
semantic_monitor = RealTimeSemanticMonitor(user_id)
|
||||
# Use public wrapper method which aggregates metrics
|
||||
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
|
||||
semantic_health = await semantic_monitor.check_semantic_health(user_id)
|
||||
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
|
||||
|
||||
# Update timestamp only on success/attempt to prevent spamming retries
|
||||
LAST_SEMANTIC_CHECKS[user_id] = now
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Check each registered task type for this user
|
||||
registered_types = scheduler.registry.get_registered_types()
|
||||
for task_type in registered_types:
|
||||
# Pass the user-specific session
|
||||
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary, user_id=user_id)
|
||||
if type_summary:
|
||||
cycle_summary['tasks_found_by_type'][task_type] = cycle_summary['tasks_found_by_type'].get(task_type, 0) + type_summary.get('found', 0)
|
||||
cycle_summary['tasks_executed_by_type'][task_type] = cycle_summary['tasks_executed_by_type'].get(task_type, 0) + type_summary.get('executed', 0)
|
||||
cycle_summary['tasks_failed_by_type'][task_type] = cycle_summary['tasks_failed_by_type'].get(task_type, 0) + type_summary.get('failed', 0)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler Check] Error processing user {user_id}: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Adjust interval based on TOTAL active strategies across all users
|
||||
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
|
||||
# because it's not multi-tenant aware yet.
|
||||
scheduler.stats['active_strategies_count'] = total_active_strategies
|
||||
|
||||
if total_active_strategies > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {total_active_strategies}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if total_active_strategies > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
job_id='check_due_tasks',
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
|
||||
# Calculate totals
|
||||
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
|
||||
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
|
||||
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
|
||||
|
||||
# Log comprehensive check cycle summary
|
||||
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
|
||||
active_executions = len(scheduler.active_executions)
|
||||
|
||||
# Build comprehensive check cycle summary log message
|
||||
check_lines = [
|
||||
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
|
||||
f" ├─ Duration: {check_duration:.2f}s",
|
||||
f" ├─ Active Strategies: {total_active_strategies}",
|
||||
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
|
||||
f" ├─ User Isolation: Enabled (Scanned {len(user_ids)} users)",
|
||||
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
|
||||
]
|
||||
|
||||
if cycle_summary['tasks_found_by_type']:
|
||||
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
|
||||
for idx, (task_type, count) in enumerate(task_types_list):
|
||||
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
|
||||
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
|
||||
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
|
||||
prefix = " └─" if is_last_task_type else " ├─"
|
||||
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
|
||||
|
||||
if cycle_summary['total_found'] > 0:
|
||||
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
|
||||
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
|
||||
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
|
||||
else:
|
||||
check_lines.append(f" └─ No tasks found - scheduler idle")
|
||||
|
||||
# Log comprehensive check cycle summary in single message
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
|
||||
|
||||
@@ -106,6 +106,7 @@ class DatabaseError(SchedulerException):
|
||||
message: str,
|
||||
user_id: Optional[int] = None,
|
||||
task_id: Optional[int] = None,
|
||||
task_type: Optional[str] = None,
|
||||
context: Dict[str, Any] = None,
|
||||
original_error: Exception = None
|
||||
):
|
||||
@@ -115,6 +116,7 @@ class DatabaseError(SchedulerException):
|
||||
severity=SchedulerErrorSeverity.CRITICAL,
|
||||
user_id=user_id,
|
||||
task_id=task_id,
|
||||
task_type=task_type,
|
||||
context=context or {},
|
||||
original_error=original_error
|
||||
)
|
||||
@@ -180,6 +182,9 @@ class SchedulerConfigError(SchedulerException):
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
user_id: Optional[int] = None,
|
||||
task_id: Optional[int] = None,
|
||||
task_type: Optional[str] = None,
|
||||
context: Dict[str, Any] = None,
|
||||
original_error: Exception = None
|
||||
):
|
||||
@@ -187,6 +192,9 @@ class SchedulerConfigError(SchedulerException):
|
||||
message=message,
|
||||
error_type=SchedulerErrorType.SCHEDULER_CONFIG_ERROR,
|
||||
severity=SchedulerErrorSeverity.CRITICAL,
|
||||
user_id=user_id,
|
||||
task_id=task_id,
|
||||
task_type=task_type,
|
||||
context=context or {},
|
||||
original_error=original_error
|
||||
)
|
||||
|
||||
@@ -7,9 +7,8 @@ from typing import TYPE_CHECKING
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
@@ -23,7 +22,7 @@ async def determine_optimal_interval(
|
||||
max_interval: int
|
||||
) -> int:
|
||||
"""
|
||||
Determine optimal check interval based on active strategies.
|
||||
Determine optimal check interval based on active strategies across all users.
|
||||
|
||||
Args:
|
||||
scheduler: TaskScheduler instance
|
||||
@@ -33,107 +32,100 @@ async def determine_optimal_interval(
|
||||
Returns:
|
||||
Optimal check interval in minutes
|
||||
"""
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
scheduler.stats['active_strategies_count'] = active_count
|
||||
|
||||
if active_count > 0:
|
||||
logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
|
||||
return min_interval
|
||||
else:
|
||||
logger.info(f"No active strategies with tasks - using {max_interval}min interval")
|
||||
return max_interval
|
||||
except Exception as e:
|
||||
logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
total_active_count = 0
|
||||
user_ids = get_all_user_ids()
|
||||
|
||||
# Default to shorter interval on error (safer)
|
||||
return min_interval
|
||||
for user_id in user_ids:
|
||||
db = None
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_count += user_active_count
|
||||
|
||||
# Optimization: If we found at least one active strategy, we can stop and return min_interval
|
||||
# (unless we want accurate stats)
|
||||
# For stats accuracy, we should continue.
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking user {user_id} for strategies: {e}")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
scheduler.stats['active_strategies_count'] = total_active_count
|
||||
|
||||
if total_active_count > 0:
|
||||
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
|
||||
return min_interval
|
||||
else:
|
||||
logger.info(f"No active strategies found - using {max_interval}min interval")
|
||||
return max_interval
|
||||
|
||||
|
||||
async def adjust_check_interval_if_needed(
|
||||
scheduler: 'TaskScheduler',
|
||||
db: Session
|
||||
db: Session = None # Deprecated parameter, ignored
|
||||
):
|
||||
"""
|
||||
Intelligently adjust check interval based on active strategies.
|
||||
Intelligently adjust check interval based on active strategies across all users.
|
||||
|
||||
If there are active strategies with tasks, check more frequently.
|
||||
If there are no active strategies, check less frequently.
|
||||
|
||||
Args:
|
||||
scheduler: TaskScheduler instance
|
||||
db: Database session
|
||||
db: Deprecated/Ignored
|
||||
"""
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
total_active_count = 0
|
||||
user_ids = get_all_user_ids()
|
||||
|
||||
for user_id in user_ids:
|
||||
user_db = None
|
||||
try:
|
||||
user_db = get_session_for_user(user_id)
|
||||
if user_db:
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=user_db)
|
||||
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_count += user_active_count
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking user {user_id} for strategies: {e}")
|
||||
finally:
|
||||
if user_db:
|
||||
user_db.close()
|
||||
|
||||
scheduler.stats['active_strategies_count'] = total_active_count
|
||||
|
||||
# Determine optimal interval
|
||||
if total_active_count > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
# Only reschedule if interval needs to change
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {total_active_count}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if total_active_count > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
scheduler.stats['active_strategies_count'] = active_count
|
||||
|
||||
# Determine optimal interval
|
||||
if active_count > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
# Only reschedule if interval needs to change
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {active_count}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
'check_due_tasks',
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
|
||||
# Save previous interval before updating
|
||||
previous_interval = scheduler.current_check_interval_minutes
|
||||
|
||||
# Update current interval
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
# Save interval adjustment event to database
|
||||
try:
|
||||
event_db = get_db_session()
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='interval_adjustment',
|
||||
event_date=datetime.utcnow(),
|
||||
previous_interval_minutes=previous_interval,
|
||||
new_interval_minutes=optimal_interval,
|
||||
check_interval_minutes=optimal_interval,
|
||||
active_strategies_count=active_count,
|
||||
event_data={
|
||||
'reason': 'intelligent_scheduling',
|
||||
'min_interval': scheduler.min_check_interval_minutes,
|
||||
'max_interval': scheduler.max_check_interval_minutes
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save interval adjustment event log: {e}")
|
||||
|
||||
logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error adjusting check interval: {e}")
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
job_id='check_due_tasks', # Fixed job_id from check_cycle to check_due_tasks to match scheduler.py
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ Preserves original scheduled times from database to avoid rescheduling on server
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.database import get_db_session
|
||||
from services.database import get_db_session, get_all_user_ids, get_session_for_user
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -28,35 +28,39 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
scheduler: TaskScheduler instance
|
||||
"""
|
||||
try:
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("Could not get database session to restore persona jobs")
|
||||
return
|
||||
user_ids = get_all_user_ids()
|
||||
logger.info(f"[Restoration] Found {len(user_ids)} users to check for persona jobs")
|
||||
|
||||
try:
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.research.research_persona_scheduler import (
|
||||
schedule_research_persona_generation,
|
||||
generate_research_persona_task
|
||||
)
|
||||
from services.persona.facebook.facebook_persona_scheduler import (
|
||||
schedule_facebook_persona_generation,
|
||||
generate_facebook_persona_task
|
||||
)
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
for user_id in user_ids:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get all users who completed onboarding
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.progress == 100.0
|
||||
).all()
|
||||
|
||||
restored_count = 0
|
||||
skipped_count = 0
|
||||
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
||||
|
||||
for session in completed_sessions:
|
||||
user_id = session.user_id
|
||||
try:
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.research.research_persona_scheduler import (
|
||||
schedule_research_persona_generation,
|
||||
generate_research_persona_task
|
||||
)
|
||||
from services.persona.facebook.facebook_persona_scheduler import (
|
||||
schedule_facebook_persona_generation,
|
||||
generate_facebook_persona_task
|
||||
)
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
|
||||
# Check if user completed onboarding
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session or session.progress < 100.0:
|
||||
continue
|
||||
|
||||
restored_count = 0
|
||||
skipped_count = 0
|
||||
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
||||
|
||||
# Restore research persona job
|
||||
try:
|
||||
@@ -69,7 +73,7 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
research_persona_exists = bool(research_persona_data)
|
||||
|
||||
if not research_persona_exists:
|
||||
# Note: Clerk user_id already includes "user_" prefix
|
||||
# Note: Clerk user_id already includes "user_" prefix if applicable, or we use the string as is
|
||||
job_id = f"research_persona_{user_id}"
|
||||
|
||||
# Check if job already exists in scheduler (just started, so unlikely)
|
||||
@@ -256,13 +260,13 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")
|
||||
|
||||
if restored_count > 0:
|
||||
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
|
||||
if skipped_count > 0:
|
||||
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
if restored_count > 0:
|
||||
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) for user {user_id}")
|
||||
if skipped_count > 0:
|
||||
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) for user {user_id}")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error restoring persona jobs: {e}")
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_session_for_user, get_all_user_ids
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
|
||||
|
||||
@@ -31,98 +31,41 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[OAuth Task Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Get all existing OAuth tasks to find unique user_ids
|
||||
existing_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Log existing tasks breakdown by platform
|
||||
existing_by_platform = {}
|
||||
for task in existing_tasks:
|
||||
existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1
|
||||
|
||||
platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
|
||||
)
|
||||
|
||||
# Check users who already have at least one OAuth task
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
# Also query all users from onboarding who completed step 5 (integrations)
|
||||
# to catch users who connected platforms but tasks weren't created
|
||||
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
|
||||
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
|
||||
# This matches the logic used in home page redirect and persona generation checks
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
restoration_summary = []
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.debug(f"[OAuth Task Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get onboarding progress service (same as used throughout the app)
|
||||
progress_service = get_onboarding_progress_service()
|
||||
|
||||
# Query all sessions and filter using the same completion logic as the service
|
||||
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
or_(
|
||||
OnboardingSession.current_step >= 6,
|
||||
OnboardingSession.progress >= 100.0
|
||||
)
|
||||
).all()
|
||||
|
||||
# Validate using the service method for consistency
|
||||
onboarding_user_ids = set()
|
||||
for session in completed_sessions:
|
||||
# Use the same service method as the rest of the app
|
||||
status = progress_service.get_onboarding_status(session.user_id)
|
||||
if status.get('is_completed', False):
|
||||
onboarding_user_ids.add(session.user_id)
|
||||
all_user_ids = users_to_check.copy()
|
||||
|
||||
# Add users from onboarding who might not have tasks yet
|
||||
for user_id in onboarding_user_ids:
|
||||
if user_id not in all_user_ids:
|
||||
all_user_ids.append(user_id)
|
||||
|
||||
users_to_check = all_user_ids
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
|
||||
f"({len(user_ids_with_tasks)} with existing tasks, "
|
||||
f"{len(onboarding_user_ids)} from onboarding sessions, "
|
||||
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")
|
||||
# Fallback to users with existing tasks only
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = [] # Collect summary for single log
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Get existing tasks for this user
|
||||
try:
|
||||
existing_tasks = db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_tasks)
|
||||
except Exception as table_error:
|
||||
# Table might not exist for this user yet
|
||||
continue
|
||||
|
||||
# Get connected platforms for this user (silent - no logging)
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
if not connected_platforms:
|
||||
logger.debug(
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Check which platforms are missing tasks
|
||||
existing_platforms = {
|
||||
task.platform
|
||||
for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
}
|
||||
existing_platforms = {task.platform for task in existing_tasks}
|
||||
|
||||
missing_platforms = [
|
||||
platform
|
||||
@@ -138,53 +81,44 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
platforms=missing_platforms
|
||||
)
|
||||
|
||||
total_created += len(created)
|
||||
# Collect summary info instead of logging immediately
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
if created:
|
||||
total_created += len(created)
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
continue
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Task Restoration] Error processing user {user_id}: {e}")
|
||||
continue
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5])
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
# Final summary log with platform breakdown
|
||||
final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
final_by_platform = {}
|
||||
for task in final_existing_tasks:
|
||||
final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Users Processed: {users_processed}\n"
|
||||
f" ├─ Existing Tasks: {total_existing_tasks}\n"
|
||||
f" ├─ New Tasks Created: {total_created}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Processed {users_processed} users."
|
||||
)
|
||||
|
||||
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
|
||||
|
||||
# Single formatted summary log (similar to scheduler startup)
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Tasks Created: {total_created}\n"
|
||||
f" ├─ Users Processed: {len(users_to_check)}\n"
|
||||
f" ├─ Platform Breakdown: {final_platform_summary}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_session_for_user, get_all_user_ids
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms
|
||||
@@ -32,44 +32,36 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Platform Insights Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Get all existing insights tasks to find unique user_ids
|
||||
existing_tasks = db.query(PlatformInsightsTask).all()
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Get all OAuth tasks to find users with connected platforms
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)
|
||||
|
||||
# Platforms that support insights (GSC and Bing only)
|
||||
insights_platforms = ['gsc', 'bing']
|
||||
|
||||
# Get users who have OAuth tasks for GSC or Bing
|
||||
users_to_check = set()
|
||||
for task in oauth_tasks:
|
||||
if task.platform in insights_platforms:
|
||||
users_to_check.add(task.user_id)
|
||||
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
|
||||
f"with GSC/Bing OAuth connections."
|
||||
)
|
||||
|
||||
if not users_to_check:
|
||||
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
|
||||
return
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = []
|
||||
|
||||
for user_id in users_to_check:
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
restoration_summary = []
|
||||
|
||||
# Platforms that support insights (GSC and Bing only)
|
||||
insights_platforms = ['gsc', 'bing']
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.debug(f"[Platform Insights Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Get existing insights tasks
|
||||
try:
|
||||
existing_tasks = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_tasks)
|
||||
except Exception as table_error:
|
||||
# Table might not exist
|
||||
continue
|
||||
|
||||
# Get connected platforms for this user
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
@@ -77,17 +69,10 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
insights_connected = [p for p in connected_platforms if p in insights_platforms]
|
||||
|
||||
if not insights_connected:
|
||||
logger.debug(
|
||||
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Check which platforms are missing insights tasks
|
||||
existing_platforms = {
|
||||
task.platform
|
||||
for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
}
|
||||
existing_platforms = {task.platform for task in existing_tasks}
|
||||
|
||||
missing_platforms = [
|
||||
platform
|
||||
@@ -101,11 +86,10 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
try:
|
||||
# Don't fetch site_url here - it requires API calls
|
||||
# The executor will fetch it when the task runs (weekly)
|
||||
# This avoids API calls during restoration
|
||||
result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform=platform,
|
||||
site_url=None, # Will be fetched by executor when task runs
|
||||
site_url=None,
|
||||
db=db
|
||||
)
|
||||
|
||||
@@ -125,28 +109,28 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
f"for user {user_id}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning(f"[Platform Insights Restoration] Error processing user {user_id}: {e}")
|
||||
continue
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
|
||||
"\n".join(restoration_summary)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
|
||||
f"Processed {users_processed} users."
|
||||
)
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
|
||||
"\n".join(restoration_summary)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
|
||||
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -19,7 +19,7 @@ from .exception_handler import (
|
||||
SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
|
||||
TaskLoaderError, SchedulerConfigError
|
||||
)
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..utils.user_job_store import get_user_job_store_name
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
@@ -28,6 +28,7 @@ from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
||||
from .platform_insights_task_restoration import restore_platform_insights_tasks
|
||||
from .advertools_task_restoration import restore_advertools_tasks
|
||||
from .check_cycle_handler import check_and_execute_due_tasks
|
||||
from .task_execution_handler import execute_task_async
|
||||
|
||||
@@ -185,13 +186,17 @@ class TaskScheduler:
|
||||
await restore_persona_jobs(self)
|
||||
|
||||
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
||||
await restore_oauth_monitoring_tasks(self)
|
||||
total_oauth_tasks = await restore_oauth_monitoring_tasks(self)
|
||||
oauth_tasks_count = total_oauth_tasks
|
||||
|
||||
# Restore/create missing website analysis tasks for users who completed onboarding
|
||||
await restore_website_analysis_tasks(self)
|
||||
website_analysis_tasks_count = await restore_website_analysis_tasks(self)
|
||||
|
||||
# Restore/create missing platform insights tasks for users with connected GSC/Bing
|
||||
await restore_platform_insights_tasks(self)
|
||||
platform_insights_tasks_count = await restore_platform_insights_tasks(self)
|
||||
|
||||
# Restore/create missing Advertools intelligence tasks
|
||||
advertools_tasks_count = await restore_advertools_tasks(self)
|
||||
|
||||
# Validate and rebuild cumulative stats if needed
|
||||
await self._validate_and_rebuild_cumulative_stats()
|
||||
@@ -203,99 +208,47 @@ class TaskScheduler:
|
||||
|
||||
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
|
||||
oauth_tasks_count = 0
|
||||
oauth_tasks_details = []
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Count active tasks
|
||||
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.status == 'active'
|
||||
).count()
|
||||
|
||||
# Get all tasks (for detailed logging)
|
||||
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
total_oauth_tasks = len(all_oauth_tasks)
|
||||
|
||||
# Show platform breakdown for ALL tasks (active and inactive)
|
||||
all_platforms = {}
|
||||
active_platforms = {}
|
||||
for task in all_oauth_tasks:
|
||||
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
|
||||
if task.status == 'active':
|
||||
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
|
||||
|
||||
if total_oauth_tasks > 0:
|
||||
# Log details about all tasks (not just active)
|
||||
for task in all_oauth_tasks:
|
||||
oauth_tasks_details.append(
|
||||
f"user={task.user_id}, platform={task.platform}, status={task.status}"
|
||||
)
|
||||
|
||||
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
|
||||
f"but {oauth_tasks_count} are active. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
|
||||
)
|
||||
elif oauth_tasks_count > 0:
|
||||
# Show platform breakdown for active tasks
|
||||
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
|
||||
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
|
||||
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
|
||||
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
|
||||
|
||||
if missing_in_db:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}"
|
||||
)
|
||||
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
|
||||
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
|
||||
f"tasks haven't been created. Error type: {type(e).__name__}"
|
||||
)
|
||||
|
||||
# Get website analysis tasks count
|
||||
website_analysis_tasks_count = 0
|
||||
try:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis tasks count: {e}")
|
||||
|
||||
# Get platform insights tasks count
|
||||
platform_insights_tasks_count = 0
|
||||
try:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights tasks count: {e}")
|
||||
advertools_tasks_count = 0
|
||||
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
continue
|
||||
|
||||
try:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
oauth_tasks_count += db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count += db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count += db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
advertools_tasks_count += db.query(AdvertoolsTask).filter(
|
||||
AdvertoolsTask.status == 'active'
|
||||
).count()
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error counting tasks for user {user_id}: {e}")
|
||||
|
||||
# Calculate job counts
|
||||
apscheduler_recurring = 1 # check_due_tasks
|
||||
apscheduler_one_time = len(all_jobs) - 1
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
|
||||
|
||||
# Build comprehensive startup log message
|
||||
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
|
||||
@@ -305,6 +258,8 @@ class TaskScheduler:
|
||||
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
|
||||
if platform_insights_tasks_count > 0:
|
||||
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
|
||||
if advertools_tasks_count > 0:
|
||||
recurring_breakdown += f", Advertools: {advertools_tasks_count}"
|
||||
|
||||
startup_lines = [
|
||||
f"[Scheduler] ✅ Task Scheduler Started",
|
||||
@@ -347,7 +302,7 @@ class TaskScheduler:
|
||||
|
||||
if user_id_from_job:
|
||||
try:
|
||||
db = get_db_session()
|
||||
db = get_session_for_user(user_id_from_job)
|
||||
if db:
|
||||
user_job_store = get_user_job_store_name(user_id_from_job, db)
|
||||
if user_job_store == 'default':
|
||||
@@ -357,6 +312,8 @@ class TaskScheduler:
|
||||
)
|
||||
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
|
||||
db.close()
|
||||
else:
|
||||
user_context = f" | User: {user_id_from_job} | DB: Not Found"
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
|
||||
@@ -370,134 +327,172 @@ class TaskScheduler:
|
||||
# Show ALL OAuth tasks (active and inactive) for complete visibility
|
||||
if total_oauth_tasks > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Get ALL tasks, not just active ones
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Get ALL tasks for this user
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified prefix logic for multi-user list
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
||||
f"Using 'default'. Error type: {type(e).__name__}"
|
||||
)
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
# Include status in the log line for visibility
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
||||
f"Using 'default'. Error type: {type(e).__name__}"
|
||||
)
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
# Include status in the log line for visibility
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking OAuth tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
||||
|
||||
# Add website analysis tasks details
|
||||
if website_analysis_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking website analysis tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis task details: {e}")
|
||||
|
||||
# Add platform insights tasks details
|
||||
if platform_insights_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking platform insights tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights task details: {e}")
|
||||
|
||||
# Add Advertools tasks details
|
||||
if advertools_tasks_count > 0:
|
||||
try:
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
advertools_tasks = db.query(AdvertoolsTask).all()
|
||||
|
||||
for idx, task in enumerate(advertools_tasks):
|
||||
is_last = idx == len(advertools_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_execution.isoformat() if task.next_execution else 'Not scheduled'
|
||||
task_type = task.payload.get('type') if task.payload else 'unknown'
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: advertools_{task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking Advertools tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get Advertools task details: {e}")
|
||||
|
||||
# Log comprehensive startup information in single message
|
||||
logger.warning("\n".join(startup_lines))
|
||||
|
||||
# Save scheduler start event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='start',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=initial_interval,
|
||||
active_strategies_count=active_strategies,
|
||||
event_data={
|
||||
'registered_types': registered_types,
|
||||
'total_jobs': total_jobs,
|
||||
'recurring_jobs': total_recurring,
|
||||
'one_time_jobs': apscheduler_one_time,
|
||||
'oauth_monitoring_tasks': oauth_tasks_count,
|
||||
'website_analysis_tasks': website_analysis_tasks_count,
|
||||
'platform_insights_tasks': platform_insights_tasks_count
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler start event log: {e}")
|
||||
# Disabled in multi-tenant mode as there is no global DB
|
||||
# try:
|
||||
# db = get_db_session()
|
||||
# if db:
|
||||
# event_log = SchedulerEventLog(...)
|
||||
# db.add(event_log)
|
||||
# db.commit()
|
||||
# db.close()
|
||||
# except Exception as e:
|
||||
# logger.warning(f"Failed to save scheduler start event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start scheduler: {e}")
|
||||
@@ -544,25 +539,26 @@ class TaskScheduler:
|
||||
logger.warning(shutdown_message)
|
||||
|
||||
# Save scheduler stop event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='stop',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=self.current_check_interval_minutes,
|
||||
event_data={
|
||||
'total_checks': total_checks,
|
||||
'total_executed': total_executed,
|
||||
'total_failed': total_failed,
|
||||
'jobs_cancelled': len(all_jobs_before)
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler stop event log: {e}")
|
||||
# Disabled in multi-tenant mode as there is no global DB
|
||||
# try:
|
||||
# db = get_db_session()
|
||||
# if db:
|
||||
# event_log = SchedulerEventLog(
|
||||
# event_type='stop',
|
||||
# event_date=datetime.utcnow(),
|
||||
# check_interval_minutes=self.current_check_interval_minutes,
|
||||
# event_data={
|
||||
# 'total_checks': total_checks,
|
||||
# 'total_executed': total_executed,
|
||||
# 'total_failed': total_failed,
|
||||
# 'jobs_cancelled': len(all_jobs_before)
|
||||
# }
|
||||
# )
|
||||
# db.add(event_log)
|
||||
# db.commit()
|
||||
# db.close()
|
||||
# except Exception as e:
|
||||
# logger.warning(f"Failed to save scheduler stop event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping scheduler: {e}")
|
||||
@@ -630,12 +626,8 @@ class TaskScheduler:
|
||||
return
|
||||
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
await adjust_check_interval_if_needed(self, db)
|
||||
db.close()
|
||||
else:
|
||||
logger.warning("Could not get database session for interval adjustment")
|
||||
# Multi-tenant aware adjustment (iterates all users internally)
|
||||
await adjust_check_interval_if_needed(self)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error triggering interval adjustment: {e}")
|
||||
|
||||
@@ -643,125 +635,14 @@ class TaskScheduler:
|
||||
"""
|
||||
Validate cumulative stats on scheduler startup and rebuild if needed.
|
||||
This ensures cumulative stats are accurate after restarts.
|
||||
|
||||
NOTE: Disabled in multi-tenant mode as there is no global database for cumulative stats.
|
||||
TODO: Implement per-user cumulative stats or a global admin database.
|
||||
"""
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
|
||||
return
|
||||
|
||||
try:
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from sqlalchemy import func
|
||||
|
||||
# Get cumulative stats from persistent table
|
||||
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
|
||||
SchedulerCumulativeStats.id == 1
|
||||
).first()
|
||||
|
||||
# Count check_cycle events in database
|
||||
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).scalar() or 0
|
||||
|
||||
if cumulative_stats:
|
||||
# Validate: cumulative stats should match event log count
|
||||
if cumulative_stats.total_check_cycles != check_cycle_count:
|
||||
logger.warning(
|
||||
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
|
||||
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
|
||||
f"vs event_logs.count={check_cycle_count}. "
|
||||
f"Rebuilding cumulative stats from event logs..."
|
||||
)
|
||||
|
||||
# Rebuild from event logs
|
||||
result = db.query(
|
||||
func.count(SchedulerEventLog.id),
|
||||
func.sum(SchedulerEventLog.tasks_found),
|
||||
func.sum(SchedulerEventLog.tasks_executed),
|
||||
func.sum(SchedulerEventLog.tasks_failed)
|
||||
).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).first()
|
||||
|
||||
if result:
|
||||
total_cycles = result[0] if result[0] is not None else 0
|
||||
total_found = result[1] if result[1] is not None else 0
|
||||
total_executed = result[2] if result[2] is not None else 0
|
||||
total_failed = result[3] if result[3] is not None else 0
|
||||
|
||||
# Update cumulative stats
|
||||
cumulative_stats.total_check_cycles = int(total_cycles)
|
||||
cumulative_stats.cumulative_tasks_found = int(total_found)
|
||||
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
||||
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
|
||||
f"cycles={total_cycles}, found={total_found}, "
|
||||
f"executed={total_executed}, failed={total_failed}"
|
||||
)
|
||||
else:
|
||||
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Cumulative stats validated: "
|
||||
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
|
||||
)
|
||||
else:
|
||||
# Cumulative stats table doesn't exist, create it from event logs
|
||||
logger.warning(
|
||||
"[Scheduler] Cumulative stats table not found. "
|
||||
"Creating from event logs..."
|
||||
)
|
||||
|
||||
result = db.query(
|
||||
func.count(SchedulerEventLog.id),
|
||||
func.sum(SchedulerEventLog.tasks_found),
|
||||
func.sum(SchedulerEventLog.tasks_executed),
|
||||
func.sum(SchedulerEventLog.tasks_failed)
|
||||
).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).first()
|
||||
|
||||
if result:
|
||||
total_cycles = result[0] if result[0] is not None else 0
|
||||
total_found = result[1] if result[1] is not None else 0
|
||||
total_executed = result[2] if result[2] is not None else 0
|
||||
total_failed = result[3] if result[3] is not None else 0
|
||||
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
cumulative_stats.total_check_cycles = int(total_cycles)
|
||||
cumulative_stats.cumulative_tasks_found = int(total_found)
|
||||
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
||||
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Created cumulative stats from event logs: "
|
||||
f"cycles={total_cycles}, found={total_found}, "
|
||||
f"executed={total_executed}, failed={total_failed}"
|
||||
)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"[Scheduler] Cumulative stats model not available. "
|
||||
"Migration may not have been run yet. "
|
||||
"Run: python backend/scripts/run_cumulative_stats_migration.py"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
logger.info("[Scheduler] Cumulative stats validation skipped (multi-tenant mode)")
|
||||
return
|
||||
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None, user_id: str = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Process due tasks for a specific task type.
|
||||
|
||||
@@ -816,7 +697,7 @@ class TaskScheduler:
|
||||
# Execute task asynchronously
|
||||
# Note: Each task gets its own database session to prevent concurrent access issues
|
||||
execution_task = asyncio.create_task(
|
||||
execute_task_async(self, task_type, task, summary)
|
||||
execute_task_async(self, task_type, task, summary, user_id=user_id)
|
||||
)
|
||||
|
||||
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
||||
@@ -970,7 +851,7 @@ class TaskScheduler:
|
||||
job_store_name = 'default'
|
||||
if user_id:
|
||||
try:
|
||||
db = get_db_session()
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
job_store_name = get_user_job_store_name(user_id, db)
|
||||
db.close()
|
||||
@@ -996,27 +877,28 @@ class TaskScheduler:
|
||||
logger.warning(log_message)
|
||||
|
||||
# Log job scheduling to event log for dashboard
|
||||
try:
|
||||
event_db = get_db_session()
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='job_scheduled',
|
||||
event_date=datetime.utcnow(),
|
||||
job_id=job_id,
|
||||
job_type='one_time',
|
||||
user_id=user_id,
|
||||
event_data={
|
||||
'function_name': func_name,
|
||||
'job_store': job_store_name,
|
||||
'scheduled_for': run_date.isoformat(),
|
||||
'replace_existing': replace_existing
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to log job scheduling event: {e}")
|
||||
if user_id:
|
||||
try:
|
||||
event_db = get_session_for_user(user_id)
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='job_scheduled',
|
||||
event_date=datetime.utcnow(),
|
||||
job_id=job_id,
|
||||
job_type='one_time',
|
||||
user_id=user_id,
|
||||
event_data={
|
||||
'function_name': func_name,
|
||||
'job_store': job_store_name,
|
||||
'scheduled_for': run_date.isoformat(),
|
||||
'replace_existing': replace_existing
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to log job scheduling event: {e}")
|
||||
|
||||
return job_id
|
||||
except Exception as e:
|
||||
@@ -1027,3 +909,14 @@ class TaskScheduler:
|
||||
"""Check if scheduler is running."""
|
||||
return self._running
|
||||
|
||||
async def execute_task_by_type(self, task_type: str, user_id: str, payload: Dict[str, Any]):
    """
    Run one task of the given type right away, outside the regular check cycle.

    Used for one-time tasks triggered by system events. A lightweight
    stand-in task record (not a database model) is built from the arguments
    and handed to ``execute_task_async`` with ``execution_source="manual"``.

    Args:
        task_type: Task type identifier forwarded to ``execute_task_async``.
        user_id: ID of the user the task runs for; stored on the stand-in task.
        payload: Task payload; stored on the stand-in task.
    """
    from collections import namedtuple

    # Minimal task-like record exposing exactly: user_id, payload, id.
    TaskStub = namedtuple('TaskStub', ['user_id', 'payload', 'id'])

    # Synthetic id built from datetime.utcnow() so each manual run gets its own label.
    manual_id = f"manual_{datetime.utcnow().timestamp()}"
    stub = TaskStub(user_id=user_id, payload=payload, id=manual_id)

    await execute_task_async(self, task_type, stub, execution_source="manual")
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ async def execute_task_async(
|
||||
task_type: str,
|
||||
task: Any,
|
||||
summary: Optional[Dict[str, Any]] = None,
|
||||
execution_source: str = "scheduler" # "scheduler" or "manual"
|
||||
execution_source: str = "scheduler", # "scheduler" or "manual"
|
||||
user_id: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Execute a single task asynchronously with user isolation.
|
||||
@@ -38,21 +39,25 @@ async def execute_task_async(
|
||||
task_type: Type of task
|
||||
task: Task instance from database (detached from original session)
|
||||
summary: Optional summary dict to update with execution results
|
||||
user_id: Optional user ID for user isolation (overrides extraction from task)
|
||||
"""
|
||||
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
||||
db = None
|
||||
user_id = None
|
||||
|
||||
try:
|
||||
# Extract user context if available (for user isolation tracking)
|
||||
try:
|
||||
if hasattr(task, 'strategy') and task.strategy:
|
||||
user_id = getattr(task.strategy, 'user_id', None)
|
||||
elif hasattr(task, 'strategy_id') and task.strategy_id:
|
||||
# Will query user_id after we have db session
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
|
||||
if user_id is None:
|
||||
try:
|
||||
if hasattr(task, 'strategy') and task.strategy:
|
||||
user_id = getattr(task.strategy, 'user_id', None)
|
||||
elif hasattr(task, 'strategy_id') and task.strategy_id:
|
||||
# Will query user_id after we have db session
|
||||
pass
|
||||
elif hasattr(task, 'user_id') and task.user_id:
|
||||
# Direct user_id on task object
|
||||
user_id = task.user_id
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
|
||||
|
||||
# Log task execution start (detailed for important tasks)
|
||||
task_db_id = getattr(task, 'id', None)
|
||||
@@ -61,7 +66,7 @@ async def execute_task_async(
|
||||
|
||||
# Create a new database session for this async task
|
||||
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
|
||||
db = get_db_session()
|
||||
db = get_db_session(user_id)
|
||||
if db is None:
|
||||
error = DatabaseError(
|
||||
message=f"Failed to get database session for task {task_id}",
|
||||
@@ -79,7 +84,15 @@ async def execute_task_async(
|
||||
|
||||
# Merge the detached task object into this session
|
||||
# The task object was loaded in a different session and is now detached
|
||||
if object_session(task) is None:
|
||||
from sqlalchemy.inspection import inspect
|
||||
is_model = False
|
||||
try:
|
||||
inspect(task)
|
||||
is_model = True
|
||||
except:
|
||||
pass
|
||||
|
||||
if is_model and object_session(task) is None:
|
||||
# Task is detached, need to merge it into this session
|
||||
task = db.merge(task)
|
||||
|
||||
|
||||
@@ -4,15 +4,13 @@ Automatically creates missing website analysis tasks for users who completed onb
|
||||
but don't have monitoring tasks created yet.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from services.website_analysis_monitoring_service import create_website_analysis_tasks
|
||||
from services.website_analysis_monitoring_service import generate_website_analysis_tasks_task
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("website_analysis_restoration")
|
||||
@@ -32,162 +30,103 @@ async def restore_website_analysis_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Website Analysis Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Check if table exists (may not exist if migration hasn't run)
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
existing_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
except Exception as table_error:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
|
||||
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
|
||||
)
|
||||
return
|
||||
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Log existing tasks breakdown by type
|
||||
existing_by_type = {}
|
||||
for task in existing_tasks:
|
||||
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
|
||||
|
||||
type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
|
||||
)
|
||||
|
||||
# Check users who already have at least one website analysis task
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
# Also query all users from onboarding who completed step 2 (website analysis)
|
||||
# to catch users who completed onboarding but tasks weren't created
|
||||
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
|
||||
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
|
||||
# This matches the logic used in home page redirect and persona generation checks
|
||||
try:
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get onboarding progress service (same as used throughout the app)
|
||||
progress_service = get_onboarding_progress_service()
|
||||
|
||||
# Query all sessions and filter using the same completion logic as the service
|
||||
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
or_(
|
||||
OnboardingSession.current_step >= 6,
|
||||
OnboardingSession.progress >= 100.0
|
||||
)
|
||||
).all()
|
||||
|
||||
# Validate using the service method for consistency
|
||||
onboarding_user_ids = set()
|
||||
for session in completed_sessions:
|
||||
# Use the same service method as the rest of the app
|
||||
status = progress_service.get_onboarding_status(session.user_id)
|
||||
if status.get('is_completed', False):
|
||||
onboarding_user_ids.add(session.user_id)
|
||||
|
||||
all_user_ids = users_to_check.copy()
|
||||
|
||||
# Add users from onboarding who might not have tasks yet
|
||||
for user_id in onboarding_user_ids:
|
||||
if user_id not in all_user_ids:
|
||||
all_user_ids.append(user_id)
|
||||
|
||||
users_to_check = all_user_ids
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
|
||||
f"({len(user_ids_with_tasks)} with existing tasks, "
|
||||
f"{len(onboarding_user_ids)} from onboarding sessions, "
|
||||
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
|
||||
# Fallback to users with existing tasks only
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Check if user already has tasks
|
||||
existing_user_tasks = [
|
||||
task for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
]
|
||||
|
||||
if existing_user_tasks:
|
||||
logger.debug(
|
||||
f"[Website Analysis Restoration] User {user_id} already has "
|
||||
f"{len(existing_user_tasks)} website analysis tasks, skipping"
|
||||
# Check if table exists
|
||||
try:
|
||||
existing_user_tasks = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_user_tasks)
|
||||
except Exception as table_error:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist for user {user_id}: {table_error}"
|
||||
)
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
|
||||
f"but has no website analysis tasks. Creating tasks..."
|
||||
)
|
||||
|
||||
# Create missing tasks
|
||||
result = create_website_analysis_tasks(user_id=user_id, db=db)
|
||||
|
||||
if result.get('success'):
|
||||
tasks_count = result.get('tasks_created', 0)
|
||||
total_created += tasks_count
|
||||
if existing_user_tasks:
|
||||
# User has tasks, we assume they are fine for now
|
||||
continue
|
||||
|
||||
# Check onboarding status
|
||||
try:
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
|
||||
# Use a local instance or static logic if service expects global DB (it shouldn't anymore)
|
||||
# We can query OnboardingSession directly
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
continue
|
||||
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
|
||||
if not is_completed:
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
|
||||
f"for user {user_id}"
|
||||
)
|
||||
else:
|
||||
error = result.get('error', 'Unknown error')
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
|
||||
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
|
||||
f"but has no website analysis tasks. Creating tasks..."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
continue
|
||||
|
||||
# Final summary log
|
||||
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
final_by_type = {}
|
||||
for task in final_existing_tasks:
|
||||
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1
|
||||
|
||||
final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])
|
||||
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
|
||||
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
|
||||
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
|
||||
f"Type breakdown: {final_type_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
job_id = f"website_analysis_tasks_{user_id}"
|
||||
existing_jobs = [j for j in scheduler.scheduler.get_jobs() if j.id == job_id]
|
||||
if existing_jobs:
|
||||
continue
|
||||
|
||||
run_date = datetime.now(timezone.utc) + timedelta(minutes=5)
|
||||
scheduler.schedule_one_time_task(
|
||||
func=generate_website_analysis_tasks_task,
|
||||
run_date=run_date,
|
||||
job_id=job_id,
|
||||
kwargs={"user_id": user_id},
|
||||
replace_existing=True,
|
||||
)
|
||||
total_created += 1
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Scheduled website analysis task creation "
|
||||
f"for user {user_id} at {run_date.isoformat()}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not check onboarding for user {user_id}: {e}")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Error processing user {user_id}: {e}")
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Completed. "
|
||||
f"Processed {users_processed} users. "
|
||||
f"Found {total_existing_tasks} existing tasks. "
|
||||
f"Created {total_created} new tasks."
|
||||
)
|
||||
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user