AI platform insights monitoring and website analysis monitoring services added

This commit is contained in:
ajaysi
2025-11-11 15:57:45 +05:30
parent d99c7c83a7
commit 7191c7e7f0
81 changed files with 10860 additions and 1567 deletions

View File

@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
@@ -100,6 +101,7 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
logger.warning("\n".join(check_lines))
# Save check cycle event to database for historical tracking
event_log_id = None
try:
event_log = SchedulerEventLog(
event_type='check_cycle',
@@ -119,11 +121,63 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
}
)
db.add(event_log)
db.flush() # Flush to get the ID without committing
event_log_id = event_log.id
db.commit()
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
except Exception as e:
logger.warning(f"Failed to save check cycle event log: {e}")
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
if db:
db.rollback()
# Continue execution even if event log save fails
# Update cumulative stats table (persistent across restarts)
try:
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
# Update cumulative metrics by adding this cycle's values
# Get current cycle values (incremental, not total)
cycle_tasks_found = cycle_summary.get('total_found', 0)
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
# Update cumulative totals (additive)
cumulative_stats.total_check_cycles += 1
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
# We track it as-is from scheduler.stats (it's already cumulative)
# This ensures we don't double-count skipped tasks
if cumulative_stats.cumulative_tasks_skipped is None:
cumulative_stats.cumulative_tasks_skipped = 0
# Update to current total from scheduler (which is already cumulative)
current_skipped = scheduler.stats.get('tasks_skipped', 0)
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
cumulative_stats.cumulative_tasks_skipped = current_skipped
cumulative_stats.last_check_cycle_id = event_log_id
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
# Log at DEBUG level to avoid noise during normal operation
# This is expected behavior, not a warning
logger.debug(
f"[Check Cycle] Updated cumulative stats: "
f"cycles={cumulative_stats.total_check_cycles}, "
f"found={cumulative_stats.cumulative_tasks_found}, "
f"executed={cumulative_stats.cumulative_tasks_executed}, "
f"failed={cumulative_stats.cumulative_tasks_failed}"
)
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
if db:
db.rollback()
# Log warning but continue - cumulative stats can be rebuilt from event logs
logger.warning(
"[Check Cycle] ⚠️ Cumulative stats update failed. "
"Stats can be rebuilt from event logs on next dashboard load."
)
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()

View File

@@ -104,19 +104,16 @@ async def restore_oauth_monitoring_tasks(scheduler):
# Fallback to users with existing tasks only
total_created = 0
restoration_summary = [] # Collect summary for single log
for user_id in users_to_check:
try:
# Get connected platforms for this user
# Get connected platforms for this user (silent - no logging)
connected_platforms = get_connected_platforms(user_id)
logger.warning(
f"[OAuth Task Restoration] User {user_id}: "
f"Connected platforms: {connected_platforms}"
)
if not connected_platforms:
logger.debug(
f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
)
continue
@@ -134,11 +131,6 @@ async def restore_oauth_monitoring_tasks(scheduler):
]
if missing_platforms:
logger.warning(
f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
f"{connected_platforms} but missing tasks for: {missing_platforms}"
)
# Create missing tasks
created = create_oauth_monitoring_tasks(
user_id=user_id,
@@ -147,15 +139,10 @@ async def restore_oauth_monitoring_tasks(scheduler):
)
total_created += len(created)
logger.warning(
f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
f"for user {user_id}, platforms: {missing_platforms}"
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
f"for connected platforms: {connected_platforms}"
# Collect summary info instead of logging immediately
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
except Exception as e:
@@ -173,16 +160,23 @@ async def restore_oauth_monitoring_tasks(scheduler):
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
# Single formatted summary log (similar to scheduler startup)
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
logger.warning(
f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
f"Final platform breakdown: {final_platform_summary}"
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Tasks Created: {total_created}\n"
f" ├─ Users Processed: {len(users_to_check)}\n"
f" ├─ Platform Breakdown: {final_platform_summary}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
f"Platform breakdown: {final_platform_summary}"
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
)
finally:

View File

@@ -0,0 +1,152 @@
"""
Platform Insights Task Restoration
Automatically creates missing platform insights tasks for users who have connected platforms
but don't have insights tasks created yet.
"""
from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.platform_insights_monitoring_models import PlatformInsightsTask
from services.platform_insights_monitoring_service import create_platform_insights_task
from services.oauth_token_monitoring_service import get_connected_platforms
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
logger = get_service_logger("platform_insights_task_restoration")
async def restore_platform_insights_tasks(scheduler):
    """
    Create missing platform insights tasks for users with GSC/Bing connections.

    Candidate users are those that own at least one OAuthTokenMonitoringTask
    whose platform is 'gsc' or 'bing'. For each candidate, the platforms
    returned by get_connected_platforms() are narrowed to GSC/Bing, and a
    PlatformInsightsTask is created for every such platform the user does
    not already have a task for. A platform must therefore pass both checks
    (OAuth task present AND reported as connected) to get a task.

    The function is best-effort: a failure for a single platform or user is
    logged and skipped, and any other error is logged rather than raised.

    Args:
        scheduler: TaskScheduler instance. Not read by this function; it is
            accepted so the call shape matches the other restoration
            functions awaited by the scheduler.
    """
    try:
        logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")

        db = get_db_session()
        if not db:
            logger.warning("[Platform Insights Restoration] Could not get database session")
            return

        try:
            # Platforms that support insights (GSC and Bing only)
            insights_platforms = ['gsc', 'bing']

            # Index the existing insights tasks once (user_id -> platforms
            # already covered) so the per-user loop below is a dict lookup
            # instead of a rescan of every task.
            existing_tasks = db.query(PlatformInsightsTask).all()
            platforms_by_user = {}
            for task in existing_tasks:
                platforms_by_user.setdefault(task.user_id, set()).add(task.platform)

            # Users worth checking: anyone with a GSC or Bing OAuth task
            oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
            users_to_check = {
                task.user_id
                for task in oauth_tasks
                if task.platform in insights_platforms
            }

            logger.warning(
                f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
                f"for {len(platforms_by_user)} users. Checking {len(users_to_check)} users "
                f"with GSC/Bing OAuth connections."
            )

            if not users_to_check:
                logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
                return

            total_created = 0
            restoration_summary = []

            for user_id in users_to_check:
                try:
                    # Get connected platforms for this user, GSC/Bing only
                    connected_platforms = get_connected_platforms(user_id)
                    insights_connected = [p for p in connected_platforms if p in insights_platforms]

                    if not insights_connected:
                        logger.debug(
                            f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
                        )
                        continue

                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in insights_connected
                        if platform not in existing_platforms
                    ]

                    for platform in missing_platforms:
                        try:
                            # Don't fetch site_url here - it requires API calls.
                            # The executor will fetch it when the task runs (weekly),
                            # which keeps restoration free of external API calls.
                            result = create_platform_insights_task(
                                user_id=user_id,
                                platform=platform,
                                site_url=None,  # Will be fetched by executor when task runs
                                db=db
                            )

                            if result.get('success'):
                                total_created += 1
                                restoration_summary.append(
                                    f" ├─ User {user_id[:20]}...: {platform.upper()} task created"
                                )
                            else:
                                logger.debug(
                                    f"[Platform Insights Restoration] Failed to create {platform} task "
                                    f"for user {user_id}: {result.get('error')}"
                                )
                        except Exception as e:
                            # One bad platform must not stop the remaining ones
                            logger.debug(
                                f"[Platform Insights Restoration] Error creating {platform} task "
                                f"for user {user_id}: {e}"
                            )
                except Exception as e:
                    # One bad user must not stop the remaining ones
                    logger.debug(
                        f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
                    )
                    continue

            # Log summary
            if total_created > 0:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
                    "\n".join(restoration_summary)
                )
            else:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
                )
        finally:
            db.close()
    except Exception as e:
        logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)

View File

@@ -26,6 +26,8 @@ from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .website_analysis_task_restoration import restore_website_analysis_tasks
from .platform_insights_task_restoration import restore_platform_insights_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
@@ -185,6 +187,15 @@ class TaskScheduler:
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
# Restore/create missing website analysis tasks for users who completed onboarding
await restore_website_analysis_tasks(self)
# Restore/create missing platform insights tasks for users with connected GSC/Bing
await restore_platform_insights_tasks(self)
# Validate and rebuild cumulative stats if needed
await self._validate_and_rebuild_cumulative_stats()
# Get all scheduled APScheduler jobs (including one-time tasks)
all_jobs = self.scheduler.get_jobs()
registered_types = self.registry.get_registered_types()
@@ -260,27 +271,55 @@ class TaskScheduler:
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Get website analysis tasks count
website_analysis_tasks_count = 0
try:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get website analysis tasks count: {e}")
# Get platform insights tasks count
platform_insights_tasks_count = 0
try:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get platform insights tasks count: {e}")
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
# Build comprehensive startup log message
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
if oauth_tasks_count > 0:
recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}"
if website_analysis_tasks_count > 0:
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
if platform_insights_tasks_count > 0:
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
f" ├─ Check Interval: {initial_interval} minutes",
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Total Scheduled Jobs: {total_jobs}",
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})",
f" └─ One-Time Jobs: {apscheduler_one_time}"
]
# Add APScheduler job details
if all_jobs:
for idx, job in enumerate(all_jobs):
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0
prefix = " └─" if is_last else " ├─"
next_run = job.next_run_time
trigger_type = type(job.trigger).__name__
@@ -338,7 +377,7 @@ class TaskScheduler:
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
@@ -367,6 +406,71 @@ class TaskScheduler:
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Add website analysis tasks details
if website_analysis_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get website analysis task details: {e}")
# Add platform insights tasks details
if platform_insights_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get platform insights task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
@@ -384,7 +488,9 @@ class TaskScheduler:
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count
'oauth_monitoring_tasks': oauth_tasks_count,
'website_analysis_tasks': website_analysis_tasks_count,
'platform_insights_tasks': platform_insights_tasks_count
}
)
db.add(event_log)
@@ -533,6 +639,128 @@ class TaskScheduler:
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
async def _validate_and_rebuild_cumulative_stats(self):
    """
    Reconcile the persisted cumulative stats with the event log at startup.

    The SchedulerCumulativeStats row with id 1 is compared against the number
    of 'check_cycle' rows in SchedulerEventLog. If the row is missing, or its
    total_check_cycles differs from that count, the cycle / found / executed /
    failed totals are recomputed from the event log and committed. Only those
    four totals (plus the two timestamps) are rewritten here.

    Nothing is raised to the caller: a missing model module is reported with
    a migration hint, any other failure is logged, and the session is always
    closed.
    """
    db = None
    try:
        db = get_db_session()
        if not db:
            logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
            return

        try:
            from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
            from models.scheduler_models import SchedulerEventLog
            from sqlalchemy import func

            def load_event_log_totals():
                """Return (cycles, found, executed, failed) summed over check_cycle events, NULLs as 0."""
                row = db.query(
                    func.count(SchedulerEventLog.id),
                    func.sum(SchedulerEventLog.tasks_found),
                    func.sum(SchedulerEventLog.tasks_executed),
                    func.sum(SchedulerEventLog.tasks_failed)
                ).filter(
                    SchedulerEventLog.event_type == 'check_cycle'
                ).first()
                if not row:
                    return None
                return tuple(0 if value is None else value for value in row)

            def store_totals(stats_row, totals):
                """Write the recomputed totals onto the stats row and commit."""
                cycles, found, executed, failed = totals
                stats_row.total_check_cycles = int(cycles)
                stats_row.cumulative_tasks_found = int(found)
                stats_row.cumulative_tasks_executed = int(executed)
                stats_row.cumulative_tasks_failed = int(failed)
                stats_row.last_updated = datetime.utcnow()
                stats_row.updated_at = datetime.utcnow()
                db.commit()

            # Persisted stats row (single row, id 1)
            persisted = db.query(SchedulerCumulativeStats).filter(
                SchedulerCumulativeStats.id == 1
            ).first()

            # Number of check_cycle events recorded in the event log
            logged_cycles = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).scalar() or 0

            if not persisted:
                # No stats row yet: seed one from the event log
                logger.warning(
                    "[Scheduler] Cumulative stats table not found. "
                    "Creating from event logs..."
                )
                totals = load_event_log_totals()
                if totals:
                    persisted = SchedulerCumulativeStats.get_or_create(db)
                    store_totals(persisted, totals)
                    logger.warning(
                        f"[Scheduler] ✅ Created cumulative stats from event logs: "
                        f"cycles={totals[0]}, found={totals[1]}, "
                        f"executed={totals[2]}, failed={totals[3]}"
                    )
            elif persisted.total_check_cycles == logged_cycles:
                logger.warning(
                    f"[Scheduler] ✅ Cumulative stats validated: "
                    f"{persisted.total_check_cycles} check cycles match event logs"
                )
            else:
                # Row and event log disagree: the event log wins
                logger.warning(
                    f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
                    f"cumulative_stats.total_check_cycles={persisted.total_check_cycles} "
                    f"vs event_logs.count={logged_cycles}. "
                    f"Rebuilding cumulative stats from event logs..."
                )
                totals = load_event_log_totals()
                if totals:
                    store_totals(persisted, totals)
                    logger.warning(
                        f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
                        f"cycles={totals[0]}, found={totals[1]}, "
                        f"executed={totals[2]}, failed={totals[3]}"
                    )
                else:
                    logger.warning("[Scheduler] No check_cycle events found to rebuild from")
        except ImportError:
            logger.warning(
                "[Scheduler] Cumulative stats model not available. "
                "Migration may not have been run yet. "
                "Run: python backend/scripts/run_cumulative_stats_migration.py"
            )
    except Exception as e:
        logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
    finally:
        if db:
            db.close()
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.

View File

@@ -0,0 +1,193 @@
"""
Website Analysis Task Restoration
Automatically creates missing website analysis tasks for users who completed onboarding
but don't have monitoring tasks created yet.
"""
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
from services.website_analysis_monitoring_service import create_website_analysis_tasks
from models.onboarding import OnboardingSession
from sqlalchemy import or_
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("website_analysis_restoration")
async def restore_website_analysis_tasks(scheduler):
    """
    Create missing website analysis tasks for users who completed onboarding.

    Users are gathered from two places: everyone who already owns a
    WebsiteAnalysisTask, plus everyone whose OnboardingSession satisfies
    current_step >= 6 or progress >= 100.0 and whose status from the
    onboarding progress service reports is_completed. Users that already
    have at least one task are skipped; for the others
    create_website_analysis_tasks() is called.

    The function is best-effort: if the onboarding lookup fails, only users
    with existing tasks are considered; a failure for one user is logged and
    the loop moves on; any other error is logged rather than raised.

    Args:
        scheduler: TaskScheduler instance. Not read by this function; it is
            accepted so the call shape matches the other restoration
            functions awaited by the scheduler.
    """
    try:
        logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")

        db = get_db_session()
        if not db:
            logger.warning("[Website Analysis Restoration] Could not get database session")
            return

        try:
            # Check if table exists (may not exist if migration hasn't run)
            try:
                existing_tasks = db.query(WebsiteAnalysisTask).all()
            except Exception as table_error:
                logger.error(
                    f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
                    f"Please run database migration: create_website_analysis_monitoring_tables.sql"
                )
                return

            # One pass over the existing tasks: count per user (used to skip
            # users below without rescanning) and per task type (for the log).
            task_count_by_user = {}
            existing_by_type = {}
            for task in existing_tasks:
                task_count_by_user[task.user_id] = task_count_by_user.get(task.user_id, 0) + 1
                existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
            user_ids_with_tasks = set(task_count_by_user)

            type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
            logger.warning(
                f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
                f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
            )

            # Start from users who already have at least one task
            users_to_check = list(user_ids_with_tasks)

            # Add users who completed onboarding but never got tasks.
            # Completion follows OnboardingProgressService.get_onboarding_status():
            # is_completed = (current_step >= 6) or (progress >= 100.0)
            try:
                # Imported here rather than at module level, as in the original code
                from services.onboarding.progress_service import get_onboarding_progress_service

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Re-validate through the service so the result agrees with the rest of the app
                onboarding_user_ids = set()
                for session in completed_sessions:
                    status = progress_service.get_onboarding_status(session.user_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(session.user_id)

                # Users from onboarding that have no task yet. A set difference
                # gives the true count even when the two groups only partly
                # overlap (subtracting the two sizes could go negative).
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check = list(user_ids_with_tasks) + list(new_user_ids)

                logger.warning(
                    f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
                # Fallback to users with existing tasks only
                users_to_check = list(user_ids_with_tasks)

            total_created = 0
            users_processed = 0

            for user_id in users_to_check:
                try:
                    users_processed += 1

                    # Skip users that already have tasks
                    existing_count = task_count_by_user.get(user_id, 0)
                    if existing_count:
                        logger.debug(
                            f"[Website Analysis Restoration] User {user_id} already has "
                            f"{existing_count} website analysis tasks, skipping"
                        )
                        continue

                    logger.warning(
                        f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
                        f"but has no website analysis tasks. Creating tasks..."
                    )

                    # Create missing tasks
                    result = create_website_analysis_tasks(user_id=user_id, db=db)

                    if result.get('success'):
                        tasks_count = result.get('tasks_created', 0)
                        total_created += tasks_count
                        logger.warning(
                            f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
                            f"for user {user_id}"
                        )
                    else:
                        error = result.get('error', 'Unknown error')
                        logger.warning(
                            f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
                        )
                except Exception as e:
                    # One bad user must not stop the remaining ones
                    logger.warning(
                        f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    continue

            # Final summary log (re-query so newly created tasks are included)
            final_existing_tasks = db.query(WebsiteAnalysisTask).all()
            final_by_type = {}
            for task in final_existing_tasks:
                final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1

            final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])

            if total_created > 0:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
                    f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
                )
            else:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
                    f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
                    f"Type breakdown: {final_type_summary}"
                )
        finally:
            db.close()
    except Exception as e:
        logger.error(
            f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
            exc_info=True
        )