AI platform insights monitoring and website analysis monitoring services added
This commit is contained in:
@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session
|
||||
from services.database import get_db_session
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from .exception_handler import DatabaseError
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
@@ -100,6 +101,7 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Save check cycle event to database for historical tracking
|
||||
event_log_id = None
|
||||
try:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='check_cycle',
|
||||
@@ -119,11 +121,63 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.flush() # Flush to get the ID without committing
|
||||
event_log_id = event_log.id
|
||||
db.commit()
|
||||
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save check cycle event log: {e}")
|
||||
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Continue execution even if event log save fails
|
||||
|
||||
# Update cumulative stats table (persistent across restarts)
|
||||
try:
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
|
||||
# Update cumulative metrics by adding this cycle's values
|
||||
# Get current cycle values (incremental, not total)
|
||||
cycle_tasks_found = cycle_summary.get('total_found', 0)
|
||||
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
|
||||
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
|
||||
|
||||
# Update cumulative totals (additive)
|
||||
cumulative_stats.total_check_cycles += 1
|
||||
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
|
||||
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
|
||||
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
|
||||
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
|
||||
# We track it as-is from scheduler.stats (it's already cumulative)
|
||||
# This ensures we don't double-count skipped tasks
|
||||
if cumulative_stats.cumulative_tasks_skipped is None:
|
||||
cumulative_stats.cumulative_tasks_skipped = 0
|
||||
# Update to current total from scheduler (which is already cumulative)
|
||||
current_skipped = scheduler.stats.get('tasks_skipped', 0)
|
||||
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
|
||||
cumulative_stats.cumulative_tasks_skipped = current_skipped
|
||||
cumulative_stats.last_check_cycle_id = event_log_id
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
# Log at DEBUG level to avoid noise during normal operation
|
||||
# This is expected behavior, not a warning
|
||||
logger.debug(
|
||||
f"[Check Cycle] Updated cumulative stats: "
|
||||
f"cycles={cumulative_stats.total_check_cycles}, "
|
||||
f"found={cumulative_stats.cumulative_tasks_found}, "
|
||||
f"executed={cumulative_stats.cumulative_tasks_executed}, "
|
||||
f"failed={cumulative_stats.cumulative_tasks_failed}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Log warning but continue - cumulative stats can be rebuilt from event logs
|
||||
logger.warning(
|
||||
"[Check Cycle] ⚠️ Cumulative stats update failed. "
|
||||
"Stats can be rebuilt from event logs on next dashboard load."
|
||||
)
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
@@ -104,19 +104,16 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
# Fallback to users with existing tasks only
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = [] # Collect summary for single log
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
# Get connected platforms for this user
|
||||
# Get connected platforms for this user (silent - no logging)
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] User {user_id}: "
|
||||
f"Connected platforms: {connected_platforms}"
|
||||
)
|
||||
|
||||
if not connected_platforms:
|
||||
logger.debug(
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -134,11 +131,6 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
]
|
||||
|
||||
if missing_platforms:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
|
||||
f"{connected_platforms} but missing tasks for: {missing_platforms}"
|
||||
)
|
||||
|
||||
# Create missing tasks
|
||||
created = create_oauth_monitoring_tasks(
|
||||
user_id=user_id,
|
||||
@@ -147,15 +139,10 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
)
|
||||
|
||||
total_created += len(created)
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
|
||||
f"for user {user_id}, platforms: {missing_platforms}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
|
||||
f"for connected platforms: {connected_platforms}"
|
||||
# Collect summary info instead of logging immediately
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -173,16 +160,23 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
|
||||
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
|
||||
|
||||
# Single formatted summary log (similar to scheduler startup)
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
|
||||
f"Final platform breakdown: {final_platform_summary}"
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Tasks Created: {total_created}\n"
|
||||
f" ├─ Users Processed: {len(users_to_check)}\n"
|
||||
f" ├─ Platform Breakdown: {final_platform_summary}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
|
||||
f"Platform breakdown: {final_platform_summary}"
|
||||
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""
|
||||
Platform Insights Task Restoration
|
||||
Automatically creates missing platform insights tasks for users who have connected platforms
|
||||
but don't have insights tasks created yet.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
|
||||
logger = get_service_logger("platform_insights_task_restoration")
|
||||
|
||||
|
||||
async def restore_platform_insights_tasks(scheduler):
    """
    Restore/create missing platform insights tasks for all users.

    Every user that owns an OAuth monitoring task for an insights-capable
    platform (GSC or Bing) is checked. The platforms reported by
    ``get_connected_platforms`` are compared with the user's existing
    ``PlatformInsightsTask`` rows, and ``create_platform_insights_task`` is
    called for each connected platform that has no task yet.

    The restoration is best-effort: a failure for one platform or one user is
    logged and the loop moves on, and any unexpected error is logged rather
    than raised.

    Args:
        scheduler: TaskScheduler instance (not referenced by this function).
    """
    try:
        logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Platform Insights Restoration] Could not get database session")
            return

        try:
            # Index existing insights tasks by user once, so the per-user
            # check below is a dict lookup instead of a scan over all tasks.
            existing_tasks = db.query(PlatformInsightsTask).all()
            platforms_by_user = {}
            for task in existing_tasks:
                platforms_by_user.setdefault(task.user_id, set()).add(task.platform)

            # Platforms that support insights (GSC and Bing only)
            insights_platforms = ('gsc', 'bing')

            # Users who have OAuth tasks for GSC or Bing
            oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
            users_to_check = {
                task.user_id
                for task in oauth_tasks
                if task.platform in insights_platforms
            }

            logger.warning(
                f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
                f"for {len(platforms_by_user)} users. Checking {len(users_to_check)} users "
                f"with GSC/Bing OAuth connections."
            )

            if not users_to_check:
                logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
                return

            total_created = 0
            restoration_summary = []

            for user_id in users_to_check:
                try:
                    # Keep only the connected platforms that support insights
                    connected_platforms = get_connected_platforms(user_id)
                    insights_connected = [p for p in connected_platforms if p in insights_platforms]

                    if not insights_connected:
                        logger.debug(
                            f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
                        )
                        continue

                    existing_platforms = platforms_by_user.get(user_id, set())
                    missing_platforms = [
                        platform
                        for platform in insights_connected
                        if platform not in existing_platforms
                    ]

                    for platform in missing_platforms:
                        try:
                            # Don't fetch site_url here - it requires API calls.
                            # The executor will fetch it when the task runs (weekly),
                            # which avoids API calls during restoration.
                            result = create_platform_insights_task(
                                user_id=user_id,
                                platform=platform,
                                site_url=None,  # Will be fetched by executor when task runs
                                db=db
                            )

                            if result.get('success'):
                                total_created += 1
                                restoration_summary.append(
                                    f"   ├─ User {user_id[:20]}...: {platform.upper()} task created"
                                )
                            else:
                                # WARNING rather than DEBUG: a task that could not be
                                # created should be visible in production logs.
                                logger.warning(
                                    f"[Platform Insights Restoration] Failed to create {platform} task "
                                    f"for user {user_id}: {result.get('error')}"
                                )
                        except Exception as e:
                            logger.warning(
                                f"[Platform Insights Restoration] Error creating {platform} task "
                                f"for user {user_id}: {e}"
                            )
                            continue

                except Exception as e:
                    logger.warning(
                        f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
                    )
                    continue

            # Log summary
            if total_created > 0:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
                    "\n".join(restoration_summary)
                )
            else:
                logger.warning(
                    f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
                    f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
|
||||
|
||||
@@ -26,6 +26,8 @@ from models.scheduler_models import SchedulerEventLog
|
||||
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
|
||||
from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
||||
from .platform_insights_task_restoration import restore_platform_insights_tasks
|
||||
from .check_cycle_handler import check_and_execute_due_tasks
|
||||
from .task_execution_handler import execute_task_async
|
||||
|
||||
@@ -185,6 +187,15 @@ class TaskScheduler:
|
||||
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
||||
await restore_oauth_monitoring_tasks(self)
|
||||
|
||||
# Restore/create missing website analysis tasks for users who completed onboarding
|
||||
await restore_website_analysis_tasks(self)
|
||||
|
||||
# Restore/create missing platform insights tasks for users with connected GSC/Bing
|
||||
await restore_platform_insights_tasks(self)
|
||||
|
||||
# Validate and rebuild cumulative stats if needed
|
||||
await self._validate_and_rebuild_cumulative_stats()
|
||||
|
||||
# Get all scheduled APScheduler jobs (including one-time tasks)
|
||||
all_jobs = self.scheduler.get_jobs()
|
||||
registered_types = self.registry.get_registered_types()
|
||||
@@ -260,27 +271,55 @@ class TaskScheduler:
|
||||
f"tasks haven't been created. Error type: {type(e).__name__}"
|
||||
)
|
||||
|
||||
# Get website analysis tasks count
|
||||
website_analysis_tasks_count = 0
|
||||
try:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis tasks count: {e}")
|
||||
|
||||
# Get platform insights tasks count
|
||||
platform_insights_tasks_count = 0
|
||||
try:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights tasks count: {e}")
|
||||
|
||||
# Calculate job counts
|
||||
apscheduler_recurring = 1 # check_due_tasks
|
||||
apscheduler_one_time = len(all_jobs) - 1
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
|
||||
# Build comprehensive startup log message
|
||||
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
|
||||
if oauth_tasks_count > 0:
|
||||
recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}"
|
||||
if website_analysis_tasks_count > 0:
|
||||
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
|
||||
if platform_insights_tasks_count > 0:
|
||||
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
|
||||
|
||||
startup_lines = [
|
||||
f"[Scheduler] ✅ Task Scheduler Started",
|
||||
f" ├─ Check Interval: {initial_interval} minutes",
|
||||
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
|
||||
f" ├─ Active Strategies: {active_strategies}",
|
||||
f" ├─ Total Scheduled Jobs: {total_jobs}",
|
||||
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
|
||||
f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})",
|
||||
f" └─ One-Time Jobs: {apscheduler_one_time}"
|
||||
]
|
||||
|
||||
# Add APScheduler job details
|
||||
if all_jobs:
|
||||
for idx, job in enumerate(all_jobs):
|
||||
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
|
||||
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
next_run = job.next_run_time
|
||||
trigger_type = type(job.trigger).__name__
|
||||
@@ -338,7 +377,7 @@ class TaskScheduler:
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
@@ -367,6 +406,71 @@ class TaskScheduler:
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
||||
|
||||
# Add website analysis tasks details
|
||||
if website_analysis_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis task details: {e}")
|
||||
|
||||
# Add platform insights tasks details
|
||||
if platform_insights_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights task details: {e}")
|
||||
|
||||
# Log comprehensive startup information in single message
|
||||
logger.warning("\n".join(startup_lines))
|
||||
|
||||
@@ -384,7 +488,9 @@ class TaskScheduler:
|
||||
'total_jobs': total_jobs,
|
||||
'recurring_jobs': total_recurring,
|
||||
'one_time_jobs': apscheduler_one_time,
|
||||
'oauth_monitoring_tasks': oauth_tasks_count
|
||||
'oauth_monitoring_tasks': oauth_tasks_count,
|
||||
'website_analysis_tasks': website_analysis_tasks_count,
|
||||
'platform_insights_tasks': platform_insights_tasks_count
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
@@ -533,6 +639,128 @@ class TaskScheduler:
|
||||
except Exception as e:
|
||||
logger.warning(f"Error triggering interval adjustment: {e}")
|
||||
|
||||
async def _validate_and_rebuild_cumulative_stats(self):
    """
    Validate cumulative stats on scheduler startup and rebuild if needed.

    The persistent ``SchedulerCumulativeStats`` row (id == 1) is compared with
    the number of ``check_cycle`` rows in ``SchedulerEventLog``. When the row
    is missing, or its ``total_check_cycles`` differs from that count, the
    cycle/found/executed/failed totals are recomputed from the event log and
    committed. ``cumulative_tasks_skipped`` is not touched here.

    All errors are logged and swallowed so that a stats problem never blocks
    scheduler startup; the database session is always closed.
    """
    db = None
    try:
        db = get_db_session()
        if not db:
            logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
            return

        try:
            from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
            from models.scheduler_models import SchedulerEventLog
            from sqlalchemy import func

            # Get cumulative stats from persistent table
            cumulative_stats = db.query(SchedulerCumulativeStats).filter(
                SchedulerCumulativeStats.id == 1
            ).first()
            stats_existed = cumulative_stats is not None

            # Count check_cycle events in database
            check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).scalar() or 0

            if stats_existed and cumulative_stats.total_check_cycles == check_cycle_count:
                logger.warning(
                    f"[Scheduler] ✅ Cumulative stats validated: "
                    f"{cumulative_stats.total_check_cycles} check cycles match event logs"
                )
                return

            if stats_existed:
                logger.warning(
                    f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
                    f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
                    f"vs event_logs.count={check_cycle_count}. "
                    f"Rebuilding cumulative stats from event logs..."
                )
            else:
                logger.warning(
                    "[Scheduler] Cumulative stats table not found. "
                    "Creating from event logs..."
                )

            # Single aggregate query shared by the "rebuild" and "create" paths
            result = db.query(
                func.count(SchedulerEventLog.id),
                func.sum(SchedulerEventLog.tasks_found),
                func.sum(SchedulerEventLog.tasks_executed),
                func.sum(SchedulerEventLog.tasks_failed)
            ).filter(
                SchedulerEventLog.event_type == 'check_cycle'
            ).first()

            if not result:
                logger.warning("[Scheduler] No check_cycle events found to rebuild from")
                return

            # SUM() yields NULL when there are no rows; treat that as zero
            total_cycles, total_found, total_executed, total_failed = (
                int(value) if value is not None else 0
                for value in (result[0], result[1], result[2], result[3])
            )

            if cumulative_stats is None:
                cumulative_stats = SchedulerCumulativeStats.get_or_create(db)

            # One timestamp so last_updated and updated_at agree exactly
            now = datetime.utcnow()
            cumulative_stats.total_check_cycles = total_cycles
            cumulative_stats.cumulative_tasks_found = total_found
            cumulative_stats.cumulative_tasks_executed = total_executed
            cumulative_stats.cumulative_tasks_failed = total_failed
            cumulative_stats.last_updated = now
            cumulative_stats.updated_at = now

            db.commit()

            if stats_existed:
                outcome = "Rebuilt cumulative stats on startup"
            else:
                outcome = "Created cumulative stats from event logs"
            logger.warning(
                f"[Scheduler] ✅ {outcome}: "
                f"cycles={total_cycles}, found={total_found}, "
                f"executed={total_executed}, failed={total_failed}"
            )
        except ImportError:
            logger.warning(
                "[Scheduler] Cumulative stats model not available. "
                "Migration may not have been run yet. "
                "Run: python backend/scripts/run_cumulative_stats_migration.py"
            )
    except Exception as e:
        logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
    finally:
        if db:
            db.close()
|
||||
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Process due tasks for a specific task type.
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
"""
|
||||
Website Analysis Task Restoration
|
||||
Automatically creates missing website analysis tasks for users who completed onboarding
|
||||
but don't have monitoring tasks created yet.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from services.website_analysis_monitoring_service import create_website_analysis_tasks
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("website_analysis_restoration")
|
||||
|
||||
|
||||
async def restore_website_analysis_tasks(scheduler):
    """
    Restore/create missing website analysis tasks for all users.

    Users are gathered from two sources: those who already own at least one
    ``WebsiteAnalysisTask`` and those whose onboarding session is complete
    (``current_step >= 6`` or ``progress >= 100.0``, confirmed through the
    onboarding progress service). Any user without a task gets tasks created
    via ``create_website_analysis_tasks``.

    The restoration is best-effort: a failure for one user is logged and the
    loop continues, and any unexpected error is logged rather than raised.

    Args:
        scheduler: TaskScheduler instance (not referenced by this function).
    """
    try:
        logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
        db = get_db_session()
        if not db:
            logger.warning("[Website Analysis Restoration] Could not get database session")
            return

        try:
            # Check if table exists (may not exist if migration hasn't run)
            try:
                existing_tasks = db.query(WebsiteAnalysisTask).all()
            except Exception as table_error:
                logger.error(
                    f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
                    f"Please run database migration: create_website_analysis_monitoring_tables.sql"
                )
                return

            # One pass over existing tasks: count per user (used for the skip
            # check below) and per type (used for the summary log).
            tasks_per_user = {}
            existing_by_type = {}
            for task in existing_tasks:
                tasks_per_user[task.user_id] = tasks_per_user.get(task.user_id, 0) + 1
                existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
            user_ids_with_tasks = set(tasks_per_user)

            type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
            logger.warning(
                f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
                f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
            )

            # Start with users who already have at least one website analysis task
            users_to_check = list(user_ids_with_tasks)

            # Also include users who completed onboarding but have no tasks yet.
            # Completion mirrors OnboardingProgressService.get_onboarding_status():
            # is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
            try:
                from services.onboarding.progress_service import get_onboarding_progress_service

                progress_service = get_onboarding_progress_service()

                completed_sessions = db.query(OnboardingSession).filter(
                    or_(
                        OnboardingSession.current_step >= 6,
                        OnboardingSession.progress >= 100.0
                    )
                ).all()

                # Confirm each candidate with the same service method the rest
                # of the app uses.
                onboarding_user_ids = set()
                for session in completed_sessions:
                    status = progress_service.get_onboarding_status(session.user_id)
                    if status.get('is_completed', False):
                        onboarding_user_ids.add(session.user_id)

                # Set difference gives the real number of users without tasks;
                # subtracting the two set sizes could go negative.
                new_user_ids = onboarding_user_ids - user_ids_with_tasks
                users_to_check = list(user_ids_with_tasks) + list(new_user_ids)

                logger.warning(
                    f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
                    f"({len(user_ids_with_tasks)} with existing tasks, "
                    f"{len(onboarding_user_ids)} from onboarding sessions, "
                    f"{len(new_user_ids)} new users to check)"
                )
            except Exception as e:
                logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
                # Fallback to users with existing tasks only
                users_to_check = list(user_ids_with_tasks)

            total_created = 0
            users_processed = 0

            for user_id in users_to_check:
                try:
                    users_processed += 1

                    # Skip users who already have tasks
                    existing_count = tasks_per_user.get(user_id, 0)
                    if existing_count:
                        logger.debug(
                            f"[Website Analysis Restoration] User {user_id} already has "
                            f"{existing_count} website analysis tasks, skipping"
                        )
                        continue

                    logger.warning(
                        f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
                        f"but has no website analysis tasks. Creating tasks..."
                    )

                    # Create missing tasks
                    result = create_website_analysis_tasks(user_id=user_id, db=db)

                    if result.get('success'):
                        tasks_count = result.get('tasks_created', 0)
                        total_created += tasks_count
                        logger.warning(
                            f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
                            f"for user {user_id}"
                        )
                    else:
                        error = result.get('error', 'Unknown error')
                        logger.warning(
                            f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
                        )

                except Exception as e:
                    logger.warning(
                        f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
                        exc_info=True
                    )
                    continue

            # Final summary log, re-queried so newly created tasks are included
            final_existing_tasks = db.query(WebsiteAnalysisTask).all()
            final_by_type = {}
            for task in final_existing_tasks:
                final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1

            final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])

            if total_created > 0:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
                    f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
                )
            else:
                logger.warning(
                    f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
                    f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
                    f"Type breakdown: {final_type_summary}"
                )

        finally:
            db.close()

    except Exception as e:
        logger.error(
            f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
            exc_info=True
        )
|
||||
|
||||
Reference in New Issue
Block a user