AI platform insights monitoring and website analysis monitoring services added

This commit is contained in:
ajaysi
2025-11-11 15:57:45 +05:30
parent d99c7c83a7
commit 7191c7e7f0
81 changed files with 10860 additions and 1567 deletions

View File

@@ -3,6 +3,8 @@ Task Scheduler Package
Modular, pluggable scheduler for ALwrity tasks.
"""
from sqlalchemy.orm import Session
from .core.scheduler import TaskScheduler
from .core.executor_interface import TaskExecutor, TaskExecutionResult
from .core.exception_handler import (
@@ -11,8 +13,13 @@ from .core.exception_handler import (
)
from .executors.monitoring_task_executor import MonitoringTaskExecutor
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
from .executors.website_analysis_executor import WebsiteAnalysisExecutor
from .executors.gsc_insights_executor import GSCInsightsExecutor
from .executors.bing_insights_executor import BingInsightsExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
from .utils.platform_insights_task_loader import load_due_platform_insights_tasks
# Global scheduler instance (initialized on first access)
_scheduler_instance: TaskScheduler = None
@@ -47,6 +54,37 @@ def get_scheduler() -> TaskScheduler:
oauth_token_executor,
load_due_oauth_token_monitoring_tasks
)
# Register website analysis executor
website_analysis_executor = WebsiteAnalysisExecutor()
_scheduler_instance.register_executor(
'website_analysis',
website_analysis_executor,
load_due_website_analysis_tasks
)
# Register platform insights executors
# GSC insights executor
def load_due_gsc_insights_tasks(db: Session, user_id=None):
return load_due_platform_insights_tasks(db, user_id, platform='gsc')
gsc_insights_executor = GSCInsightsExecutor()
_scheduler_instance.register_executor(
'gsc_insights',
gsc_insights_executor,
load_due_gsc_insights_tasks
)
# Bing insights executor
def load_due_bing_insights_tasks(db: Session, user_id=None):
return load_due_platform_insights_tasks(db, user_id, platform='bing')
bing_insights_executor = BingInsightsExecutor()
_scheduler_instance.register_executor(
'bing_insights',
bing_insights_executor,
load_due_bing_insights_tasks
)
return _scheduler_instance
@@ -57,6 +95,9 @@ __all__ = [
'TaskExecutionResult',
'MonitoringTaskExecutor',
'OAuthTokenMonitoringExecutor',
'WebsiteAnalysisExecutor',
'GSCInsightsExecutor',
'BingInsightsExecutor',
'get_scheduler',
# Exception handling
'SchedulerExceptionHandler',

View File

@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session
from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
@@ -100,6 +101,7 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
logger.warning("\n".join(check_lines))
# Save check cycle event to database for historical tracking
event_log_id = None
try:
event_log = SchedulerEventLog(
event_type='check_cycle',
@@ -119,11 +121,63 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
}
)
db.add(event_log)
db.flush() # Flush to get the ID without committing
event_log_id = event_log.id
db.commit()
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
except Exception as e:
logger.warning(f"Failed to save check cycle event log: {e}")
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
if db:
db.rollback()
# Continue execution even if event log save fails
# Update cumulative stats table (persistent across restarts)
try:
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
# Update cumulative metrics by adding this cycle's values
# Get current cycle values (incremental, not total)
cycle_tasks_found = cycle_summary.get('total_found', 0)
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
# Update cumulative totals (additive)
cumulative_stats.total_check_cycles += 1
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
# We track it as-is from scheduler.stats (it's already cumulative)
# This ensures we don't double-count skipped tasks
if cumulative_stats.cumulative_tasks_skipped is None:
cumulative_stats.cumulative_tasks_skipped = 0
# Update to current total from scheduler (which is already cumulative)
current_skipped = scheduler.stats.get('tasks_skipped', 0)
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
cumulative_stats.cumulative_tasks_skipped = current_skipped
cumulative_stats.last_check_cycle_id = event_log_id
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
# Log at DEBUG level to avoid noise during normal operation
# This is expected behavior, not a warning
logger.debug(
f"[Check Cycle] Updated cumulative stats: "
f"cycles={cumulative_stats.total_check_cycles}, "
f"found={cumulative_stats.cumulative_tasks_found}, "
f"executed={cumulative_stats.cumulative_tasks_executed}, "
f"failed={cumulative_stats.cumulative_tasks_failed}"
)
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
if db:
db.rollback()
# Log warning but continue - cumulative stats can be rebuilt from event logs
logger.warning(
"[Check Cycle] ⚠️ Cumulative stats update failed. "
"Stats can be rebuilt from event logs on next dashboard load."
)
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()

View File

@@ -104,19 +104,16 @@ async def restore_oauth_monitoring_tasks(scheduler):
# Fallback to users with existing tasks only
total_created = 0
restoration_summary = [] # Collect summary for single log
for user_id in users_to_check:
try:
# Get connected platforms for this user
# Get connected platforms for this user (silent - no logging)
connected_platforms = get_connected_platforms(user_id)
logger.warning(
f"[OAuth Task Restoration] User {user_id}: "
f"Connected platforms: {connected_platforms}"
)
if not connected_platforms:
logger.debug(
f"[OAuth Task Restoration] No connected platforms for user {user_id}, skipping"
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
)
continue
@@ -134,11 +131,6 @@ async def restore_oauth_monitoring_tasks(scheduler):
]
if missing_platforms:
logger.warning(
f"[OAuth Task Restoration] ⚠️ User {user_id} has connected platforms "
f"{connected_platforms} but missing tasks for: {missing_platforms}"
)
# Create missing tasks
created = create_oauth_monitoring_tasks(
user_id=user_id,
@@ -147,15 +139,10 @@ async def restore_oauth_monitoring_tasks(scheduler):
)
total_created += len(created)
logger.warning(
f"[OAuth Task Restoration] ✅ Created {len(created)} missing OAuth tasks "
f"for user {user_id}, platforms: {missing_platforms}"
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ User {user_id} has all required tasks "
f"for connected platforms: {connected_platforms}"
# Collect summary info instead of logging immediately
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
except Exception as e:
@@ -173,16 +160,23 @@ async def restore_oauth_monitoring_tasks(scheduler):
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
# Single formatted summary log (similar to scheduler startup)
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
logger.warning(
f"[OAuth Task Restoration] ✅ Created {total_created} missing OAuth monitoring tasks. "
f"Final platform breakdown: {final_platform_summary}"
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Tasks Created: {total_created}\n"
f" ├─ Users Processed: {len(users_to_check)}\n"
f" ├─ Platform Breakdown: {final_platform_summary}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks. "
f"Platform breakdown: {final_platform_summary}"
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
)
finally:

View File

@@ -0,0 +1,152 @@
"""
Platform Insights Task Restoration
Automatically creates missing platform insights tasks for users who have connected platforms
but don't have insights tasks created yet.
"""
from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.platform_insights_monitoring_models import PlatformInsightsTask
from services.platform_insights_monitoring_service import create_platform_insights_task
from services.oauth_token_monitoring_service import get_connected_platforms
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
logger = get_service_logger("platform_insights_task_restoration")
async def restore_platform_insights_tasks(scheduler):
"""
Restore/create missing platform insights tasks for all users.
This checks all users who have connected platforms (GSC/Bing) and ensures they have
insights tasks created. Tasks are created for platforms that are:
- Connected (detected via get_connected_platforms or OAuth tasks)
- Missing insights tasks (no PlatformInsightsTask exists)
Args:
scheduler: TaskScheduler instance
"""
try:
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
db = get_db_session()
if not db:
logger.warning("[Platform Insights Restoration] Could not get database session")
return
try:
# Get all existing insights tasks to find unique user_ids
existing_tasks = db.query(PlatformInsightsTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Get all OAuth tasks to find users with connected platforms
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)
# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']
# Get users who have OAuth tasks for GSC or Bing
users_to_check = set()
for task in oauth_tasks:
if task.platform in insights_platforms:
users_to_check.add(task.user_id)
logger.warning(
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
f"with GSC/Bing OAuth connections."
)
if not users_to_check:
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
return
total_created = 0
restoration_summary = []
for user_id in users_to_check:
try:
# Get connected platforms for this user
connected_platforms = get_connected_platforms(user_id)
# Filter to only GSC and Bing
insights_connected = [p for p in connected_platforms if p in insights_platforms]
if not insights_connected:
logger.debug(
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
)
continue
# Check which platforms are missing insights tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}
missing_platforms = [
platform
for platform in insights_connected
if platform not in existing_platforms
]
if missing_platforms:
# Create missing tasks for each platform
for platform in missing_platforms:
try:
# Don't fetch site_url here - it requires API calls
# The executor will fetch it when the task runs (weekly)
# This avoids API calls during restoration
result = create_platform_insights_task(
user_id=user_id,
platform=platform,
site_url=None, # Will be fetched by executor when task runs
db=db
)
if result.get('success'):
total_created += 1
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {platform.upper()} task created"
)
else:
logger.debug(
f"[Platform Insights Restoration] Failed to create {platform} task "
f"for user {user_id}: {result.get('error')}"
)
except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error creating {platform} task "
f"for user {user_id}: {e}"
)
continue
except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
)
continue
# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
)
finally:
db.close()
except Exception as e:
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)

View File

@@ -26,6 +26,8 @@ from models.scheduler_models import SchedulerEventLog
from .interval_manager import determine_optimal_interval, adjust_check_interval_if_needed
from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .website_analysis_task_restoration import restore_website_analysis_tasks
from .platform_insights_task_restoration import restore_platform_insights_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
@@ -185,6 +187,15 @@ class TaskScheduler:
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
# Restore/create missing website analysis tasks for users who completed onboarding
await restore_website_analysis_tasks(self)
# Restore/create missing platform insights tasks for users with connected GSC/Bing
await restore_platform_insights_tasks(self)
# Validate and rebuild cumulative stats if needed
await self._validate_and_rebuild_cumulative_stats()
# Get all scheduled APScheduler jobs (including one-time tasks)
all_jobs = self.scheduler.get_jobs()
registered_types = self.registry.get_registered_types()
@@ -260,27 +271,55 @@ class TaskScheduler:
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Get website analysis tasks count
website_analysis_tasks_count = 0
try:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get website analysis tasks count: {e}")
# Get platform insights tasks count
platform_insights_tasks_count = 0
try:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get platform insights tasks count: {e}")
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
# Build comprehensive startup log message
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
if oauth_tasks_count > 0:
recurring_breakdown += f", OAuth monitoring: {oauth_tasks_count}"
if website_analysis_tasks_count > 0:
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
if platform_insights_tasks_count > 0:
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
f" ├─ Check Interval: {initial_interval} minutes",
f" ├─ Registered Task Types: {len(registered_types)} ({', '.join(registered_types) if registered_types else 'none'})",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Total Scheduled Jobs: {total_jobs}",
f" ├─ Recurring Jobs: {total_recurring} (check_due_tasks: {apscheduler_recurring}, OAuth monitoring: {oauth_tasks_count})",
f" ├─ Recurring Jobs: {total_recurring} ({recurring_breakdown})",
f" └─ One-Time Jobs: {apscheduler_one_time}"
]
# Add APScheduler job details
if all_jobs:
for idx, job in enumerate(all_jobs):
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0
is_last = idx == len(all_jobs) - 1 and oauth_tasks_count == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0
prefix = " └─" if is_last else " ├─"
next_run = job.next_run_time
trigger_type = type(job.trigger).__name__
@@ -338,7 +377,7 @@ class TaskScheduler:
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and len(all_jobs) == 0
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
@@ -367,6 +406,71 @@ class TaskScheduler:
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Add website analysis tasks details
if website_analysis_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get website analysis task details: {e}")
# Add platform insights tasks details
if platform_insights_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.debug(f"Could not get platform insights task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
@@ -384,7 +488,9 @@ class TaskScheduler:
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count
'oauth_monitoring_tasks': oauth_tasks_count,
'website_analysis_tasks': website_analysis_tasks_count,
'platform_insights_tasks': platform_insights_tasks_count
}
)
db.add(event_log)
@@ -533,6 +639,128 @@ class TaskScheduler:
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
async def _validate_and_rebuild_cumulative_stats(self):
"""
Validate cumulative stats on scheduler startup and rebuild if needed.
This ensures cumulative stats are accurate after restarts.
"""
db = None
try:
db = get_db_session()
if not db:
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
return
try:
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from models.scheduler_models import SchedulerEventLog
from sqlalchemy import func
# Get cumulative stats from persistent table
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
SchedulerCumulativeStats.id == 1
).first()
# Count check_cycle events in database
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'check_cycle'
).scalar() or 0
if cumulative_stats:
# Validate: cumulative stats should match event log count
if cumulative_stats.total_check_cycles != check_cycle_count:
logger.warning(
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
f"vs event_logs.count={check_cycle_count}. "
f"Rebuilding cumulative stats from event logs..."
)
# Rebuild from event logs
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
# Update cumulative stats
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
else:
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
else:
logger.warning(
f"[Scheduler] ✅ Cumulative stats validated: "
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
)
else:
# Cumulative stats table doesn't exist, create it from event logs
logger.warning(
"[Scheduler] Cumulative stats table not found. "
"Creating from event logs..."
)
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Created cumulative stats from event logs: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
except ImportError:
logger.warning(
"[Scheduler] Cumulative stats model not available. "
"Migration may not have been run yet. "
"Run: python backend/scripts/run_cumulative_stats_migration.py"
)
except Exception as e:
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
finally:
if db:
db.close()
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.

View File

@@ -0,0 +1,193 @@
"""
Website Analysis Task Restoration
Automatically creates missing website analysis tasks for users who completed onboarding
but don't have monitoring tasks created yet.
"""
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
from services.website_analysis_monitoring_service import create_website_analysis_tasks
from models.onboarding import OnboardingSession
from sqlalchemy import or_
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("website_analysis_restoration")
async def restore_website_analysis_tasks(scheduler):
"""
Restore/create missing website analysis tasks for all users.
This checks all users who completed onboarding and ensures they have
website analysis tasks created. Tasks are created for:
- User's website (if analysis exists)
- All competitors (from onboarding step 3)
Args:
scheduler: TaskScheduler instance
"""
try:
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
db = get_db_session()
if not db:
logger.warning("[Website Analysis Restoration] Could not get database session")
return
try:
# Check if table exists (may not exist if migration hasn't run)
try:
existing_tasks = db.query(WebsiteAnalysisTask).all()
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
)
return
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Log existing tasks breakdown by type
existing_by_type = {}
for task in existing_tasks:
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
logger.warning(
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
)
# Check users who already have at least one website analysis task
users_to_check = list(user_ids_with_tasks)
# Also query all users from onboarding who completed step 2 (website analysis)
# to catch users who completed onboarding but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_
# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()
# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()
# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()
# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)
users_to_check = all_user_ids
logger.warning(
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
users_to_check = list(user_ids_with_tasks)
total_created = 0
users_processed = 0
for user_id in users_to_check:
try:
users_processed += 1
# Check if user already has tasks
existing_user_tasks = [
task for task in existing_tasks
if task.user_id == user_id
]
if existing_user_tasks:
logger.debug(
f"[Website Analysis Restoration] User {user_id} already has "
f"{len(existing_user_tasks)} website analysis tasks, skipping"
)
continue
logger.warning(
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)
# Create missing tasks
result = create_website_analysis_tasks(user_id=user_id, db=db)
if result.get('success'):
tasks_count = result.get('tasks_created', 0)
total_created += tasks_count
logger.warning(
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
f"for user {user_id}"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
)
except Exception as e:
logger.warning(
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue
# Final summary log
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
final_by_type = {}
for task in final_existing_tasks:
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1
final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])
if total_created > 0:
logger.warning(
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
)
else:
logger.warning(
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
f"Type breakdown: {final_type_summary}"
)
finally:
db.close()
except Exception as e:
logger.error(
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
exc_info=True
)

View File

@@ -0,0 +1,298 @@
"""
Bing Insights Task Executor
Handles execution of Bing insights fetch tasks for connected platforms.
"""
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.integrations.bing_oauth import BingOAuthService
from utils.logger_utils import get_service_logger
logger = get_service_logger("bing_insights_executor")
class BingInsightsExecutor(TaskExecutor):
"""
Executor for Bing insights fetch tasks.
Handles:
- Fetching Bing insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from Bing API
- Logging results and updating task status
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
self.storage_service = BingAnalyticsStorageService(database_url)
self.bing_oauth = BingOAuthService()
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a Bing insights fetch task.
Args:
task: PlatformInsightsTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url
try:
self.logger.info(
f"Executing Bing insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)
# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Fetch insights
result = await self._fetch_insights(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None
# Update task based on result
task.last_check = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
task.status = 'failed'
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="Bing insights fetch"
)
# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return error_result
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch Bing insights data.
On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url
try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None
if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)
if cached_data:
self.logger.info(f"Loaded cached Bing data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from Bing API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)
except Exception as e:
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch Bing insights: {str(e)}",
result_data={'error': str(e)}
)
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached Bing data from database."""
try:
# Get analytics summary from storage service
summary = self.storage_service.get_analytics_summary(
user_id=user_id,
site_url=site_url or '',
days=30
)
if summary and isinstance(summary, dict):
self.logger.info(f"Found cached Bing data for user {user_id}")
return summary
return None
except Exception as e:
self.logger.warning(f"Error loading cached Bing data: {e}")
return None
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh Bing insights from API."""
try:
# Check if user has active tokens
token_status = self.bing_oauth.get_user_token_status(user_id)
if not token_status.get('has_active_tokens'):
return TaskExecutionResult(
success=False,
error_message="Bing Webmaster tokens not available or expired",
result_data={'error': 'No active tokens'}
)
# Get user's sites
sites = self.bing_oauth.get_user_sites(user_id)
if not sites:
return TaskExecutionResult(
success=False,
error_message="No Bing Webmaster sites found",
result_data={'error': 'No sites found'}
)
# Use provided site_url or first site
if not site_url:
site_url = sites[0].get('Url', '') if isinstance(sites[0], dict) else sites[0]
# Get active token
active_tokens = token_status.get('active_tokens', [])
if not active_tokens:
return TaskExecutionResult(
success=False,
error_message="No active Bing Webmaster tokens",
result_data={'error': 'No tokens'}
)
# For now, use stored analytics data (Bing API integration can be added later)
# This ensures we have data available even if the API class doesn't exist yet
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)
if summary and isinstance(summary, dict):
# Format insights data from stored analytics
insights_data = {
'site_url': site_url,
'date_range': {
'start': (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'),
'end': datetime.now().strftime('%Y-%m-%d')
},
'summary': summary.get('summary', {}),
'fetched_at': datetime.utcnow().isoformat()
}
self.logger.info(
f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
)
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'storage',
'insights': insights_data,
'message': 'Loaded from stored analytics data'
}
)
else:
# No stored data available
return TaskExecutionResult(
success=False,
error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
result_data={'error': 'No stored data available'}
)
except Exception as e:
self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"API fetch failed: {str(e)}",
result_data={'error': str(e)}
)
def calculate_next_execution(
self,
task: PlatformInsightsTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
For platform insights, frequency is always 'Weekly' (7 days).
"""
if last_execution is None:
last_execution = datetime.utcnow()
if frequency == 'Weekly':
return last_execution + timedelta(days=7)
elif frequency == 'Daily':
return last_execution + timedelta(days=1)
else:
# Default to weekly
return last_execution + timedelta(days=7)

View File

@@ -0,0 +1,307 @@
"""
GSC Insights Task Executor
Handles execution of GSC insights fetch tasks for connected platforms.
"""
import logging
import os
import time
import json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
import sqlite3
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.gsc_service import GSCService
from utils.logger_utils import get_service_logger
logger = get_service_logger("gsc_insights_executor")
class GSCInsightsExecutor(TaskExecutor):
"""
Executor for GSC insights fetch tasks.
Handles:
- Fetching GSC insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from GSC API
- Logging results and updating task status
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
self.gsc_service = GSCService()
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a GSC insights fetch task.
Args:
task: PlatformInsightsTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url
try:
self.logger.info(
f"Executing GSC insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)
# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Fetch insights
result = await self._fetch_insights(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None
# Update task based on result
task.last_check = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
task.status = 'failed'
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="GSC insights fetch"
)
# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return error_result
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch GSC insights data.
On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url
try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None
if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)
if cached_data:
self.logger.info(f"Loaded cached GSC data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from GSC API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)
except Exception as e:
self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch GSC insights: {str(e)}",
result_data={'error': str(e)}
)
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached GSC data from database."""
try:
db_path = self.gsc_service.db_path
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
# Find most recent cached data
if site_url:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id, site_url))
else:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id,))
result = cursor.fetchone()
if result:
data_json, created_at = result
insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json
self.logger.info(
f"Found cached GSC data from {created_at} for user {user_id}"
)
return insights_data
return None
except Exception as e:
self.logger.warning(f"Error loading cached GSC data: {e}")
return None
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh GSC insights from API."""
try:
# If no site_url, get first site
if not site_url:
sites = self.gsc_service.get_site_list(user_id)
if not sites:
return TaskExecutionResult(
success=False,
error_message="No GSC sites found for user",
result_data={'error': 'No sites found'}
)
site_url = sites[0]['siteUrl']
# Get analytics for last 30 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
# Fetch search analytics
search_analytics = self.gsc_service.get_search_analytics(
user_id=user_id,
site_url=site_url,
start_date=start_date,
end_date=end_date
)
if 'error' in search_analytics:
return TaskExecutionResult(
success=False,
error_message=search_analytics.get('error', 'Unknown error'),
result_data=search_analytics
)
# Format insights data
insights_data = {
'site_url': site_url,
'date_range': {
'start': start_date,
'end': end_date
},
'overall_metrics': search_analytics.get('overall_metrics', {}),
'query_data': search_analytics.get('query_data', {}),
'fetched_at': datetime.utcnow().isoformat()
}
self.logger.info(
f"Successfully fetched GSC insights for user {user_id}, site {site_url}"
)
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'api',
'insights': insights_data,
'message': 'Fetched fresh data from GSC API'
}
)
except Exception as e:
self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"API fetch failed: {str(e)}",
result_data={'error': str(e)}
)
def calculate_next_execution(
self,
task: PlatformInsightsTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
For platform insights, frequency is always 'Weekly' (7 days).
"""
if last_execution is None:
last_execution = datetime.utcnow()
if frequency == 'Weekly':
return last_execution + timedelta(days=7)
elif frequency == 'Daily':
return last_execution + timedelta(days=1)
else:
# Default to weekly
return last_execution + timedelta(days=7)

View File

@@ -197,7 +197,7 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
- GSC: gsc_credentials table (via GSCService)
- Bing: bing_oauth_tokens table (via BingOAuthService)
- WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
- Wix: Currently in frontend sessionStorage (backend storage TODO)
- Wix: wix_oauth_tokens table (via WixOAuthService)
Args:
task: OAuthTokenMonitoringTask instance

View File

@@ -0,0 +1,458 @@
"""
Website Analysis Task Executor
Handles execution of website analysis tasks for user and competitor websites.
"""
import logging
import os
import time
import asyncio
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from functools import partial
from urllib.parse import urlparse
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
from models.onboarding import CompetitorAnalysis, OnboardingSession
from utils.logger_utils import get_service_logger
# Import website analysis services
from services.component_logic.web_crawler_logic import WebCrawlerLogic
from services.component_logic.style_detection_logic import StyleDetectionLogic
from services.website_analysis_service import WebsiteAnalysisService
logger = get_service_logger("website_analysis_executor")
class WebsiteAnalysisExecutor(TaskExecutor):
"""
Executor for website analysis tasks.
Handles:
- Analyzing user's website (updates existing WebsiteAnalysis record)
- Analyzing competitor websites (stores in CompetitorAnalysis table)
- Logging results and updating task status
- Scheduling next execution based on frequency_days
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
self.crawler_logic = WebCrawlerLogic()
self.style_logic = StyleDetectionLogic()
async def execute_task(
self,
task: WebsiteAnalysisTask,
db: Session
) -> TaskExecutionResult:
"""
Execute a website analysis task.
This performs complete website analysis using the same logic as
/api/onboarding/style-detection/complete endpoint.
Args:
task: WebsiteAnalysisTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
website_url = task.website_url
task_type = task.task_type
try:
self.logger.info(
f"Executing website analysis: task_id={task.id} | "
f"user_id={user_id} | url={website_url} | type={task_type}"
)
# Create execution log
execution_log = WebsiteAnalysisExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Perform website analysis
result = await self._perform_website_analysis(
website_url=website_url,
user_id=user_id,
task_type=task_type,
task=task,
db=db
)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
# Update task based on result
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Schedule next check based on frequency_days
task.next_check = self.calculate_next_execution(
task=task,
frequency='Custom',
last_execution=task.last_check,
custom_days=task.frequency_days
)
# Commit all changes to database
db.commit()
self.logger.info(
f"Website analysis completed successfully for task {task.id}. "
f"Next check scheduled for {task.next_check}"
)
return result
else:
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
task.status = 'failed'
# Do NOT update next_check - wait for manual retry
# Commit all changes to database
db.commit()
self.logger.warning(
f"Website analysis failed for task {task.id}. "
f"Error: {result.error_message}. Waiting for manual retry."
)
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
# Create structured error
error = TaskExecutionError(
message=f"Error executing website analysis task {task.id}: {str(e)}",
user_id=user_id,
task_id=task.id,
task_type="website_analysis",
execution_time_ms=execution_time_ms,
context={
"website_url": website_url,
"task_type": task_type,
"user_id": user_id
},
original_error=e
)
# Handle exception with structured logging
self.exception_handler.handle_exception(error)
# Update execution log with error
try:
execution_log = WebsiteAnalysisExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='failed',
error_message=str(e),
execution_time_ms=execution_time_ms,
result_data={
"error_type": error.error_type.value,
"severity": error.severity.value,
"context": error.context
}
)
db.add(execution_log)
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
# Do NOT update next_check - wait for manual retry
db.commit()
except Exception as commit_error:
db_error = DatabaseError(
message=f"Error saving execution log: {str(commit_error)}",
user_id=user_id,
task_id=task.id,
original_error=commit_error
)
self.exception_handler.handle_exception(db_error)
db.rollback()
return TaskExecutionResult(
success=False,
error_message=str(e),
execution_time_ms=execution_time_ms,
retryable=True
)
async def _perform_website_analysis(
self,
website_url: str,
user_id: str,
task_type: str,
task: WebsiteAnalysisTask,
db: Session
) -> TaskExecutionResult:
"""
Perform website analysis using existing service logic.
Reuses the same logic as /api/onboarding/style-detection/complete.
"""
try:
# Step 1: Crawl website content
self.logger.info(f"Crawling website: {website_url}")
crawl_result = await self.crawler_logic.crawl_website(website_url)
if not crawl_result.get('success'):
error_msg = crawl_result.get('error', 'Crawling failed')
self.logger.error(f"Crawling failed for {website_url}: {error_msg}")
return TaskExecutionResult(
success=False,
error_message=f"Crawling failed: {error_msg}",
result_data={'crawl_result': crawl_result},
retryable=True
)
# Step 2: Run style analysis and patterns analysis in parallel
self.logger.info(f"Running style analysis for {website_url}")
async def run_style_analysis():
"""Run style analysis in executor"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
partial(self.style_logic.analyze_content_style, crawl_result['content'])
)
async def run_patterns_analysis():
"""Run patterns analysis in executor"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
)
# Execute style and patterns analysis in parallel
style_analysis, patterns_result = await asyncio.gather(
run_style_analysis(),
run_patterns_analysis(),
return_exceptions=True
)
# Check for exceptions
if isinstance(style_analysis, Exception):
self.logger.error(f"Style analysis exception: {style_analysis}")
return TaskExecutionResult(
success=False,
error_message=f"Style analysis failed: {str(style_analysis)}",
retryable=True
)
if isinstance(patterns_result, Exception):
self.logger.warning(f"Patterns analysis exception: {patterns_result}")
patterns_result = None
# Step 3: Generate style guidelines
style_guidelines = None
if style_analysis and style_analysis.get('success'):
loop = asyncio.get_event_loop()
guidelines_result = await loop.run_in_executor(
None,
partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
)
if guidelines_result and guidelines_result.get('success'):
style_guidelines = guidelines_result.get('guidelines')
# Prepare analysis data
analysis_data = {
'crawl_result': crawl_result,
'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
'style_guidelines': style_guidelines,
}
# Step 4: Store results based on task type
if task_type == 'user_website':
# Update existing WebsiteAnalysis record
await self._update_user_website_analysis(
user_id=user_id,
website_url=website_url,
analysis_data=analysis_data,
db=db
)
elif task_type == 'competitor':
# Store in CompetitorAnalysis table
await self._store_competitor_analysis(
user_id=user_id,
competitor_url=website_url,
competitor_id=task.competitor_id,
analysis_data=analysis_data,
db=db
)
self.logger.info(f"Website analysis completed successfully for {website_url}")
return TaskExecutionResult(
success=True,
result_data=analysis_data,
retryable=False
)
except Exception as e:
self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=str(e),
retryable=True
)
async def _update_user_website_analysis(
self,
user_id: str,
website_url: str,
analysis_data: Dict[str, Any],
db: Session
):
"""Update existing WebsiteAnalysis record for user's website."""
try:
# Convert Clerk user ID to integer (same as component_logic.py)
# Use the same conversion logic as the website analysis API
import hashlib
user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)
# Use WebsiteAnalysisService to update
analysis_service = WebsiteAnalysisService(db)
# Prepare data in format expected by save_analysis
response_data = {
'crawl_result': analysis_data.get('crawl_result'),
'style_analysis': analysis_data.get('style_analysis'),
'style_patterns': analysis_data.get('style_patterns'),
'style_guidelines': analysis_data.get('style_guidelines'),
}
# Save/update analysis
analysis_id = analysis_service.save_analysis(
session_id=user_id_int,
website_url=website_url,
analysis_data=response_data
)
if analysis_id:
self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
else:
self.logger.warning(f"Failed to update user website analysis for {website_url}")
except Exception as e:
self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
raise
async def _store_competitor_analysis(
self,
user_id: str,
competitor_url: str,
competitor_id: Optional[str],
analysis_data: Dict[str, Any],
db: Session
):
"""Store competitor analysis in CompetitorAnalysis table."""
try:
# Get onboarding session for user
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).first()
if not session:
raise ValueError(f"No onboarding session found for user {user_id}")
# Extract domain from URL
parsed_url = urlparse(competitor_url)
competitor_domain = parsed_url.netloc or competitor_id
# Check if analysis already exists for this competitor
existing = db.query(CompetitorAnalysis).filter(
CompetitorAnalysis.session_id == session.id,
CompetitorAnalysis.competitor_url == competitor_url
).first()
if existing:
# Update existing analysis
existing.analysis_data = analysis_data
existing.analysis_date = datetime.utcnow()
existing.status = 'completed'
existing.error_message = None
existing.warning_message = None
existing.updated_at = datetime.utcnow()
self.logger.info(f"Updated competitor analysis for {competitor_url}")
else:
# Create new analysis
competitor_analysis = CompetitorAnalysis(
session_id=session.id,
competitor_url=competitor_url,
competitor_domain=competitor_domain,
analysis_data=analysis_data,
status='completed',
analysis_date=datetime.utcnow()
)
db.add(competitor_analysis)
self.logger.info(f"Created new competitor analysis for {competitor_url}")
db.commit()
except Exception as e:
db.rollback()
self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
raise
def calculate_next_execution(
self,
task: WebsiteAnalysisTask,
frequency: str,
last_execution: Optional[datetime] = None,
custom_days: Optional[int] = None
) -> datetime:
"""
Calculate next execution time based on frequency or custom days.
Args:
task: WebsiteAnalysisTask instance
frequency: Frequency string ('Custom' for website analysis)
last_execution: Last execution datetime (defaults to task.last_check or now)
custom_days: Custom number of days (from task.frequency_days)
Returns:
Next execution datetime
"""
if last_execution is None:
last_execution = task.last_check if task.last_check else datetime.utcnow()
# Use custom_days if provided, otherwise use task.frequency_days
days = custom_days if custom_days is not None else task.frequency_days
if frequency == 'Custom' and days:
return last_execution + timedelta(days=days)
else:
# Default to task's frequency_days
self.logger.warning(
f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
f"Using frequency_days={task.frequency_days}."
)
return last_execution + timedelta(days=task.frequency_days)

View File

@@ -0,0 +1,60 @@
"""
Platform Insights Task Loader
Functions to load due platform insights tasks from database.
"""
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from models.platform_insights_monitoring_models import PlatformInsightsTask
def load_due_platform_insights_tasks(
db: Session,
user_id: Optional[Union[str, int]] = None,
platform: Optional[str] = None
) -> List[PlatformInsightsTask]:
"""
Load all platform insights tasks that are due for execution.
Criteria:
- status == 'active' (only check active tasks)
- next_check <= now (or is None for first execution)
- Optional: user_id filter for specific user
- Optional: platform filter ('gsc' or 'bing')
Args:
db: Database session
user_id: Optional user ID (Clerk string) to filter tasks
platform: Optional platform filter ('gsc' or 'bing')
Returns:
List of due PlatformInsightsTask instances
"""
now = datetime.utcnow()
# Build query for due tasks
query = db.query(PlatformInsightsTask).filter(
and_(
PlatformInsightsTask.status == 'active',
or_(
PlatformInsightsTask.next_check <= now,
PlatformInsightsTask.next_check.is_(None)
)
)
)
# Apply user filter if provided
if user_id is not None:
query = query.filter(PlatformInsightsTask.user_id == str(user_id))
# Apply platform filter if provided
if platform is not None:
query = query.filter(PlatformInsightsTask.platform == platform)
tasks = query.all()
return tasks

View File

@@ -0,0 +1,54 @@
"""
Website Analysis Task Loader
Functions to load due website analysis tasks from database.
"""
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
def load_due_website_analysis_tasks(
db: Session,
user_id: Optional[Union[str, int]] = None
) -> List[WebsiteAnalysisTask]:
"""
Load all website analysis tasks that are due for execution.
Criteria:
- status == 'active' (only check active tasks)
- next_check <= now (or is None for first execution)
- Optional: user_id filter for specific user (for user isolation)
User isolation is enforced through filtering by user_id when provided.
If no user_id is provided, loads tasks for all users (for system-wide monitoring).
Args:
db: Database session
user_id: Optional user ID (Clerk string) to filter tasks (if None, loads all users' tasks)
Returns:
List of due WebsiteAnalysisTask instances
"""
now = datetime.utcnow()
# Build query for due tasks
query = db.query(WebsiteAnalysisTask).filter(
and_(
WebsiteAnalysisTask.status == 'active',
or_(
WebsiteAnalysisTask.next_check <= now,
WebsiteAnalysisTask.next_check.is_(None)
)
)
)
# Apply user filter if provided (for user isolation)
if user_id is not None:
query = query.filter(WebsiteAnalysisTask.user_id == str(user_id))
return query.all()