Base code
111  backend/services/scheduler/__init__.py  Normal file
@@ -0,0 +1,111 @@
"""
Task Scheduler Package

Modular, pluggable scheduler for ALwrity tasks.
"""

from typing import Optional

from sqlalchemy.orm import Session

from .core.scheduler import TaskScheduler
from .core.executor_interface import TaskExecutor, TaskExecutionResult
from .core.exception_handler import (
    SchedulerExceptionHandler, SchedulerException, SchedulerErrorType, SchedulerErrorSeverity,
    TaskExecutionError, DatabaseError, TaskLoaderError, SchedulerConfigError
)
from .executors.monitoring_task_executor import MonitoringTaskExecutor
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
from .executors.website_analysis_executor import WebsiteAnalysisExecutor
from .executors.gsc_insights_executor import GSCInsightsExecutor
from .executors.bing_insights_executor import BingInsightsExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
from .utils.platform_insights_task_loader import load_due_platform_insights_tasks

# Global scheduler instance (initialized on first access)
_scheduler_instance: Optional[TaskScheduler] = None


def get_scheduler() -> TaskScheduler:
    """
    Get the global scheduler instance (singleton pattern).

    Returns:
        TaskScheduler instance
    """
    global _scheduler_instance
    if _scheduler_instance is None:
        _scheduler_instance = TaskScheduler(
            check_interval_minutes=15,
            max_concurrent_executions=10
        )

        # Register monitoring task executor
        monitoring_executor = MonitoringTaskExecutor()
        _scheduler_instance.register_executor(
            'monitoring_task',
            monitoring_executor,
            load_due_monitoring_tasks
        )

        # Register OAuth token monitoring executor
        oauth_token_executor = OAuthTokenMonitoringExecutor()
        _scheduler_instance.register_executor(
            'oauth_token_monitoring',
            oauth_token_executor,
            load_due_oauth_token_monitoring_tasks
        )

        # Register website analysis executor
        website_analysis_executor = WebsiteAnalysisExecutor()
        _scheduler_instance.register_executor(
            'website_analysis',
            website_analysis_executor,
            load_due_website_analysis_tasks
        )

        # Register platform insights executors
        # GSC insights executor
        def load_due_gsc_insights_tasks(db: Session, user_id=None):
            return load_due_platform_insights_tasks(db, user_id, platform='gsc')

        gsc_insights_executor = GSCInsightsExecutor()
        _scheduler_instance.register_executor(
            'gsc_insights',
            gsc_insights_executor,
            load_due_gsc_insights_tasks
        )

        # Bing insights executor
        def load_due_bing_insights_tasks(db: Session, user_id=None):
            return load_due_platform_insights_tasks(db, user_id, platform='bing')

        bing_insights_executor = BingInsightsExecutor()
        _scheduler_instance.register_executor(
            'bing_insights',
            bing_insights_executor,
            load_due_bing_insights_tasks
        )

    return _scheduler_instance


__all__ = [
    'TaskScheduler',
    'TaskExecutor',
    'TaskExecutionResult',
    'MonitoringTaskExecutor',
    'OAuthTokenMonitoringExecutor',
    'WebsiteAnalysisExecutor',
    'GSCInsightsExecutor',
    'BingInsightsExecutor',
    'get_scheduler',
    # Exception handling
    'SchedulerExceptionHandler',
    'SchedulerException',
    'SchedulerErrorType',
    'SchedulerErrorSeverity',
    'TaskExecutionError',
    'DatabaseError',
    'TaskLoaderError',
    'SchedulerConfigError'
]
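
Editor's note (illustrative, not part of the commit): the package initializer above wires every executor into the singleton on first access, and the gsc/bing loaders are thin wrappers around the shared platform-insights loader. A minimal sketch of how application code might reuse that pattern for an additional, hypothetical platform follows; LinkedInInsightsExecutor and load_due_linkedin_insights_tasks are made-up names, and register_executor is called with the same (task_type, executor, loader) signature used above.

from datetime import datetime, timedelta
from sqlalchemy.orm import Session

from services.scheduler import TaskExecutor, TaskExecutionResult, get_scheduler
from services.scheduler.utils.platform_insights_task_loader import load_due_platform_insights_tasks


class LinkedInInsightsExecutor(TaskExecutor):  # hypothetical executor, for illustration only
    async def execute_task(self, task, db: Session) -> TaskExecutionResult:
        # A real executor would fetch insights here; this stub just reports success.
        return TaskExecutionResult(success=True, result_data={'task_id': getattr(task, 'id', None)})

    def calculate_next_execution(self, task, frequency, last_execution=None) -> datetime:
        return (last_execution or datetime.utcnow()) + timedelta(days=1)


def load_due_linkedin_insights_tasks(db: Session, user_id=None):
    # Same wrapper pattern as the gsc/bing loaders defined inside get_scheduler()
    return load_due_platform_insights_tasks(db, user_id, platform='linkedin')


scheduler = get_scheduler()          # first call builds and wires the singleton
assert scheduler is get_scheduler()  # later calls return the same instance
scheduler.register_executor('linkedin_insights', LinkedInInsightsExecutor(), load_due_linkedin_insights_tasks)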
4  backend/services/scheduler/core/__init__.py  Normal file
@@ -0,0 +1,4 @@
"""
Core scheduler components.
"""
195  backend/services/scheduler/core/check_cycle_handler.py  Normal file
@@ -0,0 +1,195 @@
"""
Check Cycle Handler
Handles the main scheduler check cycle that finds and executes due tasks.
"""

from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session

from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed

if TYPE_CHECKING:
    from .scheduler import TaskScheduler

logger = get_service_logger("check_cycle_handler")


async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
    """
    Main scheduler loop: check for due tasks and execute them.
    This runs periodically with intelligent interval adjustment based on active strategies.

    Args:
        scheduler: TaskScheduler instance
    """
    scheduler.stats['total_checks'] += 1
    check_start_time = datetime.utcnow()
    scheduler.stats['last_check'] = check_start_time.isoformat()

    # Track execution summary for this check cycle
    cycle_summary = {
        'tasks_found_by_type': {},
        'tasks_executed_by_type': {},
        'tasks_failed_by_type': {},
        'total_found': 0,
        'total_executed': 0,
        'total_failed': 0
    }

    db = None
    try:
        db = get_db_session()
        if db is None:
            logger.error("[Scheduler Check] ❌ Failed to get database session")
            return

        # Check for active strategies and adjust interval intelligently
        await adjust_check_interval_if_needed(scheduler, db)

        # Check each registered task type
        registered_types = scheduler.registry.get_registered_types()
        for task_type in registered_types:
            type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
            if type_summary:
                cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
                cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
                cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)

        # Calculate totals
        cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
        cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
        cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())

        # Log comprehensive check cycle summary
        check_duration = (datetime.utcnow() - check_start_time).total_seconds()
        active_strategies = scheduler.stats.get('active_strategies_count', 0)
        active_executions = len(scheduler.active_executions)

        # Build comprehensive check cycle summary log message
        check_lines = [
            f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
            f" ├─ Duration: {check_duration:.2f}s",
            f" ├─ Active Strategies: {active_strategies}",
            f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
            f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
            f" ├─ Tasks Found: {cycle_summary['total_found']} total"
        ]

        if cycle_summary['tasks_found_by_type']:
            task_types_list = list(cycle_summary['tasks_found_by_type'].items())
            for idx, (task_type, count) in enumerate(task_types_list):
                executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
                failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
                is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
                prefix = " └─" if is_last_task_type else " ├─"
                check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")

        if cycle_summary['total_found'] > 0:
            check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
            check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
            check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
        else:
            check_lines.append(f" └─ No tasks found - scheduler idle")

        # Log comprehensive check cycle summary in single message
        logger.warning("\n".join(check_lines))

        # Save check cycle event to database for historical tracking
        event_log_id = None
        try:
            event_log = SchedulerEventLog(
                event_type='check_cycle',
                event_date=check_start_time,
                check_cycle_number=scheduler.stats['total_checks'],
                check_interval_minutes=scheduler.current_check_interval_minutes,
                tasks_found=cycle_summary.get('total_found', 0),
                tasks_executed=cycle_summary.get('total_executed', 0),
                tasks_failed=cycle_summary.get('total_failed', 0),
                tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
                check_duration_seconds=check_duration,
                active_strategies_count=active_strategies,
                active_executions=active_executions,
                event_data={
                    'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
                    'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
                }
            )
            db.add(event_log)
            db.flush()  # Flush to get the ID without committing
            event_log_id = event_log.id
            db.commit()
            logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
        except Exception as e:
            logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
            if db:
                db.rollback()
            # Continue execution even if event log save fails

        # Update cumulative stats table (persistent across restarts)
        try:
            cumulative_stats = SchedulerCumulativeStats.get_or_create(db)

            # Update cumulative metrics by adding this cycle's values
            # Get current cycle values (incremental, not total)
            cycle_tasks_found = cycle_summary.get('total_found', 0)
            cycle_tasks_executed = cycle_summary.get('total_executed', 0)
            cycle_tasks_failed = cycle_summary.get('total_failed', 0)

            # Update cumulative totals (additive)
            cumulative_stats.total_check_cycles += 1
            cumulative_stats.cumulative_tasks_found += cycle_tasks_found
            cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
            cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
            # Note: tasks_skipped in scheduler.stats is a running total, not per-cycle.
            # We track it as-is from scheduler.stats (it's already cumulative).
            # This ensures we don't double-count skipped tasks.
            if cumulative_stats.cumulative_tasks_skipped is None:
                cumulative_stats.cumulative_tasks_skipped = 0
            # Update to current total from scheduler (which is already cumulative)
            current_skipped = scheduler.stats.get('tasks_skipped', 0)
            if current_skipped > cumulative_stats.cumulative_tasks_skipped:
                cumulative_stats.cumulative_tasks_skipped = current_skipped
            cumulative_stats.last_check_cycle_id = event_log_id
            cumulative_stats.last_updated = datetime.utcnow()
            cumulative_stats.updated_at = datetime.utcnow()

            db.commit()
            # Log at DEBUG level to avoid noise during normal operation.
            # This is expected behavior, not a warning.
            logger.debug(
                f"[Check Cycle] Updated cumulative stats: "
                f"cycles={cumulative_stats.total_check_cycles}, "
                f"found={cumulative_stats.cumulative_tasks_found}, "
                f"executed={cumulative_stats.cumulative_tasks_executed}, "
                f"failed={cumulative_stats.cumulative_tasks_failed}"
            )
        except Exception as e:
            logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
            if db:
                db.rollback()
            # Log warning but continue - cumulative stats can be rebuilt from event logs
            logger.warning(
                "[Check Cycle] ⚠️ Cumulative stats update failed. "
                "Stats can be rebuilt from event logs on next dashboard load."
            )

        # Update last_update timestamp for frontend polling
        scheduler.stats['last_update'] = datetime.utcnow().isoformat()

    except Exception as e:
        error = DatabaseError(
            message=f"Error checking for due tasks: {str(e)}",
            original_error=e
        )
        scheduler.exception_handler.handle_exception(error)
        logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
    finally:
        if db:
            db.close()
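
Editor's note (illustrative, not part of the commit): _process_task_type lives elsewhere in the scheduler, so the shape of its return value is only implied by the .get() calls above. A small sketch, with hypothetical per-type numbers, of how those summaries fold into the cycle totals:

# Hypothetical per-type results as scheduler._process_task_type might report them
per_type = {
    'monitoring_task': {'found': 3, 'executed': 3, 'failed': 0},
    'gsc_insights': {'found': 2, 'executed': 1, 'failed': 1},
}

cycle_summary = {'tasks_found_by_type': {}, 'tasks_executed_by_type': {}, 'tasks_failed_by_type': {}}
for task_type, type_summary in per_type.items():
    cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
    cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
    cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)

cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())        # 5
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())  # 4
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())      # 1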
395  backend/services/scheduler/core/exception_handler.py  Normal file
@@ -0,0 +1,395 @@
"""
Comprehensive Exception Handling and Logging for Task Scheduler
Provides robust error handling, logging, and monitoring for the scheduler system.
"""

import traceback
import sys
from datetime import datetime
from typing import Dict, Any, Optional, Union
from enum import Enum
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError, OperationalError, IntegrityError

from utils.logger_utils import get_service_logger

logger = get_service_logger("scheduler_exception_handler")


class SchedulerErrorType(Enum):
    """Error types for the scheduler system."""
    DATABASE_ERROR = "database_error"
    TASK_EXECUTION_ERROR = "task_execution_error"
    TASK_LOADER_ERROR = "task_loader_error"
    SCHEDULER_CONFIG_ERROR = "scheduler_config_error"
    RETRY_ERROR = "retry_error"
    CONCURRENCY_ERROR = "concurrency_error"
    TIMEOUT_ERROR = "timeout_error"


class SchedulerErrorSeverity(Enum):
    """Severity levels for scheduler errors."""
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"


class SchedulerException(Exception):
    """Base exception for scheduler system errors."""

    def __init__(
        self,
        message: str,
        error_type: SchedulerErrorType,
        severity: SchedulerErrorSeverity = SchedulerErrorSeverity.MEDIUM,
        user_id: Optional[int] = None,
        task_id: Optional[int] = None,
        task_type: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
        original_error: Optional[Exception] = None
    ):
        self.message = message
        self.error_type = error_type
        self.severity = severity
        self.user_id = user_id
        self.task_id = task_id
        self.task_type = task_type
        self.context = context or {}
        self.original_error = original_error
        self.timestamp = datetime.utcnow()

        # Capture stack trace if an original error was provided
        self.stack_trace = None
        if self.original_error:
            try:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                if exc_traceback:
                    self.stack_trace = ''.join(traceback.format_exception(
                        exc_type, exc_value, exc_traceback
                    ))
                else:
                    self.stack_trace = ''.join(traceback.format_exception(
                        type(self.original_error),
                        self.original_error,
                        self.original_error.__traceback__
                    ))
            except Exception:
                self.stack_trace = str(self.original_error)

        super().__init__(message)

    def to_dict(self) -> Dict[str, Any]:
        """Convert exception to dictionary for logging/storage."""
        return {
            "message": self.message,
            "error_type": self.error_type.value,
            "severity": self.severity.value,
            "user_id": self.user_id,
            "task_id": self.task_id,
            "task_type": self.task_type,
            "context": self.context,
            "timestamp": self.timestamp.isoformat() if isinstance(self.timestamp, datetime) else self.timestamp,
            "original_error": str(self.original_error) if self.original_error else None,
            "stack_trace": self.stack_trace
        }

    def __str__(self):
        return f"[{self.error_type.value}] {self.message}"


class DatabaseError(SchedulerException):
    """Exception raised for database-related errors."""

    def __init__(
        self,
        message: str,
        user_id: Optional[int] = None,
        task_id: Optional[int] = None,
        context: Dict[str, Any] = None,
        original_error: Exception = None
    ):
        super().__init__(
            message=message,
            error_type=SchedulerErrorType.DATABASE_ERROR,
            severity=SchedulerErrorSeverity.CRITICAL,
            user_id=user_id,
            task_id=task_id,
            context=context or {},
            original_error=original_error
        )


class TaskExecutionError(SchedulerException):
    """Exception raised for task execution failures."""

    def __init__(
        self,
        message: str,
        user_id: Optional[int] = None,
        task_id: Optional[int] = None,
        task_type: Optional[str] = None,
        retry_count: int = 0,
        execution_time_ms: Optional[int] = None,
        context: Dict[str, Any] = None,
        original_error: Exception = None
    ):
        context = context or {}
        context.update({
            "retry_count": retry_count,
            "execution_time_ms": execution_time_ms
        })

        super().__init__(
            message=message,
            error_type=SchedulerErrorType.TASK_EXECUTION_ERROR,
            severity=SchedulerErrorSeverity.HIGH,
            user_id=user_id,
            task_id=task_id,
            task_type=task_type,
            context=context,
            original_error=original_error
        )


class TaskLoaderError(SchedulerException):
    """Exception raised for task loading failures."""

    def __init__(
        self,
        message: str,
        task_type: Optional[str] = None,
        user_id: Optional[int] = None,
        context: Dict[str, Any] = None,
        original_error: Exception = None
    ):
        super().__init__(
            message=message,
            error_type=SchedulerErrorType.TASK_LOADER_ERROR,
            severity=SchedulerErrorSeverity.HIGH,
            user_id=user_id,
            task_type=task_type,
            context=context or {},
            original_error=original_error
        )


class SchedulerConfigError(SchedulerException):
    """Exception raised for scheduler configuration errors."""

    def __init__(
        self,
        message: str,
        context: Dict[str, Any] = None,
        original_error: Exception = None
    ):
        super().__init__(
            message=message,
            error_type=SchedulerErrorType.SCHEDULER_CONFIG_ERROR,
            severity=SchedulerErrorSeverity.CRITICAL,
            context=context or {},
            original_error=original_error
        )


class SchedulerExceptionHandler:
    """Comprehensive exception handler for the scheduler system."""

    def __init__(self, db: Session = None):
        self.db = db
        self.logger = logger

    def handle_exception(
        self,
        error: Union[Exception, SchedulerException],
        context: Dict[str, Any] = None,
        log_level: str = "error"
    ) -> Dict[str, Any]:
        """Handle and log scheduler exceptions."""

        context = context or {}

        # Convert regular exceptions to SchedulerException
        if not isinstance(error, SchedulerException):
            error = SchedulerException(
                message=str(error),
                error_type=self._classify_error(error),
                severity=self._determine_severity(error),
                context=context,
                original_error=error
            )

        # Log the error
        error_data = error.to_dict()
        error_data.update(context)

        log_message = f"Scheduler Error: {error.message}"

        if log_level == "critical" or error.severity == SchedulerErrorSeverity.CRITICAL:
            self.logger.critical(log_message, extra={"error_data": error_data})
        elif log_level == "error" or error.severity == SchedulerErrorSeverity.HIGH:
            self.logger.error(log_message, extra={"error_data": error_data})
        elif log_level == "warning" or error.severity == SchedulerErrorSeverity.MEDIUM:
            self.logger.warning(log_message, extra={"error_data": error_data})
        else:
            self.logger.info(log_message, extra={"error_data": error_data})

        # Store critical errors in database for alerting
        if error.severity in [SchedulerErrorSeverity.HIGH, SchedulerErrorSeverity.CRITICAL]:
            self._store_error_alert(error)

        # Return formatted error response
        return self._format_error_response(error)

    def _classify_error(self, error: Exception) -> SchedulerErrorType:
        """Classify an exception into a scheduler error type."""

        error_str = str(error).lower()
        error_type_name = type(error).__name__.lower()

        # Database errors
        if isinstance(error, (SQLAlchemyError, OperationalError, IntegrityError)):
            return SchedulerErrorType.DATABASE_ERROR
        if "database" in error_str or "sql" in error_type_name or "connection" in error_str:
            return SchedulerErrorType.DATABASE_ERROR

        # Timeout errors
        if "timeout" in error_str or "timed out" in error_str:
            return SchedulerErrorType.TIMEOUT_ERROR

        # Concurrency errors
        if "concurrent" in error_str or "race" in error_str or "lock" in error_str:
            return SchedulerErrorType.CONCURRENCY_ERROR

        # Task execution errors
        if "task" in error_str and "execut" in error_str:
            return SchedulerErrorType.TASK_EXECUTION_ERROR

        # Task loader errors
        if "load" in error_str and "task" in error_str:
            return SchedulerErrorType.TASK_LOADER_ERROR

        # Retry errors
        if "retry" in error_str:
            return SchedulerErrorType.RETRY_ERROR

        # Config errors
        if "config" in error_str or ("scheduler" in error_str and "init" in error_str):
            return SchedulerErrorType.SCHEDULER_CONFIG_ERROR

        # Default to task execution error for unknown errors
        return SchedulerErrorType.TASK_EXECUTION_ERROR

    def _determine_severity(self, error: Exception) -> SchedulerErrorSeverity:
        """Determine the severity of an error."""

        error_str = str(error).lower()

        # Critical errors
        if isinstance(error, (SQLAlchemyError, OperationalError, ConnectionError)):
            return SchedulerErrorSeverity.CRITICAL
        if "database" in error_str or "connection" in error_str:
            return SchedulerErrorSeverity.CRITICAL

        # High severity errors
        if "timeout" in error_str or "concurrent" in error_str:
            return SchedulerErrorSeverity.HIGH
        if isinstance(error, (KeyError, AttributeError)) and "config" in error_str:
            return SchedulerErrorSeverity.HIGH

        # Medium severity errors
        if "task" in error_str or "execution" in error_str:
            return SchedulerErrorSeverity.MEDIUM

        # Default to low
        return SchedulerErrorSeverity.LOW

    def _store_error_alert(self, error: SchedulerException):
        """Store critical errors in the database for alerting."""

        if not self.db:
            return

        try:
            # Import here to avoid circular dependencies
            from models.monitoring_models import TaskExecutionLog

            # Store as a failed execution log if we have task_id (even without user_id for system errors)
            if error.task_id:
                try:
                    execution_log = TaskExecutionLog(
                        task_id=error.task_id,
                        user_id=error.user_id,  # Can be None for system-level errors
                        execution_date=error.timestamp,
                        status='failed',
                        error_message=error.message,
                        result_data={
                            "error_type": error.error_type.value,
                            "severity": error.severity.value,
                            "context": error.context,
                            "stack_trace": error.stack_trace,
                            "task_type": error.task_type
                        }
                    )
                    self.db.add(execution_log)
                    self.db.commit()
                    self.logger.info(f"Stored error alert in execution log for task {error.task_id}")
                except Exception as e:
                    self.logger.error(f"Failed to store error in execution log: {e}")
                    self.db.rollback()
            # Note: For errors without task_id, we rely on structured logging only.
            # Future: could create a separate scheduler_error_logs table for system-level errors.

        except Exception as e:
            self.logger.error(f"Failed to store error alert: {e}")

    def _format_error_response(self, error: SchedulerException) -> Dict[str, Any]:
        """Format error for API response or logging."""

        response = {
            "success": False,
            "error": {
                "type": error.error_type.value,
                "message": error.message,
                "severity": error.severity.value,
                "timestamp": error.timestamp.isoformat() if isinstance(error.timestamp, datetime) else str(error.timestamp),
                "user_id": error.user_id,
                "task_id": error.task_id,
                "task_type": error.task_type
            }
        }

        # Add context for debugging (non-sensitive info only)
        if error.context:
            safe_context = {
                k: v for k, v in error.context.items()
                if k not in ["password", "token", "key", "secret", "credential"]
            }
            response["error"]["context"] = safe_context

        # Add user-friendly message based on error type
        user_messages = {
            SchedulerErrorType.DATABASE_ERROR:
                "A database error occurred while processing the task. Please try again later.",
            SchedulerErrorType.TASK_EXECUTION_ERROR:
                "The task failed to execute. Please check the task configuration and try again.",
            SchedulerErrorType.TASK_LOADER_ERROR:
                "Failed to load tasks. The scheduler may be experiencing issues.",
            SchedulerErrorType.SCHEDULER_CONFIG_ERROR:
                "The scheduler configuration is invalid. Contact support.",
            SchedulerErrorType.RETRY_ERROR:
                "Task retry failed. The task will be rescheduled.",
            SchedulerErrorType.CONCURRENCY_ERROR:
                "A concurrency issue occurred. The task will be retried.",
            SchedulerErrorType.TIMEOUT_ERROR:
                "The task execution timed out. The task will be retried."
        }

        response["error"]["user_message"] = user_messages.get(
            error.error_type,
            "An error occurred while processing the task."
        )

        return response
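
Editor's note (illustrative, not part of the commit): a minimal sketch of how a caller might route an arbitrary exception through the handler. The context values are hypothetical, and with db=None the high-severity alert path is log-only.

handler = SchedulerExceptionHandler(db=None)  # without a session, _store_error_alert returns early

try:
    raise TimeoutError("GSC fetch timeout after 30s")
except Exception as exc:
    result = handler.handle_exception(exc, context={"task_id": 42, "platform": "gsc"})

# result["error"]["type"]         -> "timeout_error" (from _classify_error)
# result["error"]["severity"]     -> "high" (from _determine_severity)
# result["error"]["user_message"] -> "The task execution timed out. The task will be retried."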
75  backend/services/scheduler/core/executor_interface.py  Normal file
@@ -0,0 +1,75 @@
"""
Task Executor Interface
Abstract base class for all task executors.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
from dataclasses import dataclass
from datetime import datetime
from sqlalchemy.orm import Session


@dataclass
class TaskExecutionResult:
    """Result of task execution."""
    success: bool
    error_message: Optional[str] = None
    result_data: Optional[Dict[str, Any]] = None
    execution_time_ms: Optional[int] = None
    retryable: bool = True
    retry_delay: int = 300  # seconds

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            'success': self.success,
            'error_message': self.error_message,
            'result_data': self.result_data,
            'execution_time_ms': self.execution_time_ms,
            'retryable': self.retryable,
            'retry_delay': self.retry_delay
        }


class TaskExecutor(ABC):
    """
    Abstract base class for task executors.

    Each task type must implement this interface to be schedulable.
    """

    @abstractmethod
    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """
        Execute a task.

        Args:
            task: Task instance from database
            db: Database session

        Returns:
            TaskExecutionResult with execution details
        """
        pass

    @abstractmethod
    def calculate_next_execution(
        self,
        task: Any,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        Args:
            task: Task instance
            frequency: Task frequency (e.g., 'Daily', 'Weekly')
            last_execution: Last execution datetime

        Returns:
            Next execution datetime
        """
        pass
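
Editor's note (illustrative, not part of the commit): a sketch of what a concrete executor might look like, showing how execution_time_ms and retryable could be populated and how a frequency string maps to the next run. The 'Daily'/'Weekly' strings come from the docstring above; the 'Monthly' entry and the 30-day approximation are assumptions.

import time
from datetime import datetime, timedelta
from typing import Any, Optional

from sqlalchemy.orm import Session

from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult


class ExampleInsightsExecutor(TaskExecutor):  # hypothetical subclass, for illustration only
    FREQUENCY_DELTAS = {
        'Daily': timedelta(days=1),
        'Weekly': timedelta(weeks=1),
        'Monthly': timedelta(days=30),  # assumption: months approximated as 30 days
    }

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        started = time.monotonic()
        try:
            # ... perform the real work for `task` here ...
            return TaskExecutionResult(
                success=True,
                result_data={'task_id': getattr(task, 'id', None)},
                execution_time_ms=int((time.monotonic() - started) * 1000),
            )
        except Exception as e:
            # Report the failure as retryable so the scheduler can try again after retry_delay
            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=int((time.monotonic() - started) * 1000),
                retryable=True,
            )

    def calculate_next_execution(
        self, task: Any, frequency: str, last_execution: Optional[datetime] = None
    ) -> datetime:
        base = last_execution or datetime.utcnow()
        return base + self.FREQUENCY_DELTAS.get(frequency, timedelta(days=1))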
378  backend/services/scheduler/core/failure_detection_service.py  Normal file
@@ -0,0 +1,378 @@
"""
Failure Detection Service
Analyzes execution logs to detect failure patterns and mark tasks for human intervention.
"""

from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from sqlalchemy.orm import Session
from enum import Enum
import json

from utils.logger_utils import get_service_logger

logger = get_service_logger("failure_detection")


class FailureReason(Enum):
    """Categories of failure reasons."""
    API_LIMIT = "api_limit"          # 429, rate limits, quota exceeded
    AUTH_ERROR = "auth_error"        # 401, 403, token expired
    NETWORK_ERROR = "network_error"  # Connection errors, timeouts
    CONFIG_ERROR = "config_error"    # Missing config, invalid parameters
    UNKNOWN = "unknown"              # Other errors


class FailurePattern:
    """Represents a failure pattern for a task."""

    def __init__(
        self,
        task_id: int,
        task_type: str,
        user_id: str,
        consecutive_failures: int,
        recent_failures: int,
        failure_reason: FailureReason,
        last_failure_time: Optional[datetime],
        error_patterns: List[str],
        should_cool_off: bool
    ):
        self.task_id = task_id
        self.task_type = task_type
        self.user_id = user_id
        self.consecutive_failures = consecutive_failures
        self.recent_failures = recent_failures
        self.failure_reason = failure_reason
        self.last_failure_time = last_failure_time
        self.error_patterns = error_patterns
        self.should_cool_off = should_cool_off


class FailureDetectionService:
    """Service for detecting failure patterns in task execution logs."""

    # Cool-off thresholds
    CONSECUTIVE_FAILURE_THRESHOLD = 3  # 3 consecutive failures
    RECENT_FAILURE_THRESHOLD = 5       # 5 failures in last 7 days
    COOL_OFF_PERIOD_DAYS = 7           # Cool-off period after marking for intervention

    def __init__(self, db: Session):
        self.db = db
        self.logger = logger

    def analyze_task_failures(
        self,
        task_id: int,
        task_type: str,
        user_id: str
    ) -> Optional[FailurePattern]:
        """
        Analyze failure patterns for a specific task.

        Args:
            task_id: Task ID
            task_type: Task type (oauth_token_monitoring, website_analysis, etc.)
            user_id: User ID

        Returns:
            FailurePattern if pattern detected, None otherwise
        """
        try:
            # Get execution logs for this task
            execution_logs = self._get_execution_logs(task_id, task_type)

            if not execution_logs:
                return None

            # Analyze failure patterns
            consecutive_failures = self._count_consecutive_failures(execution_logs)
            recent_failures = self._count_recent_failures(execution_logs, days=7)
            failure_reason = self._classify_failure_reason(execution_logs)
            error_patterns = self._extract_error_patterns(execution_logs)
            last_failure_time = self._get_last_failure_time(execution_logs)

            # Determine if task should be cooled off
            should_cool_off = (
                consecutive_failures >= self.CONSECUTIVE_FAILURE_THRESHOLD or
                recent_failures >= self.RECENT_FAILURE_THRESHOLD
            )

            if should_cool_off:
                self.logger.warning(
                    f"Failure pattern detected for task {task_id} ({task_type}): "
                    f"consecutive={consecutive_failures}, recent={recent_failures}, "
                    f"reason={failure_reason.value}"
                )

            return FailurePattern(
                task_id=task_id,
                task_type=task_type,
                user_id=user_id,
                consecutive_failures=consecutive_failures,
                recent_failures=recent_failures,
                failure_reason=failure_reason,
                last_failure_time=last_failure_time,
                error_patterns=error_patterns,
                should_cool_off=should_cool_off
            )

        except Exception as e:
            self.logger.error(f"Error analyzing task failures for task {task_id}: {e}", exc_info=True)
            return None

    def _get_execution_logs(self, task_id: int, task_type: str) -> List[Dict[str, Any]]:
        """Get execution logs for a task."""
        try:
            if task_type == "oauth_token_monitoring":
                from models.oauth_token_monitoring_models import OAuthTokenExecutionLog
                logs = self.db.query(OAuthTokenExecutionLog).filter(
                    OAuthTokenExecutionLog.task_id == task_id
                ).order_by(OAuthTokenExecutionLog.execution_date.desc()).all()

                return [
                    {
                        "status": log.status,
                        "error_message": log.error_message,
                        "execution_date": log.execution_date,
                        "result_data": log.result_data
                    }
                    for log in logs
                ]
            elif task_type == "website_analysis":
                from models.website_analysis_monitoring_models import WebsiteAnalysisExecutionLog
                logs = self.db.query(WebsiteAnalysisExecutionLog).filter(
                    WebsiteAnalysisExecutionLog.task_id == task_id
                ).order_by(WebsiteAnalysisExecutionLog.execution_date.desc()).all()

                return [
                    {
                        "status": log.status,
                        "error_message": log.error_message,
                        "execution_date": log.execution_date,
                        "result_data": log.result_data
                    }
                    for log in logs
                ]
            elif task_type in ["gsc_insights", "bing_insights", "platform_insights"]:
                from models.platform_insights_monitoring_models import PlatformInsightsExecutionLog
                logs = self.db.query(PlatformInsightsExecutionLog).filter(
                    PlatformInsightsExecutionLog.task_id == task_id
                ).order_by(PlatformInsightsExecutionLog.execution_date.desc()).all()

                return [
                    {
                        "status": log.status,
                        "error_message": log.error_message,
                        "execution_date": log.execution_date,
                        "result_data": log.result_data
                    }
                    for log in logs
                ]
            else:
                # Fallback to monitoring_task execution logs
                from models.monitoring_models import TaskExecutionLog
                logs = self.db.query(TaskExecutionLog).filter(
                    TaskExecutionLog.task_id == task_id
                ).order_by(TaskExecutionLog.execution_date.desc()).all()

                return [
                    {
                        "status": log.status,
                        "error_message": log.error_message,
                        "execution_date": log.execution_date,
                        "result_data": log.result_data
                    }
                    for log in logs
                ]
        except Exception as e:
            self.logger.error(f"Error getting execution logs for task {task_id}: {e}", exc_info=True)
            return []

    def _count_consecutive_failures(self, logs: List[Dict[str, Any]]) -> int:
        """Count consecutive failures from most recent."""
        count = 0
        for log in logs:
            if log["status"] == "failed":
                count += 1
            else:
                break  # Stop at first success
        return count

    def _count_recent_failures(self, logs: List[Dict[str, Any]], days: int = 7) -> int:
        """Count failures in the last N days."""
        cutoff = datetime.utcnow() - timedelta(days=days)
        return sum(
            1 for log in logs
            if log["status"] == "failed" and log["execution_date"] >= cutoff
        )

    def _classify_failure_reason(self, logs: List[Dict[str, Any]]) -> FailureReason:
        """Classify the primary failure reason from error messages."""
        # Check most recent failures first
        recent_failures = [log for log in logs if log["status"] == "failed"][:5]

        for log in recent_failures:
            error_message = (log.get("error_message") or "").lower()
            result_data = log.get("result_data") or {}

            # Check for API limits (429)
            if "429" in error_message or "rate limit" in error_message or "limit reached" in error_message:
                return FailureReason.API_LIMIT

            # Check result_data for API limit info
            if isinstance(result_data, dict):
                if result_data.get("error_status") == 429:
                    return FailureReason.API_LIMIT
                if "limit" in str(result_data).lower() and "reached" in str(result_data).lower():
                    return FailureReason.API_LIMIT
                # Check for usage info indicating limits
                usage_info = result_data.get("usage_info", {})
                if isinstance(usage_info, dict):
                    if usage_info.get("usage_percentage", 0) >= 100:
                        return FailureReason.API_LIMIT

            # Check for auth errors
            if "401" in error_message or "403" in error_message or "unauthorized" in error_message or "forbidden" in error_message:
                return FailureReason.AUTH_ERROR
            if "token" in error_message and ("expired" in error_message or "invalid" in error_message):
                return FailureReason.AUTH_ERROR

            # Check for network errors
            if "timeout" in error_message or "connection" in error_message or "network" in error_message:
                return FailureReason.NETWORK_ERROR

            # Check for config errors
            if "config" in error_message or "missing" in error_message or "invalid" in error_message:
                return FailureReason.CONFIG_ERROR

        return FailureReason.UNKNOWN

    def _extract_error_patterns(self, logs: List[Dict[str, Any]]) -> List[str]:
        """Extract common error patterns from failure logs."""
        patterns = []
        recent_failures = [log for log in logs if log["status"] == "failed"][:5]

        for log in recent_failures:
            error_message = log.get("error_message") or ""
            if error_message:
                # Extract key phrases (first 100 chars)
                pattern = error_message[:100].strip()
                if pattern and pattern not in patterns:
                    patterns.append(pattern)

        return patterns[:3]  # Return top 3 patterns

    def _get_last_failure_time(self, logs: List[Dict[str, Any]]) -> Optional[datetime]:
        """Get the timestamp of the most recent failure."""
        for log in logs:
            if log["status"] == "failed":
                return log["execution_date"]
        return None

    def get_tasks_needing_intervention(
        self,
        user_id: Optional[str] = None,
        task_type: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Get all tasks that need human intervention.

        Args:
            user_id: Optional user ID filter
            task_type: Optional task type filter

        Returns:
            List of task dictionaries with failure pattern info
        """
        try:
            tasks_needing_intervention = []

            # Check OAuth token monitoring tasks
            from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
            oauth_tasks = self.db.query(OAuthTokenMonitoringTask).filter(
                OAuthTokenMonitoringTask.status == "needs_intervention"
            )
            if user_id:
                oauth_tasks = oauth_tasks.filter(OAuthTokenMonitoringTask.user_id == user_id)

            for task in oauth_tasks.all():
                pattern = self.analyze_task_failures(task.id, "oauth_token_monitoring", task.user_id)
                if pattern:
                    tasks_needing_intervention.append({
                        "task_id": task.id,
                        "task_type": "oauth_token_monitoring",
                        "user_id": task.user_id,
                        "platform": task.platform,
                        "failure_pattern": {
                            "consecutive_failures": pattern.consecutive_failures,
                            "recent_failures": pattern.recent_failures,
                            "failure_reason": pattern.failure_reason.value,
                            "last_failure_time": pattern.last_failure_time.isoformat() if pattern.last_failure_time else None,
                            "error_patterns": pattern.error_patterns
                        },
                        "failure_reason": task.failure_reason,
                        "last_failure": task.last_failure.isoformat() if task.last_failure else None
                    })

            # Check website analysis tasks
            from models.website_analysis_monitoring_models import WebsiteAnalysisTask
            website_tasks = self.db.query(WebsiteAnalysisTask).filter(
                WebsiteAnalysisTask.status == "needs_intervention"
            )
            if user_id:
                website_tasks = website_tasks.filter(WebsiteAnalysisTask.user_id == user_id)

            for task in website_tasks.all():
                pattern = self.analyze_task_failures(task.id, "website_analysis", task.user_id)
                if pattern:
                    tasks_needing_intervention.append({
                        "task_id": task.id,
                        "task_type": "website_analysis",
                        "user_id": task.user_id,
                        "website_url": task.website_url,
                        "failure_pattern": {
                            "consecutive_failures": pattern.consecutive_failures,
                            "recent_failures": pattern.recent_failures,
                            "failure_reason": pattern.failure_reason.value,
                            "last_failure_time": pattern.last_failure_time.isoformat() if pattern.last_failure_time else None,
                            "error_patterns": pattern.error_patterns
                        },
                        "failure_reason": task.failure_reason,
                        "last_failure": task.last_failure.isoformat() if task.last_failure else None
                    })

            # Check platform insights tasks
            from models.platform_insights_monitoring_models import PlatformInsightsTask
            insights_tasks = self.db.query(PlatformInsightsTask).filter(
                PlatformInsightsTask.status == "needs_intervention"
            )
            if user_id:
                insights_tasks = insights_tasks.filter(PlatformInsightsTask.user_id == user_id)

            for task in insights_tasks.all():
                task_type_str = f"{task.platform}_insights"
                pattern = self.analyze_task_failures(task.id, task_type_str, task.user_id)
                if pattern:
                    tasks_needing_intervention.append({
                        "task_id": task.id,
                        "task_type": task_type_str,
                        "user_id": task.user_id,
                        "platform": task.platform,
                        "failure_pattern": {
                            "consecutive_failures": pattern.consecutive_failures,
                            "recent_failures": pattern.recent_failures,
                            "failure_reason": pattern.failure_reason.value,
                            "last_failure_time": pattern.last_failure_time.isoformat() if pattern.last_failure_time else None,
                            "error_patterns": pattern.error_patterns
                        },
                        "failure_reason": task.failure_reason,
                        "last_failure": task.last_failure.isoformat() if task.last_failure else None
                    })

            return tasks_needing_intervention

        except Exception as e:
            self.logger.error(f"Error getting tasks needing intervention: {e}", exc_info=True)
            return []
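
Editor's note (illustrative, not part of the commit): a sketch of how a caller might use the service to decide whether a task should be cooled off. The IDs are hypothetical; get_db_session is the session factory used elsewhere in this commit.

from services.database import get_db_session
from services.scheduler.core.failure_detection_service import FailureDetectionService

db = get_db_session()
try:
    service = FailureDetectionService(db)
    # Hypothetical task/user IDs, for illustration only
    pattern = service.analyze_task_failures(task_id=101, task_type="website_analysis", user_id="user_abc")

    if pattern and pattern.should_cool_off:
        # Triggered by 3+ consecutive failures or 5+ failures in the last 7 days;
        # a caller might now set the task's status to "needs_intervention".
        print(pattern.failure_reason.value, pattern.error_patterns)
finally:
    db.close()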
139  backend/services/scheduler/core/interval_manager.py  Normal file
@@ -0,0 +1,139 @@
"""
Interval Manager
Handles intelligent scheduling interval adjustment based on active strategies.
"""

from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session

from services.database import get_db_session
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog

if TYPE_CHECKING:
    from .scheduler import TaskScheduler

logger = get_service_logger("interval_manager")


async def determine_optimal_interval(
    scheduler: 'TaskScheduler',
    min_interval: int,
    max_interval: int
) -> int:
    """
    Determine optimal check interval based on active strategies.

    Args:
        scheduler: TaskScheduler instance
        min_interval: Minimum check interval in minutes
        max_interval: Maximum check interval in minutes

    Returns:
        Optimal check interval in minutes
    """
    db = None
    try:
        db = get_db_session()
        if db:
            from services.active_strategy_service import ActiveStrategyService
            active_strategy_service = ActiveStrategyService(db_session=db)
            active_count = active_strategy_service.count_active_strategies_with_tasks()
            scheduler.stats['active_strategies_count'] = active_count

            if active_count > 0:
                logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
                return min_interval
            else:
                logger.info(f"No active strategies with tasks - using {max_interval}min interval")
                return max_interval
    except Exception as e:
        logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
    finally:
        if db:
            db.close()

    # Default to shorter interval on error (safer)
    return min_interval


async def adjust_check_interval_if_needed(
    scheduler: 'TaskScheduler',
    db: Session
):
    """
    Intelligently adjust check interval based on active strategies.

    If there are active strategies with tasks, check more frequently.
    If there are no active strategies, check less frequently.

    Args:
        scheduler: TaskScheduler instance
        db: Database session
    """
    try:
        from services.active_strategy_service import ActiveStrategyService

        active_strategy_service = ActiveStrategyService(db_session=db)
        active_count = active_strategy_service.count_active_strategies_with_tasks()
        scheduler.stats['active_strategies_count'] = active_count

        # Determine optimal interval
        if active_count > 0:
            optimal_interval = scheduler.min_check_interval_minutes
        else:
            optimal_interval = scheduler.max_check_interval_minutes

        # Only reschedule if interval needs to change
        if optimal_interval != scheduler.current_check_interval_minutes:
            interval_message = (
                f"[Scheduler] ⚙️ Adjusting Check Interval\n"
                f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
                f" ├─ Optimal: {optimal_interval}min\n"
                f" ├─ Active Strategies: {active_count}\n"
                f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
            )
            logger.warning(interval_message)

            # Reschedule the job with new interval
            scheduler.scheduler.modify_job(
                'check_due_tasks',
                trigger=scheduler._get_trigger_for_interval(optimal_interval)
            )

            # Save previous interval before updating
            previous_interval = scheduler.current_check_interval_minutes

            # Update current interval
            scheduler.current_check_interval_minutes = optimal_interval
            scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

            # Save interval adjustment event to database
            try:
                event_db = get_db_session()
                if event_db:
                    event_log = SchedulerEventLog(
                        event_type='interval_adjustment',
                        event_date=datetime.utcnow(),
                        previous_interval_minutes=previous_interval,
                        new_interval_minutes=optimal_interval,
                        check_interval_minutes=optimal_interval,
                        active_strategies_count=active_count,
                        event_data={
                            'reason': 'intelligent_scheduling',
                            'min_interval': scheduler.min_check_interval_minutes,
                            'max_interval': scheduler.max_check_interval_minutes
                        }
                    )
                    event_db.add(event_log)
                    event_db.commit()
                    event_db.close()
            except Exception as e:
                logger.warning(f"Failed to save interval adjustment event log: {e}")

            logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")

    except Exception as e:
        logger.warning(f"Error adjusting check interval: {e}")
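
Editor's note (illustrative, not part of the commit): the adjustment logic above reduces to a two-value choice. The 5/60-minute defaults below are hypothetical; the real bounds live on TaskScheduler as min_check_interval_minutes and max_check_interval_minutes.

def pick_interval(active_count: int, min_interval: int = 5, max_interval: int = 60) -> int:
    # Check frequently while any active strategy has tasks, otherwise back off
    return min_interval if active_count > 0 else max_interval

assert pick_interval(3) == 5
assert pick_interval(0) == 60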
269  backend/services/scheduler/core/job_restoration.py  Normal file
@@ -0,0 +1,269 @@
"""
Job Restoration
Handles restoration of one-time jobs (e.g., persona generation) on scheduler startup.
Preserves original scheduled times from database to avoid rescheduling on server restarts.
"""

from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from models.scheduler_models import SchedulerEventLog

if TYPE_CHECKING:
    from .scheduler import TaskScheduler

logger = get_service_logger("job_restoration")


async def restore_persona_jobs(scheduler: 'TaskScheduler'):
    """
    Restore one-time persona generation jobs for users who completed onboarding
    but don't have personas yet. This ensures jobs persist across server restarts.

    IMPORTANT: Preserves original scheduled times from SchedulerEventLog to avoid
    rescheduling jobs with new times on server restarts.

    Args:
        scheduler: TaskScheduler instance
    """
    try:
        db = get_db_session()
        if not db:
            logger.warning("Could not get database session to restore persona jobs")
            return

        try:
            from models.onboarding import OnboardingSession
            from services.research.research_persona_scheduler import (
                schedule_research_persona_generation,
                generate_research_persona_task
            )
            from services.persona.facebook.facebook_persona_scheduler import (
                schedule_facebook_persona_generation,
                generate_facebook_persona_task
            )
            from services.research.research_persona_service import ResearchPersonaService
            from services.persona_data_service import PersonaDataService

            # Get all users who completed onboarding
            completed_sessions = db.query(OnboardingSession).filter(
                OnboardingSession.progress == 100.0
            ).all()

            restored_count = 0
            skipped_count = 0
            now = datetime.utcnow().replace(tzinfo=timezone.utc)

            for session in completed_sessions:
                user_id = session.user_id

                # Restore research persona job
                try:
                    research_service = ResearchPersonaService(db_session=db)
                    persona_data_record = research_service._get_persona_data_record(user_id)
                    research_persona_exists = False

                    if persona_data_record:
                        research_persona_data = getattr(persona_data_record, 'research_persona', None)
                        research_persona_exists = bool(research_persona_data)

                    if not research_persona_exists:
                        # Note: Clerk user_id already includes "user_" prefix
                        job_id = f"research_persona_{user_id}"

                        # Check if job already exists in scheduler (just started, so unlikely)
                        existing_jobs = [j for j in scheduler.scheduler.get_jobs()
                                         if j.id == job_id]

                        if not existing_jobs:
                            # Check SchedulerEventLog for original scheduled time
                            original_scheduled_event = db.query(SchedulerEventLog).filter(
                                SchedulerEventLog.event_type == 'job_scheduled',
                                SchedulerEventLog.job_id == job_id,
                                SchedulerEventLog.user_id == user_id
                            ).order_by(SchedulerEventLog.event_date.desc()).first()

                            # Check if job was already completed or failed
                            completed_event = db.query(SchedulerEventLog).filter(
                                SchedulerEventLog.event_type.in_(['job_completed', 'job_failed']),
                                SchedulerEventLog.job_id == job_id,
                                SchedulerEventLog.user_id == user_id
                            ).order_by(SchedulerEventLog.event_date.desc()).first()

                            if completed_event:
                                # Job was already completed/failed, skip
                                skipped_count += 1
                                logger.debug(f"Research persona job {job_id} already completed/failed, skipping restoration")
                            elif original_scheduled_event and original_scheduled_event.event_data:
                                # Restore with original scheduled time
                                scheduled_for_str = original_scheduled_event.event_data.get('scheduled_for')
                                if scheduled_for_str:
                                    try:
                                        original_time = datetime.fromisoformat(scheduled_for_str.replace('Z', '+00:00'))
                                        if original_time.tzinfo is None:
                                            original_time = original_time.replace(tzinfo=timezone.utc)

                                        # Check if original time is in the past (within grace period)
                                        time_since_scheduled = (now - original_time).total_seconds()
                                        if time_since_scheduled > 0 and time_since_scheduled <= 3600:  # Within 1 hour grace period
                                            # Execute immediately (missed job)
                                            logger.warning(f"Restoring research persona job {job_id} - original time was {original_time}, executing now (missed)")
                                            try:
                                                await generate_research_persona_task(user_id)
                                            except Exception as exec_error:
                                                logger.error(f"Error executing missed research persona job {job_id}: {exec_error}")
                                        elif original_time > now:
                                            # Restore with original future time
                                            time_until_run = (original_time - now).total_seconds() / 60  # minutes
                                            logger.warning(
                                                f"[Restoration] Restoring research persona job {job_id} with ORIGINAL scheduled time: "
                                                f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
                                                f"will run in {time_until_run:.1f} minutes"
                                            )
                                            scheduler.schedule_one_time_task(
                                                func=generate_research_persona_task,
                                                run_date=original_time,
                                                job_id=job_id,
                                                kwargs={'user_id': user_id},
                                                replace_existing=True
                                            )
                                            restored_count += 1
                                        else:
                                            # Too old (beyond grace period), skip
                                            skipped_count += 1
                                            logger.debug(f"Research persona job {job_id} scheduled time {original_time} is too old, skipping")
                                    except Exception as time_error:
                                        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
                                        # Fall through to schedule new job
                                        schedule_research_persona_generation(user_id, delay_minutes=20)
                                        restored_count += 1
                                else:
                                    # No original time in event data, schedule new job
                                    logger.warning(
                                        f"[Restoration] No original scheduled time found for research persona job {job_id}, "
                                        f"scheduling NEW job with current time + 20 minutes"
                                    )
                                    schedule_research_persona_generation(user_id, delay_minutes=20)
                                    restored_count += 1
                            else:
                                # No previous scheduled event, schedule new job
                                logger.warning(
                                    f"[Restoration] No previous scheduled event found for research persona job {job_id}, "
                                    f"scheduling NEW job with current time + 20 minutes"
                                )
                                schedule_research_persona_generation(user_id, delay_minutes=20)
                                restored_count += 1
                        else:
                            skipped_count += 1
                            logger.debug(f"Research persona job {job_id} already exists in scheduler, skipping restoration")
                except Exception as e:
                    logger.debug(f"Could not restore research persona for user {user_id}: {e}")

                # Restore Facebook persona job
                try:
                    persona_data_service = PersonaDataService(db_session=db)
                    persona_data = persona_data_service.get_user_persona_data(user_id)
                    platform_personas = persona_data.get('platform_personas', {}) if persona_data else {}
                    facebook_persona_exists = bool(platform_personas.get('facebook') if platform_personas else None)
                    has_core_persona = bool(persona_data.get('core_persona') if persona_data else False)

                    if not facebook_persona_exists and has_core_persona:
                        # Note: Clerk user_id already includes "user_" prefix
                        job_id = f"facebook_persona_{user_id}"

                        # Check if job already exists in scheduler
                        existing_jobs = [j for j in scheduler.scheduler.get_jobs()
                                         if j.id == job_id]

                        if not existing_jobs:
                            # Check SchedulerEventLog for original scheduled time
                            original_scheduled_event = db.query(SchedulerEventLog).filter(
                                SchedulerEventLog.event_type == 'job_scheduled',
                                SchedulerEventLog.job_id == job_id,
                                SchedulerEventLog.user_id == user_id
                            ).order_by(SchedulerEventLog.event_date.desc()).first()

                            # Check if job was already completed or failed
                            completed_event = db.query(SchedulerEventLog).filter(
                                SchedulerEventLog.event_type.in_(['job_completed', 'job_failed']),
                                SchedulerEventLog.job_id == job_id,
                                SchedulerEventLog.user_id == user_id
                            ).order_by(SchedulerEventLog.event_date.desc()).first()

                            if completed_event:
                                skipped_count += 1
                                logger.debug(f"Facebook persona job {job_id} already completed/failed, skipping restoration")
                            elif original_scheduled_event and original_scheduled_event.event_data:
                                # Restore with original scheduled time
                                scheduled_for_str = original_scheduled_event.event_data.get('scheduled_for')
                                if scheduled_for_str:
                                    try:
                                        original_time = datetime.fromisoformat(scheduled_for_str.replace('Z', '+00:00'))
                                        if original_time.tzinfo is None:
                                            original_time = original_time.replace(tzinfo=timezone.utc)

                                        # Check if original time is in the past (within grace period)
                                        time_since_scheduled = (now - original_time).total_seconds()
                                        if time_since_scheduled > 0 and time_since_scheduled <= 3600:  # Within 1 hour grace period
                                            # Execute immediately (missed job)
                                            logger.warning(f"Restoring Facebook persona job {job_id} - original time was {original_time}, executing now (missed)")
                                            try:
                                                await generate_facebook_persona_task(user_id)
                                            except Exception as exec_error:
                                                logger.error(f"Error executing missed Facebook persona job {job_id}: {exec_error}")
                                        elif original_time > now:
                                            # Restore with original future time
                                            time_until_run = (original_time - now).total_seconds() / 60  # minutes
                                            logger.warning(
                                                f"[Restoration] Restoring Facebook persona job {job_id} with ORIGINAL scheduled time: "
                                                f"{original_time} (UTC) = {original_time.astimezone().strftime('%H:%M:%S %Z')} (local), "
                                                f"will run in {time_until_run:.1f} minutes"
                                            )
                                            scheduler.schedule_one_time_task(
                                                func=generate_facebook_persona_task,
                                                run_date=original_time,
                                                job_id=job_id,
                                                kwargs={'user_id': user_id},
                                                replace_existing=True
                                            )
                                            restored_count += 1
                                        else:
                                            skipped_count += 1
                                            logger.debug(f"Facebook persona job {job_id} scheduled time {original_time} is too old, skipping")
                                    except Exception as time_error:
                                        logger.warning(f"Error parsing original scheduled time for {job_id}: {time_error}, scheduling new job")
|
||||
schedule_facebook_persona_generation(user_id, delay_minutes=20)
|
||||
restored_count += 1
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Restoration] No original scheduled time found for Facebook persona job {job_id}, "
|
||||
f"scheduling NEW job with current time + 20 minutes"
|
||||
)
|
||||
schedule_facebook_persona_generation(user_id, delay_minutes=20)
|
||||
restored_count += 1
|
||||
else:
|
||||
# No previous scheduled event, schedule new job
|
||||
logger.warning(
|
||||
f"[Restoration] No previous scheduled event found for Facebook persona job {job_id}, "
|
||||
f"scheduling NEW job with current time + 20 minutes"
|
||||
)
|
||||
schedule_facebook_persona_generation(user_id, delay_minutes=20)
|
||||
restored_count += 1
|
||||
else:
|
||||
skipped_count += 1
|
||||
logger.debug(f"Facebook persona job {job_id} already exists in scheduler, skipping restoration")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")
|
||||
|
||||
if restored_count > 0:
|
||||
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
|
||||
if skipped_count > 0:
|
||||
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error restoring persona jobs: {e}")
|
||||
|
||||
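# --- Illustrative sketch (editor addition, not part of the original file) ---
# The restoration branches above reduce to a small decision rule around the
# one-hour grace period. A minimal, self-contained sketch of that rule,
# assuming only that `original_time` and `now` are timezone-aware datetimes:
from datetime import datetime, timezone, timedelta

def restoration_action(original_time: datetime, now: datetime, grace_seconds: int = 3600) -> str:
    """Return 'execute_now', 'reschedule_original', or 'skip_too_old'."""
    elapsed = (now - original_time).total_seconds()
    if 0 < elapsed <= grace_seconds:
        return 'execute_now'          # missed within the grace period
    if original_time > now:
        return 'reschedule_original'  # preserve the original scheduled time
    return 'skip_too_old'             # beyond the grace period, do not restore

# Example: a job scheduled 30 minutes ago is executed immediately on restart.
_now = datetime.now(timezone.utc)
assert restoration_action(_now - timedelta(minutes=30), _now) == 'execute_now'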
190
backend/services/scheduler/core/oauth_task_restoration.py
Normal file
190
backend/services/scheduler/core/oauth_task_restoration.py
Normal file
@@ -0,0 +1,190 @@
"""
OAuth Token Monitoring Task Restoration
Automatically creates missing OAuth monitoring tasks for users who have connected platforms
but don't have monitoring tasks created yet.
"""

from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger

from services.database import get_db_session
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks

# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("oauth_task_restoration")


async def restore_oauth_monitoring_tasks(scheduler):
"""
Restore/create missing OAuth token monitoring tasks for all users.

This checks all users who have connected platforms and ensures they have
monitoring tasks created. Tasks are created for platforms that are:
- Connected (detected via get_connected_platforms)
- Missing monitoring tasks (no OAuthTokenMonitoringTask exists)

Args:
scheduler: TaskScheduler instance
"""
try:
logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
db = get_db_session()
if not db:
logger.warning("[OAuth Task Restoration] Could not get database session")
return

try:
# Get all existing OAuth tasks to find unique user_ids
existing_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)

# Log existing tasks breakdown by platform
existing_by_platform = {}
for task in existing_tasks:
existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1

platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
logger.warning(
f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
)

# Check users who already have at least one OAuth task
users_to_check = list(user_ids_with_tasks)

# Also query all users from onboarding who completed step 5 (integrations)
# to catch users who connected platforms but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_

# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()

# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()

# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()

# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)

users_to_check = all_user_ids
logger.warning(
f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only

total_created = 0
restoration_summary = []  # Collect summary for single log

for user_id in users_to_check:
try:
# Get connected platforms for this user (silent - no logging)
connected_platforms = get_connected_platforms(user_id)

if not connected_platforms:
logger.debug(
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
)
continue

# Check which platforms are missing tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}

missing_platforms = [
platform
for platform in connected_platforms
if platform not in existing_platforms
]

if missing_platforms:
# Create missing tasks
created = create_oauth_monitoring_tasks(
user_id=user_id,
db=db,
platforms=missing_platforms
)

total_created += len(created)
# Collect summary info instead of logging immediately
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f"  ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)

except Exception as e:
logger.warning(
f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue

# Final summary log with platform breakdown
final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
final_by_platform = {}
for task in final_existing_tasks:
final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1

final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])

# Single formatted summary log (similar to scheduler startup)
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5])  # Show first 5 users
if len(restoration_summary) > 5:
summary_lines += f"\n  └─ ... and {len(restoration_summary) - 5} more users"

logger.warning(
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f"  ├─ Tasks Created: {total_created}\n"
f"  ├─ Users Processed: {len(users_to_check)}\n"
f"  ├─ Platform Breakdown: {final_platform_summary}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
)

finally:
db.close()

except Exception as e:
logger.error(
f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
exc_info=True
)

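# --- Illustrative sketch (editor addition, not part of the original file) ---
# A minimal way this restoration coroutine might be invoked once at process
# startup, assuming the application exposes get_scheduler() from
# services/scheduler. The entry-point name and the __main__ guard are
# assumptions for illustration only; the real app may wire this differently.
import asyncio

async def _startup_restore_oauth_tasks():
    from services.scheduler import get_scheduler  # assumed import path
    scheduler = get_scheduler()
    # Best-effort: restore_oauth_monitoring_tasks logs and swallows its own errors.
    await restore_oauth_monitoring_tasks(scheduler)

if __name__ == "__main__":
    asyncio.run(_startup_restore_oauth_tasks())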
@@ -0,0 +1,152 @@
"""
Platform Insights Task Restoration
Automatically creates missing platform insights tasks for users who have connected platforms
but don't have insights tasks created yet.
"""

from datetime import datetime, timedelta
from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger

from services.database import get_db_session
from models.platform_insights_monitoring_models import PlatformInsightsTask
from services.platform_insights_monitoring_service import create_platform_insights_task
from services.oauth_token_monitoring_service import get_connected_platforms
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask

logger = get_service_logger("platform_insights_task_restoration")


async def restore_platform_insights_tasks(scheduler):
"""
Restore/create missing platform insights tasks for all users.

This checks all users who have connected platforms (GSC/Bing) and ensures they have
insights tasks created. Tasks are created for platforms that are:
- Connected (detected via get_connected_platforms or OAuth tasks)
- Missing insights tasks (no PlatformInsightsTask exists)

Args:
scheduler: TaskScheduler instance
"""
try:
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
db = get_db_session()
if not db:
logger.warning("[Platform Insights Restoration] Could not get database session")
return

try:
# Get all existing insights tasks to find unique user_ids
existing_tasks = db.query(PlatformInsightsTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)

# Get all OAuth tasks to find users with connected platforms
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)

# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']

# Get users who have OAuth tasks for GSC or Bing
users_to_check = set()
for task in oauth_tasks:
if task.platform in insights_platforms:
users_to_check.add(task.user_id)

logger.warning(
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
f"with GSC/Bing OAuth connections."
)

if not users_to_check:
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
return

total_created = 0
restoration_summary = []

for user_id in users_to_check:
try:
# Get connected platforms for this user
connected_platforms = get_connected_platforms(user_id)

# Filter to only GSC and Bing
insights_connected = [p for p in connected_platforms if p in insights_platforms]

if not insights_connected:
logger.debug(
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
)
continue

# Check which platforms are missing insights tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}

missing_platforms = [
platform
for platform in insights_connected
if platform not in existing_platforms
]

if missing_platforms:
# Create missing tasks for each platform
for platform in missing_platforms:
try:
# Don't fetch site_url here - it requires API calls
# The executor will fetch it when the task runs (weekly)
# This avoids API calls during restoration
result = create_platform_insights_task(
user_id=user_id,
platform=platform,
site_url=None,  # Will be fetched by executor when task runs
db=db
)

if result.get('success'):
total_created += 1
restoration_summary.append(
f"  ├─ User {user_id[:20]}...: {platform.upper()} task created"
)
else:
logger.debug(
f"[Platform Insights Restoration] Failed to create {platform} task "
f"for user {user_id}: {result.get('error')}"
)
except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error creating {platform} task "
f"for user {user_id}: {e}"
)
continue

except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
)
continue

# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
)

finally:
db.close()

except Exception as e:
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)

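# --- Illustrative sketch (editor addition, not part of the original file) ---
# The per-user loop above boils down to a set difference between connected
# GSC/Bing platforms and the platforms that already have an insights task.
# A self-contained sketch of that computation with plain lists:
def missing_insights_platforms(connected, existing, supported=('gsc', 'bing')):
    """Platforms that are connected and supported but have no insights task yet."""
    connected_supported = [p for p in connected if p in supported]
    existing_set = set(existing)
    return [p for p in connected_supported if p not in existing_set]

# Example: user has GSC and Bing connected but only a GSC task exists.
assert missing_insights_platforms(['gsc', 'bing'], ['gsc']) == ['bing']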
1029
backend/services/scheduler/core/scheduler.py
Normal file
1029
backend/services/scheduler/core/scheduler.py
Normal file
File diff suppressed because it is too large
Load Diff
211
backend/services/scheduler/core/task_execution_handler.py
Normal file
211
backend/services/scheduler/core/task_execution_handler.py
Normal file
@@ -0,0 +1,211 @@
"""
Task Execution Handler
Handles asynchronous execution of individual tasks with proper session isolation.
"""

from typing import TYPE_CHECKING, Any, Dict, Optional
from sqlalchemy.orm import object_session

from services.database import get_db_session
from utils.logger_utils import get_service_logger
from .exception_handler import (
SchedulerException, TaskExecutionError, DatabaseError, SchedulerConfigError
)

if TYPE_CHECKING:
from .scheduler import TaskScheduler

logger = get_service_logger("task_execution_handler")


async def execute_task_async(
scheduler: 'TaskScheduler',
task_type: str,
task: Any,
summary: Optional[Dict[str, Any]] = None,
execution_source: str = "scheduler"  # "scheduler" or "manual"
):
"""
Execute a single task asynchronously with user isolation.

Each task gets its own database session to prevent concurrent access issues,
as SQLAlchemy sessions are not async-safe or concurrent-safe.

User context is extracted and tracked for user isolation.

Args:
scheduler: TaskScheduler instance
task_type: Type of task
task: Task instance from database (detached from original session)
summary: Optional summary dict to update with execution results
"""
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
db = None
user_id = None

try:
# Extract user context if available (for user isolation tracking)
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
except Exception as e:
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")

# Log task execution start (detailed for important tasks)
task_db_id = getattr(task, 'id', None)
if task_db_id:
logger.debug(f"[Scheduler] ▶️ Executing {task_type} task {task_db_id} | user_id: {user_id}")

# Create a new database session for this async task
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
db = get_db_session()
if db is None:
error = DatabaseError(
message=f"Failed to get database session for task {task_id}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type
)
scheduler.exception_handler.handle_exception(error, log_level="error")
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)
return

# Set database session for exception handler
scheduler.exception_handler.db = db

# Merge the detached task object into this session
# The task object was loaded in a different session and is now detached
if object_session(task) is None:
# Task is detached, need to merge it into this session
task = db.merge(task)

# Extract user_id after merge if not already available
if user_id is None and hasattr(task, 'strategy'):
try:
if task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id'):
# Query strategy if relationship not loaded
from models.enhanced_strategy_models import EnhancedContentStrategy
strategy = db.query(EnhancedContentStrategy).filter(
EnhancedContentStrategy.id == task.strategy_id
).first()
if strategy:
user_id = strategy.user_id
except Exception as e:
logger.debug(f"Could not extract user_id after merge for task {task_id}: {e}")

# Check if task is in cool-off (skip if scheduler-triggered, allow if manual)
if execution_source == "scheduler":
if hasattr(task, 'status') and task.status == "needs_intervention":
logger.warning(
f"[Scheduler] ⏸️ Skipping task {task_id} - marked for human intervention. "
f"Use manual trigger to retry."
)
scheduler.stats['tasks_skipped'] += 1
if summary:
summary.setdefault('skipped', 0)
summary['skipped'] += 1
return

# Get executor for this task type
try:
executor = scheduler.registry.get_executor(task_type)
except Exception as e:
error = SchedulerConfigError(
message=f"Failed to get executor for task type {task_type}: {str(e)}",
user_id=user_id,
context={
"task_id": getattr(task, 'id', None),
"task_type": task_type
},
original_error=e
)
scheduler.exception_handler.handle_exception(error)
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)
return

# Execute task with its own session (with error handling)
try:
result = await executor.execute_task(task, db)

# Handle result and update statistics
if result.success:
scheduler.stats['tasks_executed'] += 1
scheduler._update_user_stats(user_id, success=True)
if summary:
summary['executed'] += 1
logger.debug(f"[Scheduler] ✅ Task {task_id} executed successfully | user_id: {user_id} | time: {result.execution_time_ms}ms")
else:
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)
if summary:
summary['failed'] += 1

# Create structured error for failed execution
error = TaskExecutionError(
message=result.error_message or "Task execution failed",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
execution_time_ms=result.execution_time_ms,
context={"result_data": result.result_data}
)
scheduler.exception_handler.handle_exception(error, log_level="warning")

logger.warning(f"[Scheduler] ❌ Task {task_id} failed | user_id: {user_id} | error: {result.error_message}")

# Retry logic if enabled
if scheduler.enable_retries and result.retryable:
await scheduler._schedule_retry(task, result.retry_delay)

except SchedulerException as e:
# Re-raise scheduler exceptions (they're already handled)
raise
except Exception as e:
# Wrap unexpected exceptions
error = TaskExecutionError(
message=f"Unexpected error during task execution: {str(e)}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
original_error=e
)
scheduler.exception_handler.handle_exception(error)
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)

except SchedulerException as e:
# Handle scheduler exceptions
scheduler.exception_handler.handle_exception(e)
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)
except Exception as e:
# Handle any other unexpected errors
error = TaskExecutionError(
message=f"Unexpected error in task execution wrapper: {str(e)}",
user_id=user_id,
task_id=getattr(task, 'id', None),
task_type=task_type,
original_error=e
)
scheduler.exception_handler.handle_exception(error)
scheduler.stats['tasks_failed'] += 1
scheduler._update_user_stats(user_id, success=False)
finally:
# Clean up database session
if db:
try:
db.close()
except Exception as e:
logger.error(f"Error closing database session for task {task_id}: {e}")

# Remove from active executions
if task_id in scheduler.active_executions:
del scheduler.active_executions[task_id]

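# --- Illustrative sketch (editor addition, not part of the original file) ---
# The session-isolation pattern used by execute_task_async, reduced to its core:
# each concurrent task opens its own SQLAlchemy session, re-attaches the detached
# ORM object with merge(), and always closes the session. The helper name and the
# do_work callable are placeholders for illustration.
from sqlalchemy.orm import object_session
from services.database import get_db_session

async def run_with_isolated_session(detached_task, do_work):
    db = get_db_session()  # one session per task; sessions are not concurrency-safe
    if db is None:
        raise RuntimeError("no database session available")
    try:
        if object_session(detached_task) is None:
            detached_task = db.merge(detached_task)  # re-attach to this session
        return await do_work(detached_task, db)
    finally:
        db.close()  # always release the session, even on failure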
59
backend/services/scheduler/core/task_registry.py
Normal file
59
backend/services/scheduler/core/task_registry.py
Normal file
@@ -0,0 +1,59 @@
"""
Task Registry
Manages registration of task executors and loaders.
"""

import logging
from typing import Dict, Callable, List, Any
from sqlalchemy.orm import Session

from .executor_interface import TaskExecutor

logger = logging.getLogger(__name__)


class TaskRegistry:
"""Registry for task executors and loaders."""

def __init__(self):
self.executors: Dict[str, TaskExecutor] = {}
self.task_loaders: Dict[str, Callable[[Session], List[Any]]] = {}

def register(
self,
task_type: str,
executor: TaskExecutor,
task_loader: Callable[[Session], List[Any]]
):
"""
Register a task executor and loader.

Args:
task_type: Unique identifier for task type
executor: TaskExecutor instance
task_loader: Function that loads due tasks from database
"""
if task_type in self.executors:
logger.warning(f"Overwriting existing executor for task type: {task_type}")

self.executors[task_type] = executor
self.task_loaders[task_type] = task_loader

logger.info(f"Registered task type: {task_type}")

def get_executor(self, task_type: str) -> TaskExecutor:
"""Get executor for task type."""
if task_type not in self.executors:
raise ValueError(f"No executor registered for task type: {task_type}")
return self.executors[task_type]

def get_task_loader(self, task_type: str) -> Callable[[Session], List[Any]]:
"""Get task loader for task type."""
if task_type not in self.task_loaders:
raise ValueError(f"No task loader registered for task type: {task_type}")
return self.task_loaders[task_type]

def get_registered_types(self) -> List[str]:
"""Get list of registered task types."""
return list(self.executors.keys())

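# --- Illustrative usage sketch (editor addition, not part of the original file) ---
# Minimal round-trip through the registry defined above. The executor and loader
# here are hypothetical stand-ins; a real registration would pass a TaskExecutor
# subclass and a loader that queries due tasks from the database.
def _load_due_example_tasks(db):
    return []  # placeholder loader: no due tasks

class _ExampleExecutor:  # stand-in; a real implementation would subclass TaskExecutor
    async def execute_task(self, task, db):
        return None

registry = TaskRegistry()
registry.register('example_task', _ExampleExecutor(), _load_due_example_tasks)

assert registry.get_registered_types() == ['example_task']
assert isinstance(registry.get_executor('example_task'), _ExampleExecutor)
assert registry.get_task_loader('example_task') is _load_due_example_tasks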
@@ -0,0 +1,193 @@
"""
Website Analysis Task Restoration
Automatically creates missing website analysis tasks for users who completed onboarding
but don't have monitoring tasks created yet.
"""

from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger

from services.database import get_db_session
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
from services.website_analysis_monitoring_service import create_website_analysis_tasks
from models.onboarding import OnboardingSession
from sqlalchemy import or_

# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("website_analysis_restoration")


async def restore_website_analysis_tasks(scheduler):
"""
Restore/create missing website analysis tasks for all users.

This checks all users who completed onboarding and ensures they have
website analysis tasks created. Tasks are created for:
- User's website (if analysis exists)
- All competitors (from onboarding step 3)

Args:
scheduler: TaskScheduler instance
"""
try:
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
db = get_db_session()
if not db:
logger.warning("[Website Analysis Restoration] Could not get database session")
return

try:
# Check if table exists (may not exist if migration hasn't run)
try:
existing_tasks = db.query(WebsiteAnalysisTask).all()
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
)
return

user_ids_with_tasks = set(task.user_id for task in existing_tasks)

# Log existing tasks breakdown by type
existing_by_type = {}
for task in existing_tasks:
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1

type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
logger.warning(
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
)

# Check users who already have at least one website analysis task
users_to_check = list(user_ids_with_tasks)

# Also query all users from onboarding who completed step 2 (website analysis)
# to catch users who completed onboarding but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_

# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()

# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()

# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)

all_user_ids = users_to_check.copy()

# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)

users_to_check = all_user_ids
logger.warning(
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
users_to_check = list(user_ids_with_tasks)

total_created = 0
users_processed = 0

for user_id in users_to_check:
try:
users_processed += 1

# Check if user already has tasks
existing_user_tasks = [
task for task in existing_tasks
if task.user_id == user_id
]

if existing_user_tasks:
logger.debug(
f"[Website Analysis Restoration] User {user_id} already has "
f"{len(existing_user_tasks)} website analysis tasks, skipping"
)
continue

logger.warning(
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)

# Create missing tasks
result = create_website_analysis_tasks(user_id=user_id, db=db)

if result.get('success'):
tasks_count = result.get('tasks_created', 0)
total_created += tasks_count
logger.warning(
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
f"for user {user_id}"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
)

except Exception as e:
logger.warning(
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue

# Final summary log
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
final_by_type = {}
for task in final_existing_tasks:
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1

final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])

if total_created > 0:
logger.warning(
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
)
else:
logger.warning(
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
f"Type breakdown: {final_type_summary}"
)

finally:
db.close()

except Exception as e:
logger.error(
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
exc_info=True
)

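# --- Illustrative sketch (editor addition, not part of the original file) ---
# The onboarding-completion rule used above (and in the OAuth restoration module)
# in isolation: a session counts as completed when current_step >= 6 or progress
# >= 100.0. The dataclass is a stand-in for the OnboardingSession model.
from dataclasses import dataclass

@dataclass
class _SessionStub:
    current_step: int
    progress: float

def is_onboarding_completed(session) -> bool:
    return session.current_step >= 6 or session.progress >= 100.0

# Examples: reaching step 6 counts as completed, and so does 100% progress earlier.
assert is_onboarding_completed(_SessionStub(current_step=6, progress=80.0))
assert is_onboarding_completed(_SessionStub(current_step=4, progress=100.0))
assert not is_onboarding_completed(_SessionStub(current_step=3, progress=40.0))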
4
backend/services/scheduler/executors/__init__.py
Normal file
4
backend/services/scheduler/executors/__init__.py
Normal file
@@ -0,0 +1,4 @@
"""
Task executor implementations.
"""

354
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
354
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
@@ -0,0 +1,354 @@
"""
Bing Insights Task Executor
Handles execution of Bing insights fetch tasks for connected platforms.
"""

import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session

from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.integrations.bing_oauth import BingOAuthService
from utils.logger_utils import get_service_logger

logger = get_service_logger("bing_insights_executor")


class BingInsightsExecutor(TaskExecutor):
"""
Executor for Bing insights fetch tasks.

Handles:
- Fetching Bing insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from Bing API
- Logging results and updating task status
"""

def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
self.storage_service = BingAnalyticsStorageService(database_url)
self.bing_oauth = BingOAuthService()

async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a Bing insights fetch task.

Args:
task: PlatformInsightsTask instance
db: Database session

Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url

try:
self.logger.info(
f"Executing Bing insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)

# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()

# Fetch insights
result = await self._fetch_insights(task, db)

# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None

# Update task based on result
task.last_check = datetime.utcnow()

if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "bing_insights", task.user_id
)

task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message

if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None

self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)

task.updated_at = datetime.utcnow()
db.commit()

return result

except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)

# Set database session for exception handler
self.exception_handler.db = db

error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="Bing insights fetch"
)

# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "bing_insights", task.user_id
)

# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)

if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
task.next_check = None
else:
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
task.next_check = datetime.utcnow() + timedelta(days=1)

task.updated_at = datetime.utcnow()
db.commit()

return error_result

async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch Bing insights data.

On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url

try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None

if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)

if cached_data:
self.logger.info(f"Loaded cached Bing data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from Bing API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)

except Exception as e:
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch Bing insights: {str(e)}",
result_data={'error': str(e)}
)

def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached Bing data from database."""
try:
# Get analytics summary from storage service
summary = self.storage_service.get_analytics_summary(
user_id=user_id,
site_url=site_url or '',
days=30
)

if summary and isinstance(summary, dict):
self.logger.info(f"Found cached Bing data for user {user_id}")
return summary

return None

except Exception as e:
self.logger.warning(f"Error loading cached Bing data: {e}")
return None

async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh Bing insights from API."""
try:
# Check if user has active tokens
token_status = self.bing_oauth.get_user_token_status(user_id)

if not token_status.get('has_active_tokens'):
return TaskExecutionResult(
success=False,
error_message="Bing Webmaster tokens not available or expired",
result_data={'error': 'No active tokens'}
)

# Get user's sites
sites = self.bing_oauth.get_user_sites(user_id)

if not sites:
return TaskExecutionResult(
success=False,
error_message="No Bing Webmaster sites found",
result_data={'error': 'No sites found'}
)

# Use provided site_url or first site
if not site_url:
site_url = sites[0].get('Url', '') if isinstance(sites[0], dict) else sites[0]

# Get active token
active_tokens = token_status.get('active_tokens', [])
if not active_tokens:
return TaskExecutionResult(
success=False,
error_message="No active Bing Webmaster tokens",
result_data={'error': 'No tokens'}
)

# For now, use stored analytics data (Bing API integration can be added later)
# This ensures we have data available even if the API class doesn't exist yet
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)

if summary and isinstance(summary, dict):
# Format insights data from stored analytics
insights_data = {
'site_url': site_url,
'date_range': {
'start': (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'),
'end': datetime.now().strftime('%Y-%m-%d')
},
'summary': summary.get('summary', {}),
'fetched_at': datetime.utcnow().isoformat()
}

self.logger.info(
f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
)

return TaskExecutionResult(
success=True,
result_data={
'data_source': 'storage',
'insights': insights_data,
'message': 'Loaded from stored analytics data'
}
)
else:
# No stored data available
return TaskExecutionResult(
success=False,
error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
result_data={'error': 'No stored data available'}
)

except Exception as e:
self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"API fetch failed: {str(e)}",
result_data={'error': str(e)}
)

def calculate_next_execution(
self,
task: PlatformInsightsTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.

For platform insights, frequency is always 'Weekly' (7 days).
"""
if last_execution is None:
last_execution = datetime.utcnow()

if frequency == 'Weekly':
return last_execution + timedelta(days=7)
elif frequency == 'Daily':
return last_execution + timedelta(days=1)
else:
# Default to weekly
return last_execution + timedelta(days=7)

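# --- Illustrative sketch (editor addition, not part of the original file) ---
# The failure-handling branches above apply the same rule in both the result and
# exception paths: if the failure pattern says "cool off", the task is parked for
# human intervention with no next_check; otherwise it is retried in one day.
# A minimal sketch of that rule, assuming only a pattern object exposing
# should_cool_off (as used above):
from datetime import datetime, timedelta
from typing import Optional, Tuple

def next_state_after_failure(pattern) -> Tuple[str, Optional[datetime]]:
    """Return (status, next_check) for a failed insights task."""
    if pattern is not None and pattern.should_cool_off:
        return "needs_intervention", None  # parked; manual trigger required
    return "failed", datetime.utcnow() + timedelta(days=1)  # automatic retry tomorrow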
363
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
363
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
@@ -0,0 +1,363 @@
"""
GSC Insights Task Executor
Handles execution of GSC insights fetch tasks for connected platforms.
"""

import logging
import os
import time
import json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
import sqlite3

from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.gsc_service import GSCService
from utils.logger_utils import get_service_logger

logger = get_service_logger("gsc_insights_executor")


class GSCInsightsExecutor(TaskExecutor):
"""
Executor for GSC insights fetch tasks.

Handles:
- Fetching GSC insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from GSC API
- Logging results and updating task status
"""

def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
self.gsc_service = GSCService()

async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a GSC insights fetch task.

Args:
task: PlatformInsightsTask instance
db: Database session

Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url

try:
self.logger.info(
f"Executing GSC insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)

# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()

# Fetch insights
result = await self._fetch_insights(task, db)

# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None

# Update task based on result
task.last_check = datetime.utcnow()

if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "gsc_insights", task.user_id
)

task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message

if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None

self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)

task.updated_at = datetime.utcnow()
db.commit()

return result

except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)

# Set database session for exception handler
self.exception_handler.db = db

error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="GSC insights fetch"
)

# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "gsc_insights", task.user_id
)

# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)

if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
task.next_check = None
else:
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
task.next_check = datetime.utcnow() + timedelta(days=1)

task.updated_at = datetime.utcnow()
db.commit()

return error_result

async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch GSC insights data.

On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url

try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None

if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)

if cached_data:
self.logger.info(f"Loaded cached GSC data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from GSC API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)

except Exception as e:
self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch GSC insights: {str(e)}",
result_data={'error': str(e)}
)

def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached GSC data from database."""
try:
db_path = self.gsc_service.db_path

with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()

# Find most recent cached data
if site_url:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id, site_url))
else:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id,))

result = cursor.fetchone()

if result:
data_json, created_at = result
insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json

self.logger.info(
f"Found cached GSC data from {created_at} for user {user_id}"
)

return insights_data

return None

except Exception as e:
self.logger.warning(f"Error loading cached GSC data: {e}")
return None

async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh GSC insights from API."""
try:
# If no site_url, get first site
if not site_url:
sites = self.gsc_service.get_site_list(user_id)
if not sites:
return TaskExecutionResult(
success=False,
error_message="No GSC sites found for user",
result_data={'error': 'No sites found'}
)
site_url = sites[0]['siteUrl']

# Get analytics for last 30 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')

# Fetch search analytics
|
||||
search_analytics = self.gsc_service.get_search_analytics(
|
||||
user_id=user_id,
|
||||
site_url=site_url,
|
||||
start_date=start_date,
|
||||
end_date=end_date
|
||||
)
|
||||
|
||||
if 'error' in search_analytics:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=search_analytics.get('error', 'Unknown error'),
|
||||
result_data=search_analytics
|
||||
)
|
||||
|
||||
# Format insights data
|
||||
insights_data = {
|
||||
'site_url': site_url,
|
||||
'date_range': {
|
||||
'start': start_date,
|
||||
'end': end_date
|
||||
},
|
||||
'overall_metrics': search_analytics.get('overall_metrics', {}),
|
||||
'query_data': search_analytics.get('query_data', {}),
|
||||
'fetched_at': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
self.logger.info(
|
||||
f"Successfully fetched GSC insights for user {user_id}, site {site_url}"
|
||||
)
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'data_source': 'api',
|
||||
'insights': insights_data,
|
||||
'message': 'Fetched fresh data from GSC API'
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"API fetch failed: {str(e)}",
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: PlatformInsightsTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
For platform insights, frequency is always 'Weekly' (7 days).
|
||||
"""
|
||||
if last_execution is None:
|
||||
last_execution = datetime.utcnow()
|
||||
|
||||
if frequency == 'Weekly':
|
||||
return last_execution + timedelta(days=7)
|
||||
elif frequency == 'Daily':
|
||||
return last_execution + timedelta(days=1)
|
||||
else:
|
||||
# Default to weekly
|
||||
return last_execution + timedelta(days=7)
|
||||
|
||||
266
backend/services/scheduler/executors/monitoring_task_executor.py
Normal file
266
backend/services/scheduler/executors/monitoring_task_executor.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""
|
||||
Monitoring Task Executor
|
||||
Handles execution of content strategy monitoring tasks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from ..utils.frequency_calculator import calculate_next_execution
|
||||
from models.monitoring_models import MonitoringTask, TaskExecutionLog
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("monitoring_task_executor")
|
||||
|
||||
|
||||
class MonitoringTaskExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for content strategy monitoring tasks.
|
||||
|
||||
Handles:
|
||||
- ALwrity tasks (automated execution)
|
||||
- Human tasks (notifications/queuing)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
|
||||
async def execute_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a monitoring task with user isolation.
|
||||
|
||||
Args:
|
||||
task: MonitoringTask instance (with strategy relationship loaded)
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Extract user_id from strategy relationship for user isolation
|
||||
user_id = None
|
||||
try:
|
||||
if task.strategy and hasattr(task.strategy, 'user_id'):
|
||||
user_id = task.strategy.user_id
|
||||
elif task.strategy_id:
|
||||
# Fallback: query strategy if relationship not loaded
|
||||
strategy = db.query(EnhancedContentStrategy).filter(
|
||||
EnhancedContentStrategy.id == task.strategy_id
|
||||
).first()
|
||||
if strategy:
|
||||
user_id = strategy.user_id
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not extract user_id for task {task.id}: {e}")
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing monitoring task: {task.id} | "
|
||||
f"user_id: {user_id} | "
|
||||
f"assignee: {task.assignee} | "
|
||||
f"frequency: {task.frequency}"
|
||||
)
|
||||
|
||||
# Create execution log with user_id for user isolation tracking
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
user_id=user_id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Execute based on assignee
|
||||
if task.assignee == 'ALwrity':
|
||||
result = await self._execute_alwrity_task(task, db)
|
||||
else:
|
||||
result = await self._execute_human_task(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task
|
||||
task.last_executed = datetime.utcnow()
|
||||
task.next_execution = self.calculate_next_execution(
|
||||
task,
|
||||
task.frequency,
|
||||
task.last_executed
|
||||
)
|
||||
|
||||
if result.success:
|
||||
task.status = 'completed'
|
||||
else:
|
||||
task.status = 'failed'
|
||||
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing monitoring task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
task_type="monitoring_task",
|
||||
execution_time_ms=execution_time_ms,
|
||||
context={
|
||||
"assignee": task.assignee,
|
||||
"frequency": task.frequency,
|
||||
"component": task.component_name
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error (include user_id for isolation)
|
||||
try:
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
user_id=user_id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='failed',
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
result_data={
|
||||
"error_type": error.error_type.value,
|
||||
"severity": error.severity.value,
|
||||
"context": error.context
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
task.status = 'failed'
|
||||
task.last_executed = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
message=f"Error saving execution log: {str(commit_error)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
original_error=commit_error
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
retryable=True,
|
||||
retry_delay=300
|
||||
)
|
||||
|
||||
async def _execute_alwrity_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute an ALwrity (automated) monitoring task.
|
||||
|
||||
This is where the actual monitoring logic would go.
|
||||
For now, we'll implement a placeholder that can be extended.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Executing ALwrity task: {task.task_title}")
|
||||
|
||||
# TODO: Implement actual monitoring logic based on:
|
||||
# - task.metric
|
||||
# - task.measurement_method
|
||||
# - task.success_criteria
|
||||
# - task.alert_threshold
|
||||
|
||||
# Placeholder: Simulate task execution
|
||||
result_data = {
|
||||
'metric_value': 0,
|
||||
'status': 'measured',
|
||||
'message': f"Task {task.task_title} executed successfully",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in ALwrity task execution: {e}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
async def _execute_human_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a Human monitoring task (notification/queuing).
|
||||
|
||||
For human tasks, we don't execute the task directly,
|
||||
but rather queue it for human review or send notifications.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Queuing human task: {task.task_title}")
|
||||
|
||||
# TODO: Implement notification/queuing system:
|
||||
# - Send email notification
|
||||
# - Add to user's task queue
|
||||
# - Create in-app notification
|
||||
|
||||
result_data = {
|
||||
'status': 'queued',
|
||||
'message': f"Task {task.task_title} queued for human review",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error queuing human task: {e}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: MonitoringTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
Args:
|
||||
task: MonitoringTask instance
|
||||
frequency: Frequency string (Daily, Weekly, Monthly, Quarterly)
|
||||
last_execution: Last execution datetime (defaults to now)
|
||||
|
||||
Returns:
|
||||
Next execution datetime
|
||||
"""
|
||||
return calculate_next_execution(
|
||||
frequency=frequency,
|
||||
base_time=last_execution or datetime.utcnow()
|
||||
)
|
||||
|
||||
@@ -0,0 +1,789 @@
|
||||
"""
|
||||
OAuth Token Monitoring Task Executor
|
||||
Handles execution of OAuth token monitoring tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
|
||||
from models.subscription_models import UsageAlert
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import platform-specific services
|
||||
from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
from services.wix_service import WixService
|
||||
|
||||
logger = get_service_logger("oauth_token_monitoring_executor")
|
||||
|
||||
|
||||
class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for OAuth token monitoring tasks.
|
||||
|
||||
Handles:
|
||||
- Checking token validity and expiration
|
||||
- Attempting automatic token refresh
|
||||
- Logging results and updating task status
|
||||
- One-time refresh attempt (no automatic retries on failure)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
# Expiration warning window (7 days before expiration)
|
||||
self.expiration_warning_days = 7
|
||||
|
||||
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute an OAuth token monitoring task.
|
||||
|
||||
This checks token status and attempts refresh if needed.
|
||||
If refresh fails, marks task as failed and does not retry automatically.
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = task.user_id
|
||||
platform = task.platform
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing OAuth token monitoring: task_id={task.id} | "
|
||||
f"user_id={user_id} | platform={platform}"
|
||||
)
|
||||
|
||||
# Create execution log
|
||||
execution_log = OAuthTokenExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Check and refresh token
|
||||
result = await self._check_and_refresh_token(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task based on result
|
||||
task.last_check = datetime.utcnow()
|
||||
|
||||
if result.success:
|
||||
task.last_success = datetime.utcnow()
|
||||
task.status = 'active'
|
||||
task.failure_reason = None
|
||||
# Reset failure tracking on success
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
# Schedule next check (7 days from now)
|
||||
task.next_check = self.calculate_next_execution(
|
||||
task=task,
|
||||
frequency='Weekly',
|
||||
last_execution=task.last_check
|
||||
)
|
||||
else:
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "oauth_token_monitoring", task.user_id
|
||||
)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = result.error_message
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
# Clear next_check - task won't run automatically
|
||||
task.next_check = None
|
||||
|
||||
self.logger.warning(
|
||||
f"Task {task.id} marked for human intervention: "
|
||||
f"{pattern.consecutive_failures} consecutive failures, "
|
||||
f"reason: {pattern.failure_reason.value}"
|
||||
)
|
||||
else:
|
||||
# Normal failure handling
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
# Do NOT update next_check - wait for manual trigger
|
||||
|
||||
self.logger.warning(
|
||||
f"OAuth token refresh failed for user {user_id}, platform {platform}. "
|
||||
f"{'Task marked for human intervention' if pattern and pattern.should_cool_off else 'Task marked as failed. No automatic retry will be scheduled.'}"
|
||||
)
|
||||
|
||||
# Create UsageAlert notification for the user
|
||||
self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing OAuth token monitoring task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
task_type="oauth_token_monitoring",
|
||||
execution_time_ms=execution_time_ms,
|
||||
context={
|
||||
"platform": platform,
|
||||
"user_id": user_id
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error
|
||||
try:
|
||||
execution_log = OAuthTokenExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='failed',
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
result_data={
|
||||
"error_type": error.error_type.value,
|
||||
"severity": error.severity.value,
|
||||
"context": error.context
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = str(e)
|
||||
task.status = 'failed'
|
||||
task.last_check = datetime.utcnow()
|
||||
task.updated_at = datetime.utcnow()
|
||||
# Do NOT update next_check - wait for manual trigger
|
||||
|
||||
# Create UsageAlert notification for the user
|
||||
self._create_failure_alert(user_id, task.platform, str(e), None, db)
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
message=f"Error saving execution log: {str(commit_error)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
original_error=commit_error
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
retryable=False, # Do not retry automatically
|
||||
retry_delay=0
|
||||
)
|
||||
|
||||
async def _check_and_refresh_token(
|
||||
self,
|
||||
task: OAuthTokenMonitoringTask,
|
||||
db: Session
|
||||
) -> TaskExecutionResult:
|
||||
"""
|
||||
Check token status and attempt refresh if needed.
|
||||
|
||||
Tokens are stored in the database from onboarding step 5:
|
||||
- GSC: gsc_credentials table (via GSCService)
|
||||
- Bing: bing_oauth_tokens table (via BingOAuthService)
|
||||
- WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
|
||||
- Wix: wix_oauth_tokens table (via WixOAuthService)
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult with success status and details
|
||||
"""
|
||||
platform = task.platform
|
||||
user_id = task.user_id
|
||||
|
||||
try:
|
||||
self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")
|
||||
|
||||
# Route to platform-specific checking logic
|
||||
if platform == 'gsc':
|
||||
return await self._check_gsc_token(user_id)
|
||||
elif platform == 'bing':
|
||||
return await self._check_bing_token(user_id)
|
||||
elif platform == 'wordpress':
|
||||
return await self._check_wordpress_token(user_id)
|
||||
elif platform == 'wix':
|
||||
return await self._check_wix_token(user_id)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Unsupported platform: {platform}",
|
||||
result_data={
|
||||
'platform': platform,
|
||||
'user_id': user_id,
|
||||
'error': 'Unsupported platform'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': platform,
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False # Do not retry automatically
|
||||
)
|
||||
|
||||
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check and refresh GSC (Google Search Console) token.
|
||||
|
||||
GSC service auto-refreshes tokens if expired when loading credentials.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
gsc_service = GSCService(db_path=db_path)
|
||||
credentials = gsc_service.load_user_credentials(user_id)
|
||||
|
||||
if not credentials:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="GSC credentials not found or could not be loaded",
|
||||
result_data={
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# GSC service auto-refreshes if expired, so if we get here, token is valid
|
||||
result_data = {
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'GSC token is valid (auto-refreshed if expired)'
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"GSC token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check and refresh Bing Webmaster Tools token.
|
||||
|
||||
Checks token expiration and attempts refresh if needed.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
bing_service = BingOAuthService(db_path=db_path)
|
||||
|
||||
# Get token status (includes expired tokens)
|
||||
token_status = bing_service.get_user_token_status(user_id)
|
||||
|
||||
if not token_status.get('has_tokens'):
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No Bing tokens found for user",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
active_tokens = token_status.get('active_tokens', [])
|
||||
expired_tokens = token_status.get('expired_tokens', [])
|
||||
|
||||
# If we have active tokens, check if any are expiring soon (< 7 days)
|
||||
if active_tokens:
|
||||
now = datetime.utcnow()
|
||||
needs_refresh = False
|
||||
token_to_refresh = None
|
||||
|
||||
for token in active_tokens:
|
||||
expires_at_str = token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
# Check if expires within warning window (7 days)
|
||||
days_until_expiry = (expires_at - now).days
|
||||
if days_until_expiry < self.expiration_warning_days:
|
||||
needs_refresh = True
|
||||
token_to_refresh = token
|
||||
break
|
||||
except Exception:
|
||||
# If parsing fails, assume token is valid
|
||||
pass
|
||||
|
||||
if needs_refresh and token_to_refresh:
|
||||
# Attempt to refresh
|
||||
refresh_token = token_to_refresh.get('refresh_token')
|
||||
if refresh_token:
|
||||
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
|
||||
if refresh_result:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refreshed',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token refreshed successfully'
|
||||
}
|
||||
)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Failed to refresh Bing token",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refresh_failed',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# Token is valid and not expiring soon
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token is valid'
|
||||
}
|
||||
)
|
||||
|
||||
# No active tokens, check if we can refresh expired ones
|
||||
if expired_tokens:
|
||||
# Try to refresh the most recent expired token
|
||||
latest_token = expired_tokens[0] # Already sorted by created_at DESC
|
||||
refresh_token = latest_token.get('refresh_token')
|
||||
|
||||
if refresh_token:
|
||||
# Check if token expired recently (within grace period)
|
||||
expires_at_str = latest_token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
# Only refresh if expired within last 24 hours (grace period)
|
||||
hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
|
||||
if hours_since_expiry < 24:
|
||||
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
|
||||
if refresh_result:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refreshed',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token refreshed from expired state'
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Bing token expired and could not be refreshed",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'expired',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token expired. User needs to reconnect.'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No valid Bing tokens found",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'invalid',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Bing token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check WordPress token validity.
|
||||
|
||||
Note: WordPress tokens cannot be refreshed. They expire after 2 weeks
|
||||
and require user re-authorization. We only check if token is valid.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
wordpress_service = WordPressOAuthService(db_path=db_path)
|
||||
tokens = wordpress_service.get_user_tokens(user_id)
|
||||
|
||||
if not tokens:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No WordPress tokens found for user",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# Check each token - WordPress tokens expire in 2 weeks
|
||||
now = datetime.utcnow()
|
||||
valid_tokens = []
|
||||
expiring_soon = []
|
||||
expired_tokens = []
|
||||
|
||||
for token in tokens:
|
||||
expires_at_str = token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
days_until_expiry = (expires_at - now).days
|
||||
|
||||
if days_until_expiry < 0:
|
||||
expired_tokens.append(token)
|
||||
elif days_until_expiry < self.expiration_warning_days:
|
||||
expiring_soon.append(token)
|
||||
else:
|
||||
valid_tokens.append(token)
|
||||
except Exception:
|
||||
# If parsing fails, test token validity via API
|
||||
access_token = token.get('access_token')
|
||||
if access_token and wordpress_service.test_token(access_token):
|
||||
valid_tokens.append(token)
|
||||
else:
|
||||
expired_tokens.append(token)
|
||||
else:
|
||||
# No expiration date - test token validity
|
||||
access_token = token.get('access_token')
|
||||
if access_token and wordpress_service.test_token(access_token):
|
||||
valid_tokens.append(token)
|
||||
else:
|
||||
expired_tokens.append(token)
|
||||
|
||||
if valid_tokens:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token is valid',
|
||||
'valid_tokens_count': len(valid_tokens)
|
||||
}
|
||||
)
|
||||
elif expiring_soon:
|
||||
# WordPress tokens cannot be refreshed - user needs to reconnect
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="WordPress token expiring soon and cannot be auto-refreshed",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'expiring_soon',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="WordPress token expired and cannot be refreshed",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'expired',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"WordPress token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check Wix token validity.
|
||||
|
||||
Note: Wix tokens are currently stored in frontend sessionStorage.
|
||||
Backend storage needs to be implemented for automated checking.
|
||||
"""
|
||||
try:
|
||||
# TODO: Wix tokens are stored in frontend sessionStorage, not backend database
|
||||
# Once backend storage is implemented, we can check tokens here
|
||||
# For now, return not supported
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
|
||||
result_data={
|
||||
'platform': 'wix',
|
||||
'user_id': user_id,
|
||||
'status': 'not_supported',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Wix token monitoring requires backend token storage implementation'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking Wix token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Wix token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'wix',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
def _create_failure_alert(
|
||||
self,
|
||||
user_id: str,
|
||||
platform: str,
|
||||
error_message: str,
|
||||
result_data: Optional[Dict[str, Any]],
|
||||
db: Session
|
||||
):
|
||||
"""
|
||||
Create a UsageAlert notification when OAuth token refresh fails.
|
||||
|
||||
Args:
|
||||
user_id: User ID
|
||||
platform: Platform identifier (gsc, bing, wordpress, wix)
|
||||
error_message: Error message from token check
|
||||
result_data: Optional result data from token check
|
||||
db: Database session
|
||||
"""
|
||||
try:
|
||||
# Determine severity based on error type
|
||||
status = result_data.get('status', 'unknown') if result_data else 'unknown'
|
||||
|
||||
if status in ['expired', 'refresh_failed']:
|
||||
severity = 'error'
|
||||
alert_type = 'oauth_token_failure'
|
||||
elif status in ['expiring_soon', 'not_found']:
|
||||
severity = 'warning'
|
||||
alert_type = 'oauth_token_warning'
|
||||
else:
|
||||
severity = 'error'
|
||||
alert_type = 'oauth_token_failure'
|
||||
|
||||
# Format platform name for display
|
||||
platform_names = {
|
||||
'gsc': 'Google Search Console',
|
||||
'bing': 'Bing Webmaster Tools',
|
||||
'wordpress': 'WordPress',
|
||||
'wix': 'Wix'
|
||||
}
|
||||
platform_display = platform_names.get(platform, platform.upper())
|
||||
|
||||
# Create alert title and message
|
||||
if status == 'expired':
|
||||
title = f"{platform_display} Token Expired"
|
||||
message = (
|
||||
f"Your {platform_display} access token has expired and could not be automatically renewed. "
|
||||
f"Please reconnect your {platform_display} account to continue using this integration."
|
||||
)
|
||||
elif status == 'expiring_soon':
|
||||
title = f"{platform_display} Token Expiring Soon"
|
||||
message = (
|
||||
f"Your {platform_display} access token will expire soon. "
|
||||
f"Please reconnect your {platform_display} account to avoid interruption."
|
||||
)
|
||||
elif status == 'refresh_failed':
|
||||
title = f"{platform_display} Token Renewal Failed"
|
||||
message = (
|
||||
f"Failed to automatically renew your {platform_display} access token. "
|
||||
f"Please reconnect your {platform_display} account. "
|
||||
f"Error: {error_message}"
|
||||
)
|
||||
elif status == 'not_found':
|
||||
title = f"{platform_display} Token Not Found"
|
||||
message = (
|
||||
f"No {platform_display} access token found. "
|
||||
f"Please connect your {platform_display} account in the onboarding settings."
|
||||
)
|
||||
else:
|
||||
title = f"{platform_display} Token Error"
|
||||
message = (
|
||||
f"An error occurred while checking your {platform_display} access token. "
|
||||
f"Please reconnect your {platform_display} account. "
|
||||
f"Error: {error_message}"
|
||||
)
|
||||
|
||||
# Get current billing period (YYYY-MM format)
|
||||
from datetime import datetime
|
||||
billing_period = datetime.utcnow().strftime("%Y-%m")
|
||||
|
||||
# Create UsageAlert
|
||||
alert = UsageAlert(
|
||||
user_id=user_id,
|
||||
alert_type=alert_type,
|
||||
threshold_percentage=0, # Not applicable for OAuth alerts
|
||||
provider=None, # Not applicable for OAuth alerts
|
||||
title=title,
|
||||
message=message,
|
||||
severity=severity,
|
||||
is_sent=False, # Will be marked as sent when frontend polls
|
||||
is_read=False,
|
||||
billing_period=billing_period
|
||||
)
|
||||
|
||||
db.add(alert)
|
||||
# Note: We don't commit here - let the caller commit
|
||||
# This allows the alert to be created atomically with the task update
|
||||
|
||||
self.logger.info(
|
||||
f"Created UsageAlert for OAuth token failure: user={user_id}, "
|
||||
f"platform={platform}, severity={severity}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Don't fail the entire task execution if alert creation fails
|
||||
self.logger.error(
|
||||
f"Failed to create UsageAlert for OAuth token failure: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: OAuthTokenMonitoringTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
For OAuth token monitoring, frequency is always 'Weekly' (7 days).
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
frequency: Frequency string (should be 'Weekly' for token monitoring)
|
||||
last_execution: Last execution datetime (defaults to task.last_check or now)
|
||||
|
||||
Returns:
|
||||
Next execution datetime
|
||||
"""
|
||||
if last_execution is None:
|
||||
last_execution = task.last_check if task.last_check else datetime.utcnow()
|
||||
|
||||
# OAuth token monitoring is always weekly (7 days)
|
||||
if frequency == 'Weekly':
|
||||
return last_execution + timedelta(days=7)
|
||||
else:
|
||||
# Default to weekly if frequency is not recognized
|
||||
self.logger.warning(
|
||||
f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
|
||||
f"Defaulting to Weekly (7 days)."
|
||||
)
|
||||
return last_execution + timedelta(days=7)
|
||||
|
||||
@@ -0,0 +1,492 @@
|
||||
"""
|
||||
Website Analysis Task Executor
|
||||
Handles execution of website analysis tasks for user and competitor websites.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from functools import partial
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
|
||||
from models.onboarding import CompetitorAnalysis, OnboardingSession
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import website analysis services
|
||||
from services.component_logic.web_crawler_logic import WebCrawlerLogic
|
||||
from services.component_logic.style_detection_logic import StyleDetectionLogic
|
||||
from services.website_analysis_service import WebsiteAnalysisService
|
||||
|
||||
logger = get_service_logger("website_analysis_executor")
|
||||
|
||||
|
||||
class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for website analysis tasks.
|
||||
|
||||
Handles:
|
||||
- Analyzing user's website (updates existing WebsiteAnalysis record)
|
||||
- Analyzing competitor websites (stores in CompetitorAnalysis table)
|
||||
- Logging results and updating task status
|
||||
- Scheduling next execution based on frequency_days
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
self.crawler_logic = WebCrawlerLogic()
|
||||
self.style_logic = StyleDetectionLogic()
|
||||
|
||||
async def execute_task(
|
||||
self,
|
||||
task: WebsiteAnalysisTask,
|
||||
db: Session
|
||||
) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a website analysis task.
|
||||
|
||||
This performs complete website analysis using the same logic as
|
||||
/api/onboarding/style-detection/complete endpoint.
|
||||
|
||||
Args:
|
||||
task: WebsiteAnalysisTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = task.user_id
|
||||
website_url = task.website_url
|
||||
task_type = task.task_type
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing website analysis: task_id={task.id} | "
|
||||
f"user_id={user_id} | url={website_url} | type={task_type}"
|
||||
)
|
||||
|
||||
# Create execution log
|
||||
execution_log = WebsiteAnalysisExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Perform website analysis
|
||||
result = await self._perform_website_analysis(
|
||||
website_url=website_url,
|
||||
user_id=user_id,
|
||||
task_type=task_type,
|
||||
task=task,
|
||||
db=db
|
||||
)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task based on result
|
||||
task.last_check = datetime.utcnow()
|
||||
task.updated_at = datetime.utcnow()
|
||||
|
||||
if result.success:
|
||||
task.last_success = datetime.utcnow()
|
||||
task.status = 'active'
|
||||
task.failure_reason = None
|
||||
# Reset failure tracking on success
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
# Schedule next check based on frequency_days
|
||||
task.next_check = self.calculate_next_execution(
|
||||
task=task,
|
||||
frequency='Custom',
|
||||
last_execution=task.last_check,
|
||||
custom_days=task.frequency_days
|
||||
)
|
||||
|
||||
# Commit all changes to database
|
||||
db.commit()
|
||||
|
||||
self.logger.info(
|
||||
f"Website analysis completed successfully for task {task.id}. "
|
||||
f"Next check scheduled for {task.next_check}"
|
||||
)
|
||||
return result
|
||||
else:
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "website_analysis", task.user_id
|
||||
)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = result.error_message
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
# Clear next_check - task won't run automatically
|
||||
task.next_check = None
|
||||
|
||||
self.logger.warning(
|
||||
f"Task {task.id} marked for human intervention: "
|
||||
f"{pattern.consecutive_failures} consecutive failures, "
|
||||
f"reason: {pattern.failure_reason.value}"
|
||||
)
|
||||
else:
|
||||
# Normal failure handling
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
# Do NOT update next_check - wait for manual retry
|
||||
|
||||
# Commit all changes to database
|
||||
db.commit()
|
||||
|
||||
self.logger.warning(
|
||||
f"Website analysis failed for task {task.id}. "
|
||||
f"Error: {result.error_message}. "
|
||||
f"{'Marked for human intervention' if pattern and pattern.should_cool_off else 'Waiting for manual retry'}."
|
||||
)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing website analysis task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
task_type="website_analysis",
|
||||
execution_time_ms=execution_time_ms,
|
||||
context={
|
||||
"website_url": website_url,
|
||||
"task_type": task_type,
|
||||
"user_id": user_id
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error
|
||||
try:
|
||||
execution_log = WebsiteAnalysisExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='failed',
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
result_data={
|
||||
"error_type": error.error_type.value,
|
||||
"severity": error.severity.value,
|
||||
"context": error.context
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = str(e)
|
||||
task.status = 'failed'
|
||||
task.last_check = datetime.utcnow()
|
||||
task.updated_at = datetime.utcnow()
|
||||
# Do NOT update next_check - wait for manual retry
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
message=f"Error saving execution log: {str(commit_error)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
original_error=commit_error
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
retryable=True
|
||||
)
|
||||
|
||||
async def _perform_website_analysis(
|
||||
self,
|
||||
website_url: str,
|
||||
user_id: str,
|
||||
task_type: str,
|
||||
task: WebsiteAnalysisTask,
|
||||
db: Session
|
||||
) -> TaskExecutionResult:
|
||||
"""
|
||||
Perform website analysis using existing service logic.
|
||||
|
||||
Reuses the same logic as /api/onboarding/style-detection/complete.
|
||||
"""
|
||||
try:
|
||||
# Step 1: Crawl website content
|
||||
self.logger.info(f"Crawling website: {website_url}")
|
||||
crawl_result = await self.crawler_logic.crawl_website(website_url)
|
||||
|
||||
if not crawl_result.get('success'):
|
||||
error_msg = crawl_result.get('error', 'Crawling failed')
|
||||
self.logger.error(f"Crawling failed for {website_url}: {error_msg}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Crawling failed: {error_msg}",
|
||||
result_data={'crawl_result': crawl_result},
|
||||
retryable=True
|
||||
)
|
||||
|
||||
# Step 2: Run style analysis and patterns analysis in parallel
|
||||
self.logger.info(f"Running style analysis for {website_url}")
|
||||
|
||||
async def run_style_analysis():
|
||||
"""Run style analysis in executor"""
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(
|
||||
None,
|
||||
partial(self.style_logic.analyze_content_style, crawl_result['content'])
|
||||
)
|
||||
|
||||
async def run_patterns_analysis():
|
||||
"""Run patterns analysis in executor"""
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(
|
||||
None,
|
||||
partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
|
||||
)
|
||||
|
||||
# Execute style and patterns analysis in parallel
|
||||
style_analysis, patterns_result = await asyncio.gather(
|
||||
run_style_analysis(),
|
||||
run_patterns_analysis(),
|
||||
return_exceptions=True
|
||||
)
|
||||
|
||||
# Check for exceptions
|
||||
if isinstance(style_analysis, Exception):
|
||||
self.logger.error(f"Style analysis exception: {style_analysis}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Style analysis failed: {str(style_analysis)}",
|
||||
retryable=True
|
||||
)
|
||||
|
||||
if isinstance(patterns_result, Exception):
|
||||
self.logger.warning(f"Patterns analysis exception: {patterns_result}")
|
||||
patterns_result = None
|
||||
|
||||
# Step 3: Generate style guidelines
|
||||
style_guidelines = None
|
||||
if style_analysis and style_analysis.get('success'):
|
||||
loop = asyncio.get_event_loop()
|
||||
guidelines_result = await loop.run_in_executor(
|
||||
None,
|
||||
partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
|
||||
)
|
||||
if guidelines_result and guidelines_result.get('success'):
|
||||
style_guidelines = guidelines_result.get('guidelines')
|
||||
|
||||
# Prepare analysis data
|
||||
analysis_data = {
|
||||
'crawl_result': crawl_result,
|
||||
'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
|
||||
'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
|
||||
'style_guidelines': style_guidelines,
|
||||
}
|
||||
|
||||
# Step 4: Store results based on task type
|
||||
if task_type == 'user_website':
|
||||
# Update existing WebsiteAnalysis record
|
||||
await self._update_user_website_analysis(
|
||||
user_id=user_id,
|
||||
website_url=website_url,
|
||||
analysis_data=analysis_data,
|
||||
db=db
|
||||
)
|
||||
elif task_type == 'competitor':
|
||||
# Store in CompetitorAnalysis table
|
||||
await self._store_competitor_analysis(
|
||||
user_id=user_id,
|
||||
competitor_url=website_url,
|
||||
competitor_id=task.competitor_id,
|
||||
analysis_data=analysis_data,
|
||||
db=db
|
||||
)
|
||||
|
||||
self.logger.info(f"Website analysis completed successfully for {website_url}")
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=analysis_data,
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
async def _update_user_website_analysis(
|
||||
self,
|
||||
user_id: str,
|
||||
website_url: str,
|
||||
analysis_data: Dict[str, Any],
|
||||
db: Session
|
||||
):
|
||||
"""Update existing WebsiteAnalysis record for user's website."""
|
||||
try:
|
||||
# Convert Clerk user ID to integer (same as component_logic.py)
|
||||
# Use the same conversion logic as the website analysis API
|
||||
import hashlib
|
||||
user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)
|
||||
|
||||
# Use WebsiteAnalysisService to update
|
||||
analysis_service = WebsiteAnalysisService(db)
|
||||
|
||||
# Prepare data in format expected by save_analysis
|
||||
response_data = {
|
||||
'crawl_result': analysis_data.get('crawl_result'),
|
||||
'style_analysis': analysis_data.get('style_analysis'),
|
||||
'style_patterns': analysis_data.get('style_patterns'),
|
||||
'style_guidelines': analysis_data.get('style_guidelines'),
|
||||
}
|
||||
|
||||
# Save/update analysis
|
||||
analysis_id = analysis_service.save_analysis(
|
||||
session_id=user_id_int,
|
||||
website_url=website_url,
|
||||
analysis_data=response_data
|
||||
)
|
||||
|
||||
if analysis_id:
|
||||
self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
|
||||
else:
|
||||
self.logger.warning(f"Failed to update user website analysis for {website_url}")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
async def _store_competitor_analysis(
|
||||
self,
|
||||
user_id: str,
|
||||
competitor_url: str,
|
||||
competitor_id: Optional[str],
|
||||
analysis_data: Dict[str, Any],
|
||||
db: Session
|
||||
):
|
||||
"""Store competitor analysis in CompetitorAnalysis table."""
|
||||
try:
|
||||
# Get onboarding session for user
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError(f"No onboarding session found for user {user_id}")
|
||||
|
||||
# Extract domain from URL
|
||||
parsed_url = urlparse(competitor_url)
|
||||
competitor_domain = parsed_url.netloc or competitor_id
|
||||
|
||||
# Check if analysis already exists for this competitor
|
||||
existing = db.query(CompetitorAnalysis).filter(
|
||||
CompetitorAnalysis.session_id == session.id,
|
||||
CompetitorAnalysis.competitor_url == competitor_url
|
||||
).first()
|
||||
|
||||
if existing:
|
||||
# Update existing analysis
|
||||
existing.analysis_data = analysis_data
|
||||
existing.analysis_date = datetime.utcnow()
|
||||
existing.status = 'completed'
|
||||
existing.error_message = None
|
||||
existing.warning_message = None
|
||||
existing.updated_at = datetime.utcnow()
|
||||
self.logger.info(f"Updated competitor analysis for {competitor_url}")
|
||||
else:
|
||||
# Create new analysis
|
||||
competitor_analysis = CompetitorAnalysis(
|
||||
session_id=session.id,
|
||||
competitor_url=competitor_url,
|
||||
competitor_domain=competitor_domain,
|
||||
analysis_data=analysis_data,
|
||||
status='completed',
|
||||
analysis_date=datetime.utcnow()
|
||||
)
|
||||
db.add(competitor_analysis)
|
||||
self.logger.info(f"Created new competitor analysis for {competitor_url}")
|
||||
|
||||
db.commit()
|
||||
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: WebsiteAnalysisTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None,
|
||||
custom_days: Optional[int] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency or custom days.
|
||||
|
||||
Args:
|
||||
task: WebsiteAnalysisTask instance
|
||||
frequency: Frequency string ('Custom' for website analysis)
|
||||
last_execution: Last execution datetime (defaults to task.last_check or now)
|
||||
custom_days: Custom number of days (from task.frequency_days)
|
||||
|
||||
Returns:
|
||||
Next execution datetime
|
||||
"""
|
||||
if last_execution is None:
|
||||
last_execution = task.last_check if task.last_check else datetime.utcnow()
|
||||
|
||||
# Use custom_days if provided, otherwise use task.frequency_days
|
||||
days = custom_days if custom_days is not None else task.frequency_days
|
||||
|
||||
if frequency == 'Custom' and days:
|
||||
return last_execution + timedelta(days=days)
|
||||
else:
|
||||
# Default to task's frequency_days
|
||||
self.logger.warning(
|
||||
f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
|
||||
f"Using frequency_days={task.frequency_days}."
|
||||
)
|
||||
return last_execution + timedelta(days=task.frequency_days)
|
||||
|
||||
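For reference, a minimal usage sketch of calculate_next_execution above. The task object is a hypothetical stand-in exposing only the fields the method reads (id, last_check, frequency_days), not a real WebsiteAnalysisTask row, and it assumes WebsiteAnalysisExecutor is importable from the module above and takes no constructor arguments.

# Illustrative only: a stand-in with just the fields calculate_next_execution reads.
from datetime import datetime
from types import SimpleNamespace

task = SimpleNamespace(id=42, last_check=datetime(2025, 1, 1), frequency_days=7)
executor = WebsiteAnalysisExecutor()

# 'Custom' frequency with explicit custom_days wins over task.frequency_days.
next_run = executor.calculate_next_execution(task, frequency='Custom', custom_days=14)
assert next_run == datetime(2025, 1, 15)

# Any other frequency string falls back to task.frequency_days (and logs a warning).
fallback = executor.calculate_next_execution(task, frequency='Weekly')
assert fallback == datetime(2025, 1, 8)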
12
backend/services/scheduler/utils/__init__.py
Normal file
12
backend/services/scheduler/utils/__init__.py
Normal file
@@ -0,0 +1,12 @@
"""
Scheduler Utilities Package
"""

from .task_loader import load_due_monitoring_tasks
from .user_job_store import extract_domain_root, get_user_job_store_name

__all__ = [
    'load_due_monitoring_tasks',
    'extract_domain_root',
    'get_user_job_store_name'
]
33
backend/services/scheduler/utils/frequency_calculator.py
Normal file
33
backend/services/scheduler/utils/frequency_calculator.py
Normal file
@@ -0,0 +1,33 @@
"""
Frequency Calculator Utility
Calculates next execution time based on frequency string.
"""

from datetime import datetime, timedelta
from typing import Optional


def calculate_next_execution(frequency: str, base_time: Optional[datetime] = None) -> datetime:
    """
    Calculate next execution time based on frequency.

    Args:
        frequency: Frequency string ('Daily', 'Weekly', 'Monthly', 'Quarterly')
        base_time: Base time to calculate from (defaults to now if None)

    Returns:
        Next execution datetime
    """
    if base_time is None:
        base_time = datetime.utcnow()

    frequency_map = {
        'Daily': timedelta(days=1),
        'Weekly': timedelta(weeks=1),
        'Monthly': timedelta(days=30),
        'Quarterly': timedelta(days=90)
    }

    delta = frequency_map.get(frequency, timedelta(days=1))
    return base_time + delta
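A quick, hedged usage sketch of the frequency calculator above. Note that 'Monthly' and 'Quarterly' are fixed 30- and 90-day approximations, and any unrecognized string falls back to one day.

from datetime import datetime

base = datetime(2025, 3, 1)
print(calculate_next_execution('Weekly', base))     # 2025-03-08 00:00:00
print(calculate_next_execution('Quarterly', base))  # 2025-05-30 00:00:00 (90 days)
print(calculate_next_execution('Hourly', base))     # unknown string -> +1 day: 2025-03-02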
54
backend/services/scheduler/utils/oauth_token_task_loader.py
Normal file
54
backend/services/scheduler/utils/oauth_token_task_loader.py
Normal file
@@ -0,0 +1,54 @@
"""
OAuth Token Monitoring Task Loader
Functions to load due OAuth token monitoring tasks from database.
"""

from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_

from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask


def load_due_oauth_token_monitoring_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OAuthTokenMonitoringTask]:
    """
    Load all OAuth token monitoring tasks that are due for execution.

    Criteria:
    - status == 'active' (only check active tasks)
    - next_check <= now (or is None for first execution)
    - Optional: user_id filter for specific user (for user isolation)

    User isolation is enforced through filtering by user_id when provided.
    If no user_id is provided, loads tasks for all users (for system-wide monitoring).

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string) to filter tasks (if None, loads all users' tasks)

    Returns:
        List of due OAuthTokenMonitoringTask instances
    """
    now = datetime.utcnow()

    # Build query for due tasks
    query = db.query(OAuthTokenMonitoringTask).filter(
        and_(
            OAuthTokenMonitoringTask.status == 'active',
            or_(
                OAuthTokenMonitoringTask.next_check <= now,
                OAuthTokenMonitoringTask.next_check.is_(None)
            )
        )
    )

    # Apply user filter if provided (for user isolation)
    if user_id is not None:
        query = query.filter(OAuthTokenMonitoringTask.user_id == str(user_id))

    return query.all()
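A hedged sketch of calling this loader directly. get_db_session is the session helper used elsewhere in this commit and may return None when no engine is available; the Clerk user ID is made up for illustration.

from services.database import get_db_session

db = get_db_session()
if db is not None:
    try:
        due = load_due_oauth_token_monitoring_tasks(db)                   # all users
        mine = load_due_oauth_token_monitoring_tasks(db, "user_2abc123")  # single-user isolation
        for task in due:
            print(task.user_id, task.status, task.next_check)
    finally:
        db.close()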
60
backend/services/scheduler/utils/platform_insights_task_loader.py
Normal file
60
backend/services/scheduler/utils/platform_insights_task_loader.py
Normal file
@@ -0,0 +1,60 @@
"""
Platform Insights Task Loader
Functions to load due platform insights tasks from database.
"""

from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_

from models.platform_insights_monitoring_models import PlatformInsightsTask


def load_due_platform_insights_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None,
    platform: Optional[str] = None
) -> List[PlatformInsightsTask]:
    """
    Load all platform insights tasks that are due for execution.

    Criteria:
    - status == 'active' (only check active tasks)
    - next_check <= now (or is None for first execution)
    - Optional: user_id filter for specific user
    - Optional: platform filter ('gsc' or 'bing')

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string) to filter tasks
        platform: Optional platform filter ('gsc' or 'bing')

    Returns:
        List of due PlatformInsightsTask instances
    """
    now = datetime.utcnow()

    # Build query for due tasks
    query = db.query(PlatformInsightsTask).filter(
        and_(
            PlatformInsightsTask.status == 'active',
            or_(
                PlatformInsightsTask.next_check <= now,
                PlatformInsightsTask.next_check.is_(None)
            )
        )
    )

    # Apply user filter if provided
    if user_id is not None:
        query = query.filter(PlatformInsightsTask.user_id == str(user_id))

    # Apply platform filter if provided
    if platform is not None:
        query = query.filter(PlatformInsightsTask.platform == platform)

    tasks = query.all()

    return tasks
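Because the platform filter is just an argument, per-platform loader callables can be derived from this one function. A hedged sketch using functools.partial (one possible approach, equivalent to small hand-written wrappers); get_db_session is assumed from services.database and the user ID is made up.

from functools import partial
from services.database import get_db_session

# Per-platform loader callables derived from the shared loader.
load_due_gsc_tasks = partial(load_due_platform_insights_tasks, platform='gsc')
load_due_bing_tasks = partial(load_due_platform_insights_tasks, platform='bing')

db = get_db_session()
if db is not None:
    try:
        print(len(load_due_gsc_tasks(db)), "GSC insight tasks due")
        print(len(load_due_bing_tasks(db, user_id="user_2abc123")), "Bing insight tasks due for one user")
    finally:
        db.close()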
63
backend/services/scheduler/utils/task_loader.py
Normal file
63
backend/services/scheduler/utils/task_loader.py
Normal file
@@ -0,0 +1,63 @@
"""
Task Loader Utilities
Functions to load due tasks from database.
"""

from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import and_, or_

from models.monitoring_models import MonitoringTask
from models.enhanced_strategy_models import EnhancedContentStrategy


def load_due_monitoring_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[MonitoringTask]:
    """
    Load all monitoring tasks that are due for execution.

    Criteria:
    - status == 'active'
    - next_execution <= now (or is None for first execution)
    - Optional: user_id filter for specific user (for user isolation)

    Note: Strategy relationship is eagerly loaded to ensure user_id is accessible
    during task execution for user isolation.

    User isolation is enforced through filtering by user_id when provided.
    If no user_id is provided, loads tasks for all users (for system-wide monitoring).

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string or int) to filter tasks (if None, loads all users' tasks)

    Returns:
        List of due MonitoringTask instances with strategy relationship loaded
    """
    now = datetime.utcnow()

    # Join with strategy to ensure relationship is loaded and support user filtering
    query = db.query(MonitoringTask).join(
        EnhancedContentStrategy,
        MonitoringTask.strategy_id == EnhancedContentStrategy.id
    ).options(
        joinedload(MonitoringTask.strategy)  # Eagerly load strategy relationship
    ).filter(
        and_(
            MonitoringTask.status == 'active',
            or_(
                MonitoringTask.next_execution <= now,
                MonitoringTask.next_execution.is_(None)
            )
        )
    )

    # Apply user filter if provided
    if user_id is not None:
        query = query.filter(EnhancedContentStrategy.user_id == user_id)

    return query.all()
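To show why the joinedload matters, a hedged sketch of consuming the loaded tasks: reading the owning user through task.strategy does not trigger a lazy load per row. task.id is an assumed column name; the other attributes appear in the query above, and get_db_session is the assumed session helper.

from services.database import get_db_session

db = get_db_session()
if db is not None:
    try:
        for task in load_due_monitoring_tasks(db):
            owner = task.strategy.user_id if task.strategy else None
            print(f"task {task.id} -> strategy {task.strategy_id} (user {owner})")
    finally:
        db.close()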
129
backend/services/scheduler/utils/user_job_store.py
Normal file
129
backend/services/scheduler/utils/user_job_store.py
Normal file
@@ -0,0 +1,129 @@
"""
User Job Store Utilities
Utilities for managing per-user job stores based on website root.
"""

from typing import Optional
from urllib.parse import urlparse
from loguru import logger
from sqlalchemy.orm import Session as SQLSession

from services.database import get_db_session
from models.onboarding import OnboardingSession, WebsiteAnalysis


def extract_domain_root(url: str) -> str:
    """
    Extract domain root from a website URL for use as job store identifier.

    Examples:
        https://www.example.com -> example
        https://blog.example.com -> example
        https://example.co.uk -> example
        http://subdomain.example.com/path -> example

    Args:
        url: Website URL

    Returns:
        Domain root (e.g., 'example') or 'default' if extraction fails
    """
    try:
        parsed = urlparse(url)
        hostname = parsed.netloc or parsed.path.split('/')[0]

        # Remove www. prefix if present
        if hostname.startswith('www.'):
            hostname = hostname[4:]

        # Split by dots and get the root domain
        # For example.com -> example, for example.co.uk -> example
        parts = hostname.split('.')
        if len(parts) >= 2:
            # Handle common TLDs that might be part of domain (e.g., co.uk)
            if len(parts) >= 3 and parts[-2] in ['co', 'com', 'net', 'org']:
                root = parts[-3]
            else:
                root = parts[-2]
        else:
            root = parts[0] if parts else 'default'

        # Clean and validate root
        root = root.lower().strip()
        # Remove invalid characters for job store name
        root = ''.join(c for c in root if c.isalnum() or c in ['-', '_'])

        if not root or len(root) < 2:
            return 'default'

        return root

    except Exception as e:
        logger.warning(f"Failed to extract domain root from URL '{url}': {e}")
        return 'default'


def get_user_job_store_name(user_id: str, db: SQLSession = None) -> str:
    """
    Get job store name for a user based on their website root from onboarding.

    Args:
        user_id: User ID (Clerk string)
        db: Optional database session (will create if not provided)

    Returns:
        Job store name (e.g., 'example' or 'default')
    """
    db_session = db
    close_db = False

    try:
        if not db_session:
            db_session = get_db_session()
            close_db = True

        if not db_session:
            logger.warning(f"Could not get database session for user {user_id}, using default job store")
            return 'default'

        # Get user's website URL from onboarding
        # Query directly since user_id is a string (Clerk ID)
        onboarding_session = db_session.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        ).order_by(OnboardingSession.updated_at.desc()).first()

        if not onboarding_session:
            logger.debug(
                f"[Job Store] No onboarding session found for user {user_id}, using default job store. "
                f"This is normal if user hasn't completed onboarding."
            )
            return 'default'

        # Get the latest website analysis for this session
        website_analysis = db_session.query(WebsiteAnalysis).filter(
            WebsiteAnalysis.session_id == onboarding_session.id
        ).order_by(WebsiteAnalysis.updated_at.desc()).first()

        if not website_analysis or not website_analysis.website_url:
            logger.debug(
                f"[Job Store] No website URL found for user {user_id} (session_id: {onboarding_session.id}), "
                f"using default job store. This is normal if website analysis wasn't completed."
            )
            return 'default'

        website_url = website_analysis.website_url
        domain_root = extract_domain_root(website_url)

        logger.debug(f"Job store for user {user_id}: {domain_root} (from {website_url})")
        return domain_root

    except Exception as e:
        logger.error(f"Error getting job store name for user {user_id}: {e}")
        return 'default'
    finally:
        if close_db and db_session:
            try:
                db_session.close()
            except Exception:
                pass
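A small sketch of the expected behaviour of these helpers; the first two values follow the docstring examples, the empty string exercises the 'default' fallback, and the Clerk user ID is hypothetical.

assert extract_domain_root("https://www.example.com") == "example"
assert extract_domain_root("https://blog.example.co.uk/path") == "example"
assert extract_domain_root("") == "default"  # nothing usable to parse -> fallback

# Resolves to the user's domain root, or 'default' when onboarding or
# website analysis data is missing (opens its own DB session internally).
store = get_user_job_store_name("user_2abc123")
print(store)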
54
backend/services/scheduler/utils/website_analysis_task_loader.py
Normal file
54
backend/services/scheduler/utils/website_analysis_task_loader.py
Normal file
@@ -0,0 +1,54 @@
"""
Website Analysis Task Loader
Functions to load due website analysis tasks from database.
"""

from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy.orm import Session
from sqlalchemy import and_, or_

from models.website_analysis_monitoring_models import WebsiteAnalysisTask


def load_due_website_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[WebsiteAnalysisTask]:
    """
    Load all website analysis tasks that are due for execution.

    Criteria:
    - status == 'active' (only check active tasks)
    - next_check <= now (or is None for first execution)
    - Optional: user_id filter for specific user (for user isolation)

    User isolation is enforced through filtering by user_id when provided.
    If no user_id is provided, loads tasks for all users (for system-wide monitoring).

    Args:
        db: Database session
        user_id: Optional user ID (Clerk string) to filter tasks (if None, loads all users' tasks)

    Returns:
        List of due WebsiteAnalysisTask instances
    """
    now = datetime.utcnow()

    # Build query for due tasks
    query = db.query(WebsiteAnalysisTask).filter(
        and_(
            WebsiteAnalysisTask.status == 'active',
            or_(
                WebsiteAnalysisTask.next_check <= now,
                WebsiteAnalysisTask.next_check.is_(None)
            )
        )
    )

    # Apply user filter if provided (for user isolation)
    if user_id is not None:
        query = query.filter(WebsiteAnalysisTask.user_id == str(user_id))

    return query.all()
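Finally, an illustrative sketch of how a scheduler tick might combine this loader with the website analysis executor's calculate_next_execution to advance next_check. This is not the TaskScheduler's actual loop, only a hedged outline assuming WebsiteAnalysisTask exposes the last_check, next_check, and frequency_days columns referenced above.

from datetime import datetime

def advance_due_website_analysis_tasks(db, executor):
    """Sketch: mark due tasks as checked and push next_check forward."""
    for task in load_due_website_analysis_tasks(db):
        # ... run the analysis via the registered executor here ...
        task.last_check = datetime.utcnow()
        task.next_check = executor.calculate_next_execution(
            task, frequency='Custom', custom_days=task.frequency_days
        )
    db.commit()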