Base code
This commit is contained in:
4
backend/services/scheduler/executors/__init__.py
Normal file
4
backend/services/scheduler/executors/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
"""
|
||||
Task executor implementations.
|
||||
"""
|
||||
|
||||
354
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
354
backend/services/scheduler/executors/bing_insights_executor.py
Normal file
@@ -0,0 +1,354 @@
|
||||
"""
|
||||
Bing Insights Task Executor
|
||||
Handles execution of Bing insights fetch tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from services.bing_analytics_storage_service import BingAnalyticsStorageService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("bing_insights_executor")
|
||||
|
||||
|
||||
class BingInsightsExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for Bing insights fetch tasks.
|
||||
|
||||
Handles:
|
||||
- Fetching Bing insights data weekly
|
||||
- On first run: Loads existing cached data
|
||||
- On subsequent runs: Fetches fresh data from Bing API
|
||||
- Logging results and updating task status
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
|
||||
self.storage_service = BingAnalyticsStorageService(database_url)
|
||||
self.bing_oauth = BingOAuthService()
|
||||
|
||||
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a Bing insights fetch task.
|
||||
|
||||
Args:
|
||||
task: PlatformInsightsTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = task.user_id
|
||||
site_url = task.site_url
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing Bing insights fetch: task_id={task.id} | "
|
||||
f"user_id={user_id} | site_url={site_url}"
|
||||
)
|
||||
|
||||
# Create execution log
|
||||
execution_log = PlatformInsightsExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Fetch insights
|
||||
result = await self._fetch_insights(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
execution_log.data_source = result.result_data.get('data_source') if result.success else None
|
||||
|
||||
# Update task based on result
|
||||
task.last_check = datetime.utcnow()
|
||||
|
||||
if result.success:
|
||||
task.last_success = datetime.utcnow()
|
||||
task.status = 'active'
|
||||
task.failure_reason = None
|
||||
# Reset failure tracking on success
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
# Schedule next check (7 days from now)
|
||||
task.next_check = self.calculate_next_execution(
|
||||
task=task,
|
||||
frequency='Weekly',
|
||||
last_execution=task.last_check
|
||||
)
|
||||
else:
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "bing_insights", task.user_id
|
||||
)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = result.error_message
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
# Clear next_check - task won't run automatically
|
||||
task.next_check = None
|
||||
|
||||
self.logger.warning(
|
||||
f"Task {task.id} marked for human intervention: "
|
||||
f"{pattern.consecutive_failures} consecutive failures, "
|
||||
f"reason: {pattern.failure_reason.value}"
|
||||
)
|
||||
else:
|
||||
# Normal failure handling
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
# Schedule retry in 1 day
|
||||
task.next_check = datetime.utcnow() + timedelta(days=1)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
error_result = self.exception_handler.handle_task_execution_error(
|
||||
task=task,
|
||||
error=e,
|
||||
execution_time_ms=execution_time_ms,
|
||||
context="Bing insights fetch"
|
||||
)
|
||||
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "bing_insights", task.user_id
|
||||
)
|
||||
|
||||
# Update task
|
||||
task.last_check = datetime.utcnow()
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = str(e)
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
task.next_check = None
|
||||
else:
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
task.next_check = datetime.utcnow() + timedelta(days=1)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return error_result
|
||||
|
||||
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Fetch Bing insights data.
|
||||
|
||||
On first run (no last_success), loads cached data.
|
||||
On subsequent runs, fetches fresh data from API.
|
||||
"""
|
||||
user_id = task.user_id
|
||||
site_url = task.site_url
|
||||
|
||||
try:
|
||||
# Check if this is first run (no previous success)
|
||||
is_first_run = task.last_success is None
|
||||
|
||||
if is_first_run:
|
||||
# First run: Try to load from cache
|
||||
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
|
||||
cached_data = self._load_cached_data(user_id, site_url)
|
||||
|
||||
if cached_data:
|
||||
self.logger.info(f"Loaded cached Bing data for user {user_id}")
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'data_source': 'cached',
|
||||
'insights': cached_data,
|
||||
'message': 'Loaded from cached data (first run)'
|
||||
}
|
||||
)
|
||||
else:
|
||||
# No cached data - try to fetch from API
|
||||
self.logger.info(f"No cached data found, fetching from Bing API")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
else:
|
||||
# Subsequent run: Always fetch fresh data
|
||||
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Failed to fetch Bing insights: {str(e)}",
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Load most recent cached Bing data from database."""
|
||||
try:
|
||||
# Get analytics summary from storage service
|
||||
summary = self.storage_service.get_analytics_summary(
|
||||
user_id=user_id,
|
||||
site_url=site_url or '',
|
||||
days=30
|
||||
)
|
||||
|
||||
if summary and isinstance(summary, dict):
|
||||
self.logger.info(f"Found cached Bing data for user {user_id}")
|
||||
return summary
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error loading cached Bing data: {e}")
|
||||
return None
|
||||
|
||||
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
|
||||
"""Fetch fresh Bing insights from API."""
|
||||
try:
|
||||
# Check if user has active tokens
|
||||
token_status = self.bing_oauth.get_user_token_status(user_id)
|
||||
|
||||
if not token_status.get('has_active_tokens'):
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Bing Webmaster tokens not available or expired",
|
||||
result_data={'error': 'No active tokens'}
|
||||
)
|
||||
|
||||
# Get user's sites
|
||||
sites = self.bing_oauth.get_user_sites(user_id)
|
||||
|
||||
if not sites:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No Bing Webmaster sites found",
|
||||
result_data={'error': 'No sites found'}
|
||||
)
|
||||
|
||||
# Use provided site_url or first site
|
||||
if not site_url:
|
||||
site_url = sites[0].get('Url', '') if isinstance(sites[0], dict) else sites[0]
|
||||
|
||||
# Get active token
|
||||
active_tokens = token_status.get('active_tokens', [])
|
||||
if not active_tokens:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No active Bing Webmaster tokens",
|
||||
result_data={'error': 'No tokens'}
|
||||
)
|
||||
|
||||
# For now, use stored analytics data (Bing API integration can be added later)
|
||||
# This ensures we have data available even if the API class doesn't exist yet
|
||||
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)
|
||||
|
||||
if summary and isinstance(summary, dict):
|
||||
# Format insights data from stored analytics
|
||||
insights_data = {
|
||||
'site_url': site_url,
|
||||
'date_range': {
|
||||
'start': (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'),
|
||||
'end': datetime.now().strftime('%Y-%m-%d')
|
||||
},
|
||||
'summary': summary.get('summary', {}),
|
||||
'fetched_at': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
self.logger.info(
|
||||
f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
|
||||
)
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'data_source': 'storage',
|
||||
'insights': insights_data,
|
||||
'message': 'Loaded from stored analytics data'
|
||||
}
|
||||
)
|
||||
else:
|
||||
# No stored data available
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
|
||||
result_data={'error': 'No stored data available'}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"API fetch failed: {str(e)}",
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: PlatformInsightsTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
For platform insights, frequency is always 'Weekly' (7 days).
|
||||
"""
|
||||
if last_execution is None:
|
||||
last_execution = datetime.utcnow()
|
||||
|
||||
if frequency == 'Weekly':
|
||||
return last_execution + timedelta(days=7)
|
||||
elif frequency == 'Daily':
|
||||
return last_execution + timedelta(days=1)
|
||||
else:
|
||||
# Default to weekly
|
||||
return last_execution + timedelta(days=7)
|
||||
|
||||
363
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
363
backend/services/scheduler/executors/gsc_insights_executor.py
Normal file
@@ -0,0 +1,363 @@
|
||||
"""
|
||||
GSC Insights Task Executor
|
||||
Handles execution of GSC insights fetch tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
import sqlite3
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from services.gsc_service import GSCService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("gsc_insights_executor")
|
||||
|
||||
|
||||
class GSCInsightsExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for GSC insights fetch tasks.
|
||||
|
||||
Handles:
|
||||
- Fetching GSC insights data weekly
|
||||
- On first run: Loads existing cached data
|
||||
- On subsequent runs: Fetches fresh data from GSC API
|
||||
- Logging results and updating task status
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
self.gsc_service = GSCService()
|
||||
|
||||
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a GSC insights fetch task.
|
||||
|
||||
Args:
|
||||
task: PlatformInsightsTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = task.user_id
|
||||
site_url = task.site_url
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing GSC insights fetch: task_id={task.id} | "
|
||||
f"user_id={user_id} | site_url={site_url}"
|
||||
)
|
||||
|
||||
# Create execution log
|
||||
execution_log = PlatformInsightsExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Fetch insights
|
||||
result = await self._fetch_insights(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
execution_log.data_source = result.result_data.get('data_source') if result.success else None
|
||||
|
||||
# Update task based on result
|
||||
task.last_check = datetime.utcnow()
|
||||
|
||||
if result.success:
|
||||
task.last_success = datetime.utcnow()
|
||||
task.status = 'active'
|
||||
task.failure_reason = None
|
||||
# Reset failure tracking on success
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
# Schedule next check (7 days from now)
|
||||
task.next_check = self.calculate_next_execution(
|
||||
task=task,
|
||||
frequency='Weekly',
|
||||
last_execution=task.last_check
|
||||
)
|
||||
else:
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "gsc_insights", task.user_id
|
||||
)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = result.error_message
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
# Clear next_check - task won't run automatically
|
||||
task.next_check = None
|
||||
|
||||
self.logger.warning(
|
||||
f"Task {task.id} marked for human intervention: "
|
||||
f"{pattern.consecutive_failures} consecutive failures, "
|
||||
f"reason: {pattern.failure_reason.value}"
|
||||
)
|
||||
else:
|
||||
# Normal failure handling
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
# Schedule retry in 1 day
|
||||
task.next_check = datetime.utcnow() + timedelta(days=1)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
error_result = self.exception_handler.handle_task_execution_error(
|
||||
task=task,
|
||||
error=e,
|
||||
execution_time_ms=execution_time_ms,
|
||||
context="GSC insights fetch"
|
||||
)
|
||||
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "gsc_insights", task.user_id
|
||||
)
|
||||
|
||||
# Update task
|
||||
task.last_check = datetime.utcnow()
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = str(e)
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
task.next_check = None
|
||||
else:
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
task.next_check = datetime.utcnow() + timedelta(days=1)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return error_result
|
||||
|
||||
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Fetch GSC insights data.
|
||||
|
||||
On first run (no last_success), loads cached data.
|
||||
On subsequent runs, fetches fresh data from API.
|
||||
"""
|
||||
user_id = task.user_id
|
||||
site_url = task.site_url
|
||||
|
||||
try:
|
||||
# Check if this is first run (no previous success)
|
||||
is_first_run = task.last_success is None
|
||||
|
||||
if is_first_run:
|
||||
# First run: Try to load from cache
|
||||
self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
|
||||
cached_data = self._load_cached_data(user_id, site_url)
|
||||
|
||||
if cached_data:
|
||||
self.logger.info(f"Loaded cached GSC data for user {user_id}")
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'data_source': 'cached',
|
||||
'insights': cached_data,
|
||||
'message': 'Loaded from cached data (first run)'
|
||||
}
|
||||
)
|
||||
else:
|
||||
# No cached data - try to fetch from API
|
||||
self.logger.info(f"No cached data found, fetching from GSC API")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
else:
|
||||
# Subsequent run: Always fetch fresh data
|
||||
self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Failed to fetch GSC insights: {str(e)}",
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Load most recent cached GSC data from database."""
|
||||
try:
|
||||
db_path = self.gsc_service.db_path
|
||||
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Find most recent cached data
|
||||
if site_url:
|
||||
cursor.execute('''
|
||||
SELECT data_json, created_at
|
||||
FROM gsc_data_cache
|
||||
WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
''', (user_id, site_url))
|
||||
else:
|
||||
cursor.execute('''
|
||||
SELECT data_json, created_at
|
||||
FROM gsc_data_cache
|
||||
WHERE user_id = ? AND data_type = 'analytics'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
''', (user_id,))
|
||||
|
||||
result = cursor.fetchone()
|
||||
|
||||
if result:
|
||||
data_json, created_at = result
|
||||
insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json
|
||||
|
||||
self.logger.info(
|
||||
f"Found cached GSC data from {created_at} for user {user_id}"
|
||||
)
|
||||
|
||||
return insights_data
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error loading cached GSC data: {e}")
|
||||
return None
|
||||
|
||||
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
|
||||
"""Fetch fresh GSC insights from API."""
|
||||
try:
|
||||
# If no site_url, get first site
|
||||
if not site_url:
|
||||
sites = self.gsc_service.get_site_list(user_id)
|
||||
if not sites:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No GSC sites found for user",
|
||||
result_data={'error': 'No sites found'}
|
||||
)
|
||||
site_url = sites[0]['siteUrl']
|
||||
|
||||
# Get analytics for last 30 days
|
||||
end_date = datetime.now().strftime('%Y-%m-%d')
|
||||
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
|
||||
|
||||
# Fetch search analytics
|
||||
search_analytics = self.gsc_service.get_search_analytics(
|
||||
user_id=user_id,
|
||||
site_url=site_url,
|
||||
start_date=start_date,
|
||||
end_date=end_date
|
||||
)
|
||||
|
||||
if 'error' in search_analytics:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=search_analytics.get('error', 'Unknown error'),
|
||||
result_data=search_analytics
|
||||
)
|
||||
|
||||
# Format insights data
|
||||
insights_data = {
|
||||
'site_url': site_url,
|
||||
'date_range': {
|
||||
'start': start_date,
|
||||
'end': end_date
|
||||
},
|
||||
'overall_metrics': search_analytics.get('overall_metrics', {}),
|
||||
'query_data': search_analytics.get('query_data', {}),
|
||||
'fetched_at': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
self.logger.info(
|
||||
f"Successfully fetched GSC insights for user {user_id}, site {site_url}"
|
||||
)
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'data_source': 'api',
|
||||
'insights': insights_data,
|
||||
'message': 'Fetched fresh data from GSC API'
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"API fetch failed: {str(e)}",
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: PlatformInsightsTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
For platform insights, frequency is always 'Weekly' (7 days).
|
||||
"""
|
||||
if last_execution is None:
|
||||
last_execution = datetime.utcnow()
|
||||
|
||||
if frequency == 'Weekly':
|
||||
return last_execution + timedelta(days=7)
|
||||
elif frequency == 'Daily':
|
||||
return last_execution + timedelta(days=1)
|
||||
else:
|
||||
# Default to weekly
|
||||
return last_execution + timedelta(days=7)
|
||||
|
||||
266
backend/services/scheduler/executors/monitoring_task_executor.py
Normal file
266
backend/services/scheduler/executors/monitoring_task_executor.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""
|
||||
Monitoring Task Executor
|
||||
Handles execution of content strategy monitoring tasks.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from ..utils.frequency_calculator import calculate_next_execution
|
||||
from models.monitoring_models import MonitoringTask, TaskExecutionLog
|
||||
from models.enhanced_strategy_models import EnhancedContentStrategy
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("monitoring_task_executor")
|
||||
|
||||
|
||||
class MonitoringTaskExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for content strategy monitoring tasks.
|
||||
|
||||
Handles:
|
||||
- ALwrity tasks (automated execution)
|
||||
- Human tasks (notifications/queuing)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
|
||||
async def execute_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a monitoring task with user isolation.
|
||||
|
||||
Args:
|
||||
task: MonitoringTask instance (with strategy relationship loaded)
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Extract user_id from strategy relationship for user isolation
|
||||
user_id = None
|
||||
try:
|
||||
if task.strategy and hasattr(task.strategy, 'user_id'):
|
||||
user_id = task.strategy.user_id
|
||||
elif task.strategy_id:
|
||||
# Fallback: query strategy if relationship not loaded
|
||||
strategy = db.query(EnhancedContentStrategy).filter(
|
||||
EnhancedContentStrategy.id == task.strategy_id
|
||||
).first()
|
||||
if strategy:
|
||||
user_id = strategy.user_id
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Could not extract user_id for task {task.id}: {e}")
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing monitoring task: {task.id} | "
|
||||
f"user_id: {user_id} | "
|
||||
f"assignee: {task.assignee} | "
|
||||
f"frequency: {task.frequency}"
|
||||
)
|
||||
|
||||
# Create execution log with user_id for user isolation tracking
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
user_id=user_id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Execute based on assignee
|
||||
if task.assignee == 'ALwrity':
|
||||
result = await self._execute_alwrity_task(task, db)
|
||||
else:
|
||||
result = await self._execute_human_task(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task
|
||||
task.last_executed = datetime.utcnow()
|
||||
task.next_execution = self.calculate_next_execution(
|
||||
task,
|
||||
task.frequency,
|
||||
task.last_executed
|
||||
)
|
||||
|
||||
if result.success:
|
||||
task.status = 'completed'
|
||||
else:
|
||||
task.status = 'failed'
|
||||
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing monitoring task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
task_type="monitoring_task",
|
||||
execution_time_ms=execution_time_ms,
|
||||
context={
|
||||
"assignee": task.assignee,
|
||||
"frequency": task.frequency,
|
||||
"component": task.component_name
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error (include user_id for isolation)
|
||||
try:
|
||||
execution_log = TaskExecutionLog(
|
||||
task_id=task.id,
|
||||
user_id=user_id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='failed',
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
result_data={
|
||||
"error_type": error.error_type.value,
|
||||
"severity": error.severity.value,
|
||||
"context": error.context
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
task.status = 'failed'
|
||||
task.last_executed = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
message=f"Error saving execution log: {str(commit_error)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
original_error=commit_error
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
retryable=True,
|
||||
retry_delay=300
|
||||
)
|
||||
|
||||
async def _execute_alwrity_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute an ALwrity (automated) monitoring task.
|
||||
|
||||
This is where the actual monitoring logic would go.
|
||||
For now, we'll implement a placeholder that can be extended.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Executing ALwrity task: {task.task_title}")
|
||||
|
||||
# TODO: Implement actual monitoring logic based on:
|
||||
# - task.metric
|
||||
# - task.measurement_method
|
||||
# - task.success_criteria
|
||||
# - task.alert_threshold
|
||||
|
||||
# Placeholder: Simulate task execution
|
||||
result_data = {
|
||||
'metric_value': 0,
|
||||
'status': 'measured',
|
||||
'message': f"Task {task.task_title} executed successfully",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error in ALwrity task execution: {e}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
async def _execute_human_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute a Human monitoring task (notification/queuing).
|
||||
|
||||
For human tasks, we don't execute the task directly,
|
||||
but rather queue it for human review or send notifications.
|
||||
"""
|
||||
try:
|
||||
self.logger.info(f"Queuing human task: {task.task_title}")
|
||||
|
||||
# TODO: Implement notification/queuing system:
|
||||
# - Send email notification
|
||||
# - Add to user's task queue
|
||||
# - Create in-app notification
|
||||
|
||||
result_data = {
|
||||
'status': 'queued',
|
||||
'message': f"Task {task.task_title} queued for human review",
|
||||
'timestamp': datetime.utcnow().isoformat()
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error queuing human task: {e}")
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
retryable=True
|
||||
)
|
||||
|
||||
def calculate_next_execution(
|
||||
self,
|
||||
task: MonitoringTask,
|
||||
frequency: str,
|
||||
last_execution: Optional[datetime] = None
|
||||
) -> datetime:
|
||||
"""
|
||||
Calculate next execution time based on frequency.
|
||||
|
||||
Args:
|
||||
task: MonitoringTask instance
|
||||
frequency: Frequency string (Daily, Weekly, Monthly, Quarterly)
|
||||
last_execution: Last execution datetime (defaults to now)
|
||||
|
||||
Returns:
|
||||
Next execution datetime
|
||||
"""
|
||||
return calculate_next_execution(
|
||||
frequency=frequency,
|
||||
base_time=last_execution or datetime.utcnow()
|
||||
)
|
||||
|
||||
@@ -0,0 +1,789 @@
|
||||
"""
|
||||
OAuth Token Monitoring Task Executor
|
||||
Handles execution of OAuth token monitoring tasks for connected platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
|
||||
from models.subscription_models import UsageAlert
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import platform-specific services
|
||||
from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
from services.wix_service import WixService
|
||||
|
||||
logger = get_service_logger("oauth_token_monitoring_executor")
|
||||
|
||||
|
||||
class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for OAuth token monitoring tasks.
|
||||
|
||||
Handles:
|
||||
- Checking token validity and expiration
|
||||
- Attempting automatic token refresh
|
||||
- Logging results and updating task status
|
||||
- One-time refresh attempt (no automatic retries on failure)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
# Expiration warning window (7 days before expiration)
|
||||
self.expiration_warning_days = 7
|
||||
|
||||
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
|
||||
"""
|
||||
Execute an OAuth token monitoring task.
|
||||
|
||||
This checks token status and attempts refresh if needed.
|
||||
If refresh fails, marks task as failed and does not retry automatically.
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult
|
||||
"""
|
||||
start_time = time.time()
|
||||
user_id = task.user_id
|
||||
platform = task.platform
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing OAuth token monitoring: task_id={task.id} | "
|
||||
f"user_id={user_id} | platform={platform}"
|
||||
)
|
||||
|
||||
# Create execution log
|
||||
execution_log = OAuthTokenExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='running'
|
||||
)
|
||||
db.add(execution_log)
|
||||
db.flush()
|
||||
|
||||
# Check and refresh token
|
||||
result = await self._check_and_refresh_token(task, db)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
execution_log.status = 'success' if result.success else 'failed'
|
||||
execution_log.result_data = result.result_data
|
||||
execution_log.error_message = result.error_message
|
||||
execution_log.execution_time_ms = execution_time_ms
|
||||
|
||||
# Update task based on result
|
||||
task.last_check = datetime.utcnow()
|
||||
|
||||
if result.success:
|
||||
task.last_success = datetime.utcnow()
|
||||
task.status = 'active'
|
||||
task.failure_reason = None
|
||||
# Reset failure tracking on success
|
||||
task.consecutive_failures = 0
|
||||
task.failure_pattern = None
|
||||
# Schedule next check (7 days from now)
|
||||
task.next_check = self.calculate_next_execution(
|
||||
task=task,
|
||||
frequency='Weekly',
|
||||
last_execution=task.last_check
|
||||
)
|
||||
else:
|
||||
# Analyze failure pattern
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
failure_detection = FailureDetectionService(db)
|
||||
pattern = failure_detection.analyze_task_failures(
|
||||
task.id, "oauth_token_monitoring", task.user_id
|
||||
)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = result.error_message
|
||||
|
||||
if pattern and pattern.should_cool_off:
|
||||
# Mark task for human intervention
|
||||
task.status = "needs_intervention"
|
||||
task.consecutive_failures = pattern.consecutive_failures
|
||||
task.failure_pattern = {
|
||||
"consecutive_failures": pattern.consecutive_failures,
|
||||
"recent_failures": pattern.recent_failures,
|
||||
"failure_reason": pattern.failure_reason.value,
|
||||
"error_patterns": pattern.error_patterns,
|
||||
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
|
||||
}
|
||||
# Clear next_check - task won't run automatically
|
||||
task.next_check = None
|
||||
|
||||
self.logger.warning(
|
||||
f"Task {task.id} marked for human intervention: "
|
||||
f"{pattern.consecutive_failures} consecutive failures, "
|
||||
f"reason: {pattern.failure_reason.value}"
|
||||
)
|
||||
else:
|
||||
# Normal failure handling
|
||||
task.status = 'failed'
|
||||
task.consecutive_failures = (task.consecutive_failures or 0) + 1
|
||||
# Do NOT update next_check - wait for manual trigger
|
||||
|
||||
self.logger.warning(
|
||||
f"OAuth token refresh failed for user {user_id}, platform {platform}. "
|
||||
f"{'Task marked for human intervention' if pattern and pattern.should_cool_off else 'Task marked as failed. No automatic retry will be scheduled.'}"
|
||||
)
|
||||
|
||||
# Create UsageAlert notification for the user
|
||||
self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)
|
||||
|
||||
task.updated_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
|
||||
# Set database session for exception handler
|
||||
self.exception_handler.db = db
|
||||
|
||||
# Create structured error
|
||||
error = TaskExecutionError(
|
||||
message=f"Error executing OAuth token monitoring task {task.id}: {str(e)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
task_type="oauth_token_monitoring",
|
||||
execution_time_ms=execution_time_ms,
|
||||
context={
|
||||
"platform": platform,
|
||||
"user_id": user_id
|
||||
},
|
||||
original_error=e
|
||||
)
|
||||
|
||||
# Handle exception with structured logging
|
||||
self.exception_handler.handle_exception(error)
|
||||
|
||||
# Update execution log with error
|
||||
try:
|
||||
execution_log = OAuthTokenExecutionLog(
|
||||
task_id=task.id,
|
||||
execution_date=datetime.utcnow(),
|
||||
status='failed',
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
result_data={
|
||||
"error_type": error.error_type.value,
|
||||
"severity": error.severity.value,
|
||||
"context": error.context
|
||||
}
|
||||
)
|
||||
db.add(execution_log)
|
||||
|
||||
task.last_failure = datetime.utcnow()
|
||||
task.failure_reason = str(e)
|
||||
task.status = 'failed'
|
||||
task.last_check = datetime.utcnow()
|
||||
task.updated_at = datetime.utcnow()
|
||||
# Do NOT update next_check - wait for manual trigger
|
||||
|
||||
# Create UsageAlert notification for the user
|
||||
self._create_failure_alert(user_id, task.platform, str(e), None, db)
|
||||
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
db_error = DatabaseError(
|
||||
message=f"Error saving execution log: {str(commit_error)}",
|
||||
user_id=user_id,
|
||||
task_id=task.id,
|
||||
original_error=commit_error
|
||||
)
|
||||
self.exception_handler.handle_exception(db_error)
|
||||
db.rollback()
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=str(e),
|
||||
execution_time_ms=execution_time_ms,
|
||||
retryable=False, # Do not retry automatically
|
||||
retry_delay=0
|
||||
)
|
||||
|
||||
async def _check_and_refresh_token(
|
||||
self,
|
||||
task: OAuthTokenMonitoringTask,
|
||||
db: Session
|
||||
) -> TaskExecutionResult:
|
||||
"""
|
||||
Check token status and attempt refresh if needed.
|
||||
|
||||
Tokens are stored in the database from onboarding step 5:
|
||||
- GSC: gsc_credentials table (via GSCService)
|
||||
- Bing: bing_oauth_tokens table (via BingOAuthService)
|
||||
- WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
|
||||
- Wix: wix_oauth_tokens table (via WixOAuthService)
|
||||
|
||||
Args:
|
||||
task: OAuthTokenMonitoringTask instance
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
TaskExecutionResult with success status and details
|
||||
"""
|
||||
platform = task.platform
|
||||
user_id = task.user_id
|
||||
|
||||
try:
|
||||
self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")
|
||||
|
||||
# Route to platform-specific checking logic
|
||||
if platform == 'gsc':
|
||||
return await self._check_gsc_token(user_id)
|
||||
elif platform == 'bing':
|
||||
return await self._check_bing_token(user_id)
|
||||
elif platform == 'wordpress':
|
||||
return await self._check_wordpress_token(user_id)
|
||||
elif platform == 'wix':
|
||||
return await self._check_wix_token(user_id)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Unsupported platform: {platform}",
|
||||
result_data={
|
||||
'platform': platform,
|
||||
'user_id': user_id,
|
||||
'error': 'Unsupported platform'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': platform,
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False # Do not retry automatically
|
||||
)
|
||||
|
||||
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check and refresh GSC (Google Search Console) token.
|
||||
|
||||
GSC service auto-refreshes tokens if expired when loading credentials.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
gsc_service = GSCService(db_path=db_path)
|
||||
credentials = gsc_service.load_user_credentials(user_id)
|
||||
|
||||
if not credentials:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="GSC credentials not found or could not be loaded",
|
||||
result_data={
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# GSC service auto-refreshes if expired, so if we get here, token is valid
|
||||
result_data = {
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'GSC token is valid (auto-refreshed if expired)'
|
||||
}
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data=result_data
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"GSC token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'gsc',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check and refresh Bing Webmaster Tools token.
|
||||
|
||||
Checks token expiration and attempts refresh if needed.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
bing_service = BingOAuthService(db_path=db_path)
|
||||
|
||||
# Get token status (includes expired tokens)
|
||||
token_status = bing_service.get_user_token_status(user_id)
|
||||
|
||||
if not token_status.get('has_tokens'):
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No Bing tokens found for user",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
active_tokens = token_status.get('active_tokens', [])
|
||||
expired_tokens = token_status.get('expired_tokens', [])
|
||||
|
||||
# If we have active tokens, check if any are expiring soon (< 7 days)
|
||||
if active_tokens:
|
||||
now = datetime.utcnow()
|
||||
needs_refresh = False
|
||||
token_to_refresh = None
|
||||
|
||||
for token in active_tokens:
|
||||
expires_at_str = token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
# Check if expires within warning window (7 days)
|
||||
days_until_expiry = (expires_at - now).days
|
||||
if days_until_expiry < self.expiration_warning_days:
|
||||
needs_refresh = True
|
||||
token_to_refresh = token
|
||||
break
|
||||
except Exception:
|
||||
# If parsing fails, assume token is valid
|
||||
pass
|
||||
|
||||
if needs_refresh and token_to_refresh:
|
||||
# Attempt to refresh
|
||||
refresh_token = token_to_refresh.get('refresh_token')
|
||||
if refresh_token:
|
||||
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
|
||||
if refresh_result:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refreshed',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token refreshed successfully'
|
||||
}
|
||||
)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Failed to refresh Bing token",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refresh_failed',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# Token is valid and not expiring soon
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token is valid'
|
||||
}
|
||||
)
|
||||
|
||||
# No active tokens, check if we can refresh expired ones
|
||||
if expired_tokens:
|
||||
# Try to refresh the most recent expired token
|
||||
latest_token = expired_tokens[0] # Already sorted by created_at DESC
|
||||
refresh_token = latest_token.get('refresh_token')
|
||||
|
||||
if refresh_token:
|
||||
# Check if token expired recently (within grace period)
|
||||
expires_at_str = latest_token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
# Only refresh if expired within last 24 hours (grace period)
|
||||
hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
|
||||
if hours_since_expiry < 24:
|
||||
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
|
||||
if refresh_result:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'refreshed',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token refreshed from expired state'
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Bing token expired and could not be refreshed",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'expired',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Bing token expired. User needs to reconnect.'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No valid Bing tokens found",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'status': 'invalid',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Bing token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'bing',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check WordPress token validity.
|
||||
|
||||
Note: WordPress tokens cannot be refreshed. They expire after 2 weeks
|
||||
and require user re-authorization. We only check if token is valid.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
wordpress_service = WordPressOAuthService(db_path=db_path)
|
||||
tokens = wordpress_service.get_user_tokens(user_id)
|
||||
|
||||
if not tokens:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="No WordPress tokens found for user",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'not_found',
|
||||
'check_time': datetime.utcnow().isoformat()
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
# Check each token - WordPress tokens expire in 2 weeks
|
||||
now = datetime.utcnow()
|
||||
valid_tokens = []
|
||||
expiring_soon = []
|
||||
expired_tokens = []
|
||||
|
||||
for token in tokens:
|
||||
expires_at_str = token.get('expires_at')
|
||||
if expires_at_str:
|
||||
try:
|
||||
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
|
||||
days_until_expiry = (expires_at - now).days
|
||||
|
||||
if days_until_expiry < 0:
|
||||
expired_tokens.append(token)
|
||||
elif days_until_expiry < self.expiration_warning_days:
|
||||
expiring_soon.append(token)
|
||||
else:
|
||||
valid_tokens.append(token)
|
||||
except Exception:
|
||||
# If parsing fails, test token validity via API
|
||||
access_token = token.get('access_token')
|
||||
if access_token and wordpress_service.test_token(access_token):
|
||||
valid_tokens.append(token)
|
||||
else:
|
||||
expired_tokens.append(token)
|
||||
else:
|
||||
# No expiration date - test token validity
|
||||
access_token = token.get('access_token')
|
||||
if access_token and wordpress_service.test_token(access_token):
|
||||
valid_tokens.append(token)
|
||||
else:
|
||||
expired_tokens.append(token)
|
||||
|
||||
if valid_tokens:
|
||||
return TaskExecutionResult(
|
||||
success=True,
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'valid',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token is valid',
|
||||
'valid_tokens_count': len(valid_tokens)
|
||||
}
|
||||
)
|
||||
elif expiring_soon:
|
||||
# WordPress tokens cannot be refreshed - user needs to reconnect
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="WordPress token expiring soon and cannot be auto-refreshed",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'expiring_soon',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
else:
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="WordPress token expired and cannot be refreshed",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'status': 'expired',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"WordPress token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'wordpress',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
|
||||
"""
|
||||
Check Wix token validity.
|
||||
|
||||
Note: Wix tokens are currently stored in frontend sessionStorage.
|
||||
Backend storage needs to be implemented for automated checking.
|
||||
"""
|
||||
try:
|
||||
# TODO: Wix tokens are stored in frontend sessionStorage, not backend database
|
||||
# Once backend storage is implemented, we can check tokens here
|
||||
# For now, return not supported
|
||||
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
|
||||
result_data={
|
||||
'platform': 'wix',
|
||||
'user_id': user_id,
|
||||
'status': 'not_supported',
|
||||
'check_time': datetime.utcnow().isoformat(),
|
||||
'message': 'Wix token monitoring requires backend token storage implementation'
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error checking Wix token for user {user_id}: {e}", exc_info=True)
|
||||
return TaskExecutionResult(
|
||||
success=False,
|
||||
error_message=f"Wix token check failed: {str(e)}",
|
||||
result_data={
|
||||
'platform': 'wix',
|
||||
'user_id': user_id,
|
||||
'error': str(e)
|
||||
},
|
||||
retryable=False
|
||||
)
|
||||
|
||||
    def _create_failure_alert(
        self,
        user_id: str,
        platform: str,
        error_message: str,
        result_data: Optional[Dict[str, Any]],
        db: Session
    ):
        """
        Create a UsageAlert notification when OAuth token refresh fails.

        Args:
            user_id: User ID
            platform: Platform identifier (gsc, bing, wordpress, wix)
            error_message: Error message from token check
            result_data: Optional result data from token check
            db: Database session
        """
        try:
            # Determine severity based on error type
            status = result_data.get('status', 'unknown') if result_data else 'unknown'

            if status in ['expired', 'refresh_failed']:
                severity = 'error'
                alert_type = 'oauth_token_failure'
            elif status in ['expiring_soon', 'not_found']:
                severity = 'warning'
                alert_type = 'oauth_token_warning'
            else:
                severity = 'error'
                alert_type = 'oauth_token_failure'

            # Format platform name for display
            platform_names = {
                'gsc': 'Google Search Console',
                'bing': 'Bing Webmaster Tools',
                'wordpress': 'WordPress',
                'wix': 'Wix'
            }
            platform_display = platform_names.get(platform, platform.upper())

            # Create alert title and message
            if status == 'expired':
                title = f"{platform_display} Token Expired"
                message = (
                    f"Your {platform_display} access token has expired and could not be automatically renewed. "
                    f"Please reconnect your {platform_display} account to continue using this integration."
                )
            elif status == 'expiring_soon':
                title = f"{platform_display} Token Expiring Soon"
                message = (
                    f"Your {platform_display} access token will expire soon. "
                    f"Please reconnect your {platform_display} account to avoid interruption."
                )
            elif status == 'refresh_failed':
                title = f"{platform_display} Token Renewal Failed"
                message = (
                    f"Failed to automatically renew your {platform_display} access token. "
                    f"Please reconnect your {platform_display} account. "
                    f"Error: {error_message}"
                )
            elif status == 'not_found':
                title = f"{platform_display} Token Not Found"
                message = (
                    f"No {platform_display} access token found. "
                    f"Please connect your {platform_display} account in the onboarding settings."
                )
            else:
                title = f"{platform_display} Token Error"
                message = (
                    f"An error occurred while checking your {platform_display} access token. "
                    f"Please reconnect your {platform_display} account. "
                    f"Error: {error_message}"
                )

            # Get current billing period (YYYY-MM format)
            from datetime import datetime
            billing_period = datetime.utcnow().strftime("%Y-%m")

            # Create UsageAlert
            alert = UsageAlert(
                user_id=user_id,
                alert_type=alert_type,
                threshold_percentage=0,  # Not applicable for OAuth alerts
                provider=None,  # Not applicable for OAuth alerts
                title=title,
                message=message,
                severity=severity,
                is_sent=False,  # Will be marked as sent when frontend polls
                is_read=False,
                billing_period=billing_period
            )

            db.add(alert)
            # Note: We don't commit here - let the caller commit
            # This allows the alert to be created atomically with the task update

            self.logger.info(
                f"Created UsageAlert for OAuth token failure: user={user_id}, "
                f"platform={platform}, severity={severity}"
            )

        except Exception as e:
            # Don't fail the entire task execution if alert creation fails
            self.logger.error(
                f"Failed to create UsageAlert for OAuth token failure: {e}",
                exc_info=True
            )
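    # Illustrative usage sketch (not part of the original commit): the token-check
    # caller and the example error string are assumed. _create_failure_alert only
    # stages the alert, so the caller owns the commit together with the task update.
    #
    #     self._create_failure_alert(
    #         user_id=task.user_id,
    #         platform='gsc',
    #         error_message="Token refresh failed",
    #         result_data={'status': 'refresh_failed'},
    #         db=db,
    #     )
    #     task.status = 'failed'
    #     db.commit()  # alert and task update land in one transaction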
    def calculate_next_execution(
        self,
        task: OAuthTokenMonitoringTask,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        For OAuth token monitoring, frequency is always 'Weekly' (7 days).

        Args:
            task: OAuthTokenMonitoringTask instance
            frequency: Frequency string (should be 'Weekly' for token monitoring)
            last_execution: Last execution datetime (defaults to task.last_check or now)

        Returns:
            Next execution datetime
        """
        if last_execution is None:
            last_execution = task.last_check if task.last_check else datetime.utcnow()

        # OAuth token monitoring is always weekly (7 days)
        if frequency == 'Weekly':
            return last_execution + timedelta(days=7)
        else:
            # Default to weekly if frequency is not recognized
            self.logger.warning(
                f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
                f"Defaulting to Weekly (7 days)."
            )
            return last_execution + timedelta(days=7)
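    # Worked example (illustrative, not part of the original commit): with
    # task.last_check = 2025-01-01 00:00 UTC and frequency='Weekly', this returns
    # 2025-01-08 00:00 UTC; any unrecognised frequency logs a warning and falls
    # back to the same 7-day interval. `executor` is an assumed instance name.
    #
    #     next_run = executor.calculate_next_execution(task, frequency='Weekly')
    #     assert next_run == task.last_check + timedelta(days=7)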
@@ -0,0 +1,492 @@
"""
|
||||
Website Analysis Task Executor
|
||||
Handles execution of website analysis tasks for user and competitor websites.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from functools import partial
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
|
||||
from models.onboarding import CompetitorAnalysis, OnboardingSession
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
# Import website analysis services
|
||||
from services.component_logic.web_crawler_logic import WebCrawlerLogic
|
||||
from services.component_logic.style_detection_logic import StyleDetectionLogic
|
||||
from services.website_analysis_service import WebsiteAnalysisService
|
||||
|
||||
logger = get_service_logger("website_analysis_executor")
|
||||
|
||||
|
||||
class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
"""
|
||||
Executor for website analysis tasks.
|
||||
|
||||
Handles:
|
||||
- Analyzing user's website (updates existing WebsiteAnalysis record)
|
||||
- Analyzing competitor websites (stores in CompetitorAnalysis table)
|
||||
- Logging results and updating task status
|
||||
- Scheduling next execution based on frequency_days
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
self.crawler_logic = WebCrawlerLogic()
|
||||
self.style_logic = StyleDetectionLogic()
|
||||
|
||||
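    # Illustrative sketch (not part of the original commit): how the scheduler core
    # is assumed to drive this executor for due tasks. `SessionLocal` is a
    # hypothetical session factory; the real dispatch lives in the scheduler core.
    #
    #     executor = WebsiteAnalysisExecutor()
    #     with SessionLocal() as db:
    #         task = db.query(WebsiteAnalysisTask).filter(
    #             WebsiteAnalysisTask.status == 'active',
    #             WebsiteAnalysisTask.next_check <= datetime.utcnow()
    #         ).first()
    #         if task:
    #             result = asyncio.run(executor.execute_task(task, db))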
    async def execute_task(
        self,
        task: WebsiteAnalysisTask,
        db: Session
    ) -> TaskExecutionResult:
        """
        Execute a website analysis task.

        This performs complete website analysis using the same logic as
        /api/onboarding/style-detection/complete endpoint.

        Args:
            task: WebsiteAnalysisTask instance
            db: Database session

        Returns:
            TaskExecutionResult
        """
        start_time = time.time()
        user_id = task.user_id
        website_url = task.website_url
        task_type = task.task_type

        try:
            self.logger.info(
                f"Executing website analysis: task_id={task.id} | "
                f"user_id={user_id} | url={website_url} | type={task_type}"
            )

            # Create execution log
            execution_log = WebsiteAnalysisExecutionLog(
                task_id=task.id,
                execution_date=datetime.utcnow(),
                status='running'
            )
            db.add(execution_log)
            db.flush()

            # Perform website analysis
            result = await self._perform_website_analysis(
                website_url=website_url,
                user_id=user_id,
                task_type=task_type,
                task=task,
                db=db
            )

            # Update execution log
            execution_time_ms = int((time.time() - start_time) * 1000)
            execution_log.status = 'success' if result.success else 'failed'
            execution_log.result_data = result.result_data
            execution_log.error_message = result.error_message
            execution_log.execution_time_ms = execution_time_ms

            # Update task based on result
            task.last_check = datetime.utcnow()
            task.updated_at = datetime.utcnow()

            if result.success:
                task.last_success = datetime.utcnow()
                task.status = 'active'
                task.failure_reason = None
                # Reset failure tracking on success
                task.consecutive_failures = 0
                task.failure_pattern = None
                # Schedule next check based on frequency_days
                task.next_check = self.calculate_next_execution(
                    task=task,
                    frequency='Custom',
                    last_execution=task.last_check,
                    custom_days=task.frequency_days
                )

                # Commit all changes to database
                db.commit()

                self.logger.info(
                    f"Website analysis completed successfully for task {task.id}. "
                    f"Next check scheduled for {task.next_check}"
                )
                return result
            else:
                # Analyze failure pattern
                from services.scheduler.core.failure_detection_service import FailureDetectionService
                failure_detection = FailureDetectionService(db)
                pattern = failure_detection.analyze_task_failures(
                    task.id, "website_analysis", task.user_id
                )

                task.last_failure = datetime.utcnow()
                task.failure_reason = result.error_message

                if pattern and pattern.should_cool_off:
                    # Mark task for human intervention
                    task.status = "needs_intervention"
                    task.consecutive_failures = pattern.consecutive_failures
                    task.failure_pattern = {
                        "consecutive_failures": pattern.consecutive_failures,
                        "recent_failures": pattern.recent_failures,
                        "failure_reason": pattern.failure_reason.value,
                        "error_patterns": pattern.error_patterns,
                        "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
                    }
                    # Clear next_check - task won't run automatically
                    task.next_check = None

                    self.logger.warning(
                        f"Task {task.id} marked for human intervention: "
                        f"{pattern.consecutive_failures} consecutive failures, "
                        f"reason: {pattern.failure_reason.value}"
                    )
                else:
                    # Normal failure handling
                    task.status = 'failed'
                    task.consecutive_failures = (task.consecutive_failures or 0) + 1
                    # Do NOT update next_check - wait for manual retry

                # Commit all changes to database
                db.commit()

                self.logger.warning(
                    f"Website analysis failed for task {task.id}. "
                    f"Error: {result.error_message}. "
                    f"{'Marked for human intervention' if pattern and pattern.should_cool_off else 'Waiting for manual retry'}."
                )
                return result

        except Exception as e:
            execution_time_ms = int((time.time() - start_time) * 1000)

            # Set database session for exception handler
            self.exception_handler.db = db

            # Create structured error
            error = TaskExecutionError(
                message=f"Error executing website analysis task {task.id}: {str(e)}",
                user_id=user_id,
                task_id=task.id,
                task_type="website_analysis",
                execution_time_ms=execution_time_ms,
                context={
                    "website_url": website_url,
                    "task_type": task_type,
                    "user_id": user_id
                },
                original_error=e
            )

            # Handle exception with structured logging
            self.exception_handler.handle_exception(error)

            # Update execution log with error
            try:
                execution_log = WebsiteAnalysisExecutionLog(
                    task_id=task.id,
                    execution_date=datetime.utcnow(),
                    status='failed',
                    error_message=str(e),
                    execution_time_ms=execution_time_ms,
                    result_data={
                        "error_type": error.error_type.value,
                        "severity": error.severity.value,
                        "context": error.context
                    }
                )
                db.add(execution_log)

                task.last_failure = datetime.utcnow()
                task.failure_reason = str(e)
                task.status = 'failed'
                task.last_check = datetime.utcnow()
                task.updated_at = datetime.utcnow()
                # Do NOT update next_check - wait for manual retry

                db.commit()
            except Exception as commit_error:
                db_error = DatabaseError(
                    message=f"Error saving execution log: {str(commit_error)}",
                    user_id=user_id,
                    task_id=task.id,
                    original_error=commit_error
                )
                self.exception_handler.handle_exception(db_error)
                db.rollback()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=execution_time_ms,
                retryable=True
            )
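    # Hedged sketch (not part of the original commit): failed tasks deliberately
    # keep next_check unset, so a manual retry path is assumed to look roughly
    # like this before the scheduler will pick the task up again.
    #
    #     task.status = 'active'
    #     task.failure_reason = None
    #     task.consecutive_failures = 0
    #     task.next_check = datetime.utcnow()
    #     db.commit()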
    async def _perform_website_analysis(
        self,
        website_url: str,
        user_id: str,
        task_type: str,
        task: WebsiteAnalysisTask,
        db: Session
    ) -> TaskExecutionResult:
        """
        Perform website analysis using existing service logic.

        Reuses the same logic as /api/onboarding/style-detection/complete.
        """
        try:
            # Step 1: Crawl website content
            self.logger.info(f"Crawling website: {website_url}")
            crawl_result = await self.crawler_logic.crawl_website(website_url)

            if not crawl_result.get('success'):
                error_msg = crawl_result.get('error', 'Crawling failed')
                self.logger.error(f"Crawling failed for {website_url}: {error_msg}")
                return TaskExecutionResult(
                    success=False,
                    error_message=f"Crawling failed: {error_msg}",
                    result_data={'crawl_result': crawl_result},
                    retryable=True
                )

            # Step 2: Run style analysis and patterns analysis in parallel
            self.logger.info(f"Running style analysis for {website_url}")

            async def run_style_analysis():
                """Run style analysis in executor"""
                loop = asyncio.get_event_loop()
                return await loop.run_in_executor(
                    None,
                    partial(self.style_logic.analyze_content_style, crawl_result['content'])
                )

            async def run_patterns_analysis():
                """Run patterns analysis in executor"""
                loop = asyncio.get_event_loop()
                return await loop.run_in_executor(
                    None,
                    partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
                )

            # Execute style and patterns analysis in parallel
            style_analysis, patterns_result = await asyncio.gather(
                run_style_analysis(),
                run_patterns_analysis(),
                return_exceptions=True
            )

            # Check for exceptions
            if isinstance(style_analysis, Exception):
                self.logger.error(f"Style analysis exception: {style_analysis}")
                return TaskExecutionResult(
                    success=False,
                    error_message=f"Style analysis failed: {str(style_analysis)}",
                    retryable=True
                )

            if isinstance(patterns_result, Exception):
                self.logger.warning(f"Patterns analysis exception: {patterns_result}")
                patterns_result = None

            # Step 3: Generate style guidelines
            style_guidelines = None
            if style_analysis and style_analysis.get('success'):
                loop = asyncio.get_event_loop()
                guidelines_result = await loop.run_in_executor(
                    None,
                    partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
                )
                if guidelines_result and guidelines_result.get('success'):
                    style_guidelines = guidelines_result.get('guidelines')

            # Prepare analysis data
            analysis_data = {
                'crawl_result': crawl_result,
                'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
                'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
                'style_guidelines': style_guidelines,
            }

            # Step 4: Store results based on task type
            if task_type == 'user_website':
                # Update existing WebsiteAnalysis record
                await self._update_user_website_analysis(
                    user_id=user_id,
                    website_url=website_url,
                    analysis_data=analysis_data,
                    db=db
                )
            elif task_type == 'competitor':
                # Store in CompetitorAnalysis table
                await self._store_competitor_analysis(
                    user_id=user_id,
                    competitor_url=website_url,
                    competitor_id=task.competitor_id,
                    analysis_data=analysis_data,
                    db=db
                )

            self.logger.info(f"Website analysis completed successfully for {website_url}")

            return TaskExecutionResult(
                success=True,
                result_data=analysis_data,
                retryable=False
            )

        except Exception as e:
            self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                retryable=True
            )
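    # Minimal standalone illustration (not part of the original commit) of the
    # pattern used in Step 2 above: two blocking callables are pushed onto the
    # default thread pool and awaited together, with exceptions returned as
    # values rather than raised.
    #
    #     import asyncio
    #     from functools import partial
    #
    #     def slow_add(a, b):
    #         return a + b
    #
    #     async def main():
    #         loop = asyncio.get_event_loop()
    #         first, second = await asyncio.gather(
    #             loop.run_in_executor(None, partial(slow_add, 1, 2)),
    #             loop.run_in_executor(None, partial(slow_add, 3, 4)),
    #             return_exceptions=True,
    #         )
    #         return first, second  # (3, 7), or Exception instances on failure
    #
    #     asyncio.run(main())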
    async def _update_user_website_analysis(
        self,
        user_id: str,
        website_url: str,
        analysis_data: Dict[str, Any],
        db: Session
    ):
        """Update existing WebsiteAnalysis record for user's website."""
        try:
            # Convert Clerk user ID to integer (same as component_logic.py)
            # Use the same conversion logic as the website analysis API
            import hashlib
            user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)

            # Use WebsiteAnalysisService to update
            analysis_service = WebsiteAnalysisService(db)

            # Prepare data in format expected by save_analysis
            response_data = {
                'crawl_result': analysis_data.get('crawl_result'),
                'style_analysis': analysis_data.get('style_analysis'),
                'style_patterns': analysis_data.get('style_patterns'),
                'style_guidelines': analysis_data.get('style_guidelines'),
            }

            # Save/update analysis
            analysis_id = analysis_service.save_analysis(
                session_id=user_id_int,
                website_url=website_url,
                analysis_data=response_data
            )

            if analysis_id:
                self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
            else:
                self.logger.warning(f"Failed to update user website analysis for {website_url}")

        except Exception as e:
            self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
            raise
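    # Worked example (illustrative, not part of the original commit) of the ID
    # conversion above: the first 15 hex digits of the SHA-256 digest are parsed
    # as base 16, giving a deterministic integer below 16**15 for any given user
    # ID string. "user_2abc" is just a made-up Clerk-style ID.
    #
    #     import hashlib
    #     digest = hashlib.sha256("user_2abc".encode()).hexdigest()
    #     session_id = int(digest[:15], 16)   # same ID always maps to the same int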
    async def _store_competitor_analysis(
        self,
        user_id: str,
        competitor_url: str,
        competitor_id: Optional[str],
        analysis_data: Dict[str, Any],
        db: Session
    ):
        """Store competitor analysis in CompetitorAnalysis table."""
        try:
            # Get onboarding session for user
            session = db.query(OnboardingSession).filter(
                OnboardingSession.user_id == user_id
            ).first()

            if not session:
                raise ValueError(f"No onboarding session found for user {user_id}")

            # Extract domain from URL
            parsed_url = urlparse(competitor_url)
            competitor_domain = parsed_url.netloc or competitor_id

            # Check if analysis already exists for this competitor
            existing = db.query(CompetitorAnalysis).filter(
                CompetitorAnalysis.session_id == session.id,
                CompetitorAnalysis.competitor_url == competitor_url
            ).first()

            if existing:
                # Update existing analysis
                existing.analysis_data = analysis_data
                existing.analysis_date = datetime.utcnow()
                existing.status = 'completed'
                existing.error_message = None
                existing.warning_message = None
                existing.updated_at = datetime.utcnow()
                self.logger.info(f"Updated competitor analysis for {competitor_url}")
            else:
                # Create new analysis
                competitor_analysis = CompetitorAnalysis(
                    session_id=session.id,
                    competitor_url=competitor_url,
                    competitor_domain=competitor_domain,
                    analysis_data=analysis_data,
                    status='completed',
                    analysis_date=datetime.utcnow()
                )
                db.add(competitor_analysis)
                self.logger.info(f"Created new competitor analysis for {competitor_url}")

            db.commit()

        except Exception as e:
            db.rollback()
            self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
            raise
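    # Quick illustration (not part of the original commit) of the domain
    # extraction above; the fallback to competitor_id only matters for inputs
    # without a scheme, where urlparse leaves netloc empty.
    #
    #     from urllib.parse import urlparse
    #     urlparse("https://example.com/blog").netloc   # -> "example.com"
    #     urlparse("example.com/blog").netloc           # -> "" (falls back to competitor_id)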
    def calculate_next_execution(
        self,
        task: WebsiteAnalysisTask,
        frequency: str,
        last_execution: Optional[datetime] = None,
        custom_days: Optional[int] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency or custom days.

        Args:
            task: WebsiteAnalysisTask instance
            frequency: Frequency string ('Custom' for website analysis)
            last_execution: Last execution datetime (defaults to task.last_check or now)
            custom_days: Custom number of days (from task.frequency_days)

        Returns:
            Next execution datetime
        """
        if last_execution is None:
            last_execution = task.last_check if task.last_check else datetime.utcnow()

        # Use custom_days if provided, otherwise use task.frequency_days
        days = custom_days if custom_days is not None else task.frequency_days

        if frequency == 'Custom' and days:
            return last_execution + timedelta(days=days)
        else:
            # Default to task's frequency_days
            self.logger.warning(
                f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
                f"Using frequency_days={task.frequency_days}."
            )
            return last_execution + timedelta(days=task.frequency_days)
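    # Worked example (illustrative, not part of the original commit): with
    # task.frequency_days = 14 and last_check = 2025-03-01, execute_task calls
    # this with frequency='Custom', so the next run is scheduled for 2025-03-15.
    # `executor` is an assumed instance name.
    #
    #     next_run = executor.calculate_next_execution(
    #         task, frequency='Custom', custom_days=task.frequency_days
    #     )
    #     assert next_run == task.last_check + timedelta(days=task.frequency_days)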