Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,4 @@
"""
Task executor implementations.
"""


@@ -0,0 +1,354 @@
"""
Bing Insights Task Executor
Handles execution of Bing insights fetch tasks for connected platforms.
"""
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.integrations.bing_oauth import BingOAuthService
from utils.logger_utils import get_service_logger
logger = get_service_logger("bing_insights_executor")
class BingInsightsExecutor(TaskExecutor):
"""
Executor for Bing insights fetch tasks.
Handles:
- Fetching Bing insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from Bing API
- Logging results and updating task status
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
self.storage_service = BingAnalyticsStorageService(database_url)
self.bing_oauth = BingOAuthService()
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a Bing insights fetch task.
Args:
task: PlatformInsightsTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url
try:
self.logger.info(
f"Executing Bing insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)
# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Fetch insights
result = await self._fetch_insights(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None
# Update task based on result
task.last_check = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "bing_insights", task.user_id
)
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None
self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="Bing insights fetch"
)
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "bing_insights", task.user_id
)
# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
task.next_check = None
else:
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return error_result
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch Bing insights data.
On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url
try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None
if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)
if cached_data:
self.logger.info(f"Loaded cached Bing data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from Bing API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)
except Exception as e:
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch Bing insights: {str(e)}",
result_data={'error': str(e)}
)
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached Bing data from database."""
try:
# Get analytics summary from storage service
summary = self.storage_service.get_analytics_summary(
user_id=user_id,
site_url=site_url or '',
days=30
)
if summary and isinstance(summary, dict):
self.logger.info(f"Found cached Bing data for user {user_id}")
return summary
return None
except Exception as e:
self.logger.warning(f"Error loading cached Bing data: {e}")
return None
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh Bing insights from API."""
try:
# Check if user has active tokens
token_status = self.bing_oauth.get_user_token_status(user_id)
if not token_status.get('has_active_tokens'):
return TaskExecutionResult(
success=False,
error_message="Bing Webmaster tokens not available or expired",
result_data={'error': 'No active tokens'}
)
# Get user's sites
sites = self.bing_oauth.get_user_sites(user_id)
if not sites:
return TaskExecutionResult(
success=False,
error_message="No Bing Webmaster sites found",
result_data={'error': 'No sites found'}
)
# Use provided site_url or first site
if not site_url:
site_url = sites[0].get('Url', '') if isinstance(sites[0], dict) else sites[0]
# Get active token
active_tokens = token_status.get('active_tokens', [])
if not active_tokens:
return TaskExecutionResult(
success=False,
error_message="No active Bing Webmaster tokens",
result_data={'error': 'No tokens'}
)
# For now, use stored analytics data (Bing API integration can be added later)
# This ensures we have data available even if the API class doesn't exist yet
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)
if summary and isinstance(summary, dict):
# Format insights data from stored analytics
insights_data = {
'site_url': site_url,
'date_range': {
'start': (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d'),
'end': datetime.now().strftime('%Y-%m-%d')
},
'summary': summary.get('summary', {}),
'fetched_at': datetime.utcnow().isoformat()
}
self.logger.info(
f"Successfully loaded Bing insights from storage for user {user_id}, site {site_url}"
)
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'storage',
'insights': insights_data,
'message': 'Loaded from stored analytics data'
}
)
else:
# No stored data available
return TaskExecutionResult(
success=False,
error_message="No Bing analytics data available. Data will be collected during next onboarding refresh.",
result_data={'error': 'No stored data available'}
)
except Exception as e:
self.logger.error(f"Error fetching fresh Bing data: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"API fetch failed: {str(e)}",
result_data={'error': str(e)}
)
def calculate_next_execution(
self,
task: PlatformInsightsTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
For platform insights, frequency is normally 'Weekly' (7 days); 'Daily' is also handled.
"""
if last_execution is None:
last_execution = datetime.utcnow()
if frequency == 'Weekly':
return last_execution + timedelta(days=7)
elif frequency == 'Daily':
return last_execution + timedelta(days=1)
else:
# Default to weekly
return last_execution + timedelta(days=7)
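
A minimal sketch of how a scheduler might drive this executor: query tasks whose next_check has passed and hand each one to execute_task together with a session. The executor's import path and the SessionLocal factory are assumptions for illustration; they are not defined by this commit.

import asyncio
from datetime import datetime

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from models.platform_insights_monitoring_models import PlatformInsightsTask
# Assumed module path; adjust to the actual package layout of this repo.
from services.scheduler.executors.bing_insights_executor import BingInsightsExecutor

engine = create_engine("sqlite:///alwrity.db")
SessionLocal = sessionmaker(bind=engine)

async def run_due_bing_tasks() -> None:
    """Execute every Bing insights task whose next_check is due."""
    executor = BingInsightsExecutor()
    db = SessionLocal()
    try:
        due_tasks = (
            db.query(PlatformInsightsTask)
            .filter(PlatformInsightsTask.next_check <= datetime.utcnow())
            .all()
        )
        for task in due_tasks:
            # execute_task commits its own status/log updates on the session.
            result = await executor.execute_task(task, db)
            print(task.id, "ok" if result.success else result.error_message)
    finally:
        db.close()

if __name__ == "__main__":
    asyncio.run(run_due_bing_tasks())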


@@ -0,0 +1,363 @@
"""
GSC Insights Task Executor
Handles execution of GSC insights fetch tasks for connected platforms.
"""
import logging
import os
import time
import json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
import sqlite3
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.gsc_service import GSCService
from utils.logger_utils import get_service_logger
logger = get_service_logger("gsc_insights_executor")
class GSCInsightsExecutor(TaskExecutor):
"""
Executor for GSC insights fetch tasks.
Handles:
- Fetching GSC insights data weekly
- On first run: Loads existing cached data
- On subsequent runs: Fetches fresh data from GSC API
- Logging results and updating task status
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
self.gsc_service = GSCService()
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Execute a GSC insights fetch task.
Args:
task: PlatformInsightsTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
site_url = task.site_url
try:
self.logger.info(
f"Executing GSC insights fetch: task_id={task.id} | "
f"user_id={user_id} | site_url={site_url}"
)
# Create execution log
execution_log = PlatformInsightsExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Fetch insights
result = await self._fetch_insights(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
execution_log.data_source = result.result_data.get('data_source') if result.success else None
# Update task based on result
task.last_check = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "gsc_insights", task.user_id
)
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None
self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Schedule retry in 1 day
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
error_result = self.exception_handler.handle_task_execution_error(
task=task,
error=e,
execution_time_ms=execution_time_ms,
context="GSC insights fetch"
)
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "gsc_insights", task.user_id
)
# Update task
task.last_check = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
task.next_check = None
else:
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
task.next_check = datetime.utcnow() + timedelta(days=1)
task.updated_at = datetime.utcnow()
db.commit()
return error_result
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
"""
Fetch GSC insights data.
On first run (no last_success), loads cached data.
On subsequent runs, fetches fresh data from API.
"""
user_id = task.user_id
site_url = task.site_url
try:
# Check if this is first run (no previous success)
is_first_run = task.last_success is None
if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for GSC insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)
if cached_data:
self.logger.info(f"Loaded cached GSC data for user {user_id}")
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'cached',
'insights': cached_data,
'message': 'Loaded from cached data (first run)'
}
)
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from GSC API")
return await self._fetch_fresh_data(user_id, site_url)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for GSC insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)
except Exception as e:
self.logger.error(f"Error fetching GSC insights for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Failed to fetch GSC insights: {str(e)}",
result_data={'error': str(e)}
)
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
"""Load most recent cached GSC data from database."""
try:
db_path = self.gsc_service.db_path
with sqlite3.connect(db_path) as conn:
cursor = conn.cursor()
# Find most recent cached data
if site_url:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND site_url = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id, site_url))
else:
cursor.execute('''
SELECT data_json, created_at
FROM gsc_data_cache
WHERE user_id = ? AND data_type = 'analytics'
ORDER BY created_at DESC
LIMIT 1
''', (user_id,))
result = cursor.fetchone()
if result:
data_json, created_at = result
insights_data = json.loads(data_json) if isinstance(data_json, str) else data_json
self.logger.info(
f"Found cached GSC data from {created_at} for user {user_id}"
)
return insights_data
return None
except Exception as e:
self.logger.warning(f"Error loading cached GSC data: {e}")
return None
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
"""Fetch fresh GSC insights from API."""
try:
# If no site_url, get first site
if not site_url:
sites = self.gsc_service.get_site_list(user_id)
if not sites:
return TaskExecutionResult(
success=False,
error_message="No GSC sites found for user",
result_data={'error': 'No sites found'}
)
site_url = sites[0]['siteUrl']
# Get analytics for last 30 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
# Fetch search analytics
search_analytics = self.gsc_service.get_search_analytics(
user_id=user_id,
site_url=site_url,
start_date=start_date,
end_date=end_date
)
if 'error' in search_analytics:
return TaskExecutionResult(
success=False,
error_message=search_analytics.get('error', 'Unknown error'),
result_data=search_analytics
)
# Format insights data
insights_data = {
'site_url': site_url,
'date_range': {
'start': start_date,
'end': end_date
},
'overall_metrics': search_analytics.get('overall_metrics', {}),
'query_data': search_analytics.get('query_data', {}),
'fetched_at': datetime.utcnow().isoformat()
}
self.logger.info(
f"Successfully fetched GSC insights for user {user_id}, site {site_url}"
)
return TaskExecutionResult(
success=True,
result_data={
'data_source': 'api',
'insights': insights_data,
'message': 'Fetched fresh data from GSC API'
}
)
except Exception as e:
self.logger.error(f"Error fetching fresh GSC data: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"API fetch failed: {str(e)}",
result_data={'error': str(e)}
)
def calculate_next_execution(
self,
task: PlatformInsightsTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
For platform insights, frequency is normally 'Weekly' (7 days); 'Daily' is also handled.
"""
if last_execution is None:
last_execution = datetime.utcnow()
if frequency == 'Weekly':
return last_execution + timedelta(days=7)
elif frequency == 'Daily':
return last_execution + timedelta(days=1)
else:
# Default to weekly
return last_execution + timedelta(days=7)
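
_load_cached_data above reads the gsc_data_cache table directly over sqlite3 rather than through GSCService. The SELECT implies at least the columns user_id, site_url, data_type, data_json and created_at; the schema sketch below is reconstructed from that query and is an assumption, as is the helper that writes a row for a later first run to pick up.

import json
import sqlite3
from datetime import datetime

# Assumed shape of the cache table, inferred from the SELECT in _load_cached_data.
GSC_CACHE_SCHEMA = """
CREATE TABLE IF NOT EXISTS gsc_data_cache (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL,
    site_url TEXT,
    data_type TEXT NOT NULL,
    data_json TEXT NOT NULL,
    created_at TEXT DEFAULT CURRENT_TIMESTAMP
)
"""

def cache_gsc_analytics(db_path: str, user_id: str, site_url: str, payload: dict) -> None:
    """Store an analytics payload so the executor's first run can serve it as cached data."""
    with sqlite3.connect(db_path) as conn:
        conn.execute(GSC_CACHE_SCHEMA)
        conn.execute(
            "INSERT INTO gsc_data_cache (user_id, site_url, data_type, data_json, created_at) "
            "VALUES (?, ?, 'analytics', ?, ?)",
            (user_id, site_url, json.dumps(payload), datetime.utcnow().isoformat()),
        )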


@@ -0,0 +1,266 @@
"""
Monitoring Task Executor
Handles execution of content strategy monitoring tasks.
"""
import logging
import time
from datetime import datetime
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from ..utils.frequency_calculator import calculate_next_execution
from models.monitoring_models import MonitoringTask, TaskExecutionLog
from models.enhanced_strategy_models import EnhancedContentStrategy
from utils.logger_utils import get_service_logger
logger = get_service_logger("monitoring_task_executor")
class MonitoringTaskExecutor(TaskExecutor):
"""
Executor for content strategy monitoring tasks.
Handles:
- ALwrity tasks (automated execution)
- Human tasks (notifications/queuing)
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
async def execute_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
"""
Execute a monitoring task with user isolation.
Args:
task: MonitoringTask instance (with strategy relationship loaded)
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
# Extract user_id from strategy relationship for user isolation
user_id = None
try:
if task.strategy and hasattr(task.strategy, 'user_id'):
user_id = task.strategy.user_id
elif task.strategy_id:
# Fallback: query strategy if relationship not loaded
strategy = db.query(EnhancedContentStrategy).filter(
EnhancedContentStrategy.id == task.strategy_id
).first()
if strategy:
user_id = strategy.user_id
except Exception as e:
self.logger.warning(f"Could not extract user_id for task {task.id}: {e}")
try:
self.logger.info(
f"Executing monitoring task: {task.id} | "
f"user_id: {user_id} | "
f"assignee: {task.assignee} | "
f"frequency: {task.frequency}"
)
# Create execution log with user_id for user isolation tracking
execution_log = TaskExecutionLog(
task_id=task.id,
user_id=user_id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Execute based on assignee
if task.assignee == 'ALwrity':
result = await self._execute_alwrity_task(task, db)
else:
result = await self._execute_human_task(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
# Update task
task.last_executed = datetime.utcnow()
task.next_execution = self.calculate_next_execution(
task,
task.frequency,
task.last_executed
)
if result.success:
task.status = 'completed'
else:
task.status = 'failed'
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
# Create structured error
error = TaskExecutionError(
message=f"Error executing monitoring task {task.id}: {str(e)}",
user_id=user_id,
task_id=task.id,
task_type="monitoring_task",
execution_time_ms=execution_time_ms,
context={
"assignee": task.assignee,
"frequency": task.frequency,
"component": task.component_name
},
original_error=e
)
# Handle exception with structured logging
self.exception_handler.handle_exception(error)
# Update execution log with error (include user_id for isolation)
try:
execution_log = TaskExecutionLog(
task_id=task.id,
user_id=user_id,
execution_date=datetime.utcnow(),
status='failed',
error_message=str(e),
execution_time_ms=execution_time_ms,
result_data={
"error_type": error.error_type.value,
"severity": error.severity.value,
"context": error.context
}
)
db.add(execution_log)
task.status = 'failed'
task.last_executed = datetime.utcnow()
db.commit()
except Exception as commit_error:
db_error = DatabaseError(
message=f"Error saving execution log: {str(commit_error)}",
user_id=user_id,
task_id=task.id,
original_error=commit_error
)
self.exception_handler.handle_exception(db_error)
db.rollback()
return TaskExecutionResult(
success=False,
error_message=str(e),
execution_time_ms=execution_time_ms,
retryable=True,
retry_delay=300
)
async def _execute_alwrity_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
"""
Execute an ALwrity (automated) monitoring task.
This is where the actual monitoring logic would go.
For now, we'll implement a placeholder that can be extended.
"""
try:
self.logger.info(f"Executing ALwrity task: {task.task_title}")
# TODO: Implement actual monitoring logic based on:
# - task.metric
# - task.measurement_method
# - task.success_criteria
# - task.alert_threshold
# Placeholder: Simulate task execution
result_data = {
'metric_value': 0,
'status': 'measured',
'message': f"Task {task.task_title} executed successfully",
'timestamp': datetime.utcnow().isoformat()
}
return TaskExecutionResult(
success=True,
result_data=result_data
)
except Exception as e:
self.logger.error(f"Error in ALwrity task execution: {e}")
return TaskExecutionResult(
success=False,
error_message=str(e),
retryable=True
)
async def _execute_human_task(self, task: MonitoringTask, db: Session) -> TaskExecutionResult:
"""
Execute a Human monitoring task (notification/queuing).
For human tasks, we don't execute the task directly,
but rather queue it for human review or send notifications.
"""
try:
self.logger.info(f"Queuing human task: {task.task_title}")
# TODO: Implement notification/queuing system:
# - Send email notification
# - Add to user's task queue
# - Create in-app notification
result_data = {
'status': 'queued',
'message': f"Task {task.task_title} queued for human review",
'timestamp': datetime.utcnow().isoformat()
}
return TaskExecutionResult(
success=True,
result_data=result_data
)
except Exception as e:
self.logger.error(f"Error queuing human task: {e}")
return TaskExecutionResult(
success=False,
error_message=str(e),
retryable=True
)
def calculate_next_execution(
self,
task: MonitoringTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
Args:
task: MonitoringTask instance
frequency: Frequency string (Daily, Weekly, Monthly, Quarterly)
last_execution: Last execution datetime (defaults to now)
Returns:
Next execution datetime
"""
return calculate_next_execution(
frequency=frequency,
base_time=last_execution or datetime.utcnow()
)
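
Unlike the platform executors, this one delegates scheduling to frequency_calculator.calculate_next_execution, which is not shown in this excerpt. A minimal stand-in consistent with the frequencies named in the docstring (Daily, Weekly, Monthly, Quarterly) could look like the sketch below; the 30- and 90-day offsets are approximations assumed for illustration, not the actual helper.

from datetime import datetime, timedelta
from typing import Optional

def calculate_next_execution(frequency: str, base_time: Optional[datetime] = None) -> datetime:
    """Return the next run time for a task, counted from base_time."""
    base_time = base_time or datetime.utcnow()
    # Simple day-count approximations; calendar-aware month/quarter arithmetic
    # would need something like dateutil.relativedelta.
    offsets = {
        'Daily': timedelta(days=1),
        'Weekly': timedelta(days=7),
        'Monthly': timedelta(days=30),
        'Quarterly': timedelta(days=90),
    }
    return base_time + offsets.get(frequency, timedelta(days=7))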


@@ -0,0 +1,789 @@
"""
OAuth Token Monitoring Task Executor
Handles execution of OAuth token monitoring tasks for connected platforms.
"""
import logging
import os
import time
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask, OAuthTokenExecutionLog
from models.subscription_models import UsageAlert
from utils.logger_utils import get_service_logger
# Import platform-specific services
from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
from services.wix_service import WixService
logger = get_service_logger("oauth_token_monitoring_executor")
class OAuthTokenMonitoringExecutor(TaskExecutor):
"""
Executor for OAuth token monitoring tasks.
Handles:
- Checking token validity and expiration
- Attempting automatic token refresh
- Logging results and updating task status
- One-time refresh attempt (no automatic retries on failure)
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
# Expiration warning window (7 days before expiration)
self.expiration_warning_days = 7
async def execute_task(self, task: OAuthTokenMonitoringTask, db: Session) -> TaskExecutionResult:
"""
Execute an OAuth token monitoring task.
This checks token status and attempts refresh if needed.
If refresh fails, marks task as failed and does not retry automatically.
Args:
task: OAuthTokenMonitoringTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
platform = task.platform
try:
self.logger.info(
f"Executing OAuth token monitoring: task_id={task.id} | "
f"user_id={user_id} | platform={platform}"
)
# Create execution log
execution_log = OAuthTokenExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Check and refresh token
result = await self._check_and_refresh_token(task, db)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
# Update task based on result
task.last_check = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check (7 days from now)
task.next_check = self.calculate_next_execution(
task=task,
frequency='Weekly',
last_execution=task.last_check
)
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "oauth_token_monitoring", task.user_id
)
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None
self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Do NOT update next_check - wait for manual trigger
self.logger.warning(
f"OAuth token refresh failed for user {user_id}, platform {platform}. "
f"{'Task marked for human intervention' if pattern and pattern.should_cool_off else 'Task marked as failed. No automatic retry will be scheduled.'}"
)
# Create UsageAlert notification for the user
self._create_failure_alert(user_id, platform, result.error_message, result.result_data, db)
task.updated_at = datetime.utcnow()
db.commit()
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
# Create structured error
error = TaskExecutionError(
message=f"Error executing OAuth token monitoring task {task.id}: {str(e)}",
user_id=user_id,
task_id=task.id,
task_type="oauth_token_monitoring",
execution_time_ms=execution_time_ms,
context={
"platform": platform,
"user_id": user_id
},
original_error=e
)
# Handle exception with structured logging
self.exception_handler.handle_exception(error)
# Update execution log with error
try:
execution_log = OAuthTokenExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='failed',
error_message=str(e),
execution_time_ms=execution_time_ms,
result_data={
"error_type": error.error_type.value,
"severity": error.severity.value,
"context": error.context
}
)
db.add(execution_log)
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
# Do NOT update next_check - wait for manual trigger
# Create UsageAlert notification for the user
self._create_failure_alert(user_id, task.platform, str(e), None, db)
db.commit()
except Exception as commit_error:
db_error = DatabaseError(
message=f"Error saving execution log: {str(commit_error)}",
user_id=user_id,
task_id=task.id,
original_error=commit_error
)
self.exception_handler.handle_exception(db_error)
db.rollback()
return TaskExecutionResult(
success=False,
error_message=str(e),
execution_time_ms=execution_time_ms,
retryable=False, # Do not retry automatically
retry_delay=0
)
async def _check_and_refresh_token(
self,
task: OAuthTokenMonitoringTask,
db: Session
) -> TaskExecutionResult:
"""
Check token status and attempt refresh if needed.
Tokens are stored in the database from onboarding step 5:
- GSC: gsc_credentials table (via GSCService)
- Bing: bing_oauth_tokens table (via BingOAuthService)
- WordPress: wordpress_oauth_tokens table (via WordPressOAuthService)
- Wix: wix_oauth_tokens table planned (backend storage not yet implemented; see _check_wix_token)
Args:
task: OAuthTokenMonitoringTask instance
db: Database session
Returns:
TaskExecutionResult with success status and details
"""
platform = task.platform
user_id = task.user_id
try:
self.logger.info(f"Checking token for platform: {platform}, user: {user_id}")
# Route to platform-specific checking logic
if platform == 'gsc':
return await self._check_gsc_token(user_id)
elif platform == 'bing':
return await self._check_bing_token(user_id)
elif platform == 'wordpress':
return await self._check_wordpress_token(user_id)
elif platform == 'wix':
return await self._check_wix_token(user_id)
else:
return TaskExecutionResult(
success=False,
error_message=f"Unsupported platform: {platform}",
result_data={
'platform': platform,
'user_id': user_id,
'error': 'Unsupported platform'
},
retryable=False
)
except Exception as e:
self.logger.error(
f"Error checking/refreshing token for platform {platform}, user {user_id}: {e}",
exc_info=True
)
return TaskExecutionResult(
success=False,
error_message=f"Token check failed: {str(e)}",
result_data={
'platform': platform,
'user_id': user_id,
'error': str(e)
},
retryable=False # Do not retry automatically
)
async def _check_gsc_token(self, user_id: str) -> TaskExecutionResult:
"""
Check and refresh GSC (Google Search Console) token.
GSC service auto-refreshes tokens if expired when loading credentials.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
gsc_service = GSCService(db_path=db_path)
credentials = gsc_service.load_user_credentials(user_id)
if not credentials:
return TaskExecutionResult(
success=False,
error_message="GSC credentials not found or could not be loaded",
result_data={
'platform': 'gsc',
'user_id': user_id,
'status': 'not_found',
'check_time': datetime.utcnow().isoformat()
},
retryable=False
)
# GSC service auto-refreshes if expired, so if we get here, token is valid
result_data = {
'platform': 'gsc',
'user_id': user_id,
'status': 'valid',
'check_time': datetime.utcnow().isoformat(),
'message': 'GSC token is valid (auto-refreshed if expired)'
}
return TaskExecutionResult(
success=True,
result_data=result_data
)
except Exception as e:
self.logger.error(f"Error checking GSC token for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"GSC token check failed: {str(e)}",
result_data={
'platform': 'gsc',
'user_id': user_id,
'error': str(e)
},
retryable=False
)
async def _check_bing_token(self, user_id: str) -> TaskExecutionResult:
"""
Check and refresh Bing Webmaster Tools token.
Checks token expiration and attempts refresh if needed.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
bing_service = BingOAuthService(db_path=db_path)
# Get token status (includes expired tokens)
token_status = bing_service.get_user_token_status(user_id)
if not token_status.get('has_tokens'):
return TaskExecutionResult(
success=False,
error_message="No Bing tokens found for user",
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'not_found',
'check_time': datetime.utcnow().isoformat()
},
retryable=False
)
active_tokens = token_status.get('active_tokens', [])
expired_tokens = token_status.get('expired_tokens', [])
# If we have active tokens, check if any are expiring soon (< 7 days)
if active_tokens:
now = datetime.utcnow()
needs_refresh = False
token_to_refresh = None
for token in active_tokens:
expires_at_str = token.get('expires_at')
if expires_at_str:
try:
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
# Normalize to naive UTC so subtraction against the naive 'now' cannot raise
if expires_at.tzinfo is not None:
expires_at = expires_at.replace(tzinfo=None)
# Check if expires within warning window (7 days)
days_until_expiry = (expires_at - now).days
if days_until_expiry < self.expiration_warning_days:
needs_refresh = True
token_to_refresh = token
break
except Exception:
# If parsing fails, assume token is valid
pass
if needs_refresh and token_to_refresh:
# Attempt to refresh
refresh_token = token_to_refresh.get('refresh_token')
if refresh_token:
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
if refresh_result:
return TaskExecutionResult(
success=True,
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'refreshed',
'check_time': datetime.utcnow().isoformat(),
'message': 'Bing token refreshed successfully'
}
)
else:
return TaskExecutionResult(
success=False,
error_message="Failed to refresh Bing token",
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'refresh_failed',
'check_time': datetime.utcnow().isoformat()
},
retryable=False
)
# Token is valid and not expiring soon
return TaskExecutionResult(
success=True,
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'valid',
'check_time': datetime.utcnow().isoformat(),
'message': 'Bing token is valid'
}
)
# No active tokens, check if we can refresh expired ones
if expired_tokens:
# Try to refresh the most recent expired token
latest_token = expired_tokens[0] # Already sorted by created_at DESC
refresh_token = latest_token.get('refresh_token')
if refresh_token:
# Check if token expired recently (within grace period)
expires_at_str = latest_token.get('expires_at')
if expires_at_str:
try:
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
# Normalize to naive UTC before comparing with datetime.utcnow()
if expires_at.tzinfo is not None:
expires_at = expires_at.replace(tzinfo=None)
# Only refresh if expired within last 24 hours (grace period)
hours_since_expiry = (datetime.utcnow() - expires_at).total_seconds() / 3600
if hours_since_expiry < 24:
refresh_result = bing_service.refresh_access_token(user_id, refresh_token)
if refresh_result:
return TaskExecutionResult(
success=True,
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'refreshed',
'check_time': datetime.utcnow().isoformat(),
'message': 'Bing token refreshed from expired state'
}
)
except Exception:
pass
return TaskExecutionResult(
success=False,
error_message="Bing token expired and could not be refreshed",
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'expired',
'check_time': datetime.utcnow().isoformat(),
'message': 'Bing token expired. User needs to reconnect.'
},
retryable=False
)
return TaskExecutionResult(
success=False,
error_message="No valid Bing tokens found",
result_data={
'platform': 'bing',
'user_id': user_id,
'status': 'invalid',
'check_time': datetime.utcnow().isoformat()
},
retryable=False
)
except Exception as e:
self.logger.error(f"Error checking Bing token for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Bing token check failed: {str(e)}",
result_data={
'platform': 'bing',
'user_id': user_id,
'error': str(e)
},
retryable=False
)
async def _check_wordpress_token(self, user_id: str) -> TaskExecutionResult:
"""
Check WordPress token validity.
Note: WordPress tokens cannot be refreshed. They expire after 2 weeks
and require user re-authorization. We only check if token is valid.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
wordpress_service = WordPressOAuthService(db_path=db_path)
tokens = wordpress_service.get_user_tokens(user_id)
if not tokens:
return TaskExecutionResult(
success=False,
error_message="No WordPress tokens found for user",
result_data={
'platform': 'wordpress',
'user_id': user_id,
'status': 'not_found',
'check_time': datetime.utcnow().isoformat()
},
retryable=False
)
# Check each token - WordPress tokens expire in 2 weeks
now = datetime.utcnow()
valid_tokens = []
expiring_soon = []
expired_tokens = []
for token in tokens:
expires_at_str = token.get('expires_at')
if expires_at_str:
try:
expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
# Normalize to naive UTC before comparing with the naive 'now'
if expires_at.tzinfo is not None:
expires_at = expires_at.replace(tzinfo=None)
days_until_expiry = (expires_at - now).days
if days_until_expiry < 0:
expired_tokens.append(token)
elif days_until_expiry < self.expiration_warning_days:
expiring_soon.append(token)
else:
valid_tokens.append(token)
except Exception:
# If parsing fails, test token validity via API
access_token = token.get('access_token')
if access_token and wordpress_service.test_token(access_token):
valid_tokens.append(token)
else:
expired_tokens.append(token)
else:
# No expiration date - test token validity
access_token = token.get('access_token')
if access_token and wordpress_service.test_token(access_token):
valid_tokens.append(token)
else:
expired_tokens.append(token)
if valid_tokens:
return TaskExecutionResult(
success=True,
result_data={
'platform': 'wordpress',
'user_id': user_id,
'status': 'valid',
'check_time': datetime.utcnow().isoformat(),
'message': 'WordPress token is valid',
'valid_tokens_count': len(valid_tokens)
}
)
elif expiring_soon:
# WordPress tokens cannot be refreshed - user needs to reconnect
return TaskExecutionResult(
success=False,
error_message="WordPress token expiring soon and cannot be auto-refreshed",
result_data={
'platform': 'wordpress',
'user_id': user_id,
'status': 'expiring_soon',
'check_time': datetime.utcnow().isoformat(),
'message': 'WordPress token expires soon. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
},
retryable=False
)
else:
return TaskExecutionResult(
success=False,
error_message="WordPress token expired and cannot be refreshed",
result_data={
'platform': 'wordpress',
'user_id': user_id,
'status': 'expired',
'check_time': datetime.utcnow().isoformat(),
'message': 'WordPress token expired. User needs to reconnect (WordPress tokens cannot be auto-refreshed).'
},
retryable=False
)
except Exception as e:
self.logger.error(f"Error checking WordPress token for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"WordPress token check failed: {str(e)}",
result_data={
'platform': 'wordpress',
'user_id': user_id,
'error': str(e)
},
retryable=False
)
async def _check_wix_token(self, user_id: str) -> TaskExecutionResult:
"""
Check Wix token validity.
Note: Wix tokens are currently stored in frontend sessionStorage.
Backend storage needs to be implemented for automated checking.
"""
try:
# TODO: Wix tokens are stored in frontend sessionStorage, not backend database
# Once backend storage is implemented, we can check tokens here
# For now, return not supported
return TaskExecutionResult(
success=False,
error_message="Wix token monitoring not yet supported - tokens stored in frontend sessionStorage",
result_data={
'platform': 'wix',
'user_id': user_id,
'status': 'not_supported',
'check_time': datetime.utcnow().isoformat(),
'message': 'Wix token monitoring requires backend token storage implementation'
},
retryable=False
)
except Exception as e:
self.logger.error(f"Error checking Wix token for user {user_id}: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=f"Wix token check failed: {str(e)}",
result_data={
'platform': 'wix',
'user_id': user_id,
'error': str(e)
},
retryable=False
)
def _create_failure_alert(
self,
user_id: str,
platform: str,
error_message: str,
result_data: Optional[Dict[str, Any]],
db: Session
):
"""
Create a UsageAlert notification when OAuth token refresh fails.
Args:
user_id: User ID
platform: Platform identifier (gsc, bing, wordpress, wix)
error_message: Error message from token check
result_data: Optional result data from token check
db: Database session
"""
try:
# Determine severity based on error type
status = result_data.get('status', 'unknown') if result_data else 'unknown'
if status in ['expired', 'refresh_failed']:
severity = 'error'
alert_type = 'oauth_token_failure'
elif status in ['expiring_soon', 'not_found']:
severity = 'warning'
alert_type = 'oauth_token_warning'
else:
severity = 'error'
alert_type = 'oauth_token_failure'
# Format platform name for display
platform_names = {
'gsc': 'Google Search Console',
'bing': 'Bing Webmaster Tools',
'wordpress': 'WordPress',
'wix': 'Wix'
}
platform_display = platform_names.get(platform, platform.upper())
# Create alert title and message
if status == 'expired':
title = f"{platform_display} Token Expired"
message = (
f"Your {platform_display} access token has expired and could not be automatically renewed. "
f"Please reconnect your {platform_display} account to continue using this integration."
)
elif status == 'expiring_soon':
title = f"{platform_display} Token Expiring Soon"
message = (
f"Your {platform_display} access token will expire soon. "
f"Please reconnect your {platform_display} account to avoid interruption."
)
elif status == 'refresh_failed':
title = f"{platform_display} Token Renewal Failed"
message = (
f"Failed to automatically renew your {platform_display} access token. "
f"Please reconnect your {platform_display} account. "
f"Error: {error_message}"
)
elif status == 'not_found':
title = f"{platform_display} Token Not Found"
message = (
f"No {platform_display} access token found. "
f"Please connect your {platform_display} account in the onboarding settings."
)
else:
title = f"{platform_display} Token Error"
message = (
f"An error occurred while checking your {platform_display} access token. "
f"Please reconnect your {platform_display} account. "
f"Error: {error_message}"
)
# Get current billing period (YYYY-MM format)
from datetime import datetime
billing_period = datetime.utcnow().strftime("%Y-%m")
# Create UsageAlert
alert = UsageAlert(
user_id=user_id,
alert_type=alert_type,
threshold_percentage=0, # Not applicable for OAuth alerts
provider=None, # Not applicable for OAuth alerts
title=title,
message=message,
severity=severity,
is_sent=False, # Will be marked as sent when frontend polls
is_read=False,
billing_period=billing_period
)
db.add(alert)
# Note: We don't commit here - let the caller commit
# This allows the alert to be created atomically with the task update
self.logger.info(
f"Created UsageAlert for OAuth token failure: user={user_id}, "
f"platform={platform}, severity={severity}"
)
except Exception as e:
# Don't fail the entire task execution if alert creation fails
self.logger.error(
f"Failed to create UsageAlert for OAuth token failure: {e}",
exc_info=True
)
def calculate_next_execution(
self,
task: OAuthTokenMonitoringTask,
frequency: str,
last_execution: Optional[datetime] = None
) -> datetime:
"""
Calculate next execution time based on frequency.
For OAuth token monitoring, frequency is always 'Weekly' (7 days).
Args:
task: OAuthTokenMonitoringTask instance
frequency: Frequency string (should be 'Weekly' for token monitoring)
last_execution: Last execution datetime (defaults to task.last_check or now)
Returns:
Next execution datetime
"""
if last_execution is None:
last_execution = task.last_check if task.last_check else datetime.utcnow()
# OAuth token monitoring is always weekly (7 days)
if frequency == 'Weekly':
return last_execution + timedelta(days=7)
else:
# Default to weekly if frequency is not recognized
self.logger.warning(
f"Unknown frequency '{frequency}' for OAuth token monitoring task {task.id}. "
f"Defaulting to Weekly (7 days)."
)
return last_execution + timedelta(days=7)
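
The Bing and WordPress branches above repeat the same expiry bookkeeping: parse expires_at, normalize it to naive UTC, and bucket the token against the 7-day warning window. A self-contained sketch of that classification follows; the helper name and return labels are illustrative, not part of this commit.

from datetime import datetime
from typing import Optional

EXPIRATION_WARNING_DAYS = 7

def classify_token_expiry(expires_at_str: str, now: Optional[datetime] = None) -> str:
    """Return 'expired', 'expiring_soon', or 'valid' for an ISO-8601 expiry string."""
    now = now or datetime.utcnow()
    expires_at = datetime.fromisoformat(expires_at_str.replace('Z', '+00:00'))
    if expires_at.tzinfo is not None:
        # Treat the stored value as UTC and compare naive-to-naive.
        expires_at = expires_at.replace(tzinfo=None)
    days_until_expiry = (expires_at - now).days
    if days_until_expiry < 0:
        return 'expired'
    if days_until_expiry < EXPIRATION_WARNING_DAYS:
        return 'expiring_soon'
    return 'valid'

# Example: a token that expired the day before 'now' is reported as expired.
# classify_token_expiry('2025-01-01T00:00:00Z', now=datetime(2025, 1, 2))  ->  'expired'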


@@ -0,0 +1,492 @@
"""
Website Analysis Task Executor
Handles execution of website analysis tasks for user and competitor websites.
"""
import logging
import os
import time
import asyncio
from datetime import datetime, timedelta
from typing import Dict, Any, Optional
from sqlalchemy.orm import Session
from functools import partial
from urllib.parse import urlparse
from ..core.executor_interface import TaskExecutor, TaskExecutionResult
from ..core.exception_handler import TaskExecutionError, DatabaseError, SchedulerExceptionHandler
from models.website_analysis_monitoring_models import WebsiteAnalysisTask, WebsiteAnalysisExecutionLog
from models.onboarding import CompetitorAnalysis, OnboardingSession
from utils.logger_utils import get_service_logger
# Import website analysis services
from services.component_logic.web_crawler_logic import WebCrawlerLogic
from services.component_logic.style_detection_logic import StyleDetectionLogic
from services.website_analysis_service import WebsiteAnalysisService
logger = get_service_logger("website_analysis_executor")
class WebsiteAnalysisExecutor(TaskExecutor):
"""
Executor for website analysis tasks.
Handles:
- Analyzing user's website (updates existing WebsiteAnalysis record)
- Analyzing competitor websites (stores in CompetitorAnalysis table)
- Logging results and updating task status
- Scheduling next execution based on frequency_days
"""
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
self.crawler_logic = WebCrawlerLogic()
self.style_logic = StyleDetectionLogic()
async def execute_task(
self,
task: WebsiteAnalysisTask,
db: Session
) -> TaskExecutionResult:
"""
Execute a website analysis task.
This performs complete website analysis using the same logic as
/api/onboarding/style-detection/complete endpoint.
Args:
task: WebsiteAnalysisTask instance
db: Database session
Returns:
TaskExecutionResult
"""
start_time = time.time()
user_id = task.user_id
website_url = task.website_url
task_type = task.task_type
try:
self.logger.info(
f"Executing website analysis: task_id={task.id} | "
f"user_id={user_id} | url={website_url} | type={task_type}"
)
# Create execution log
execution_log = WebsiteAnalysisExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='running'
)
db.add(execution_log)
db.flush()
# Perform website analysis
result = await self._perform_website_analysis(
website_url=website_url,
user_id=user_id,
task_type=task_type,
task=task,
db=db
)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
execution_log.status = 'success' if result.success else 'failed'
execution_log.result_data = result.result_data
execution_log.error_message = result.error_message
execution_log.execution_time_ms = execution_time_ms
# Update task based on result
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
if result.success:
task.last_success = datetime.utcnow()
task.status = 'active'
task.failure_reason = None
# Reset failure tracking on success
task.consecutive_failures = 0
task.failure_pattern = None
# Schedule next check based on frequency_days
task.next_check = self.calculate_next_execution(
task=task,
frequency='Custom',
last_execution=task.last_check,
custom_days=task.frequency_days
)
# Commit all changes to database
db.commit()
self.logger.info(
f"Website analysis completed successfully for task {task.id}. "
f"Next check scheduled for {task.next_check}"
)
return result
else:
# Analyze failure pattern
from services.scheduler.core.failure_detection_service import FailureDetectionService
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(
task.id, "website_analysis", task.user_id
)
task.last_failure = datetime.utcnow()
task.failure_reason = result.error_message
if pattern and pattern.should_cool_off:
# Mark task for human intervention
task.status = "needs_intervention"
task.consecutive_failures = pattern.consecutive_failures
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
# Clear next_check - task won't run automatically
task.next_check = None
self.logger.warning(
f"Task {task.id} marked for human intervention: "
f"{pattern.consecutive_failures} consecutive failures, "
f"reason: {pattern.failure_reason.value}"
)
else:
# Normal failure handling
task.status = 'failed'
task.consecutive_failures = (task.consecutive_failures or 0) + 1
# Do NOT update next_check - wait for manual retry
# Commit all changes to database
db.commit()
self.logger.warning(
f"Website analysis failed for task {task.id}. "
f"Error: {result.error_message}. "
f"{'Marked for human intervention' if pattern and pattern.should_cool_off else 'Waiting for manual retry'}."
)
return result
except Exception as e:
execution_time_ms = int((time.time() - start_time) * 1000)
# Set database session for exception handler
self.exception_handler.db = db
# Create structured error
error = TaskExecutionError(
message=f"Error executing website analysis task {task.id}: {str(e)}",
user_id=user_id,
task_id=task.id,
task_type="website_analysis",
execution_time_ms=execution_time_ms,
context={
"website_url": website_url,
"task_type": task_type,
"user_id": user_id
},
original_error=e
)
# Handle exception with structured logging
self.exception_handler.handle_exception(error)
# Update execution log with error
try:
execution_log = WebsiteAnalysisExecutionLog(
task_id=task.id,
execution_date=datetime.utcnow(),
status='failed',
error_message=str(e),
execution_time_ms=execution_time_ms,
result_data={
"error_type": error.error_type.value,
"severity": error.severity.value,
"context": error.context
}
)
db.add(execution_log)
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.status = 'failed'
task.last_check = datetime.utcnow()
task.updated_at = datetime.utcnow()
# Do NOT update next_check - wait for manual retry
db.commit()
except Exception as commit_error:
db_error = DatabaseError(
message=f"Error saving execution log: {str(commit_error)}",
user_id=user_id,
task_id=task.id,
original_error=commit_error
)
self.exception_handler.handle_exception(db_error)
db.rollback()
return TaskExecutionResult(
success=False,
error_message=str(e),
execution_time_ms=execution_time_ms,
retryable=True
)
async def _perform_website_analysis(
self,
website_url: str,
user_id: str,
task_type: str,
task: WebsiteAnalysisTask,
db: Session
) -> TaskExecutionResult:
"""
Perform website analysis using existing service logic.
Reuses the same logic as /api/onboarding/style-detection/complete.
"""
try:
# Step 1: Crawl website content
self.logger.info(f"Crawling website: {website_url}")
crawl_result = await self.crawler_logic.crawl_website(website_url)
if not crawl_result.get('success'):
error_msg = crawl_result.get('error', 'Crawling failed')
self.logger.error(f"Crawling failed for {website_url}: {error_msg}")
return TaskExecutionResult(
success=False,
error_message=f"Crawling failed: {error_msg}",
result_data={'crawl_result': crawl_result},
retryable=True
)
# Step 2: Run style analysis and patterns analysis in parallel
self.logger.info(f"Running style analysis for {website_url}")
async def run_style_analysis():
"""Run style analysis in executor"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
partial(self.style_logic.analyze_content_style, crawl_result['content'])
)
async def run_patterns_analysis():
"""Run patterns analysis in executor"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
)
# Execute style and patterns analysis in parallel
style_analysis, patterns_result = await asyncio.gather(
run_style_analysis(),
run_patterns_analysis(),
return_exceptions=True
)
# Check for exceptions
if isinstance(style_analysis, Exception):
self.logger.error(f"Style analysis exception: {style_analysis}")
return TaskExecutionResult(
success=False,
error_message=f"Style analysis failed: {str(style_analysis)}",
retryable=True
)
if isinstance(patterns_result, Exception):
self.logger.warning(f"Patterns analysis exception: {patterns_result}")
patterns_result = None
# Step 3: Generate style guidelines
style_guidelines = None
if style_analysis and style_analysis.get('success'):
loop = asyncio.get_event_loop()
guidelines_result = await loop.run_in_executor(
None,
partial(self.style_logic.generate_style_guidelines, style_analysis.get('analysis', {}))
)
if guidelines_result and guidelines_result.get('success'):
style_guidelines = guidelines_result.get('guidelines')
# Prepare analysis data
analysis_data = {
'crawl_result': crawl_result,
'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
'style_guidelines': style_guidelines,
}
# Step 4: Store results based on task type
if task_type == 'user_website':
# Update existing WebsiteAnalysis record
await self._update_user_website_analysis(
user_id=user_id,
website_url=website_url,
analysis_data=analysis_data,
db=db
)
elif task_type == 'competitor':
# Store in CompetitorAnalysis table
await self._store_competitor_analysis(
user_id=user_id,
competitor_url=website_url,
competitor_id=task.competitor_id,
analysis_data=analysis_data,
db=db
)
self.logger.info(f"Website analysis completed successfully for {website_url}")
return TaskExecutionResult(
success=True,
result_data=analysis_data,
retryable=False
)
except Exception as e:
self.logger.error(f"Error performing website analysis: {e}", exc_info=True)
return TaskExecutionResult(
success=False,
error_message=str(e),
retryable=True
)
async def _update_user_website_analysis(
self,
user_id: str,
website_url: str,
analysis_data: Dict[str, Any],
db: Session
):
"""Update existing WebsiteAnalysis record for user's website."""
try:
# Convert Clerk user ID to integer (same as component_logic.py)
# Use the same conversion logic as the website analysis API
import hashlib
user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)
# Use WebsiteAnalysisService to update
analysis_service = WebsiteAnalysisService(db)
# Prepare data in format expected by save_analysis
response_data = {
'crawl_result': analysis_data.get('crawl_result'),
'style_analysis': analysis_data.get('style_analysis'),
'style_patterns': analysis_data.get('style_patterns'),
'style_guidelines': analysis_data.get('style_guidelines'),
}
# Save/update analysis
analysis_id = analysis_service.save_analysis(
session_id=user_id_int,
website_url=website_url,
analysis_data=response_data
)
if analysis_id:
self.logger.info(f"Updated user website analysis for {website_url} (analysis_id: {analysis_id})")
else:
self.logger.warning(f"Failed to update user website analysis for {website_url}")
except Exception as e:
self.logger.error(f"Error updating user website analysis: {e}", exc_info=True)
raise
async def _store_competitor_analysis(
self,
user_id: str,
competitor_url: str,
competitor_id: Optional[str],
analysis_data: Dict[str, Any],
db: Session
):
"""Store competitor analysis in CompetitorAnalysis table."""
try:
# Get onboarding session for user
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).first()
if not session:
raise ValueError(f"No onboarding session found for user {user_id}")
# Extract domain from URL
parsed_url = urlparse(competitor_url)
competitor_domain = parsed_url.netloc or competitor_id
# Check if analysis already exists for this competitor
existing = db.query(CompetitorAnalysis).filter(
CompetitorAnalysis.session_id == session.id,
CompetitorAnalysis.competitor_url == competitor_url
).first()
if existing:
# Update existing analysis
existing.analysis_data = analysis_data
existing.analysis_date = datetime.utcnow()
existing.status = 'completed'
existing.error_message = None
existing.warning_message = None
existing.updated_at = datetime.utcnow()
self.logger.info(f"Updated competitor analysis for {competitor_url}")
else:
# Create new analysis
competitor_analysis = CompetitorAnalysis(
session_id=session.id,
competitor_url=competitor_url,
competitor_domain=competitor_domain,
analysis_data=analysis_data,
status='completed',
analysis_date=datetime.utcnow()
)
db.add(competitor_analysis)
self.logger.info(f"Created new competitor analysis for {competitor_url}")
db.commit()
except Exception as e:
db.rollback()
self.logger.error(f"Error storing competitor analysis: {e}", exc_info=True)
raise
def calculate_next_execution(
self,
task: WebsiteAnalysisTask,
frequency: str,
last_execution: Optional[datetime] = None,
custom_days: Optional[int] = None
) -> datetime:
"""
Calculate next execution time based on frequency or custom days.
Args:
task: WebsiteAnalysisTask instance
frequency: Frequency string ('Custom' for website analysis)
last_execution: Last execution datetime (defaults to task.last_check or now)
custom_days: Custom number of days (from task.frequency_days)
Returns:
Next execution datetime
"""
if last_execution is None:
last_execution = task.last_check if task.last_check else datetime.utcnow()
# Use custom_days if provided, otherwise use task.frequency_days
days = custom_days if custom_days is not None else task.frequency_days
if frequency == 'Custom' and days:
return last_execution + timedelta(days=days)
else:
# Default to task's frequency_days
self.logger.warning(
f"Unknown frequency '{frequency}' for website analysis task {task.id}. "
f"Using frequency_days={task.frequency_days}."
)
return last_execution + timedelta(days=task.frequency_days)
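
The core concurrency pattern in _perform_website_analysis is pushing the two synchronous style analyses onto the default thread-pool executor and gathering them with return_exceptions=True, so one failure does not sink the other. A stripped-down sketch of that pattern, with stand-in analysis functions in place of StyleDetectionLogic:

import asyncio
from functools import partial

def analyze_content_style(content: str) -> dict:
    # Stand-in for StyleDetectionLogic.analyze_content_style (synchronous, potentially slow).
    return {'success': True, 'analysis': {'length': len(content)}}

def analyze_style_patterns(content: str) -> dict:
    # Stand-in for StyleDetectionLogic.analyze_style_patterns.
    return {'patterns': content.count('.')}

async def analyze_in_parallel(content: str):
    loop = asyncio.get_running_loop()
    style_future = loop.run_in_executor(None, partial(analyze_content_style, content))
    patterns_future = loop.run_in_executor(None, partial(analyze_style_patterns, content))
    # return_exceptions=True mirrors the executor: a failing analysis is returned
    # as an exception object instead of cancelling the other task.
    return await asyncio.gather(style_future, patterns_future, return_exceptions=True)

if __name__ == "__main__":
    style, patterns = asyncio.run(analyze_in_parallel("Example crawled content."))
    print(style, patterns)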