Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts

This commit is contained in:
ajaysi
2026-02-08 13:56:57 +05:30
parent 1db10ccd0f
commit e404a86502
333 changed files with 42223 additions and 10875 deletions

View File

@@ -14,12 +14,24 @@ from .core.exception_handler import (
from .executors.monitoring_task_executor import MonitoringTaskExecutor
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
from .executors.website_analysis_executor import WebsiteAnalysisExecutor
from .executors.onboarding_full_website_analysis_executor import OnboardingFullWebsiteAnalysisExecutor
from .executors.deep_competitor_analysis_executor import DeepCompetitorAnalysisExecutor
from .executors.deep_website_crawl_executor import DeepWebsiteCrawlExecutor
from .executors.gsc_insights_executor import GSCInsightsExecutor
from .executors.bing_insights_executor import BingInsightsExecutor
from .executors.advertools_executor import AdvertoolsExecutor
from .executors.sif_indexing_executor import SIFIndexingExecutor
from .executors.market_trends_executor import MarketTrendsExecutor
from .utils.task_loader import load_due_monitoring_tasks
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
from .utils.onboarding_full_website_analysis_task_loader import load_due_onboarding_full_website_analysis_tasks
from .utils.deep_competitor_analysis_task_loader import load_due_deep_competitor_analysis_tasks
from .utils.deep_website_crawl_task_loader import load_due_deep_website_crawl_tasks
from .utils.platform_insights_task_loader import load_due_platform_insights_tasks
from .utils.advertools_task_loader import load_due_advertools_tasks
from .utils.sif_indexing_task_loader import load_due_sif_indexing_tasks
from .utils.market_trends_task_loader import load_due_market_trends_tasks
# Global scheduler instance (initialized on first access)
_scheduler_instance: TaskScheduler = None
@@ -62,6 +74,28 @@ def get_scheduler() -> TaskScheduler:
website_analysis_executor,
load_due_website_analysis_tasks
)
onboarding_full_site_executor = OnboardingFullWebsiteAnalysisExecutor()
_scheduler_instance.register_executor(
'onboarding_full_website_analysis',
onboarding_full_site_executor,
load_due_onboarding_full_website_analysis_tasks
)
deep_competitor_analysis_executor = DeepCompetitorAnalysisExecutor()
_scheduler_instance.register_executor(
'deep_competitor_analysis',
deep_competitor_analysis_executor,
load_due_deep_competitor_analysis_tasks
)
# Register deep website crawl executor
deep_website_crawl_executor = DeepWebsiteCrawlExecutor()
_scheduler_instance.register_executor(
'deep_website_crawl',
deep_website_crawl_executor,
load_due_deep_website_crawl_tasks
)
# Register platform insights executors
# GSC insights executor
@@ -85,6 +119,30 @@ def get_scheduler() -> TaskScheduler:
bing_insights_executor,
load_due_bing_insights_tasks
)
# Register Advertools executor
advertools_executor = AdvertoolsExecutor()
_scheduler_instance.register_executor(
'advertools_intelligence',
advertools_executor,
load_due_advertools_tasks
)
# Register SIF indexing executor
sif_indexing_executor = SIFIndexingExecutor()
_scheduler_instance.register_executor(
'sif_indexing',
sif_indexing_executor,
load_due_sif_indexing_tasks
)
# Register market trends executor
market_trends_executor = MarketTrendsExecutor()
_scheduler_instance.register_executor(
'market_trends',
market_trends_executor,
load_due_market_trends_tasks
)
return _scheduler_instance
@@ -96,8 +154,11 @@ __all__ = [
'MonitoringTaskExecutor',
'OAuthTokenMonitoringExecutor',
'WebsiteAnalysisExecutor',
'OnboardingFullWebsiteAnalysisExecutor',
'GSCInsightsExecutor',
'BingInsightsExecutor',
'SIFIndexingExecutor',
'MarketTrendsExecutor',
'get_scheduler',
# Exception handling
'SchedulerExceptionHandler',

View File

@@ -0,0 +1,94 @@
"""
Advertools Task Restoration Utility
Handles creation and restoration of Advertools intelligence tasks for users.
"""
from datetime import datetime, timedelta
from typing import Any
from loguru import logger
from sqlalchemy import func
from sqlalchemy.orm import Session
from models.onboarding import WebsiteAnalysis, OnboardingSession
from models.advertools_monitoring_models import AdvertoolsTask
from services.database import get_all_user_ids, get_session_for_user
# Weekly Advertools tasks every eligible user should have:
# (value stored in payload['type'], label used in log messages, days until first run).
_ADVERTOOLS_WEEKLY_TASKS = (
    ("content_audit", "content audit", 1),  # Start tomorrow
    ("site_health", "site health", 2),  # Start in 2 days
)


def _queue_missing_advertools_task(
    db: Session,
    user_id: Any,
    website_url: str,
    task_type: str,
    first_run_in_days: int,
) -> bool:
    """Add a weekly Advertools task of ``task_type`` unless the user already has one.

    The new task is only added to the session; committing is left to the caller.

    Returns:
        True if a task was added to the session, False if one already existed.
    """
    existing = db.query(AdvertoolsTask).filter(
        AdvertoolsTask.user_id == user_id,
        func.json_extract(AdvertoolsTask.payload, '$.type') == task_type
    ).first()
    if existing:
        return False

    db.add(AdvertoolsTask(
        user_id=user_id,
        website_url=website_url,
        status='active',
        next_execution=datetime.utcnow() + timedelta(days=first_run_in_days),
        frequency_days=7,
        payload={
            "type": task_type,
            "website_url": website_url
        }
    ))
    return True


async def restore_advertools_tasks(scheduler: Any) -> int:
    """
    Restore/create Advertools tasks for all users who have completed Step 2.

    For each user ID from ``get_all_user_ids()``, the user's own session is
    opened, and users without an OnboardingSession or without a
    WebsiteAnalysis carrying a ``website_url`` are skipped. For the remaining
    users, a weekly ``content_audit`` and a weekly ``site_health`` task are
    created when missing.

    A failure for one user is logged and does not stop processing of the
    remaining users.

    Args:
        scheduler: Not used by this function; kept so the signature stays
            unchanged for existing callers.

    Returns:
        Number of tasks created and successfully committed.
    """
    logger.info("Restoring Advertools intelligence tasks...")
    total_created = 0

    for user_id in get_all_user_ids():
        try:
            db = get_session_for_user(user_id)
            if not db:
                continue

            try:
                # Check if user has completed Step 2 (has WebsiteAnalysis)
                session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
                if not session:
                    continue

                analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
                if not analysis or not analysis.website_url:
                    continue

                # Read the URL once, before any commit can expire the loaded row.
                website_url = analysis.website_url

                created_labels = []
                for task_type, label, first_run_in_days in _ADVERTOOLS_WEEKLY_TASKS:
                    if _queue_missing_advertools_task(db, user_id, website_url, task_type, first_run_in_days):
                        created_labels.append(label)

                if not created_labels:
                    # Nothing to persist; the finally clause still closes the session.
                    continue

                db.commit()

                # Count and announce only after the commit succeeded, so a failed
                # commit can neither inflate the returned total nor log tasks that
                # were never stored.
                total_created += len(created_labels)
                for label in created_labels:
                    logger.info(f"Created weekly {label} task for user {user_id}")
            finally:
                db.close()

        except Exception as e:
            logger.error(f"Error restoring Advertools tasks for user {user_id}: {e}")

    return total_created

View File

@@ -7,18 +7,21 @@ from typing import TYPE_CHECKING, Dict, Any
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from .exception_handler import DatabaseError
from .interval_manager import adjust_check_interval_if_needed
# Import semantic monitoring for Phase 2B integration
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
if TYPE_CHECKING:
from .scheduler import TaskScheduler
logger = get_service_logger("check_cycle_handler")
# Track last semantic check per user to enforce 24-hour interval
# In-memory cache is sufficient as it resets on restart (which is fine)
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
"""
@@ -42,154 +45,133 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
'total_failed': 0
}
db = None
try:
db = get_db_session()
if db is None:
logger.error("[Scheduler Check] ❌ Failed to get database session")
return
# Check for active strategies and adjust interval intelligently
await adjust_check_interval_if_needed(scheduler, db)
# Check each registered task type
registered_types = scheduler.registry.get_registered_types()
for task_type in registered_types:
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
if type_summary:
cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)
# Calculate totals
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
# Log comprehensive check cycle summary
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
active_strategies = scheduler.stats.get('active_strategies_count', 0)
active_executions = len(scheduler.active_executions)
# Build comprehensive check cycle summary log message
check_lines = [
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
f" ├─ Duration: {check_duration:.2f}s",
f" ├─ Active Strategies: {active_strategies}",
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
]
if cycle_summary['tasks_found_by_type']:
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
for idx, (task_type, count) in enumerate(task_types_list):
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
prefix = " └─" if is_last_task_type else " ├─"
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
if cycle_summary['total_found'] > 0:
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
else:
check_lines.append(f" └─ No tasks found - scheduler idle")
# Log comprehensive check cycle summary in single message
logger.warning("\n".join(check_lines))
# Save check cycle event to database for historical tracking
event_log_id = None
# Iterate through all users (Multi-tenancy support)
user_ids = get_all_user_ids()
total_active_strategies = 0
for user_id in user_ids:
db = get_session_for_user(user_id)
if not db:
logger.warning(f"[Scheduler Check] Could not get database session for user {user_id}")
continue
try:
event_log = SchedulerEventLog(
event_type='check_cycle',
event_date=check_start_time,
check_cycle_number=scheduler.stats['total_checks'],
check_interval_minutes=scheduler.current_check_interval_minutes,
tasks_found=cycle_summary.get('total_found', 0),
tasks_executed=cycle_summary.get('total_executed', 0),
tasks_failed=cycle_summary.get('total_failed', 0),
tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
check_duration_seconds=check_duration,
active_strategies_count=active_strategies,
active_executions=active_executions,
event_data={
'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
}
)
db.add(event_log)
db.flush() # Flush to get the ID without committing
event_log_id = event_log.id
db.commit()
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
if db:
db.rollback()
# Continue execution even if event log save fails
# Update cumulative stats table (persistent across restarts)
try:
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
# Update cumulative metrics by adding this cycle's values
# Get current cycle values (incremental, not total)
cycle_tasks_found = cycle_summary.get('total_found', 0)
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
# Update cumulative totals (additive)
cumulative_stats.total_check_cycles += 1
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
# We track it as-is from scheduler.stats (it's already cumulative)
# This ensures we don't double-count skipped tasks
if cumulative_stats.cumulative_tasks_skipped is None:
cumulative_stats.cumulative_tasks_skipped = 0
# Update to current total from scheduler (which is already cumulative)
current_skipped = scheduler.stats.get('tasks_skipped', 0)
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
cumulative_stats.cumulative_tasks_skipped = current_skipped
cumulative_stats.last_check_cycle_id = event_log_id
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
# Log at DEBUG level to avoid noise during normal operation
# This is expected behavior, not a warning
logger.debug(
f"[Check Cycle] Updated cumulative stats: "
f"cycles={cumulative_stats.total_check_cycles}, "
f"found={cumulative_stats.cumulative_tasks_found}, "
f"executed={cumulative_stats.cumulative_tasks_executed}, "
f"failed={cumulative_stats.cumulative_tasks_failed}"
)
except Exception as e:
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
if db:
db.rollback()
# Log warning but continue - cumulative stats can be rebuilt from event logs
logger.warning(
"[Check Cycle] ⚠️ Cumulative stats update failed. "
"Stats can be rebuilt from event logs on next dashboard load."
)
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
except Exception as e:
error = DatabaseError(
message=f"Error checking for due tasks: {str(e)}",
original_error=e
)
scheduler.exception_handler.handle_exception(error)
logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
finally:
if db:
db.close()
# Check active strategies for this user (for interval adjustment)
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
user_active_strategies = active_strategy_service.count_active_strategies_with_tasks()
total_active_strategies += user_active_strategies
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
# Check if 24 hours have passed since last check
should_run_semantic = False
now = datetime.utcnow()
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
should_run_semantic = True
if should_run_semantic:
try:
semantic_monitor = RealTimeSemanticMonitor(user_id)
# Use public wrapper method which aggregates metrics
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
semantic_health = await semantic_monitor.check_semantic_health(user_id)
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
# Update timestamp only on success/attempt to prevent spamming retries
LAST_SEMANTIC_CHECKS[user_id] = now
except Exception as e:
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
else:
pass
# Check each registered task type for this user
registered_types = scheduler.registry.get_registered_types()
for task_type in registered_types:
# Pass the user-specific session
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary, user_id=user_id)
if type_summary:
cycle_summary['tasks_found_by_type'][task_type] = cycle_summary['tasks_found_by_type'].get(task_type, 0) + type_summary.get('found', 0)
cycle_summary['tasks_executed_by_type'][task_type] = cycle_summary['tasks_executed_by_type'].get(task_type, 0) + type_summary.get('executed', 0)
cycle_summary['tasks_failed_by_type'][task_type] = cycle_summary['tasks_failed_by_type'].get(task_type, 0) + type_summary.get('failed', 0)
except Exception as e:
logger.error(f"[Scheduler Check] Error processing user {user_id}: {e}")
finally:
db.close()
# Adjust interval based on TOTAL active strategies across all users
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
# because it's not multi-tenant aware yet.
scheduler.stats['active_strategies_count'] = total_active_strategies
if total_active_strategies > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {total_active_strategies}\n"
f" └─ Reason: {'Active strategies detected' if total_active_strategies > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
job_id='check_due_tasks',
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
scheduler.current_check_interval_minutes = optimal_interval
# Calculate totals
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
# Log comprehensive check cycle summary
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
active_executions = len(scheduler.active_executions)
# Build comprehensive check cycle summary log message
check_lines = [
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
f" ├─ Duration: {check_duration:.2f}s",
f" ├─ Active Strategies: {total_active_strategies}",
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
f" ├─ User Isolation: Enabled (Scanned {len(user_ids)} users)",
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
]
if cycle_summary['tasks_found_by_type']:
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
for idx, (task_type, count) in enumerate(task_types_list):
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
prefix = " └─" if is_last_task_type else " ├─"
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
if cycle_summary['total_found'] > 0:
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
else:
check_lines.append(f" └─ No tasks found - scheduler idle")
# Log comprehensive check cycle summary in single message
logger.warning("\n".join(check_lines))
# Update last_update timestamp for frontend polling
scheduler.stats['last_update'] = datetime.utcnow().isoformat()

View File

@@ -106,6 +106,7 @@ class DatabaseError(SchedulerException):
message: str,
user_id: Optional[int] = None,
task_id: Optional[int] = None,
task_type: Optional[str] = None,
context: Dict[str, Any] = None,
original_error: Exception = None
):
@@ -115,6 +116,7 @@ class DatabaseError(SchedulerException):
severity=SchedulerErrorSeverity.CRITICAL,
user_id=user_id,
task_id=task_id,
task_type=task_type,
context=context or {},
original_error=original_error
)
@@ -180,6 +182,9 @@ class SchedulerConfigError(SchedulerException):
def __init__(
self,
message: str,
user_id: Optional[int] = None,
task_id: Optional[int] = None,
task_type: Optional[str] = None,
context: Dict[str, Any] = None,
original_error: Exception = None
):
@@ -187,6 +192,9 @@ class SchedulerConfigError(SchedulerException):
message=message,
error_type=SchedulerErrorType.SCHEDULER_CONFIG_ERROR,
severity=SchedulerErrorSeverity.CRITICAL,
user_id=user_id,
task_id=task_id,
task_type=task_type,
context=context or {},
original_error=original_error
)

View File

@@ -7,9 +7,8 @@ from typing import TYPE_CHECKING
from datetime import datetime
from sqlalchemy.orm import Session
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
from .scheduler import TaskScheduler
@@ -23,7 +22,7 @@ async def determine_optimal_interval(
max_interval: int
) -> int:
"""
Determine optimal check interval based on active strategies.
Determine optimal check interval based on active strategies across all users.
Args:
scheduler: TaskScheduler instance
@@ -33,107 +32,100 @@ async def determine_optimal_interval(
Returns:
Optimal check interval in minutes
"""
db = None
try:
db = get_db_session()
if db:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
scheduler.stats['active_strategies_count'] = active_count
if active_count > 0:
logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
return min_interval
else:
logger.info(f"No active strategies with tasks - using {max_interval}min interval")
return max_interval
except Exception as e:
logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
finally:
if db:
db.close()
total_active_count = 0
user_ids = get_all_user_ids()
# Default to shorter interval on error (safer)
return min_interval
for user_id in user_ids:
db = None
try:
db = get_session_for_user(user_id)
if db:
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=db)
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
total_active_count += user_active_count
# Optimization: If we found at least one active strategy, we can stop and return min_interval
# (unless we want accurate stats)
# For stats accuracy, we should continue.
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
except Exception as e:
logger.warning(f"Error checking user {user_id} for strategies: {e}")
finally:
if db:
db.close()
scheduler.stats['active_strategies_count'] = total_active_count
if total_active_count > 0:
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
return min_interval
else:
logger.info(f"No active strategies found - using {max_interval}min interval")
return max_interval
async def adjust_check_interval_if_needed(
scheduler: 'TaskScheduler',
db: Session
db: Session = None # Deprecated parameter, ignored
):
"""
Intelligently adjust check interval based on active strategies.
Intelligently adjust check interval based on active strategies across all users.
If there are active strategies with tasks, check more frequently.
If there are no active strategies, check less frequently.
Args:
scheduler: TaskScheduler instance
db: Database session
db: Deprecated/Ignored
"""
try:
from services.active_strategy_service import ActiveStrategyService
total_active_count = 0
user_ids = get_all_user_ids()
for user_id in user_ids:
user_db = None
try:
user_db = get_session_for_user(user_id)
if user_db:
try:
from services.active_strategy_service import ActiveStrategyService
active_strategy_service = ActiveStrategyService(db_session=user_db)
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
total_active_count += user_active_count
except Exception as e:
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
except Exception as e:
logger.warning(f"Error checking user {user_id} for strategies: {e}")
finally:
if user_db:
user_db.close()
scheduler.stats['active_strategies_count'] = total_active_count
# Determine optimal interval
if total_active_count > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
# Only reschedule if interval needs to change
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {total_active_count}\n"
f" └─ Reason: {'Active strategies detected' if total_active_count > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
active_strategy_service = ActiveStrategyService(db_session=db)
active_count = active_strategy_service.count_active_strategies_with_tasks()
scheduler.stats['active_strategies_count'] = active_count
# Determine optimal interval
if active_count > 0:
optimal_interval = scheduler.min_check_interval_minutes
else:
optimal_interval = scheduler.max_check_interval_minutes
# Only reschedule if interval needs to change
if optimal_interval != scheduler.current_check_interval_minutes:
interval_message = (
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
f" ├─ Optimal: {optimal_interval}min\n"
f" ├─ Active Strategies: {active_count}\n"
f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
)
logger.warning(interval_message)
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
'check_due_tasks',
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
# Save previous interval before updating
previous_interval = scheduler.current_check_interval_minutes
# Update current interval
scheduler.current_check_interval_minutes = optimal_interval
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
# Save interval adjustment event to database
try:
event_db = get_db_session()
if event_db:
event_log = SchedulerEventLog(
event_type='interval_adjustment',
event_date=datetime.utcnow(),
previous_interval_minutes=previous_interval,
new_interval_minutes=optimal_interval,
check_interval_minutes=optimal_interval,
active_strategies_count=active_count,
event_data={
'reason': 'intelligent_scheduling',
'min_interval': scheduler.min_check_interval_minutes,
'max_interval': scheduler.max_check_interval_minutes
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.warning(f"Failed to save interval adjustment event log: {e}")
logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
except Exception as e:
logger.warning(f"Error adjusting check interval: {e}")
# Reschedule the job with new interval
scheduler.scheduler.modify_job(
job_id='check_due_tasks', # Fixed job_id from check_cycle to check_due_tasks to match scheduler.py
trigger=scheduler._get_trigger_for_interval(optimal_interval)
)
scheduler.current_check_interval_minutes = optimal_interval
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()

View File

@@ -7,7 +7,7 @@ Preserves original scheduled times from database to avoid rescheduling on server
from typing import TYPE_CHECKING
from datetime import datetime, timezone, timedelta
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_db_session, get_all_user_ids, get_session_for_user
from models.scheduler_models import SchedulerEventLog
if TYPE_CHECKING:
@@ -28,35 +28,39 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
scheduler: TaskScheduler instance
"""
try:
db = get_db_session()
if not db:
logger.warning("Could not get database session to restore persona jobs")
return
user_ids = get_all_user_ids()
logger.info(f"[Restoration] Found {len(user_ids)} users to check for persona jobs")
try:
from models.onboarding import OnboardingSession
from services.research.research_persona_scheduler import (
schedule_research_persona_generation,
generate_research_persona_task
)
from services.persona.facebook.facebook_persona_scheduler import (
schedule_facebook_persona_generation,
generate_facebook_persona_task
)
from services.research.research_persona_service import ResearchPersonaService
from services.persona_data_service import PersonaDataService
for user_id in user_ids:
db = get_session_for_user(user_id)
if not db:
logger.warning(f"Could not get database session for user {user_id}")
continue
# Get all users who completed onboarding
completed_sessions = db.query(OnboardingSession).filter(
OnboardingSession.progress == 100.0
).all()
restored_count = 0
skipped_count = 0
now = datetime.utcnow().replace(tzinfo=timezone.utc)
for session in completed_sessions:
user_id = session.user_id
try:
from models.onboarding import OnboardingSession
from services.research.research_persona_scheduler import (
schedule_research_persona_generation,
generate_research_persona_task
)
from services.persona.facebook.facebook_persona_scheduler import (
schedule_facebook_persona_generation,
generate_facebook_persona_task
)
from services.research.research_persona_service import ResearchPersonaService
from services.persona_data_service import PersonaDataService
# Check if user completed onboarding
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).order_by(OnboardingSession.updated_at.desc()).first()
if not session or session.progress < 100.0:
continue
restored_count = 0
skipped_count = 0
now = datetime.utcnow().replace(tzinfo=timezone.utc)
# Restore research persona job
try:
@@ -69,7 +73,7 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
research_persona_exists = bool(research_persona_data)
if not research_persona_exists:
# Note: Clerk user_id already includes "user_" prefix
# Note: Clerk user_id already includes "user_" prefix if applicable, or we use the string as is
job_id = f"research_persona_{user_id}"
# Check if job already exists in scheduler (just started, so unlikely)
@@ -256,13 +260,13 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
except Exception as e:
logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")
if restored_count > 0:
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
if skipped_count > 0:
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")
finally:
db.close()
if restored_count > 0:
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) for user {user_id}")
if skipped_count > 0:
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) for user {user_id}")
finally:
db.close()
except Exception as e:
logger.warning(f"Error restoring persona jobs: {e}")

View File

@@ -9,7 +9,7 @@ from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_session_for_user, get_all_user_ids
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
@@ -31,98 +31,41 @@ async def restore_oauth_monitoring_tasks(scheduler):
"""
try:
logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
db = get_db_session()
if not db:
logger.warning("[OAuth Task Restoration] Could not get database session")
return
try:
# Get all existing OAuth tasks to find unique user_ids
existing_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Log existing tasks breakdown by platform
existing_by_platform = {}
for task in existing_tasks:
existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1
platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
logger.warning(
f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
)
# Check users who already have at least one OAuth task
users_to_check = list(user_ids_with_tasks)
# Also query all users from onboarding who completed step 5 (integrations)
# to catch users who connected platforms but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
restoration_summary = []
for user_id in user_ids:
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_
db = get_session_for_user(user_id)
if not db:
logger.debug(f"[OAuth Task Restoration] Could not get database session for user {user_id}")
continue
# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()
# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()
# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()
# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)
users_to_check = all_user_ids
logger.warning(
f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
total_created = 0
restoration_summary = [] # Collect summary for single log
for user_id in users_to_check:
try:
users_processed += 1
# Get existing tasks for this user
try:
existing_tasks = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.user_id == user_id
).all()
total_existing_tasks += len(existing_tasks)
except Exception as table_error:
# Table might not exist for this user yet
continue
# Get connected platforms for this user (silent - no logging)
connected_platforms = get_connected_platforms(user_id)
if not connected_platforms:
logger.debug(
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
)
continue
# Check which platforms are missing tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}
existing_platforms = {task.platform for task in existing_tasks}
missing_platforms = [
platform
@@ -138,53 +81,44 @@ async def restore_oauth_monitoring_tasks(scheduler):
platforms=missing_platforms
)
total_created += len(created)
# Collect summary info instead of logging immediately
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
if created:
total_created += len(created)
platforms_str = ", ".join([p.upper() for p in missing_platforms])
restoration_summary.append(
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
)
except Exception as e:
logger.warning(
f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue
finally:
db.close()
except Exception as e:
logger.warning(f"[OAuth Task Restoration] Error processing user {user_id}: {e}")
continue
# Log summary
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5])
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
# Final summary log with platform breakdown
final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
final_by_platform = {}
for task in final_existing_tasks:
final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1
logger.warning(
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Users Processed: {users_processed}\n"
f" ├─ Existing Tasks: {total_existing_tasks}\n"
f" ├─ New Tasks Created: {total_created}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Processed {users_processed} users."
)
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
# Single formatted summary log (similar to scheduler startup)
if total_created > 0:
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
if len(restoration_summary) > 5:
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
logger.warning(
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
f" ├─ Tasks Created: {total_created}\n"
f" ├─ Users Processed: {len(users_to_check)}\n"
f" ├─ Platform Breakdown: {final_platform_summary}\n"
+ summary_lines
)
else:
logger.warning(
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
)
finally:
db.close()
return total_existing_tasks + total_created
except Exception as e:
logger.error(
f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
exc_info=True
)
return 0

View File

@@ -9,7 +9,7 @@ from typing import List
from sqlalchemy.orm import Session
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_session_for_user, get_all_user_ids
from models.platform_insights_monitoring_models import PlatformInsightsTask
from services.platform_insights_monitoring_service import create_platform_insights_task
from services.oauth_token_monitoring_service import get_connected_platforms
@@ -32,44 +32,36 @@ async def restore_platform_insights_tasks(scheduler):
"""
try:
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
db = get_db_session()
if not db:
logger.warning("[Platform Insights Restoration] Could not get database session")
return
try:
# Get all existing insights tasks to find unique user_ids
existing_tasks = db.query(PlatformInsightsTask).all()
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Get all OAuth tasks to find users with connected platforms
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)
# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']
# Get users who have OAuth tasks for GSC or Bing
users_to_check = set()
for task in oauth_tasks:
if task.platform in insights_platforms:
users_to_check.add(task.user_id)
logger.warning(
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
f"with GSC/Bing OAuth connections."
)
if not users_to_check:
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
return
total_created = 0
restoration_summary = []
for user_id in users_to_check:
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
restoration_summary = []
# Platforms that support insights (GSC and Bing only)
insights_platforms = ['gsc', 'bing']
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if not db:
logger.debug(f"[Platform Insights Restoration] Could not get database session for user {user_id}")
continue
try:
users_processed += 1
# Get existing insights tasks
try:
existing_tasks = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.user_id == user_id
).all()
total_existing_tasks += len(existing_tasks)
except Exception as table_error:
# Table might not exist
continue
# Get connected platforms for this user
connected_platforms = get_connected_platforms(user_id)
@@ -77,17 +69,10 @@ async def restore_platform_insights_tasks(scheduler):
insights_connected = [p for p in connected_platforms if p in insights_platforms]
if not insights_connected:
logger.debug(
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
)
continue
# Check which platforms are missing insights tasks
existing_platforms = {
task.platform
for task in existing_tasks
if task.user_id == user_id
}
existing_platforms = {task.platform for task in existing_tasks}
missing_platforms = [
platform
@@ -101,11 +86,10 @@ async def restore_platform_insights_tasks(scheduler):
try:
# Don't fetch site_url here - it requires API calls
# The executor will fetch it when the task runs (weekly)
# This avoids API calls during restoration
result = create_platform_insights_task(
user_id=user_id,
platform=platform,
site_url=None, # Will be fetched by executor when task runs
site_url=None,
db=db
)
@@ -125,28 +109,28 @@ async def restore_platform_insights_tasks(scheduler):
f"for user {user_id}: {e}"
)
continue
finally:
db.close()
except Exception as e:
logger.debug(
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
)
continue
except Exception as e:
logger.warning(f"[Platform Insights Restoration] Error processing user {user_id}: {e}")
continue
# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Processed {users_processed} users."
)
# Log summary
if total_created > 0:
logger.warning(
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
"\n".join(restoration_summary)
)
else:
logger.warning(
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
)
finally:
db.close()
return total_existing_tasks + total_created
except Exception as e:
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
return 0

View File

@@ -19,7 +19,7 @@ from .exception_handler import (
SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
TaskLoaderError, SchedulerConfigError
)
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from utils.logger_utils import get_service_logger
from ..utils.user_job_store import get_user_job_store_name
from models.scheduler_models import SchedulerEventLog
@@ -28,6 +28,7 @@ from .job_restoration import restore_persona_jobs
from .oauth_task_restoration import restore_oauth_monitoring_tasks
from .website_analysis_task_restoration import restore_website_analysis_tasks
from .platform_insights_task_restoration import restore_platform_insights_tasks
from .advertools_task_restoration import restore_advertools_tasks
from .check_cycle_handler import check_and_execute_due_tasks
from .task_execution_handler import execute_task_async
@@ -185,13 +186,17 @@ class TaskScheduler:
await restore_persona_jobs(self)
# Restore/create missing OAuth token monitoring tasks for connected platforms
await restore_oauth_monitoring_tasks(self)
total_oauth_tasks = await restore_oauth_monitoring_tasks(self)
oauth_tasks_count = total_oauth_tasks
# Restore/create missing website analysis tasks for users who completed onboarding
await restore_website_analysis_tasks(self)
website_analysis_tasks_count = await restore_website_analysis_tasks(self)
# Restore/create missing platform insights tasks for users with connected GSC/Bing
await restore_platform_insights_tasks(self)
platform_insights_tasks_count = await restore_platform_insights_tasks(self)
# Restore/create missing Advertools intelligence tasks
advertools_tasks_count = await restore_advertools_tasks(self)
# Validate and rebuild cumulative stats if needed
await self._validate_and_rebuild_cumulative_stats()
@@ -203,99 +208,47 @@ class TaskScheduler:
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
oauth_tasks_count = 0
oauth_tasks_details = []
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Count active tasks
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
# Get all tasks (for detailed logging)
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
total_oauth_tasks = len(all_oauth_tasks)
# Show platform breakdown for ALL tasks (active and inactive)
all_platforms = {}
active_platforms = {}
for task in all_oauth_tasks:
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
if task.status == 'active':
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
if total_oauth_tasks > 0:
# Log details about all tasks (not just active)
for task in all_oauth_tasks:
oauth_tasks_details.append(
f"user={task.user_id}, platform={task.platform}, status={task.status}"
)
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
logger.warning(
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
f"but {oauth_tasks_count} are active. "
f"All platforms: {all_platform_summary}. "
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
)
elif oauth_tasks_count > 0:
# Show platform breakdown for active tasks
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
if missing_in_db:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}. "
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
)
else:
logger.warning(
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
f"All platforms: {all_platform_summary}"
)
db.close()
except Exception as e:
logger.warning(
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
f"tasks haven't been created. Error type: {type(e).__name__}"
)
# Get website analysis tasks count
website_analysis_tasks_count = 0
try:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get website analysis tasks count: {e}")
# Get platform insights tasks count
platform_insights_tasks_count = 0
try:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
except Exception as e:
logger.debug(f"Could not get platform insights tasks count: {e}")
advertools_tasks_count = 0
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if not db:
continue
try:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
oauth_tasks_count += db.query(OAuthTokenMonitoringTask).filter(
OAuthTokenMonitoringTask.status == 'active'
).count()
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks_count += db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.status == 'active'
).count()
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks_count += db.query(PlatformInsightsTask).filter(
PlatformInsightsTask.status == 'active'
).count()
from models.advertools_monitoring_models import AdvertoolsTask
advertools_tasks_count += db.query(AdvertoolsTask).filter(
AdvertoolsTask.status == 'active'
).count()
finally:
db.close()
except Exception as e:
logger.debug(f"Error counting tasks for user {user_id}: {e}")
# Calculate job counts
apscheduler_recurring = 1 # check_due_tasks
apscheduler_one_time = len(all_jobs) - 1
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
# Build comprehensive startup log message
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
@@ -305,6 +258,8 @@ class TaskScheduler:
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
if platform_insights_tasks_count > 0:
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
if advertools_tasks_count > 0:
recurring_breakdown += f", Advertools: {advertools_tasks_count}"
startup_lines = [
f"[Scheduler] ✅ Task Scheduler Started",
@@ -347,7 +302,7 @@ class TaskScheduler:
if user_id_from_job:
try:
db = get_db_session()
db = get_session_for_user(user_id_from_job)
if db:
user_job_store = get_user_job_store_name(user_id_from_job, db)
if user_job_store == 'default':
@@ -357,6 +312,8 @@ class TaskScheduler:
)
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
db.close()
else:
user_context = f" | User: {user_id_from_job} | DB: Not Found"
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
@@ -370,134 +327,172 @@ class TaskScheduler:
# Show ALL OAuth tasks (active and inactive) for complete visibility
if total_oauth_tasks > 0:
try:
db = get_db_session()
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks, not just active ones
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
# Get ALL tasks for this user
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
for idx, task in enumerate(oauth_tasks):
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified prefix logic for multi-user list
try:
user_job_store = get_user_job_store_name(task.user_id, db)
if user_job_store == 'default':
logger.debug(
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
f"This may indicate no onboarding data or website URL not found."
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
except Exception as e:
logger.warning(
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
f"Using 'default'. Error type: {type(e).__name__}"
)
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
# Include status in the log line for visibility
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
)
db.close()
db.close()
except Exception as e:
logger.warning(f"Error checking OAuth tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
# Add website analysis tasks details
if website_analysis_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
for idx, task in enumerate(website_analysis_tasks):
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
frequency = f"Every {task.frequency_days} days"
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
startup_lines.append(
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking website analysis tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get website analysis task details: {e}")
# Add platform insights tasks details
if platform_insights_tasks_count > 0:
try:
db = get_db_session()
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
prefix = " └─" if is_last else " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.platform_insights_monitoring_models import PlatformInsightsTask
platform_insights_tasks = db.query(PlatformInsightsTask).all()
for idx, task in enumerate(platform_insights_tasks):
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and user_id == user_ids[-1]
prefix = " ├─" # Simplified
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
platform_label = task.platform.upper() if task.platform else 'Unknown'
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking platform insights tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get platform insights task details: {e}")
# Add Advertools tasks details
if advertools_tasks_count > 0:
try:
user_ids = get_all_user_ids()
for user_id in user_ids:
try:
db = get_session_for_user(user_id)
if db:
from models.advertools_monitoring_models import AdvertoolsTask
advertools_tasks = db.query(AdvertoolsTask).all()
for idx, task in enumerate(advertools_tasks):
is_last = idx == len(advertools_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and user_id == user_ids[-1]
prefix = " ├─"
try:
user_job_store = get_user_job_store_name(task.user_id, db)
except Exception as e:
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
user_job_store = 'default'
next_check = task.next_execution.isoformat() if task.next_execution else 'Not scheduled'
task_type = task.payload.get('type') if task.payload else 'unknown'
status_indicator = "" if task.status == 'active' else f"[{task.status}]"
startup_lines.append(
f"{prefix} Job: advertools_{task_type}_{task.user_id}_{task.id} | "
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type} {status_indicator}"
)
db.close()
except Exception as e:
logger.warning(f"Error checking Advertools tasks for user {user_id}: {e}")
except Exception as e:
logger.debug(f"Could not get Advertools task details: {e}")
# Log comprehensive startup information in single message
logger.warning("\n".join(startup_lines))
# Save scheduler start event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='start',
event_date=datetime.utcnow(),
check_interval_minutes=initial_interval,
active_strategies_count=active_strategies,
event_data={
'registered_types': registered_types,
'total_jobs': total_jobs,
'recurring_jobs': total_recurring,
'one_time_jobs': apscheduler_one_time,
'oauth_monitoring_tasks': oauth_tasks_count,
'website_analysis_tasks': website_analysis_tasks_count,
'platform_insights_tasks': platform_insights_tasks_count
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler start event log: {e}")
# Disabled in multi-tenant mode as there is no global DB
# try:
# db = get_db_session()
# if db:
# event_log = SchedulerEventLog(...)
# db.add(event_log)
# db.commit()
# db.close()
# except Exception as e:
# logger.warning(f"Failed to save scheduler start event log: {e}")
except Exception as e:
logger.error(f"Failed to start scheduler: {e}")
@@ -544,25 +539,26 @@ class TaskScheduler:
logger.warning(shutdown_message)
# Save scheduler stop event to database
try:
db = get_db_session()
if db:
event_log = SchedulerEventLog(
event_type='stop',
event_date=datetime.utcnow(),
check_interval_minutes=self.current_check_interval_minutes,
event_data={
'total_checks': total_checks,
'total_executed': total_executed,
'total_failed': total_failed,
'jobs_cancelled': len(all_jobs_before)
}
)
db.add(event_log)
db.commit()
db.close()
except Exception as e:
logger.warning(f"Failed to save scheduler stop event log: {e}")
# Disabled in multi-tenant mode as there is no global DB
# try:
# db = get_db_session()
# if db:
# event_log = SchedulerEventLog(
# event_type='stop',
# event_date=datetime.utcnow(),
# check_interval_minutes=self.current_check_interval_minutes,
# event_data={
# 'total_checks': total_checks,
# 'total_executed': total_executed,
# 'total_failed': total_failed,
# 'jobs_cancelled': len(all_jobs_before)
# }
# )
# db.add(event_log)
# db.commit()
# db.close()
# except Exception as e:
# logger.warning(f"Failed to save scheduler stop event log: {e}")
except Exception as e:
logger.error(f"Error stopping scheduler: {e}")
@@ -630,12 +626,8 @@ class TaskScheduler:
return
try:
db = get_db_session()
if db:
await adjust_check_interval_if_needed(self, db)
db.close()
else:
logger.warning("Could not get database session for interval adjustment")
# Multi-tenant aware adjustment (iterates all users internally)
await adjust_check_interval_if_needed(self)
except Exception as e:
logger.warning(f"Error triggering interval adjustment: {e}")
@@ -643,125 +635,14 @@ class TaskScheduler:
"""
Validate cumulative stats on scheduler startup and rebuild if needed.
This ensures cumulative stats are accurate after restarts.
NOTE: Disabled in multi-tenant mode as there is no global database for cumulative stats.
TODO: Implement per-user cumulative stats or a global admin database.
"""
db = None
try:
db = get_db_session()
if not db:
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
return
try:
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
from models.scheduler_models import SchedulerEventLog
from sqlalchemy import func
# Get cumulative stats from persistent table
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
SchedulerCumulativeStats.id == 1
).first()
# Count check_cycle events in database
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
SchedulerEventLog.event_type == 'check_cycle'
).scalar() or 0
if cumulative_stats:
# Validate: cumulative stats should match event log count
if cumulative_stats.total_check_cycles != check_cycle_count:
logger.warning(
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
f"vs event_logs.count={check_cycle_count}. "
f"Rebuilding cumulative stats from event logs..."
)
# Rebuild from event logs
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
# Update cumulative stats
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
else:
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
else:
logger.warning(
f"[Scheduler] ✅ Cumulative stats validated: "
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
)
else:
# Cumulative stats table doesn't exist, create it from event logs
logger.warning(
"[Scheduler] Cumulative stats table not found. "
"Creating from event logs..."
)
result = db.query(
func.count(SchedulerEventLog.id),
func.sum(SchedulerEventLog.tasks_found),
func.sum(SchedulerEventLog.tasks_executed),
func.sum(SchedulerEventLog.tasks_failed)
).filter(
SchedulerEventLog.event_type == 'check_cycle'
).first()
if result:
total_cycles = result[0] if result[0] is not None else 0
total_found = result[1] if result[1] is not None else 0
total_executed = result[2] if result[2] is not None else 0
total_failed = result[3] if result[3] is not None else 0
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
cumulative_stats.total_check_cycles = int(total_cycles)
cumulative_stats.cumulative_tasks_found = int(total_found)
cumulative_stats.cumulative_tasks_executed = int(total_executed)
cumulative_stats.cumulative_tasks_failed = int(total_failed)
cumulative_stats.last_updated = datetime.utcnow()
cumulative_stats.updated_at = datetime.utcnow()
db.commit()
logger.warning(
f"[Scheduler] ✅ Created cumulative stats from event logs: "
f"cycles={total_cycles}, found={total_found}, "
f"executed={total_executed}, failed={total_failed}"
)
except ImportError:
logger.warning(
"[Scheduler] Cumulative stats model not available. "
"Migration may not have been run yet. "
"Run: python backend/scripts/run_cumulative_stats_migration.py"
)
except Exception as e:
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
finally:
if db:
db.close()
logger.info("[Scheduler] Cumulative stats validation skipped (multi-tenant mode)")
return
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None, user_id: str = None) -> Optional[Dict[str, Any]]:
"""
Process due tasks for a specific task type.
@@ -816,7 +697,7 @@ class TaskScheduler:
# Execute task asynchronously
# Note: Each task gets its own database session to prevent concurrent access issues
execution_task = asyncio.create_task(
execute_task_async(self, task_type, task, summary)
execute_task_async(self, task_type, task, summary, user_id=user_id)
)
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
@@ -970,7 +851,7 @@ class TaskScheduler:
job_store_name = 'default'
if user_id:
try:
db = get_db_session()
db = get_session_for_user(user_id)
if db:
job_store_name = get_user_job_store_name(user_id, db)
db.close()
@@ -996,27 +877,28 @@ class TaskScheduler:
logger.warning(log_message)
# Log job scheduling to event log for dashboard
try:
event_db = get_db_session()
if event_db:
event_log = SchedulerEventLog(
event_type='job_scheduled',
event_date=datetime.utcnow(),
job_id=job_id,
job_type='one_time',
user_id=user_id,
event_data={
'function_name': func_name,
'job_store': job_store_name,
'scheduled_for': run_date.isoformat(),
'replace_existing': replace_existing
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.debug(f"Failed to log job scheduling event: {e}")
if user_id:
try:
event_db = get_session_for_user(user_id)
if event_db:
event_log = SchedulerEventLog(
event_type='job_scheduled',
event_date=datetime.utcnow(),
job_id=job_id,
job_type='one_time',
user_id=user_id,
event_data={
'function_name': func_name,
'job_store': job_store_name,
'scheduled_for': run_date.isoformat(),
'replace_existing': replace_existing
}
)
event_db.add(event_log)
event_db.commit()
event_db.close()
except Exception as e:
logger.debug(f"Failed to log job scheduling event: {e}")
return job_id
except Exception as e:
@@ -1027,3 +909,14 @@ class TaskScheduler:
"""Check if scheduler is running."""
return self._running
async def execute_task_by_type(self, task_type: str, user_id: str, payload: Dict[str, Any]):
    """
    Run one task right away instead of waiting for the regular check cycle.

    The ``user_id`` and ``payload`` are wrapped in a lightweight stand-in
    task object carrying a synthetic ``manual_<timestamp>`` id, which is then
    handed to ``execute_task_async`` with ``execution_source="manual"``.

    Args:
        task_type: Task type name forwarded to ``execute_task_async``.
        user_id: Owner of the task; stored on the stand-in task object.
        payload: Task parameters; stored on the stand-in task object.
    """
    from collections import namedtuple

    # Synthetic id derived from the current UTC timestamp.
    stub_id = f"manual_{datetime.utcnow().timestamp()}"

    # Field order is (user_id, payload, id); values are passed positionally.
    stub_factory = namedtuple('TaskStub', ['user_id', 'payload', 'id'])
    manual_task = stub_factory(user_id, payload, stub_id)

    await execute_task_async(self, task_type, manual_task, execution_source="manual")

View File

@@ -23,7 +23,8 @@ async def execute_task_async(
task_type: str,
task: Any,
summary: Optional[Dict[str, Any]] = None,
execution_source: str = "scheduler" # "scheduler" or "manual"
execution_source: str = "scheduler", # "scheduler" or "manual"
user_id: Optional[str] = None
):
"""
Execute a single task asynchronously with user isolation.
@@ -38,21 +39,25 @@ async def execute_task_async(
task_type: Type of task
task: Task instance from database (detached from original session)
summary: Optional summary dict to update with execution results
user_id: Optional user ID for user isolation (overrides extraction from task)
"""
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
db = None
user_id = None
try:
# Extract user context if available (for user isolation tracking)
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
except Exception as e:
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
if user_id is None:
try:
if hasattr(task, 'strategy') and task.strategy:
user_id = getattr(task.strategy, 'user_id', None)
elif hasattr(task, 'strategy_id') and task.strategy_id:
# Will query user_id after we have db session
pass
elif hasattr(task, 'user_id') and task.user_id:
# Direct user_id on task object
user_id = task.user_id
except Exception as e:
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
# Log task execution start (detailed for important tasks)
task_db_id = getattr(task, 'id', None)
@@ -61,7 +66,7 @@ async def execute_task_async(
# Create a new database session for this async task
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
db = get_db_session()
db = get_db_session(user_id)
if db is None:
error = DatabaseError(
message=f"Failed to get database session for task {task_id}",
@@ -79,7 +84,15 @@ async def execute_task_async(
# Merge the detached task object into this session
# The task object was loaded in a different session and is now detached
if object_session(task) is None:
from sqlalchemy.inspection import inspect
is_model = False
try:
inspect(task)
is_model = True
except:
pass
if is_model and object_session(task) is None:
# Task is detached, need to merge it into this session
task = db.merge(task)

View File

@@ -4,15 +4,13 @@ Automatically creates missing website analysis tasks for users who completed onb
but don't have monitoring tasks created yet.
"""
from typing import List
from sqlalchemy.orm import Session
from datetime import datetime, timedelta, timezone
from utils.logger_utils import get_service_logger
from services.database import get_db_session
from services.database import get_all_user_ids, get_session_for_user
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
from services.website_analysis_monitoring_service import create_website_analysis_tasks
from services.website_analysis_monitoring_service import generate_website_analysis_tasks_task
from models.onboarding import OnboardingSession
from sqlalchemy import or_
# Use service logger for consistent logging (WARNING level visible in production)
logger = get_service_logger("website_analysis_restoration")
@@ -32,162 +30,103 @@ async def restore_website_analysis_tasks(scheduler):
"""
try:
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
db = get_db_session()
if not db:
logger.warning("[Website Analysis Restoration] Could not get database session")
return
try:
# Check if table exists (may not exist if migration hasn't run)
user_ids = get_all_user_ids()
total_created = 0
users_processed = 0
total_existing_tasks = 0
for user_id in user_ids:
try:
existing_tasks = db.query(WebsiteAnalysisTask).all()
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
)
return
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
# Log existing tasks breakdown by type
existing_by_type = {}
for task in existing_tasks:
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
logger.warning(
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
)
# Check users who already have at least one website analysis task
users_to_check = list(user_ids_with_tasks)
# Also query all users from onboarding who completed step 2 (website analysis)
# to catch users who completed onboarding but tasks weren't created
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
# This matches the logic used in home page redirect and persona generation checks
try:
from services.onboarding.progress_service import get_onboarding_progress_service
from models.onboarding import OnboardingSession
from sqlalchemy import or_
db = get_session_for_user(user_id)
if not db:
logger.warning(f"[Website Analysis Restoration] Could not get database session for user {user_id}")
continue
# Get onboarding progress service (same as used throughout the app)
progress_service = get_onboarding_progress_service()
# Query all sessions and filter using the same completion logic as the service
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
completed_sessions = db.query(OnboardingSession).filter(
or_(
OnboardingSession.current_step >= 6,
OnboardingSession.progress >= 100.0
)
).all()
# Validate using the service method for consistency
onboarding_user_ids = set()
for session in completed_sessions:
# Use the same service method as the rest of the app
status = progress_service.get_onboarding_status(session.user_id)
if status.get('is_completed', False):
onboarding_user_ids.add(session.user_id)
all_user_ids = users_to_check.copy()
# Add users from onboarding who might not have tasks yet
for user_id in onboarding_user_ids:
if user_id not in all_user_ids:
all_user_ids.append(user_id)
users_to_check = all_user_ids
logger.warning(
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
f"({len(user_ids_with_tasks)} with existing tasks, "
f"{len(onboarding_user_ids)} from onboarding sessions, "
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
# Fallback to users with existing tasks only
users_to_check = list(user_ids_with_tasks)
total_created = 0
users_processed = 0
for user_id in users_to_check:
try:
users_processed += 1
# Check if user already has tasks
existing_user_tasks = [
task for task in existing_tasks
if task.user_id == user_id
]
if existing_user_tasks:
logger.debug(
f"[Website Analysis Restoration] User {user_id} already has "
f"{len(existing_user_tasks)} website analysis tasks, skipping"
# Check if table exists
try:
existing_user_tasks = db.query(WebsiteAnalysisTask).filter(
WebsiteAnalysisTask.user_id == user_id
).all()
total_existing_tasks += len(existing_user_tasks)
except Exception as table_error:
logger.error(
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist for user {user_id}: {table_error}"
)
continue
logger.warning(
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)
# Create missing tasks
result = create_website_analysis_tasks(user_id=user_id, db=db)
if result.get('success'):
tasks_count = result.get('tasks_created', 0)
total_created += tasks_count
if existing_user_tasks:
# User has tasks, we assume they are fine for now
continue
# Check onboarding status
try:
from services.onboarding.progress_service import OnboardingProgressService
# Use a local instance or static logic if service expects global DB (it shouldn't anymore)
# We can query OnboardingSession directly
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).order_by(OnboardingSession.updated_at.desc()).first()
if not session:
continue
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
if not is_completed:
continue
logger.warning(
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
f"for user {user_id}"
)
else:
error = result.get('error', 'Unknown error')
logger.warning(
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
f"but has no website analysis tasks. Creating tasks..."
)
except Exception as e:
logger.warning(
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
exc_info=True
)
continue
# Final summary log
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
final_by_type = {}
for task in final_existing_tasks:
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1
final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])
if total_created > 0:
logger.warning(
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
)
else:
logger.warning(
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
f"Type breakdown: {final_type_summary}"
)
finally:
db.close()
job_id = f"website_analysis_tasks_{user_id}"
existing_jobs = [j for j in scheduler.scheduler.get_jobs() if j.id == job_id]
if existing_jobs:
continue
run_date = datetime.now(timezone.utc) + timedelta(minutes=5)
scheduler.schedule_one_time_task(
func=generate_website_analysis_tasks_task,
run_date=run_date,
job_id=job_id,
kwargs={"user_id": user_id},
replace_existing=True,
)
total_created += 1
logger.warning(
f"[Website Analysis Restoration] ✅ Scheduled website analysis task creation "
f"for user {user_id} at {run_date.isoformat()}"
)
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Could not check onboarding for user {user_id}: {e}")
finally:
db.close()
except Exception as e:
logger.warning(f"[Website Analysis Restoration] Error processing user {user_id}: {e}")
logger.warning(
f"[Website Analysis Restoration] ✅ Completed. "
f"Processed {users_processed} users. "
f"Found {total_existing_tasks} existing tasks. "
f"Created {total_created} new tasks."
)
return total_existing_tasks + total_created
except Exception as e:
logger.error(
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
exc_info=True
)
return 0

View File

@@ -0,0 +1,230 @@
import asyncio
from datetime import datetime, timedelta
from typing import Any, Dict, List
from loguru import logger
from sqlalchemy.orm import Session
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError
from services.seo.advertools_service import AdvertoolsService
from services.seo_tools.sitemap_service import SitemapService
from models.advertools_monitoring_models import AdvertoolsTask, AdvertoolsExecutionLog
from models.onboarding import WebsiteAnalysis, OnboardingSession
class AdvertoolsExecutor:
    """
    Executor for Advertools-based SEO intelligence tasks.

    Handles the 'content_audit' and 'site_health' task types. A run discovers
    the site's sitemap, calls the matching AdvertoolsService analysis, stores
    the findings on the user's WebsiteAnalysis row and updates the
    AdvertoolsTask schedule (next regular run on success, exponential backoff
    on failure).
    """

    # Task types execute_task knows how to run.
    _SUPPORTED_TASK_TYPES = ('content_audit', 'site_health')
    # Fallback cadence when the task record has no frequency_days.
    _DEFAULT_FREQUENCY_DAYS = 7
    # Upper bound for the exponential failure backoff.
    _MAX_BACKOFF_DAYS = 30
    # Consecutive failures after which the task is marked 'failed'.
    _MAX_CONSECUTIVE_FAILURES = 5

    def __init__(self):
        """Create the service collaborators and a logger bound to this executor."""
        self.advertools_service = AdvertoolsService()
        self.sitemap_service = SitemapService()
        self.logger = logger.bind(service="AdvertoolsExecutor")

    async def execute_task(self, task_stub: Any, db: Session, **kwargs) -> Dict[str, Any]:
        """
        Execute an Advertools intelligence task.

        Args:
            task_stub: Object exposing ``id``, ``user_id`` and ``payload``
                attributes (read with ``getattr``, missing ones default to
                None / empty payload). ``payload`` must provide ``type`` and
                ``website_url``.
            db: Database session used for task state, execution log and
                WebsiteAnalysis updates.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The service result dictionary on a completed run, or
            ``{"success": False, "error": <message>}`` when an exception
            was raised. Exceptions are not propagated to the caller.
        """
        start_time = datetime.utcnow()
        task_id = getattr(task_stub, 'id', None)
        user_id = getattr(task_stub, 'user_id', None)
        payload = getattr(task_stub, 'payload', {}) or {}
        task_type = payload.get('type')
        website_url = payload.get('website_url')

        self.logger.info(f"🚀 Starting Advertools task {task_id} ({task_type}) for {website_url}")

        # Find the actual task record to update state. Stub tasks (non-integer
        # ids) have no persistent record.
        task_record = None
        if isinstance(task_id, int):
            task_record = db.query(AdvertoolsTask).filter(AdvertoolsTask.id == task_id).first()

        try:
            if not website_url:
                raise ValueError("Missing website_url in payload")
            # Reject unsupported types before the sitemap network call and
            # before the record is committed as 'running'.
            if task_type not in self._SUPPORTED_TASK_TYPES:
                raise ValueError(f"Unknown task type: {task_type}")

            # 1. Discover exact sitemap URL first (essential for Advertools)
            discovered_sitemap = await self.sitemap_service.discover_sitemap_url(website_url)
            effective_url = discovered_sitemap if discovered_sitemap else website_url

            # Set status to running for UI feedback
            if task_record:
                task_record.status = 'running'
                db.commit()

            if task_type == 'content_audit':
                # Audit content themes using sample URLs taken from the sitemap.
                sitemap_result = await self.advertools_service.analyze_sitemap(effective_url)

                audit_urls = []
                if sitemap_result.get('success'):
                    # Use the sample URLs returned by the service
                    audit_urls = sitemap_result.get('metrics', {}).get('audit_sample_urls', [])

                if not audit_urls:
                    # Fallback to homepage if sitemap fails or empty
                    audit_urls = [website_url]

                result = await self.advertools_service.audit_content(audit_urls)

                if result.get('success'):
                    await self._update_persona_augmentation(user_id, website_url, result, db)
            else:
                # 'site_health': check site health (freshness, velocity)
                result = await self.advertools_service.analyze_sitemap(effective_url)

                if result.get('success'):
                    await self._update_site_health_metrics(user_id, website_url, result, db)

            success = result.get('success', False)
            execution_time_ms = int((datetime.utcnow() - start_time).total_seconds() * 1000)

            # Update task state
            if task_record:
                finished_at = datetime.utcnow()
                task_record.last_executed = finished_at
                if success:
                    task_record.last_success = finished_at
                    task_record.consecutive_failures = 0
                    task_record.status = 'active'
                    # Regular cadence; a success also resets any backoff.
                    freq_days = task_record.frequency_days or self._DEFAULT_FREQUENCY_DAYS
                    task_record.next_execution = finished_at + timedelta(days=freq_days)
                else:
                    self._record_failure(task_record, result.get('error', 'Unknown error'))

            # Create execution log
            if isinstance(task_id, int):
                log_entry = AdvertoolsExecutionLog(
                    task_id=task_id,
                    status='success' if success else 'failed',
                    result_data=result,
                    error_message=result.get('error'),
                    execution_time_ms=execution_time_ms
                )
                db.add(log_entry)

            db.commit()

            if success:
                self.logger.info(f"✅ Advertools task {task_id} completed successfully")
            else:
                self.logger.warning(f"⚠️ Advertools task {task_id} failed: {result.get('error')}")

            return result

        except Exception as e:
            db.rollback()
            self.logger.error(f"❌ Advertools task execution failed: {e}")

            # Try to update task record with failure even if main logic failed.
            # The same bookkeeping as the soft-failure path is applied so the
            # record does not stay in 'running' without a retry date.
            if task_record:
                try:
                    task_record.last_executed = datetime.utcnow()
                    self._record_failure(task_record, str(e))
                    db.commit()
                except Exception as bookkeeping_error:
                    db.rollback()
                    self.logger.warning(
                        f"Could not record failure for Advertools task {task_id}: {bookkeeping_error}"
                    )

            return {"success": False, "error": str(e)}

    def _record_failure(self, task_record: Any, reason: str) -> None:
        """Apply failure bookkeeping (counter, backoff, status) to a task record."""
        now = datetime.utcnow()
        task_record.last_failure = now
        task_record.failure_reason = reason
        task_record.consecutive_failures = (task_record.consecutive_failures or 0) + 1

        # Exponential backoff for repeated failures, capped at _MAX_BACKOFF_DAYS.
        freq_days = task_record.frequency_days or self._DEFAULT_FREQUENCY_DAYS
        backoff_days = min(
            self._MAX_BACKOFF_DAYS,
            freq_days * (2 ** (task_record.consecutive_failures - 1)),
        )
        task_record.next_execution = now + timedelta(days=backoff_days)

        if task_record.consecutive_failures >= self._MAX_CONSECUTIVE_FAILURES:
            task_record.status = 'failed'  # Mark as failed after 5 attempts
        else:
            # The run is over, so clear the transient 'running' state.
            # NOTE(review): presumably the task loader picks up 'active'
            # tasks for retry at next_execution - confirm against the loader.
            task_record.status = 'active'

    def _load_website_analysis(self, user_id: str, db: Session):
        """Return the WebsiteAnalysis row for the user's onboarding session, or None."""
        session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
        if not session:
            self.logger.warning(f"No onboarding session found for user {user_id}")
            return None

        analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
        if not analysis:
            self.logger.warning(f"No website analysis found for user {user_id}")
            return None

        return analysis

    async def _update_persona_augmentation(self, user_id: str, website_url: str, audit_result: Dict[str, Any], db: Session):
        """
        Updates the user's Brand Persona with discovered themes from the content audit.

        Writes ``augmented_themes`` and ``last_advertools_audit`` into
        ``brand_analysis`` and, when the audit reports ``avg_word_count``,
        ``avg_content_length`` into ``content_strategy_insights``. Nothing is
        committed here; the caller commits. Does nothing when the user has no
        onboarding session or website analysis. Errors are logged and re-raised.
        """
        try:
            analysis = self._load_website_analysis(user_id, db)
            if analysis is None:
                return

            # Update brand_analysis with augmented themes
            current_brand = analysis.brand_analysis or {}
            current_brand['augmented_themes'] = audit_result.get('themes', [])
            current_brand['last_advertools_audit'] = datetime.utcnow().isoformat()

            # Assign back: when the column was empty/None, current_brand is a
            # fresh dict that would otherwise never reach the model.
            analysis.brand_analysis = current_brand

            # Force SQLAlchemy to detect change in JSON field
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(analysis, "brand_analysis")

            # Also update content_strategy_insights if relevant
            if 'avg_word_count' in audit_result:
                current_strategy = analysis.content_strategy_insights or {}
                current_strategy['avg_content_length'] = audit_result['avg_word_count']
                analysis.content_strategy_insights = current_strategy
                flag_modified(analysis, "content_strategy_insights")

            self.logger.info(f"Updated persona augmentation for {user_id}")

        except Exception as e:
            self.logger.error(f"Failed to update persona augmentation: {e}")
            raise

    async def _update_site_health_metrics(self, user_id: str, website_url: str, health_result: Dict[str, Any], db: Session):
        """
        Updates the WebsiteAnalysis with site health metrics (velocity, freshness).

        Stores a ``site_health`` summary and ``last_advertools_health_check``
        timestamp inside ``seo_audit``. Nothing is committed here; the caller
        commits. Does nothing when the user has no onboarding session or
        website analysis. Errors are logged and re-raised.
        """
        try:
            analysis = self._load_website_analysis(user_id, db)
            if analysis is None:
                return

            # Update seo_audit with health metrics
            current_seo = analysis.seo_audit or {}
            metrics = health_result.get('metrics', {})

            current_seo['site_health'] = {
                "total_urls": metrics.get('total_urls'),
                "publishing_velocity": metrics.get('publishing_velocity'),
                "stale_content_count": metrics.get('stale_content_count'),
                "stale_content_percentage": metrics.get('stale_content_percentage'),
                "top_pillars": metrics.get('top_pillars')
            }
            current_seo['last_advertools_health_check'] = datetime.utcnow().isoformat()

            analysis.seo_audit = current_seo

            # Force SQLAlchemy to detect change in JSON field
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(analysis, "seo_audit")

            self.logger.info(f"Updated site health metrics for {user_id}")

        except Exception as e:
            self.logger.error(f"Failed to update site health metrics: {e}")
            raise

View File

@@ -15,6 +15,7 @@ from ..core.exception_handler import TaskExecutionError, DatabaseError, Schedule
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
from services.bing_analytics_storage_service import BingAnalyticsStorageService
from services.integrations.bing_oauth import BingOAuthService
from services.database import get_user_db_path
from utils.logger_utils import get_service_logger
logger = get_service_logger("bing_insights_executor")
@@ -34,8 +35,6 @@ class BingInsightsExecutor(TaskExecutor):
def __init__(self):
self.logger = logger
self.exception_handler = SchedulerExceptionHandler()
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
self.storage_service = BingAnalyticsStorageService(database_url)
self.bing_oauth = BingOAuthService()
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
@@ -53,6 +52,11 @@ class BingInsightsExecutor(TaskExecutor):
user_id = task.user_id
site_url = task.site_url
# Initialize storage service for this user
db_path = get_user_db_path(user_id)
database_url = f'sqlite:///{db_path}'
storage_service = BingAnalyticsStorageService(database_url)
try:
self.logger.info(
f"Executing Bing insights fetch: task_id={task.id} | "
@@ -69,7 +73,7 @@ class BingInsightsExecutor(TaskExecutor):
db.flush()
# Fetch insights
result = await self._fetch_insights(task, db)
result = await self._fetch_insights(task, db, storage_service)
# Update execution log
execution_time_ms = int((time.time() - start_time) * 1000)
@@ -184,7 +188,7 @@ class BingInsightsExecutor(TaskExecutor):
return error_result
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session, storage_service: BingAnalyticsStorageService) -> TaskExecutionResult:
"""
Fetch Bing insights data.
@@ -201,7 +205,7 @@ class BingInsightsExecutor(TaskExecutor):
if is_first_run:
# First run: Try to load from cache
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
cached_data = self._load_cached_data(user_id, site_url)
cached_data = self._load_cached_data(user_id, site_url, storage_service)
if cached_data:
self.logger.info(f"Loaded cached Bing data for user {user_id}")
@@ -216,11 +220,11 @@ class BingInsightsExecutor(TaskExecutor):
else:
# No cached data - try to fetch from API
self.logger.info(f"No cached data found, fetching from Bing API")
return await self._fetch_fresh_data(user_id, site_url)
return await self._fetch_fresh_data(user_id, site_url, storage_service)
else:
# Subsequent run: Always fetch fresh data
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
return await self._fetch_fresh_data(user_id, site_url)
return await self._fetch_fresh_data(user_id, site_url, storage_service)
except Exception as e:
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
@@ -230,11 +234,11 @@ class BingInsightsExecutor(TaskExecutor):
result_data={'error': str(e)}
)
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
def _load_cached_data(self, user_id: str, site_url: Optional[str], storage_service: BingAnalyticsStorageService) -> Optional[Dict[str, Any]]:
"""Load most recent cached Bing data from database."""
try:
# Get analytics summary from storage service
summary = self.storage_service.get_analytics_summary(
summary = storage_service.get_analytics_summary(
user_id=user_id,
site_url=site_url or '',
days=30
@@ -250,7 +254,7 @@ class BingInsightsExecutor(TaskExecutor):
self.logger.warning(f"Error loading cached Bing data: {e}")
return None
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str], storage_service: BingAnalyticsStorageService) -> TaskExecutionResult:
"""Fetch fresh Bing insights from API."""
try:
# Check if user has active tokens
@@ -288,7 +292,7 @@ class BingInsightsExecutor(TaskExecutor):
# For now, use stored analytics data (Bing API integration can be added later)
# This ensures we have data available even if the API class doesn't exist yet
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)
summary = storage_service.get_analytics_summary(user_id, site_url, days=30)
if summary and isinstance(summary, dict):
# Format insights data from stored analytics

View File

@@ -0,0 +1,200 @@
import time
from datetime import datetime, timedelta
from typing import Any, Dict
from sqlalchemy.orm import Session
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
from models.website_analysis_monitoring_models import (
DeepCompetitorAnalysisTask,
DeepCompetitorAnalysisExecutionLog
)
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
from services.scheduler.core.failure_detection_service import FailureDetectionService
from services.seo.deep_competitor_analysis_service import DeepCompetitorAnalysisService
from utils.logger_utils import get_service_logger
logger = get_service_logger("deep_competitor_analysis_executor")
class DeepCompetitorAnalysisExecutor(TaskExecutor):
def __init__(self):
    """Instantiate the two service objects this executor keeps on the instance."""
    # Stored for later use by the executor's task-running code.
    self.analysis_service = DeepCompetitorAnalysisService()
    # execute_task reads the user's integrated onboarding data through this
    # service (get_integrated_data_sync).
    self.integration_service = OnboardingDataIntegrationService()
async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
start_time = time.time()
if not isinstance(task, DeepCompetitorAnalysisTask):
return TaskExecutionResult(
success=False,
error_message="Invalid task type for deep competitor analysis",
retryable=False
)
task_log = DeepCompetitorAnalysisExecutionLog(
task_id=task.id,
status="running",
execution_date=datetime.utcnow()
)
db.add(task_log)
db.commit()
user_id = str(task.user_id)
try:
integrated = self.integration_service.get_integrated_data_sync(user_id, db)
website_analysis = integrated.get("website_analysis") if isinstance(integrated, dict) else {}
payload = task.payload if isinstance(task.payload, dict) else {}
competitors = payload.get("competitors")
if not isinstance(competitors, list) or not competitors:
# Try to get from research_preferences
research_prefs = integrated.get("research_preferences") if isinstance(integrated, dict) else {}
if isinstance(research_prefs, dict):
competitors = research_prefs.get("competitors")
# If still not found, try to get from competitor_analysis (Step 3 persistence)
if not isinstance(competitors, list) or not competitors:
competitors = integrated.get("competitor_analysis") if isinstance(integrated, dict) else []
if not isinstance(competitors, list) or not competitors:
logger.warning(f"Deep competitor analysis skipped for user {user_id}: No competitors found")
task_log.status = "skipped"
task_log.result_data = {"status": "skipped", "reason": "no_competitors"}
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
# Mark task as completed but maybe pause it until user adds competitors?
# Or just treat it as success (empty report) so it doesn't retry endlessly
task.last_executed = datetime.utcnow()
task.last_success = datetime.utcnow()
task.status = "paused" # Pause it so it doesn't run again until triggered manually
task.next_execution = None
task.consecutive_failures = 0
db.commit()
return TaskExecutionResult(
success=True,
result_data={"status": "skipped", "reason": "no_competitors"},
execution_time_ms=task_log.execution_time_ms,
retryable=False
)
max_competitors = int(payload.get("max_competitors") or 25)
crawl_concurrency = int(payload.get("crawl_concurrency") or 4)
mode = payload.get("mode", "deep_analysis")
if mode == "strategic_insights":
logger.info(f"Executing weekly strategic insights for user {user_id}")
report = await self.analysis_service.generate_weekly_strategy_brief(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors
)
# Persist to WebsiteAnalysis history
analysis_id = website_analysis.get('id')
if analysis_id:
from models.onboarding import WebsiteAnalysis
from sqlalchemy.orm.attributes import flag_modified
wa = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.id == analysis_id).first()
if wa:
history = wa.strategic_insights_history or []
if not isinstance(history, list):
history = []
history.insert(0, report)
wa.strategic_insights_history = history[:52]
flag_modified(wa, "strategic_insights_history")
db.commit()
else:
report = await self.analysis_service.run(
user_id=user_id,
website_analysis=website_analysis if isinstance(website_analysis, dict) else {},
competitors=competitors,
max_competitors=max_competitors,
crawl_concurrency=crawl_concurrency
)
task.last_executed = datetime.utcnow()
task.last_success = datetime.utcnow()
# If it's a recurring task (strategic_insights), set next execution
if mode == "strategic_insights":
task.status = "active"
task.next_execution = self.calculate_next_execution(task, "weekly", task.last_executed)
else:
task.status = "paused"
task.next_execution = None
task.consecutive_failures = 0
task.failure_pattern = None
task.failure_reason = None
task_log.status = "success"
task_log.result_data = report
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
db.commit()
try:
await self.integration_service.refresh_integrated_data(user_id, db)
except Exception as e:
logger.warning(f"Deep competitor analysis SSOT refresh failed for user {user_id}: {e}")
return TaskExecutionResult(
success=True,
result_data=report,
execution_time_ms=task_log.execution_time_ms,
retryable=False
)
except Exception as e:
db.rollback()
logger.warning(f"Deep competitor analysis task failed for user {user_id}: {e}")
failure_detection = FailureDetectionService(db)
pattern = failure_detection.analyze_task_failures(task.id, "deep_competitor_analysis", user_id)
task.last_executed = datetime.utcnow()
task.last_failure = datetime.utcnow()
task.failure_reason = str(e)
task.consecutive_failures = (task.consecutive_failures or 0) + 1
if pattern and pattern.should_cool_off:
task.status = "needs_intervention"
task.failure_pattern = {
"consecutive_failures": pattern.consecutive_failures,
"recent_failures": pattern.recent_failures,
"failure_reason": pattern.failure_reason.value,
"error_patterns": pattern.error_patterns,
"cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
}
task.next_execution = None
else:
task.status = "failed"
task.next_execution = datetime.utcnow() + timedelta(minutes=30)
task_log.status = "failed"
task_log.error_message = str(e)
task_log.execution_time_ms = int((time.time() - start_time) * 1000)
db.add(task_log)
db.commit()
return TaskExecutionResult(
success=False,
error_message=str(e),
execution_time_ms=task_log.execution_time_ms,
retryable=(task.status != "needs_intervention"),
retry_delay=1800
)
def calculate_next_execution(self, task: Any, frequency: str, last_execution: datetime = None) -> datetime:
base = last_execution or datetime.utcnow()
if frequency == "weekly":
return base + timedelta(days=7)
return base + timedelta(days=365)

View File

@@ -0,0 +1,179 @@
import time
from datetime import datetime, timedelta
from typing import Any, Dict, Optional
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import (
DeepWebsiteCrawlTask,
DeepWebsiteCrawlExecutionLog
)
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
from services.scheduler.core.failure_detection_service import FailureDetectionService
from services.research.deep_crawl_service import DeepCrawlService
from utils.logger_utils import get_service_logger
logger = get_service_logger("deep_website_crawl_executor")
class DeepWebsiteCrawlExecutor(TaskExecutor):
    """Scheduler executor that runs a deep crawl for ``DeepWebsiteCrawlTask`` rows.

    This executor owns the execution log and the task bookkeeping (status,
    timestamps, next run); the crawl itself is delegated to
    ``DeepCrawlService.execute_deep_crawl``.
    """

    def __init__(self):
        self.crawl_service = DeepCrawlService()

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """Run one crawl, record the outcome and reschedule the task.

        Args:
            task: Expected to be a ``DeepWebsiteCrawlTask``; any other type
                yields a non-retryable failure result.
            db: SQLAlchemy session used for the task and its execution log.

        Returns:
            A ``TaskExecutionResult``. On success the task stays ``"active"``
            and is rescheduled one week out; exceptions are caught and
            converted into a failure result with a one-hour retry delay.
        """
        start_time = time.time()

        if not isinstance(task, DeepWebsiteCrawlTask):
            return TaskExecutionResult(
                success=False,
                error_message="Invalid task type for deep website crawl",
                retryable=False
            )

        task_log = DeepWebsiteCrawlExecutionLog(
            task_id=task.id,
            status="running",
            execution_date=datetime.utcnow()
        )
        db.add(task_log)
        db.commit()

        user_id = str(task.user_id)
        website_url = task.website_url

        try:
            logger.info(f"Executing deep website crawl for user {user_id}, url {website_url}")

            # task_id is deliberately NOT passed. Per the original author's
            # notes, execute_deep_crawl(user_id, website_url, task_id=None)
            # writes its own execution log and task status only when task_id
            # is given, and otherwise just returns the result dict. This
            # executor already finalizes the "running" log created above and
            # updates the task, so passing task_id would duplicate both.
            # NOTE(review): confirm against DeepCrawlService.execute_deep_crawl.
            result = await self.crawl_service.execute_deep_crawl(
                user_id=user_id,
                website_url=website_url
            )

            now = datetime.utcnow()
            task.last_executed = now
            task.last_success = now
            # Crawls are treated as recurring, so the task stays active and
            # is rescheduled weekly.
            task.status = "active"
            task.next_execution = self.calculate_next_execution(task, "Weekly", now)
            task.consecutive_failures = 0
            task.failure_pattern = None
            task.failure_reason = None

            task_log.status = "success"
            task_log.result_data = result
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)
            db.commit()

            return TaskExecutionResult(
                success=True,
                result_data=result,
                execution_time_ms=task_log.execution_time_ms,
                retryable=False
            )
        except Exception as e:
            db.rollback()
            logger.warning(f"Deep website crawl task failed for user {user_id}: {e}")

            failure_detection = FailureDetectionService(db)
            pattern = failure_detection.analyze_task_failures(task.id, "deep_website_crawl", user_id)

            now = datetime.utcnow()
            task.last_executed = now
            task.last_failure = now
            task.failure_reason = str(e)
            task.consecutive_failures = (task.consecutive_failures or 0) + 1

            if pattern and pattern.should_cool_off:
                task.status = "needs_intervention"
                task.failure_pattern = {
                    "consecutive_failures": pattern.consecutive_failures,
                    "recent_failures": pattern.recent_failures,
                    "failure_reason": pattern.failure_reason.value,
                    "error_patterns": pattern.error_patterns,
                    "cool_off_until": (now + timedelta(days=7)).isoformat()
                }
                task.next_execution = None
            else:
                task.status = "failed"
                task.next_execution = now + timedelta(minutes=60)  # retry in one hour

            task_log.status = "failed"
            task_log.error_message = str(e)
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)
            db.add(task_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=task_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=3600
            )

    def calculate_next_execution(
        self,
        task: Any,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """Return the next run time for ``frequency``.

        ``'Daily'`` adds one day, ``'Weekly'`` one week and ``'Monthly'``
        30 days to ``last_execution`` (now, UTC, when omitted). Any other
        value falls back to one week.
        """
        if not last_execution:
            last_execution = datetime.utcnow()

        intervals = {
            'Daily': timedelta(days=1),
            'Weekly': timedelta(weeks=1),
            'Monthly': timedelta(days=30),
        }
        # Unknown frequencies default to weekly.
        return last_execution + intervals.get(frequency, timedelta(weeks=1))

View File

@@ -0,0 +1,232 @@
"""
Market Trends Executor
Runs Google Trends (pytrends) periodically and embeds results into the user SIF index.
"""
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import MarketTrendsTask, MarketTrendsExecutionLog
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
from services.scheduler.core.failure_detection_service import FailureDetectionService
from services.intelligence.sif_integration import SIFIntegrationService
from services.research.trends.google_trends_service import GoogleTrendsService
from utils.logger_utils import get_service_logger
logger = get_service_logger("market_trends_executor")
class MarketTrendsExecutor(TaskExecutor):
    """Scheduler executor that runs Google Trends for up to five keywords.

    Each run (including one whose trends lookup failed or had no keywords)
    is handed to ``SIFIntegrationService.index_market_trends_run`` and the
    task is rescheduled ``task.frequency_hours`` (default 72) hours later.
    """

    def __init__(self):
        """No per-instance state; services are created per run."""

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """Run one market-trends task and persist its outcome.

        Args:
            task: Expected to be a ``MarketTrendsTask``; any other type
                yields a non-retryable failure result.
            db: SQLAlchemy session used for the task, its execution log and
                keyword selection.

        Returns:
            A ``TaskExecutionResult``. A failed trends lookup does not fail
            the task: an error record is indexed instead. Other exceptions
            are caught and converted into a failure result (retry in 6 hours
            unless the failure pattern requires intervention).
        """
        start_time = time.time()

        if not isinstance(task, MarketTrendsTask):
            return TaskExecutionResult(success=False, error_message="Invalid task type for market trends", retryable=False)

        task_log = MarketTrendsExecutionLog(task_id=task.id, status="running", execution_date=datetime.utcnow())
        db.add(task_log)
        db.commit()

        user_id = str(task.user_id)
        website_url = task.website_url
        payload = task.payload if isinstance(task.payload, dict) else {}

        try:
            geo = payload.get("geo") or "US"
            timeframe = payload.get("timeframe") or "today 12-m"

            sif_service = SIFIntegrationService(user_id)

            keywords = await self._select_keywords_for_user(db=db, user_id=user_id, website_url=website_url)
            if not keywords:
                keywords = payload.get("keywords") or []
            if isinstance(keywords, str):
                # A bare string would otherwise be split into characters.
                keywords = [keywords]
            keywords = [text for text in (str(k).strip() for k in keywords) if text][:5]

            def error_result(message: str) -> Dict[str, Any]:
                # Placeholder record indexed when no trends data is available.
                return {
                    "error": message,
                    "keywords": keywords,
                    "timeframe": timeframe,
                    "geo": geo,
                    "timestamp": datetime.utcnow().isoformat(),
                    "cached": False,
                }

            trends_result: Dict[str, Any]
            if keywords:
                try:
                    trends_result = await GoogleTrendsService().analyze_trends(
                        keywords=keywords, timeframe=timeframe, geo=geo, user_id=user_id
                    )
                except Exception as trends_err:
                    logger.warning(f"Google Trends lookup failed for user {user_id}: {trends_err}")
                    trends_result = error_result(str(trends_err))
            else:
                trends_result = error_result("No keywords available for market trends run")

            run_id = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
            await sif_service.index_market_trends_run(trends_result=trends_result, run_id=run_id)

            now = datetime.utcnow()
            task.last_executed = now
            task.last_success = now
            task.next_execution = now + timedelta(hours=task.frequency_hours or 72)
            task.status = "active"
            task.consecutive_failures = 0
            task.failure_pattern = None
            task.failure_reason = None

            task_log.status = "success"
            task_log.result_data = {
                "run_id": run_id,
                "keywords": trends_result.get("keywords", keywords),
                "geo": geo,
                "timeframe": timeframe,
                "cached": trends_result.get("cached", False),
            }
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)
            db.commit()

            return TaskExecutionResult(
                success=True,
                result_data=task_log.result_data,
                execution_time_ms=task_log.execution_time_ms,
                retryable=False,
            )
        except Exception as e:
            db.rollback()
            logger.warning(f"Market trends task failed for user {user_id}: {e}")

            failure_detection = FailureDetectionService(db)
            pattern = failure_detection.analyze_task_failures(task.id, "market_trends", user_id)

            now = datetime.utcnow()
            task.last_executed = now
            task.last_failure = now
            task.failure_reason = str(e)
            task.consecutive_failures = (task.consecutive_failures or 0) + 1

            if pattern and pattern.should_cool_off:
                task.status = "needs_intervention"
                task.failure_pattern = {
                    "consecutive_failures": pattern.consecutive_failures,
                    "recent_failures": pattern.recent_failures,
                    "failure_reason": pattern.failure_reason.value,
                    "error_patterns": pattern.error_patterns,
                    "cool_off_until": (now + timedelta(days=7)).isoformat(),
                }
                task.next_execution = None
            else:
                task.status = "active"
                task.next_execution = now + timedelta(hours=6)

            task_log.status = "failed"
            task_log.error_message = str(e)
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)
            db.add(task_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=task_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=21600,
            )

    async def _select_keywords_for_user(self, db: Session, user_id: str, website_url: str) -> List[str]:
        """Pick up to five distinct keywords for the user.

        The latest ``EnhancedContentStrategy`` is tried first (emerging
        trends, industry trends, market gaps, competitor content strategies).
        If that yields nothing, the ``topic_clusters`` of the latest
        ``WebsiteAnalysis`` are used. Lookup errors are logged and treated
        as "no keywords from this source". ``website_url`` is not used.
        """
        keywords: List[str] = []

        try:
            from sqlalchemy import select, desc
            from models.enhanced_strategy_models import EnhancedContentStrategy

            stmt = (
                select(EnhancedContentStrategy)
                .where(EnhancedContentStrategy.user_id == user_id)
                .order_by(desc(EnhancedContentStrategy.updated_at))
            )
            strategy = db.execute(stmt).scalars().first()
            if strategy:
                for source in (
                    strategy.emerging_trends,
                    strategy.industry_trends,
                    strategy.market_gaps,
                    strategy.competitor_content_strategies,
                ):
                    if source:
                        keywords.extend(self._extract_strings(source))
        except Exception as e:
            # Best effort: fall through to the next keyword source.
            logger.warning(f"Market trends keyword lookup (content strategy) failed for user {user_id}: {e}")

        if not keywords:
            try:
                from sqlalchemy import select, desc
                from models.onboarding import WebsiteAnalysis, OnboardingSession

                stmt = (
                    select(WebsiteAnalysis)
                    .join(OnboardingSession, WebsiteAnalysis.session_id == OnboardingSession.id)
                    .where(OnboardingSession.user_id == user_id)
                    .order_by(desc(WebsiteAnalysis.created_at))
                )
                wa = db.execute(stmt).scalars().first()
                insights = wa.content_strategy_insights if wa else None
                if isinstance(insights, dict):
                    ai_strategy = insights.get("ai_strategy")
                    if isinstance(ai_strategy, dict):
                        keywords.extend(self._extract_strings(ai_strategy.get("topic_clusters") or []))
            except Exception as e:
                logger.warning(f"Market trends keyword lookup (website analysis) failed for user {user_id}: {e}")

        # Case-insensitive de-duplication that keeps first-seen order/casing.
        deduped: List[str] = []
        seen = set()
        for k in keywords:
            kk = str(k).strip()
            if not kk or kk.lower() in seen:
                continue
            seen.add(kk.lower())
            deduped.append(kk)

        return deduped[:5]

    def _extract_strings(self, value: Any) -> List[str]:
        """Flatten ``value`` into a list of strings.

        Strings are returned as-is, lists are flattened recursively, dicts
        contribute the truthy values of the keys ``keyword``, ``topic``,
        ``title``, ``name`` and ``label``, ``None`` yields nothing and any
        other value is stringified.
        """
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        if isinstance(value, list):
            flattened: List[str] = []
            for item in value:
                flattened.extend(self._extract_strings(item))
            return flattened
        if isinstance(value, dict):
            return [
                str(value[k])
                for k in ("keyword", "topic", "title", "name", "label")
                if value.get(k)
            ]
        return [str(value)]

    def calculate_next_execution(self, task: Any, frequency: str, last_execution: Optional[datetime] = None) -> datetime:
        """Return ``last_execution`` (or now, UTC) plus the task's ``frequency_hours`` (default 72).

        ``frequency`` is ignored.
        """
        base = last_execution or datetime.utcnow()
        hours = getattr(task, "frequency_hours", 72) or 72
        return base + timedelta(hours=hours)

View File

@@ -21,6 +21,7 @@ from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
from services.integrations.wordpress_oauth import WordPressOAuthService
from services.wix_service import WixService
from services.database import get_user_db_path
logger = get_service_logger("oauth_token_monitoring_executor")
@@ -289,8 +290,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
GSC service auto-refreshes tokens if expired when loading credentials.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
# Use dynamic database path
db_path = get_user_db_path(user_id)
gsc_service = GSCService(db_path=db_path)
credentials = gsc_service.load_user_credentials(user_id)
@@ -341,9 +342,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
Checks token expiration and attempts refresh if needed.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
bing_service = BingOAuthService(db_path=db_path)
# Initialize Bing service
bing_service = BingOAuthService()
# Get token status (includes expired tokens)
token_status = bing_service.get_user_token_status(user_id)
@@ -502,8 +502,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
and require user re-authorization. We only check if token is valid.
"""
try:
# Use absolute database path for consistency with onboarding
db_path = os.path.abspath("alwrity.db")
# Use dynamic database path
db_path = get_user_db_path(user_id)
wordpress_service = WordPressOAuthService(db_path=db_path)
tokens = wordpress_service.get_user_tokens(user_id)

View File

@@ -0,0 +1,584 @@
import asyncio
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Set
from urllib.parse import urljoin, urlparse
import aiohttp
from bs4 import BeautifulSoup
from loguru import logger
from sqlalchemy.orm import Session
from models.onboarding import SEOPageAudit
from models.website_analysis_monitoring_models import (
OnboardingFullWebsiteAnalysisTask,
OnboardingFullWebsiteAnalysisExecutionLog
)
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
from services.scheduler.core.failure_detection_service import FailureDetectionService
from services.seo_analyzer.analyzers import (
MetaDataAnalyzer,
TechnicalSEOAnalyzer,
ContentAnalyzer,
URLStructureAnalyzer,
AccessibilityAnalyzer,
UserExperienceAnalyzer
)
class OnboardingFullWebsiteAnalysisExecutor(TaskExecutor):
def __init__(self):
    """Set crawl limits, score thresholds and category weights."""
    # Crawl / HTTP limits.
    self.max_urls_default = 500
    self.http_timeout_seconds = 25
    self.http_concurrency = 10

    # Overall-score cut-offs used to classify a page.
    self.healthy_threshold = 80
    self.warning_threshold = 60

    # Relative weight of each category in the overall page score.
    self.weights = dict(
        meta=0.15,
        content=0.20,
        technical=0.20,
        performance=0.20,
        accessibility=0.10,
        ux=0.10,
        security=0.05,
    )

    self.logger = logger.bind(component="OnboardingFullWebsiteAnalysisExecutor")
async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
    """Run the one-off full-site SEO audit for an onboarding task.

    Args:
        task: Expected to be an ``OnboardingFullWebsiteAnalysisTask``; any
            other type yields a non-retryable failure result.
        db: SQLAlchemy session used for the task, its execution log and the
            per-page audit rows.

    Returns:
        A ``TaskExecutionResult``. On success the task is paused (no next
        execution). Exceptions are caught and converted into a failure
        result with a 30-minute retry delay.
    """
    start_time = time.time()

    if not isinstance(task, OnboardingFullWebsiteAnalysisTask):
        return TaskExecutionResult(
            success=False,
            error_message="Invalid task type for onboarding full website analysis",
            retryable=False
        )

    task_log = OnboardingFullWebsiteAnalysisExecutionLog(
        task_id=task.id,
        status='running',
        execution_date=datetime.utcnow()
    )
    db.add(task_log)
    db.commit()

    user_id = str(task.user_id)
    website_url = task.website_url

    try:
        # Parsed inside the try so that a malformed payload is recorded as a
        # failed run instead of leaving the log row stuck in "running".
        payload = task.payload if isinstance(task.payload, dict) else {}
        max_urls = int(payload.get('max_urls') or self.max_urls_default)

        urls = await self._discover_urls(website_url, max_urls=max_urls)
        if not urls:
            raise ValueError("No URLs discovered for full-site analysis")

        results = await self._audit_urls(user_id, website_url, urls, db)

        now = datetime.utcnow()
        task.last_executed = now
        task.last_success = now
        task.status = 'paused'
        task.next_execution = None
        task.consecutive_failures = 0
        task.failure_pattern = None
        task.failure_reason = None

        task_log.status = 'success'
        task_log.result_data = results
        task_log.execution_time_ms = int((time.time() - start_time) * 1000)
        db.commit()

        return TaskExecutionResult(
            success=True,
            result_data=results,
            execution_time_ms=task_log.execution_time_ms,
            retryable=False
        )
    except Exception as e:
        db.rollback()
        # loguru has no `exc_info` flag: passing it as a kwarg records no
        # traceback and makes loguru str.format() the message, which can
        # itself raise when the error text contains braces.
        # `exception()` logs at ERROR level with the active traceback.
        self.logger.exception(f"Full-site SEO audit task failed: {e}")

        failure_detection = FailureDetectionService(db)
        pattern = failure_detection.analyze_task_failures(task.id, 'onboarding_full_website_analysis', user_id)

        now = datetime.utcnow()
        task.last_executed = now
        task.last_failure = now
        task.failure_reason = str(e)
        task.consecutive_failures = (task.consecutive_failures or 0) + 1

        if pattern and pattern.should_cool_off:
            task.status = "needs_intervention"
            task.failure_pattern = {
                "consecutive_failures": pattern.consecutive_failures,
                "recent_failures": pattern.recent_failures,
                "failure_reason": pattern.failure_reason.value,
                "error_patterns": pattern.error_patterns,
                "cool_off_until": (now + timedelta(days=7)).isoformat()
            }
            task.next_execution = None
        else:
            task.status = "failed"
            task.next_execution = now + timedelta(minutes=30)

        task_log.status = 'failed'
        task_log.error_message = str(e)
        task_log.execution_time_ms = int((time.time() - start_time) * 1000)
        db.add(task_log)
        db.commit()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=task_log.execution_time_ms,
            retryable=(task.status != "needs_intervention"),
            retry_delay=1800
        )
def calculate_next_execution(
    self,
    task: Any,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """Return a date 365 days after ``last_execution`` (now, UTC, if omitted).

    ``task`` and ``frequency`` are accepted for interface compatibility but
    are not used.
    """
    if last_execution:
        anchor = last_execution
    else:
        anchor = datetime.utcnow()
    one_year = timedelta(days=365)
    return anchor + one_year
async def _discover_urls(self, website_url: str, max_urls: int) -> List[str]:
base = self._normalize_url(website_url)
parsed = urlparse(base)
root = f"{parsed.scheme}://{parsed.netloc}"
sitemap_urls: List[str] = []
robots = await self._fetch_text(urljoin(root, "/robots.txt"))
if robots:
for line in robots.splitlines():
if line.lower().startswith("sitemap:"):
sitemap_urls.append(line.split(":", 1)[1].strip())
if not sitemap_urls:
candidates = [
urljoin(root, "/sitemap.xml"),
urljoin(root, "/sitemap_index.xml"),
urljoin(root, "/wp-sitemap.xml"),
]
sitemap_urls.extend(candidates)
discovered: List[str] = []
seen: Set[str] = set()
for sm in sitemap_urls:
if len(discovered) >= max_urls:
break
urls_from_sm = await self._parse_sitemap(sm, max_urls=max_urls - len(discovered))
for u in urls_from_sm:
n = self._normalize_url(u)
if n not in seen and self._same_site(root, n):
seen.add(n)
discovered.append(n)
if len(discovered) >= max_urls:
break
if not discovered:
discovered.append(base)
return discovered
async def _parse_sitemap(self, sitemap_url: str, max_urls: int) -> List[str]:
xml_text = await self._fetch_text(sitemap_url)
if not xml_text:
return []
try:
import xml.etree.ElementTree as ET
root = ET.fromstring(xml_text)
except Exception:
return []
ns = ""
if root.tag.startswith("{"):
ns = root.tag.split("}", 1)[0] + "}"
urls: List[str] = []
if root.tag.endswith("sitemapindex"):
locs = root.findall(f".//{ns}sitemap/{ns}loc")
for loc in locs:
if len(urls) >= max_urls:
break
child_url = (loc.text or "").strip()
if not child_url:
continue
child_urls = await self._parse_sitemap(child_url, max_urls=max_urls - len(urls))
urls.extend(child_urls)
else:
locs = root.findall(f".//{ns}url/{ns}loc")
for loc in locs:
if len(urls) >= max_urls:
break
u = (loc.text or "").strip()
if u:
urls.append(u)
return urls
async def _fetch_text(self, url: str) -> Optional[str]:
    """GET ``url`` and return the response body as text.

    Returns ``None`` for HTTP status >= 400 and for any exception raised
    while fetching or decoding (best effort).
    """
    request_headers = {"User-Agent": "ALwrity-SEO-Audit/1.0"}
    try:
        client_timeout = aiohttp.ClientTimeout(total=self.http_timeout_seconds)
        async with aiohttp.ClientSession(timeout=client_timeout) as http:
            async with http.get(url, allow_redirects=True, headers=request_headers) as response:
                if response.status < 400:
                    return await response.text(errors="ignore")
                return None
    except Exception:
        return None
async def _audit_urls(self, user_id: str, website_url: str, urls: List[str], db: Session) -> Dict[str, Any]:
    """Audit every URL concurrently and summarize the outcome.

    At most ``self.http_concurrency`` audits run at a time. An audit that
    raises is logged and counted as a failure; it does not abort the run.

    Returns:
        A summary dict with the number of discovered, audited and failed
        pages, the rounded average score of the audited pages (0 when none
        succeeded), the number of pages with status ``fix_scheduled`` and
        the ten lowest-scoring pages.
    """
    timeout = aiohttp.ClientTimeout(total=self.http_timeout_seconds)
    connector = aiohttp.TCPConnector(limit=self.http_concurrency)
    semaphore = asyncio.Semaphore(self.http_concurrency)

    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        async def audit_one(url: str) -> Dict[str, Any]:
            async with semaphore:
                return await self._audit_single_url(user_id, website_url, url, session, db)

        audited = await asyncio.gather(*(audit_one(u) for u in urls), return_exceptions=True)

    successes: List[Dict[str, Any]] = []
    failures: List[Any] = []
    # gather() preserves input order, so results line up with `urls`.
    for url, outcome in zip(urls, audited):
        if isinstance(outcome, dict) and outcome.get('success'):
            successes.append(outcome)
            continue
        failures.append(outcome)
        if isinstance(outcome, BaseException):
            # Without this, an audit that crashed would only show up as a
            # number in `pages_failed`.
            self.logger.warning(f"Page audit raised {type(outcome).__name__} for {url}: {outcome}")

    avg_score = round(sum(r['overall_score'] for r in successes) / len(successes)) if successes else 0
    fix_scheduled = sum(1 for r in successes if r.get('status') == 'fix_scheduled')
    worst_pages = sorted(
        [{'page_url': r['page_url'], 'overall_score': r['overall_score'], 'status': r.get('status')} for r in successes],
        key=lambda x: x['overall_score']
    )[:10]

    return {
        'website_url': website_url,
        'pages_discovered': len(urls),
        'pages_audited': len(successes),
        'pages_failed': len(failures),
        'avg_score': avg_score,
        'fix_scheduled_pages': fix_scheduled,
        'worst_pages': worst_pages,
    }
async def _audit_single_url(
    self,
    user_id: str,
    website_url: str,
    page_url: str,
    session: aiohttp.ClientSession,
    db: Session
) -> Dict[str, Any]:
    """Fetch one page, run all analyzers on it and store the audit.

    A fetch error, an HTTP status >= 400 or a non-HTML content type is
    stored as an ``'error'`` audit with score 0 and reported as a failure.

    Returns:
        ``{'success': True, 'page_url', 'overall_score', 'status'}`` for an
        audited page, otherwise ``{'success': False, 'page_url', 'error'}``.
    """
    # Keyword arguments shared by every _upsert_page_audit call below.
    row_key = dict(db=db, user_id=user_id, website_url=website_url, page_url=page_url)
    request_headers = {"User-Agent": "ALwrity-SEO-Audit/1.0"}

    started_at = time.time()
    try:
        async with session.get(page_url, allow_redirects=True, headers=request_headers) as response:
            http_status = response.status
            mime = response.headers.get("Content-Type", "")
            html = await response.text(errors="ignore")
            response_headers = dict(response.headers)
    except Exception as fetch_error:
        self._upsert_page_audit(
            **row_key,
            overall_score=0,
            status='error',
            audit_data={'error': str(fetch_error)}
        )
        return {'success': False, 'page_url': page_url, 'error': str(fetch_error)}

    # Includes the time needed to download the body.
    elapsed = time.time() - started_at

    is_auditable = http_status < 400 and "text/html" in mime.lower()
    if not is_auditable:
        self._upsert_page_audit(
            **row_key,
            overall_score=0,
            status='error',
            audit_data={'http_status': http_status, 'content_type': mime}
        )
        return {'success': False, 'page_url': page_url, 'error': f'HTTP {http_status} / {mime}'}

    soup = BeautifulSoup(html, 'html.parser')

    meta = MetaDataAnalyzer().analyze(soup)
    content = ContentAnalyzer().analyze(soup)
    technical = TechnicalSEOAnalyzer().analyze(page_url, soup)
    url_structure = URLStructureAnalyzer().analyze(page_url)
    accessibility = AccessibilityAnalyzer().analyze(html)
    ux = UserExperienceAnalyzer().analyze(html, page_url)
    performance = self._performance_from_fetch(elapsed, response_headers)
    security = self._security_from_headers(response_headers)

    scored_sections = (
        ('meta', meta),
        ('content', content),
        ('technical', technical),
        ('performance', performance),
        ('accessibility', accessibility),
        ('ux', ux),
        ('security', security),
        ('url_structure', url_structure),
    )
    category_scores = {name: section.get('score', 0) for name, section in scored_sections}

    overall_score = self._weighted_score(category_scores)

    # First threshold reached wins; anything below both is scheduled for a fix.
    page_status = 'fix_scheduled'
    for floor, label in ((self.healthy_threshold, 'healthy'), (self.warning_threshold, 'needs_review')):
        if overall_score >= floor:
            page_status = label
            break

    audit_data = {
        'meta': meta,
        'content_health': content,
        'technical': technical,
        'performance': performance,
        'url_structure': url_structure,
        'accessibility': accessibility,
        'ux': ux,
        'security_headers': security,
        'overall_score': overall_score,
    }

    findings = {
        kind: self._collect_findings(audit_data, key=kind)
        for kind in ('issues', 'warnings', 'recommendations')
    }

    self._upsert_page_audit(
        **row_key,
        overall_score=overall_score,
        status=page_status,
        category_scores=category_scores,
        audit_data=audit_data,
        **findings
    )

    return {
        'success': True,
        'page_url': page_url,
        'overall_score': overall_score,
        'status': page_status
    }
def _weighted_score(self, category_scores: Dict[str, int]) -> int:
total = 0.0
for key, weight in self.weights.items():
total += float(category_scores.get(key, 0)) * weight
return int(round(total))
def _collect_findings(self, audit_data: Dict[str, Any], key: str) -> List[Dict[str, Any]]:
findings: List[Dict[str, Any]] = []
for category, data in audit_data.items():
if not isinstance(data, dict):
continue
items = data.get(key)
if not isinstance(items, list):
continue
for item in items:
if isinstance(item, dict):
enriched = dict(item)
enriched.setdefault('category', category)
findings.append(enriched)
return findings
def _performance_from_fetch(self, load_time: float, headers: Dict[str, str]) -> Dict[str, Any]:
issues: List[Dict[str, Any]] = []
warnings: List[Dict[str, Any]] = []
recommendations: List[Dict[str, Any]] = []
if load_time > 3:
issues.append({
'type': 'critical',
'message': f'Page load time too slow ({load_time:.2f}s)',
'location': 'Page performance',
'current_value': f'{load_time:.2f}s',
'fix': 'Optimize page speed (target < 3 seconds)',
'code_example': 'Optimize images, minify CSS/JS, use CDN',
'action': 'optimize_page_speed'
})
elif load_time > 2:
warnings.append({
'type': 'warning',
'message': f'Page load time could be improved ({load_time:.2f}s)',
'location': 'Page performance',
'current_value': f'{load_time:.2f}s',
'fix': 'Optimize for faster loading',
'code_example': 'Compress images, enable caching',
'action': 'improve_page_speed'
})
content_encoding = headers.get('Content-Encoding')
if not content_encoding:
warnings.append({
'type': 'warning',
'message': 'No compression detected',
'location': 'Server configuration',
'fix': 'Enable GZIP/Brotli compression',
'code_example': 'Enable compression in server or CDN',
'action': 'enable_compression'
})
cache_headers = ['Cache-Control', 'Expires', 'ETag']
has_cache = any(headers.get(h) for h in cache_headers)
if not has_cache:
warnings.append({
'type': 'warning',
'message': 'No caching headers found',
'location': 'Server configuration',
'fix': 'Add caching headers',
'code_example': 'Cache-Control: max-age=31536000',
'action': 'add_caching_headers'
})
score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)
return {
'score': score,
'load_time': load_time,
'is_compressed': bool(content_encoding),
'has_cache': has_cache,
'issues': issues,
'warnings': warnings,
'recommendations': recommendations
}
def _security_from_headers(self, headers: Dict[str, str]) -> Dict[str, Any]:
security_headers = {
'X-Frame-Options': headers.get('X-Frame-Options'),
'X-Content-Type-Options': headers.get('X-Content-Type-Options'),
'X-XSS-Protection': headers.get('X-XSS-Protection'),
'Strict-Transport-Security': headers.get('Strict-Transport-Security'),
'Content-Security-Policy': headers.get('Content-Security-Policy'),
'Referrer-Policy': headers.get('Referrer-Policy')
}
issues: List[Dict[str, Any]] = []
warnings: List[Dict[str, Any]] = []
recommendations: List[Dict[str, Any]] = []
present_headers: List[str] = []
missing_headers: List[str] = []
for header_name, header_value in security_headers.items():
if header_value:
present_headers.append(header_name)
continue
missing_headers.append(header_name)
if header_name in ['X-Frame-Options', 'X-Content-Type-Options']:
issues.append({
'type': 'critical',
'message': f'Missing {header_name} header',
'location': 'Server configuration',
'fix': f'Add {header_name} header',
'code_example': f'{header_name}: DENY' if header_name == 'X-Frame-Options' else f'{header_name}: nosniff',
'action': f'add_{header_name.lower().replace("-", "_")}_header'
})
else:
warnings.append({
'type': 'warning',
'message': f'Missing {header_name} header',
'location': 'Server configuration',
'fix': f'Add {header_name} header for better security',
'code_example': f'{header_name}: max-age=31536000',
'action': f'add_{header_name.lower().replace("-", "_")}_header'
})
score = min(100, len(present_headers) * 16)
return {
'score': score,
'present_headers': present_headers,
'missing_headers': missing_headers,
'total_headers': len(present_headers),
'issues': issues,
'warnings': warnings,
'recommendations': recommendations
}
def _upsert_page_audit(
    self,
    db: Session,
    user_id: str,
    website_url: str,
    page_url: str,
    overall_score: int,
    status: str,
    category_scores: Optional[Dict[str, Any]] = None,
    issues: Optional[List[Dict[str, Any]]] = None,
    warnings: Optional[List[Dict[str, Any]]] = None,
    recommendations: Optional[List[Dict[str, Any]]] = None,
    audit_data: Optional[Dict[str, Any]] = None,
) -> None:
    """Insert or refresh the stored audit row for one page of one user.

    The row is looked up by ``user_id`` and ``page_url``. When it exists its
    audit fields are overwritten; otherwise a new ``SEOPageAudit`` is
    created. In both cases ``last_analyzed_at`` is set to the current UTC
    time and the session is committed.
    """
    record = db.query(SEOPageAudit).filter(
        SEOPageAudit.user_id == user_id,
        SEOPageAudit.page_url == page_url
    ).first()

    # Fields written identically on both the update and the insert path.
    audit_fields = {
        'website_url': website_url,
        'overall_score': overall_score,
        'status': status,
        'category_scores': category_scores,
        'issues': issues,
        'warnings': warnings,
        'recommendations': recommendations,
        'audit_data': audit_data,
        'last_analyzed_at': datetime.utcnow(),
    }

    if record:
        for field_name, field_value in audit_fields.items():
            setattr(record, field_name, field_value)
    else:
        record = SEOPageAudit(user_id=user_id, page_url=page_url, **audit_fields)

    db.add(record)
    db.commit()
def _normalize_url(self, url: str) -> str:
u = (url or "").strip()
if not u:
return ""
if not u.startswith("http://") and not u.startswith("https://"):
u = "https://" + u
parsed = urlparse(u)
normalized = parsed._replace(fragment="").geturl()
return normalized.rstrip("/")
def _same_site(self, root: str, url: str) -> bool:
try:
a = urlparse(root)
b = urlparse(url)
return a.netloc == b.netloc
except Exception:
return False

View File

@@ -0,0 +1,153 @@
"""
SIF Indexing Executor
Executes SIF indexing tasks (Step 2 metadata and User Website Content).
"""
import time
from datetime import datetime, timedelta
from typing import Any, Optional
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import (
SIFIndexingTask,
SIFIndexingExecutionLog
)
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
from services.scheduler.core.failure_detection_service import FailureDetectionService
from services.intelligence.sif_integration import SIFIntegrationService
from utils.logger_utils import get_service_logger
logger = get_service_logger("sif_indexing_executor")
class SIFIndexingExecutor(TaskExecutor):
    """
    Scheduler executor that runs SIF indexing tasks.

    For each task it:
    - syncs onboarding (Step 2) metadata into SIF
    - syncs the user's website content into SIF
    - reschedules the task for its next recurring run, or records the
      failure and schedules a retry / cool-off
    """

    def __init__(self):
        pass

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """Run one SIF indexing task and persist its outcome.

        An execution log row is created and committed up front with status
        ``running``. On completion the task is rescheduled and the log is
        marked ``success``; on any exception the session is rolled back, the
        failure is recorded on the task and the log is marked ``failed``.

        Args:
            task: Expected to be a ``SIFIndexingTask``; anything else yields
                a non-retryable failure result without touching the database.
            db: Database session used for the log and task updates.

        Returns:
            A ``TaskExecutionResult`` describing the run.
        """
        started_at = time.time()

        if not isinstance(task, SIFIndexingTask):
            return TaskExecutionResult(
                success=False,
                error_message="Invalid task type for SIF indexing",
                retryable=False
            )

        run_log = SIFIndexingExecutionLog(
            task_id=task.id,
            status="running",
            execution_date=datetime.utcnow()
        )
        db.add(run_log)
        db.commit()

        user_id = str(task.user_id)
        website_url = task.website_url

        try:
            logger.info(f"Executing SIF indexing for user {user_id} ({website_url})")
            service = SIFIntegrationService(user_id)

            # Metadata first, then the website content.
            metadata_synced = await service.sync_onboarding_data_to_sif()
            content_synced = await service.sync_user_website_content(website_url)

            # A run where neither sync reported data is still recorded as a
            # success; it is only surfaced through this warning.
            if not (metadata_synced or content_synced):
                logger.warning(f"SIF indexing completed but no data was synced/indexed for {user_id}")

            self._mark_success(task)

            run_log.status = "success"
            run_log.result_data = {
                "metadata_synced": metadata_synced,
                "content_synced": content_synced,
                "website_url": website_url
            }
            run_log.execution_time_ms = self._elapsed_ms(started_at)
            db.commit()

            return TaskExecutionResult(
                success=True,
                result_data=run_log.result_data,
                execution_time_ms=run_log.execution_time_ms,
                retryable=False
            )
        except Exception as e:
            db.rollback()
            logger.warning(f"SIF indexing task failed for user {user_id}: {e}")

            detector = FailureDetectionService(db)
            pattern = detector.analyze_task_failures(task.id, "sif_indexing", user_id)
            self._mark_failure(task, e, pattern)

            run_log.status = "failed"
            run_log.error_message = str(e)
            run_log.execution_time_ms = self._elapsed_ms(started_at)
            db.add(run_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=run_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=3600
            )

    @staticmethod
    def _elapsed_ms(started_at: float) -> int:
        """Return whole milliseconds elapsed since ``started_at``."""
        return int((time.time() - started_at) * 1000)

    @staticmethod
    def _mark_success(task: Any) -> None:
        """Record a successful run on the task and schedule the next one."""
        task.last_executed = datetime.utcnow()
        task.last_success = datetime.utcnow()

        # Recurring schedule; falls back to 48 hours when unset.
        interval_hours = task.frequency_hours or 48
        task.next_execution = datetime.utcnow() + timedelta(hours=interval_hours)

        task.status = "active"
        task.consecutive_failures = 0
        task.failure_pattern = None
        task.failure_reason = None

    @staticmethod
    def _mark_failure(task: Any, error: Exception, pattern: Any) -> None:
        """Record a failed run on the task and decide between retry and cool-off."""
        task.last_executed = datetime.utcnow()
        task.last_failure = datetime.utcnow()
        task.failure_reason = str(error)
        task.consecutive_failures = (task.consecutive_failures or 0) + 1

        if not (pattern and pattern.should_cool_off):
            # Keep the task active and retry in an hour.
            task.status = "active"
            task.next_execution = datetime.utcnow() + timedelta(minutes=60)
            return

        task.status = "needs_intervention"
        task.failure_pattern = {
            "consecutive_failures": pattern.consecutive_failures,
            "recent_failures": pattern.recent_failures,
            "failure_reason": pattern.failure_reason.value,
            "error_patterns": pattern.error_patterns,
            "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
        }
        task.next_execution = None

    def calculate_next_execution(self, task: Any, frequency: str, last_execution: datetime = None) -> datetime:
        """Return ``last_execution`` (or now, in UTC) plus the task's interval.

        The interval is the task's ``frequency_hours``, defaulting to 48 when
        the attribute is missing or falsy. ``frequency`` is not used.
        """
        anchor = last_execution or datetime.utcnow()
        interval_hours = getattr(task, 'frequency_hours', 48) or 48
        return anchor + timedelta(hours=interval_hours)

View File

@@ -282,11 +282,18 @@ class WebsiteAnalysisExecutor(TaskExecutor):
None,
partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
)
async def run_seo_audit():
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
partial(self.style_logic.perform_seo_audit, website_url, crawl_result['content'])
)
# Execute style and patterns analysis in parallel
style_analysis, patterns_result = await asyncio.gather(
style_analysis, patterns_result, seo_audit_result = await asyncio.gather(
run_style_analysis(),
run_patterns_analysis(),
run_seo_audit(),
return_exceptions=True
)
@@ -302,6 +309,12 @@ class WebsiteAnalysisExecutor(TaskExecutor):
if isinstance(patterns_result, Exception):
self.logger.warning(f"Patterns analysis exception: {patterns_result}")
patterns_result = None
seo_audit = None
if isinstance(seo_audit_result, Exception):
self.logger.warning(f"SEO audit exception: {seo_audit_result}")
else:
seo_audit = seo_audit_result
# Step 3: Generate style guidelines
style_guidelines = None
@@ -320,6 +333,7 @@ class WebsiteAnalysisExecutor(TaskExecutor):
'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
'style_guidelines': style_guidelines,
'seo_audit': seo_audit,
}
# Step 4: Store results based on task type
@@ -366,10 +380,12 @@ class WebsiteAnalysisExecutor(TaskExecutor):
):
"""Update existing WebsiteAnalysis record for user's website."""
try:
# Convert Clerk user ID to integer (same as component_logic.py)
# Use the same conversion logic as the website analysis API
import hashlib
user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)
session = db.query(OnboardingSession).filter(
OnboardingSession.user_id == user_id
).order_by(OnboardingSession.updated_at.desc()).first()
if not session:
raise ValueError(f"No onboarding session found for user {user_id}")
# Use WebsiteAnalysisService to update
analysis_service = WebsiteAnalysisService(db)
@@ -380,13 +396,15 @@ class WebsiteAnalysisExecutor(TaskExecutor):
'style_analysis': analysis_data.get('style_analysis'),
'style_patterns': analysis_data.get('style_patterns'),
'style_guidelines': analysis_data.get('style_guidelines'),
'seo_audit': analysis_data.get('seo_audit'),
}
# Save/update analysis
analysis_id = analysis_service.save_analysis(
session_id=user_id_int,
session_id=session.id,
website_url=website_url,
analysis_data=response_data
analysis_data=response_data,
preserve_persona=True
)
if analysis_id:
@@ -490,3 +508,82 @@ class WebsiteAnalysisExecutor(TaskExecutor):
)
return last_execution + timedelta(days=task.frequency_days)
async def _perform_full_site_analysis(self, user_id: str, website_url: str, db: Session, max_pages: int = 50):
    """
    Discover the site's sitemap and run the non-AI SEO audit on its pages.

    Each audited page is stored as an ``SEOPageAudit`` row (updated when one
    already exists for this user and page URL) and committed individually,
    so progress is visible while the scan runs. A failure on one page is
    logged and rolled back without stopping the scan; any other failure is
    logged and swallowed.

    Args:
        user_id: Owner of the audit rows.
        website_url: Site whose sitemap is discovered.
        db: Database session used to read and write audit rows.
        max_pages: Maximum number of distinct sitemap URLs to audit.
    """
    try:
        self.logger.info(f"Starting full site scan for {website_url}")
        sitemap_service = SitemapService()

        # 1. Discover Sitemap
        sitemap_url = await sitemap_service.discover_sitemap_url(website_url)
        if not sitemap_url:
            self.logger.warning(f"No sitemap found for {website_url}, skipping full site scan")
            return

        # 2. Get URLs (raw mode: no trend, pattern or AI analysis)
        sitemap_data = await sitemap_service.analyze_sitemap(
            sitemap_url=sitemap_url,
            analyze_content_trends=False,
            analyze_publishing_patterns=False,
            include_ai_insights=False
        )
        urls = [u.get('loc') for u in sitemap_data.get('urls', []) if u.get('loc')]
        self.logger.info(f"Found {len(urls)} URLs in sitemap for {website_url}")

        # 3. Batch process. Duplicate sitemap entries are audited once, and
        # the batch is capped at max_pages.
        urls_to_scan = list(dict.fromkeys(urls))[:max(0, max_pages)]
        loop = asyncio.get_running_loop()

        for page_url in urls_to_scan:
            try:
                existing = db.query(SEOPageAudit).filter(
                    SEOPageAudit.user_id == user_id,
                    SEOPageAudit.page_url == page_url
                ).first()

                # Run in the default executor so the audit does not block the
                # event loop. The empty content dict is passed so that
                # perform_seo_audit fetches the page itself.
                audit_result = await loop.run_in_executor(
                    None,
                    partial(self.style_logic.perform_seo_audit, page_url, {})
                )

                # Fields derived from the audit, shared by update and insert.
                summary = audit_result.get('summary') or {}
                audit_fields = {
                    'overall_score': audit_result.get('overall_score'),
                    'category_scores': {
                        name: section.get('score')
                        for name, section in audit_result.items()
                        if isinstance(section, dict) and 'score' in section
                    },
                    'issues': summary.get('critical_issues', []),
                    'warnings': summary.get('warnings', []),
                    'audit_data': audit_result,
                    # Set on both paths so new rows also record when they
                    # were analyzed.
                    'last_analyzed_at': datetime.utcnow(),
                    'status': 'completed',
                }

                if existing:
                    for field_name, field_value in audit_fields.items():
                        setattr(existing, field_name, field_value)
                else:
                    db.add(SEOPageAudit(
                        user_id=user_id,
                        website_url=website_url,
                        page_url=page_url,
                        analysis_source='scheduled_full_site',
                        **audit_fields
                    ))

                db.commit()  # Commit each page to show progress

            except Exception as e:
                self.logger.error(f"Error auditing page {page_url}: {e}")
                db.rollback()

        self.logger.info(f"Completed full site scan for {website_url}")

    except Exception as e:
        self.logger.error(f"Error in full site analysis: {e}")

View File

@@ -0,0 +1,32 @@
"""
Advertools Task Loader Utility
Utility functions for loading due Advertools tasks from the database.
"""
from typing import List, Optional
from datetime import datetime
from sqlalchemy.orm import Session
from models.advertools_monitoring_models import AdvertoolsTask
def load_due_advertools_tasks(db: Session, user_id: Optional[str] = None) -> List[AdvertoolsTask]:
    """
    Fetch the active Advertools tasks whose scheduled time has arrived.

    Args:
        db: Database session
        user_id: When given (and non-empty), only that user's tasks are returned

    Returns:
        List of AdvertoolsTask rows with status 'active' and
        next_execution at or before the current UTC time
    """
    cutoff = datetime.utcnow()
    due_conditions = [
        AdvertoolsTask.status == 'active',
        AdvertoolsTask.next_execution <= cutoff,
    ]
    if user_id:
        due_conditions.append(AdvertoolsTask.user_id == user_id)

    return db.query(AdvertoolsTask).filter(*due_conditions).all()

View File

@@ -0,0 +1,30 @@
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy import and_, or_
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import DeepCompetitorAnalysisTask
def load_due_deep_competitor_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[DeepCompetitorAnalysisTask]:
    """
    Fetch the active deep competitor analysis tasks that are due.

    A task is due when its next_execution is at or before the current UTC
    time, or has never been set.

    Args:
        db: Database session
        user_id: When not None, only that user's tasks are returned; the
            value is compared as a string

    Returns:
        List of due DeepCompetitorAnalysisTask rows
    """
    cutoff = datetime.utcnow()
    is_due = or_(
        DeepCompetitorAnalysisTask.next_execution <= cutoff,
        DeepCompetitorAnalysisTask.next_execution.is_(None)
    )
    due_tasks = db.query(DeepCompetitorAnalysisTask).filter(
        and_(DeepCompetitorAnalysisTask.status == 'active', is_due)
    )

    if user_id is None:
        return due_tasks.all()
    return due_tasks.filter(DeepCompetitorAnalysisTask.user_id == str(user_id)).all()

View File

@@ -0,0 +1,33 @@
from typing import List
from datetime import datetime
from sqlalchemy.orm import Session
from sqlalchemy import or_
from models.website_analysis_monitoring_models import DeepWebsiteCrawlTask
def load_due_deep_website_crawl_tasks(db: Session, user_id: str = None) -> List[DeepWebsiteCrawlTask]:
    """
    Fetch the deep website crawl tasks that are ready to run.

    A task qualifies when its status is 'active' or 'retry' and its
    next_execution is at or before the current UTC time, or unset.

    Args:
        db: Database session
        user_id: When given (and non-empty), only that user's tasks are returned

    Returns:
        List of due DeepWebsiteCrawlTask rows
    """
    is_runnable = or_(
        DeepWebsiteCrawlTask.status == 'active',
        DeepWebsiteCrawlTask.status == 'retry'
    )
    is_due = or_(
        DeepWebsiteCrawlTask.next_execution <= datetime.utcnow(),
        DeepWebsiteCrawlTask.next_execution.is_(None)
    )
    conditions = [is_runnable, is_due]
    if user_id:
        conditions.append(DeepWebsiteCrawlTask.user_id == user_id)

    return db.query(DeepWebsiteCrawlTask).filter(*conditions).all()

View File

@@ -0,0 +1,37 @@
"""
Market Trends Task Loader
Loads due market trends tasks from the database.
"""
from datetime import datetime
from typing import List, Optional
from sqlalchemy import or_
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import MarketTrendsTask
from utils.logger_utils import get_service_logger
logger = get_service_logger("market_trends_task_loader")
def load_due_market_trends_tasks(db: Session, user_id: Optional[str] = None) -> List[MarketTrendsTask]:
    """
    Fetch the active market trends tasks that are due.

    A task is due when its next_execution is at or before the current UTC
    time, or unset. Any error while querying is logged and an empty list is
    returned instead of raising.

    Args:
        db: Database session
        user_id: When given (and non-empty), only that user's tasks are returned

    Returns:
        List of due MarketTrendsTask rows, or [] on error
    """
    try:
        cutoff = datetime.utcnow()
        is_due = or_(
            MarketTrendsTask.next_execution <= cutoff,
            MarketTrendsTask.next_execution.is_(None),
        )
        conditions = [MarketTrendsTask.status == "active", is_due]
        if user_id:
            conditions.append(MarketTrendsTask.user_id == user_id)

        due_tasks = db.query(MarketTrendsTask).filter(*conditions).all()
        if due_tasks:
            logger.info(f"Loaded {len(due_tasks)} due market trends tasks")
        return due_tasks
    except Exception as e:
        logger.error(f"Error loading market trends tasks: {e}")
        return []

View File

@@ -0,0 +1,35 @@
"""
Onboarding Full Website Analysis Task Loader
Functions to load due onboarding full-site SEO audit tasks from database.
"""
from datetime import datetime
from typing import List, Optional, Union
from sqlalchemy import and_, or_
from sqlalchemy.orm import Session
from models.website_analysis_monitoring_models import OnboardingFullWebsiteAnalysisTask
def load_due_onboarding_full_website_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OnboardingFullWebsiteAnalysisTask]:
    """
    Fetch the active onboarding full-site analysis tasks that are due.

    A task is due when its next_execution is at or before the current UTC
    time, or has never been set.

    Args:
        db: Database session
        user_id: When not None, only that user's tasks are returned; the
            value is compared as a string

    Returns:
        List of due OnboardingFullWebsiteAnalysisTask rows
    """
    cutoff = datetime.utcnow()
    is_due = or_(
        OnboardingFullWebsiteAnalysisTask.next_execution <= cutoff,
        OnboardingFullWebsiteAnalysisTask.next_execution.is_(None)
    )
    due_tasks = db.query(OnboardingFullWebsiteAnalysisTask).filter(
        and_(OnboardingFullWebsiteAnalysisTask.status == 'active', is_due)
    )

    if user_id is None:
        return due_tasks.all()
    return due_tasks.filter(OnboardingFullWebsiteAnalysisTask.user_id == str(user_id)).all()

View File

@@ -0,0 +1,45 @@
"""
SIF Indexing Task Loader
Loads due SIF indexing tasks from the database.
"""
from datetime import datetime
from typing import List
from sqlalchemy.orm import Session
from sqlalchemy import or_
from models.website_analysis_monitoring_models import SIFIndexingTask
from utils.logger_utils import get_service_logger
logger = get_service_logger("sif_indexing_task_loader")
def load_due_sif_indexing_tasks(db: Session, user_id: str = None) -> List[SIFIndexingTask]:
    """
    Fetch the SIF indexing tasks that are ready to run.

    A task qualifies when its status is "pending" or "failed" and its
    next_run_at is at or before the current UTC time. Any error while
    querying is logged and an empty list is returned instead of raising.

    Args:
        db: Database session
        user_id: When given (and non-empty), only that user's tasks are returned

    Returns:
        List of SIFIndexingTask objects, or [] on error
    """
    # NOTE(review): the SIF indexing executor writes task.status = "active"
    # and task.next_execution, while this loader reads "pending"/"failed" and
    # next_run_at — confirm against the SIFIndexingTask model which pair of
    # names is correct; as written, a rescheduled task may never be reloaded.
    try:
        is_runnable = or_(
            SIFIndexingTask.status == "pending",
            SIFIndexingTask.status == "failed"  # failed tasks are retried
        )
        conditions = [is_runnable, SIFIndexingTask.next_run_at <= datetime.utcnow()]
        if user_id:
            conditions.append(SIFIndexingTask.user_id == user_id)

        return db.query(SIFIndexingTask).filter(*conditions).all()
    except Exception as e:
        logger.error(f"Error loading SIF indexing tasks: {str(e)}")
        return []

View File

@@ -8,7 +8,7 @@ from urllib.parse import urlparse
from loguru import logger
from sqlalchemy.orm import Session as SQLSession
from services.database import get_db_session
from services.database import get_session_for_user
from models.onboarding import OnboardingSession, WebsiteAnalysis
@@ -79,7 +79,7 @@ def get_user_job_store_name(user_id: str, db: SQLSession = None) -> str:
try:
if not db_session:
db_session = get_db_session()
db_session = get_session_for_user(user_id)
close_db = True
if not db_session: