Recovered state: integrated TrendSurferAgent, restored frontend/backend files, and cleaned up recovery scripts
This commit is contained in:
@@ -14,12 +14,24 @@ from .core.exception_handler import (
|
||||
from .executors.monitoring_task_executor import MonitoringTaskExecutor
|
||||
from .executors.oauth_token_monitoring_executor import OAuthTokenMonitoringExecutor
|
||||
from .executors.website_analysis_executor import WebsiteAnalysisExecutor
|
||||
from .executors.onboarding_full_website_analysis_executor import OnboardingFullWebsiteAnalysisExecutor
|
||||
from .executors.deep_competitor_analysis_executor import DeepCompetitorAnalysisExecutor
|
||||
from .executors.deep_website_crawl_executor import DeepWebsiteCrawlExecutor
|
||||
from .executors.gsc_insights_executor import GSCInsightsExecutor
|
||||
from .executors.bing_insights_executor import BingInsightsExecutor
|
||||
from .executors.advertools_executor import AdvertoolsExecutor
|
||||
from .executors.sif_indexing_executor import SIFIndexingExecutor
|
||||
from .executors.market_trends_executor import MarketTrendsExecutor
|
||||
from .utils.task_loader import load_due_monitoring_tasks
|
||||
from .utils.oauth_token_task_loader import load_due_oauth_token_monitoring_tasks
|
||||
from .utils.website_analysis_task_loader import load_due_website_analysis_tasks
|
||||
from .utils.onboarding_full_website_analysis_task_loader import load_due_onboarding_full_website_analysis_tasks
|
||||
from .utils.deep_competitor_analysis_task_loader import load_due_deep_competitor_analysis_tasks
|
||||
from .utils.deep_website_crawl_task_loader import load_due_deep_website_crawl_tasks
|
||||
from .utils.platform_insights_task_loader import load_due_platform_insights_tasks
|
||||
from .utils.advertools_task_loader import load_due_advertools_tasks
|
||||
from .utils.sif_indexing_task_loader import load_due_sif_indexing_tasks
|
||||
from .utils.market_trends_task_loader import load_due_market_trends_tasks
|
||||
|
||||
# Global scheduler instance (initialized on first access)
|
||||
_scheduler_instance: TaskScheduler = None
|
||||
@@ -62,6 +74,28 @@ def get_scheduler() -> TaskScheduler:
|
||||
website_analysis_executor,
|
||||
load_due_website_analysis_tasks
|
||||
)
|
||||
|
||||
onboarding_full_site_executor = OnboardingFullWebsiteAnalysisExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'onboarding_full_website_analysis',
|
||||
onboarding_full_site_executor,
|
||||
load_due_onboarding_full_website_analysis_tasks
|
||||
)
|
||||
|
||||
deep_competitor_analysis_executor = DeepCompetitorAnalysisExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'deep_competitor_analysis',
|
||||
deep_competitor_analysis_executor,
|
||||
load_due_deep_competitor_analysis_tasks
|
||||
)
|
||||
|
||||
# Register deep website crawl executor
|
||||
deep_website_crawl_executor = DeepWebsiteCrawlExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'deep_website_crawl',
|
||||
deep_website_crawl_executor,
|
||||
load_due_deep_website_crawl_tasks
|
||||
)
|
||||
|
||||
# Register platform insights executors
|
||||
# GSC insights executor
|
||||
@@ -85,6 +119,30 @@ def get_scheduler() -> TaskScheduler:
|
||||
bing_insights_executor,
|
||||
load_due_bing_insights_tasks
|
||||
)
|
||||
|
||||
# Register Advertools executor
|
||||
advertools_executor = AdvertoolsExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'advertools_intelligence',
|
||||
advertools_executor,
|
||||
load_due_advertools_tasks
|
||||
)
|
||||
|
||||
# Register SIF indexing executor
|
||||
sif_indexing_executor = SIFIndexingExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'sif_indexing',
|
||||
sif_indexing_executor,
|
||||
load_due_sif_indexing_tasks
|
||||
)
|
||||
|
||||
# Register market trends executor
|
||||
market_trends_executor = MarketTrendsExecutor()
|
||||
_scheduler_instance.register_executor(
|
||||
'market_trends',
|
||||
market_trends_executor,
|
||||
load_due_market_trends_tasks
|
||||
)
|
||||
|
||||
return _scheduler_instance
|
||||
|
||||
@@ -96,8 +154,11 @@ __all__ = [
|
||||
'MonitoringTaskExecutor',
|
||||
'OAuthTokenMonitoringExecutor',
|
||||
'WebsiteAnalysisExecutor',
|
||||
'OnboardingFullWebsiteAnalysisExecutor',
|
||||
'GSCInsightsExecutor',
|
||||
'BingInsightsExecutor',
|
||||
'SIFIndexingExecutor',
|
||||
'MarketTrendsExecutor',
|
||||
'get_scheduler',
|
||||
# Exception handling
|
||||
'SchedulerExceptionHandler',
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Advertools Task Restoration Utility
|
||||
Handles creation and restoration of Advertools intelligence tasks for users.
|
||||
"""
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any
|
||||
from loguru import logger
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.onboarding import WebsiteAnalysis, OnboardingSession
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
|
||||
async def restore_advertools_tasks(scheduler: Any) -> int:
    """
    Restore/create Advertools tasks for all users who have completed Step 2.

    For each user id returned by ``get_all_user_ids()`` a user-specific
    session is opened. Users without an ``OnboardingSession`` or without a
    ``WebsiteAnalysis`` that carries a ``website_url`` are skipped. For the
    remaining users, a weekly ``content_audit`` task (first run in 1 day) and
    a weekly ``site_health`` task (first run in 2 days) are created when no
    task with that payload type exists yet.

    Failures for one user are logged and do not stop processing of the
    remaining users.

    Args:
        scheduler: Not used by this function; kept so callers can pass the
            scheduler instance as before.

    Returns:
        Number of tasks created/restored. Only tasks whose commit succeeded
        are counted.
    """
    logger.info("Restoring Advertools intelligence tasks...")
    total_created = 0

    # (payload type, days until first execution, label used in the log line)
    task_specs = (
        ("content_audit", 1, "content audit"),
        ("site_health", 2, "site health"),
    )

    for user_id in get_all_user_ids():
        try:
            db = get_session_for_user(user_id)
            if not db:
                continue

            try:
                # Check if user has completed Step 2 (has WebsiteAnalysis)
                session = (
                    db.query(OnboardingSession)
                    .filter(OnboardingSession.user_id == user_id)
                    .first()
                )
                if not session:
                    continue

                analysis = (
                    db.query(WebsiteAnalysis)
                    .filter(WebsiteAnalysis.session_id == session.id)
                    .first()
                )
                if not analysis or not analysis.website_url:
                    continue

                # Labels of tasks staged in this transaction; they are only
                # counted and logged once the commit below has succeeded.
                pending_labels = []
                for task_type, start_in_days, label in task_specs:
                    existing_task = db.query(AdvertoolsTask).filter(
                        AdvertoolsTask.user_id == user_id,
                        func.json_extract(AdvertoolsTask.payload, '$.type') == task_type
                    ).first()
                    if existing_task:
                        continue

                    db.add(
                        AdvertoolsTask(
                            user_id=user_id,
                            website_url=analysis.website_url,
                            status='active',
                            next_execution=datetime.utcnow() + timedelta(days=start_in_days),
                            frequency_days=7,
                            payload={
                                "type": task_type,
                                "website_url": analysis.website_url
                            }
                        )
                    )
                    pending_labels.append(label)

                db.commit()

                # Count and report only after the commit went through, so the
                # return value and the log never claim unpersisted tasks.
                total_created += len(pending_labels)
                for label in pending_labels:
                    logger.info(f"Created weekly {label} task for user {user_id}")
            except Exception:
                # Discard any staged rows before the session is closed, then
                # let the per-user handler below log the failure.
                db.rollback()
                raise
            finally:
                db.close()
        except Exception as e:
            logger.error(f"Error restoring Advertools tasks for user {user_id}: {e}")

    return total_created
|
||||
@@ -7,18 +7,21 @@ from typing import TYPE_CHECKING, Dict, Any
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from .exception_handler import DatabaseError
|
||||
from .interval_manager import adjust_check_interval_if_needed
|
||||
|
||||
# Import semantic monitoring for Phase 2B integration
|
||||
from services.intelligence.monitoring.semantic_dashboard import RealTimeSemanticMonitor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
|
||||
logger = get_service_logger("check_cycle_handler")
|
||||
|
||||
# Track last semantic check per user to enforce 24-hour interval
|
||||
# In-memory cache is sufficient as it resets on restart (which is fine)
|
||||
LAST_SEMANTIC_CHECKS: Dict[str, datetime] = {}
|
||||
|
||||
async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
"""
|
||||
@@ -42,154 +45,133 @@ async def check_and_execute_due_tasks(scheduler: 'TaskScheduler'):
|
||||
'total_failed': 0
|
||||
}
|
||||
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db is None:
|
||||
logger.error("[Scheduler Check] ❌ Failed to get database session")
|
||||
return
|
||||
|
||||
# Check for active strategies and adjust interval intelligently
|
||||
await adjust_check_interval_if_needed(scheduler, db)
|
||||
|
||||
# Check each registered task type
|
||||
registered_types = scheduler.registry.get_registered_types()
|
||||
for task_type in registered_types:
|
||||
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary)
|
||||
if type_summary:
|
||||
cycle_summary['tasks_found_by_type'][task_type] = type_summary.get('found', 0)
|
||||
cycle_summary['tasks_executed_by_type'][task_type] = type_summary.get('executed', 0)
|
||||
cycle_summary['tasks_failed_by_type'][task_type] = type_summary.get('failed', 0)
|
||||
|
||||
# Calculate totals
|
||||
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
|
||||
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
|
||||
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
|
||||
|
||||
# Log comprehensive check cycle summary
|
||||
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
|
||||
active_strategies = scheduler.stats.get('active_strategies_count', 0)
|
||||
active_executions = len(scheduler.active_executions)
|
||||
|
||||
# Build comprehensive check cycle summary log message
|
||||
check_lines = [
|
||||
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
|
||||
f" ├─ Duration: {check_duration:.2f}s",
|
||||
f" ├─ Active Strategies: {active_strategies}",
|
||||
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
|
||||
f" ├─ User Isolation: Enabled (tasks filtered by user_id)",
|
||||
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
|
||||
]
|
||||
|
||||
if cycle_summary['tasks_found_by_type']:
|
||||
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
|
||||
for idx, (task_type, count) in enumerate(task_types_list):
|
||||
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
|
||||
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
|
||||
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
|
||||
prefix = " └─" if is_last_task_type else " ├─"
|
||||
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
|
||||
|
||||
if cycle_summary['total_found'] > 0:
|
||||
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
|
||||
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
|
||||
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
|
||||
else:
|
||||
check_lines.append(f" └─ No tasks found - scheduler idle")
|
||||
|
||||
# Log comprehensive check cycle summary in single message
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Save check cycle event to database for historical tracking
|
||||
event_log_id = None
|
||||
# Iterate through all users (Multi-tenancy support)
|
||||
user_ids = get_all_user_ids()
|
||||
total_active_strategies = 0
|
||||
|
||||
for user_id in user_ids:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"[Scheduler Check] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
try:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='check_cycle',
|
||||
event_date=check_start_time,
|
||||
check_cycle_number=scheduler.stats['total_checks'],
|
||||
check_interval_minutes=scheduler.current_check_interval_minutes,
|
||||
tasks_found=cycle_summary.get('total_found', 0),
|
||||
tasks_executed=cycle_summary.get('total_executed', 0),
|
||||
tasks_failed=cycle_summary.get('total_failed', 0),
|
||||
tasks_by_type=cycle_summary.get('tasks_found_by_type', {}),
|
||||
check_duration_seconds=check_duration,
|
||||
active_strategies_count=active_strategies,
|
||||
active_executions=active_executions,
|
||||
event_data={
|
||||
'executed_by_type': cycle_summary.get('tasks_executed_by_type', {}),
|
||||
'failed_by_type': cycle_summary.get('tasks_failed_by_type', {})
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.flush() # Flush to get the ID without committing
|
||||
event_log_id = event_log.id
|
||||
db.commit()
|
||||
logger.debug(f"[Check Cycle] Saved event log with ID: {event_log_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to save check cycle event log: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Continue execution even if event log save fails
|
||||
|
||||
# Update cumulative stats table (persistent across restarts)
|
||||
try:
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
|
||||
# Update cumulative metrics by adding this cycle's values
|
||||
# Get current cycle values (incremental, not total)
|
||||
cycle_tasks_found = cycle_summary.get('total_found', 0)
|
||||
cycle_tasks_executed = cycle_summary.get('total_executed', 0)
|
||||
cycle_tasks_failed = cycle_summary.get('total_failed', 0)
|
||||
|
||||
# Update cumulative totals (additive)
|
||||
cumulative_stats.total_check_cycles += 1
|
||||
cumulative_stats.cumulative_tasks_found += cycle_tasks_found
|
||||
cumulative_stats.cumulative_tasks_executed += cycle_tasks_executed
|
||||
cumulative_stats.cumulative_tasks_failed += cycle_tasks_failed
|
||||
# Note: tasks_skipped in scheduler.stats is a running total, not per-cycle
|
||||
# We track it as-is from scheduler.stats (it's already cumulative)
|
||||
# This ensures we don't double-count skipped tasks
|
||||
if cumulative_stats.cumulative_tasks_skipped is None:
|
||||
cumulative_stats.cumulative_tasks_skipped = 0
|
||||
# Update to current total from scheduler (which is already cumulative)
|
||||
current_skipped = scheduler.stats.get('tasks_skipped', 0)
|
||||
if current_skipped > cumulative_stats.cumulative_tasks_skipped:
|
||||
cumulative_stats.cumulative_tasks_skipped = current_skipped
|
||||
cumulative_stats.last_check_cycle_id = event_log_id
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
# Log at DEBUG level to avoid noise during normal operation
|
||||
# This is expected behavior, not a warning
|
||||
logger.debug(
|
||||
f"[Check Cycle] Updated cumulative stats: "
|
||||
f"cycles={cumulative_stats.total_check_cycles}, "
|
||||
f"found={cumulative_stats.cumulative_tasks_found}, "
|
||||
f"executed={cumulative_stats.cumulative_tasks_executed}, "
|
||||
f"failed={cumulative_stats.cumulative_tasks_failed}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Check Cycle] ❌ Failed to update cumulative stats: {e}", exc_info=True)
|
||||
if db:
|
||||
db.rollback()
|
||||
# Log warning but continue - cumulative stats can be rebuilt from event logs
|
||||
logger.warning(
|
||||
"[Check Cycle] ⚠️ Cumulative stats update failed. "
|
||||
"Stats can be rebuilt from event logs on next dashboard load."
|
||||
)
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
except Exception as e:
|
||||
error = DatabaseError(
|
||||
message=f"Error checking for due tasks: {str(e)}",
|
||||
original_error=e
|
||||
)
|
||||
scheduler.exception_handler.handle_exception(error)
|
||||
logger.error(f"[Scheduler Check] ❌ Error in check cycle: {str(e)}")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
# Check active strategies for this user (for interval adjustment)
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
user_active_strategies = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_strategies += user_active_strategies
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
|
||||
# Phase 2B: Real-time semantic health monitoring (runs every 24 hours)
|
||||
# Check if 24 hours have passed since last check
|
||||
should_run_semantic = False
|
||||
now = datetime.utcnow()
|
||||
last_check = LAST_SEMANTIC_CHECKS.get(user_id)
|
||||
|
||||
if not last_check or (now - last_check).total_seconds() > 86400: # 24 hours
|
||||
should_run_semantic = True
|
||||
|
||||
if should_run_semantic:
|
||||
try:
|
||||
semantic_monitor = RealTimeSemanticMonitor(user_id)
|
||||
# Use public wrapper method which aggregates metrics
|
||||
# Note: semantic_monitor instantiation loads heavy models, so we limit frequency to 24h
|
||||
semantic_health = await semantic_monitor.check_semantic_health(user_id)
|
||||
logger.info(f"[Semantic Monitor] User {user_id} health check: {semantic_health.status} (score: {semantic_health.value:.2f})")
|
||||
|
||||
# Update timestamp only on success/attempt to prevent spamming retries
|
||||
LAST_SEMANTIC_CHECKS[user_id] = now
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Semantic Monitor] Error checking semantic health for user {user_id}: {e}")
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Check each registered task type for this user
|
||||
registered_types = scheduler.registry.get_registered_types()
|
||||
for task_type in registered_types:
|
||||
# Pass the user-specific session
|
||||
type_summary = await scheduler._process_task_type(task_type, db, cycle_summary, user_id=user_id)
|
||||
if type_summary:
|
||||
cycle_summary['tasks_found_by_type'][task_type] = cycle_summary['tasks_found_by_type'].get(task_type, 0) + type_summary.get('found', 0)
|
||||
cycle_summary['tasks_executed_by_type'][task_type] = cycle_summary['tasks_executed_by_type'].get(task_type, 0) + type_summary.get('executed', 0)
|
||||
cycle_summary['tasks_failed_by_type'][task_type] = cycle_summary['tasks_failed_by_type'].get(task_type, 0) + type_summary.get('failed', 0)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler Check] Error processing user {user_id}: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Adjust interval based on TOTAL active strategies across all users
|
||||
# We manually update the stats and check interval, skipping adjust_check_interval_if_needed
|
||||
# because it's not multi-tenant aware yet.
|
||||
scheduler.stats['active_strategies_count'] = total_active_strategies
|
||||
|
||||
if total_active_strategies > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {total_active_strategies}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if total_active_strategies > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
job_id='check_due_tasks',
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
|
||||
# Calculate totals
|
||||
cycle_summary['total_found'] = sum(cycle_summary['tasks_found_by_type'].values())
|
||||
cycle_summary['total_executed'] = sum(cycle_summary['tasks_executed_by_type'].values())
|
||||
cycle_summary['total_failed'] = sum(cycle_summary['tasks_failed_by_type'].values())
|
||||
|
||||
# Log comprehensive check cycle summary
|
||||
check_duration = (datetime.utcnow() - check_start_time).total_seconds()
|
||||
active_executions = len(scheduler.active_executions)
|
||||
|
||||
# Build comprehensive check cycle summary log message
|
||||
check_lines = [
|
||||
f"[Scheduler Check] 🔍 Check Cycle #{scheduler.stats['total_checks']} Completed",
|
||||
f" ├─ Duration: {check_duration:.2f}s",
|
||||
f" ├─ Active Strategies: {total_active_strategies}",
|
||||
f" ├─ Check Interval: {scheduler.current_check_interval_minutes}min",
|
||||
f" ├─ User Isolation: Enabled (Scanned {len(user_ids)} users)",
|
||||
f" ├─ Tasks Found: {cycle_summary['total_found']} total"
|
||||
]
|
||||
|
||||
if cycle_summary['tasks_found_by_type']:
|
||||
task_types_list = list(cycle_summary['tasks_found_by_type'].items())
|
||||
for idx, (task_type, count) in enumerate(task_types_list):
|
||||
executed = cycle_summary['tasks_executed_by_type'].get(task_type, 0)
|
||||
failed = cycle_summary['tasks_failed_by_type'].get(task_type, 0)
|
||||
is_last_task_type = idx == len(task_types_list) - 1 and cycle_summary['total_executed'] == 0 and cycle_summary['total_failed'] == 0
|
||||
prefix = " └─" if is_last_task_type else " ├─"
|
||||
check_lines.append(f"{prefix} {task_type}: {count} found, {executed} executed, {failed} failed")
|
||||
|
||||
if cycle_summary['total_found'] > 0:
|
||||
check_lines.append(f" ├─ Total Executed: {cycle_summary['total_executed']}")
|
||||
check_lines.append(f" ├─ Total Failed: {cycle_summary['total_failed']}")
|
||||
check_lines.append(f" └─ Active Executions: {active_executions}/{scheduler.max_concurrent_executions}")
|
||||
else:
|
||||
check_lines.append(f" └─ No tasks found - scheduler idle")
|
||||
|
||||
# Log comprehensive check cycle summary in single message
|
||||
logger.warning("\n".join(check_lines))
|
||||
|
||||
# Update last_update timestamp for frontend polling
|
||||
scheduler.stats['last_update'] = datetime.utcnow().isoformat()
|
||||
|
||||
|
||||
|
||||
@@ -106,6 +106,7 @@ class DatabaseError(SchedulerException):
|
||||
message: str,
|
||||
user_id: Optional[int] = None,
|
||||
task_id: Optional[int] = None,
|
||||
task_type: Optional[str] = None,
|
||||
context: Dict[str, Any] = None,
|
||||
original_error: Exception = None
|
||||
):
|
||||
@@ -115,6 +116,7 @@ class DatabaseError(SchedulerException):
|
||||
severity=SchedulerErrorSeverity.CRITICAL,
|
||||
user_id=user_id,
|
||||
task_id=task_id,
|
||||
task_type=task_type,
|
||||
context=context or {},
|
||||
original_error=original_error
|
||||
)
|
||||
@@ -180,6 +182,9 @@ class SchedulerConfigError(SchedulerException):
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
user_id: Optional[int] = None,
|
||||
task_id: Optional[int] = None,
|
||||
task_type: Optional[str] = None,
|
||||
context: Dict[str, Any] = None,
|
||||
original_error: Exception = None
|
||||
):
|
||||
@@ -187,6 +192,9 @@ class SchedulerConfigError(SchedulerException):
|
||||
message=message,
|
||||
error_type=SchedulerErrorType.SCHEDULER_CONFIG_ERROR,
|
||||
severity=SchedulerErrorSeverity.CRITICAL,
|
||||
user_id=user_id,
|
||||
task_id=task_id,
|
||||
task_type=task_type,
|
||||
context=context or {},
|
||||
original_error=original_error
|
||||
)
|
||||
|
||||
@@ -7,9 +7,8 @@ from typing import TYPE_CHECKING
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .scheduler import TaskScheduler
|
||||
@@ -23,7 +22,7 @@ async def determine_optimal_interval(
|
||||
max_interval: int
|
||||
) -> int:
|
||||
"""
|
||||
Determine optimal check interval based on active strategies.
|
||||
Determine optimal check interval based on active strategies across all users.
|
||||
|
||||
Args:
|
||||
scheduler: TaskScheduler instance
|
||||
@@ -33,107 +32,100 @@ async def determine_optimal_interval(
|
||||
Returns:
|
||||
Optimal check interval in minutes
|
||||
"""
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
scheduler.stats['active_strategies_count'] = active_count
|
||||
|
||||
if active_count > 0:
|
||||
logger.info(f"Found {active_count} active strategies with tasks - using {min_interval}min interval")
|
||||
return min_interval
|
||||
else:
|
||||
logger.info(f"No active strategies with tasks - using {max_interval}min interval")
|
||||
return max_interval
|
||||
except Exception as e:
|
||||
logger.warning(f"Error determining optimal interval: {e}, using default {min_interval}min")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
total_active_count = 0
|
||||
user_ids = get_all_user_ids()
|
||||
|
||||
# Default to shorter interval on error (safer)
|
||||
return min_interval
|
||||
for user_id in user_ids:
|
||||
db = None
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_count += user_active_count
|
||||
|
||||
# Optimization: If we found at least one active strategy, we can stop and return min_interval
|
||||
# (unless we want accurate stats)
|
||||
# For stats accuracy, we should continue.
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking user {user_id} for strategies: {e}")
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
|
||||
scheduler.stats['active_strategies_count'] = total_active_count
|
||||
|
||||
if total_active_count > 0:
|
||||
logger.info(f"Found {total_active_count} active strategies across users - using {min_interval}min interval")
|
||||
return min_interval
|
||||
else:
|
||||
logger.info(f"No active strategies found - using {max_interval}min interval")
|
||||
return max_interval
|
||||
|
||||
|
||||
async def adjust_check_interval_if_needed(
|
||||
scheduler: 'TaskScheduler',
|
||||
db: Session
|
||||
db: Session = None # Deprecated parameter, ignored
|
||||
):
|
||||
"""
|
||||
Intelligently adjust check interval based on active strategies.
|
||||
Intelligently adjust check interval based on active strategies across all users.
|
||||
|
||||
If there are active strategies with tasks, check more frequently.
|
||||
If there are no active strategies, check less frequently.
|
||||
|
||||
Args:
|
||||
scheduler: TaskScheduler instance
|
||||
db: Database session
|
||||
db: Deprecated/Ignored
|
||||
"""
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
total_active_count = 0
|
||||
user_ids = get_all_user_ids()
|
||||
|
||||
for user_id in user_ids:
|
||||
user_db = None
|
||||
try:
|
||||
user_db = get_session_for_user(user_id)
|
||||
if user_db:
|
||||
try:
|
||||
from services.active_strategy_service import ActiveStrategyService
|
||||
active_strategy_service = ActiveStrategyService(db_session=user_db)
|
||||
user_active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
total_active_count += user_active_count
|
||||
except Exception as e:
|
||||
logger.warning(f"Error counting active strategies for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking user {user_id} for strategies: {e}")
|
||||
finally:
|
||||
if user_db:
|
||||
user_db.close()
|
||||
|
||||
scheduler.stats['active_strategies_count'] = total_active_count
|
||||
|
||||
# Determine optimal interval
|
||||
if total_active_count > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
# Only reschedule if interval needs to change
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {total_active_count}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if total_active_count > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
active_strategy_service = ActiveStrategyService(db_session=db)
|
||||
active_count = active_strategy_service.count_active_strategies_with_tasks()
|
||||
scheduler.stats['active_strategies_count'] = active_count
|
||||
|
||||
# Determine optimal interval
|
||||
if active_count > 0:
|
||||
optimal_interval = scheduler.min_check_interval_minutes
|
||||
else:
|
||||
optimal_interval = scheduler.max_check_interval_minutes
|
||||
|
||||
# Only reschedule if interval needs to change
|
||||
if optimal_interval != scheduler.current_check_interval_minutes:
|
||||
interval_message = (
|
||||
f"[Scheduler] ⚙️ Adjusting Check Interval\n"
|
||||
f" ├─ Current: {scheduler.current_check_interval_minutes}min\n"
|
||||
f" ├─ Optimal: {optimal_interval}min\n"
|
||||
f" ├─ Active Strategies: {active_count}\n"
|
||||
f" └─ Reason: {'Active strategies detected' if active_count > 0 else 'No active strategies'}"
|
||||
)
|
||||
logger.warning(interval_message)
|
||||
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
'check_due_tasks',
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
|
||||
# Save previous interval before updating
|
||||
previous_interval = scheduler.current_check_interval_minutes
|
||||
|
||||
# Update current interval
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
# Save interval adjustment event to database
|
||||
try:
|
||||
event_db = get_db_session()
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='interval_adjustment',
|
||||
event_date=datetime.utcnow(),
|
||||
previous_interval_minutes=previous_interval,
|
||||
new_interval_minutes=optimal_interval,
|
||||
check_interval_minutes=optimal_interval,
|
||||
active_strategies_count=active_count,
|
||||
event_data={
|
||||
'reason': 'intelligent_scheduling',
|
||||
'min_interval': scheduler.min_check_interval_minutes,
|
||||
'max_interval': scheduler.max_check_interval_minutes
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save interval adjustment event log: {e}")
|
||||
|
||||
logger.warning(f"[Scheduler] ✅ Interval adjusted to {optimal_interval}min")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error adjusting check interval: {e}")
|
||||
# Reschedule the job with new interval
|
||||
scheduler.scheduler.modify_job(
|
||||
job_id='check_due_tasks', # Fixed job_id from check_cycle to check_due_tasks to match scheduler.py
|
||||
trigger=scheduler._get_trigger_for_interval(optimal_interval)
|
||||
)
|
||||
scheduler.current_check_interval_minutes = optimal_interval
|
||||
scheduler.stats['last_interval_adjustment'] = datetime.utcnow().isoformat()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ Preserves original scheduled times from database to avoid rescheduling on server
|
||||
from typing import TYPE_CHECKING
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from utils.logger_utils import get_service_logger
|
||||
from services.database import get_db_session
|
||||
from services.database import get_db_session, get_all_user_ids, get_session_for_user
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -28,35 +28,39 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
scheduler: TaskScheduler instance
|
||||
"""
|
||||
try:
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("Could not get database session to restore persona jobs")
|
||||
return
|
||||
user_ids = get_all_user_ids()
|
||||
logger.info(f"[Restoration] Found {len(user_ids)} users to check for persona jobs")
|
||||
|
||||
try:
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.research.research_persona_scheduler import (
|
||||
schedule_research_persona_generation,
|
||||
generate_research_persona_task
|
||||
)
|
||||
from services.persona.facebook.facebook_persona_scheduler import (
|
||||
schedule_facebook_persona_generation,
|
||||
generate_facebook_persona_task
|
||||
)
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
for user_id in user_ids:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get all users who completed onboarding
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.progress == 100.0
|
||||
).all()
|
||||
|
||||
restored_count = 0
|
||||
skipped_count = 0
|
||||
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
||||
|
||||
for session in completed_sessions:
|
||||
user_id = session.user_id
|
||||
try:
|
||||
from models.onboarding import OnboardingSession
|
||||
from services.research.research_persona_scheduler import (
|
||||
schedule_research_persona_generation,
|
||||
generate_research_persona_task
|
||||
)
|
||||
from services.persona.facebook.facebook_persona_scheduler import (
|
||||
schedule_facebook_persona_generation,
|
||||
generate_facebook_persona_task
|
||||
)
|
||||
from services.research.research_persona_service import ResearchPersonaService
|
||||
from services.persona_data_service import PersonaDataService
|
||||
|
||||
# Check if user completed onboarding
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session or session.progress < 100.0:
|
||||
continue
|
||||
|
||||
restored_count = 0
|
||||
skipped_count = 0
|
||||
now = datetime.utcnow().replace(tzinfo=timezone.utc)
|
||||
|
||||
# Restore research persona job
|
||||
try:
|
||||
@@ -69,7 +73,7 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
research_persona_exists = bool(research_persona_data)
|
||||
|
||||
if not research_persona_exists:
|
||||
# Note: Clerk user_id already includes "user_" prefix
|
||||
# Note: Clerk user_id already includes "user_" prefix if applicable, or we use the string as is
|
||||
job_id = f"research_persona_{user_id}"
|
||||
|
||||
# Check if job already exists in scheduler (just started, so unlikely)
|
||||
@@ -256,13 +260,13 @@ async def restore_persona_jobs(scheduler: 'TaskScheduler'):
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not restore Facebook persona for user {user_id}: {e}")
|
||||
|
||||
if restored_count > 0:
|
||||
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) on startup (preserved original scheduled times)")
|
||||
if skipped_count > 0:
|
||||
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) (already completed/failed or exist)")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
if restored_count > 0:
|
||||
logger.warning(f"[Scheduler] ✅ Restored {restored_count} persona generation job(s) for user {user_id}")
|
||||
if skipped_count > 0:
|
||||
logger.debug(f"[Scheduler] Skipped {skipped_count} persona job(s) for user {user_id}")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error restoring persona jobs: {e}")
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_session_for_user, get_all_user_ids
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms, create_oauth_monitoring_tasks
|
||||
|
||||
@@ -31,98 +31,41 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[OAuth Task Restoration] Starting OAuth monitoring task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[OAuth Task Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Get all existing OAuth tasks to find unique user_ids
|
||||
existing_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Log existing tasks breakdown by platform
|
||||
existing_by_platform = {}
|
||||
for task in existing_tasks:
|
||||
existing_by_platform[task.platform] = existing_by_platform.get(task.platform, 0) + 1
|
||||
|
||||
platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(existing_by_platform.items())])
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Found {len(existing_tasks)} existing OAuth tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Platforms: {platform_summary}"
|
||||
)
|
||||
|
||||
# Check users who already have at least one OAuth task
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
# Also query all users from onboarding who completed step 5 (integrations)
|
||||
# to catch users who connected platforms but tasks weren't created
|
||||
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
|
||||
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
|
||||
# This matches the logic used in home page redirect and persona generation checks
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
restoration_summary = []
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.debug(f"[OAuth Task Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get onboarding progress service (same as used throughout the app)
|
||||
progress_service = get_onboarding_progress_service()
|
||||
|
||||
# Query all sessions and filter using the same completion logic as the service
|
||||
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
or_(
|
||||
OnboardingSession.current_step >= 6,
|
||||
OnboardingSession.progress >= 100.0
|
||||
)
|
||||
).all()
|
||||
|
||||
# Validate using the service method for consistency
|
||||
onboarding_user_ids = set()
|
||||
for session in completed_sessions:
|
||||
# Use the same service method as the rest of the app
|
||||
status = progress_service.get_onboarding_status(session.user_id)
|
||||
if status.get('is_completed', False):
|
||||
onboarding_user_ids.add(session.user_id)
|
||||
all_user_ids = users_to_check.copy()
|
||||
|
||||
# Add users from onboarding who might not have tasks yet
|
||||
for user_id in onboarding_user_ids:
|
||||
if user_id not in all_user_ids:
|
||||
all_user_ids.append(user_id)
|
||||
|
||||
users_to_check = all_user_ids
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Checking {len(users_to_check)} users "
|
||||
f"({len(user_ids_with_tasks)} with existing tasks, "
|
||||
f"{len(onboarding_user_ids)} from onboarding sessions, "
|
||||
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Task Restoration] Could not query onboarding users: {e}")
|
||||
# Fallback to users with existing tasks only
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = [] # Collect summary for single log
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Get existing tasks for this user
|
||||
try:
|
||||
existing_tasks = db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_tasks)
|
||||
except Exception as table_error:
|
||||
# Table might not exist for this user yet
|
||||
continue
|
||||
|
||||
# Get connected platforms for this user (silent - no logging)
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
if not connected_platforms:
|
||||
logger.debug(
|
||||
f"[OAuth Task Restoration] No connected platforms for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Check which platforms are missing tasks
|
||||
existing_platforms = {
|
||||
task.platform
|
||||
for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
}
|
||||
existing_platforms = {task.platform for task in existing_tasks}
|
||||
|
||||
missing_platforms = [
|
||||
platform
|
||||
@@ -138,53 +81,44 @@ async def restore_oauth_monitoring_tasks(scheduler):
|
||||
platforms=missing_platforms
|
||||
)
|
||||
|
||||
total_created += len(created)
|
||||
# Collect summary info instead of logging immediately
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
if created:
|
||||
total_created += len(created)
|
||||
platforms_str = ", ".join([p.upper() for p in missing_platforms])
|
||||
restoration_summary.append(
|
||||
f" ├─ User {user_id[:20]}...: {len(created)} tasks ({platforms_str})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] Error checking/creating tasks for user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
continue
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[OAuth Task Restoration] Error processing user {user_id}: {e}")
|
||||
continue
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5])
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
# Final summary log with platform breakdown
|
||||
final_existing_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
final_by_platform = {}
|
||||
for task in final_existing_tasks:
|
||||
final_by_platform[task.platform] = final_by_platform.get(task.platform, 0) + 1
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Users Processed: {users_processed}\n"
|
||||
f" ├─ Existing Tasks: {total_existing_tasks}\n"
|
||||
f" ├─ New Tasks Created: {total_created}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Processed {users_processed} users."
|
||||
)
|
||||
|
||||
final_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(final_by_platform.items())])
|
||||
|
||||
# Single formatted summary log (similar to scheduler startup)
|
||||
if total_created > 0:
|
||||
summary_lines = "\n".join(restoration_summary[:5]) # Show first 5 users
|
||||
if len(restoration_summary) > 5:
|
||||
summary_lines += f"\n └─ ... and {len(restoration_summary) - 5} more users"
|
||||
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ OAuth Monitoring Tasks Restored\n"
|
||||
f" ├─ Tasks Created: {total_created}\n"
|
||||
f" ├─ Users Processed: {len(users_to_check)}\n"
|
||||
f" ├─ Platform Breakdown: {final_platform_summary}\n"
|
||||
+ summary_lines
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[OAuth Task Restoration] ✅ All users have required OAuth monitoring tasks. "
|
||||
f"Checked {len(users_to_check)} users. Platform breakdown: {final_platform_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[OAuth Task Restoration] Error restoring OAuth monitoring tasks: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_session_for_user, get_all_user_ids
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
from services.platform_insights_monitoring_service import create_platform_insights_task
|
||||
from services.oauth_token_monitoring_service import get_connected_platforms
|
||||
@@ -32,44 +32,36 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[Platform Insights Restoration] Starting platform insights task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Platform Insights Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Get all existing insights tasks to find unique user_ids
|
||||
existing_tasks = db.query(PlatformInsightsTask).all()
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Get all OAuth tasks to find users with connected platforms
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
user_ids_with_oauth = set(task.user_id for task in oauth_tasks)
|
||||
|
||||
# Platforms that support insights (GSC and Bing only)
|
||||
insights_platforms = ['gsc', 'bing']
|
||||
|
||||
# Get users who have OAuth tasks for GSC or Bing
|
||||
users_to_check = set()
|
||||
for task in oauth_tasks:
|
||||
if task.platform in insights_platforms:
|
||||
users_to_check.add(task.user_id)
|
||||
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] Found {len(existing_tasks)} existing insights tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Checking {len(users_to_check)} users "
|
||||
f"with GSC/Bing OAuth connections."
|
||||
)
|
||||
|
||||
if not users_to_check:
|
||||
logger.warning("[Platform Insights Restoration] No users with GSC/Bing connections found")
|
||||
return
|
||||
|
||||
total_created = 0
|
||||
restoration_summary = []
|
||||
|
||||
for user_id in users_to_check:
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
restoration_summary = []
|
||||
|
||||
# Platforms that support insights (GSC and Bing only)
|
||||
insights_platforms = ['gsc', 'bing']
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.debug(f"[Platform Insights Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Get existing insights tasks
|
||||
try:
|
||||
existing_tasks = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_tasks)
|
||||
except Exception as table_error:
|
||||
# Table might not exist
|
||||
continue
|
||||
|
||||
# Get connected platforms for this user
|
||||
connected_platforms = get_connected_platforms(user_id)
|
||||
|
||||
@@ -77,17 +69,10 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
insights_connected = [p for p in connected_platforms if p in insights_platforms]
|
||||
|
||||
if not insights_connected:
|
||||
logger.debug(
|
||||
f"[Platform Insights Restoration] No GSC/Bing connections for user {user_id[:20]}..., skipping"
|
||||
)
|
||||
continue
|
||||
|
||||
# Check which platforms are missing insights tasks
|
||||
existing_platforms = {
|
||||
task.platform
|
||||
for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
}
|
||||
existing_platforms = {task.platform for task in existing_tasks}
|
||||
|
||||
missing_platforms = [
|
||||
platform
|
||||
@@ -101,11 +86,10 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
try:
|
||||
# Don't fetch site_url here - it requires API calls
|
||||
# The executor will fetch it when the task runs (weekly)
|
||||
# This avoids API calls during restoration
|
||||
result = create_platform_insights_task(
|
||||
user_id=user_id,
|
||||
platform=platform,
|
||||
site_url=None, # Will be fetched by executor when task runs
|
||||
site_url=None,
|
||||
db=db
|
||||
)
|
||||
|
||||
@@ -125,28 +109,28 @@ async def restore_platform_insights_tasks(scheduler):
|
||||
f"for user {user_id}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"[Platform Insights Restoration] Error processing user {user_id}: {e}"
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning(f"[Platform Insights Restoration] Error processing user {user_id}: {e}")
|
||||
continue
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
|
||||
"\n".join(restoration_summary)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
|
||||
f"Processed {users_processed} users."
|
||||
)
|
||||
|
||||
# Log summary
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ Created {total_created} platform insights tasks:\n" +
|
||||
"\n".join(restoration_summary)
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Platform Insights Restoration] ✅ All users have required platform insights tasks. "
|
||||
f"Checked {len(users_to_check)} users, found {len(existing_tasks)} existing tasks."
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Platform Insights Restoration] Error during restoration: {e}", exc_info=True)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -19,7 +19,7 @@ from .exception_handler import (
|
||||
SchedulerExceptionHandler, SchedulerException, TaskExecutionError, DatabaseError,
|
||||
TaskLoaderError, SchedulerConfigError
|
||||
)
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from utils.logger_utils import get_service_logger
|
||||
from ..utils.user_job_store import get_user_job_store_name
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
@@ -28,6 +28,7 @@ from .job_restoration import restore_persona_jobs
|
||||
from .oauth_task_restoration import restore_oauth_monitoring_tasks
|
||||
from .website_analysis_task_restoration import restore_website_analysis_tasks
|
||||
from .platform_insights_task_restoration import restore_platform_insights_tasks
|
||||
from .advertools_task_restoration import restore_advertools_tasks
|
||||
from .check_cycle_handler import check_and_execute_due_tasks
|
||||
from .task_execution_handler import execute_task_async
|
||||
|
||||
@@ -185,13 +186,17 @@ class TaskScheduler:
|
||||
await restore_persona_jobs(self)
|
||||
|
||||
# Restore/create missing OAuth token monitoring tasks for connected platforms
|
||||
await restore_oauth_monitoring_tasks(self)
|
||||
total_oauth_tasks = await restore_oauth_monitoring_tasks(self)
|
||||
oauth_tasks_count = total_oauth_tasks
|
||||
|
||||
# Restore/create missing website analysis tasks for users who completed onboarding
|
||||
await restore_website_analysis_tasks(self)
|
||||
website_analysis_tasks_count = await restore_website_analysis_tasks(self)
|
||||
|
||||
# Restore/create missing platform insights tasks for users with connected GSC/Bing
|
||||
await restore_platform_insights_tasks(self)
|
||||
platform_insights_tasks_count = await restore_platform_insights_tasks(self)
|
||||
|
||||
# Restore/create missing Advertools intelligence tasks
|
||||
advertools_tasks_count = await restore_advertools_tasks(self)
|
||||
|
||||
# Validate and rebuild cumulative stats if needed
|
||||
await self._validate_and_rebuild_cumulative_stats()
|
||||
@@ -203,99 +208,47 @@ class TaskScheduler:
|
||||
|
||||
# Count OAuth token monitoring tasks from database (recurring weekly tasks)
|
||||
oauth_tasks_count = 0
|
||||
oauth_tasks_details = []
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Count active tasks
|
||||
oauth_tasks_count = db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.status == 'active'
|
||||
).count()
|
||||
|
||||
# Get all tasks (for detailed logging)
|
||||
all_oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
total_oauth_tasks = len(all_oauth_tasks)
|
||||
|
||||
# Show platform breakdown for ALL tasks (active and inactive)
|
||||
all_platforms = {}
|
||||
active_platforms = {}
|
||||
for task in all_oauth_tasks:
|
||||
all_platforms[task.platform] = all_platforms.get(task.platform, 0) + 1
|
||||
if task.status == 'active':
|
||||
active_platforms[task.platform] = active_platforms.get(task.platform, 0) + 1
|
||||
|
||||
if total_oauth_tasks > 0:
|
||||
# Log details about all tasks (not just active)
|
||||
for task in all_oauth_tasks:
|
||||
oauth_tasks_details.append(
|
||||
f"user={task.user_id}, platform={task.platform}, status={task.status}"
|
||||
)
|
||||
|
||||
if total_oauth_tasks > 0 and oauth_tasks_count == 0:
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {total_oauth_tasks} OAuth monitoring tasks in database, "
|
||||
f"but {oauth_tasks_count} are active. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"Task details: {', '.join(oauth_tasks_details[:5])}" # Limit to first 5 for readability
|
||||
)
|
||||
elif oauth_tasks_count > 0:
|
||||
# Show platform breakdown for active tasks
|
||||
active_platform_summary = ", ".join([f"{platform}: {count}" for platform, count in sorted(active_platforms.items())])
|
||||
all_platform_summary = ", ".join([f"{p}: {c}" for p, c in sorted(all_platforms.items())])
|
||||
|
||||
# Check for missing platforms (expected: gsc, bing, wordpress, wix)
|
||||
expected_platforms = ['gsc', 'bing', 'wordpress', 'wix']
|
||||
missing_in_db = [p for p in expected_platforms if p not in all_platforms]
|
||||
|
||||
if missing_in_db:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}. "
|
||||
f"⚠️ Missing platforms (not connected or no tasks): {', '.join(missing_in_db)}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Scheduler] Found {oauth_tasks_count} active OAuth monitoring tasks "
|
||||
f"(total: {total_oauth_tasks}). Active platforms: {active_platform_summary}. "
|
||||
f"All platforms: {all_platform_summary}"
|
||||
)
|
||||
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not get OAuth token monitoring tasks count: {e}. "
|
||||
f"This may indicate the oauth_token_monitoring_tasks table doesn't exist yet or "
|
||||
f"tasks haven't been created. Error type: {type(e).__name__}"
|
||||
)
|
||||
|
||||
# Get website analysis tasks count
|
||||
website_analysis_tasks_count = 0
|
||||
try:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis tasks count: {e}")
|
||||
|
||||
# Get platform insights tasks count
|
||||
platform_insights_tasks_count = 0
|
||||
try:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count = db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights tasks count: {e}")
|
||||
advertools_tasks_count = 0
|
||||
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
continue
|
||||
|
||||
try:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
oauth_tasks_count += db.query(OAuthTokenMonitoringTask).filter(
|
||||
OAuthTokenMonitoringTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks_count += db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks_count += db.query(PlatformInsightsTask).filter(
|
||||
PlatformInsightsTask.status == 'active'
|
||||
).count()
|
||||
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
advertools_tasks_count += db.query(AdvertoolsTask).filter(
|
||||
AdvertoolsTask.status == 'active'
|
||||
).count()
|
||||
finally:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Error counting tasks for user {user_id}: {e}")
|
||||
|
||||
# Calculate job counts
|
||||
apscheduler_recurring = 1 # check_due_tasks
|
||||
apscheduler_one_time = len(all_jobs) - 1
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count
|
||||
total_recurring = apscheduler_recurring + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
|
||||
total_jobs = len(all_jobs) + oauth_tasks_count + website_analysis_tasks_count + platform_insights_tasks_count + advertools_tasks_count
|
||||
|
||||
# Build comprehensive startup log message
|
||||
recurring_breakdown = f"check_due_tasks: {apscheduler_recurring}"
|
||||
@@ -305,6 +258,8 @@ class TaskScheduler:
|
||||
recurring_breakdown += f", Website analysis: {website_analysis_tasks_count}"
|
||||
if platform_insights_tasks_count > 0:
|
||||
recurring_breakdown += f", Platform insights: {platform_insights_tasks_count}"
|
||||
if advertools_tasks_count > 0:
|
||||
recurring_breakdown += f", Advertools: {advertools_tasks_count}"
|
||||
|
||||
startup_lines = [
|
||||
f"[Scheduler] ✅ Task Scheduler Started",
|
||||
@@ -347,7 +302,7 @@ class TaskScheduler:
|
||||
|
||||
if user_id_from_job:
|
||||
try:
|
||||
db = get_db_session()
|
||||
db = get_session_for_user(user_id_from_job)
|
||||
if db:
|
||||
user_job_store = get_user_job_store_name(user_id_from_job, db)
|
||||
if user_job_store == 'default':
|
||||
@@ -357,6 +312,8 @@ class TaskScheduler:
|
||||
)
|
||||
user_context = f" | User: {user_id_from_job} | Store: {user_job_store}"
|
||||
db.close()
|
||||
else:
|
||||
user_context = f" | User: {user_id_from_job} | DB: Not Found"
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {user_id_from_job}: {e}. "
|
||||
@@ -370,134 +327,172 @@ class TaskScheduler:
|
||||
# Show ALL OAuth tasks (active and inactive) for complete visibility
|
||||
if total_oauth_tasks > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Get ALL tasks, not just active ones
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.oauth_token_monitoring_models import OAuthTokenMonitoringTask
|
||||
# Get ALL tasks for this user
|
||||
oauth_tasks = db.query(OAuthTokenMonitoringTask).all()
|
||||
|
||||
for idx, task in enumerate(oauth_tasks):
|
||||
is_last = idx == len(oauth_tasks) - 1 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified prefix logic for multi-user list
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
if user_job_store == 'default':
|
||||
logger.debug(
|
||||
f"[Scheduler] Job store extraction returned 'default' for user {task.user_id}. "
|
||||
f"This may indicate no onboarding data or website URL not found."
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
||||
f"Using 'default'. Error type: {type(e).__name__}"
|
||||
)
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
# Include status in the log line for visibility
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Scheduler] Could not extract job store name for user {task.user_id}: {e}. "
|
||||
f"Using 'default'. Error type: {type(e).__name__}"
|
||||
)
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
# Include status in the log line for visibility
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: oauth_token_monitoring_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {task.platform} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking OAuth tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get OAuth token monitoring task details: {e}")
|
||||
|
||||
# Add website analysis tasks details
|
||||
if website_analysis_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
website_analysis_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
|
||||
for idx, task in enumerate(website_analysis_tasks):
|
||||
is_last = idx == len(website_analysis_tasks) - 1 and platform_insights_tasks_count == 0 and len(all_jobs) == 0 and total_oauth_tasks == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
frequency = f"Every {task.frequency_days} days"
|
||||
task_type_label = "User Website" if task.task_type == 'user_website' else "Competitor"
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
website_display = task.website_url[:50] + "..." if task.website_url and len(task.website_url) > 50 else (task.website_url or 'N/A')
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: website_analysis_{task.task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger ({frequency}) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type_label} | URL: {website_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking website analysis tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get website analysis task details: {e}")
|
||||
|
||||
# Add platform insights tasks details
|
||||
if platform_insights_tasks_count > 0:
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0
|
||||
prefix = " └─" if is_last else " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask
|
||||
platform_insights_tasks = db.query(PlatformInsightsTask).all()
|
||||
|
||||
for idx, task in enumerate(platform_insights_tasks):
|
||||
is_last = idx == len(platform_insights_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─" # Simplified
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_check.isoformat() if task.next_check else 'Not scheduled'
|
||||
platform_label = task.platform.upper() if task.platform else 'Unknown'
|
||||
site_display = task.site_url[:50] + "..." if task.site_url and len(task.site_url) > 50 else (task.site_url or 'N/A')
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: platform_insights_{task.platform}_{task.user_id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Platform: {platform_label} | Site: {site_display} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking platform insights tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get platform insights task details: {e}")
|
||||
|
||||
# Add Advertools tasks details
|
||||
if advertools_tasks_count > 0:
|
||||
try:
|
||||
user_ids = get_all_user_ids()
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
advertools_tasks = db.query(AdvertoolsTask).all()
|
||||
|
||||
for idx, task in enumerate(advertools_tasks):
|
||||
is_last = idx == len(advertools_tasks) - 1 and len(all_jobs) == 0 and total_oauth_tasks == 0 and website_analysis_tasks_count == 0 and platform_insights_tasks_count == 0 and user_id == user_ids[-1]
|
||||
prefix = " ├─"
|
||||
|
||||
try:
|
||||
user_job_store = get_user_job_store_name(task.user_id, db)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract job store name for user {task.user_id}: {e}")
|
||||
user_job_store = 'default'
|
||||
|
||||
next_check = task.next_execution.isoformat() if task.next_execution else 'Not scheduled'
|
||||
task_type = task.payload.get('type') if task.payload else 'unknown'
|
||||
status_indicator = "✅" if task.status == 'active' else f"[{task.status}]"
|
||||
|
||||
startup_lines.append(
|
||||
f"{prefix} Job: advertools_{task_type}_{task.user_id}_{task.id} | "
|
||||
f"Trigger: CronTrigger (Weekly) | Next Run: {next_check} | "
|
||||
f"User: {task.user_id} | Store: {user_job_store} | Type: {task_type} {status_indicator}"
|
||||
)
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error checking Advertools tasks for user {user_id}: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not get Advertools task details: {e}")
|
||||
|
||||
# Log comprehensive startup information in single message
|
||||
logger.warning("\n".join(startup_lines))
|
||||
|
||||
# Save scheduler start event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='start',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=initial_interval,
|
||||
active_strategies_count=active_strategies,
|
||||
event_data={
|
||||
'registered_types': registered_types,
|
||||
'total_jobs': total_jobs,
|
||||
'recurring_jobs': total_recurring,
|
||||
'one_time_jobs': apscheduler_one_time,
|
||||
'oauth_monitoring_tasks': oauth_tasks_count,
|
||||
'website_analysis_tasks': website_analysis_tasks_count,
|
||||
'platform_insights_tasks': platform_insights_tasks_count
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler start event log: {e}")
|
||||
# Disabled in multi-tenant mode as there is no global DB
|
||||
# try:
|
||||
# db = get_db_session()
|
||||
# if db:
|
||||
# event_log = SchedulerEventLog(...)
|
||||
# db.add(event_log)
|
||||
# db.commit()
|
||||
# db.close()
|
||||
# except Exception as e:
|
||||
# logger.warning(f"Failed to save scheduler start event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to start scheduler: {e}")
|
||||
@@ -544,25 +539,26 @@ class TaskScheduler:
|
||||
logger.warning(shutdown_message)
|
||||
|
||||
# Save scheduler stop event to database
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='stop',
|
||||
event_date=datetime.utcnow(),
|
||||
check_interval_minutes=self.current_check_interval_minutes,
|
||||
event_data={
|
||||
'total_checks': total_checks,
|
||||
'total_executed': total_executed,
|
||||
'total_failed': total_failed,
|
||||
'jobs_cancelled': len(all_jobs_before)
|
||||
}
|
||||
)
|
||||
db.add(event_log)
|
||||
db.commit()
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save scheduler stop event log: {e}")
|
||||
# Disabled in multi-tenant mode as there is no global DB
|
||||
# try:
|
||||
# db = get_db_session()
|
||||
# if db:
|
||||
# event_log = SchedulerEventLog(
|
||||
# event_type='stop',
|
||||
# event_date=datetime.utcnow(),
|
||||
# check_interval_minutes=self.current_check_interval_minutes,
|
||||
# event_data={
|
||||
# 'total_checks': total_checks,
|
||||
# 'total_executed': total_executed,
|
||||
# 'total_failed': total_failed,
|
||||
# 'jobs_cancelled': len(all_jobs_before)
|
||||
# }
|
||||
# )
|
||||
# db.add(event_log)
|
||||
# db.commit()
|
||||
# db.close()
|
||||
# except Exception as e:
|
||||
# logger.warning(f"Failed to save scheduler stop event log: {e}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping scheduler: {e}")
|
||||
@@ -630,12 +626,8 @@ class TaskScheduler:
|
||||
return
|
||||
|
||||
try:
|
||||
db = get_db_session()
|
||||
if db:
|
||||
await adjust_check_interval_if_needed(self, db)
|
||||
db.close()
|
||||
else:
|
||||
logger.warning("Could not get database session for interval adjustment")
|
||||
# Multi-tenant aware adjustment (iterates all users internally)
|
||||
await adjust_check_interval_if_needed(self)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error triggering interval adjustment: {e}")
|
||||
|
||||
@@ -643,125 +635,14 @@ class TaskScheduler:
|
||||
"""
|
||||
Validate cumulative stats on scheduler startup and rebuild if needed.
|
||||
This ensures cumulative stats are accurate after restarts.
|
||||
|
||||
NOTE: Disabled in multi-tenant mode as there is no global database for cumulative stats.
|
||||
TODO: Implement per-user cumulative stats or a global admin database.
|
||||
"""
|
||||
db = None
|
||||
try:
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Scheduler] Could not get database session for cumulative stats validation")
|
||||
return
|
||||
|
||||
try:
|
||||
from models.scheduler_cumulative_stats_model import SchedulerCumulativeStats
|
||||
from models.scheduler_models import SchedulerEventLog
|
||||
from sqlalchemy import func
|
||||
|
||||
# Get cumulative stats from persistent table
|
||||
cumulative_stats = db.query(SchedulerCumulativeStats).filter(
|
||||
SchedulerCumulativeStats.id == 1
|
||||
).first()
|
||||
|
||||
# Count check_cycle events in database
|
||||
check_cycle_count = db.query(func.count(SchedulerEventLog.id)).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).scalar() or 0
|
||||
|
||||
if cumulative_stats:
|
||||
# Validate: cumulative stats should match event log count
|
||||
if cumulative_stats.total_check_cycles != check_cycle_count:
|
||||
logger.warning(
|
||||
f"[Scheduler] ⚠️ Cumulative stats validation failed on startup: "
|
||||
f"cumulative_stats.total_check_cycles={cumulative_stats.total_check_cycles} "
|
||||
f"vs event_logs.count={check_cycle_count}. "
|
||||
f"Rebuilding cumulative stats from event logs..."
|
||||
)
|
||||
|
||||
# Rebuild from event logs
|
||||
result = db.query(
|
||||
func.count(SchedulerEventLog.id),
|
||||
func.sum(SchedulerEventLog.tasks_found),
|
||||
func.sum(SchedulerEventLog.tasks_executed),
|
||||
func.sum(SchedulerEventLog.tasks_failed)
|
||||
).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).first()
|
||||
|
||||
if result:
|
||||
total_cycles = result[0] if result[0] is not None else 0
|
||||
total_found = result[1] if result[1] is not None else 0
|
||||
total_executed = result[2] if result[2] is not None else 0
|
||||
total_failed = result[3] if result[3] is not None else 0
|
||||
|
||||
# Update cumulative stats
|
||||
cumulative_stats.total_check_cycles = int(total_cycles)
|
||||
cumulative_stats.cumulative_tasks_found = int(total_found)
|
||||
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
||||
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Rebuilt cumulative stats on startup: "
|
||||
f"cycles={total_cycles}, found={total_found}, "
|
||||
f"executed={total_executed}, failed={total_failed}"
|
||||
)
|
||||
else:
|
||||
logger.warning("[Scheduler] No check_cycle events found to rebuild from")
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Cumulative stats validated: "
|
||||
f"{cumulative_stats.total_check_cycles} check cycles match event logs"
|
||||
)
|
||||
else:
|
||||
# Cumulative stats table doesn't exist, create it from event logs
|
||||
logger.warning(
|
||||
"[Scheduler] Cumulative stats table not found. "
|
||||
"Creating from event logs..."
|
||||
)
|
||||
|
||||
result = db.query(
|
||||
func.count(SchedulerEventLog.id),
|
||||
func.sum(SchedulerEventLog.tasks_found),
|
||||
func.sum(SchedulerEventLog.tasks_executed),
|
||||
func.sum(SchedulerEventLog.tasks_failed)
|
||||
).filter(
|
||||
SchedulerEventLog.event_type == 'check_cycle'
|
||||
).first()
|
||||
|
||||
if result:
|
||||
total_cycles = result[0] if result[0] is not None else 0
|
||||
total_found = result[1] if result[1] is not None else 0
|
||||
total_executed = result[2] if result[2] is not None else 0
|
||||
total_failed = result[3] if result[3] is not None else 0
|
||||
|
||||
cumulative_stats = SchedulerCumulativeStats.get_or_create(db)
|
||||
cumulative_stats.total_check_cycles = int(total_cycles)
|
||||
cumulative_stats.cumulative_tasks_found = int(total_found)
|
||||
cumulative_stats.cumulative_tasks_executed = int(total_executed)
|
||||
cumulative_stats.cumulative_tasks_failed = int(total_failed)
|
||||
cumulative_stats.last_updated = datetime.utcnow()
|
||||
cumulative_stats.updated_at = datetime.utcnow()
|
||||
|
||||
db.commit()
|
||||
logger.warning(
|
||||
f"[Scheduler] ✅ Created cumulative stats from event logs: "
|
||||
f"cycles={total_cycles}, found={total_found}, "
|
||||
f"executed={total_executed}, failed={total_failed}"
|
||||
)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"[Scheduler] Cumulative stats model not available. "
|
||||
"Migration may not have been run yet. "
|
||||
"Run: python backend/scripts/run_cumulative_stats_migration.py"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"[Scheduler] Error validating cumulative stats: {e}", exc_info=True)
|
||||
finally:
|
||||
if db:
|
||||
db.close()
|
||||
logger.info("[Scheduler] Cumulative stats validation skipped (multi-tenant mode)")
|
||||
return
|
||||
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None) -> Optional[Dict[str, Any]]:
|
||||
async def _process_task_type(self, task_type: str, db: Session, cycle_summary: Dict[str, Any] = None, user_id: str = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Process due tasks for a specific task type.
|
||||
|
||||
@@ -816,7 +697,7 @@ class TaskScheduler:
|
||||
# Execute task asynchronously
|
||||
# Note: Each task gets its own database session to prevent concurrent access issues
|
||||
execution_task = asyncio.create_task(
|
||||
execute_task_async(self, task_type, task, summary)
|
||||
execute_task_async(self, task_type, task, summary, user_id=user_id)
|
||||
)
|
||||
|
||||
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
||||
@@ -970,7 +851,7 @@ class TaskScheduler:
|
||||
job_store_name = 'default'
|
||||
if user_id:
|
||||
try:
|
||||
db = get_db_session()
|
||||
db = get_session_for_user(user_id)
|
||||
if db:
|
||||
job_store_name = get_user_job_store_name(user_id, db)
|
||||
db.close()
|
||||
@@ -996,27 +877,28 @@ class TaskScheduler:
|
||||
logger.warning(log_message)
|
||||
|
||||
# Log job scheduling to event log for dashboard
|
||||
try:
|
||||
event_db = get_db_session()
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='job_scheduled',
|
||||
event_date=datetime.utcnow(),
|
||||
job_id=job_id,
|
||||
job_type='one_time',
|
||||
user_id=user_id,
|
||||
event_data={
|
||||
'function_name': func_name,
|
||||
'job_store': job_store_name,
|
||||
'scheduled_for': run_date.isoformat(),
|
||||
'replace_existing': replace_existing
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to log job scheduling event: {e}")
|
||||
if user_id:
|
||||
try:
|
||||
event_db = get_session_for_user(user_id)
|
||||
if event_db:
|
||||
event_log = SchedulerEventLog(
|
||||
event_type='job_scheduled',
|
||||
event_date=datetime.utcnow(),
|
||||
job_id=job_id,
|
||||
job_type='one_time',
|
||||
user_id=user_id,
|
||||
event_data={
|
||||
'function_name': func_name,
|
||||
'job_store': job_store_name,
|
||||
'scheduled_for': run_date.isoformat(),
|
||||
'replace_existing': replace_existing
|
||||
}
|
||||
)
|
||||
event_db.add(event_log)
|
||||
event_db.commit()
|
||||
event_db.close()
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to log job scheduling event: {e}")
|
||||
|
||||
return job_id
|
||||
except Exception as e:
|
||||
@@ -1027,3 +909,14 @@ class TaskScheduler:
|
||||
"""Check if scheduler is running."""
|
||||
return self._running
|
||||
|
||||
async def execute_task_by_type(self, task_type: str, user_id: str, payload: Dict[str, Any]):
    """
    Execute a task by type and payload immediately.

    Used for one-time tasks triggered by system events. A lightweight stub
    carrying ``user_id``, ``payload`` and a generated ``id`` is built and
    handed to ``execute_task_async`` with ``execution_source="manual"``.

    Args:
        task_type: Task type identifier forwarded to ``execute_task_async``.
        user_id: ID of the user the task is executed for (stored on the stub).
        payload: Task payload (stored on the stub).

    Returns:
        None. The result of ``execute_task_async`` is awaited and discarded.
    """
    import time
    from collections import namedtuple

    # Minimal stand-in for a task record: only the three attributes set here exist.
    TaskStub = namedtuple('TaskStub', ['user_id', 'payload', 'id'])

    # time.time() is the true Unix epoch. The former datetime.utcnow().timestamp()
    # interpreted the naive UTC value as local time, so the number embedded in
    # the id was shifted by the host's UTC offset.
    task_stub = TaskStub(user_id=user_id, payload=payload, id=f"manual_{time.time()}")

    await execute_task_async(self, task_type, task_stub, execution_source="manual")
|
||||
|
||||
|
||||
@@ -23,7 +23,8 @@ async def execute_task_async(
|
||||
task_type: str,
|
||||
task: Any,
|
||||
summary: Optional[Dict[str, Any]] = None,
|
||||
execution_source: str = "scheduler" # "scheduler" or "manual"
|
||||
execution_source: str = "scheduler", # "scheduler" or "manual"
|
||||
user_id: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Execute a single task asynchronously with user isolation.
|
||||
@@ -38,21 +39,25 @@ async def execute_task_async(
|
||||
task_type: Type of task
|
||||
task: Task instance from database (detached from original session)
|
||||
summary: Optional summary dict to update with execution results
|
||||
user_id: Optional user ID for user isolation (overrides extraction from task)
|
||||
"""
|
||||
task_id = f"{task_type}_{getattr(task, 'id', id(task))}"
|
||||
db = None
|
||||
user_id = None
|
||||
|
||||
try:
|
||||
# Extract user context if available (for user isolation tracking)
|
||||
try:
|
||||
if hasattr(task, 'strategy') and task.strategy:
|
||||
user_id = getattr(task.strategy, 'user_id', None)
|
||||
elif hasattr(task, 'strategy_id') and task.strategy_id:
|
||||
# Will query user_id after we have db session
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
|
||||
if user_id is None:
|
||||
try:
|
||||
if hasattr(task, 'strategy') and task.strategy:
|
||||
user_id = getattr(task.strategy, 'user_id', None)
|
||||
elif hasattr(task, 'strategy_id') and task.strategy_id:
|
||||
# Will query user_id after we have db session
|
||||
pass
|
||||
elif hasattr(task, 'user_id') and task.user_id:
|
||||
# Direct user_id on task object
|
||||
user_id = task.user_id
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not extract user_id before execution for task {task_id}: {e}")
|
||||
|
||||
# Log task execution start (detailed for important tasks)
|
||||
task_db_id = getattr(task, 'id', None)
|
||||
@@ -61,7 +66,7 @@ async def execute_task_async(
|
||||
|
||||
# Create a new database session for this async task
|
||||
# SQLAlchemy sessions are not async-safe and cannot be shared across concurrent tasks
|
||||
db = get_db_session()
|
||||
db = get_db_session(user_id)
|
||||
if db is None:
|
||||
error = DatabaseError(
|
||||
message=f"Failed to get database session for task {task_id}",
|
||||
@@ -79,7 +84,15 @@ async def execute_task_async(
|
||||
|
||||
# Merge the detached task object into this session
|
||||
# The task object was loaded in a different session and is now detached
|
||||
if object_session(task) is None:
|
||||
from sqlalchemy.inspection import inspect
|
||||
is_model = False
|
||||
try:
|
||||
inspect(task)
|
||||
is_model = True
|
||||
except:
|
||||
pass
|
||||
|
||||
if is_model and object_session(task) is None:
|
||||
# Task is detached, need to merge it into this session
|
||||
task = db.merge(task)
|
||||
|
||||
|
||||
@@ -4,15 +4,13 @@ Automatically creates missing website analysis tasks for users who completed onb
|
||||
but don't have monitoring tasks created yet.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_all_user_ids, get_session_for_user
|
||||
from models.website_analysis_monitoring_models import WebsiteAnalysisTask
|
||||
from services.website_analysis_monitoring_service import create_website_analysis_tasks
|
||||
from services.website_analysis_monitoring_service import generate_website_analysis_tasks_task
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
|
||||
# Use service logger for consistent logging (WARNING level visible in production)
|
||||
logger = get_service_logger("website_analysis_restoration")
|
||||
@@ -32,162 +30,103 @@ async def restore_website_analysis_tasks(scheduler):
|
||||
"""
|
||||
try:
|
||||
logger.warning("[Website Analysis Restoration] Starting website analysis task restoration...")
|
||||
db = get_db_session()
|
||||
if not db:
|
||||
logger.warning("[Website Analysis Restoration] Could not get database session")
|
||||
return
|
||||
|
||||
try:
|
||||
# Check if table exists (may not exist if migration hasn't run)
|
||||
user_ids = get_all_user_ids()
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
total_existing_tasks = 0
|
||||
|
||||
for user_id in user_ids:
|
||||
try:
|
||||
existing_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
except Exception as table_error:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist: {table_error}. "
|
||||
f"Please run database migration: create_website_analysis_monitoring_tables.sql"
|
||||
)
|
||||
return
|
||||
|
||||
user_ids_with_tasks = set(task.user_id for task in existing_tasks)
|
||||
|
||||
# Log existing tasks breakdown by type
|
||||
existing_by_type = {}
|
||||
for task in existing_tasks:
|
||||
existing_by_type[task.task_type] = existing_by_type.get(task.task_type, 0) + 1
|
||||
|
||||
type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(existing_by_type.items())])
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Found {len(existing_tasks)} existing website analysis tasks "
|
||||
f"for {len(user_ids_with_tasks)} users. Types: {type_summary}"
|
||||
)
|
||||
|
||||
# Check users who already have at least one website analysis task
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
# Also query all users from onboarding who completed step 2 (website analysis)
|
||||
# to catch users who completed onboarding but tasks weren't created
|
||||
# Use the same pattern as OnboardingProgressService.get_onboarding_status()
|
||||
# Completion is tracked by: current_step >= 6 OR progress >= 100.0
|
||||
# This matches the logic used in home page redirect and persona generation checks
|
||||
try:
|
||||
from services.onboarding.progress_service import get_onboarding_progress_service
|
||||
from models.onboarding import OnboardingSession
|
||||
from sqlalchemy import or_
|
||||
db = get_session_for_user(user_id)
|
||||
if not db:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not get database session for user {user_id}")
|
||||
continue
|
||||
|
||||
# Get onboarding progress service (same as used throughout the app)
|
||||
progress_service = get_onboarding_progress_service()
|
||||
|
||||
# Query all sessions and filter using the same completion logic as the service
|
||||
# This matches the pattern in OnboardingProgressService.get_onboarding_status():
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
completed_sessions = db.query(OnboardingSession).filter(
|
||||
or_(
|
||||
OnboardingSession.current_step >= 6,
|
||||
OnboardingSession.progress >= 100.0
|
||||
)
|
||||
).all()
|
||||
|
||||
# Validate using the service method for consistency
|
||||
onboarding_user_ids = set()
|
||||
for session in completed_sessions:
|
||||
# Use the same service method as the rest of the app
|
||||
status = progress_service.get_onboarding_status(session.user_id)
|
||||
if status.get('is_completed', False):
|
||||
onboarding_user_ids.add(session.user_id)
|
||||
|
||||
all_user_ids = users_to_check.copy()
|
||||
|
||||
# Add users from onboarding who might not have tasks yet
|
||||
for user_id in onboarding_user_ids:
|
||||
if user_id not in all_user_ids:
|
||||
all_user_ids.append(user_id)
|
||||
|
||||
users_to_check = all_user_ids
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Checking {len(users_to_check)} users "
|
||||
f"({len(user_ids_with_tasks)} with existing tasks, "
|
||||
f"{len(onboarding_user_ids)} from onboarding sessions, "
|
||||
f"{len(onboarding_user_ids) - len(user_ids_with_tasks)} new users to check)"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not query onboarding users: {e}")
|
||||
# Fallback to users with existing tasks only
|
||||
users_to_check = list(user_ids_with_tasks)
|
||||
|
||||
total_created = 0
|
||||
users_processed = 0
|
||||
|
||||
for user_id in users_to_check:
|
||||
try:
|
||||
users_processed += 1
|
||||
|
||||
# Check if user already has tasks
|
||||
existing_user_tasks = [
|
||||
task for task in existing_tasks
|
||||
if task.user_id == user_id
|
||||
]
|
||||
|
||||
if existing_user_tasks:
|
||||
logger.debug(
|
||||
f"[Website Analysis Restoration] User {user_id} already has "
|
||||
f"{len(existing_user_tasks)} website analysis tasks, skipping"
|
||||
# Check if table exists
|
||||
try:
|
||||
existing_user_tasks = db.query(WebsiteAnalysisTask).filter(
|
||||
WebsiteAnalysisTask.user_id == user_id
|
||||
).all()
|
||||
total_existing_tasks += len(existing_user_tasks)
|
||||
except Exception as table_error:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] ⚠️ WebsiteAnalysisTask table may not exist for user {user_id}: {table_error}"
|
||||
)
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
|
||||
f"but has no website analysis tasks. Creating tasks..."
|
||||
)
|
||||
|
||||
# Create missing tasks
|
||||
result = create_website_analysis_tasks(user_id=user_id, db=db)
|
||||
|
||||
if result.get('success'):
|
||||
tasks_count = result.get('tasks_created', 0)
|
||||
total_created += tasks_count
|
||||
if existing_user_tasks:
|
||||
# User has tasks, we assume they are fine for now
|
||||
continue
|
||||
|
||||
# Check onboarding status
|
||||
try:
|
||||
from services.onboarding.progress_service import OnboardingProgressService
|
||||
|
||||
# Use a local instance or static logic if service expects global DB (it shouldn't anymore)
|
||||
# We can query OnboardingSession directly
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
continue
|
||||
|
||||
# is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
is_completed = (session.current_step >= 6) or (session.progress >= 100.0)
|
||||
|
||||
if not is_completed:
|
||||
continue
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Created {tasks_count} website analysis tasks "
|
||||
f"for user {user_id}"
|
||||
)
|
||||
else:
|
||||
error = result.get('error', 'Unknown error')
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ⚠️ Could not create tasks for user {user_id}: {error}"
|
||||
f"[Website Analysis Restoration] ⚠️ User {user_id} completed onboarding "
|
||||
f"but has no website analysis tasks. Creating tasks..."
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] Error checking/creating tasks for user {user_id}: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
continue
|
||||
|
||||
# Final summary log
|
||||
final_existing_tasks = db.query(WebsiteAnalysisTask).all()
|
||||
final_by_type = {}
|
||||
for task in final_existing_tasks:
|
||||
final_by_type[task.task_type] = final_by_type.get(task.task_type, 0) + 1
|
||||
|
||||
final_type_summary = ", ".join([f"{t}: {c}" for t, c in sorted(final_by_type.items())])
|
||||
|
||||
if total_created > 0:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Created {total_created} missing website analysis tasks. "
|
||||
f"Processed {users_processed} users. Final type breakdown: {final_type_summary}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ All users have required website analysis tasks. "
|
||||
f"Checked {users_processed} users, found {len(existing_tasks)} existing tasks. "
|
||||
f"Type breakdown: {final_type_summary}"
|
||||
)
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
job_id = f"website_analysis_tasks_{user_id}"
|
||||
existing_jobs = [j for j in scheduler.scheduler.get_jobs() if j.id == job_id]
|
||||
if existing_jobs:
|
||||
continue
|
||||
|
||||
run_date = datetime.now(timezone.utc) + timedelta(minutes=5)
|
||||
scheduler.schedule_one_time_task(
|
||||
func=generate_website_analysis_tasks_task,
|
||||
run_date=run_date,
|
||||
job_id=job_id,
|
||||
kwargs={"user_id": user_id},
|
||||
replace_existing=True,
|
||||
)
|
||||
total_created += 1
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Scheduled website analysis task creation "
|
||||
f"for user {user_id} at {run_date.isoformat()}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Could not check onboarding for user {user_id}: {e}")
|
||||
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[Website Analysis Restoration] Error processing user {user_id}: {e}")
|
||||
|
||||
logger.warning(
|
||||
f"[Website Analysis Restoration] ✅ Completed. "
|
||||
f"Processed {users_processed} users. "
|
||||
f"Found {total_existing_tasks} existing tasks. "
|
||||
f"Created {total_created} new tasks."
|
||||
)
|
||||
|
||||
return total_existing_tasks + total_created
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"[Website Analysis Restoration] Error restoring website analysis tasks: {e}",
|
||||
exc_info=True
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
230
backend/services/scheduler/executors/advertools_executor.py
Normal file
230
backend/services/scheduler/executors/advertools_executor.py
Normal file
@@ -0,0 +1,230 @@
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
from services.seo.advertools_service import AdvertoolsService
|
||||
from services.seo_tools.sitemap_service import SitemapService
|
||||
from models.advertools_monitoring_models import AdvertoolsTask, AdvertoolsExecutionLog
|
||||
from models.onboarding import WebsiteAnalysis, OnboardingSession
|
||||
|
||||
class AdvertoolsExecutor:
    """
    Executor for Advertools-based SEO intelligence tasks.

    Handles 'content_audit' and 'site_health' task types.
    """

    # Rescheduling policy used when updating a task record after a run.
    DEFAULT_FREQUENCY_DAYS = 7      # used when the task has no frequency_days set
    MAX_BACKOFF_DAYS = 30           # ceiling for the exponential retry backoff
    MAX_CONSECUTIVE_FAILURES = 5    # the task is marked 'failed' at this count

    def __init__(self):
        self.advertools_service = AdvertoolsService()
        self.sitemap_service = SitemapService()
        self.logger = logger.bind(service="AdvertoolsExecutor")

    async def execute_task(self, task_stub: Any, db: Session, **kwargs) -> Dict[str, Any]:
        """
        Execute an Advertools intelligence task.

        Args:
            task_stub: Object exposing ``id``, ``user_id`` and ``payload``
                attributes (read with ``getattr``; a missing attribute is
                treated as ``None`` / an empty payload). The payload must
                provide ``type`` ('content_audit' or 'site_health') and
                ``website_url``.
            db: Database session
            **kwargs: Accepted for caller compatibility; not used here.

        Returns:
            Execution result dictionary. On an unexpected error this is
            ``{"success": False, "error": <message>}``; the exception is
            logged and not re-raised.
        """
        start_time = datetime.utcnow()
        task_id = getattr(task_stub, 'id', None)
        user_id = getattr(task_stub, 'user_id', None)
        payload = getattr(task_stub, 'payload', {}) or {}

        task_type = payload.get('type')
        website_url = payload.get('website_url')

        self.logger.info(f"🚀 Starting Advertools task {task_id} ({task_type}) for {website_url}")

        # Find the actual task record to update state
        task_record = None
        if isinstance(task_id, int):
            task_record = db.query(AdvertoolsTask).filter(AdvertoolsTask.id == task_id).first()

        try:
            if not website_url:
                raise ValueError("Missing website_url in payload")

            # 1. Discover exact sitemap URL first (essential for Advertools)
            discovered_sitemap = await self.sitemap_service.discover_sitemap_url(website_url)
            effective_url = discovered_sitemap if discovered_sitemap else website_url

            # Set status to running for UI feedback
            if task_record:
                task_record.status = 'running'
                db.commit()

            result = {}
            if task_type == 'content_audit':
                # Audit content themes using sample URLs taken from the sitemap.
                sitemap_result = await self.advertools_service.analyze_sitemap(effective_url)

                audit_urls = []
                if sitemap_result.get('success'):
                    # Use the sample URLs returned by the service
                    audit_urls = sitemap_result.get('metrics', {}).get('audit_sample_urls', [])

                if not audit_urls:
                    # Fallback to homepage if sitemap fails or empty
                    audit_urls = [website_url]

                # Run the audit on the sample
                result = await self.advertools_service.audit_content(audit_urls)

                if result.get('success'):
                    await self._update_persona_augmentation(user_id, website_url, result, db)

            elif task_type == 'site_health':
                # Check site health (freshness, velocity)
                result = await self.advertools_service.analyze_sitemap(effective_url)

                if result.get('success'):
                    await self._update_site_health_metrics(user_id, website_url, result, db)

            else:
                raise ValueError(f"Unknown task type: {task_type}")

            success = result.get('success', False)
            execution_time_ms = int((datetime.utcnow() - start_time).total_seconds() * 1000)

            # Update task state
            if task_record:
                now = datetime.utcnow()
                task_record.last_executed = now
                if success:
                    task_record.last_success = now
                    task_record.consecutive_failures = 0
                    task_record.status = 'active'

                    # Regular cadence; a success also resets any backoff.
                    freq_days = task_record.frequency_days or self.DEFAULT_FREQUENCY_DAYS
                    task_record.next_execution = now + timedelta(days=freq_days)
                else:
                    self._apply_failure_state(task_record, result.get('error', 'Unknown error'))

            # Create execution log
            if isinstance(task_id, int):
                log_entry = AdvertoolsExecutionLog(
                    task_id=task_id,
                    status='success' if success else 'failed',
                    result_data=result,
                    error_message=result.get('error'),
                    execution_time_ms=execution_time_ms
                )
                db.add(log_entry)

            db.commit()

            if success:
                self.logger.info(f"✅ Advertools task {task_id} completed successfully")
            else:
                self.logger.warning(f"⚠️ Advertools task {task_id} failed: {result.get('error')}")

            return result

        except Exception as e:
            db.rollback()
            self.logger.error(f"❌ Advertools task execution failed: {e}")

            # Record the failure with the same backoff/status policy as a
            # failed result; otherwise the task would stay in 'running' with
            # no rescheduling after an exception.
            if task_record:
                try:
                    task_record.last_executed = datetime.utcnow()
                    self._apply_failure_state(task_record, str(e))
                    db.commit()
                except Exception as update_err:
                    db.rollback()
                    self.logger.error(
                        f"Failed to record failure for Advertools task {task_id}: {update_err}"
                    )

            return {"success": False, "error": str(e)}

    def _apply_failure_state(self, task_record: Any, reason: str) -> None:
        """
        Mark ``task_record`` as failed for this run and schedule the retry.

        Increments ``consecutive_failures``, sets ``next_execution`` using an
        exponential backoff capped at ``MAX_BACKOFF_DAYS``, and moves the
        status out of the transient 'running' state: 'failed' once
        ``MAX_CONSECUTIVE_FAILURES`` is reached, otherwise 'active'.
        The caller is responsible for committing.
        """
        now = datetime.utcnow()
        task_record.last_failure = now
        task_record.failure_reason = reason
        task_record.consecutive_failures = (task_record.consecutive_failures or 0) + 1

        # Exponential backoff for repeated failures: freq, 2*freq, 4*freq, ...
        freq_days = task_record.frequency_days or self.DEFAULT_FREQUENCY_DAYS
        backoff_days = min(
            self.MAX_BACKOFF_DAYS,
            freq_days * (2 ** (task_record.consecutive_failures - 1)),
        )
        task_record.next_execution = now + timedelta(days=backoff_days)

        if task_record.consecutive_failures >= self.MAX_CONSECUTIVE_FAILURES:
            task_record.status = 'failed'
        else:
            task_record.status = 'active'

    async def _update_persona_augmentation(self, user_id: str, website_url: str, audit_result: Dict[str, Any], db: Session):
        """
        Updates the user's Brand Persona with discovered themes from the content audit.

        Writes ``augmented_themes`` and ``last_advertools_audit`` into the
        ``brand_analysis`` JSON of the user's WebsiteAnalysis row, and
        ``avg_content_length`` into ``content_strategy_insights`` when the
        audit reports ``avg_word_count``. Does not commit; the caller does.
        Returns without changes when the user has no onboarding session or
        no website analysis. Any exception is logged and re-raised.
        ``website_url`` is currently unused.
        """
        try:
            session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
            if not session:
                self.logger.warning(f"No onboarding session found for user {user_id}")
                return

            analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
            if not analysis:
                self.logger.warning(f"No website analysis found for user {user_id}")
                return

            # Update brand_analysis with augmented themes
            current_brand = analysis.brand_analysis or {}
            current_brand['augmented_themes'] = audit_result.get('themes', [])
            current_brand['last_advertools_audit'] = datetime.utcnow().isoformat()

            # Assign back explicitly: when brand_analysis was empty/None the
            # dict above is a new object and would otherwise never be stored.
            analysis.brand_analysis = current_brand

            # Force SQLAlchemy to detect change in JSON field
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(analysis, "brand_analysis")

            # Also update content_strategy_insights if relevant
            if 'avg_word_count' in audit_result:
                current_strategy = analysis.content_strategy_insights or {}
                current_strategy['avg_content_length'] = audit_result['avg_word_count']
                analysis.content_strategy_insights = current_strategy
                flag_modified(analysis, "content_strategy_insights")

            self.logger.info(f"Updated persona augmentation for {user_id}")

        except Exception as e:
            self.logger.error(f"Failed to update persona augmentation: {e}")
            raise

    async def _update_site_health_metrics(self, user_id: str, website_url: str, health_result: Dict[str, Any], db: Session):
        """
        Updates the WebsiteAnalysis with site health metrics (velocity, freshness).

        Stores a ``site_health`` summary and ``last_advertools_health_check``
        timestamp in the ``seo_audit`` JSON of the user's WebsiteAnalysis row.
        Does not commit; the caller does. Returns silently when the user has
        no onboarding session or no website analysis. Any exception is logged
        and re-raised. ``website_url`` is currently unused.
        """
        try:
            session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
            if not session:
                return

            analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
            if not analysis:
                return

            # Update seo_audit with health metrics
            current_seo = analysis.seo_audit or {}
            metrics = health_result.get('metrics', {})

            current_seo['site_health'] = {
                "total_urls": metrics.get('total_urls'),
                "publishing_velocity": metrics.get('publishing_velocity'),
                "stale_content_count": metrics.get('stale_content_count'),
                "stale_content_percentage": metrics.get('stale_content_percentage'),
                "top_pillars": metrics.get('top_pillars')
            }
            current_seo['last_advertools_health_check'] = datetime.utcnow().isoformat()

            analysis.seo_audit = current_seo

            # Force SQLAlchemy to detect change in JSON field
            from sqlalchemy.orm.attributes import flag_modified
            flag_modified(analysis, "seo_audit")

            self.logger.info(f"Updated site health metrics for {user_id}")

        except Exception as e:
            self.logger.error(f"Failed to update site health metrics: {e}")
            raise
|
||||
@@ -15,6 +15,7 @@ from ..core.exception_handler import TaskExecutionError, DatabaseError, Schedule
|
||||
from models.platform_insights_monitoring_models import PlatformInsightsTask, PlatformInsightsExecutionLog
|
||||
from services.bing_analytics_storage_service import BingAnalyticsStorageService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.database import get_user_db_path
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("bing_insights_executor")
|
||||
@@ -34,8 +35,6 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
def __init__(self):
|
||||
self.logger = logger
|
||||
self.exception_handler = SchedulerExceptionHandler()
|
||||
database_url = os.getenv('DATABASE_URL', 'sqlite:///alwrity.db')
|
||||
self.storage_service = BingAnalyticsStorageService(database_url)
|
||||
self.bing_oauth = BingOAuthService()
|
||||
|
||||
async def execute_task(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
@@ -53,6 +52,11 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
user_id = task.user_id
|
||||
site_url = task.site_url
|
||||
|
||||
# Initialize storage service for this user
|
||||
db_path = get_user_db_path(user_id)
|
||||
database_url = f'sqlite:///{db_path}'
|
||||
storage_service = BingAnalyticsStorageService(database_url)
|
||||
|
||||
try:
|
||||
self.logger.info(
|
||||
f"Executing Bing insights fetch: task_id={task.id} | "
|
||||
@@ -69,7 +73,7 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
db.flush()
|
||||
|
||||
# Fetch insights
|
||||
result = await self._fetch_insights(task, db)
|
||||
result = await self._fetch_insights(task, db, storage_service)
|
||||
|
||||
# Update execution log
|
||||
execution_time_ms = int((time.time() - start_time) * 1000)
|
||||
@@ -184,7 +188,7 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
|
||||
return error_result
|
||||
|
||||
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session) -> TaskExecutionResult:
|
||||
async def _fetch_insights(self, task: PlatformInsightsTask, db: Session, storage_service: BingAnalyticsStorageService) -> TaskExecutionResult:
|
||||
"""
|
||||
Fetch Bing insights data.
|
||||
|
||||
@@ -201,7 +205,7 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
if is_first_run:
|
||||
# First run: Try to load from cache
|
||||
self.logger.info(f"First run for Bing insights task {task.id} - loading cached data")
|
||||
cached_data = self._load_cached_data(user_id, site_url)
|
||||
cached_data = self._load_cached_data(user_id, site_url, storage_service)
|
||||
|
||||
if cached_data:
|
||||
self.logger.info(f"Loaded cached Bing data for user {user_id}")
|
||||
@@ -216,11 +220,11 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
else:
|
||||
# No cached data - try to fetch from API
|
||||
self.logger.info(f"No cached data found, fetching from Bing API")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
return await self._fetch_fresh_data(user_id, site_url, storage_service)
|
||||
else:
|
||||
# Subsequent run: Always fetch fresh data
|
||||
self.logger.info(f"Subsequent run for Bing insights task {task.id} - fetching fresh data")
|
||||
return await self._fetch_fresh_data(user_id, site_url)
|
||||
return await self._fetch_fresh_data(user_id, site_url, storage_service)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error fetching Bing insights for user {user_id}: {e}", exc_info=True)
|
||||
@@ -230,11 +234,11 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
result_data={'error': str(e)}
|
||||
)
|
||||
|
||||
def _load_cached_data(self, user_id: str, site_url: Optional[str]) -> Optional[Dict[str, Any]]:
|
||||
def _load_cached_data(self, user_id: str, site_url: Optional[str], storage_service: BingAnalyticsStorageService) -> Optional[Dict[str, Any]]:
|
||||
"""Load most recent cached Bing data from database."""
|
||||
try:
|
||||
# Get analytics summary from storage service
|
||||
summary = self.storage_service.get_analytics_summary(
|
||||
summary = storage_service.get_analytics_summary(
|
||||
user_id=user_id,
|
||||
site_url=site_url or '',
|
||||
days=30
|
||||
@@ -250,7 +254,7 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
self.logger.warning(f"Error loading cached Bing data: {e}")
|
||||
return None
|
||||
|
||||
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str]) -> TaskExecutionResult:
|
||||
async def _fetch_fresh_data(self, user_id: str, site_url: Optional[str], storage_service: BingAnalyticsStorageService) -> TaskExecutionResult:
|
||||
"""Fetch fresh Bing insights from API."""
|
||||
try:
|
||||
# Check if user has active tokens
|
||||
@@ -288,7 +292,7 @@ class BingInsightsExecutor(TaskExecutor):
|
||||
|
||||
# For now, use stored analytics data (Bing API integration can be added later)
|
||||
# This ensures we have data available even if the API class doesn't exist yet
|
||||
summary = self.storage_service.get_analytics_summary(user_id, site_url, days=30)
|
||||
summary = storage_service.get_analytics_summary(user_id, site_url, days=30)
|
||||
|
||||
if summary and isinstance(summary, dict):
|
||||
# Format insights data from stored analytics
|
||||
|
||||
@@ -0,0 +1,200 @@
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from api.content_planning.services.content_strategy.onboarding import OnboardingDataIntegrationService
|
||||
from models.website_analysis_monitoring_models import (
|
||||
DeepCompetitorAnalysisTask,
|
||||
DeepCompetitorAnalysisExecutionLog
|
||||
)
|
||||
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
from services.seo.deep_competitor_analysis_service import DeepCompetitorAnalysisService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("deep_competitor_analysis_executor")
|
||||
|
||||
|
||||
class DeepCompetitorAnalysisExecutor(TaskExecutor):
    """
    Scheduler executor for deep competitor analysis tasks.

    Supports two payload modes: ``"strategic_insights"`` (recurring weekly
    brief, appended to the WebsiteAnalysis history) and the default
    ``"deep_analysis"`` (one-off run; the task is paused afterwards).
    """

    def __init__(self):
        self.analysis_service = DeepCompetitorAnalysisService()
        self.integration_service = OnboardingDataIntegrationService()

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """
        Run one deep competitor analysis task and persist its outcome.

        Args:
            task: Expected to be a ``DeepCompetitorAnalysisTask``; any other
                type yields a non-retryable failure result.
            db: Database session used for the execution log and task state.

        Returns:
            ``TaskExecutionResult`` describing success, skip (no competitors
            available) or failure. Exceptions raised while running the
            analysis are caught, recorded on the task/log and reported in
            the result rather than re-raised.
        """
        start_time = time.time()

        if not isinstance(task, DeepCompetitorAnalysisTask):
            return TaskExecutionResult(
                success=False,
                error_message="Invalid task type for deep competitor analysis",
                retryable=False
            )

        task_log = DeepCompetitorAnalysisExecutionLog(
            task_id=task.id,
            status="running",
            execution_date=datetime.utcnow()
        )
        db.add(task_log)
        db.commit()

        user_id = str(task.user_id)

        try:
            integrated = self.integration_service.get_integrated_data_sync(user_id, db)
            if not isinstance(integrated, dict):
                integrated = {}

            # Normalise once so every later use (including the ``.get('id')``
            # lookup below) is safe when the stored value is missing or not
            # a dict.
            website_analysis = integrated.get("website_analysis")
            if not isinstance(website_analysis, dict):
                website_analysis = {}

            payload = task.payload if isinstance(task.payload, dict) else {}
            competitors = self._resolve_competitors(payload, integrated)

            if not competitors:
                logger.warning(f"Deep competitor analysis skipped for user {user_id}: No competitors found")

                task_log.status = "skipped"
                task_log.result_data = {"status": "skipped", "reason": "no_competitors"}
                task_log.execution_time_ms = int((time.time() - start_time) * 1000)

                # Treated as a success so it is not retried endlessly; the
                # task is paused so it does not run again until triggered
                # manually.
                task.last_executed = datetime.utcnow()
                task.last_success = datetime.utcnow()
                task.status = "paused"
                task.next_execution = None
                task.consecutive_failures = 0

                db.commit()

                return TaskExecutionResult(
                    success=True,
                    result_data={"status": "skipped", "reason": "no_competitors"},
                    execution_time_ms=task_log.execution_time_ms,
                    retryable=False
                )

            max_competitors = int(payload.get("max_competitors") or 25)
            crawl_concurrency = int(payload.get("crawl_concurrency") or 4)
            mode = payload.get("mode", "deep_analysis")

            if mode == "strategic_insights":
                logger.info(f"Executing weekly strategic insights for user {user_id}")
                report = await self.analysis_service.generate_weekly_strategy_brief(
                    user_id=user_id,
                    website_analysis=website_analysis,
                    competitors=competitors
                )

                # Persist to WebsiteAnalysis history
                analysis_id = website_analysis.get('id')
                if analysis_id:
                    from models.onboarding import WebsiteAnalysis
                    from sqlalchemy.orm.attributes import flag_modified

                    wa = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.id == analysis_id).first()
                    if wa:
                        history = wa.strategic_insights_history or []
                        if not isinstance(history, list):
                            history = []
                        # Newest first; keep at most 52 entries.
                        history.insert(0, report)
                        wa.strategic_insights_history = history[:52]
                        flag_modified(wa, "strategic_insights_history")
                        db.commit()
            else:
                report = await self.analysis_service.run(
                    user_id=user_id,
                    website_analysis=website_analysis,
                    competitors=competitors,
                    max_competitors=max_competitors,
                    crawl_concurrency=crawl_concurrency
                )

            task.last_executed = datetime.utcnow()
            task.last_success = datetime.utcnow()

            # If it's a recurring task (strategic_insights), set next execution
            if mode == "strategic_insights":
                task.status = "active"
                task.next_execution = self.calculate_next_execution(task, "weekly", task.last_executed)
            else:
                task.status = "paused"
                task.next_execution = None

            task.consecutive_failures = 0
            task.failure_pattern = None
            task.failure_reason = None

            task_log.status = "success"
            task_log.result_data = report
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)

            db.commit()

            # Best-effort refresh; a failure here must not fail the task.
            try:
                await self.integration_service.refresh_integrated_data(user_id, db)
            except Exception as e:
                logger.warning(f"Deep competitor analysis SSOT refresh failed for user {user_id}: {e}")

            return TaskExecutionResult(
                success=True,
                result_data=report,
                execution_time_ms=task_log.execution_time_ms,
                retryable=False
            )

        except Exception as e:
            db.rollback()
            logger.warning(f"Deep competitor analysis task failed for user {user_id}: {e}")

            failure_detection = FailureDetectionService(db)
            pattern = failure_detection.analyze_task_failures(task.id, "deep_competitor_analysis", user_id)

            task.last_executed = datetime.utcnow()
            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.consecutive_failures = (task.consecutive_failures or 0) + 1

            if pattern and pattern.should_cool_off:
                # Stop automatic retries until someone intervenes.
                task.status = "needs_intervention"
                task.failure_pattern = {
                    "consecutive_failures": pattern.consecutive_failures,
                    "recent_failures": pattern.recent_failures,
                    "failure_reason": pattern.failure_reason.value,
                    "error_patterns": pattern.error_patterns,
                    "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
                }
                task.next_execution = None
            else:
                task.status = "failed"
                task.next_execution = datetime.utcnow() + timedelta(minutes=30)

            task_log.status = "failed"
            task_log.error_message = str(e)
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)

            db.add(task_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=task_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=1800
            )

    @staticmethod
    def _resolve_competitors(payload, integrated):
        """
        Return the first non-empty competitor list found, or ``None``.

        Sources are tried in order: the task payload, the integrated
        ``research_preferences``, then the integrated ``competitor_analysis``.
        Both arguments must be dicts.
        """
        def usable(value):
            return isinstance(value, list) and bool(value)

        competitors = payload.get("competitors")
        if usable(competitors):
            return competitors

        research_prefs = integrated.get("research_preferences")
        if isinstance(research_prefs, dict):
            competitors = research_prefs.get("competitors")
            if usable(competitors):
                return competitors

        competitors = integrated.get("competitor_analysis")
        if usable(competitors):
            return competitors
        return None

    def calculate_next_execution(self, task: Any, frequency: str, last_execution: datetime = None) -> datetime:
        """
        Return the next run time for ``frequency``.

        ``"weekly"`` schedules 7 days after ``last_execution`` (or after the
        current UTC time when it is not given); any other value schedules
        365 days out. ``task`` is not used.
        """
        base = last_execution or datetime.utcnow()
        if frequency == "weekly":
            return base + timedelta(days=7)
        return base + timedelta(days=365)
|
||||
|
||||
@@ -0,0 +1,179 @@
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import (
|
||||
DeepWebsiteCrawlTask,
|
||||
DeepWebsiteCrawlExecutionLog
|
||||
)
|
||||
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
from services.research.deep_crawl_service import DeepCrawlService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("deep_website_crawl_executor")
|
||||
|
||||
|
||||
class DeepWebsiteCrawlExecutor(TaskExecutor):
    """
    Scheduler executor that runs a deep crawl of a user's website.

    The executor owns the execution log and the task bookkeeping
    (status, next run, failure counters); the crawl itself is delegated to
    ``DeepCrawlService``.
    """

    def __init__(self):
        self.crawl_service = DeepCrawlService()

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """
        Run one deep website crawl task and persist its outcome.

        Args:
            task: Expected to be a ``DeepWebsiteCrawlTask``; any other type
                yields a non-retryable failure result.
            db: Database session used for the execution log and task state.

        Returns:
            ``TaskExecutionResult`` carrying the crawl result on success, or
            the error message on failure. Exceptions raised by the crawl are
            caught, recorded on the task/log and reported in the result
            rather than re-raised.
        """
        start_time = time.time()

        if not isinstance(task, DeepWebsiteCrawlTask):
            return TaskExecutionResult(
                success=False,
                error_message="Invalid task type for deep website crawl",
                retryable=False
            )

        task_log = DeepWebsiteCrawlExecutionLog(
            task_id=task.id,
            status="running",
            execution_date=datetime.utcnow()
        )
        db.add(task_log)
        db.commit()

        user_id = str(task.user_id)
        website_url = task.website_url

        try:
            logger.info(f"Executing deep website crawl for user {user_id}, url {website_url}")

            # task_id is deliberately NOT passed. Per the original author's
            # notes, execute_deep_crawl(user_id, website_url, task_id=None)
            # writes its own execution log and updates the task when a
            # task_id is supplied, which would duplicate the log and task
            # bookkeeping done below. Without it the service only returns
            # the result data.
            # NOTE(review): confirm against DeepCrawlService.
            result = await self.crawl_service.execute_deep_crawl(
                user_id=user_id,
                website_url=website_url
            )

            task.last_executed = datetime.utcnow()
            task.last_success = datetime.utcnow()
            # Kept active and rescheduled weekly so the crawl recurs.
            task.status = "active"
            task.next_execution = self.calculate_next_execution(task, "Weekly", task.last_executed)

            task.consecutive_failures = 0
            task.failure_pattern = None
            task.failure_reason = None

            # Finalise the "running" log row created above.
            task_log.status = "success"
            task_log.result_data = result
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)

            db.commit()

            return TaskExecutionResult(
                success=True,
                result_data=result,
                execution_time_ms=task_log.execution_time_ms,
                retryable=False
            )

        except Exception as e:
            db.rollback()
            logger.warning(f"Deep website crawl task failed for user {user_id}: {e}")

            failure_detection = FailureDetectionService(db)
            pattern = failure_detection.analyze_task_failures(task.id, "deep_website_crawl", user_id)

            task.last_executed = datetime.utcnow()
            task.last_failure = datetime.utcnow()
            task.failure_reason = str(e)
            task.consecutive_failures = (task.consecutive_failures or 0) + 1

            if pattern and pattern.should_cool_off:
                # Stop automatic retries until someone intervenes.
                task.status = "needs_intervention"
                task.failure_pattern = {
                    "consecutive_failures": pattern.consecutive_failures,
                    "recent_failures": pattern.recent_failures,
                    "failure_reason": pattern.failure_reason.value,
                    "error_patterns": pattern.error_patterns,
                    "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
                }
                task.next_execution = None
            else:
                task.status = "failed"
                task.next_execution = datetime.utcnow() + timedelta(minutes=60)  # Retry in an hour

            task_log.status = "failed"
            task_log.error_message = str(e)
            task_log.execution_time_ms = int((time.time() - start_time) * 1000)

            db.add(task_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=task_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=3600
            )

    def calculate_next_execution(
        self,
        task: Any,
        frequency: str,
        last_execution: Optional[datetime] = None
    ) -> datetime:
        """
        Calculate next execution time based on frequency.

        ``'Daily'`` adds one day, ``'Weekly'`` one week and ``'Monthly'``
        30 days to ``last_execution`` (current UTC time when omitted). Any
        other value falls back to weekly. ``task`` is not used.
        """
        if not last_execution:
            last_execution = datetime.utcnow()

        intervals = {
            'Daily': timedelta(days=1),
            'Weekly': timedelta(weeks=1),
            'Monthly': timedelta(days=30),
        }
        # Default to weekly if unknown
        return last_execution + intervals.get(frequency, timedelta(weeks=1))
|
||||
232
backend/services/scheduler/executors/market_trends_executor.py
Normal file
232
backend/services/scheduler/executors/market_trends_executor.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
Market Trends Executor
|
||||
Runs Google Trends (pytrends) periodically and embeds results into the user SIF index.
|
||||
"""
|
||||
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import MarketTrendsTask, MarketTrendsExecutionLog
|
||||
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
from services.intelligence.sif_integration import SIFIntegrationService
|
||||
from services.research.trends.google_trends_service import GoogleTrendsService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("market_trends_executor")
|
||||
|
||||
|
||||
class MarketTrendsExecutor(TaskExecutor):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
    """
    Run one market trends task and record the outcome.

    Writes a "running" execution log, picks up to five keywords (stored
    strategy data first, then the task payload), queries
    GoogleTrendsService and hands the result to
    SIFIntegrationService.index_market_trends_run. When there are no
    keywords or the trends call raises, an error placeholder is indexed
    instead and the run still counts as a success.

    On success the task is rescheduled `frequency_hours` ahead (72 when
    unset). On any other exception the failure is recorded and the task
    either retries in six hours or is marked "needs_intervention".
    """
    started_at = time.time()

    if not isinstance(task, MarketTrendsTask):
        return TaskExecutionResult(success=False, error_message="Invalid task type for market trends", retryable=False)

    task_log = MarketTrendsExecutionLog(task_id=task.id, status="running", execution_date=datetime.utcnow())
    db.add(task_log)
    db.commit()

    user_id = str(task.user_id)
    website_url = task.website_url
    payload = task.payload or {}

    try:
        geo = payload.get("geo") or "US"
        timeframe = payload.get("timeframe") or "today 12-m"

        def error_result(message: str, used_keywords: List[str]) -> Dict[str, Any]:
            # Placeholder that is indexed in place of a real trends response.
            return {
                "error": message,
                "keywords": used_keywords,
                "timeframe": timeframe,
                "geo": geo,
                "timestamp": datetime.utcnow().isoformat(),
                "cached": False,
            }

        sif_service = SIFIntegrationService(user_id)

        keywords = await self._select_keywords_for_user(db=db, user_id=user_id, website_url=website_url)
        if not keywords:
            keywords = payload.get("keywords") or []

        # Drop blank entries and keep at most five keywords.
        stripped = (str(k).strip() for k in (keywords or []))
        keywords = [k for k in stripped if k][:5]

        trends_result: Dict[str, Any]
        if not keywords:
            trends_result = error_result("No keywords available for market trends run", [])
        else:
            try:
                trends_result = await GoogleTrendsService().analyze_trends(
                    keywords=keywords, timeframe=timeframe, geo=geo, user_id=user_id
                )
            except Exception as trends_err:
                trends_result = error_result(str(trends_err), keywords)

        run_id = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
        await sif_service.index_market_trends_run(trends_result=trends_result, run_id=run_id)

        # Clear any failure state and schedule the next recurring run.
        task.consecutive_failures = 0
        task.failure_pattern = None
        task.failure_reason = None

        task.last_executed = datetime.utcnow()
        task.last_success = datetime.utcnow()

        frequency_hours = task.frequency_hours or 72
        task.next_execution = datetime.utcnow() + timedelta(hours=frequency_hours)
        task.status = "active"

        task_log.status = "success"
        task_log.result_data = {
            "run_id": run_id,
            "keywords": trends_result.get("keywords", keywords),
            "geo": geo,
            "timeframe": timeframe,
            "cached": trends_result.get("cached", False),
        }
        task_log.execution_time_ms = int((time.time() - started_at) * 1000)

        db.commit()

        return TaskExecutionResult(
            success=True,
            result_data=task_log.result_data,
            execution_time_ms=task_log.execution_time_ms,
            retryable=False,
        )

    except Exception as e:
        db.rollback()
        logger.warning(f"Market trends task failed for user {user_id}: {e}")

        failure_detection = FailureDetectionService(db)
        pattern = failure_detection.analyze_task_failures(task.id, "market_trends", user_id)

        task.last_executed = datetime.utcnow()
        task.last_failure = datetime.utcnow()
        task.failure_reason = str(e)
        task.consecutive_failures = (task.consecutive_failures or 0) + 1

        if not (pattern and pattern.should_cool_off):
            # Ordinary failure: stay active and try again in six hours.
            task.status = "active"
            task.next_execution = datetime.utcnow() + timedelta(hours=6)
        else:
            # Repeated failures: stop scheduling until someone intervenes.
            task.status = "needs_intervention"
            task.failure_pattern = {
                "consecutive_failures": pattern.consecutive_failures,
                "recent_failures": pattern.recent_failures,
                "failure_reason": pattern.failure_reason.value,
                "error_patterns": pattern.error_patterns,
                "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat(),
            }
            task.next_execution = None

        task_log.status = "failed"
        task_log.error_message = str(e)
        task_log.execution_time_ms = int((time.time() - started_at) * 1000)

        db.add(task_log)
        db.commit()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=task_log.execution_time_ms,
            retryable=(task.status != "needs_intervention"),
            retry_delay=21600,
        )
|
||||
|
||||
async def _select_keywords_for_user(self, db: Session, user_id: str, website_url: str) -> List[str]:
    """
    Pick up to five trend keywords for a user from stored data.

    The most recently updated EnhancedContentStrategy is tried first
    (emerging trends, industry trends, market gaps, competitor content
    strategies). If that yields nothing, the topic clusters of the user's
    latest WebsiteAnalysis are used. Both lookups are best-effort: a
    failure is logged and treated as "no keywords" rather than raised.

    Args:
        db: Session used for both lookups.
        user_id: Owner of the strategy / onboarding records.
        website_url: Accepted for interface compatibility; not used here.

    Returns:
        Up to five non-blank keywords, de-duplicated case-insensitively in
        first-seen order.
    """
    keywords: List[str] = []

    try:
        from sqlalchemy import select, desc
        from models.enhanced_strategy_models import EnhancedContentStrategy

        stmt = (
            select(EnhancedContentStrategy)
            .where(EnhancedContentStrategy.user_id == user_id)
            .order_by(desc(EnhancedContentStrategy.updated_at))
        )
        strategy = db.execute(stmt).scalars().first()
        if strategy:
            for attr in (
                "emerging_trends",
                "industry_trends",
                "market_gaps",
                "competitor_content_strategies",
            ):
                field_value = getattr(strategy, attr)
                if field_value:
                    keywords.extend(self._extract_strings(field_value))
    except Exception as exc:
        # Best-effort source: log instead of silently discarding the error.
        logger.warning(f"Market trends keyword lookup from content strategy failed for user {user_id}: {exc}")

    if not keywords:
        try:
            from sqlalchemy import select, desc
            from models.onboarding import WebsiteAnalysis, OnboardingSession

            stmt = (
                select(WebsiteAnalysis)
                .join(OnboardingSession, WebsiteAnalysis.session_id == OnboardingSession.id)
                .where(OnboardingSession.user_id == user_id)
                .order_by(desc(WebsiteAnalysis.created_at))
            )
            wa = db.execute(stmt).scalars().first()
            if wa and wa.content_strategy_insights:
                # `or {}` / `or []` also cover keys that are present but null.
                ai_strategy = wa.content_strategy_insights.get("ai_strategy") or {}
                topic_clusters = ai_strategy.get("topic_clusters") or []
                keywords.extend(self._extract_strings(topic_clusters))
        except Exception as exc:
            logger.warning(f"Market trends keyword lookup from website analysis failed for user {user_id}: {exc}")

    deduped: List[str] = []
    seen = set()
    for raw in keywords:
        cleaned = str(raw).strip()
        if not cleaned:
            continue
        folded = cleaned.lower()
        if folded in seen:
            continue
        seen.add(folded)
        deduped.append(cleaned)

    return deduped[:5]
|
||||
|
||||
def _extract_strings(self, value: Any) -> List[str]:
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
if isinstance(value, list):
|
||||
out: List[str] = []
|
||||
for item in value:
|
||||
out.extend(self._extract_strings(item))
|
||||
return out
|
||||
if isinstance(value, dict):
|
||||
out: List[str] = []
|
||||
for k in ["keyword", "topic", "title", "name", "label"]:
|
||||
if k in value and value.get(k):
|
||||
out.append(str(value.get(k)))
|
||||
return out
|
||||
return [str(value)]
|
||||
|
||||
def calculate_next_execution(self, task: Any, frequency: str, last_execution: datetime = None) -> datetime:
    """
    Return the next run time, `frequency_hours` after the last execution.

    The interval is read from `task.frequency_hours` and defaults to 72
    hours when the attribute is missing or falsy. `frequency` is ignored.
    The current UTC time is the base when `last_execution` is not given.
    """
    interval_hours = getattr(task, "frequency_hours", 72) or 72
    start = last_execution or datetime.utcnow()
    return start + timedelta(hours=interval_hours)
|
||||
@@ -21,6 +21,7 @@ from services.gsc_service import GSCService
|
||||
from services.integrations.bing_oauth import BingOAuthService
|
||||
from services.integrations.wordpress_oauth import WordPressOAuthService
|
||||
from services.wix_service import WixService
|
||||
from services.database import get_user_db_path
|
||||
|
||||
logger = get_service_logger("oauth_token_monitoring_executor")
|
||||
|
||||
@@ -289,8 +290,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
GSC service auto-refreshes tokens if expired when loading credentials.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
# Use dynamic database path
|
||||
db_path = get_user_db_path(user_id)
|
||||
gsc_service = GSCService(db_path=db_path)
|
||||
credentials = gsc_service.load_user_credentials(user_id)
|
||||
|
||||
@@ -341,9 +342,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
Checks token expiration and attempts refresh if needed.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
bing_service = BingOAuthService(db_path=db_path)
|
||||
# Initialize Bing service
|
||||
bing_service = BingOAuthService()
|
||||
|
||||
# Get token status (includes expired tokens)
|
||||
token_status = bing_service.get_user_token_status(user_id)
|
||||
@@ -502,8 +502,8 @@ class OAuthTokenMonitoringExecutor(TaskExecutor):
|
||||
and require user re-authorization. We only check if token is valid.
|
||||
"""
|
||||
try:
|
||||
# Use absolute database path for consistency with onboarding
|
||||
db_path = os.path.abspath("alwrity.db")
|
||||
# Use dynamic database path
|
||||
db_path = get_user_db_path(user_id)
|
||||
wordpress_service = WordPressOAuthService(db_path=db_path)
|
||||
tokens = wordpress_service.get_user_tokens(user_id)
|
||||
|
||||
|
||||
@@ -0,0 +1,584 @@
|
||||
import asyncio
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.onboarding import SEOPageAudit
|
||||
from models.website_analysis_monitoring_models import (
|
||||
OnboardingFullWebsiteAnalysisTask,
|
||||
OnboardingFullWebsiteAnalysisExecutionLog
|
||||
)
|
||||
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
|
||||
from services.seo_analyzer.analyzers import (
|
||||
MetaDataAnalyzer,
|
||||
TechnicalSEOAnalyzer,
|
||||
ContentAnalyzer,
|
||||
URLStructureAnalyzer,
|
||||
AccessibilityAnalyzer,
|
||||
UserExperienceAnalyzer
|
||||
)
|
||||
|
||||
|
||||
class OnboardingFullWebsiteAnalysisExecutor(TaskExecutor):
|
||||
def __init__(self):
    """Set up the bound logger, crawl limits, score thresholds and category weights."""
    self.logger = logger.bind(component="OnboardingFullWebsiteAnalysisExecutor")

    # Crawl and HTTP limits.
    self.max_urls_default = 500
    self.http_timeout_seconds = 25
    self.http_concurrency = 10

    # Overall-score cut-offs used to classify a page.
    self.healthy_threshold = 80
    self.warning_threshold = 60

    # Per-category weights applied by _weighted_score.
    self.weights = dict(
        meta=0.15,
        content=0.20,
        technical=0.20,
        performance=0.20,
        accessibility=0.10,
        ux=0.10,
        security=0.05,
    )
|
||||
|
||||
async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
    """
    Run a full-site SEO audit for one onboarding task.

    Writes a "running" execution log, discovers up to `max_urls` pages
    (payload value, else `self.max_urls_default`), audits them and stores
    the summary. On success the task is paused with no next execution. On
    failure it is either retried in 30 minutes (status "failed") or marked
    "needs_intervention" when the failure detector asks for a cool-off.

    Args:
        task: Expected to be an OnboardingFullWebsiteAnalysisTask.
        db: Session used for the log row, task updates and page audits.

    Returns:
        TaskExecutionResult describing success or failure; it is not
        retryable for a wrong task type or a task needing intervention.
    """
    start_time = time.time()

    if not isinstance(task, OnboardingFullWebsiteAnalysisTask):
        return TaskExecutionResult(
            success=False,
            error_message="Invalid task type for onboarding full website analysis",
            retryable=False
        )

    task_log = OnboardingFullWebsiteAnalysisExecutionLog(
        task_id=task.id,
        status='running',
        execution_date=datetime.utcnow()
    )
    db.add(task_log)
    db.commit()

    user_id = str(task.user_id)
    website_url = task.website_url
    payload = task.payload or {}

    try:
        # Parsed inside the try so a malformed payload value is recorded as
        # a failed run instead of leaving the log row stuck in "running".
        max_urls = int(payload.get('max_urls') or self.max_urls_default)

        urls = await self._discover_urls(website_url, max_urls=max_urls)
        if not urls:
            raise ValueError("No URLs discovered for full-site analysis")

        results = await self._audit_urls(user_id, website_url, urls, db)

        task.last_executed = datetime.utcnow()
        task.last_success = datetime.utcnow()
        task.status = 'paused'
        task.next_execution = None
        task.consecutive_failures = 0
        task.failure_pattern = None
        task.failure_reason = None

        task_log.status = 'success'
        task_log.result_data = results
        task_log.execution_time_ms = int((time.time() - start_time) * 1000)

        db.commit()

        return TaskExecutionResult(
            success=True,
            result_data=results,
            execution_time_ms=task_log.execution_time_ms,
            retryable=False
        )

    except Exception as e:
        db.rollback()
        # loguru attaches the traceback via opt(exception=True), not via an
        # exc_info kwarg. Passing the error as a format argument also keeps
        # braces in its text from being treated as format fields.
        self.logger.opt(exception=True).error("Full-site SEO audit task failed: {}", e)

        failure_detection = FailureDetectionService(db)
        pattern = failure_detection.analyze_task_failures(task.id, 'onboarding_full_website_analysis', user_id)

        task.last_executed = datetime.utcnow()
        task.last_failure = datetime.utcnow()
        task.failure_reason = str(e)
        task.consecutive_failures = (task.consecutive_failures or 0) + 1

        if pattern and pattern.should_cool_off:
            task.status = "needs_intervention"
            task.failure_pattern = {
                "consecutive_failures": pattern.consecutive_failures,
                "recent_failures": pattern.recent_failures,
                "failure_reason": pattern.failure_reason.value,
                "error_patterns": pattern.error_patterns,
                "cool_off_until": (datetime.utcnow() + timedelta(days=7)).isoformat()
            }
            task.next_execution = None
        else:
            task.status = "failed"
            task.next_execution = datetime.utcnow() + timedelta(minutes=30)

        task_log.status = 'failed'
        task_log.error_message = str(e)
        task_log.execution_time_ms = int((time.time() - start_time) * 1000)

        db.add(task_log)
        db.commit()

        return TaskExecutionResult(
            success=False,
            error_message=str(e),
            execution_time_ms=task_log.execution_time_ms,
            retryable=(task.status != "needs_intervention"),
            retry_delay=1800
        )
|
||||
|
||||
def calculate_next_execution(
    self,
    task: Any,
    frequency: str,
    last_execution: Optional[datetime] = None
) -> datetime:
    """
    Return a time 365 days after the last execution.

    `task` and `frequency` are ignored. The current UTC time is the base
    when `last_execution` is not given.
    """
    one_year = timedelta(days=365)
    if last_execution:
        return last_execution + one_year
    return datetime.utcnow() + one_year
|
||||
|
||||
async def _discover_urls(self, website_url: str, max_urls: int) -> List[str]:
    """
    Collect up to `max_urls` same-site page URLs from the site's sitemaps.

    Sitemap locations come from "Sitemap:" lines in robots.txt; when there
    are none, /sitemap.xml, /sitemap_index.xml and /wp-sitemap.xml are
    tried. URLs are normalized, de-duplicated and limited to the site's
    own netloc. If nothing is found, the normalized site URL itself is
    returned as the only entry.
    """
    base = self._normalize_url(website_url)
    parts = urlparse(base)
    root = f"{parts.scheme}://{parts.netloc}"

    robots_txt = await self._fetch_text(urljoin(root, "/robots.txt"))
    sitemap_urls: List[str] = [
        entry.split(":", 1)[1].strip()
        for entry in (robots_txt.splitlines() if robots_txt else [])
        if entry.lower().startswith("sitemap:")
    ]

    if not sitemap_urls:
        # No sitemap advertised in robots.txt: fall back to common paths.
        sitemap_urls = [
            urljoin(root, path)
            for path in ("/sitemap.xml", "/sitemap_index.xml", "/wp-sitemap.xml")
        ]

    found: List[str] = []
    visited: Set[str] = set()

    for sitemap in sitemap_urls:
        if len(found) >= max_urls:
            break
        remaining = max_urls - len(found)
        for raw in await self._parse_sitemap(sitemap, max_urls=remaining):
            candidate = self._normalize_url(raw)
            if candidate in visited or not self._same_site(root, candidate):
                continue
            visited.add(candidate)
            found.append(candidate)
            if len(found) >= max_urls:
                break

    return found or [base]
|
||||
|
||||
async def _parse_sitemap(self, sitemap_url: str, max_urls: int) -> List[str]:
|
||||
xml_text = await self._fetch_text(sitemap_url)
|
||||
if not xml_text:
|
||||
return []
|
||||
|
||||
try:
|
||||
import xml.etree.ElementTree as ET
|
||||
root = ET.fromstring(xml_text)
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
ns = ""
|
||||
if root.tag.startswith("{"):
|
||||
ns = root.tag.split("}", 1)[0] + "}"
|
||||
|
||||
urls: List[str] = []
|
||||
|
||||
if root.tag.endswith("sitemapindex"):
|
||||
locs = root.findall(f".//{ns}sitemap/{ns}loc")
|
||||
for loc in locs:
|
||||
if len(urls) >= max_urls:
|
||||
break
|
||||
child_url = (loc.text or "").strip()
|
||||
if not child_url:
|
||||
continue
|
||||
child_urls = await self._parse_sitemap(child_url, max_urls=max_urls - len(urls))
|
||||
urls.extend(child_urls)
|
||||
else:
|
||||
locs = root.findall(f".//{ns}url/{ns}loc")
|
||||
for loc in locs:
|
||||
if len(urls) >= max_urls:
|
||||
break
|
||||
u = (loc.text or "").strip()
|
||||
if u:
|
||||
urls.append(u)
|
||||
|
||||
return urls
|
||||
|
||||
async def _fetch_text(self, url: str) -> Optional[str]:
    """
    GET `url` and return the response body as text.

    Returns None when the status is 400 or above, or when any exception
    is raised during the request. Redirects are followed.
    """
    request_headers = {"User-Agent": "ALwrity-SEO-Audit/1.0"}
    try:
        client_timeout = aiohttp.ClientTimeout(total=self.http_timeout_seconds)
        async with aiohttp.ClientSession(timeout=client_timeout) as http:
            async with http.get(url, allow_redirects=True, headers=request_headers) as resp:
                if resp.status < 400:
                    return await resp.text(errors="ignore")
                return None
    except Exception:
        return None
|
||||
|
||||
async def _audit_urls(self, user_id: str, website_url: str, urls: List[str], db: Session) -> Dict[str, Any]:
    """
    Audit every URL concurrently and summarize the results.

    Concurrency is limited to `self.http_concurrency` by both the
    connector and a semaphore. Anything that is not a result dict with a
    truthy 'success' (including raised exceptions) counts as a failure.

    Returns:
        Summary with page counts, the rounded average score of successful
        audits (0 if none), the number of 'fix_scheduled' pages and the
        ten lowest-scoring pages.
    """
    timeout = aiohttp.ClientTimeout(total=self.http_timeout_seconds)
    connector = aiohttp.TCPConnector(limit=self.http_concurrency)
    limiter = asyncio.Semaphore(self.http_concurrency)

    async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
        async def audit_one(url: str) -> Dict[str, Any]:
            async with limiter:
                return await self._audit_single_url(user_id, website_url, url, session, db)

        outcomes = await asyncio.gather(*(audit_one(u) for u in urls), return_exceptions=True)

    succeeded = []
    failed = []
    for outcome in outcomes:
        if isinstance(outcome, dict) and outcome.get('success'):
            succeeded.append(outcome)
        else:
            failed.append(outcome)

    if succeeded:
        avg_score = round(sum(page['overall_score'] for page in succeeded) / len(succeeded))
    else:
        avg_score = 0

    fix_scheduled = sum(1 for page in succeeded if page.get('status') == 'fix_scheduled')

    page_summaries = [
        {'page_url': page['page_url'], 'overall_score': page['overall_score'], 'status': page.get('status')}
        for page in succeeded
    ]
    page_summaries.sort(key=lambda entry: entry['overall_score'])

    return {
        'website_url': website_url,
        'pages_discovered': len(urls),
        'pages_audited': len(succeeded),
        'pages_failed': len(failed),
        'avg_score': avg_score,
        'fix_scheduled_pages': fix_scheduled,
        'worst_pages': page_summaries[:10],
    }
|
||||
|
||||
async def _audit_single_url(
    self,
    user_id: str,
    website_url: str,
    page_url: str,
    session: aiohttp.ClientSession,
    db: Session
) -> Dict[str, Any]:
    """
    Fetch one page, run the analyzers on it and persist the audit.

    A request error, a status of 400 or above, or a non-HTML content type
    is stored as an 'error' audit with score 0 and reported as a failure.
    Otherwise the category scores are combined into a weighted overall
    score, which classifies the page as 'healthy', 'needs_review' or
    'fix_scheduled'.

    Returns:
        {'success': True, 'page_url', 'overall_score', 'status'} on
        success, or {'success': False, 'page_url', 'error'} on failure.
    """
    def record_error(details: Dict[str, Any]) -> None:
        # Persist a zero-score 'error' audit row for this page.
        self._upsert_page_audit(
            db=db,
            user_id=user_id,
            website_url=website_url,
            page_url=page_url,
            overall_score=0,
            status='error',
            audit_data=details
        )

    fetch_start = time.time()
    try:
        async with session.get(page_url, allow_redirects=True, headers={"User-Agent": "ALwrity-SEO-Audit/1.0"}) as resp:
            status = resp.status
            content_type = resp.headers.get("Content-Type", "")
            text = await resp.text(errors="ignore")
            headers = dict(resp.headers)
    except Exception as e:
        record_error({'error': str(e)})
        return {'success': False, 'page_url': page_url, 'error': str(e)}

    # Measured after the body has been read.
    load_time = time.time() - fetch_start

    is_html = "text/html" in content_type.lower()
    if status >= 400 or not is_html:
        record_error({'http_status': status, 'content_type': content_type})
        return {'success': False, 'page_url': page_url, 'error': f'HTTP {status} / {content_type}'}

    soup = BeautifulSoup(text, 'html.parser')

    meta = MetaDataAnalyzer().analyze(soup)
    content = ContentAnalyzer().analyze(soup)
    technical = TechnicalSEOAnalyzer().analyze(page_url, soup)
    url_structure = URLStructureAnalyzer().analyze(page_url)
    accessibility = AccessibilityAnalyzer().analyze(text)
    ux = UserExperienceAnalyzer().analyze(text, page_url)

    performance = self._performance_from_fetch(load_time, headers)
    security = self._security_from_headers(headers)

    analyses = {
        'meta': meta,
        'content': content,
        'technical': technical,
        'performance': performance,
        'accessibility': accessibility,
        'ux': ux,
        'security': security,
        'url_structure': url_structure,
    }
    category_scores = {name: analysis.get('score', 0) for name, analysis in analyses.items()}

    overall_score = self._weighted_score(category_scores)

    page_status = 'fix_scheduled'
    if overall_score >= self.healthy_threshold:
        page_status = 'healthy'
    elif overall_score >= self.warning_threshold:
        page_status = 'needs_review'

    audit_data = {
        'meta': meta,
        'content_health': content,
        'technical': technical,
        'performance': performance,
        'url_structure': url_structure,
        'accessibility': accessibility,
        'ux': ux,
        'security_headers': security,
        'overall_score': overall_score,
    }

    self._upsert_page_audit(
        db=db,
        user_id=user_id,
        website_url=website_url,
        page_url=page_url,
        overall_score=overall_score,
        status=page_status,
        category_scores=category_scores,
        issues=self._collect_findings(audit_data, key='issues'),
        warnings=self._collect_findings(audit_data, key='warnings'),
        recommendations=self._collect_findings(audit_data, key='recommendations'),
        audit_data=audit_data
    )

    return {
        'success': True,
        'page_url': page_url,
        'overall_score': overall_score,
        'status': page_status
    }
|
||||
|
||||
def _weighted_score(self, category_scores: Dict[str, int]) -> int:
|
||||
total = 0.0
|
||||
for key, weight in self.weights.items():
|
||||
total += float(category_scores.get(key, 0)) * weight
|
||||
return int(round(total))
|
||||
|
||||
def _collect_findings(self, audit_data: Dict[str, Any], key: str) -> List[Dict[str, Any]]:
|
||||
findings: List[Dict[str, Any]] = []
|
||||
for category, data in audit_data.items():
|
||||
if not isinstance(data, dict):
|
||||
continue
|
||||
items = data.get(key)
|
||||
if not isinstance(items, list):
|
||||
continue
|
||||
for item in items:
|
||||
if isinstance(item, dict):
|
||||
enriched = dict(item)
|
||||
enriched.setdefault('category', category)
|
||||
findings.append(enriched)
|
||||
return findings
|
||||
|
||||
def _performance_from_fetch(self, load_time: float, headers: Dict[str, str]) -> Dict[str, Any]:
|
||||
issues: List[Dict[str, Any]] = []
|
||||
warnings: List[Dict[str, Any]] = []
|
||||
recommendations: List[Dict[str, Any]] = []
|
||||
|
||||
if load_time > 3:
|
||||
issues.append({
|
||||
'type': 'critical',
|
||||
'message': f'Page load time too slow ({load_time:.2f}s)',
|
||||
'location': 'Page performance',
|
||||
'current_value': f'{load_time:.2f}s',
|
||||
'fix': 'Optimize page speed (target < 3 seconds)',
|
||||
'code_example': 'Optimize images, minify CSS/JS, use CDN',
|
||||
'action': 'optimize_page_speed'
|
||||
})
|
||||
elif load_time > 2:
|
||||
warnings.append({
|
||||
'type': 'warning',
|
||||
'message': f'Page load time could be improved ({load_time:.2f}s)',
|
||||
'location': 'Page performance',
|
||||
'current_value': f'{load_time:.2f}s',
|
||||
'fix': 'Optimize for faster loading',
|
||||
'code_example': 'Compress images, enable caching',
|
||||
'action': 'improve_page_speed'
|
||||
})
|
||||
|
||||
content_encoding = headers.get('Content-Encoding')
|
||||
if not content_encoding:
|
||||
warnings.append({
|
||||
'type': 'warning',
|
||||
'message': 'No compression detected',
|
||||
'location': 'Server configuration',
|
||||
'fix': 'Enable GZIP/Brotli compression',
|
||||
'code_example': 'Enable compression in server or CDN',
|
||||
'action': 'enable_compression'
|
||||
})
|
||||
|
||||
cache_headers = ['Cache-Control', 'Expires', 'ETag']
|
||||
has_cache = any(headers.get(h) for h in cache_headers)
|
||||
if not has_cache:
|
||||
warnings.append({
|
||||
'type': 'warning',
|
||||
'message': 'No caching headers found',
|
||||
'location': 'Server configuration',
|
||||
'fix': 'Add caching headers',
|
||||
'code_example': 'Cache-Control: max-age=31536000',
|
||||
'action': 'add_caching_headers'
|
||||
})
|
||||
|
||||
score = max(0, 100 - len(issues) * 25 - len(warnings) * 10)
|
||||
return {
|
||||
'score': score,
|
||||
'load_time': load_time,
|
||||
'is_compressed': bool(content_encoding),
|
||||
'has_cache': has_cache,
|
||||
'issues': issues,
|
||||
'warnings': warnings,
|
||||
'recommendations': recommendations
|
||||
}
|
||||
|
||||
def _security_from_headers(self, headers: Dict[str, str]) -> Dict[str, Any]:
|
||||
security_headers = {
|
||||
'X-Frame-Options': headers.get('X-Frame-Options'),
|
||||
'X-Content-Type-Options': headers.get('X-Content-Type-Options'),
|
||||
'X-XSS-Protection': headers.get('X-XSS-Protection'),
|
||||
'Strict-Transport-Security': headers.get('Strict-Transport-Security'),
|
||||
'Content-Security-Policy': headers.get('Content-Security-Policy'),
|
||||
'Referrer-Policy': headers.get('Referrer-Policy')
|
||||
}
|
||||
|
||||
issues: List[Dict[str, Any]] = []
|
||||
warnings: List[Dict[str, Any]] = []
|
||||
recommendations: List[Dict[str, Any]] = []
|
||||
present_headers: List[str] = []
|
||||
missing_headers: List[str] = []
|
||||
|
||||
for header_name, header_value in security_headers.items():
|
||||
if header_value:
|
||||
present_headers.append(header_name)
|
||||
continue
|
||||
|
||||
missing_headers.append(header_name)
|
||||
if header_name in ['X-Frame-Options', 'X-Content-Type-Options']:
|
||||
issues.append({
|
||||
'type': 'critical',
|
||||
'message': f'Missing {header_name} header',
|
||||
'location': 'Server configuration',
|
||||
'fix': f'Add {header_name} header',
|
||||
'code_example': f'{header_name}: DENY' if header_name == 'X-Frame-Options' else f'{header_name}: nosniff',
|
||||
'action': f'add_{header_name.lower().replace("-", "_")}_header'
|
||||
})
|
||||
else:
|
||||
warnings.append({
|
||||
'type': 'warning',
|
||||
'message': f'Missing {header_name} header',
|
||||
'location': 'Server configuration',
|
||||
'fix': f'Add {header_name} header for better security',
|
||||
'code_example': f'{header_name}: max-age=31536000',
|
||||
'action': f'add_{header_name.lower().replace("-", "_")}_header'
|
||||
})
|
||||
|
||||
score = min(100, len(present_headers) * 16)
|
||||
return {
|
||||
'score': score,
|
||||
'present_headers': present_headers,
|
||||
'missing_headers': missing_headers,
|
||||
'total_headers': len(present_headers),
|
||||
'issues': issues,
|
||||
'warnings': warnings,
|
||||
'recommendations': recommendations
|
||||
}
|
||||
|
||||
def _upsert_page_audit(
    self,
    db: Session,
    user_id: str,
    website_url: str,
    page_url: str,
    overall_score: int,
    status: str,
    category_scores: Optional[Dict[str, Any]] = None,
    issues: Optional[List[Dict[str, Any]]] = None,
    warnings: Optional[List[Dict[str, Any]]] = None,
    recommendations: Optional[List[Dict[str, Any]]] = None,
    audit_data: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Insert or update the SEOPageAudit row for (user_id, page_url).

    An existing row has all audit fields overwritten, including with None
    for arguments that were not supplied; otherwise a new row is created.
    `last_analyzed_at` is set to the current UTC time and the session is
    committed.
    """
    record = (
        db.query(SEOPageAudit)
        .filter(SEOPageAudit.user_id == user_id, SEOPageAudit.page_url == page_url)
        .first()
    )

    audit_fields = {
        'website_url': website_url,
        'overall_score': overall_score,
        'status': status,
        'category_scores': category_scores,
        'issues': issues,
        'warnings': warnings,
        'recommendations': recommendations,
        'audit_data': audit_data,
        'last_analyzed_at': datetime.utcnow(),
    }

    if record:
        for field_name, field_value in audit_fields.items():
            setattr(record, field_name, field_value)
    else:
        record = SEOPageAudit(user_id=user_id, page_url=page_url, **audit_fields)

    db.add(record)
    db.commit()
|
||||
|
||||
def _normalize_url(self, url: str) -> str:
|
||||
u = (url or "").strip()
|
||||
if not u:
|
||||
return ""
|
||||
if not u.startswith("http://") and not u.startswith("https://"):
|
||||
u = "https://" + u
|
||||
parsed = urlparse(u)
|
||||
normalized = parsed._replace(fragment="").geturl()
|
||||
return normalized.rstrip("/")
|
||||
|
||||
def _same_site(self, root: str, url: str) -> bool:
|
||||
try:
|
||||
a = urlparse(root)
|
||||
b = urlparse(url)
|
||||
return a.netloc == b.netloc
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
153
backend/services/scheduler/executors/sif_indexing_executor.py
Normal file
153
backend/services/scheduler/executors/sif_indexing_executor.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""
|
||||
SIF Indexing Executor
|
||||
Executes SIF indexing tasks (Step 2 metadata and User Website Content).
|
||||
"""
|
||||
|
||||
import time
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import (
|
||||
SIFIndexingTask,
|
||||
SIFIndexingExecutionLog
|
||||
)
|
||||
from services.scheduler.core.executor_interface import TaskExecutor, TaskExecutionResult
|
||||
from services.scheduler.core.failure_detection_service import FailureDetectionService
|
||||
from services.intelligence.sif_integration import SIFIntegrationService
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("sif_indexing_executor")
|
||||
|
||||
|
||||
class SIFIndexingExecutor(TaskExecutor):
    """
    Executor for SIF indexing tasks.

    Handles:
    - Indexing Step 2 Website Analysis Data (Metadata)
    - Harvesting and Indexing User Website Content (Deep Crawl)
    - Scheduling recurring updates (snapshot refresh)
    """

    # Interval between runs when the task does not define frequency_hours.
    DEFAULT_FREQUENCY_HOURS = 48
    # Delay before retrying after a failure that does not trigger a cool-off.
    RETRY_DELAY_SECONDS = 3600
    # Length of the cool-off window recorded in the task's failure_pattern.
    COOL_OFF_DAYS = 7

    def __init__(self):
        pass

    @staticmethod
    def _elapsed_ms(start_time: float) -> int:
        """Return whole milliseconds elapsed since `start_time` (a time.time() value)."""
        return int((time.time() - start_time) * 1000)

    async def execute_task(self, task: Any, db: Session) -> TaskExecutionResult:
        """
        Run one SIF indexing task and record the outcome.

        An execution log row is created in "running" state, then the onboarding
        metadata sync and the website content sync are awaited in turn. On
        completion the task is rescheduled `frequency_hours` ahead; on an
        exception the failure is recorded and the task is either retried in an
        hour or flagged "needs_intervention" when failure analysis asks for a
        cool-off.

        Args:
            task: Task to execute; must be a SIFIndexingTask.
            db: Database session used for the task and its execution log.

        Returns:
            TaskExecutionResult describing success or failure of the run.
        """
        start_time = time.time()

        if not isinstance(task, SIFIndexingTask):
            return TaskExecutionResult(
                success=False,
                error_message="Invalid task type for SIF indexing",
                retryable=False
            )

        task_log = SIFIndexingExecutionLog(
            task_id=task.id,
            status="running",
            execution_date=datetime.utcnow()
        )
        db.add(task_log)
        db.commit()

        user_id = str(task.user_id)
        website_url = task.website_url

        try:
            logger.info(f"Executing SIF indexing for user {user_id} ({website_url})")

            # Initialize SIF Service
            sif_service = SIFIntegrationService(user_id)

            # 1. Sync Step 2 Metadata (WebsiteAnalysis, CompetitorAnalysis)
            metadata_synced = await sif_service.sync_onboarding_data_to_sif()

            # 2. Sync User Website Content (Deep Crawl / Snapshot)
            content_synced = await sif_service.sync_user_website_content(website_url)

            # A run where both syncs were attempted without raising counts as
            # success; when neither reported data we only log a warning.
            if not (metadata_synced or content_synced):
                logger.warning(f"SIF indexing completed but no data was synced/indexed for {user_id}")

            # One timestamp keeps last_executed, last_success and the schedule
            # base consistent with each other.
            now = datetime.utcnow()
            task.last_executed = now
            task.last_success = now

            # Schedule next execution (Recurring)
            frequency_hours = task.frequency_hours or self.DEFAULT_FREQUENCY_HOURS
            task.next_execution = now + timedelta(hours=frequency_hours)
            task.status = "active"

            task.consecutive_failures = 0
            task.failure_pattern = None
            task.failure_reason = None

            task_log.status = "success"
            task_log.result_data = {
                "metadata_synced": metadata_synced,
                "content_synced": content_synced,
                "website_url": website_url
            }
            task_log.execution_time_ms = self._elapsed_ms(start_time)

            db.commit()

            return TaskExecutionResult(
                success=True,
                result_data=task_log.result_data,
                execution_time_ms=task_log.execution_time_ms,
                retryable=False
            )

        except Exception as e:
            db.rollback()
            logger.warning(f"SIF indexing task failed for user {user_id}: {e}")

            # Failure analysis is advisory. If it raises, fall back to the plain
            # retry path instead of aborting the bookkeeping below, which would
            # leave the execution log stuck in "running".
            pattern = None
            try:
                failure_detection = FailureDetectionService(db)
                pattern = failure_detection.analyze_task_failures(task.id, "sif_indexing", user_id)
            except Exception as analysis_error:
                logger.warning(f"Failure analysis failed for SIF indexing task: {analysis_error}")

            now = datetime.utcnow()
            task.last_executed = now
            task.last_failure = now
            task.failure_reason = str(e)
            task.consecutive_failures = (task.consecutive_failures or 0) + 1

            if pattern and pattern.should_cool_off:
                task.status = "needs_intervention"
                task.failure_pattern = {
                    "consecutive_failures": pattern.consecutive_failures,
                    "recent_failures": pattern.recent_failures,
                    "failure_reason": pattern.failure_reason.value,
                    "error_patterns": pattern.error_patterns,
                    "cool_off_until": (now + timedelta(days=self.COOL_OFF_DAYS)).isoformat()
                }
                task.next_execution = None
            else:
                # Retry sooner if it's a transient failure
                task.status = "active"  # Keep active for retry
                task.next_execution = now + timedelta(seconds=self.RETRY_DELAY_SECONDS)

            task_log.status = "failed"
            task_log.error_message = str(e)
            task_log.execution_time_ms = self._elapsed_ms(start_time)

            db.add(task_log)
            db.commit()

            return TaskExecutionResult(
                success=False,
                error_message=str(e),
                execution_time_ms=task_log.execution_time_ms,
                retryable=(task.status != "needs_intervention"),
                retry_delay=self.RETRY_DELAY_SECONDS
            )

    def calculate_next_execution(self, task: Any, frequency: str, last_execution: Optional[datetime] = None) -> datetime:
        """
        Compute the next run time for a task.

        Not strictly used here, as execute_task schedules the next run itself,
        but provided for interface compliance.

        Args:
            task: Task whose `frequency_hours` attribute sets the interval
                (48 hours when missing or falsy).
            frequency: Unused by this executor.
            last_execution: Base time; the current UTC time when omitted.

        Returns:
            The base time plus the task's interval.
        """
        base = last_execution or datetime.utcnow()
        hours = getattr(task, 'frequency_hours', self.DEFAULT_FREQUENCY_HOURS) or self.DEFAULT_FREQUENCY_HOURS
        return base + timedelta(hours=hours)
|
||||
@@ -282,11 +282,18 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
None,
|
||||
partial(self.style_logic.analyze_style_patterns, crawl_result['content'])
|
||||
)
|
||||
|
||||
async def run_seo_audit():
|
||||
loop = asyncio.get_event_loop()
|
||||
return await loop.run_in_executor(
|
||||
None,
|
||||
partial(self.style_logic.perform_seo_audit, website_url, crawl_result['content'])
|
||||
)
|
||||
|
||||
# Execute style and patterns analysis in parallel
|
||||
style_analysis, patterns_result = await asyncio.gather(
|
||||
style_analysis, patterns_result, seo_audit_result = await asyncio.gather(
|
||||
run_style_analysis(),
|
||||
run_patterns_analysis(),
|
||||
run_seo_audit(),
|
||||
return_exceptions=True
|
||||
)
|
||||
|
||||
@@ -302,6 +309,12 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
if isinstance(patterns_result, Exception):
|
||||
self.logger.warning(f"Patterns analysis exception: {patterns_result}")
|
||||
patterns_result = None
|
||||
|
||||
seo_audit = None
|
||||
if isinstance(seo_audit_result, Exception):
|
||||
self.logger.warning(f"SEO audit exception: {seo_audit_result}")
|
||||
else:
|
||||
seo_audit = seo_audit_result
|
||||
|
||||
# Step 3: Generate style guidelines
|
||||
style_guidelines = None
|
||||
@@ -320,6 +333,7 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
'style_analysis': style_analysis.get('analysis') if style_analysis and style_analysis.get('success') else None,
|
||||
'style_patterns': patterns_result if patterns_result and not isinstance(patterns_result, Exception) else None,
|
||||
'style_guidelines': style_guidelines,
|
||||
'seo_audit': seo_audit,
|
||||
}
|
||||
|
||||
# Step 4: Store results based on task type
|
||||
@@ -366,10 +380,12 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
):
|
||||
"""Update existing WebsiteAnalysis record for user's website."""
|
||||
try:
|
||||
# Convert Clerk user ID to integer (same as component_logic.py)
|
||||
# Use the same conversion logic as the website analysis API
|
||||
import hashlib
|
||||
user_id_int = int(hashlib.sha256(user_id.encode()).hexdigest()[:15], 16)
|
||||
session = db.query(OnboardingSession).filter(
|
||||
OnboardingSession.user_id == user_id
|
||||
).order_by(OnboardingSession.updated_at.desc()).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError(f"No onboarding session found for user {user_id}")
|
||||
|
||||
# Use WebsiteAnalysisService to update
|
||||
analysis_service = WebsiteAnalysisService(db)
|
||||
@@ -380,13 +396,15 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
'style_analysis': analysis_data.get('style_analysis'),
|
||||
'style_patterns': analysis_data.get('style_patterns'),
|
||||
'style_guidelines': analysis_data.get('style_guidelines'),
|
||||
'seo_audit': analysis_data.get('seo_audit'),
|
||||
}
|
||||
|
||||
# Save/update analysis
|
||||
analysis_id = analysis_service.save_analysis(
|
||||
session_id=user_id_int,
|
||||
session_id=session.id,
|
||||
website_url=website_url,
|
||||
analysis_data=response_data
|
||||
analysis_data=response_data,
|
||||
preserve_persona=True
|
||||
)
|
||||
|
||||
if analysis_id:
|
||||
@@ -490,3 +508,82 @@ class WebsiteAnalysisExecutor(TaskExecutor):
|
||||
)
|
||||
return last_execution + timedelta(days=task.frequency_days)
|
||||
|
||||
async def _perform_full_site_analysis(self, user_id: str, website_url: str, db: Session, max_pages: int = 50):
    """
    Discover sitemap and perform non-AI SEO audit on all found pages.

    Each audited page is upserted into SEOPageAudit and committed on its own,
    so one failing page only rolls back its own changes. Errors are logged and
    never propagated to the caller.

    Args:
        user_id: Owner of the audit records.
        website_url: Site whose sitemap is discovered and scanned.
        db: Database session used for the audit records.
        max_pages: Upper bound on the number of sitemap URLs audited per run.
    """
    try:
        self.logger.info(f"Starting full site scan for {website_url}")
        sitemap_service = SitemapService()

        # 1. Discover Sitemap
        sitemap_url = await sitemap_service.discover_sitemap_url(website_url)
        if not sitemap_url:
            self.logger.warning(f"No sitemap found for {website_url}, skipping full site scan")
            return

        # 2. Get URLs (Raw mode)
        sitemap_data = await sitemap_service.analyze_sitemap(
            sitemap_url=sitemap_url,
            analyze_content_trends=False,
            analyze_publishing_patterns=False,
            include_ai_insights=False
        )

        # `or []` tolerates an explicit None under the 'urls' key.
        urls = [u.get('loc') for u in (sitemap_data.get('urls') or []) if u.get('loc')]
        self.logger.info(f"Found {len(urls)} URLs in sitemap for {website_url}")

        # 3. Batch Process (capped for safety)
        urls_to_scan = urls[:max_pages]

        # The running loop does not change between pages; look it up once.
        loop = asyncio.get_running_loop()

        for page_url in urls_to_scan:
            try:
                # Check if exists
                existing = db.query(SEOPageAudit).filter(
                    SEOPageAudit.user_id == user_id,
                    SEOPageAudit.page_url == page_url
                ).first()

                # Run in executor to avoid blocking.
                # Pass empty content dict to trigger internal fetching in perform_seo_audit
                audit_result = await loop.run_in_executor(
                    None,
                    partial(self.style_logic.perform_seo_audit, page_url, {})
                )

                # Build the stored fields once so the update and insert paths
                # cannot drift apart. `or {}` tolerates a None summary.
                summary = audit_result.get('summary') or {}
                audit_fields = {
                    'overall_score': audit_result.get('overall_score'),
                    'category_scores': {
                        k: v.get('score')
                        for k, v in audit_result.items()
                        if isinstance(v, dict) and 'score' in v
                    },
                    'issues': summary.get('critical_issues', []),
                    'warnings': summary.get('warnings', []),
                    'audit_data': audit_result,
                    'status': 'completed',
                }

                if existing:
                    for field_name, value in audit_fields.items():
                        setattr(existing, field_name, value)
                    existing.last_analyzed_at = datetime.utcnow()
                else:
                    db.add(SEOPageAudit(
                        user_id=user_id,
                        website_url=website_url,
                        page_url=page_url,
                        analysis_source='scheduled_full_site',
                        **audit_fields
                    ))

                db.commit()  # Commit each page to show progress

            except Exception as e:
                self.logger.error(f"Error auditing page {page_url}: {e}")
                db.rollback()

        self.logger.info(f"Completed full site scan for {website_url}")

    except Exception as e:
        self.logger.error(f"Error in full site analysis: {e}")
|
||||
|
||||
|
||||
|
||||
32
backend/services/scheduler/utils/advertools_task_loader.py
Normal file
32
backend/services/scheduler/utils/advertools_task_loader.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
Advertools Task Loader Utility
|
||||
Utility functions for loading due Advertools tasks from the database.
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from models.advertools_monitoring_models import AdvertoolsTask
|
||||
|
||||
def load_due_advertools_tasks(db: Session, user_id: Optional[str] = None) -> List[AdvertoolsTask]:
    """
    Fetch the Advertools tasks whose scheduled run time has arrived.

    A task qualifies when its status is 'active' and its next_execution is at
    or before the current UTC time.

    Args:
        db: Database session
        user_id: When given (and truthy), restrict the result to this user's tasks

    Returns:
        List of due AdvertoolsTask objects
    """
    criteria = [
        AdvertoolsTask.status == 'active',
        AdvertoolsTask.next_execution <= datetime.utcnow(),
    ]

    # Tenant filter is optional; without it tasks of every user are returned.
    if user_id:
        criteria.append(AdvertoolsTask.user_id == user_id)

    return db.query(AdvertoolsTask).filter(*criteria).all()
|
||||
@@ -0,0 +1,30 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from sqlalchemy import and_, or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import DeepCompetitorAnalysisTask
|
||||
|
||||
|
||||
def load_due_deep_competitor_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[DeepCompetitorAnalysisTask]:
    """
    Fetch deep competitor analysis tasks that are ready to run.

    A task is ready when its status is 'active' and its next_execution is
    either unset or not later than the current UTC time.

    Args:
        db: Database session
        user_id: Optional owner filter; compared as a string

    Returns:
        List of due DeepCompetitorAnalysisTask objects
    """
    cutoff = datetime.utcnow()

    is_active = DeepCompetitorAnalysisTask.status == 'active'
    is_due = or_(
        DeepCompetitorAnalysisTask.next_execution <= cutoff,
        DeepCompetitorAnalysisTask.next_execution.is_(None)
    )
    due_tasks = db.query(DeepCompetitorAnalysisTask).filter(and_(is_active, is_due))

    if user_id is None:
        return due_tasks.all()

    # user_id may arrive as an int; the filter compares its string form.
    return due_tasks.filter(DeepCompetitorAnalysisTask.user_id == str(user_id)).all()
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
from typing import List
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import or_
|
||||
|
||||
from models.website_analysis_monitoring_models import DeepWebsiteCrawlTask
|
||||
|
||||
def load_due_deep_website_crawl_tasks(db: Session, user_id: str = None) -> List[DeepWebsiteCrawlTask]:
    """
    Fetch deep website crawl tasks that are ready to run.

    A task is ready when its status is 'active' or 'retry' and its
    next_execution is either unset or not later than the current UTC time.

    Args:
        db: Database session
        user_id: When given (and truthy), restrict the result to this user's tasks

    Returns:
        List of due tasks
    """
    runnable_status = or_(
        DeepWebsiteCrawlTask.status == 'active',
        DeepWebsiteCrawlTask.status == 'retry'
    )
    is_due = or_(
        DeepWebsiteCrawlTask.next_execution <= datetime.utcnow(),
        DeepWebsiteCrawlTask.next_execution.is_(None)
    )

    criteria = [runnable_status, is_due]
    if user_id:
        criteria.append(DeepWebsiteCrawlTask.user_id == user_id)

    return db.query(DeepWebsiteCrawlTask).filter(*criteria).all()
|
||||
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
Market Trends Task Loader
|
||||
Loads due market trends tasks from the database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import MarketTrendsTask
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("market_trends_task_loader")
|
||||
|
||||
|
||||
def load_due_market_trends_tasks(db: Session, user_id: Optional[str] = None) -> List[MarketTrendsTask]:
    """
    Fetch market trends tasks that are ready to run.

    A task is ready when its status is "active" and its next_execution is
    either not later than the current UTC time or unset. Any error raised
    while querying is logged and an empty list is returned instead.

    Args:
        db: Database session
        user_id: When given (and truthy), restrict the result to this user's tasks

    Returns:
        List of due MarketTrendsTask objects, or [] on error
    """
    try:
        cutoff = datetime.utcnow()

        criteria = [
            MarketTrendsTask.status == "active",
            or_(
                MarketTrendsTask.next_execution <= cutoff,
                MarketTrendsTask.next_execution.is_(None)
            ),
        ]
        if user_id:
            criteria.append(MarketTrendsTask.user_id == user_id)

        due_tasks = db.query(MarketTrendsTask).filter(*criteria).all()

        # Only log when there is something to run.
        if due_tasks:
            logger.info(f"Loaded {len(due_tasks)} due market trends tasks")
        return due_tasks
    except Exception as e:
        logger.error(f"Error loading market trends tasks: {e}")
        return []
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
Onboarding Full Website Analysis Task Loader
|
||||
Functions to load due onboarding full-site SEO audit tasks from database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from sqlalchemy import and_, or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from models.website_analysis_monitoring_models import OnboardingFullWebsiteAnalysisTask
|
||||
|
||||
|
||||
def load_due_onboarding_full_website_analysis_tasks(
    db: Session,
    user_id: Optional[Union[str, int]] = None
) -> List[OnboardingFullWebsiteAnalysisTask]:
    """
    Fetch onboarding full-site analysis tasks that are ready to run.

    A task is ready when its status is 'active' and its next_execution is
    either unset or not later than the current UTC time.

    Args:
        db: Database session
        user_id: Optional owner filter; compared as a string

    Returns:
        List of due OnboardingFullWebsiteAnalysisTask objects
    """
    task_model = OnboardingFullWebsiteAnalysisTask
    cutoff = datetime.utcnow()

    schedule_reached = or_(
        task_model.next_execution <= cutoff,
        task_model.next_execution.is_(None)
    )
    due_tasks = db.query(task_model).filter(
        and_(task_model.status == 'active', schedule_reached)
    )

    # user_id may arrive as an int; the filter compares its string form.
    if user_id is not None:
        due_tasks = due_tasks.filter(task_model.user_id == str(user_id))

    return due_tasks.all()
|
||||
|
||||
45
backend/services/scheduler/utils/sif_indexing_task_loader.py
Normal file
45
backend/services/scheduler/utils/sif_indexing_task_loader.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
SIF Indexing Task Loader
|
||||
Loads due SIF indexing tasks from the database.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import or_
|
||||
|
||||
from models.website_analysis_monitoring_models import SIFIndexingTask
|
||||
from utils.logger_utils import get_service_logger
|
||||
|
||||
logger = get_service_logger("sif_indexing_task_loader")
|
||||
|
||||
|
||||
def load_due_sif_indexing_tasks(db: Session, user_id: str = None) -> List[SIFIndexingTask]:
    """
    Load SIF indexing tasks that are due for execution.

    A task is due when its status is "active", "pending" or "failed" and its
    next_execution is at or before the current UTC time. Any error raised
    while querying is logged and an empty list is returned instead.

    Args:
        db: Database session
        user_id: Optional user_id to filter by

    Returns:
        List of SIFIndexingTask objects, or [] on error
    """
    try:
        now = datetime.utcnow()

        # SIFIndexingExecutor reschedules tasks by writing `next_execution`
        # and leaves recurring tasks in status "active" (also when it schedules
        # a retry). Filtering on `next_run_at` with only "pending"/"failed"
        # would never pick those tasks up again.
        # NOTE(review): confirm the column name against the SIFIndexingTask model.
        query = db.query(SIFIndexingTask).filter(
            SIFIndexingTask.status.in_(("active", "pending", "failed")),
            SIFIndexingTask.next_execution <= now
        )

        if user_id:
            query = query.filter(SIFIndexingTask.user_id == user_id)

        return query.all()

    except Exception as e:
        logger.error(f"Error loading SIF indexing tasks: {str(e)}")
        return []
|
||||
@@ -8,7 +8,7 @@ from urllib.parse import urlparse
|
||||
from loguru import logger
|
||||
from sqlalchemy.orm import Session as SQLSession
|
||||
|
||||
from services.database import get_db_session
|
||||
from services.database import get_session_for_user
|
||||
from models.onboarding import OnboardingSession, WebsiteAnalysis
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ def get_user_job_store_name(user_id: str, db: SQLSession = None) -> str:
|
||||
|
||||
try:
|
||||
if not db_session:
|
||||
db_session = get_db_session()
|
||||
db_session = get_session_for_user(user_id)
|
||||
close_db = True
|
||||
|
||||
if not db_session:
|
||||
|
||||
Reference in New Issue
Block a user