import asyncio
from datetime import datetime, timedelta
from typing import Any, Dict, List
from urllib.parse import urlparse

from loguru import logger
from sqlalchemy.orm import Session
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy import text
from sqlalchemy.exc import SQLAlchemyError

from services.seo.advertools_service import AdvertoolsService
from services.seo_tools.sitemap_service import SitemapService
from models.advertools_monitoring_models import AdvertoolsTask, AdvertoolsExecutionLog
from models.onboarding import WebsiteAnalysis, OnboardingSession


class AdvertoolsExecutor:
    """
    Executor for Advertools-based SEO intelligence tasks.
    Handles 'content_audit' and 'site_health' task types.
    """

    # Re-run interval used when the task record has no frequency_days set.
    _DEFAULT_FREQUENCY_DAYS = 7
    # Upper bound for the exponential retry backoff.
    _MAX_BACKOFF_DAYS = 30
    # Number of consecutive failures after which the task is marked 'failed'.
    _MAX_CONSECUTIVE_FAILURES = 5

    def __init__(self):
        self.advertools_service = AdvertoolsService()
        self.sitemap_service = SitemapService()
        self.logger = logger.bind(service="AdvertoolsExecutor")

    async def execute_task(self, task_stub: Any, db: Session, **kwargs) -> Dict[str, Any]:
        """
        Execute an Advertools intelligence task.

        Args:
            task_stub: Object exposing ``id``, ``user_id`` and ``payload``
                attributes. ``payload`` must contain ``type``
                ('content_audit' or 'site_health') and ``website_url``.
            db: Database session
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Execution result dictionary. It always has a ``success`` key; when
            execution raised, it is ``{"success": False, "error": <message>}``.
            Exceptions are not propagated to the caller.
        """
        start_time = datetime.utcnow()
        task_id = getattr(task_stub, 'id', None)
        user_id = getattr(task_stub, 'user_id', None)
        payload = getattr(task_stub, 'payload', {}) or {}
        task_type = payload.get('type')
        website_url = payload.get('website_url')

        self.logger.info(f"🚀 Starting Advertools task {task_id} ({task_type}) for {website_url}")

        # Find the actual task record to update state
        task_record = None
        if isinstance(task_id, int):
            task_record = db.query(AdvertoolsTask).filter(AdvertoolsTask.id == task_id).first()

        try:
            if not website_url:
                raise ValueError("Missing website_url in payload")

            # 1. Discover exact sitemap URL first (essential for Advertools)
            discovered_sitemap = await self.sitemap_service.discover_sitemap_url(website_url)
            effective_url = discovered_sitemap if discovered_sitemap else website_url

            # Set status to running for UI feedback
            if task_record:
                task_record.status = 'running'
                db.commit()

            if task_type == 'content_audit':
                result = await self._run_content_audit(website_url, effective_url)
                if result.get('success'):
                    await self._update_persona_augmentation(user_id, website_url, result, db)
            elif task_type == 'site_health':
                # Site health: freshness, velocity, URL structure
                result = await self.advertools_service.analyze_sitemap(effective_url)
                if result.get('success'):
                    await self._update_site_health_metrics(user_id, website_url, result, db)
            else:
                raise ValueError(f"Unknown task type: {task_type}")

            success = result.get('success', False)
            execution_time_ms = self._elapsed_ms(start_time)

            # Update task state
            if task_record:
                if success:
                    self._record_success(task_record)
                else:
                    self._record_failure(task_record, result.get('error', 'Unknown error'))

            # Create execution log
            if isinstance(task_id, int):
                db.add(AdvertoolsExecutionLog(
                    task_id=task_id,
                    status='success' if success else 'failed',
                    result_data=result,
                    error_message=result.get('error'),
                    execution_time_ms=execution_time_ms,
                ))

            # Commit unconditionally so persona/health updates made by the
            # helpers are persisted even when no log row was added.
            db.commit()

            if success:
                self.logger.info(f"✅ Advertools task {task_id} completed successfully")
            else:
                self.logger.warning(f"⚠️ Advertools task {task_id} failed: {result.get('error')}")

            return result

        except Exception as e:
            db.rollback()
            self.logger.error(f"❌ Advertools task execution failed: {e}")
            failure_result = {"success": False, "error": str(e)}

            # Best effort: persist the failure. The 'running' status was
            # committed earlier and survives the rollback, so the record must
            # be moved out of it explicitly or the task stays stuck.
            if task_record:
                try:
                    self._record_failure(task_record, str(e))
                    db.add(AdvertoolsExecutionLog(
                        task_id=task_id,
                        status='failed',
                        result_data=failure_result,
                        error_message=str(e),
                        execution_time_ms=self._elapsed_ms(start_time),
                    ))
                    db.commit()
                except Exception as persist_error:
                    db.rollback()
                    self.logger.error(
                        f"Failed to persist failure state for Advertools task {task_id}: {persist_error}"
                    )

            return failure_result

    @staticmethod
    def _elapsed_ms(start_time: datetime) -> int:
        """Return whole milliseconds elapsed since ``start_time`` (naive UTC)."""
        return int((datetime.utcnow() - start_time).total_seconds() * 1000)

    def _record_success(self, task_record: Any) -> None:
        """Mark the task as succeeded and schedule the next regular run."""
        now = datetime.utcnow()
        task_record.last_executed = now
        task_record.last_success = now
        task_record.consecutive_failures = 0
        task_record.status = 'active'
        # Smart Scheduling with Backoff reset
        freq_days = task_record.frequency_days or self._DEFAULT_FREQUENCY_DAYS
        task_record.next_execution = now + timedelta(days=freq_days)

    def _record_failure(self, task_record: Any, reason: str) -> None:
        """Mark the task as failed and schedule a retry with exponential backoff."""
        now = datetime.utcnow()
        task_record.last_executed = now
        task_record.last_failure = now
        task_record.failure_reason = reason

        failures = (task_record.consecutive_failures or 0) + 1
        task_record.consecutive_failures = failures

        # Exponential Backoff for repeated failures (up to 30 days)
        base_days = task_record.frequency_days or self._DEFAULT_FREQUENCY_DAYS
        backoff_days = min(self._MAX_BACKOFF_DAYS, base_days * (2 ** (failures - 1)))
        task_record.next_execution = now + timedelta(days=backoff_days)

        if failures >= self._MAX_CONSECUTIVE_FAILURES:
            # Mark as failed after 5 attempts
            task_record.status = 'failed'
        else:
            # Leave the transient 'running' state so the task can be retried.
            # NOTE(review): assumes 'active' is the schedulable state (the
            # success path sets it) - confirm against the scheduler.
            task_record.status = 'active'

    async def _run_content_audit(self, website_url: str, effective_url: str) -> Dict[str, Any]:
        """
        Run the content-audit phases in sequence and merge their outputs.

        Args:
            website_url: Site URL taken from the task payload.
            effective_url: Discovered sitemap URL, or ``website_url`` when
                none was discovered.

        Returns:
            Merged result dictionary; ``success`` is true when either the
            content audit or the site structure analysis succeeded.
        """
        # Phase 1: Get sitemap analysis (freshness, URL structure, pillars)
        sitemap_result = await self.advertools_service.analyze_sitemap(effective_url)

        audit_urls = []
        url_structure = {}
        freshness = {}
        if sitemap_result.get('success'):
            metrics = sitemap_result.get('metrics', {})
            audit_urls = metrics.get('audit_sample_urls', [])
            url_structure = metrics.get('url_structure', {})
            freshness = {
                "freshness_score": metrics.get('freshness_score'),
                "publishing_velocity": metrics.get('publishing_velocity'),
                "stale_content_percentage": metrics.get('stale_content_percentage'),
                "publishing_recency": metrics.get('publishing_recency'),
                "publishing_trend": metrics.get('publishing_trend'),
            }

        # Fall back to auditing the site URL itself when the sitemap gave no sample.
        if not audit_urls:
            audit_urls = [website_url]

        # Phase 2: Theme analysis via content audit
        audit_result = await self.advertools_service.audit_content(audit_urls)

        # Phase 3: Site structure analysis (links, redirects, image SEO)
        site_domain = urlparse(website_url).netloc or website_url
        structure_result = await self.advertools_service.analyze_site_structure(
            audit_urls, site_domain=site_domain
        )

        # Phase 4: Robots.txt compliance analysis
        robots_result = await self.advertools_service.analyze_robots_txt(website_url)

        # Phase 5: Crawl budget analysis
        budget_result = await self.advertools_service.analyze_crawl_budget(
            effective_url, site_domain
        )

        # Merge results
        return {
            "success": audit_result.get('success', False) or structure_result.get('success', False),
            "themes": audit_result.get('themes', []),
            "page_count": audit_result.get('page_count', 0),
            "avg_word_count": audit_result.get('avg_word_count', 0),
            "link_health": structure_result.get('link_health', {}),
            "redirect_audit": structure_result.get('redirect_audit', {}),
            "image_seo": structure_result.get('image_seo', {}),
            "page_status": structure_result.get('page_status', {}),
            "url_structure": url_structure,
            "freshness": freshness,
            "robots_txt": robots_result,
            "crawl_budget": budget_result,
            "timestamp": datetime.utcnow().isoformat(),
        }

    async def _update_persona_augmentation(self, user_id: str, website_url: str, audit_result: Dict[str, Any], db: Session):
        """
        Updates the user's Brand Persona with discovered themes, site structure,
        link health, and redirect data from the content audit.

        Args:
            user_id: Owner of the onboarding session to update.
            website_url: Audited site URL (not used by this method).
            audit_result: Merged content-audit result dictionary.
            db: Database session. Changes are staged, not committed here.

        Raises:
            Exception: Any error raised while querying or updating is logged
                and re-raised.
        """
        try:
            session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
            if not session:
                self.logger.warning(f"No onboarding session found for user {user_id}")
                return

            analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
            if not analysis:
                self.logger.warning(f"No website analysis found for user {user_id}")
                return

            # Work on a copy and assign it back: when brand_analysis is None
            # or empty, `or {}` yields a fresh dict that would otherwise never
            # be attached to the row, silently dropping the update.
            current_brand = dict(analysis.brand_analysis or {})
            current_brand.update({
                # Core themes
                'augmented_themes': audit_result.get('themes', []),
                # Link health
                'link_health': audit_result.get('link_health', {}),
                # Redirect audit
                'redirect_audit': audit_result.get('redirect_audit', {}),
                # Image SEO
                'image_seo': audit_result.get('image_seo', {}),
                # Page status distribution
                'page_status': audit_result.get('page_status', {}),
                # URL structure analysis
                'url_structure': audit_result.get('url_structure', {}),
                # Freshness
                'freshness': audit_result.get('freshness', {}),
                # Robots.txt compliance
                'robots_txt': audit_result.get('robots_txt', {}),
                # Crawl budget analysis
                'crawl_budget': audit_result.get('crawl_budget', {}),
                'last_advertools_audit': datetime.utcnow().isoformat(),
            })
            analysis.brand_analysis = current_brand
            flag_modified(analysis, "brand_analysis")

            if 'avg_word_count' in audit_result:
                current_strategy = dict(analysis.content_strategy_insights or {})
                current_strategy['avg_content_length'] = audit_result['avg_word_count']
                analysis.content_strategy_insights = current_strategy
                flag_modified(analysis, "content_strategy_insights")

            self.logger.info(f"Updated persona augmentation for {user_id}")
        except Exception as e:
            self.logger.error(f"Failed to update persona augmentation: {e}")
            raise

    async def _update_site_health_metrics(self, user_id: str, website_url: str, health_result: Dict[str, Any], db: Session):
        """
        Updates the WebsiteAnalysis with site health metrics
        (velocity, freshness, URL structure analysis, freshness score).

        Args:
            user_id: Owner of the onboarding session to update.
            website_url: Analysed site URL (not used by this method).
            health_result: Sitemap analysis result; values are read from its
                ``metrics`` dictionary.
            db: Database session. Changes are staged, not committed here.

        Raises:
            Exception: Any error raised while querying or updating is logged
                and re-raised.
        """
        try:
            session = db.query(OnboardingSession).filter(OnboardingSession.user_id == user_id).first()
            if not session:
                self.logger.warning(f"No onboarding session found for user {user_id}")
                return

            analysis = db.query(WebsiteAnalysis).filter(WebsiteAnalysis.session_id == session.id).first()
            if not analysis:
                self.logger.warning(f"No website analysis found for user {user_id}")
                return

            metrics = health_result.get('metrics', {})

            current_seo = dict(analysis.seo_audit or {})
            current_seo['site_health'] = {
                "total_urls": metrics.get('total_urls'),
                "publishing_velocity": metrics.get('publishing_velocity'),
                "stale_content_count": metrics.get('stale_content_count'),
                "stale_content_percentage": metrics.get('stale_content_percentage'),
                "freshness_score": metrics.get('freshness_score'),
                "publishing_recency": metrics.get('publishing_recency'),
                "publishing_trend": metrics.get('publishing_trend'),
                "top_pillars": metrics.get('top_pillars'),
                "url_structure": metrics.get('url_structure', {}),
            }
            current_seo['last_advertools_health_check'] = datetime.utcnow().isoformat()

            analysis.seo_audit = current_seo
            flag_modified(analysis, "seo_audit")

            self.logger.info(f"Updated site health metrics for {user_id}")
        except Exception as e:
            self.logger.error(f"Failed to update site health metrics: {e}")
            raise