""" Background Job Service Handles background processing of expensive operations like comprehensive Bing insights generation. """ import asyncio import threading import time from datetime import datetime, timedelta from typing import Dict, Any, Optional, Callable from loguru import logger from enum import Enum import json class JobStatus(Enum): PENDING = "pending" RUNNING = "running" COMPLETED = "completed" FAILED = "failed" CANCELLED = "cancelled" class BackgroundJob: """Represents a background job""" def __init__(self, job_id: str, job_type: str, user_id: str, data: Dict[str, Any]): self.job_id = job_id self.job_type = job_type self.user_id = user_id self.data = data self.status = JobStatus.PENDING self.created_at = datetime.now() self.started_at: Optional[datetime] = None self.completed_at: Optional[datetime] = None self.result: Optional[Dict[str, Any]] = None self.error: Optional[str] = None self.progress = 0 self.message = "Job queued" class BackgroundJobService: """Service for managing background jobs""" def __init__(self): self.jobs: Dict[str, BackgroundJob] = {} self.workers: Dict[str, threading.Thread] = {} self.job_handlers: Dict[str, Callable] = {} self.max_concurrent_jobs = 3 # Register job handlers self._register_job_handlers() def _register_job_handlers(self): """Register handlers for different job types""" self.job_handlers = { 'bing_comprehensive_insights': self._handle_bing_comprehensive_insights, 'bing_data_collection': self._handle_bing_data_collection, 'analytics_refresh': self._handle_analytics_refresh, } def create_job(self, job_type: str, user_id: str, data: Dict[str, Any]) -> str: """Create a new background job""" job_id = f"{job_type}_{user_id}_{int(time.time())}" job = BackgroundJob(job_id, job_type, user_id, data) self.jobs[job_id] = job logger.info(f"Created background job: {job_id} for user {user_id}") # Start the job if we have capacity if len(self.workers) < self.max_concurrent_jobs: self._start_job(job_id) else: logger.info(f"Job {job_id} queued - max concurrent jobs reached") return job_id def _start_job(self, job_id: str): """Start a background job""" if job_id not in self.jobs: logger.error(f"Job {job_id} not found") return job = self.jobs[job_id] if job.status != JobStatus.PENDING: logger.warning(f"Job {job_id} is not pending, current status: {job.status}") return # Create worker thread worker = threading.Thread( target=self._run_job, args=(job_id,), daemon=True, name=f"BackgroundJob-{job_id}" ) self.workers[job_id] = worker job.status = JobStatus.RUNNING job.started_at = datetime.now() job.message = "Job started" worker.start() logger.info(f"Started background job: {job_id}") def _run_job(self, job_id: str): """Run a background job in a separate thread""" try: job = self.jobs[job_id] handler = self.job_handlers.get(job.job_type) if not handler: raise ValueError(f"No handler registered for job type: {job.job_type}") logger.info(f"Running job {job_id}: {job.job_type}") # Run the job handler result = handler(job) # Mark job as completed job.status = JobStatus.COMPLETED job.completed_at = datetime.now() job.result = result job.progress = 100 job.message = "Job completed successfully" logger.info(f"Completed job {job_id} in {(job.completed_at - job.started_at).total_seconds():.2f}s") except Exception as e: logger.error(f"Job {job_id} failed: {e}") job = self.jobs.get(job_id) if job: job.status = JobStatus.FAILED job.completed_at = datetime.now() job.error = str(e) job.message = f"Job failed: {str(e)}" finally: # Clean up worker thread if job_id in 
self.workers: del self.workers[job_id] # Start next pending job self._start_next_pending_job() def _start_next_pending_job(self): """Start the next pending job if we have capacity""" if len(self.workers) >= self.max_concurrent_jobs: return # Find next pending job for job_id, job in self.jobs.items(): if job.status == JobStatus.PENDING: self._start_job(job_id) break def get_job_status(self, job_id: str) -> Optional[Dict[str, Any]]: """Get the status of a job""" job = self.jobs.get(job_id) if not job: return None return { 'job_id': job.job_id, 'job_type': job.job_type, 'user_id': job.user_id, 'status': job.status.value, 'progress': job.progress, 'message': job.message, 'created_at': job.created_at.isoformat(), 'started_at': job.started_at.isoformat() if job.started_at else None, 'completed_at': job.completed_at.isoformat() if job.completed_at else None, 'result': job.result, 'error': job.error } def get_user_jobs(self, user_id: str, limit: int = 10) -> list: """Get recent jobs for a user""" user_jobs = [] for job in self.jobs.values(): if job.user_id == user_id: user_jobs.append(self.get_job_status(job.job_id)) # Sort by created_at descending and limit user_jobs.sort(key=lambda x: x['created_at'], reverse=True) return user_jobs[:limit] def cancel_job(self, job_id: str) -> bool: """Cancel a pending job""" job = self.jobs.get(job_id) if not job: return False if job.status == JobStatus.PENDING: job.status = JobStatus.CANCELLED job.message = "Job cancelled" logger.info(f"Cancelled job {job_id}") return True return False def cleanup_old_jobs(self, max_age_hours: int = 24): """Clean up old completed/failed jobs""" cutoff_time = datetime.now() - timedelta(hours=max_age_hours) jobs_to_remove = [] for job_id, job in self.jobs.items(): if (job.status in [JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED] and job.created_at < cutoff_time): jobs_to_remove.append(job_id) for job_id in jobs_to_remove: del self.jobs[job_id] if jobs_to_remove: logger.info(f"Cleaned up {len(jobs_to_remove)} old jobs") # Job Handlers def _handle_bing_comprehensive_insights(self, job: BackgroundJob) -> Dict[str, Any]: """Handle Bing comprehensive insights generation""" try: user_id = job.user_id site_url = job.data.get('site_url', 'https://www.alwrity.com/') days = job.data.get('days', 30) logger.info(f"Generating comprehensive Bing insights for user {user_id}") # Import here to avoid circular imports from services.analytics.insights.bing_insights_service import BingInsightsService import os database_url = os.getenv('DATABASE_URL', 'sqlite:///./bing_analytics.db') insights_service = BingInsightsService(database_url) job.progress = 10 job.message = "Getting performance insights..." # Get performance insights performance_insights = insights_service.get_performance_insights(user_id, site_url, days) job.progress = 30 job.message = "Getting SEO insights..." # Get SEO insights seo_insights = insights_service.get_seo_insights(user_id, site_url, days) job.progress = 60 job.message = "Getting competitive insights..." # Get competitive insights competitive_insights = insights_service.get_competitive_insights(user_id, site_url, days) job.progress = 80 job.message = "Getting actionable recommendations..." # Get actionable recommendations recommendations = insights_service.get_actionable_recommendations(user_id, site_url, days) job.progress = 95 job.message = "Finalizing results..." 
            # Combine all insights
            comprehensive_insights = {
                'performance': performance_insights,
                'seo': seo_insights,
                'competitive': competitive_insights,
                'recommendations': recommendations,
                'generated_at': datetime.now().isoformat(),
                'site_url': site_url,
                'analysis_period': f"{days} days"
            }

            job.progress = 100
            job.message = "Comprehensive insights generated successfully"

            logger.info(f"Successfully generated comprehensive Bing insights for user {user_id}")
            return comprehensive_insights

        except Exception as e:
            logger.error(f"Error generating comprehensive Bing insights: {e}")
            raise

    def _handle_bing_data_collection(self, job: BackgroundJob) -> Dict[str, Any]:
        """Handle Bing data collection from API"""
        try:
            user_id = job.user_id
            site_url = job.data.get('site_url', 'https://www.alwrity.com/')
            days_back = job.data.get('days_back', 30)

            logger.info(f"Collecting Bing data for user {user_id}")

            # Import here to avoid circular imports
            from services.bing_analytics_storage_service import BingAnalyticsStorageService
            import os

            database_url = os.getenv('DATABASE_URL', 'sqlite:///./bing_analytics.db')
            storage_service = BingAnalyticsStorageService(database_url)

            job.progress = 20
            job.message = "Collecting fresh data from Bing API..."

            # Collect and store data
            success = storage_service.collect_and_store_data(user_id, site_url, days_back)

            job.progress = 80
            job.message = "Generating daily metrics..."

            # Generate daily metrics
            if success:
                job.progress = 100
                job.message = "Data collection completed successfully"
                return {
                    'success': True,
                    'message': f'Collected {days_back} days of Bing data',
                    'site_url': site_url,
                    'collected_at': datetime.now().isoformat()
                }
            else:
                raise Exception("Failed to collect data from Bing API")

        except Exception as e:
            logger.error(f"Error collecting Bing data: {e}")
            raise

    def _handle_analytics_refresh(self, job: BackgroundJob) -> Dict[str, Any]:
        """Handle analytics refresh for all platforms"""
        try:
            user_id = job.user_id
            platforms = job.data.get('platforms', ['bing', 'gsc'])

            logger.info(f"Refreshing analytics for user {user_id}, platforms: {platforms}")

            # Import here to avoid circular imports
            from services.analytics import PlatformAnalyticsService

            analytics_service = PlatformAnalyticsService()

            job.progress = 20
            job.message = "Invalidating cache..."

            # Invalidate cache
            analytics_service.invalidate_user_cache(user_id)

            job.progress = 60
            job.message = "Refreshing analytics data..."

            # Get fresh analytics data (run the async call inside this worker thread)
            analytics_data = asyncio.run(analytics_service.get_comprehensive_analytics(user_id, platforms))

            job.progress = 90
            job.message = "Generating summary..."

            # Generate summary
            summary = analytics_service.get_analytics_summary(analytics_data)

            job.progress = 100
            job.message = "Analytics refresh completed"

            return {
                'success': True,
                'analytics_data': {k: v.__dict__ for k, v in analytics_data.items()},
                'summary': summary,
                'refreshed_at': datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"Error refreshing analytics: {e}")
            raise


# Global instance
background_job_service = BackgroundJobService()
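

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed on import): how a caller such
# as an API route might enqueue a job on the shared global instance and poll
# its status. The 'demo_user' id below is hypothetical; the payload keys
# mirror what _handle_bing_data_collection reads from job.data.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    job_id = background_job_service.create_job(
        job_type='bing_data_collection',
        user_id='demo_user',  # hypothetical user id for the demo
        data={'site_url': 'https://www.alwrity.com/', 'days_back': 7},
    )

    # Simple polling loop for the demo; a web layer would instead expose
    # get_job_status() behind a status endpoint and poll from the client.
    while True:
        status = background_job_service.get_job_status(job_id)
        print(f"{status['status']}: {status['progress']}% - {status['message']}")
        if status['status'] in ('completed', 'failed', 'cancelled'):
            break
        time.sleep(1)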